In [None]:
import os,glob
import shutil

import cv2
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from joblib import Parallel, delayed

# Base dataset: resized images and per-image statistics

In [None]:
# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# makes `paths` identical on every run so anything derived from it is reproducible.
paths = sorted(glob.glob("../input/petfinder-pawpularity-score/train/*.jpg"))
print(len(paths))
# Target lengths (in pixels) for the longer side of every resized image.
size_array = [224,384]

# One output folder per target size.
for size in size_array:
    os.makedirs(f"image_size{size}", exist_ok=True)
    
def make_img(path):
    """Write aspect-preserving resized copies of one image, one per entry in `size_array`.

    Each copy is scaled so that its longer side equals the target size and is
    saved as ``image_size{size}/{image_id}.jpg``, where ``image_id`` is the
    source file name without its extension.

    Args:
        path: Path to the source image file.

    Raises:
        OSError: If the image cannot be read or a resized copy cannot be written.
    """
    # os.path copes with either path separator and strips only the final
    # extension; the local name also no longer shadows the builtin `id`.
    image_id = os.path.splitext(os.path.basename(path))[0]

    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"cv2.imread could not read image: {path}")
    h, w = img.shape[:2]

    for size in size_array:
        r = size / max(h, w)
        # Pin the longer side to exactly `size` (int(longer * r) can come out as
        # size - 1 through float rounding) and keep the shorter side >= 1 so
        # cv2.resize never receives a zero dimension.
        new_w = size if w >= h else max(1, int(w * r))
        new_h = size if h >= w else max(1, int(h * r))
        saveimg = cv2.resize(img, (new_w, new_h))

        out_path = f"image_size{size}/{image_id}.jpg"
        # cv2.imwrite reports failure through its boolean return value.
        if not cv2.imwrite(out_path, saveimg):
            raise OSError(f"cv2.imwrite could not write image: {out_path}")
    
# Resize every training image, one joblib task per file (n_jobs=-1: all CPUs).
Parallel(n_jobs = -1, verbose = 1)(delayed(make_img)(path) for path in paths)

# Pack each output folder into <folder>.zip and then delete the folder.
# shutil raises if archiving fails, so the images are only removed once the
# archive has actually been written, and no external `zip` binary is needed.
for size in size_array:
    out_dir = f"image_size{size}"
    shutil.make_archive(out_dir, "zip", base_dir=out_dir)
    shutil.rmtree(out_dir)

In [None]:
# Collect basic per-image statistics, one entry per file, in the order of `paths`.
hl, wl = [], []        # image height / width in pixels
meanl, stdl = [], []   # mean / standard deviation over all pixel values
sl = []                # scale factor that maps the longer side to 384 px
rl = []                # shorter side divided by longer side

for img_path in paths:
    pixels = cv2.imread(img_path)
    height, width = pixels.shape[:2]
    longer, shorter = max(height, width), min(height, width)

    hl.append(height)
    wl.append(width)
    sl.append(384/longer)
    rl.append(shorter/longer)
    meanl.append(pixels.mean())
    stdl.append(pixels.std())

In [None]:
train = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")

# The statistics lists were filled in the order of `paths`, which is not
# guaranteed to match the row order of train.csv. Key them by image id (file
# name without extension) and join on "Id" so every row gets its own image's values.
image_stats = pd.DataFrame({
    "Id": [os.path.splitext(os.path.basename(p))[0] for p in paths],
    "height": hl,
    "width": wl,
    "mean": meanl,
    "std": stdl,
    "rr": sl,    # scale factor that maps the longer side to 384 px
    "rhw": rl,   # shorter side / longer side
})
# how="left" keeps train.csv's row order; validate rejects duplicated ids.
train = train.merge(image_stats, on="Id", how="left", validate="one_to_one")
assert train["height"].notna().all(), "some rows of train.csv have no matching image"

train["ratio_hw"] = train["height"]/train["width"]
train["diff_hw"] = (train["height"] - train["width"]).abs()

train

In [None]:
# Save the augmented table to the working directory. With to_csv's default
# index=True the row index is written as an unnamed first column.
train.to_csv("train.csv")

# Cropped dataset

In [None]:
# Object-detection results (presumably YOLO output, going by the path) and the
# competition metadata table.
df = pd.read_csv("../input/yolopred/objectdetection.csv",index_col=0)
df2 = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")

# Keep detections with score > 0.8 and Index == 1 whose box area (w*h in
# normalized units) exceeds 0.81.
# NOTE(review): the meaning of `Index` is not visible here - confirm.
df["area"] = df["h"]*df["w"]
confident = (df["score"] > 0.8) & (df["Index"] == 1) & (df["area"] > 0.81)
df = df[confident].drop(["score","Index"],axis=1)

# Convert centre/size boxes to corner coordinates.
half_w = df["w"]/2
half_h = df["h"]/2
df["xmin"] = df["x"] - half_w
df["xmax"] = df["x"] + half_w
df["ymin"] = df["y"] - half_h
df["ymax"] = df["y"] + half_h

# Discard boxes that extend outside the [0, 1] image frame.
inside = (df["xmin"] >= 0) & (df["ymin"] >= 0) & (df["xmax"] <= 1.0) & (df["ymax"] <= 1.0)
df = df[inside]

# Attach the Group/Info flags and keep rows where both are 0, restricted to
# labels 15 and 16 (presumably the detector's cat/dog class ids - TODO confirm).
df = pd.merge(df,df2[["Id","Group","Info"]],on=["Id"])
wanted = (df["Info"] == 0) & (df["Group"] == 0) & df["label"].isin([15,16])
df = df[wanted].drop(["Info","Group","label"],axis=1).reset_index(drop=True)

df

In [None]:
# Training rows that have a usable crop. An Id can survive the detection filter
# more than once (several boxes for one image); de-duplicate the keys so the
# inner join cannot repeat a training row.
train = pd.merge(df2,df[["Id"]].drop_duplicates(),on=["Id"])
train.to_csv("train_crop.csv")

In [None]:
# Target lengths (in pixels) for the longer side of every resized crop.
size_array = [224, 384]

# One output folder per target size for the cropped images.
for size in size_array:
    crop_dir = f"image_size{size}_crop"
    os.makedirs(crop_dir, exist_ok=True)
    
def make_cropimg(row):
    """Crop one training image to its detection box and save resized copies.

    The image ``{row.Id}.jpg`` is read from the competition train folder,
    cropped to the box given by the row, and then written once per entry in
    `size_array` as ``image_size{size}_crop/{row.Id}.jpg`` with the longer side
    of the crop scaled to that size.

    Args:
        row: Record exposing ``Id`` and the box corners ``xmin``, ``xmax``,
            ``ymin``, ``ymax`` as fractions of the image width/height.

    Raises:
        OSError: If the image cannot be read or a resized crop cannot be written.
        ValueError: If the box selects an empty region of the image.
    """
    path = f"../input/petfinder-pawpularity-score/train/{row.Id}.jpg"
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"cv2.imread could not read image: {path}")

    # Scale the normalized corners to pixel indices and crop.
    h, w = img.shape[:2]
    xmin,xmax,ymin,ymax = int(row.xmin*w),int(row.xmax*w),int(row.ymin*h),int(row.ymax*h)
    img = img[ymin:ymax,xmin:xmax]
    h, w = img.shape[:2]
    if h == 0 or w == 0:
        raise ValueError(f"empty crop for image {row.Id}")

    for size in size_array:
        r = size / max(h, w)
        # Pin the longer side to exactly `size` (int(longer * r) can come out as
        # size - 1 through float rounding) and keep the shorter side >= 1 so
        # cv2.resize never receives a zero dimension.
        new_w = size if w >= h else max(1, int(w * r))
        new_h = size if h >= w else max(1, int(h * r))
        saveimg = cv2.resize(img, (new_w, new_h))

        out_path = f"image_size{size}_crop/{row.Id}.jpg"
        # cv2.imwrite reports failure through its boolean return value.
        if not cv2.imwrite(out_path, saveimg):
            raise OSError(f"cv2.imwrite could not write image: {out_path}")
        
# Crop and resize every selected detection, one joblib task per row (n_jobs=-1: all CPUs).
Parallel(n_jobs = -1, verbose = 1)(delayed(make_cropimg)(row) for index,row in df.iterrows())

# Pack each output folder into <folder>.zip and then delete the folder.
# shutil raises if archiving fails, so the crops are only removed once the
# archive has actually been written, and no external `zip` binary is needed.
for size in size_array:
    out_dir = f"image_size{size}_crop"
    shutil.make_archive(out_dir, "zip", base_dir=out_dir)
    shutil.rmtree(out_dir)