# import libraries

In [None]:
import cv2
import glob
import gc
import imageio
from multiprocessing import cpu_count
import os
from PIL import Image
import pathlib
import random
import sys
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

In [None]:
# import torch
# import torchvision
# import torch.nn as nn
# import torch.nn.functional as F
# from torch.utils.data import Dataset, DataLoader
# print(f'python version: {sys.version}')
# print(f'torch version: {torch.__version__}')

# CFG

In [None]:
# Aesthetics: remove the cap on how many DataFrame columns pandas displays
pd.options.display.max_columns = None

# Utilities

In [None]:
def return_filpath(name, folder):
    """Return the path of the file `<name>.jpg` located inside `folder`."""
    return os.path.join(folder, '{}.jpg'.format(name))

# Load YOLOv5 offline and define the pet helper functions (pet labels & crop info)

In [None]:
# Workaround for loading YOLOv5 without going online: copy files shipped in the
# '/kaggle/input/yolov5' input dataset to paths under /root.
# NOTE(review): presumably these are the locations torch.hub and the
# Ultralytics code read from at load time — confirm.

# Copy the saved torch cache directory to /root/.cache/torch
!cp -R '/kaggle/input/yolov5/torch/root/.cache/torch' '/root/.cache/torch'
# Copy the saved Ultralytics config directory to /root/.config/Ultralytics
!cp -R '/kaggle/input/yolov5/ultralytics/root/.config/Ultralytics' '/root/.config/Ultralytics'

In [None]:
# Load the 'yolov5x6' variant of YOLOv5 through torch.hub
# (chosen by the author as the best performing model)
yolov5x6_model = torch.hub.load(repo_or_dir='ultralytics/yolov5', model='yolov5x6')

In [None]:
def get_pet_label(file_path):
    """Return an integer code for the first cat or dog detected in an image.

    The image is read from `file_path` and passed to the module-level
    `yolov5x6_model` with test-time augmentation enabled. Detections are
    scanned in the order the model returns them, and the code of the first
    one whose class name is 'cat' or 'dog' is returned.

    Args:
        file_path: Path of the image file to analyse.

    Returns:
        1 for a cat, 0 for a dog, or 2 ("unknown") when neither is detected.
    """
    # The original code compared the class *name* (a string) against [1, 0],
    # which never matched, so every image was reported as unknown (2).
    # NOTE(review): cat=1 / dog=0 is inferred from the order of that literal
    # (it looks like ['cat', 'dog'] replaced position by position) — confirm
    # against whatever consumes these codes.
    pet_codes = {'cat': 1, 'dog': 0}
    unknown_code = 2

    # Read Image
    image = imageio.imread(file_path)

    # Get YOLOV5 results using Test Time Augmentation for better result
    results = yolov5x6_model(image, augment=True)

    # Each detection row is (x1, y1, x2, y2, confidence, class index)
    for *_box, _confidence, class_idx in results.xyxy[0].cpu().detach().numpy():
        # Map integer encoded label to its class name
        class_name = results.names[int(class_idx)]
        # Per the original author's note, detections are already sorted on
        # confidence, so the first cat or dog found is the one returned
        if class_name in pet_codes:
            return pet_codes[class_name]

    # Could not detect pet, "unknown" label
    return unknown_code

In [None]:
def crop_pets(file_path, filename, plot=False, out_dir="./train_petcrop"):
    """Crop an image to the box enclosing all detected cats/dogs and save it.

    The image is passed to the module-level `yolov5x6_model` with test-time
    augmentation. The union of the bounding boxes of every detection named
    'cat' or 'dog' is cropped out and written to `<out_dir>/<filename>.jpg`.
    When no pet is detected the image is re-saved uncropped under that name.

    Args:
        file_path: Path of the source image.
        filename: Base name (without extension) of the output JPEG.
        plot: Unused; kept so existing callers that pass it keep working.
        out_dir: Folder the output JPEG is written to. It must already exist.

    Returns:
        The PIL image opened from `file_path`. This is the uncropped source;
        the cropped version is only written to disk.
    """
    # Read Image
    image = imageio.imread(file_path)
    # Only height and width are needed. Slicing the shape (instead of
    # unpacking three values) also works for 2-D grayscale arrays, which
    # previously raised ValueError.
    h, w = image.shape[:2]

    # Get YOLOV5 results using Test Time Augmentation for better result
    results = yolov5x6_model(image, augment=True)

    # Start from an "inverted" box so the first pet found overrides it
    x_min, x_max = w - 1, 0
    y_min, y_max = h - 1, 0
    n_pets = 0

    # Each detection row is (x1, y1, x2, y2, confidence, class index)
    for x1, y1, x2, y2, _confidence, class_idx in results.xyxy[0].cpu().detach().numpy():
        if results.names[int(class_idx)] not in ('cat', 'dog'):
            continue
        # Grow the enclosing box to include this pet
        n_pets += 1
        x_min = min(x_min, x1)
        x_max = max(x_max, x2)
        y_min = min(y_min, y1)
        y_max = max(y_max, y2)

    out_path = os.path.join(out_dir, f"{filename}.jpg")
    im = Image.open(file_path)
    if n_pets >= 1:
        print(f"x_min:{x_min}, x_max:{x_max}, y_min:{y_min}, y_max:{y_max}")
        im.crop((x_min, y_min, x_max, y_max)).save(out_path, quality=95)
    else:
        print(f"404 PETS NOT FOUND. x_min:{x_min}, x_max:{x_max}, y_min:{y_min}, y_max:{y_max}")
        im.save(out_path, quality=95)

    return im

# Load train and set fold

In [None]:
# Root folder of the competition data
dataset_path = pathlib.Path('..') / 'input' / 'petfinder-pawpularity-score'
# Training table read from the 'make-new-csv' input
train_csv = "../input/make-new-csv/train_add_f.csv"
train_df = pd.read_csv(train_csv)
# Preview the first rows
train_df.head(n=5)

# Train data petcrop

In [None]:
# Make sure the output folder for the cropped training images exists
if os.path.exists("./train_petcrop"):
    print("./train_petcrop directory is already exists")
else:
    os.makedirs("./train_petcrop")
    print("train_petcrop directory is created")

In [None]:
# Build the full path of every training image from its Id
train_df['path'] = train_df['Id'].map(lambda x: str(dataset_path / 'train' / x) + '.jpg')

# Walk Ids and paths together. The previous version looked the Id up with the
# enumerate counter (train_df["Id"][idx]), which is a label lookup and only
# lines up with the row position when the index is the default 0..n-1.
for filename, file_path in zip(train_df['Id'], train_df['path']):
    crop_pets(file_path, filename, plot=True)
    # Free memory after every image
    gc.collect()

In [None]:
# Preview the first rows of the training table
train_df.head(n=5)