In [4]:
''' 
Link to Documentation: https://github.com/ultralytics/yolov5

RUN IN TERMINAL BEFORE PROCEEDED IN THE 'fashion-forecast' DIRECTORY:
>   pip install ultralytics
>   git clone https://github.com/ultralytics/yolov5  
>   cd yolov5
>   pip install -r requirements.txt  
>   cd ..
'''

" \nLink to Documentation: https://github.com/ultralytics/yolov5\n\nRUN IN TERMINAL BEFORE PROCEEDED IN THE 'fashion-forecast' DIRECTORY:\n>   pip install ultralytics\n>   git clone https://github.com/ultralytics/yolov5  \n>   cd yolov5\n>   pip install -r requirements.txt  \n>   cd ..\n"

In [5]:
import torch
import numpy as np
import pandas as pd
import os
import math
from PIL import Image

%matplotlib inline 
from matplotlib import pyplot as plt

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [6]:
model = torch.hub.load('ultralytics/yolov5', 'yolov5n', pretrained=True) #force_reload=True

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /home/cardon/.cache/torch/hub/master.zip
YOLOv5 🚀 2024-4-16 Python-3.11.5 torch-2.2.2+cu121 CPU

Fusing layers... 
YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients, 4.5 GFLOPs
Adding AutoShape... 


In [7]:
'''
CODE TO CHANGE DEVICE:
model.cpu()  # CPU
model.cuda()  # GPU
model.to(device)  # i.e. device=torch.device(0)
'''

'\nCODE TO CHANGE DEVICE:\nmodel.cpu()  # CPU\nmodel.cuda()  # GPU\nmodel.to(device)  # i.e. device=torch.device(0)\n'

In [8]:
data = pd.read_csv('filtered_style_stats.csv')
 
# no of csv files with row size
size = 10000
k = math.ceil(len(data.index)/size)
 
for i in range(k):
    df = data[size*i:size*(i+1)]
    df.to_csv(f'./split_csv/style_stats_{i+1}.csv', index=False)


In [9]:
def read_images(img_list, img_names, seasons_only_csv):
    counter = 0
    directory = './yolov5/yolov5/runs/exp4/labels/'
    for img_name in os.listdir(directory):
        if counter == len(seasons_only_csv.index):
            break
        if img_name not in seasons_only_csv.iloc[:, 0].values:
            continue
        if counter%((len(seasons_only_csv.index)/100)*5) == 0:
            print(f"Running... {(counter/len(seasons_only_csv.index))*100:.2f}% finished")
        img_path = os.path.join('./yolov5/yolov5/images/', img_name)
        pil_img = Image.open(img_path)
        np_img = np.array(pil_img)
        #plt.imshow(np_img)             # used to display the images
        #plt.show()
        img_list.append(np_img)
        img_names.append(str(img_path))
        counter += 1
    print(f'Finished with a set of {len(img_list)} images')
    return img_list,img_names 

In [10]:
"""Takes the current results and saves only the information from the maximum 'person' detections
def prioritize_detections_old(detection_df):
    cleaned_df_lst = []
    user_grouped = detection_df.groupby('image')
    for idx, (username, grp) in enumerate(user_grouped):
        person_grp = grp[grp['class name']=='person']
        if (person_grp.size() == 0):
            continue
        if (person_grp.size() == 1):
            row = person_grp.iloc[0].tolist()
            row.append(idx)
            cleaned_df_lst.append(row)
            continue
        # multiple detections -> choose the highest confidence of person
        maxValueIndex = person_grp['confidence'].idxmax()
        row = person_grp.iloc[maxValueIndex].tolist()
        row.append(idx)
        cleaned_df_lst.append(row)

    col_names = list(detection_df[0].columns) + ['df_idx']
    cleaned_df = pd.DataFrame(cleaned_df_lst, columns = col_names)
    return cleaned_df
"""

"Takes the current results and saves only the information from the maximum 'person' detections\ndef prioritize_detections_old(detection_df):\n    cleaned_df_lst = []\n    user_grouped = detection_df.groupby('image')\n    for idx, (username, grp) in enumerate(user_grouped):\n        person_grp = grp[grp['class name']=='person']\n        if (person_grp.size() == 0):\n            continue\n        if (person_grp.size() == 1):\n            row = person_grp.iloc[0].tolist()\n            row.append(idx)\n            cleaned_df_lst.append(row)\n            continue\n        # multiple detections -> choose the highest confidence of person\n        maxValueIndex = person_grp['confidence'].idxmax()\n        row = person_grp.iloc[maxValueIndex].tolist()\n        row.append(idx)\n        cleaned_df_lst.append(row)\n\n    col_names = list(detection_df[0].columns) + ['df_idx']\n    cleaned_df = pd.DataFrame(cleaned_df_lst, columns = col_names)\n    return cleaned_df\n"

In [11]:
"""Takes the current results and saves only the information from the maximum 'person' detections"""
def prioritize_detections(detection_df):
    max_confidence = detection_df.groupby(['image'])['confidence'].transform('max')
    #print(max_confidence)
    cleaned_df = detection_df.loc[detection_df['confidence'] == max_confidence]
    #print(cleaned_df)
    return cleaned_df

In [22]:
# Used to crop image in a 1:1 aspect ratio no smaller than 256 pixels centered around the person detection
def crop_image(img, xmin, ymin, xmax, ymax): 
    w, h = (ymax-ymin), (xmax-xmin)
    max_dim = max(w, h)
    
    img = img[xmin:xmax, ymin:ymax]
    img_square = np.ones([max_dim,max_dim], dtype=np.uint8)*0 # chose black for the least data insight
    background = Image.fromarray(img_square).convert('RGB')
    pil_img = Image.fromarray(img)

    background.paste(pil_img, (ymin,xmin))
    return background

In [23]:
"""Takes the cleaned df data and crops each image to it's detected bounding box dimensions"""
def crop_detections(cleaned_df, TOTAL_IMG):
    directory = './yolov5/yolov5/images/'
    save_dir = './yolov5/yolov5/crop-images/'
    df_idx = 0
    for idx, img_name in enumerate(os.listdir(directory)):
        if idx % 10000 == 0:
            print(f"{idx/TOTAL_IMG * 100:2f}% photos run through")
            print(f"{df_idx} cropped images saved")
        if idx == TOTAL_IMG:
            break
        #print(img_name[:-4] not in cleaned_df['image'].values)
        if img_name[:-4] not in cleaned_df['image'].values:
            continue
        xmin, ymin, xmax, ymax = cleaned_df.iloc[df_idx][['xmin', 'ymin', 'xmax', 'ymax']]
        img_path = os.path.join(directory, img_name)
        pil_img = Image.open(img_path)
        np_img = np.array(pil_img)
        crop_img = crop_image(np_img, int(xmin), int(ymin), int(xmax), int(ymax))

        save_path = os.path.join(save_dir, img_name)
        #print(save_path)
        crop_img.save(save_path)
        df_idx += 1

        
    return df_idx


In [24]:
directory = './split_csv/'
'''for filename in os.listdir(directory):
    print(f"---------------------------------------\nBeginning {filename} batch\n")
    
    filepath = os.path.join(directory, filename)
    df = pd.read_csv(filepath)
    img_list = []
    img_names = []
    img_list, img_names = read_images(img_list, img_names, df)

    print(f"\nRunning the model")
    results = model(img_list)  # inference
    results.save() # cropped detections dictionary
    #results.show()
'''
print(f"---------------------------------------\nBeginning run\n")
print(f"Uploading model results to a pandas dataframe")
detection_df = pd.read_csv("yolov5_labels.csv", header=0)
print(f"Results saved with length {len(detection_df)}\n")
'''
for idx, df in enumerate(detection_df):
    person_df = df[df['name']=='person']
    print(person_df.index)
'''

print(f"Removing all inaccurate and low confidence detections")
cleaned_df = prioritize_detections(detection_df)
TOTAL_IMG = len(cleaned_df.index)
print(f"Left with a dataset of {TOTAL_IMG} images\n")

print(f"Performing final image crop")
num_saved = crop_detections(cleaned_df, TOTAL_IMG)
print(f"Cropping complete.\n{num_saved} saved to \'./yolov5/yolov5/crop-data/\'")
print("Execution complete for full dataset :D")

    

---------------------------------------
Beginning run

Uploading model results to a pandas dataframe
Results saved with length 345883

Removing all inaccurate and low confidence detections
Left with a dataset of 276648 images

Performing final image crop
0.000000% photos run through
0 cropped images saved
3.614702% photos run through
9822 cropped images saved
7.229403% photos run through
19705 cropped images saved
10.844105% photos run through
29526 cropped images saved
14.458807% photos run through
39384 cropped images saved
18.073509% photos run through
49223 cropped images saved
21.688210% photos run through
58935 cropped images saved
25.302912% photos run through
68787 cropped images saved
28.917614% photos run through
78620 cropped images saved
32.532315% photos run through
88460 cropped images saved
36.147017% photos run through
98183 cropped images saved
39.761719% photos run through
107948 cropped images saved
43.376421% photos run through
117617 cropped images saved
46.991122%