Notebook that detects objects in Shopee train set and outputs information in One-Hot format

In [None]:
import glob
import math
import os
from collections import defaultdict

import pandas as pd
import torch
import tqdm

In [None]:
# Confidence threshold assigned to the detector (yolov5.conf) further down;
# int(YOLO_CONFIDENCE*10) is also embedded in the name of the output CSV file.
YOLO_CONFIDENCE = 0.3

# Load data

In [None]:
# Read the training metadata. Its 'image' column is used later as the key
# to look up the detections of each row.
df_train = pd.read_csv('../input/shopee-product-matching/train.csv')
df_train

# Load Object Detector

In [None]:
# Load the 'yolov5m' entry point through torch.hub from a local copy of the
# YOLOv5 repository (source='local'), then set its confidence threshold.
yolov5 = torch.hub.load('../input/yolov5-git/yolov5-master/yolov5-master', 'yolov5m', source='local')
yolov5.conf = YOLO_CONFIDENCE  # confidence threshold

# Predict objects

Get names of photos

In [None]:
# Paths of every .jpg file in the training image directory.
photos = glob.glob('../input/shopee-product-matching/train_images/*.jpg')

Auxiliary functions

In [None]:
def get_area(x1, x2, y1, y2):
    """Return the area of the axis-aligned box spanned by (x1, y1) and (x2, y2).

    The corner order does not matter, since absolute side lengths are used.
    The result is rounded to 2 decimals.
    """
    width = abs(x2 - x1)
    height = abs(y2 - y1)
    return round(width * height, 2)

def get_distance_to_center(x1, x2, y1, y2):
    """Return the Euclidean distance from a box's center to the image center.

    The coordinates are expected to be normalized, so the image center is
    (0.5, 0.5). The box center is the midpoint of (x1, y1) and (x2, y2).

    Returns:
        The distance, rounded to 4 decimals.
    """
    center_x = (x1 + x2) / 2
    center_y = (y1 + y2) / 2
    # math.hypot applies the square root; without it the value would be the
    # squared distance rather than the distance the function name promises.
    return round(math.hypot(center_x - 0.5, center_y - 0.5), 4)

Predict objects. For each element, the following is obtained:
* class_index: list of objects detected. Each object is identified by an index. To access the name of the object, do yolov5.names[index]
* n_occurences: more than one instance per object may be detected, so the count is stored here
* presence: variable equal to n_occurences > 0, but stored for easier access
* confidence: confidence of prediction. Note that because of the One-Hot format, only the information of the instance with the highest confidence per object is stored
* norm_area: normalized area (computed with normalized coordinates, which go from 0 to 1)
* norm_dis_to_org: normalized distance from the center of the object to the center of the image, (0.5, 0.5) (computed with normalized coordinates, which go from 0 to 1)

In [None]:
elements_per_batch = 70
n_batches = math.ceil(len(photos)/elements_per_batch)


def _summarize_detections(detections):
    """Collapse the detections of one image into one entry per object class.

    Args:
        detections: iterable of rows, each convertible with ``.tolist()`` into
            six numbers: x1, y1, x2, y2, confidence, class index.

    Returns:
        A ``defaultdict(list)`` whose lists ('class_index', 'n_occurences',
        'presence', 'confidence', 'norm_area', 'norm_dis_to_org') are aligned
        by position. Each detected class appears once; confidence, area and
        distance come from its highest-confidence instance. The dict is empty
        when there are no detections.
    """
    summary = defaultdict(list)
    position_by_class = {}  # class index -> position of that class in the summary lists
    for detection in detections:
        x1, y1, x2, y2, confidence, class_index = [round(element, 2) for element in detection.tolist()]
        class_index = int(class_index)
        position = position_by_class.get(class_index)
        if position is None:  # first occurrence of this class in the image
            position_by_class[class_index] = len(summary['class_index'])
            summary['class_index'].append(class_index)
            summary['n_occurences'].append(1)
            summary['presence'].append(1)
            summary['confidence'].append(confidence)
            summary['norm_area'].append(get_area(x1, x2, y1, y2))
            summary['norm_dis_to_org'].append(get_distance_to_center(x1, x2, y1, y2))
            continue
        summary['n_occurences'][position] += 1
        # The model probably returns detections ordered by confidence, but the
        # comparison keeps the result correct even if it does not.
        if confidence > summary['confidence'][position]:
            summary['confidence'][position] = confidence
            summary['norm_area'][position] = get_area(x1, x2, y1, y2)
            summary['norm_dis_to_org'][position] = get_distance_to_center(x1, x2, y1, y2)
    return summary


# Maps each image file name (without directory) to its per-class summary.
# Stays empty when no photos were found.
predictions_dict = {}

for batch_number in tqdm.tqdm(range(n_batches)):
    batch_start = batch_number * elements_per_batch
    # Slicing clamps at the end of the list, so the last (shorter) batch needs no special case.
    batch_paths = photos[batch_start:batch_start + elements_per_batch]
    # File names are extracted before inference because the model modifies the list it receives.
    batch_names = [os.path.basename(path) for path in batch_paths]

    results = yolov5(batch_paths)

    for photo_name, detections in zip(batch_names, results.xyxyn):
        predictions_dict[photo_name] = _summarize_detections(detections)
    

# Objects to DataFrame

Create zero-filled columns for every object and feature, to then fill in the values with .apply per row

In [None]:
# One name fragment per detector class; spaces are replaced so that column names contain none.
obj_names_without_spaces = [name.replace(' ', '-') for name in yolov5.names]
features = ['n_occurences', 'confidence', 'norm_area', 'norm_dis_to_org', 'presence']
# Class-major order: all features of class 0, then all features of class 1, ...
# get_values_for_row relies on exactly this layout.
new_cols = [
    f'objects_{feature}_{obj_name}'
    for obj_name in obj_names_without_spaces
    for feature in features
]

# Carry over only the columns that are not object columns yet, so that
# re-running this cell does not append the same columns a second time.
new_cols_set = set(new_cols)
base_cols = [col for col in df_train.columns if col not in new_cols_set]
df_train = df_train.reindex(columns=base_cols + new_cols, fill_value=0)

Auxiliary function to transfer data to DataFrame

In [None]:
def get_values_for_row(row):
    """Build the flat feature vector for one DataFrame row.

    The detections are looked up in ``predictions_dict`` by ``row['image']``.
    The vector has ``len(features)`` slots per detector class, laid out class
    by class. Slots of classes that were not detected stay 0.

    Returns:
        A ``pd.Series`` with one value per (class, feature) pair.
    """
    detections = predictions_dict[row['image']]
    n_features = len(features)
    flat_values = [0] * n_features * len(yolov5.names)
    for position, class_index in enumerate(detections['class_index']):
        first_slot = class_index * n_features  # start of this class's group of features
        for offset, feature_name in enumerate(features):
            flat_values[first_slot + offset] = detections[feature_name][position]
    return pd.Series(flat_values)

In [None]:
# Compute the object features row by row and store them in the columns listed in new_cols.
df_train[new_cols] = df_train.apply(get_values_for_row, axis=1)

In [None]:
# Display the DataFrame with the object columns filled in.
df_train

In [None]:
# Save the result. The file name encodes the confidence threshold,
# e.g. YOLO_CONFIDENCE = 0.3 gives 'train_obj_03_one-hot.csv'.
df_train.to_csv(f'train_obj_0{int(YOLO_CONFIDENCE*10)}_one-hot.csv')