This notebook uses object-detection results to crop each image around either the most centered or the biggest detected object.

In [None]:
import os

import pandas as pd
from IPython.display import display
from PIL import Image

In [None]:
df = pd.read_csv('../input/shopee-train-with-objects/train_obj_05.csv')
# The dataframe is recovered from a file, so the list-valued columns arrive as
# strings and must be parsed back into Python lists. This costly step will not
# be necessary in the test eval, since objects are computed in the same instance.
# NOTE(review): eval() executes whatever the CSV cells contain. That is only
# acceptable while the file is a trusted, self-generated artifact; if the cells
# are plain literals, ast.literal_eval would be the safer parser.
features = ['class_index', 'confidence', 'norm_area', 'coordinates', 'norm_dis_to_org']
new_cols = [f'objects_{feature}' for feature in features]
for col in new_cols:
    # Parse cell by cell on the single column instead of a row-wise
    # DataFrame.apply, which rebuilds every full row just to read one value.
    df[col] = df[col].map(lambda cell: eval(cell))
# Show only a preview rather than dumping the whole dataframe.
df.head()

Load the object class names so that the objects to crop can be specified by name rather than by index.

In [None]:
# Read the list of object class names; the context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
# NOTE(review): eval() executes whatever the file contains. That is only
# acceptable while the file is a trusted, self-generated artifact; if it holds a
# plain list literal, ast.literal_eval would be the safer parser.
with open('../input/shopee-train-with-objects/objects_names.txt') as names_file:
    objects_names = eval(names_file.read())

Path to images

In [None]:
# Directory that holds the original training images; the image file names
# stored in the dataframe are appended to it to build full paths.
path_to_images = '../input/shopee-product-matching/train_images/'

# Specify which objects to crop

Note that the same result could be obtained by filtering the detected objects up front. Done this way, however, the selection can be changed dynamically without having to load the objects again.

In [None]:
# Names of the object classes that are allowed to drive a crop.
objects_to_crop = ['bottle']  # objects_to_crop = objects_names (to consider all)
# Positions of those names inside objects_names, which is how detections refer
# to their class.
indexes_to_crop = list(map(objects_names.index, objects_to_crop))

Auxiliary functions to filter objects to consider

In [None]:
def get_indexes_to_consider(row):
    """Return the positions of the row's detections whose class is selected.

    Walks row['objects_class_index'] and keeps the position of every entry
    that is present in the module-level indexes_to_crop.
    """
    selected_positions = []
    for position, object_index in enumerate(row['objects_class_index']):
        if object_index in indexes_to_crop:
            selected_positions.append(position)
    return selected_positions
def items_from_indexes(row, attribute, indexes_to_consider):
    """Return the entries of row[attribute] found at the given positions.

    The result follows the order of indexes_to_consider.
    """
    picked = []
    for position in indexes_to_consider:
        picked.append(row[attribute][position])
    return picked

# Functions to choose object to crop

In [None]:
def most_center_coordinates(row, objects_coordinates, indexes_to_consider):
    """Return the coordinates of the candidate with the smallest distance value.

    The distance values are read from row['objects_norm_dis_to_org'] at the
    positions in indexes_to_consider (presumably a normalized distance to the
    image center -- confirm against the detection step). objects_coordinates
    must be aligned with indexes_to_consider. On ties the first candidate wins.
    """
    distances = items_from_indexes(row, 'objects_norm_dis_to_org', indexes_to_consider)
    closest_position, _ = min(enumerate(distances), key=lambda pair: pair[1])
    return objects_coordinates[closest_position]

def biggest_coordinates(row, objects_coordinates, indexes_to_consider):
    """Return the coordinates of the candidate with the largest area value.

    The area values are read from row['objects_norm_area'] at the positions in
    indexes_to_consider. objects_coordinates must be aligned with
    indexes_to_consider. On ties the first candidate wins.
    """
    areas = items_from_indexes(row, 'objects_norm_area', indexes_to_consider)
    largest_position, _ = max(enumerate(areas), key=lambda pair: pair[1])
    return objects_coordinates[largest_position]

def best_coordinates(row, indexes_to_consider, strategy=None):
    """Return the coordinates of the object that the image should be cropped to.

    Parameters
    ----------
    row : mapping with the 'objects_*' columns of one image.
    indexes_to_consider : positions of the detections eligible for cropping.
    strategy : optional callable taking (row, objects_coordinates,
        indexes_to_consider) and returning one entry of objects_coordinates,
        e.g. biggest_coordinates or most_center_coordinates. When omitted,
        biggest_coordinates is used.

    Returns
    -------
    The coordinates entry chosen by the strategy.
    """
    if strategy is None:
        # Resolved at call time so that re-running the cell defining the
        # strategies is picked up without redefining this function.
        strategy = biggest_coordinates
    objects_coordinates = items_from_indexes(row, 'objects_coordinates', indexes_to_consider)
    return strategy(row, objects_coordinates, indexes_to_consider)

# Crop images

In [None]:
paths = []  # Paths of every crop written by crop_image, in processing order


def crop_image(row):
    """Crop the row's image to its best selected object and save the result.

    The crop is written to the current working directory under the original
    file name with '_cropped' inserted before the extension.

    Parameters
    ----------
    row : mapping with an 'image' file name and the 'objects_*' columns.

    Returns
    -------
    The path of the saved crop, or None when the row has no detected objects
    or none of them belongs to the selected classes. Each saved path is also
    appended to the module-level ``paths`` list.
    """
    if not row['objects_norm_area']:  # No detected objects at all
        return None
    indexes_to_consider = get_indexes_to_consider(row)
    if not indexes_to_consider:  # No detected object from the desired list
        return None

    # splitext copes with extensions of any length (e.g. '.jpeg'), which
    # fixed-width slicing of the file name does not.
    stem, extension = os.path.splitext(row['image'])
    image_path_cropped = f'{stem}_cropped{extension}'

    # The context manager releases the source file even if cropping or saving
    # fails; the crop is saved while the source image is still open.
    with Image.open(os.path.join(path_to_images, row['image'])) as img:
        img_cropped = img.crop(best_coordinates(row, indexes_to_consider))
        img_cropped.save(image_path_cropped)

    # Record the path only once the file has actually been written.
    paths.append(image_path_cropped)
    return image_path_cropped

In [None]:
%%time
# Run crop_image on every row; each row yields the path of the saved crop, or
# None when the row has no detected object from the selected classes.
df.apply(crop_image, axis=1)

# Check crops

In [None]:
# Original image; the context manager closes the file once it has been shown.
with Image.open('../input/shopee-product-matching/train_images/0c4d36922e3907cd4ece22654fd998b9.jpg') as img:
    display(img)

In [None]:
# Cropped counterpart; the context manager closes the file once it has been shown.
with Image.open('0c4d36922e3907cd4ece22654fd998b9_cropped.jpg') as img:
    display(img)

In [None]:
# Original image; the context manager closes the file once it has been shown.
with Image.open('../input/shopee-product-matching/train_images/d6c97d3fbc979bdd5e70c998433bb958.jpg') as img:
    display(img)

In [None]:
# Cropped counterpart; the context manager closes the file once it has been shown.
with Image.open('d6c97d3fbc979bdd5e70c998433bb958_cropped.jpg') as img:
    display(img)