In [1]:
import ast
import json
import os
import random

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torchvision.transforms as transforms
from PIL import Image

In [2]:
# Notebook-wide switch: passed as `save_csv=` to the dataframe builders below,
# so True makes those calls also write their result to a CSV file.
save_csv_file = True

In [3]:
def read_megadetector_json(path_json:str = 'train_features_output.json'):
    """Load the per-image records of a MegaDetector output file.

    Parameters
    ----------
    path_json : str
        Path to a JSON file that has a top-level ``"images"`` list.

    Returns
    -------
    pd.DataFrame
        One row per entry of ``"images"``, with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If the JSON document has no ``"images"`` key.
    """
    # The context manager closes the handle deterministically; the previous
    # bare ``open()`` left it to the garbage collector. JSON is UTF-8 by
    # specification, so do not depend on the platform default encoding.
    with open(path_json, encoding='utf-8') as json_file:
        mega_json = json.load(json_file)

    return pd.DataFrame(mega_json["images"]).reset_index(drop=True)


def transform_bbox(image=None, normalized_bbox=None, image_size:int=None, margin:int=4):
    """Convert a normalized ``(x, y, width, height)`` box to pixel ``(y, x, height, width)``.

    The box is scaled to pixel units and then shrunk by ``margin`` pixels on
    every side (x/y move inwards by ``margin``, width/height lose ``2 * margin``).

    Parameters
    ----------
    image : object with a ``.size`` attribute, optional
        When given, ``image.size`` is unpacked as ``(width, height)`` and used
        for scaling. Takes precedence over ``image_size``.
    normalized_bbox : sequence of 4 numbers
        ``(x, y, width, height)`` as fractions of the image dimensions.
    image_size : int, optional
        Side length of a square image; used only when ``image`` is None.
    margin : int, default 4
        Pixels removed on each side of the box. The default reproduces the
        previously hard-coded value.

    Returns
    -------
    tuple of int
        ``(y, x, height, width)`` in pixels. Note the order differs from the
        input order. For boxes smaller than ``2 * margin`` pixels the returned
        height/width can be zero or negative; no clamping is applied.

    Raises
    ------
    ValueError
        If neither ``image`` nor ``image_size`` is given, or if
        ``normalized_bbox`` is None.
    """
    if image is not None:
        image_width, image_height = image.size
    elif image_size is not None:
        image_width, image_height = image_size, image_size
    else:
        # Previously this fell through to an opaque "float * NoneType" TypeError.
        raise ValueError("transform_bbox needs either `image` or `image_size`")

    if normalized_bbox is None:
        raise ValueError("transform_bbox needs a `normalized_bbox`")

    x, y, width, height = normalized_bbox

    x = int(np.round(x * image_width, 0)) + margin
    y = int(np.round(y * image_height, 0)) + margin
    width = int(np.round(width * image_width, 0)) - 2 * margin
    height = int(np.round(height * image_height, 0)) - 2 * margin
    return y, x, height, width


def get_correct_box(df, image_dir:str="../competition_data/train_features/"):
    """Add pixel-space bounding boxes to ``df`` (modified in place and returned).

    For every row whose ``bbox_true`` is truthy, the normalized box in
    ``bbox_normalized`` is converted with ``transform_bbox`` twice: once against
    the real size of the image file and once against a 224x224 square. Rows
    without a box get ``pd.NA`` in both columns.

    Parameters
    ----------
    df : pd.DataFrame
        Needs the columns ``bbox_true``, ``file`` and ``bbox_normalized``.
    image_dir : str
        Directory that contains the files named in ``df['file']``. Defaults to
        the previously hard-coded training-image folder.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object with column ``bbox`` (over)written and column
        ``bbox_im_size_224`` added; both hold ``(y, x, height, width)`` tuples.
    """
    image_size = 224
    bbox_transformed = []
    bbox_transformed_im_size_224 = []

    # Walk the three needed columns directly instead of doing several
    # ``df.loc[label]`` row look-ups per image.
    for has_bbox, file_name, normalized_bbox in zip(df["bbox_true"], df["file"], df["bbox_normalized"]):
        if not has_bbox:
            bbox_transformed.append(pd.NA)
            bbox_transformed_im_size_224.append(pd.NA)
            continue

        # Only the image dimensions are needed, so no RGB conversion is done,
        # and the context manager closes the file handle right away.
        with Image.open(os.path.join(image_dir, file_name)) as image:
            bbox_transformed.append(transform_bbox(image=image, normalized_bbox=normalized_bbox))
        bbox_transformed_im_size_224.append(transform_bbox(normalized_bbox=normalized_bbox, image_size=image_size))

    df['bbox'] = bbox_transformed
    df['bbox_im_size_224'] = bbox_transformed_im_size_224
    return df

def crop_image_to_normalized_bbox(image, normalized_bbox):
    """Crop ``image`` to a normalized bounding box.

    The normalized box is turned into pixel ``(y, x, height, width)`` by
    ``transform_bbox`` using the image's own size; the image is then converted
    to a tensor, cropped, and converted back with ``ToPILImage``.
    """
    top, left, crop_height, crop_width = transform_bbox(image, normalized_bbox)
    as_tensor = transforms.ToTensor()(image)
    cropped_tensor = transforms.functional.crop(as_tensor, top, left, crop_height, crop_width)
    return transforms.ToPILImage()(cropped_tensor)

def crop_image_to_bbox(image, bbox):
    """Crop ``image`` to a pixel bounding box given as ``(y, x, height, width)``.

    The image is converted to a tensor, cropped, and converted back with
    ``ToPILImage``.
    """
    top, left, crop_height, crop_width = bbox
    to_tensor = transforms.ToTensor()
    to_pil = transforms.ToPILImage()
    cropped_tensor = transforms.functional.crop(to_tensor(image), top, left, crop_height, crop_width)
    return to_pil(cropped_tensor)


def get_clean_dataframe_from_json(path_json, save_csv=False, file_name=''):
    '''
    Build a table with one row per image holding its top-confidence detection.

    path_json: path to the MegaDetector output JSON (read via read_megadetector_json)
    save_csv:  if true, the result is also written to f'{file_name}.csv'
    file_name: CSV file name without the '.csv' extension (only used with save_csv)

    Returns a DataFrame indexed by image name (file name without '.jpg') with
    the columns file, max_detection_conf, category, conf, bbox,
    bbox_normalized, bbox_true and bbox_im_size_224. Images without a
    detection are kept, with missing values and bbox_true == False.
    '''

    df_images = read_megadetector_json(path_json)
    dict_detection_cat = {'1': 'animal', '2': 'person', '3': 'vehicle'}

    # spread the per-image detection list over one column per detection
    df_images_detections = df_images['detections'].apply(pd.Series, dtype='object')

    df_images_clean = df_images.merge(df_images_detections, left_index=True, right_index=True).drop(columns='detections')

    # long format: one row per (image, detection slot)
    df_images_clean = df_images_clean.melt(id_vars=['file', 'max_detection_conf']).sort_values('file')

    # remove empty detection slots
    df_images_clean = df_images_clean.dropna(subset=['value'])
    df_images_clean = df_images_clean.drop(columns='variable')

    # expand category, conf, bbox
    df_cat_conf_bbox = df_images_clean['value'].apply(pd.Series, dtype='object')
    df_images_clean = df_images_clean.merge(df_cat_conf_bbox, left_index=True, right_index=True).drop(columns='value')

    # take only detections for max detection conf
    df_images_clean = df_images_clean[df_images_clean['max_detection_conf'] == df_images_clean['conf']]
    df_images_clean = df_images_clean.reset_index(drop=True)

    # left join so that images without any detection stay in the table
    df_images_clean = pd.merge(df_images['file'], df_images_clean, on='file', how='left')

    # cast datatypes; rename_categories maps the category codes to labels
    # (Series.replace on a categorical column is deprecated in pandas 2.2)
    df_images_clean['category'] = df_images_clean['category'].astype('category')
    df_images_clean['category'] = df_images_clean['category'].cat.rename_categories(dict_detection_cat)

    # change index to image name
    df_images_clean['image_name'] = [image.replace('.jpg', '') for image in df_images_clean['file']]
    df_images_clean.index = df_images_clean['image_name']
    df_images_clean = df_images_clean.drop(columns='image_name')

    # remove duplicates (keep the first row per image); copy so the column
    # assignments below act on an independent frame, not on a filtered view
    df_images_clean = df_images_clean[~df_images_clean.index.duplicated()].copy()

    # bbox
    df_images_clean['bbox_normalized'] = df_images_clean['bbox']
    df_images_clean['bbox_true'] = df_images_clean['bbox_normalized'].notnull()

    # transform bbox (overwrites 'bbox' with pixel values, adds 'bbox_im_size_224')
    df_images_clean = get_correct_box(df_images_clean)

    if save_csv:
        df_images_clean.to_csv(f'{file_name}.csv')

    return df_images_clean

def get_csv_file(path_json, merge_csv='../competition_data/train_features_with_split.csv', save_csv=False, file_name=''):
    """Left-join the cleaned detection table onto the table read from ``merge_csv``.

    ``merge_csv`` is read with its ``id`` column as index and joined index-to-index
    with the result of ``get_clean_dataframe_from_json(path_json)``. The columns
    ``file``, ``max_detection_conf`` and ``category`` are dropped afterwards.
    With ``save_csv`` the result is also written to ``f'{file_name}.csv'``.
    """
    detections = get_clean_dataframe_from_json(path_json=path_json)
    split_table = pd.read_csv(merge_csv, index_col='id')

    unwanted_columns = ['file', 'max_detection_conf', 'category']
    df_combined = pd.merge(
        split_table,
        detections,
        how='left',
        left_index=True,
        right_index=True,
    ).drop(columns=unwanted_columns)

    if save_csv:
        df_combined.to_csv(f'{file_name}.csv')

    return df_combined


## Train features

In [4]:
# Build the per-image detection table for the training images; with
# save_csv_file set this also writes 'megadet_bbox_train.csv'.
df_train_megadet_bbox = get_clean_dataframe_from_json(path_json='train_features_output.json', 
                                                save_csv=save_csv_file, 
                                                file_name='megadet_bbox_train')
df_train_megadet_bbox

Unnamed: 0_level_0,file,max_detection_conf,category,conf,bbox,bbox_normalized,bbox_true,bbox_im_size_224
image_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ZJ000000,ZJ000000.jpg,0.690,animal,0.690,"(322, 109, 92, 69)","[0.1093, 0.5888, 0.0802, 0.1851]",True,"(136, 28, 33, 10)"
ZJ000001,ZJ000001.jpg,0.813,animal,0.813,"(4, 542, 515, 120)","[0.5604, 0, 0.1333, 0.9685]",True,"(4, 130, 209, 22)"
ZJ000002,ZJ000002.jpg,0.612,animal,0.612,"(284, 330, 56, 74)","[0.5093, 0.7777, 0.1281, 0.1777]",True,"(178, 118, 32, 21)"
ZJ000003,ZJ000003.jpg,0.686,animal,0.686,"(230, 4, 107, 129)","[0, 0.6277, 0.214, 0.3194]",True,"(145, 4, 64, 40)"
ZJ000004,ZJ000004.jpg,0.476,animal,0.476,"(194, 326, 91, 66)","[0.5031, 0.5671, 0.1156, 0.2955]",True,"(131, 117, 58, 18)"
...,...,...,...,...,...,...,...,...
ZJ016483,ZJ016483.jpg,,,,,,False,
ZJ016484,ZJ016484.jpg,0.528,animal,0.528,"(4, 399, 335, 237)","[0.6171, 0, 0.3828, 0.9527]",True,"(4, 142, 205, 78)"
ZJ016485,ZJ016485.jpg,0.151,animal,0.151,"(291, 279, 49, 129)","[0.4296, 0.7972, 0.214, 0.1583]",True,"(183, 100, 27, 40)"
ZJ016486,ZJ016486.jpg,0.264,animal,0.264,"(302, 124, 46, 85)","[0.125, 0.5518, 0.09687, 0.1]",True,"(128, 32, 14, 14)"


In [6]:
# Join the detections onto the split table (default merge_csv); with
# save_csv_file set this also writes 'trainfeatures_megadet_bbox_split.csv'.
# NOTE(review): get_csv_file rebuilds the detection table from the JSON itself
# rather than reusing df_train_megadet_bbox from the previous cell.
df_trainfeature_megadet_bbox = get_csv_file(path_json='train_features_output.json', 
                                                save_csv=save_csv_file, 
                                                file_name='trainfeatures_megadet_bbox_split')
df_trainfeature_megadet_bbox

Unnamed: 0_level_0,filepath,site,split,conf,bbox,bbox_normalized,bbox_true,bbox_im_size_224
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ZJ000000,train_features/ZJ000000.jpg,S0120,2.0,0.690,"(322, 109, 92, 69)","[0.1093, 0.5888, 0.0802, 0.1851]",True,"(136, 28, 33, 10)"
ZJ000001,train_features/ZJ000001.jpg,S0069,3.0,0.813,"(4, 542, 515, 120)","[0.5604, 0, 0.1333, 0.9685]",True,"(4, 130, 209, 22)"
ZJ000002,train_features/ZJ000002.jpg,S0009,0.0,0.612,"(284, 330, 56, 74)","[0.5093, 0.7777, 0.1281, 0.1777]",True,"(178, 118, 32, 21)"
ZJ000003,train_features/ZJ000003.jpg,S0008,4.0,0.686,"(230, 4, 107, 129)","[0, 0.6277, 0.214, 0.3194]",True,"(145, 4, 64, 40)"
ZJ000004,train_features/ZJ000004.jpg,S0036,0.0,0.476,"(194, 326, 91, 66)","[0.5031, 0.5671, 0.1156, 0.2955]",True,"(131, 117, 58, 18)"
...,...,...,...,...,...,...,...,...
ZJ016483,train_features/ZJ016483.jpg,S0093,1.0,,,,False,
ZJ016484,train_features/ZJ016484.jpg,S0043,3.0,0.528,"(4, 399, 335, 237)","[0.6171, 0, 0.3828, 0.9527]",True,"(4, 142, 205, 78)"
ZJ016485,train_features/ZJ016485.jpg,S0089,3.0,0.151,"(291, 279, 49, 129)","[0.4296, 0.7972, 0.214, 0.1583]",True,"(183, 100, 27, 40)"
ZJ016486,train_features/ZJ016486.jpg,S0095,2.0,0.264,"(302, 124, 46, 85)","[0.125, 0.5518, 0.09687, 0.1]",True,"(128, 32, 14, 14)"


## Test features

In [None]:
# df_images_clean_test = get_clean_dataframe_from_json(path_json='test_features_output.json', 
#                                                 save_csv=save_csv_file, 
#                                                 file_name='megadet_bbox_test')
# df_images_clean_test

## Test: cropping an image

In [None]:
# NOTE(review): a function with this name and the same logic is already
# defined in the helper cell near the top of the notebook; running this cell
# rebinds the name. Consider keeping only one definition.
def crop_image_to_normalized_bbox(image, normalized_bbox):
    """Crop ``image`` to a normalized bounding box.

    ``transform_bbox`` converts the box to pixel ``(y, x, height, width)``
    based on the image's size; the crop is done on a tensor version of the
    image and the result is converted back with ``ToPILImage``.
    """
    pixel_box = transform_bbox(image, normalized_bbox)
    top, left, crop_height, crop_width = pixel_box
    tensor_image = transforms.ToTensor()(image)
    tensor_crop = transforms.functional.crop(tensor_image, top, left, crop_height, crop_width)
    return transforms.ToPILImage()(tensor_crop)

# NOTE(review): a function with this name and the same logic is already
# defined in the helper cell near the top of the notebook; running this cell
# rebinds the name. Consider keeping only one definition.
def crop_image_to_bbox(image, bbox):
    """Crop ``image`` to a pixel box given as ``(y, x, height, width)``.

    The crop is done on a tensor version of the image and the result is
    converted back with ``ToPILImage``.
    """
    top, left, crop_height, crop_width = bbox
    tensor_image = transforms.ToTensor()(image)
    tensor_crop = transforms.functional.crop(tensor_image, top, left, crop_height, crop_width)
    return transforms.ToPILImage()(tensor_crop)

In [None]:
# Positional row (iloc) of the sample to inspect. Deliberately not called
# `id`, which would shadow the built-in id() for the rest of the kernel session.
sample_pos = 16102

df = df_trainfeature_megadet_bbox.copy()
sample_row = df.iloc[sample_pos]  # look the row up once instead of three times

path = r"../competition_data/" + sample_row["filepath"]
image = Image.open(path).convert("RGB")
normalized_bbox = sample_row['bbox_normalized']
bbox = sample_row['bbox']

# Crop once from the stored pixel box and once from the normalized box so the
# two results can be compared visually.
cropped_image = crop_image_to_bbox(image, bbox)
cropped_image_norm = crop_image_to_normalized_bbox(image, normalized_bbox)

display(image)
display(cropped_image)
display(cropped_image_norm)

In [None]:
# Path to the image that should be cropped
image_path = "../megadetector/train_features_detection_th01/zj000000_detections.jpg"
# Normalized bounding-box coordinates (x, y, width, height)
normalized_bbox = (0.1093, 0.5888, 0.0802, 0.1851)
# crop_image_to_normalized_bbox reads `.size` and converts its argument to a
# tensor, so it needs a loaded image; passing the path string fails.
detection_image = Image.open(image_path).convert("RGB")
# Crop the image to the bounding box and show the result
cropped_image = crop_image_to_normalized_bbox(detection_image, normalized_bbox)
#cropped_image.save("cropped_normalized_image.jpg")
display(cropped_image)