In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from scipy.io import loadmat
import glob
import pandas as pd
import os
import numpy as np
import re
import zipfile

In [3]:
# Unzip files
base_path = './Testing.zip'
# Open the archive in a `with` block so the file handle is released even
# when extraction fails part-way through.
with zipfile.ZipFile(base_path, 'r') as zip_arch:
    # No target directory is given, so members are extracted into the
    # current working directory.
    zip_arch.extractall()
# Drop the extension: base_path now names the extracted folder ('./Testing').
base_path = base_path.replace('.zip', '')

# Get .mat files
# Recursive search below the extracted folder; the pattern expands to
# './Testing/**/*.mat'.
annotation_files = glob.glob(base_path + '/**/*.mat', recursive=True)

In [4]:
def generate_annotations(list_of_files):
    """Collect the bounding boxes stored in MATLAB annotation files.

    Each file is read with ``scipy.io.loadmat`` and its ``'annotation'``
    entry is unwrapped.  The first three entries of the unwrapped record are
    used as, in order: the boxes, a per-box value that only takes part in
    building the row (it is not returned), and the per-box class value that
    ends up in the ``label`` column.

    Parameters
    ----------
    list_of_files : iterable of str
        Paths of ``.mat`` annotation files.  The path must contain a
        ``store<digits>`` component and the file name is expected to look
        like ``anno.<image id>.mat``; both are used to build the image path
        ``./Testing/<store>/images/<store>_<image id>.jpg``.

    Returns
    -------
    pandas.DataFrame
        One row per box with the columns ``image_path``, ``bbox_x``,
        ``bbox_y``, ``bbox_w``, ``bbox_h`` and ``label``.  Frames of the
        individual files are concatenated without resetting the index, so
        index values restart at 0 for every file.  An empty frame with the
        same columns is returned when there is nothing to report.

        Every row is assembled with ``np.append`` from mixed values, so
        NumPy promotes the row to a single common dtype.  When the second
        per-box value is text, all returned values are therefore strings
        (the notebook casts ``label`` to ``int`` afterwards).

    Raises
    ------
    IndexError
        If a path contains no ``store<digits>`` component.
    """
    columns = ['image_path', 'bbox_x', 'bbox_y', 'bbox_w', 'bbox_h', 'label']
    # Layout of one assembled row before the image path is attached.
    row_columns = ['bbox_x', 'bbox_y', 'bbox_w', 'bbox_h', 'label', 'class']

    annot_data = []
    for mat_path in list_of_files:
        record = loadmat(mat_path)['annotation'][0][0]
        # Unwrap the record: take the first element after flattening, three
        # times over, then read the first three entries of what is left.
        fields = record.flatten()[0].flatten()[0].flatten()[0]
        boxes, names, class_ids = fields[0], fields[1], fields[2]
        n_boxes = record[0][0].shape[0]

        # Image ID
        anno_name = os.path.basename(mat_path)
        image_name = anno_name.replace('.mat', '').replace('anno.', '')

        # Store ID
        store_id = re.findall('store[0-9]*', mat_path)[0]

        # Image path
        image_path = './Testing/' + store_id + '/images/' + \
            store_id + '_' + str(image_name) + '.jpg'

        rows = []
        for i in range(n_boxes):
            # flatten() returns a copy, so the swap below never touches the
            # loaded data.
            box = boxes.flat[i].flatten()
            # Exchange elements 1 and 2 of the box.
            # NOTE(review): presumably reorders the stored coordinates to
            # match the bbox_* column order - confirm against the dataset.
            box[[1, 2]] = box[[2, 1]]
            name = names.flat[i].flatten()[0][0]
            class_id = class_ids.flat[i].flatten()[0]
            # np.append promotes everything to one dtype; see the docstring.
            rows.append(np.append(np.append(box, name), class_id))

        if not rows:
            # A file without boxes contributes no rows.
            continue

        # Stack all rows once instead of re-allocating the table per box.
        annotation_frame = pd.DataFrame(np.vstack(rows), columns=row_columns)
        # Attach the path as a real column; writing it into the fixed-width
        # NumPy row would cut it off at the row's string width.
        annotation_frame.insert(0, 'image_path', image_path)
        # The class value is what callers receive as 'label'.
        annotation_frame['label'] = annotation_frame['class']
        annotation_frame = annotation_frame.drop(columns=['class'])
        annot_data.append(annotation_frame)

    if not annot_data:
        # pd.concat refuses an empty list; hand back an empty, well-formed table.
        return pd.DataFrame(columns=columns)
    return pd.concat(annot_data)
        
# Parse every annotation file found earlier into a single table.
annotations = generate_annotations(annotation_files)
# Show which distinct label values occur, then preview the first rows.
print(pd.unique(annotations['label']))
annotations.head()

['9' '26' '34' '30' '33' '18' '28' '21' '24' '19' '32' '13' '31' '27' '10'
 '11' '16' '20' '23' '25' '14' '12' '17' '22' '29' '35' '15']
['./Testing/store4/images/store4_59.jpg'
 './Testing/store4/images/store4_14.jpg'
 './Testing/store4/images/store4_27.jpg'
 './Testing/store4/images/store4_52.jpg'
 './Testing/store4/images/store4_17.jpg'
 './Testing/store4/images/store4_1.jpg'
 './Testing/store4/images/store4_63.jpg'
 './Testing/store4/images/store4_15.jpg'
 './Testing/store4/images/store4_3.jpg'
 './Testing/store4/images/store4_18.jpg'
 './Testing/store4/images/store4_78.jpg'
 './Testing/store4/images/store4_64.jpg'
 './Testing/store4/images/store4_24.jpg'
 './Testing/store4/images/store4_79.jpg'
 './Testing/store4/images/store4_75.jpg'
 './Testing/store4/images/store4_80.jpg'
 './Testing/store4/images/store4_25.jpg'
 './Testing/store4/images/store4_69.jpg'
 './Testing/store4/images/store4_10.jpg'
 './Testing/store4/images/store4_42.jpg'
 './Testing/store4/images/store4_9.jpg'
 './T

Unnamed: 0,image_path,bbox_x,bbox_y,bbox_w,bbox_h,label
0,./Testing/store4/images/store4_59.jpg,0.0151143790849673,0.1979166666666666,0.5657679738562091,0.5410539215686274,9
1,./Testing/store4/images/store4_59.jpg,0.0298202614379085,0.6488970588235294,0.9758986928104576,0.9761029411764706,9
0,./Testing/store4/images/store4_14.jpg,0.0401348039215686,0.005718954248366,0.990502450980392,0.2606209150326797,26
1,./Testing/store4/images/store4_14.jpg,0.5180759803921569,0.5228758169934641,0.9325980392156864,0.9934640522875816,26
2,./Testing/store4/images/store4_14.jpg,0.0042892156862745,0.5044934640522876,0.5171568627450981,0.99468954248366,26


In [5]:
# Translate the dataset's class indices into our own label encoding.
# Step 1: load the dataset's table of class names and their indices.
key_maps = loadmat('TrainingClassesIndex.mat')
# '/' in a class name is replaced by '_'.
classes = [entry[0].replace('/', '_') for entry in key_maps['classes'].flatten()]
labels = key_maps['indices'].flatten()
key_maps = pd.DataFrame({'class_name': classes, 'label': labels})
# Labels must be integers before they can be joined on the index table.
annotations['label'] = annotations['label'].astype(int)

# Step 2: attach the class name that belongs to each annotation's label.
merged_annotations = annotations.merge(key_maps, on='label', how='left')

# Step 3: look up our encoded label by class name. Both sides carry a 'label'
# column, so the merge suffixes them as label_x / label_y; those and the class
# name are dropped and 'label_encoded' takes over the name 'label'.
our_mappings = pd.read_csv('our_encoded_grozi_mappings.csv')
final_annotations = (
    merged_annotations
    .merge(our_mappings, how='left', left_on='class_name', right_on='label')
    .drop(columns=['class_name', 'label_x', 'label_y'])
    .rename(columns={'label_encoded': 'label'})
)

print(final_annotations['label'].unique())
final_annotations.to_csv('final_box_labels_grozi.csv', index=False)

[ 7 24 32 28 31 16 26 19 22 17 30 11 29 25  8  9 14 18 21 23 12 10 15 20
 27 33 13]
