In [23]:
import os
# NOTE(review): machine-specific absolute path. The relative data paths used by
# later cells (e.g. 'data/raw/comp-cars') resolve against this working directory,
# so the notebook only runs as-is on a machine with this exact layout.
os.chdir('/Users/nick/Documents/school/research/EfficientLPR')
from scipy.io import loadmat
import numpy as np
# NOTE(review): cv2 and plt are not referenced in the cells visible here -
# confirm they are needed elsewhere before relying on or removing them.
from cv2 import cv2
import matplotlib.pyplot as plt
import pandas as pd

In [22]:
# SURVEILLANCE (COLOR) DATA
# Builds [fname, color] annotations from color_list.mat, shuffles them into
# train/val/test subsets and writes one CSV per subset plus a color lookup table.
in_dir = 'data/raw/comp-cars'
surv_dir = in_dir + '/sv_data'
label_mat = loadmat(surv_dir + '/color_list.mat')["color_list"]

# A color's position in this list is the integer id used to index it from the
# labels below and the id written next to it in colors.csv.
colors = ["black", "white", "red", "yellow", "blue", "green", "purple", "brown", "champagne", "silver"]

# color_annos: [n, [fname, color]]
# Rows labelled -1 (unrecognized color) are omitted. int() makes the label a
# plain Python index regardless of the numeric dtype loadmat returned.
color_annos = np.array([
    [x[0][0], colors[int(x[1][0][0])]]
    for x in label_mat
    if x[1][0][0] != -1
])

# Seed the global RNG (same value the web-data split uses) so the shuffled
# subsets written below are identical on every run instead of changing each time.
np.random.seed(1)

# split train and test (first 80% of a random permutation -> train, rest -> test)
split = int(color_annos.shape[0] * 0.8)
indices = np.random.permutation(color_annos.shape[0])
train_idx, test_idx = indices[:split], indices[split:]
train_annos, test_annos = color_annos[train_idx], color_annos[test_idx]
# val/train split (first 90% of a fresh permutation of train stays train, rest -> val)
split = int(train_annos.shape[0] * 0.9)
indices = np.random.permutation(train_annos.shape[0])
train_idx, val_idx = indices[:split], indices[split:]
train_annos, val_annos = train_annos[train_idx], train_annos[val_idx]
# CSVs are written without a header row or index column.
pd.DataFrame(train_annos).to_csv(surv_dir + '/train_annotations.csv', header=None, index=None)
pd.DataFrame(val_annos).to_csv(surv_dir + '/val_annotations.csv', header=None, index=None)
pd.DataFrame(test_annos).to_csv(surv_dir + '/test_annotations.csv', header=None, index=None)

# colors.csv: one row per color, [name, index into `colors`]
color_out = np.hstack([np.expand_dims(colors,1), np.expand_dims(range(len(colors)), 1)])
pd.DataFrame(color_out).to_csv(surv_dir + '/colors.csv', header=None, index=None)

In [18]:
# WEB DATA
# Locations of the web-image subset and of its per-image label files.
web_dir = '{}/data'.format(in_dir)
label_dir = '{}/label'.format(web_dir)

# Each entry of "make_names" is nested two levels deep; unwrap it to the bare name.
make_names_mat = loadmat(web_dir + '/misc/make_model_name.mat')["make_names"]
makes = [entry[0][0] for entry in make_names_mat]
# The dataset spells this make wrong (and with a trailing space); patch it in place.
typo_idx = makes.index('Lamorghini ')
makes[typo_idx] = 'Lamborghini'

# Viewpoint id that get_annos leaves out of the annotations.
side_view_id = 3

def get_annos(annos):
    """Append one annotation row per non-side-view label file under ``label_dir``.

    Walks the ``<make_id>/<model_id>/<year>/<label file>`` tree rooted at the
    module-level ``label_dir`` in sorted order, skipping hidden entries (names
    starting with '.', e.g. macOS '.DS_Store'). Each label file is read as
    text: its first line is parsed as the integer viewpoint id and its third
    line as the whitespace-separated bounding box. Files whose viewpoint id
    equals the module-level ``side_view_id`` are skipped.

    Args:
        annos: list that rows are appended to (mutated in place).

    Returns:
        The same ``annos`` list. Every appended row is
        ``[image_path, x1, y1, x2, y2, make_name]`` where ``image_path`` is
        ``image/<make_id>/<model_id>/<year>/<label stem>.jpg``, the four box
        values are kept as the strings read from the file, and ``make_name``
        is ``makes[int(make_id) - 1]``.

    Raises:
        ValueError: if a viewpoint line or make directory name is not an
            integer, or a bounding-box line has fewer than four fields.
        IndexError: if a label file has fewer than three lines or the make id
            falls outside ``makes``.
    """
    def visible(path):
        # Sorted listing without hidden entries, which would otherwise be
        # treated as make/model/year directories or label files.
        return sorted(name for name in os.listdir(path) if not name.startswith('.'))

    for make_id in visible(label_dir):
        make_dir = '{}/{}'.format(label_dir, make_id)

        for model_id in visible(make_dir):
            model_path = '{}/{}'.format(make_dir, model_id)

            for year in visible(model_path):
                leaf_path = '{}/{}'.format(model_path, year)

                for fname in visible(leaf_path):
                    with open('{}/{}'.format(leaf_path, fname), 'r') as f:
                        file_data = f.read().strip().splitlines()

                    viewpoint_id = int(file_data[0])
                    if viewpoint_id == side_view_id:
                        continue

                    # split() tolerates repeated spaces, tabs and a stray '\r'.
                    x1, y1, x2, y2 = file_data[2].split()[:4]
                    # Build the image path from the walk's own components so it
                    # does not depend on how many directories deep label_dir is.
                    stem = os.path.splitext(fname)[0]
                    image_path = 'image/{}/{}/{}/{}.jpg'.format(make_id, model_id, year, stem)
                    # The make directory name is a 1-based index into `makes`.
                    annos.append([image_path, x1, y1, x2, y2, makes[int(make_id) - 1]])
    return annos
# annos: [n, [fname (of image), x1, y1, x2, y2, make]]
annos = np.array(get_annos([]))
# Append a constant 'black' column as a dummy color:
# annos becomes [n, [fname, x1, y1, x2, y2, make, color]]
dummy_color = np.full((annos.shape[0], 1), 'black')
annos = np.hstack([annos, dummy_color])

In [19]:
# Seed the global RNG so both permutations below are reproducible.
np.random.seed(1)

# Stage 1: shuffle every row; the first 80% is provisional train, the rest is test.
n_rows = annos.shape[0]
split = int(n_rows * 0.8) # 80/20 split
indices = np.random.permutation(n_rows)
train_idx = indices[:split]
test_idx = indices[split:]
test_annos = annos[test_idx]
train_annos = annos[train_idx]

# Stage 2: reshuffle provisional train; the first 90% stays train, the rest is val.
n_rows = train_annos.shape[0]
split = int(n_rows * 0.9)
indices = np.random.permutation(n_rows)
train_idx = indices[:split]
val_idx = indices[split:]
val_annos = train_annos[val_idx]  # slice val first, before train_annos is rebound
train_annos = train_annos[train_idx]

# One CSV per subset, written without a header row or index column.
for csv_name, subset in (
    ('/train_annotations.csv', train_annos),
    ('/val_annotations.csv', val_annos),
    ('/test_annotations.csv', test_annos),
):
    pd.DataFrame(subset).to_csv(web_dir + csv_name, header=None, index=None)

# makes.csv: one row per make, [name, index into `makes`]
make_col = np.expand_dims(makes, 1)
index_col = np.expand_dims(range(len(makes)), 1)
makes_out = np.hstack([make_col, index_col])
pd.DataFrame(makes_out).to_csv(web_dir + '/makes.csv', header=None, index=None)