In [54]:
import os
os.chdir('/Users/nick/Documents/school/research/EfficientLPR')
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import svm
from rapidfuzz import fuzz
import pickle

In [34]:
def get_l2_norm(y_trues, y_preds):
    distances = []
    for i in range(len(y_preds)): # calculate euclidian distance between preds and answer
        y_true, y_pred = y_trues[i], y_preds[i]
        distances.append(np.linalg.norm(y_true - y_pred))
    return np.expand_dims(distances,1)

def get_lev_distance(y_true, y_preds):
    lev_distances = []
    for row in np.hstack([np.expand_dims(y_preds, 1), np.expand_dims(y_true, 1)]):
        lev_distances.append(fuzz.ratio(row[0], row[1]))
    return np.expand_dims(lev_distances,1)

In [39]:
"""
Script that transforms the predictions into three features:
    lp_distance: a similarity metric representing the levenshtein distance between true and predicted LP
    body_distance: distance between true and predicted body
    color_distance: distance between true and predicted color

"""

predictions_path = 'predictions_phi0_nzvd.full.csv'
trues_path = 'data/processed/nzvd/test_annotations.csv'
lps_path = 'data/raw/nzvd/test_labels.csv'
classes_path = 'data/processed/classes.csv'
colors_path = 'data/processed/colors.csv'

# get class data
classes = [x[0] for x in pd.read_csv(classes_path, header=None).values]
colors = [x[0] for x in pd.read_csv(colors_path, header=None).values]
class_labels = {x:i for i,x in enumerate(classes)}
color_labels = {x:i for i,x in enumerate(colors)}

# get y_pred 
preds = pd.read_csv(predictions_path)
preds = preds.fillna('') # fill NaN values with empty string

# get y_true
lps = pd.read_csv(lps_path)[['lp-string']].T.squeeze()
lps = lps.str.strip() # trim leading spaces
trues = pd.read_csv(trues_path, header=None)
trues.columns = ['file', 't', 'l', 'h', 'w', 'body', 'color']
trues = trues.sort_values(by=['file'])
trues.reset_index(inplace=True)
trues = trues.assign(lp=lps)

# LICENSE PLATES    
lp_true, lp_pred = trues[['lp']].values.squeeze(), preds[['lp']].values.squeeze()
lp_acc = np.mean([lp_true == lp_pred])
print("LP Exact Accuracy:", f'{lp_acc}%', )

def featurize(trues, preds):
    """Converts [preds, true] into [levenshtein distance, CCE_body, CCE_color]"""

    # levenshtein distance of license plates
    lev_distances = get_lev_distance(trues[['lp']].values.squeeze(), preds[['lp']].values.squeeze())

    # BODY
    body_true = list(map(lambda x: class_labels[x], trues[['body']].values.squeeze().tolist()))
    body_true = tf.one_hot(body_true, depth=len(class_labels))
    body_headers = [header for header in preds.columns if header.startswith('body')]
    body_pred = preds[body_headers].values
    body_cce = np.expand_dims(tf.losses.categorical_crossentropy(body_true, body_pred).numpy(), 1)

    # COLOR
    color_true = list(map(lambda x: color_labels[x], trues[['color']].values.squeeze().tolist()))
    color_true = tf.one_hot(color_true, depth=len(color_labels))
    color_headers = [header for header in preds.columns if header.startswith('color')]
    color_pred = preds[color_headers].values
    color_cce = np.expand_dims(tf.losses.categorical_crossentropy(color_true, color_pred).numpy(), 1)

    return [lev_distances, body_cce, color_cce]

y_positive = np.expand_dims(np.repeat([1], len(preds)), 1) # positive samples have class==1
y_negative = np.expand_dims(np.repeat([0], len(preds)),1) # negative samples have class==0
x_positive = np.hstack([*featurize(trues, preds)])
x_negative = np.hstack([*featurize(trues, preds.sample(frac=1))]) # use shuffled preds for negative samples
x = np.vstack([x_positive, x_negative])
y = np.vstack([y_positive, y_negative])

LP Exact Accuracy: 0.7%


In [53]:
# compare methods

# LDA
lda_classifier = LinearDiscriminantAnalysis(solver="lsqr")
lda_classifier = lda_classifier.fit(x,y)

# SVM
svm_classifier = svm.SVC()
svm_classifier = svm_classifier.fit(x, y)

# Random Forest
rf_classifier = RandomForestClassifier()
rf_classifier = rf_classifier.fit(x,y)

# evaluate models on test data


# pickle models
def save_classifier(clf, name):
    pickle.dumps(clf ,open(name + '.p', 'wb'))