In [9]:
import os
os.chdir('/Users/nick/Documents/school/research/EfficientLPR')
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from rapidfuzz import fuzz

In [87]:
"""
Script that transforms the predictions into three features:
    lp_distance: a similarity metric representing the levenshtein distance between true and predicted LP
    body_distance: distance between true and predicted body
    color_distance: distance between true and predicted color
For all three features, higher value indicates a worse prediction
"""

predictions_path = 'predictions_phi0_nzvd.full.csv'
trues_path = 'data/processed/nzvd/test_annotations.csv'
lps_path = 'data/raw/nzvd/test_labels.csv'
classes_path = 'data/processed/classes.csv'
colors_path = 'data/processed/colors.csv'

# get class data
classes = [x[0] for x in pd.read_csv(classes_path, header=None).values]
colors = [x[0] for x in pd.read_csv(colors_path, header=None).values]
class_labels = {x:i for i,x in enumerate(classes)}
color_labels = {x:i for i,x in enumerate(colors)}

# get y_pred and y_true
preds = pd.read_csv(predictions_path)
preds = preds.fillna('') # fill NaN values with empty string
lps = pd.read_csv(lps_path)[['lp-string']].T.squeeze()
lps = lps.str.strip() # trim leading spaces
trues = pd.read_csv(trues_path, header=None)
trues.columns = ['file', 't', 'l', 'h', 'w', 'body', 'color']

# improve y_true structure
trues = trues.sort_values(by=['file'])
trues.reset_index(inplace=True)
trues = trues.assign(lp=lps)

# license plates
lp_true, lp_pred = trues[['lp']].values.squeeze(), preds[['lp']].values.squeeze()
lp_acc = np.mean([lp_true == lp_pred])
print("LP Exact Accuracy:", f'{lp_acc}%', )

lev_distances = []
for row in np.hstack([np.expand_dims(lp_pred, 1), np.expand_dims(lp_true, 1)]):
    lev_distances.append(fuzz.ratio(row[0], row[1]))

# body
body_true = list(map(lambda x: class_labels[x], trues[['body']].values.squeeze().tolist()))
body_true = tf.one_hot(body_true, depth=len(class_labels))
body_headers = [header for header in preds.columns if header.startswith('body')]
body_pred = preds[body_headers].values

body_distances = []
for i in range(len(body_pred)): # calculate euclidian distance between preds and answer
    y_true, y_pred = body_true[i], body_pred[i]
    body_distances.append(np.linalg.norm(y_true - y_pred))

# color
color_true = list(map(lambda x: color_labels[x], trues[['color']].values.squeeze().tolist()))
color_true = tf.one_hot(color_true, depth=len(color_labels))
color_headers = [header for header in preds.columns if header.startswith('color')]
color_pred = preds[color_headers].values

color_distances = []
for i in range(len(color_pred)): # calculate euclidian distance between preds and answer
    y_true, y_pred = color_true[i], color_pred[i]
    color_distances.append(np.linalg.norm(y_true - y_pred))
color_distances = np.expand_dims(color_distances, 1)



LP Exact Accuracy: 0.7%
[[1.2253700e-02]
 [1.3626155e-01]
 [6.8777655e-03]
 [1.1095041e+00]
 [2.5311399e-01]
 [5.7137853e-01]
 [1.4138515e+00]
 [1.1935792e+00]
 [1.3437393e+00]
 [4.4943103e-01]
 [1.2161180e+00]
 [1.4139557e+00]
 [1.4128063e+00]
 [4.0506441e-04]
 [1.3334696e+00]
 [5.6742471e-01]
 [1.4026563e+00]
 [8.4616220e-01]
 [1.3915608e+00]
 [1.1883125e+00]
 [1.2650239e+00]
 [1.3014051e+00]
 [1.3099039e+00]
 [4.4022897e-01]
 [1.3723917e+00]
 [1.3534169e+00]
 [1.1695337e+00]
 [2.5761637e-01]
 [3.1474105e-03]
 [4.7503848e-02]
 [9.8148686e-01]
 [8.7894446e-01]
 [1.2236258e+00]
 [1.1954485e+00]
 [1.3789237e+00]
 [1.2060877e+00]
 [1.4123561e+00]
 [1.0692899e+00]
 [9.0373141e-01]
 [1.3096752e+00]
 [1.0466224e+00]
 [1.9370592e-01]
 [9.2638516e-01]
 [2.0723005e-01]
 [1.4090786e+00]
 [3.3809838e-01]
 [1.4139956e+00]
 [1.4125915e+00]
 [1.0702970e+00]
 [3.8935542e-01]
 [3.8103533e-01]
 [1.0593280e+00]
 [3.1715930e-02]
 [4.3086624e-01]
 [1.4665969e-01]
 [1.4142076e+00]
 [1.2852143e+00]
 [3.081