In [127]:
import os
os.chdir('/Users/nick/Documents/school/research/EfficientLPR')
import pandas as pd
import numpy as np
import tensorflow as tf

In [130]:
def iterative_levenshtein(s, t, costs=(1, 1, 1)):
    """ 
        iterative_levenshtein(s, t) -> ldist
        ldist is the Levenshtein distance between the strings 
        s and t.
        For all i and j, dist[i,j] will contain the Levenshtein 
        distance between the first i characters of s and the 
        first j characters of t
        
        costs: a tuple or a list with three integers (d, i, s)
               where d defines the costs for a deletion
                     i defines the costs for an insertion and
                     s defines the costs for a substitution
    """

    rows = len(s)+1
    cols = len(t)+1
    deletes, inserts, substitutes = costs
    
    dist = [[0 for x in range(cols)] for x in range(rows)]

    # source prefixes can be transformed into empty strings 
    # by deletions:
    for row in range(1, rows):
        dist[row][0] = row * deletes

    # target prefixes can be created from an empty source string
    # by inserting the characters
    for col in range(1, cols):
        dist[0][col] = col * inserts
        
    for col in range(1, cols):
        for row in range(1, rows):
            if s[row-1] == t[col-1]:
                cost = 0
            else:
                cost = substitutes
            dist[row][col] = min(dist[row-1][col] + deletes,
                                 dist[row][col-1] + inserts,
                                 dist[row-1][col-1] + cost) # substitution

    # for r in range(rows):
    #     print(dist[r])
    
 
    return dist[row][col]

In [174]:
predictions_path = 'predictions_phi0_nzvd.full.csv'
trues_path = 'data/processed/nzvd/test_annotations.csv'
lps_path = 'data/raw/nzvd/test_labels.csv'
classes_path = 'data/processed/classes.csv'
colors_path = 'data/processed/colors.csv'

# get class data
classes = [x[0] for x in pd.read_csv(classes_path, header=None).values]
colors = [x[0] for x in pd.read_csv(colors_path, header=None).values]
class_labels = {x:i for i,x in enumerate(classes)}
color_labels = {x:i for i,x in enumerate(colors)}

# get y_pred and y_true
preds = pd.read_csv(predictions_path)
preds = preds.fillna('') # fill NaN values with empty string
lps = pd.read_csv(lps_path)[['lp-string']].T.squeeze()
lps = lps.str.strip() # trim leading spaces
trues = pd.read_csv(trues_path, header=None)
trues.columns = ['file', 't', 'l', 'h', 'w', 'body', 'color']

# format y_true better
trues = trues.sort_values(by=['file'])
trues.reset_index(inplace=True)
trues = trues.assign(lp=lps)

# license plates
lp_true, lp_pred = trues[['lp']].values.squeeze(), preds[['lp']].values.squeeze()
lp_acc = np.mean([lp_true == lp_pred])
print("LP Exact Accuracy:", f'{lp_acc}%', )

# body



# levenshtein computational complexity too high (O(mn))
# should look into a textual IOU instead

LP Exact Accuracy: 0.7%


In [173]:
# example implementation of a O(n) textual similarity comparison IOU
# use mean so it is symmetrical
a = 'abcd'
b = 'def'
s = set(a + b + '')
i1 = sum([1 for char in a if char in s])
i2 = sum([1 for char in b if char in s])
m = np.mean([i1, i2])

3.5
