In [None]:
import numpy as np                     # numpy
import pandas as pd                    # pandas

import matplotlib.pyplot as plt

from numpy.linalg import norm

In [None]:
# Execute the Task 2 implementation notebook inside this kernel so that the
# names it defines become available here. evaluateQueryEngine below calls
# createFeatureMatrixForDirectory, which is not defined in this notebook --
# presumably it is provided by this %run (TODO confirm).
%run "../Task2/Implementation.ipynb"

In [None]:
def _normalizeFeatures(featureMatrixDF):
    """Min-max scale every column except 'File' and return a float ndarray.

    Each feature column is mapped to [0, 1]. A column whose values are all
    identical has a span of zero; it is mapped to 0 everywhere (instead of
    producing NaN through 0/0) so that it simply does not contribute to any
    distance.
    """
    featureColumns = [column for column in featureMatrixDF.columns if column != 'File']
    features = featureMatrixDF[featureColumns].astype(float)
    minima = features.min()
    spans = features.max() - minima
    spans = spans.where(spans != 0, 1.0)  # avoid 0/0 for constant columns
    return ((features - minima) / spans).to_numpy()


def _rankAgainstQuery(fileNames, normalizedFeatures, queryIndex):
    """Rank every file except the query by Euclidean distance to the query.

    Returns a frame with the columns 'File', 'Relevance' and
    'Euclidean Distance', sorted by ascending distance with a fresh 0..n-1
    index. A file is marked relevant (1) when its name starts with the same
    two characters as the query's name, otherwise 0.
    """
    # Exclude the query by position only. Comparing the vectors element-wise
    # would also drop other files that merely share a feature value with it.
    isOther = np.arange(len(fileNames)) != queryIndex
    otherNames = fileNames[isOther]
    distances = np.linalg.norm(
        normalizedFeatures[isOther] - normalizedFeatures[queryIndex], axis=1
    )
    queryPrefix = fileNames[queryIndex][0:2]
    ranking = pd.DataFrame({
        'File': otherNames,
        'Relevance': [int(name[0:2] == queryPrefix) for name in otherNames],
        'Euclidean Distance': distances,
    })
    # A stable sort keeps the input order for equal distances, so the ranking
    # is deterministic.
    ranking = ranking.sort_values(by='Euclidean Distance', ascending=True, kind='stable')
    return ranking.reset_index(drop=True)


def _addEffectivenessMeasures(ranking, beta=0.5):
    """Return `ranking` with 'Precision', 'Recall' and 'F' at every rank.

    Row i holds the measures for the top i+1 results. 'F' is the F-beta
    measure ((beta^2 + 1) * P * R) / (beta^2 * P + R), defined as 0 where the
    denominator is 0. When the ranking contains no relevant file at all,
    recall is reported as 0 for every rank instead of dividing by zero.
    """
    relevance = ranking['Relevance'].to_numpy(dtype=float)
    retrievedCounts = np.arange(1, len(relevance) + 1)
    relevantCounts = np.cumsum(relevance)
    totalRelevant = relevance.sum()

    precision = relevantCounts / retrievedCounts
    if totalRelevant > 0:
        recall = relevantCounts / totalRelevant
    else:
        recall = np.zeros_like(precision)

    numerator = (beta ** 2 + 1) * precision * recall
    denominator = beta ** 2 * precision + recall
    fMeasure = np.divide(
        numerator, denominator, out=np.zeros_like(numerator), where=denominator > 0
    )
    return ranking.assign(Precision=precision, Recall=recall, F=fMeasure)


def _reportQuery(queryName, evaluationFrame):
    """Print the table, the top-5/top-10 measures and the precision-recall plot."""
    print()
    print(queryName)
    display(evaluationFrame)

    for position, cutoff in enumerate((5, 10)):
        if position > 0:
            print()
        print("Top " + str(cutoff) + ":")
        if len(evaluationFrame) < cutoff:
            # Fewer results than the cutoff: there is no row to read from.
            print("n/a (only " + str(len(evaluationFrame)) + " results)")
            continue
        row = evaluationFrame.iloc[cutoff - 1]
        print("Precision: " + str(float(row['Precision'])))
        print("Recall: " + str(float(row['Recall'])))
        print("F-Measure: " + str(float(row['F'])))

    plt.title(queryName)
    plt.plot(evaluationFrame['Recall'], evaluationFrame['Precision'])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim(0, 1.02)
    plt.ylim(0, 1.02)
    plt.show()


def evaluateQueryEngine(dir: str):
    """Evaluate the query engine using every file in `dir` once as the query.

    The feature matrix for `dir` is obtained from
    createFeatureMatrixForDirectory (not defined in this notebook; it must
    already be in the namespace). It is expected to contain a 'File' column
    plus numeric feature columns. The features are min-max normalised, and for
    each file all *other* files are ranked by Euclidean distance to it. A
    ranked file counts as relevant when its name starts with the same two
    characters as the query file's name.

    For each query this prints the file name, displays the ranking with
    precision, recall and F-measure (beta = 0.5) at every rank, prints the
    measures after 5 and 10 results, and shows a precision-recall plot.

    Parameters
    ----------
    dir : str
        Directory passed through to createFeatureMatrixForDirectory. The name
        shadows the built-in but is kept so keyword callers keep working.

    Returns
    -------
    None
    """
    featureMatrixDF = createFeatureMatrixForDirectory(dir=dir)
    fileNames = featureMatrixDF['File'].to_numpy()
    normalizedFeatures = _normalizeFeatures(featureMatrixDF)

    for queryIndex, queryName in enumerate(fileNames):
        ranking = _rankAgainstQuery(fileNames, normalizedFeatures, queryIndex)
        evaluationFrame = _addEffectivenessMeasures(ranking)
        _reportQuery(queryName, evaluationFrame)
        
        

In [None]:
# Run the evaluation on the "../videos/processed" directory. For every file in
# the feature matrix this prints a ranking table and shows a precision-recall
# plot, so the cell output grows with the number of files.
evaluateQueryEngine("../videos/processed")