In [22]:
import pandas as pd
from sklearn.metrics import precision_score

In [3]:
FILES = ['data/prediction_model_01.csv', 'data/prediction_model_02.csv', 'data/prediction_model_03.csv']

In [40]:
GENRES = ['Action',
 'Adventure',
 'Animation',
 'Biography',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'History',
 'Horror',
 'Music',
 'Musical',
 'Mystery',
 'News',
 'Romance',
 'Sci-Fi',
 'Sport',
 'Thriller',
 'War',
 'Reality-TV',
 'Western']

In [62]:
def get_macro_precision(path):
    
    #TP = actual genre when correct is 1 [INDEX 0]
    #FP = predicted value when correct is 0 [INDEX 1]
    #FN = actual genres when correct is 0 [INDEX 2]

    # precision = TP/(TP+FP)
    # recall = TP/ (TP+FN)
    # F1 = (precision * recall)/(precision + recall)
    
    genre_map = {genre:[0,0,0] for genre in GENRES}

    df = pd.read_csv(path)

    for i, line in df.iterrows():
        
        correct = bool(line['correct?'])
        predicted = line['predicted'][0]
        actual = eval(line['actual genres'])
        
        if predicted not in genre_map:
            genre_map[predicted] = [0,0,0]
        
        if correct:
            for genre in actual:
                genre_map[genre][0] += 1
        else:
            genre_map[predicted][1] += 1
            
            for genre in actual:
                try:
                    genre_map[genre][2] += 1
                except KeyError:
                    genre_map[genre] = [0,0,0]
                    genre_map[genre][2] += 1
                    
                    
    precision_lst, recall_lst, f1_lst = [], [], []
    for key in genre_map:
        TP = genre_map[key][0]
        FP = genre_map[key][1]
        FN = genre_map[key][2]
        
        
        try:
            precision = TP/(TP+FP)
        except ZeroDivisionError:
            precision = 0
        try:
            recall = TP/(TP+FN)
        except ZeroDivisionError:
            recall = 0
        try:
            f1 = (2* precision * recall)/(precision+recall)
        except ZeroDivisionError:
            f1 = 0
        
        precision_lst.append(precision)
        recall_lst.append(recall)
        f1_lst.append(f1)
        
    precision_avg = sum(precision_lst)/len(precision_lst)
    recall_avg = sum(recall_lst)/len(recall_lst)
    f1_avg = sum(f1_lst)/len(f1_lst)
        
    return(precision_avg, recall_avg, f1_avg)
        

def get_micro_precision(path):
    
    #TP = actual genre when correct is 1 [INDEX 0]
    #FP = predicted value when correct is 0 [INDEX 1]
    #FN = actual genres when correct is 0 [INDEX 2]
    
    TP = 0
    FP = 0
    FN = 0
    df = pd.read_csv(path)
    
    for i, line in df.iterrows():
        correct = bool(line['correct?'])
        predicted = line['predicted'][0]
        actual = eval(line['actual genres'])
        
        if correct:
            TP += 1
        else:
            FP +=1
            for genre in actual:
                FN+=1

    # precision = TP/(TP+FP)
    # recall = TP/ (TP+FN)
    # F1 = (precision * recall)/(precision + recall)
    
    precision = TP/(TP+FP)
    recall = TP/(TP+FN)
    f1 = (2*precision*recall)/(precision+recall)
    
    return(precision, recall, f1)

In [70]:
macro = get_macro_precision("data/prediction_model_01.csv")
micro = get_micro_precision("data/prediction_model_01.csv")

labels = ['Precision', 'Recall', 'F1 Score']

df = pd.DataFrame([macro, micro], columns=labels, index=['Macro Average', 'Micro Average'])
print("Preciction Model #1 Scores")

df

Preciction Model #1 Scores


Unnamed: 0,Precision,Recall,F1 Score
Macro Average,0.527778,0.05232,0.094596
Micro Average,0.085942,0.042464,0.056842


In [71]:
macro = get_macro_precision("data/prediction_model_02.csv")
micro = get_micro_precision("data/prediction_model_02.csv")

labels = ['Precision', 'Recall', 'F1 Score']

df = pd.DataFrame([macro, micro], columns=labels, index=['Macro Average', 'Micro Average'])

print("Preciction Model #2 Scores")

df

Preciction Model #2 Scores


Unnamed: 0,Precision,Recall,F1 Score
Macro Average,0.6,0.320478,0.411542
Micro Average,0.551724,0.385043,0.453554


In [72]:
macro = get_macro_precision("data/prediction_model_03.csv")
micro = get_micro_precision("data/prediction_model_03.csv")

labels = ['Precision', 'Recall', 'F1 Score']

df = pd.DataFrame([macro, micro], columns=labels, index=['Macro Average', 'Micro Average'])

print("Preciction Model #3 Scores")

df

Preciction Model #3 Scores


Unnamed: 0,Precision,Recall,F1 Score
Macro Average,0.8,0.377784,0.48732
Micro Average,0.498143,0.331451,0.39805
