# Basic Metric

This is the basic comparison of simply comparing how many of the sentences the model got correct in terms of the extracted ground truth

In [58]:
import pandas as pd

In [59]:
ground_truth = pd.read_csv('/home/ada/humor/standup_data.csv')
model = pd.read_csv('/home/ada/humor/humor/gemma_answers.csv')

In [60]:
print(ground_truth.columns)

Index(['comedian', 'laugh_start', 'laugh_end', 'sentence'], dtype='object')


In [61]:
ground_truth.head()

Unnamed: 0,comedian,laugh_start,laugh_end,sentence
0,Donald_Glover,17.268,19.352,"He wasn't crying. Just tears, he was giving me..."
1,Donald_Glover,32.292,34.182,He would... The sweetest thing he was allowed ...
2,Donald_Glover,38.79,41.441,He was just allowed to have mints. So he would...
3,Donald_Glover,44.903,53.441,So his breath was so fresh... the vapors from ...
4,Donald_Glover,68.88,73.824,And I would take him to the park and I was the...


In [62]:
model.head()

Unnamed: 0,Comedian,Sentence
0,Anthony_Jeselnik,"When I was a kid, I used to fantasize about ge..."
1,Anthony_Jeselnik,My mom actually should've been on one of the p...
2,Anthony_Jeselnik,"When I was a kid, like nine years old, I'd com..."
3,Anthony_Jeselnik_2,I've never talked to a group of people without...
4,Anthony_Jeselnik_2,"And I know my grandma loved it too, because it..."


## Let's calculate the score!

First, simplify the sentences by changing them to all lowercase and removing punctuation.

In [63]:
import re

def simple_sentence(sentence):
    cleaned_sentence = sentence.lower()
    cleaned_sentence = re.sub(r'[^\w\s]', '', cleaned_sentence)
    return cleaned_sentence

Calculate the score by first checking if the string matches entirely or is in the ground truth. If this is not the case, move onto fuzzy string matching to see the similarity of the responses. If the score is above 50%, we can add this to the total score. The score is the average of correct responses per transcript.

In [64]:
from thefuzz import fuzz

In [65]:
found = {}

for index, row in ground_truth.iterrows():
    comedian_name = row['comedian']
    truth = row['sentence']   
    simple_truth = simple_sentence(truth)
    matching_rows = model[model['Comedian'] == comedian_name]
    
    score = 0
    num_sentences = set()

    if truth not in found:
        found[truth] = [comedian_name, False, score, 0]  

    for index2, row2 in matching_rows.iterrows():
        model_answer = row2['Sentence']      
        simple_model_answer = simple_sentence(model_answer)
        num_sentences.add(model_answer)
        
        if simple_truth == simple_model_answer or simple_model_answer in simple_truth:
            score = 100 
            found[truth][0] = comedian_name  
            found[truth][1] = True 
            found[truth][2] = score
        else:
            fuzzy_score = fuzz.partial_ratio(simple_truth, simple_model_answer)
            if fuzzy_score > 60:
                found[truth][0] = comedian_name 
                found[truth][1] = True
                if fuzzy_score >  found[truth][2]:
                    found[truth][2] = fuzzy_score
    found[truth][3] = len(num_sentences)

In [66]:
def calculate_score(found):
    correct_guesses = {}
    num_sentences = {}
    
    for val in found.values():
        comedian_name = val[0]
        number_of_sentences = val[3]
        if comedian_name not in correct_guesses:
            correct_guesses[comedian_name] = val[2]
            num_sentences[comedian_name] = number_of_sentences
        else:
            correct_guesses[comedian_name] += val[2]
    
    for comedian_name, score in correct_guesses.items():
        correct_guesses[comedian_name] = (correct_guesses[comedian_name]/num_sentences[comedian_name]) 
        
    return correct_guesses 
    

In [67]:
correct_guesses = calculate_score(found)

In [68]:
correct_guesses

{'Donald_Glover': 51.333333333333336,
 'Donald_Glover_2': 126.66666666666667,
 'Anthony_Jeselnik': 100.0,
 'Anthony_Jeselnik_2': 66.0,
 'Chelsea_Peretti': 110.33333333333333,
 'Chelsea_Peretti_2': 50.0,
 'Louis_CK': 106.8,
 'Louis_CK_2': 50.0,
 'John_Mulaney': 103.8,
 'John_Mulaney_2': 42.75,
 'Ali_Wong': 25.0,
 'Ali_Wong_2': 66.66666666666667,
 'Hasan_Minhaj': 91.5,
 'Hasan_Minhaj_2': 25.0,
 'Iliza_Shlesinger': 90.0,
 'Iliza_Shlesinger_2': 50.0,
 'Jim_Gaffigan': 82.25,
 'Jim_Gaffigan_2': 79.33333333333333,
 'Joe_List': 50.0,
 'Joe_List_2': 25.0,
 'Jimmy_Yang': 50.0,
 'Jimmy_Yang_2': 75.0,
 'Nate_Bargatze': 60.333333333333336,
 'Nate_Bargatze_2': 15.25,
 'Nate_Bargatze_TK': 99.5,
 'Nate_Bargatze_TK_2': 20.0,
 'Russell_Peters': 74.25,
 'Russell_Peters_2': 76.4,
 'Sam_Morril': 97.0,
 'Sam_Morril_2': 41.5,
 'Trevor_Noah': 46.0,
 'Trevor_Noah_2': 115.33333333333333,
 'Tom_Segura': 73.4,
 'Tom_Segura_2': 150.0}

Start from the model to avoid duplicates

In [69]:
quotes_count_dict = {}
for comedian in model['Comedian']:
    if comedian in quotes_count_dict:
        quotes_count_dict[comedian] += 1
    else:
        quotes_count_dict[comedian] = 1

print(quotes_count_dict)

{'Anthony_Jeselnik': 3, 'Anthony_Jeselnik_2': 3, 'Ali_Wong': 4, 'Ali_Wong_2': 3, 'Chelsea_Peretti': 3, 'Chelsea_Peretti_2': 4, 'Donald_Glover': 3, 'Donald_Glover_2': 3, 'Hasan_Minhaj': 4, 'Hasan_Minhaj_2': 4, 'Iliza_Shlesinger': 3, 'Iliza_Shlesinger_2': 2, 'Jim_Gaffigan': 4, 'Jim_Gaffigan_2': 3, 'Joe_List': 4, 'Joe_List_2': 4, 'John_Mulaney': 5, 'John_Mulaney_2': 4, 'Jimmy_Yang': 2, 'Jimmy_Yang_2': 4, 'Louis_CK': 5, 'Louis_CK_2': 4, 'Nate_Bargatze': 3, 'Nate_Bargatze_2': 4, 'Nate_Bargatze_TK': 4, 'Nate_Bargatze_TK_2': 5, 'Russell_Peters': 4, 'Russell_Peters_2': 5, 'Sam_Morril': 2, 'Sam_Morril_2': 4, 'Trevor_Noah': 2, 'Trevor_Noah_2': 3, 'Tom_Segura': 5, 'Tom_Segura_2': 2}


In [70]:
found = {}

for index, row in model.iterrows():
    comedian_name = row['Comedian']
    model_output = row['Sentence']   
    simple_model_output = simple_sentence(model_output)
    matching = ground_truth[ground_truth['comedian'] == comedian_name]

    if truth not in found:
        found[model_output] = [comedian_name, 0]  

    for index2, row2 in matching.iterrows():
        truth = row2['sentence']
        simple_truth = simple_sentence(truth)

        if simple_truth == simple_model_output or simple_model_output in simple_truth:
            found[model_output][0] = comedian_name  
            found[model_output][1] = 100
        else:
            fuzzy_score = fuzz.partial_ratio(simple_truth, simple_model_output)
            if fuzzy_score > 60:
                found[model_output][0] = comedian_name 
                found[model_output][1] = max(found[model_output][1], fuzzy_score)

In [71]:
def calculate_score(found, num_sentences):
    correct_guesses = {}
    
    for val in found.values():
        comedian_name = val[0]
        if comedian_name not in correct_guesses:
            correct_guesses[comedian_name] = val[1]
        else:
            correct_guesses[comedian_name] += val[1]
    
    for comedian_name, score in correct_guesses.items():
        correct_guesses[comedian_name] = (correct_guesses[comedian_name]/num_sentences[comedian_name]) 
        
    return correct_guesses 

In [72]:
correct_guesses = calculate_score(found, quotes_count_dict)

In [73]:
correct_guesses

{'Anthony_Jeselnik': 100.0,
 'Anthony_Jeselnik_2': 66.0,
 'Ali_Wong': 42.25,
 'Ali_Wong_2': 66.66666666666667,
 'Chelsea_Peretti': 56.333333333333336,
 'Chelsea_Peretti_2': 50.0,
 'Donald_Glover': 30.333333333333332,
 'Donald_Glover_2': 82.66666666666667,
 'Hasan_Minhaj': 75.0,
 'Hasan_Minhaj_2': 50.0,
 'Iliza_Shlesinger': 66.66666666666667,
 'Iliza_Shlesinger_2': 81.0,
 'Jim_Gaffigan': 87.25,
 'Jim_Gaffigan_2': 54.0,
 'Joe_List': 67.25,
 'Joe_List_2': 25.0,
 'John_Mulaney': 77.0,
 'John_Mulaney_2': 42.75,
 'Jimmy_Yang': 50.0,
 'Jimmy_Yang_2': 75.0,
 'Louis_CK': 100.0,
 'Louis_CK_2': 65.25,
 'Nate_Bargatze': 60.333333333333336,
 'Nate_Bargatze_2': 15.25,
 'Nate_Bargatze_TK': 99.5,
 'Nate_Bargatze_TK_2': 20.0,
 'Russell_Peters': 74.25,
 'Russell_Peters_2': 60.0,
 'Sam_Morril': 97.0,
 'Sam_Morril_2': 25.0,
 'Trevor_Noah': 46.0,
 'Trevor_Noah_2': 93.66666666666667,
 'Tom_Segura': 73.4,
 'Tom_Segura_2': 50.0}

Removing Stop Words

In [74]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [75]:
def remove_stop_words(sentence):
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(sentence)
    filtered_sentence = [w for w in word_tokens if not w.lower() in stop_words]
    filtered_sentence = []
    
    for w in word_tokens:
        if w not in stop_words:
            filtered_sentence.append(w)
    return " ".join(filtered_sentence)

In [76]:
def base_metric(model_output, ground_truth):
    found = {}

    for index, row in model.iterrows():
        comedian_name = row['Comedian']
        model_output = row['Sentence']   
        simple_model_output = simple_sentence(model_output)
        matching = ground_truth[ground_truth['comedian'] == comedian_name]

        if model_output not in found:
            found[model_output] = [comedian_name, 0]  

        for index2, row2 in matching.iterrows():
            truth = row2['sentence']
            simple_truth = simple_sentence(truth)
            stop_model = remove_stop_words(simple_model_output)
            stop_truth = remove_stop_words(simple_truth)
            if simple_truth == simple_model_output or stop_model in stop_truth:
                found[model_output][0] = comedian_name  
                found[model_output][1] = 100
            else:
                fuzzy_score = fuzz.partial_ratio(stop_truth, stop_model)
                if fuzzy_score > 60:
                    found[model_output][0] = comedian_name 
                    found[model_output][1] = max(found[model_output][1], fuzzy_score)
        
    return found

In [77]:
found = base_metric(model, ground_truth)

In [78]:
def calculate_score(found, num_sentences):
    correct_guesses = {}
    
    for val in found.values():
        comedian_name = val[0]
        if comedian_name not in correct_guesses:
            correct_guesses[comedian_name] = val[1]
        else:
            correct_guesses[comedian_name] += val[1]
    
    for comedian_name, score in correct_guesses.items():
        correct_guesses[comedian_name] = (correct_guesses[comedian_name]/num_sentences[comedian_name]) 
        
    return correct_guesses 

In [79]:
correct_guesses = calculate_score(found, quotes_count_dict)

In [80]:
correct_guesses

{'Anthony_Jeselnik': 100.0,
 'Anthony_Jeselnik_2': 66.66666666666667,
 'Ali_Wong': 42.0,
 'Ali_Wong_2': 66.66666666666667,
 'Chelsea_Peretti': 33.333333333333336,
 'Chelsea_Peretti_2': 50.0,
 'Donald_Glover': 30.0,
 'Donald_Glover_2': 80.66666666666667,
 'Hasan_Minhaj': 75.0,
 'Hasan_Minhaj_2': 50.0,
 'Iliza_Shlesinger': 66.66666666666667,
 'Iliza_Shlesinger_2': 50.0,
 'Jim_Gaffigan': 87.75,
 'Jim_Gaffigan_2': 55.333333333333336,
 'Joe_List': 83.25,
 'Joe_List_2': 25.0,
 'John_Mulaney': 78.4,
 'John_Mulaney_2': 61.25,
 'Jimmy_Yang': 50.0,
 'Jimmy_Yang_2': 75.0,
 'Louis_CK': 100.0,
 'Louis_CK_2': 65.25,
 'Nate_Bargatze': 62.666666666666664,
 'Nate_Bargatze_2': 33.0,
 'Nate_Bargatze_TK': 99.0,
 'Nate_Bargatze_TK_2': 20.0,
 'Russell_Peters': 72.5,
 'Russell_Peters_2': 88.6,
 'Sam_Morril': 100.0,
 'Sam_Morril_2': 42.5,
 'Trevor_Noah': 45.5,
 'Trevor_Noah_2': 94.66666666666667,
 'Tom_Segura': 60.0,
 'Tom_Segura_2': 50.0}

In [81]:
print(sum(correct_guesses.values())/len(correct_guesses))

63.549019607843135


# Testing the metric

In [82]:
import random

In [83]:
transcript = pd.read_csv('/home/ada/humor/humor/standup_transcripts.csv')

In [84]:
random_sentences = []

for i in range(len(transcript['comedian'])):
    comedian = transcript['comedian'][i]
    trans = transcript['transcript'][i]
    sentences = nltk.sent_tokenize(trans)
    selected = random.sample(sentences, min(3, len(sentences)))
    
    for sentence in selected:
        random_sentences.append({'comedian': comedian, 'sentence': sentence})

In [85]:
base_test = pd.DataFrame(random_sentences)
base_test.head()

Unnamed: 0,comedian,sentence
0,Anthony_Jeselnik,Very racist.
1,Anthony_Jeselnik,"When I was a kid, like nine years old, I’d com..."
2,Anthony_Jeselnik,"And when I did that, my mom would act weird."
3,Anthony_Jeselnik_2,I asked a friend for advice.
4,Anthony_Jeselnik_2,"Like… Mark Twain out of my grandma’s mouth, it..."


In [86]:
import random
random_sentences = []

for i in range(len(transcript['comedian'])):
    comedian = transcript['comedian'][i]
    trans = transcript['transcript'][i]
    sentences = nltk.sent_tokenize(trans)

    random.shuffle(sentences)
    selected = []
    for sentence in sentences:
        if len(selected) < 3 and sentence not in ground_truth.loc[ground_truth['comedian'] == comedian, 'sentence'].values:
            selected.append(sentence)
    
    for sentence in selected:
        random_sentences.append({'comedian': comedian, 'sentence': sentence})


In [87]:
base_test = pd.DataFrame(random_sentences)
base_test.head()

Unnamed: 0,comedian,sentence
0,Anthony_Jeselnik,My testimony.
1,Anthony_Jeselnik,Put your money away.”I never get to see my fam...
2,Anthony_Jeselnik,"Once in a while, I’d bring a black friend over."
3,Anthony_Jeselnik_2,I asked a friend for advice.
4,Anthony_Jeselnik_2,I am a real comedian.


In [88]:
found = base_metric(model, base_test)

In [89]:
correct_guesses = calculate_score(found, quotes_count_dict)

In [90]:
correct_guesses

{'Anthony_Jeselnik': 33.333333333333336,
 'Anthony_Jeselnik_2': 33.333333333333336,
 'Ali_Wong': 50.0,
 'Ali_Wong_2': 0.0,
 'Chelsea_Peretti': 0.0,
 'Chelsea_Peretti_2': 41.25,
 'Donald_Glover': 0.0,
 'Donald_Glover_2': 28.333333333333332,
 'Hasan_Minhaj': 20.5,
 'Hasan_Minhaj_2': 0.0,
 'Iliza_Shlesinger': 22.333333333333332,
 'Iliza_Shlesinger_2': 50.0,
 'Jim_Gaffigan': 46.0,
 'Jim_Gaffigan_2': 22.0,
 'Joe_List': 44.75,
 'Joe_List_2': 40.5,
 'John_Mulaney': 13.4,
 'John_Mulaney_2': 43.5,
 'Jimmy_Yang': 0.0,
 'Jimmy_Yang_2': 47.75,
 'Louis_CK': 40.0,
 'Louis_CK_2': 62.75,
 'Nate_Bargatze': 33.333333333333336,
 'Nate_Bargatze_2': 59.5,
 'Nate_Bargatze_TK': 25.0,
 'Nate_Bargatze_TK_2': 0.0,
 'Russell_Peters': 17.0,
 'Russell_Peters_2': 20.0,
 'Sam_Morril': 50.0,
 'Sam_Morril_2': 25.0,
 'Trevor_Noah': 0.0,
 'Trevor_Noah_2': 58.333333333333336,
 'Tom_Segura': 15.6,
 'Tom_Segura_2': 50.0}

In [91]:
print(sum(correct_guesses.values())/len(correct_guesses))

29.22058823529412
