# Basic Metric

This basic metric simply measures how many of the sentences picked by the model are correct with respect to the extracted ground truth.

In [58]:
import pandas as pd

In [59]:
# Reference data: one row per annotated passage, with the columns
# 'comedian', 'laugh_start', 'laugh_end' and 'sentence' (printed in the next cell).
ground_truth = pd.read_csv('/home/ada/humor/standup_data.csv')
# Model answers: columns 'Comedian' and 'Sentence' (note the capitalisation, see model.head() below).
# NOTE(review): both paths are absolute and machine-specific, and they point at two
# different directories — consider a configurable data directory.
model = pd.read_csv('/home/ada/humor/humor/gemma_answers.csv')

In [60]:
print(ground_truth.columns)

Index(['comedian', 'laugh_start', 'laugh_end', 'sentence'], dtype='object')


In [61]:
ground_truth.head()

Unnamed: 0,comedian,laugh_start,laugh_end,sentence
0,Donald_Glover,17.268,19.352,"He wasn't crying. Just tears, he was giving me..."
1,Donald_Glover,32.292,34.182,He would... The sweetest thing he was allowed ...
2,Donald_Glover,38.79,41.441,He was just allowed to have mints. So he would...
3,Donald_Glover,44.903,53.441,So his breath was so fresh... the vapors from ...
4,Donald_Glover,68.88,73.824,And I would take him to the park and I was the...


In [62]:
model.head()

Unnamed: 0,Comedian,Sentence
0,Anthony_Jeselnik,"When I was a kid, I used to fantasize about ge..."
1,Anthony_Jeselnik,My mom actually should've been on one of the p...
2,Anthony_Jeselnik,"When I was a kid, like nine years old, I'd com..."
3,Anthony_Jeselnik_2,I've never talked to a group of people without...
4,Anthony_Jeselnik_2,"And I know my grandma loved it too, because it..."


## Let's calculate the score!

First, simplify the sentences by changing them to all lowercase and removing punctuation.

In [6]:
import re

def simple_sentence(sentence):
    """Normalise a sentence so that two sentences can be compared loosely.

    The text is lowercased and every character that is neither a word
    character (letter, digit, underscore) nor whitespace is deleted.
    Whitespace itself is left untouched.
    """
    return re.sub(r'[^\w\s]', '', sentence.lower())

Calculate the score by first checking whether the model's sentence matches the ground-truth sentence exactly or is contained in it. If this is not the case, move on to fuzzy string matching to measure how similar the two sentences are. If the fuzzy score is above 60 (out of 100), it is added to the total score. The score is the average of correct responses per transcript.

In [7]:
from thefuzz import fuzz

In [8]:
# Ground-truth-centric pass: for every ground-truth sentence, look for the best match
# among the model's answers that carry the same comedian label.
# found maps: truth sentence -> [comedian, matched?, best score, number of distinct model sentences]
found = {}

for index, row in ground_truth.iterrows():
    comedian_name = row['comedian']
    truth = row['sentence']   
    simple_truth = simple_sentence(truth)
    # Only the model's answers for this comedian label are candidates
    matching_rows = model[model['Comedian'] == comedian_name]
    
    score = 0
    # Distinct model sentences seen for this comedian (used for the count stored in found[truth][3])
    num_sentences = set()

    # Create the entry once; a repeated truth sentence keeps its earlier state
    if truth not in found:
        found[truth] = [comedian_name, False, score, 0]  

    for index2, row2 in matching_rows.iterrows():
        model_answer = row2['Sentence']      
        simple_model_answer = simple_sentence(model_answer)
        num_sentences.add(model_answer)
        
        # Exact match after normalisation, or the model answer is a substring of the truth
        if simple_truth == simple_model_answer or simple_model_answer in simple_truth:
            score = 100 
            found[truth][0] = comedian_name  
            found[truth][1] = True 
            found[truth][2] = score
        else:
            # Otherwise fall back to fuzzy matching; only scores above 60 count as a match
            fuzzy_score = fuzz.partial_ratio(simple_truth, simple_model_answer)
            if fuzzy_score > 60:
                found[truth][0] = comedian_name 
                found[truth][1] = True
                # Keep the highest score seen so far for this truth sentence
                if fuzzy_score >  found[truth][2]:
                    found[truth][2] = fuzzy_score
    # Number of distinct model sentences for this comedian; calculate_score divides by it
    found[truth][3] = len(num_sentences)

In [9]:
def calculate_score(found):
    """Aggregate the per-sentence scores in ``found`` into one score per comedian.

    Parameters
    ----------
    found : dict
        Values are sequences ``[comedian, matched, score, n_model_sentences]``;
        only positions 0, 2 and 3 are read. The keys are not used.

    Returns
    -------
    dict
        ``comedian -> sum of scores / n_model_sentences``, in order of first
        appearance. The divisor is taken from the first entry seen for each
        comedian. A comedian whose divisor is 0 (no model sentences) scores
        0.0 instead of raising ``ZeroDivisionError``.

    Notes
    -----
    The numerator sums over every entry of ``found`` while the divisor is a
    count of model sentences, so when ``found`` has more matching entries than
    there are model sentences the result can exceed 100.
    """
    totals = {}
    divisors = {}

    for val in found.values():
        comedian_name = val[0]
        if comedian_name in totals:
            totals[comedian_name] += val[2]
        else:
            totals[comedian_name] = val[2]
            # The divisor is fixed by the first entry seen for the comedian
            divisors[comedian_name] = val[3]

    return {
        comedian_name: (total / divisors[comedian_name]) if divisors[comedian_name] else 0.0
        for comedian_name, total in totals.items()
    }
    

In [10]:
correct_guesses = calculate_score(found)

In [11]:
correct_guesses

{'Donald_Glover': 51.333333333333336,
 'Donald_Glover_2': 126.66666666666667,
 'Anthony_Jeselnik': 100.0,
 'Anthony_Jeselnik_2': 66.0,
 'Chelsea_Peretti': 110.33333333333333,
 'Chelsea_Peretti_2': 50.0,
 'Louis_CK': 106.8,
 'Louis_CK_2': 50.0,
 'John_Mulaney': 103.8,
 'John_Mulaney_2': 42.75,
 'Ali_Wong': 25.0,
 'Ali_Wong_2': 66.66666666666667,
 'Hasan_Minhaj': 91.5,
 'Hasan_Minhaj_2': 25.0,
 'Iliza_Shlesinger': 90.0,
 'Iliza_Shlesinger_2': 50.0,
 'Jim_Gaffigan': 82.25,
 'Jim_Gaffigan_2': 79.33333333333333,
 'Joe_List': 50.0,
 'Joe_List_2': 25.0,
 'Jimmy_Yang': 50.0,
 'Jimmy_Yang_2': 75.0,
 'Nate_Bargatze': 60.333333333333336,
 'Nate_Bargatze_2': 15.25,
 'Nate_Bargatze_TK': 99.5,
 'Nate_Bargatze_TK_2': 20.0,
 'Russell_Peters': 74.25,
 'Russell_Peters_2': 76.4,
 'Sam_Morril': 97.0,
 'Sam_Morril_2': 41.5,
 'Trevor_Noah': 46.0,
 'Trevor_Noah_2': 115.33333333333333,
 'Tom_Segura': 73.4,
 'Tom_Segura_2': 150.0}

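Start from the model's sentences instead of the ground truth to avoid duplicates: in the pass above, one model sentence can be counted for several ground-truth sentences, which is why some scores exceed 100.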

In [12]:
# Count how many rows (answers) the model produced for each comedian label
quotes_count_dict = {}
for comedian in model['Comedian']:
    quotes_count_dict[comedian] = quotes_count_dict.get(comedian, 0) + 1

print(quotes_count_dict)

{'Anthony_Jeselnik': 3, 'Anthony_Jeselnik_2': 3, 'Ali_Wong': 4, 'Ali_Wong_2': 3, 'Chelsea_Peretti': 3, 'Chelsea_Peretti_2': 4, 'Donald_Glover': 3, 'Donald_Glover_2': 3, 'Hasan_Minhaj': 4, 'Hasan_Minhaj_2': 4, 'Iliza_Shlesinger': 3, 'Iliza_Shlesinger_2': 2, 'Jim_Gaffigan': 4, 'Jim_Gaffigan_2': 3, 'Joe_List': 4, 'Joe_List_2': 4, 'John_Mulaney': 5, 'John_Mulaney_2': 4, 'Jimmy_Yang': 2, 'Jimmy_Yang_2': 4, 'Louis_CK': 5, 'Louis_CK_2': 4, 'Nate_Bargatze': 3, 'Nate_Bargatze_2': 4, 'Nate_Bargatze_TK': 4, 'Nate_Bargatze_TK_2': 5, 'Russell_Peters': 4, 'Russell_Peters_2': 5, 'Sam_Morril': 2, 'Sam_Morril_2': 4, 'Trevor_Noah': 2, 'Trevor_Noah_2': 3, 'Tom_Segura': 5, 'Tom_Segura_2': 2}


In [13]:
# Model-centric pass: score every model sentence against the ground-truth sentences
# that carry the same comedian label.
# found maps: model sentence -> [comedian, best score]
found = {}

for index, row in model.iterrows():
    comedian_name = row['Comedian']
    model_output = row['Sentence']
    simple_model_output = simple_sentence(model_output)
    matching = ground_truth[ground_truth['comedian'] == comedian_name]

    # Create the entry once per distinct model sentence. The membership test must use
    # the model sentence (the dictionary key), not a ground-truth sentence: `truth` is
    # only assigned further down, so testing it here would read a stale value (or fail
    # on a fresh kernel).
    if model_output not in found:
        found[model_output] = [comedian_name, 0]

    for index2, row2 in matching.iterrows():
        truth = row2['sentence']
        simple_truth = simple_sentence(truth)

        # Exact match after normalisation, or the model sentence is a substring of the truth
        if simple_truth == simple_model_output or simple_model_output in simple_truth:
            found[model_output][0] = comedian_name
            found[model_output][1] = 100
        else:
            # Otherwise fall back to fuzzy matching; only scores above 60 count
            fuzzy_score = fuzz.partial_ratio(simple_truth, simple_model_output)
            if fuzzy_score > 60:
                found[model_output][0] = comedian_name
                # Keep the best score over all ground-truth sentences
                found[model_output][1] = max(found[model_output][1], fuzzy_score)

In [14]:
def calculate_score(found, num_sentences):
    """Average the scores in ``found`` per comedian.

    ``found`` values are sequences whose first item is the comedian label and
    whose second item is a score. The scores are summed per comedian and each
    sum is divided by ``num_sentences[comedian]``. The result maps comedian
    to that quotient, in order of first appearance. A comedian missing from
    ``num_sentences`` raises ``KeyError``.
    """
    totals = {}
    for entry in found.values():
        name, points = entry[0], entry[1]
        totals[name] = totals[name] + points if name in totals else points

    return {name: total / num_sentences[name] for name, total in totals.items()}

In [15]:
correct_guesses = calculate_score(found, quotes_count_dict)

In [16]:
correct_guesses

{'Anthony_Jeselnik': 100.0,
 'Anthony_Jeselnik_2': 66.0,
 'Ali_Wong': 42.25,
 'Ali_Wong_2': 66.66666666666667,
 'Chelsea_Peretti': 56.333333333333336,
 'Chelsea_Peretti_2': 50.0,
 'Donald_Glover': 30.333333333333332,
 'Donald_Glover_2': 82.66666666666667,
 'Hasan_Minhaj': 75.0,
 'Hasan_Minhaj_2': 50.0,
 'Iliza_Shlesinger': 66.66666666666667,
 'Iliza_Shlesinger_2': 81.0,
 'Jim_Gaffigan': 87.25,
 'Jim_Gaffigan_2': 54.0,
 'Joe_List': 67.25,
 'Joe_List_2': 25.0,
 'John_Mulaney': 77.0,
 'John_Mulaney_2': 42.75,
 'Jimmy_Yang': 50.0,
 'Jimmy_Yang_2': 75.0,
 'Louis_CK': 100.0,
 'Louis_CK_2': 65.25,
 'Nate_Bargatze': 60.333333333333336,
 'Nate_Bargatze_2': 15.25,
 'Nate_Bargatze_TK': 99.5,
 'Nate_Bargatze_TK_2': 20.0,
 'Russell_Peters': 74.25,
 'Russell_Peters_2': 60.0,
 'Sam_Morril': 97.0,
 'Sam_Morril_2': 25.0,
 'Trevor_Noah': 46.0,
 'Trevor_Noah_2': 93.66666666666667,
 'Tom_Segura': 73.4,
 'Tom_Segura_2': 50.0}

Removing Stop Words

In [17]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [18]:
def remove_stop_words(sentence):
    """Return ``sentence`` with English stop words removed.

    The sentence is split with NLTK's ``word_tokenize``; every token whose
    lowercase form appears in NLTK's English stop-word list is dropped, and the
    remaining tokens are joined with single spaces. Tokens keep their original
    casing in the result.
    """
    stop_words = set(stopwords.words('english'))
    # Compare on the lowercased token so that capitalised stop words
    # ("That", "I", "She", ...) are removed as well.
    return " ".join(w for w in word_tokenize(sentence) if w.lower() not in stop_words)

In [19]:
def _find_column(frame, name):
    """Return the label of the column of ``frame`` that equals ``name`` ignoring case."""
    for label in frame.columns:
        if str(label).lower() == name:
            return label
    raise KeyError(name)


def base_metric(model_output, ground_truth):
    """Score every candidate sentence against the ground truth of the same comedian.

    Parameters
    ----------
    model_output : pandas.DataFrame
        Candidate sentences. Must have a comedian column and a sentence column;
        the column names are matched case-insensitively, so both
        ``'Comedian'/'Sentence'`` and ``'comedian'/'sentence'`` are accepted.
    ground_truth : pandas.DataFrame
        Reference sentences with ``'comedian'`` and ``'sentence'`` columns.

    Returns
    -------
    dict
        ``sentence -> [comedian, score]``. The score is 100 when the normalised
        sentence equals a reference sentence, or when its stop-word-free form is
        contained in a reference's stop-word-free form. Otherwise it is the best
        ``fuzz.partial_ratio`` above 60 over the references, or 0 if there is none.
    """
    comedian_col = _find_column(model_output, 'comedian')
    sentence_col = _find_column(model_output, 'sentence')

    # Normalised (simple, stop-word-free) reference sentences, built once per comedian
    truths_by_comedian = {}
    found = {}

    # Iterate the frame that was passed in, not a notebook-level global
    for comedian_name, sentence in zip(model_output[comedian_col], model_output[sentence_col]):
        if sentence not in found:
            found[sentence] = [comedian_name, 0]

        if comedian_name not in truths_by_comedian:
            rows = ground_truth[ground_truth['comedian'] == comedian_name]
            truths_by_comedian[comedian_name] = [
                (simple_truth, remove_stop_words(simple_truth))
                for simple_truth in map(simple_sentence, rows['sentence'])
            ]

        # Normalise the candidate once instead of once per reference sentence
        simple_model_output = simple_sentence(sentence)
        stop_model = remove_stop_words(simple_model_output)

        for simple_truth, stop_truth in truths_by_comedian[comedian_name]:
            # An empty stop_model is a substring of every string; it must not
            # count as a containment match.
            contained = bool(stop_model) and stop_model in stop_truth
            if simple_truth == simple_model_output or contained:
                found[sentence][0] = comedian_name
                found[sentence][1] = 100
            else:
                fuzzy_score = fuzz.partial_ratio(stop_truth, stop_model)
                if fuzzy_score > 60:
                    found[sentence][0] = comedian_name
                    found[sentence][1] = max(found[sentence][1], fuzzy_score)

    return found

In [20]:
found = base_metric(model, ground_truth)

In [21]:
def calculate_score(found, num_sentences):
    """Turn per-sentence scores into one averaged score per comedian.

    This has the same contract as the two-argument ``calculate_score`` defined
    earlier in the notebook. Each value of ``found`` starts with
    ``[comedian, score]``; scores are summed per comedian and divided by
    ``num_sentences[comedian]``. Comedians appear in the result in order of
    first appearance, and a comedian absent from ``num_sentences`` raises
    ``KeyError``.
    """
    score_sums = {}
    for record in found.values():
        who = record[0]
        if who in score_sums:
            score_sums[who] = score_sums[who] + record[1]
        else:
            score_sums[who] = record[1]

    return {who: score_sums[who] / num_sentences[who] for who in score_sums}

In [22]:
correct_guesses = calculate_score(found, quotes_count_dict)

In [23]:
correct_guesses

{'Anthony_Jeselnik': 100.0,
 'Anthony_Jeselnik_2': 66.66666666666667,
 'Ali_Wong': 42.0,
 'Ali_Wong_2': 66.66666666666667,
 'Chelsea_Peretti': 33.333333333333336,
 'Chelsea_Peretti_2': 50.0,
 'Donald_Glover': 30.0,
 'Donald_Glover_2': 80.66666666666667,
 'Hasan_Minhaj': 75.0,
 'Hasan_Minhaj_2': 50.0,
 'Iliza_Shlesinger': 66.66666666666667,
 'Iliza_Shlesinger_2': 50.0,
 'Jim_Gaffigan': 87.75,
 'Jim_Gaffigan_2': 55.333333333333336,
 'Joe_List': 83.25,
 'Joe_List_2': 25.0,
 'John_Mulaney': 78.4,
 'John_Mulaney_2': 61.25,
 'Jimmy_Yang': 50.0,
 'Jimmy_Yang_2': 75.0,
 'Louis_CK': 100.0,
 'Louis_CK_2': 65.25,
 'Nate_Bargatze': 62.666666666666664,
 'Nate_Bargatze_2': 33.0,
 'Nate_Bargatze_TK': 99.0,
 'Nate_Bargatze_TK_2': 20.0,
 'Russell_Peters': 72.5,
 'Russell_Peters_2': 88.6,
 'Sam_Morril': 100.0,
 'Sam_Morril_2': 42.5,
 'Trevor_Noah': 45.5,
 'Trevor_Noah_2': 94.66666666666667,
 'Tom_Segura': 60.0,
 'Tom_Segura_2': 50.0}

# Testing the metric

In [24]:
import random

In [25]:
transcript = pd.read_csv('/home/ada/humor/humor/standup_transcripts.csv')

In [26]:
# Draw up to three random sentences from every transcript
# (presumably a random baseline for testing the metric — confirm).
# A dedicated, seeded generator makes the sample identical on every run.
SAMPLE_SEED = 42
rng = random.Random(SAMPLE_SEED)

random_sentences = []

# Pair the two columns positionally instead of looking rows up by index label
for comedian, trans in zip(transcript['comedian'], transcript['transcript']):
    sentences = nltk.sent_tokenize(trans)
    # min(...) guards transcripts with fewer than three sentences
    selected = rng.sample(sentences, min(3, len(sentences)))

    for sentence in selected:
        random_sentences.append({'comedian': comedian, 'sentence': sentence})

In [27]:
base_test = pd.DataFrame(random_sentences)
base_test.head()

Unnamed: 0,comedian,sentence
0,Anthony_Jeselnik,That’s ignorance and I hate that.
1,Anthony_Jeselnik,"She was my mom, of course I loved her."
2,Anthony_Jeselnik,"Of course this was before 9/11 so… my bad, eve..."
3,Anthony_Jeselnik_2,"Try not to.” So I walked up and was like, “You..."
4,Anthony_Jeselnik_2,"Was like, “I’ve never talked to a group of peo..."


In [95]:
base_test

Unnamed: 0,comedian,sentence,no_stop
0,Anthony_Jeselnik,That’s ignorance and I hate that.,That ’ ignorance I hate .
1,Anthony_Jeselnik,"She was my mom, of course I loved her.","She mom , course I loved ."
2,Anthony_Jeselnik,"Of course this was before 9/11 so… my bad, eve...","Of course 9/11 so… bad , everybody ."
3,Anthony_Jeselnik_2,"Try not to.” So I walked up and was like, “You...","Try to. ” So I walked like , “ You know favori..."
4,Anthony_Jeselnik_2,"Was like, “I’ve never talked to a group of peo...","Was like , “ I ’ never talked group people wit..."
...,...,...,...
97,Tom_Segura,"Cool.” And he goes, “We just need to go get it...","Cool. ” And goes , “ We need go get it. ” I li..."
98,Tom_Segura,Paralysis.,Paralysis .
99,Tom_Segura_2,"You’re like, “I’m in trouble.” It’s always som...","You ’ like , “ I ’ trouble. ” It ’ always lady..."
100,Tom_Segura_2,It felt like the inside of my body hugged the ...,"It felt like inside body hugged outside body ,..."


In [98]:
# Debugging aid. NOTE: DataFrame.apply defaults to axis=0, so the lambda receives each
# *column* as a Series (despite the parameter name `row`). print() returns None, which
# is why the cell's result is a Series of None values.
ground_truth.apply(lambda row: print(row))

0      Donald_Glover
1      Donald_Glover
2      Donald_Glover
3      Donald_Glover
4      Donald_Glover
           ...      
241       Tom_Segura
242       Tom_Segura
243     Tom_Segura_2
244     Tom_Segura_2
245     Tom_Segura_2
Name: comedian, Length: 246, dtype: object
0      17.2680
1      32.2920
2      38.7900
3      44.9030
4      68.8800
        ...   
241    56.5030
242    61.4021
243    44.7760
244    50.7880
245    58.9700
Name: laugh_start, Length: 246, dtype: float64
0      19.352
1      34.182
2      41.441
3      53.441
4      73.824
        ...  
241    58.176
242    63.451
243    47.440
244    53.995
245    60.597
Name: laugh_end, Length: 246, dtype: float64
0      He wasn't crying. Just tears, he was giving me...
1      He would... The sweetest thing he was allowed ...
2      He was just allowed to have mints. So he would...
3      So his breath was so fresh... the vapors from ...
4      And I would take him to the park and I was the...
                             .

comedian       None
laugh_start    None
laugh_end      None
sentence       None
no_stop        None
dtype: object

In [136]:
# Lowercase the column labels of the model answers ('Comedian' -> 'comedian', ...).
# NOTE(review): this changes `model` in place. Cells above index model['Comedian'] and
# row['Sentence'], so re-running them after this cell raises KeyError.
model.columns = [c.lower() for c in model.columns]
model

Unnamed: 0,comedian,sentence
0,Anthony_Jeselnik,"When I was a kid, I used to fantasize about ge..."
1,Anthony_Jeselnik,My mom actually should've been on one of the p...
2,Anthony_Jeselnik,"When I was a kid, like nine years old, I'd com..."
3,Anthony_Jeselnik_2,I've never talked to a group of people without...
4,Anthony_Jeselnik_2,"And I know my grandma loved it too, because it..."
...,...,...
116,Tom_Segura,I'll go get it. You stay here and watch my place.
117,Tom_Segura,That's what's up.
118,Tom_Segura,Can we get a description before we agree to te...
119,Tom_Segura_2,I got such a warm rush through my body. It fel...


In [138]:
# Use a copy of the model answers with lowercase column labels. rename() leaves
# `model` untouched and also works if its columns are already lowercase, so this cell
# does not depend on whether `model` was modified in place beforehand.
model_answers = model.rename(columns=str.lower)

# fuzz.ratio values at or below this threshold are treated as "no match" (score 0)
FUZZ_THRESHOLD = 60

# Matrix of raw fuzz.ratio values: one row per model answer, one column per ground-truth
# row; pairs with different comedian labels are left empty (None).
pair_ratios = model_answers.apply(
    lambda row_model: ground_truth.apply(
        lambda row_truth:
            fuzz.ratio(row_truth["sentence"], row_model["sentence"])
            if row_model["comedian"] == row_truth["comedian"]
            else None,
        axis=1),
    axis=1)

scores = (
    pair_ratios
    # Rescale (threshold, 100] linearly onto (0, 1]; everything below becomes 0
    .sub(FUZZ_THRESHOLD)
    .clip(lower=0)
    .div(100 - FUZZ_THRESHOLD)
    # Long format: index = model row, "variable" = ground-truth row, "value" = score
    .melt(ignore_index=False)
    .dropna()
    .reset_index()
    .join(model_answers["sentence"], on="index")
    .rename(columns={"sentence": "model"})
    .join(ground_truth, on="variable")
    .rename(columns={"sentence": "truth", "value": "score"})
)

scores = scores[["comedian", "model", "truth", "score"]]

# NOTE(review): this averages over every same-comedian (model, truth) pair rather than
# over the best match per model sentence — confirm that this is intended.
scores.groupby("comedian")["score"].mean()

0.08472222222222221

In [74]:

ground_truth.loc[242, "sentence"]

'Paralysis. But what I said was, "That\'s what\'s up."'

In [59]:
base_test.loc[98, "sentence"]

'Paralysis.'

In [34]:
found = base_metric(base_test, ground_truth)

KeyboardInterrupt: 

In [29]:
# Normalise by the number of sentences that were actually scored in `found`.
# quotes_count_dict counts the model's answers per comedian, which is not the number
# of sentences sampled into base_test.
sentences_scored = {}
for entry in found.values():
    sentences_scored[entry[0]] = sentences_scored.get(entry[0], 0) + 1

correct_guesses = calculate_score(found, sentences_scored)

In [30]:
base_test.loc[0, "sentence"]

'That’s ignorance and I hate that.'

In [31]:
ground_truth[ground_truth.comedian == "Anthony_Jeselnik"]

Unnamed: 0,comedian,laugh_start,laugh_end,sentence
17,Anthony_Jeselnik,16.898,19.405,"When I was a kid, I used to fantasize about ge..."
18,Anthony_Jeselnik,29.085,31.1,Sold my passport on the street for 300 bucks t...
19,Anthony_Jeselnik,34.874,36.233,"Weird joke to clap for, but sure."
20,Anthony_Jeselnik,43.756,48.912,My mom actually should've been on one of the p...
21,Anthony_Jeselnik,86.505,88.497,"And when I did that, my mom would act weird. S..."
22,Anthony_Jeselnik,93.982,100.708,"And I would say, ""Shut up, Mom, that's racist...."
23,Anthony_Jeselnik,114.653,120.278,"And we never talk, write letters or any of tha..."


In [32]:
correct_guesses

{'Anthony_Jeselnik': 100.0,
 'Anthony_Jeselnik_2': 66.66666666666667,
 'Ali_Wong': 42.0,
 'Ali_Wong_2': 66.66666666666667,
 'Chelsea_Peretti': 33.333333333333336,
 'Chelsea_Peretti_2': 50.0,
 'Donald_Glover': 30.0,
 'Donald_Glover_2': 80.66666666666667,
 'Hasan_Minhaj': 75.0,
 'Hasan_Minhaj_2': 50.0,
 'Iliza_Shlesinger': 66.66666666666667,
 'Iliza_Shlesinger_2': 50.0,
 'Jim_Gaffigan': 87.75,
 'Jim_Gaffigan_2': 55.333333333333336,
 'Joe_List': 83.25,
 'Joe_List_2': 25.0,
 'John_Mulaney': 78.4,
 'John_Mulaney_2': 61.25,
 'Jimmy_Yang': 50.0,
 'Jimmy_Yang_2': 75.0,
 'Louis_CK': 100.0,
 'Louis_CK_2': 65.25,
 'Nate_Bargatze': 62.666666666666664,
 'Nate_Bargatze_2': 33.0,
 'Nate_Bargatze_TK': 99.0,
 'Nate_Bargatze_TK_2': 20.0,
 'Russell_Peters': 72.5,
 'Russell_Peters_2': 88.6,
 'Sam_Morril': 100.0,
 'Sam_Morril_2': 42.5,
 'Trevor_Noah': 45.5,
 'Trevor_Noah_2': 94.66666666666667,
 'Tom_Segura': 60.0,
 'Tom_Segura_2': 50.0}

In [91]:
# Unweighted mean of the per-comedian scores: every comedian label counts equally,
# regardless of how many sentences it has.
print(sum(correct_guesses.values())/len(correct_guesses))

29.22058823529412
