# Trying out Different Models and Prompts

Load Models and necessary libraries

In [1]:
import pandas as pd
import transformers
import inseq
import torch
from pathlib import Path

# Show whether PyTorch can see a CUDA device before any model is loaded.
torch.cuda.is_available()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
import os
from getpass import getpass

from huggingface_hub import login

# Never store access tokens in the notebook: read the token from the HF_TOKEN
# environment variable and fall back to an interactive prompt.
# SECURITY: the token that used to be hardcoded in this cell has been exposed and
# should be revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/ada/.cache/huggingface/token
Login successful


In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub id of the checkpoint used until `model`/`tokenizer` are rebound later on.
MODEL_ID = "google/gemma-2b-it"

# device_map="auto" leaves the placement of the weights to the loader.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=MODEL_ID,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=MODEL_ID)

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.
Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.66s/it]


In [4]:
# Load the stand-up transcripts (columns shown below: comedian, transcript).
transcripts = pd.read_csv(
    filepath_or_buffer='/home/ada/humor/data/stand_up_dataset/standup_transcripts.csv'
)

In [5]:
# Preview the first five rows of the transcript table.
transcripts.head(n=5)

Unnamed: 0,comedian,transcript
0,Anthony_Jeselnik,"When I was a kid, I used to fantasize about ge..."
1,Anthony_Jeselnik_2,No one should ever ask me to speak at anyone’...
2,Ali_Wong,"The last time I was at home in San Francisco, ..."
3,Ali_Wong_2,I need to have children to keep me company whe...
4,Chelsea_Peretti,A friend of a friend just posted like 500 enga...


#### Preference Model

In [38]:
# Single transcript (the row whose comedian is 'Anthony_Jeselnik') used for
# one-off prompt experiments in the sections below.
is_jeselnik = transcripts['comedian'] == 'Anthony_Jeselnik'
jeselnik_transcript = transcripts.loc[is_jeselnik, 'transcript'].iloc[0]


In [39]:
# Preview prompt for the sample transcript. It interpolates `jeselnik_transcript`
# because the loop variable `transcript` does not exist yet at this point when the
# notebook is run top to bottom on a fresh kernel.
instruction = f"Extract the sentences you like best from this stand-up comedy transcript. {jeselnik_transcript} List of quotes:"

In [40]:
# Accumulator for one {'comedian', 'sentence'} record per transcript.
results = list()

In [41]:
# Ask the model for the sentences it likes best in every transcript and collect
# one {'comedian', 'sentence'} record per row in `results`.
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"Extract the sentences you like best from this stand-up comedy transcript. {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [42]:
# Preview a few records instead of dumping every generated response into the output.
results[:3]

[{'comedian': 'Anthony_Jeselnik',
  'sentence': '"When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday."\n\n"When I was a kid, we were poor. So poor I remember, just so I could go to my senior prom, just so I could go to my senior prom, I had to sell my U.S. passport on the street."\n\n"My mom actually should’ve been on one of the planes that crashed'},
 {'comedian': 'Anthony_Jeselnik_2',
  'sentence': '- "I\'ve never talked to a group of people without getting paid a lot of money. How should I handle this?"\n- "Anthony, just go up there and tell a story. Find one moment about you and your grandma you can share with everybody. And don\'t tell a joke. Try not to."\n- "And I know my grandma loved it too, because it combined her two favorite things: spending time with her grandchildren, and using the ‘N’ word'},
 {'comedian': 'Ali_Wong',
  'sentence': '"It was like the worst experience of my life."\n"It was 

In [43]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)


In [44]:
# One row per kept quote, with columns 'comedian' and 'sentence'.
quotes_df = pd.DataFrame(data=r)

In [45]:
# Persist the favourite-quote table without the index column.
# NOTE(review): unlike the other quote files written by this notebook, this path is
# outside data/stand_up_dataset — confirm that is intended.
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/fav_quotes.csv',
    index=False,
)

#### Random Gemma

In [46]:
# Preview prompt for the sample transcript. It interpolates `jeselnik_transcript`,
# like the other preview cells, rather than whatever `transcript` an earlier loop
# happened to leave behind.
instruction = f"Extract random sentences from this stand-up comedy transcript. {jeselnik_transcript} List of quotes:"

In [47]:
# Ask the model for "random" sentences from every transcript; one record per row.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"Extract random sentences from this stand-up comedy transcript. {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [48]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [49]:
# One row per kept quote, with columns 'comedian' and 'sentence'.
quotes_df = pd.DataFrame(data=r)

In [50]:
# Persist the "random sentences" table without the index column.
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/data/stand_up_dataset/random_gemma_quotes.csv',
    index=False,
)

#### Best Representative Sentences

In [51]:
# One-off run of the "best represent" prompt on the sample transcript.
instruction = f"Extract the sentences that best represent this stand-up comedy transcript. {jeselnik_transcript} List of quotes:"
# Keep the attention mask and follow the model's own device instead of
# hardcoding "cuda" (the model is loaded with device_map="auto").
encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
input_ids = encoded.input_ids
outputs = model.generate(**encoded, max_new_tokens=100)
# Decode only the tokens generated after the prompt: removing the prompt with
# str.replace() silently fails whenever decode() does not reproduce it verbatim.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()

In [52]:
# Ask the model for the most representative sentences of every transcript.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"Extract the sentences that best represent this stand-up comedy transcript. {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [53]:
# Preview a few records instead of dumping every generated response into the output.
results[:3]

[{'comedian': 'Anthony_Jeselnik',
  'sentence': '"When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday."\n\n"When I was a kid, we were poor. So poor I remember, just so I could go to my senior prom, just so I could go to my senior prom, I had to sell my U.S. passport on the street."\n\n"My mom actually should’ve been on one of the planes that crashed'},
 {'comedian': 'Anthony_Jeselnik_2',
  'sentence': '- "I\'ve never talked to a group of people without getting paid a lot of money. How should I handle this?"\n- "Anthony, just go up there and tell a story. Find one moment about you and your grandma you can share with everybody. And don\'t tell a joke. Try not to."\n- "And I know my grandma loved it too, because it combined her two favorite things: spending time with her grandchildren, and using the ‘N’ word'},
 {'comedian': 'Ali_Wong',
  'sentence': '"It was like the worst experience of my life."\n"It was 

In [54]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [55]:
# One row per kept quote, with columns 'comedian' and 'sentence'.
quotes_df = pd.DataFrame(data=r)

In [56]:
# Persist the "best represent" table without the index column.
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/data/stand_up_dataset/best_rep_quotes.csv',
    index=False,
)

#### Most Offensive Sentences

In [57]:
# One-off run of the "most offensive" prompt on the sample transcript.
instruction = f"Extract the sentences that you find are most offensive in this stand-up comedy transcript. {jeselnik_transcript} List of quotes:"
# Keep the attention mask and follow the model's own device instead of
# hardcoding "cuda" (the model is loaded with device_map="auto").
encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
input_ids = encoded.input_ids
outputs = model.generate(**encoded, max_new_tokens=100)
# Decode only the tokens generated after the prompt: removing the prompt with
# str.replace() silently fails whenever decode() does not reproduce it verbatim.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()

In [58]:
# Ask the model for the sentences it finds most offensive in every transcript.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"Extract the sentences that you find are most offensive in this stand-up comedy transcript. {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [59]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [60]:
# One row per kept quote, with columns 'comedian' and 'sentence'.
quotes_df = pd.DataFrame(data=r)

In [61]:
# Persist the "most offensive" table without the index column.
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/data/stand_up_dataset/offensive_quotes.csv',
    index=False,
)

#### Longest Sentences

In [62]:
# Ask the model for the longest sentences of every transcript.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"Find the sentences that have the longest length in this stand-up comedy transcript. {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [63]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [64]:
# One row per kept quote, with columns 'comedian' and 'sentence'.
quotes_df = pd.DataFrame(data=r)

In [65]:
# Persist the "longest sentences" table without the index column.
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/data/stand_up_dataset/longest_quotes.csv',
    index=False,
)

#### Shortest Sentences

In [66]:
# Ask the model for the shortest sentences of every transcript.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"Find the sentences that have the shortest length in this stand-up comedy transcript. {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [67]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [68]:
# One row per kept quote, with columns 'comedian' and 'sentence'.
quotes_df = pd.DataFrame(data=r)

In [69]:
# Persist the "shortest sentences" table without the index column.
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/data/stand_up_dataset/shortest_quotes.csv',
    index=False,
)

#### Absurd Sentences

In [70]:
# One-off run of the "most absurd" prompt on the sample transcript.
instruction = f"Extract the sentences that you find are the most absurd in this stand-up comedy transcript. {jeselnik_transcript} List of quotes:"
# Keep the attention mask and follow the model's own device instead of
# hardcoding "cuda" (the model is loaded with device_map="auto").
encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
input_ids = encoded.input_ids
outputs = model.generate(**encoded, max_new_tokens=100)
# Decode only the tokens generated after the prompt: removing the prompt with
# str.replace() silently fails whenever decode() does not reproduce it verbatim.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()

In [71]:
# Ask the model for the sentences it finds most absurd in every transcript.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"Extract the sentences that you find are the most absurd in this stand-up comedy transcript. {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [72]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [73]:
# Tabulate the kept quotes and persist them without the index column.
quotes_df = pd.DataFrame(data=r)
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/data/stand_up_dataset/absurd_quotes.csv',
    index=False,
)

#### Boring Sentences

In [74]:
# One-off run of the "most boring" prompt on the sample transcript.
instruction = f"What are the sentences that you find are the most boring in this stand-up comedy transcript. {jeselnik_transcript} List of quotes:"
# Keep the attention mask and follow the model's own device instead of
# hardcoding "cuda" (the model is loaded with device_map="auto").
encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
input_ids = encoded.input_ids
outputs = model.generate(**encoded, max_new_tokens=100)
# Decode only the tokens generated after the prompt: removing the prompt with
# str.replace() silently fails whenever decode() does not reproduce it verbatim.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()

In [75]:
# Ask the model for the least funny sentences of every transcript.
# NOTE(review): this prompt asks for the "least funny" sentences, while the preview
# cell in this section asks for the "most boring" ones — confirm which wording
# the saved boring_quotes.csv is meant to reflect.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"Extract the sentences that are the least funny in this stand-up comedy transcript? {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [76]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [77]:
# Tabulate the kept quotes and persist them without the index column.
quotes_df = pd.DataFrame(data=r)
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/data/stand_up_dataset/boring_quotes.csv',
    index=False,
)

#### Gemma: Other "Funny" Prompts

In [78]:
# NOTE: despite its name, `jeselnik_transcript` holds the 'Tom_Segura' transcript
# from this cell onwards.
is_segura = transcripts['comedian'] == 'Tom_Segura'
jeselnik_transcript = transcripts.loc[is_segura, 'transcript'].iloc[0]

In [79]:
# One-off run of the "funniest moments" prompt on the sample transcript.
instruction = f"What are the funniest moments in this stand up comedy transcript? {jeselnik_transcript} List of quotes:"
# Keep the attention mask and follow the model's own device instead of
# hardcoding "cuda" (the model is loaded with device_map="auto").
encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
input_ids = encoded.input_ids
outputs = model.generate(**encoded, max_new_tokens=100)
# Decode only the tokens generated after the prompt: removing the prompt with
# str.replace() silently fails whenever decode() does not reproduce it verbatim.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()

In [80]:
# Ask the model for the funniest moments of every transcript.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"What are the funniest moments in this stand up comedy transcript?  {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

In [81]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip before the emptiness and duplicate checks so whitespace-only lines
        # and repeats that differ only in surrounding whitespace are not kept.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [82]:
# Tabulate the kept quotes and persist them without the index column.
quotes_df = pd.DataFrame(data=r)
quotes_df.to_csv(
    path_or_buf='/home/ada/humor/data/stand_up_dataset/gemma_funny2_quotes.csv',
    index=False,
)

In [83]:
# Ask the model for the most humorous sentences of every transcript.
# NOTE(review): in the visible part of the notebook these results are neither split
# nor saved before `results` is reassigned in the next section — confirm whether a
# post-processing step is missing.
results = []
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    instruction = f"What are the most humorous sentences in this stand-up comedy transcript?  {transcript} List of quotes:"
    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(instruction, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=100)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    results.append({'comedian': comedian, 'sentence': response})

### Different LLMs

Phi-3 Mini

In [84]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [85]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Rebinds `tokenizer` and `model`: every cell executed after this one talks to
# Phi-3 instead of the model loaded at the top of the notebook.
phi3_checkpoint = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(phi3_checkpoint, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(phi3_checkpoint, device_map="auto")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00,  5.79s/it]
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [87]:
# Ask the currently loaded model for the key humorous lines of every transcript.
# The transcript comes first (wrapped in triple quotes) and the instruction last.
results = []
instruction = "Extract the key humorous lines and punchlines for this stand-up comedy transcript. Focus on the quotes highlighting the main comedic moments. List of quotes:"

for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']
    prompt = f"'''{transcript}'''\n\n{instruction}"

    # Keep the attention mask and follow the model's own device instead of
    # hardcoding "cuda" (the model is loaded with device_map="auto").
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    outputs = model.generate(**encoded, max_new_tokens=120)
    # Decode only the tokens generated after the prompt: removing the prompt with
    # str.replace() silently fails whenever decode() does not reproduce it verbatim.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    results.append({'comedian': comedian, 'sentence': response})

In [88]:
# Split every model response into lines and keep the first occurrence of each line.
r = []
seen_quotes = set()
for entry in results:
    comedian = entry['comedian']
    for line in entry['sentence'].split('\n'):
        # Strip first and use the stripped text for the emptiness check, the duplicate
        # check and the stored value alike. Checking the raw line but storing the
        # stripped one lets empty strings and whitespace-variant duplicates through.
        quote = line.strip()
        if quote and quote not in seen_quotes:
            r.append({'comedian': comedian, 'sentence': quote})
            seen_quotes.add(quote)

In [101]:
r = [
 {'comedian': 'Anthony_Jeselnik',
  'sentence': "When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday."},
 {'comedian': 'Anthony_Jeselnik',
  'sentence': "So poor I remember, just so I could go to my senior prom, just so I could go to my senior prom, I had to sell my U.S. passport on the street. Sold my passport on the street for 300 bucks to get to go to my prom."},
 {'comedian': 'Anthony_Jeselnik',
 'sentence': "Of course this was before 9/11 so… my bad, everybody. Weird"},
 {'comedian': 'Anthony_Jeselnik_2',
  'sentence': "I’ve never talked to a group of people without getting paid a lot of money."},
 {'comedian': 'Anthony_Jeselnik_2',
  'sentence': "And I know my grandma loved it too, because it combined her two favorite things: spending time with her grandchildren, and using the ‘N’ word."},
 {'comedian': 'Anthony_Jeselnik_2', 'sentence': '### Solution:'},
 {'comedian': 'Ali_Wong',
  'sentence':  "It was like the worst experience of my life."},
 {'comedian': 'Ali_Wong',
  'sentence': "We were screaming and fighting and yelling and it all came to a climax when she refused to let go of a Texas Instruments TI-82… manual."},
 {'comedian': 'Ali_Wong',
  'sentence': "She don’t even know… where the calculator is."},
 {'comedian': 'Ali_Wong',
  'sentence': "It was like this Judy Jetson’s laptop from the future."},
 {'comedian': 'Ali_Wong',
  'sentence': "I’m not trying to be a procrastinator anymore."},
 {'comedian': 'Ali_Wong_2',
  'sentence': "I need children to be there for me when I’m older, when I get as old as her."}, 
 {'comedian': 'Ali_Wong_2',
  'sentence': 'And when I say be there for me, I mean pay for me when my husband isn’t around to support me anymore.'},
 {'comedian': 'Ali_Wong_2',
  'sentence': 'Old Chinese ladies, they don’t give a fuck. They got no shame.'}, 
 {'comedian': 'Ali_Wong_2',
  'sentence': 'They do it in their– their big-ass V. Stiviano visor, their Darth Vader-Tomb Raider- Boba Fett helmet.'}, 
 {'comedian': 'Chelsea_Peretti',
  'sentence': "It’s like that wedding portraiture, where it’s just like, two assholes back to back in the woods, just like… [laughing shrilly] “To the future!”"}, 
 {'comedian': 'Chelsea_Peretti',
  'sentence': 'This same girl on Facebook, all her posts and all her updates are about her husband. That’s all she seems to write or think about, but she doesn’t call him her husband, which would be awesome, and she doesn’t call him by his name, which would also be great.'},
 {'comedian': 'Chelsea_Peretti_2',
  'sentence': "People say I look just like you."}, 
 {'comedian': 'Chelsea_Peretti_2',
  'sentence': 'It’s actually to stop them from sweating, because why would you, you know.'}, 
 {'comedian': 'Chelsea_Peretti_2',
  'sentence': 'Is that a good trade? like, kind of a deal with the devil.'},
 {'comedian': 'Chelsea_Peretti_2',
  'sentence':  'Whoa, whoa, whoa!'}, 
 {'comedian': 'Chelsea_Peretti_2',
  'sentence': "I’m so drawn to you, but I can’t get near you! This is a whole new kind of juicy booty."},
 {'comedian': 'Donald_Glover',
  'sentence': "I was like, what the fuck is wrong with this kid?"},
 {'comedian': 'Donald_Glover',
  'sentence': "He was just a mean-spirited kid, like, he watched HBO just a little too early and was just kind of a mean kid in general."},
 {'comedian': 'Donald_Glover', 'sentence': "He goes, \'Suck my d!ck!\'"},
 {'comedian': 'Donald_Glover',
  'sentence': "The lady goes, \'Someone betta get this little niglet away from me.\'"},
 {'comedian': 'Donald_Glover_2',
  'sentence': "I can’t do it, I can’t do it, I’m not… in love yet. That was me in a domestic dispute with my… with my butt."},
 {'comedian': 'Hasan_Minhaj',
  'sentence': "Have you seen the show called The Slap? This is a real show on NBC. This is a real show about a white kid that gets slapped at a birthday party."},
 {'comedian': 'Hasan_Minhaj',
  'sentence': "Are you fucking kidding? Thirteen episodes for this kid? Are you kidding me? Do you know when br0wn kids get slapped? Every br0wn birthday party."},
 {'comedian': 'Hasan_Minhaj',
  'sentence': "And usually it’s the kid whose birthday it is, and we stand there and point and laugh."},
 {'comedian': 'Hasan_Minhaj_2',
  'sentence': "I’m like, \'All 30 of us? We’re all going to prom? AP Calc? Me, Jehovah’s Witness girl, Korean exchange students, going to the prom? Thirty for 30? All of us?\'"},
 {'comedian': 'Hasan_Minhaj_2',
  'sentence': "He’s like, \'Hanson, this is not funny.\' He walks over to the board, he pulls it down. It’s March Madness for nerds."},
 {'comedian': 'Iliza_Shlesinger',
  'sentence': 'And you start talking, maybe you have some drinks, maybe you dance, maybe you exchange numbers, maybe you start to date. Now, I happen to think the very beginning of a new relationship is the most exciting part of the relationship.'},
 {'comedian': 'Iliza_Shlesinger',
  'sentence': 'Granted, I’m not married yet, so, technically, I’ve only had beginnings of relationships.'},
 {'comedian': 'Iliza_Shlesinger',
  'sentence': 'I don’t know. I can’t say empirically how fun being married is. I’ll tell you on the next comedy special what that’s all about.'},
 {'comedian': 'Iliza_Shlesinger_2',
  'sentence': 'It’s like, “Maybe she wants me. I don’t know. Oh, my god.”'},
 {'comedian': 'Iliza_Shlesinger_2',
  'sentence': "You don’t want to wear your civilian clothes, okay."},
 {'comedian': 'Iliza_Shlesinger_2', 'sentence': 'No means no.'},
 {'comedian': 'Iliza_Shlesinger_2',
  'sentence': 'You show up in that, he’ll be, like, “Oh, my god!”'},
 {'comedian': 'Jim_Gaffigan',
  'sentence': 'I am now at the size, when I go in clothing stores sales people look at me like, “We got nothing for you. And you can’t use our bathroom.”'},
 {'comedian': 'Jim_Gaffigan',
  'sentence': "When I go out to eat, if I order a salad, the waiter’s always like, “Aw. Look at you try.”"},
 {'comedian': 'Jim_Gaffigan',
  'sentence': "I don’t care. I like to eat. You know, when you like to eat, what’s weird is people assume you enjoy cooking."},
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': "Recently, I was invited to a surprise birthday party. It was a surprise birthday party for a dog."}, 
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': 'And to be fair, the dog was surprised. Didn’t suspect a thing.'},
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': 'The dog didn’t know it was his birthday. The dog didn’t know it had a birthday.'},
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': 'It was the dog’s third birthday, which in dog years doesn’t matter.'}, 
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': 'Someone made that up and we just went along with it. Oh, one year equals seven for doggies?'},
 {'comedian': 'Joe_List',
  'sentence': 'So I went to the ear, nose, throat doctor, which is a cool doctor. It’s one doctor. He knows all three body parts.'},
 {'comedian': 'Joe_List',
  'sentence': "I was hoping it was three doctors saving money, by sharing a small office."},
 {'comedian': 'Joe_List', 'sentence': "Who do ya need?"},
 {'comedian': 'Joe_List',
  'sentence': "I went for my ear, I have an ear issue called, tinnitus, or tin-uh-dus."},
 {'comedian': 'Joe_List',
  'sentence': "It’s when you’re ears ring, or buzz all the time."},
 {'comedian': 'Joe_List_2',
  'sentence': "I’m a rock and roll guy. I love Guns N’ Roses, I’m cool."},
 {'comedian': 'Joe_List_2',
  'sentence': 'I was on a plane the other day. I hated the guy behind me.'},
 {'comedian': 'Joe_List_2',
  'sentence': 'I was like, hope this guy passes away on the flight.'},
 {'comedian': 'Joe_List_2',
  'sentence': 'I’m a rock and roll guy. I love Guns N’ Roses, I’m cool.'},
 {'comedian': 'John_Mulaney',
  'sentence': "I don’t mean to sound down on donating. [chuckles] It’s good to give to charities, you know."},
 {'comedian': 'John_Mulaney',
  'sentence': 'We made these big piles of clothes, we put the piles into these big boxes, then we put the boxes into the back of my car, and then they stayed there for four months.'},
 {'comedian': 'John_Mulaney',
  'sentence': 'And then one day my wife said, “Hey, you took that stuff to Goodwill, right?” And I said, “Of course I did! On an unrelated note, I’m going to walk out"'},
 {'comedian': 'John_Mulaney_2',
  'sentence': "The greatest assembly of them all, once a year, Stranger Danger. Yeah, the hottest ticket in town. The Bruno Mars of assemblies."},
 {'comedian': 'John_Mulaney_2',
  'sentence': 'Go ahead and laugh. His name is ridiculous. That was his name. It was JJ Bittenbinder.'},
 {'comedian': 'John_Mulaney_2',
  'sentence': 'Very sorry, Radio City, did that make you uncomfortable?'},
 {'comedian': 'John_Mulaney_2',
  'sentence': 'And he’s looking at you in the eye to tell you for the first time in your very young life that some adults find you incredibly attractive.'},
 {'comedian': 'Jimmy_Yang',
  'sentence': "Asian people, we don’t need Tinder anymore. We just go to BTS concerts. That’s how we do that parking lot pimping."},
 {'comedian': 'Jimmy_Yang',
  'sentence': 'I’ve been dating a lot of tall girls lately, because it makes me look successful. No, no. I think tall women are beautiful. But some of them like to wear heels. That’s just disrespect. Like, you’re already five inches taller than me. Why the fuck are you wearing heels?'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': "I’m like first generation."},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'But my parents, they’re like negative 9 generation, because they’re so frickin’ Chinese.'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'First of all, old Asian people, they don’t watch TV.'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'He’s just sitting there, arms folded, judging the TV like–'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'He’s made some random noises around the house.'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'Now whenever he sneezes, it’s never just a sneeze. It’s like a whole tsunami of sound waves that comes after'},
 {'comedian': 'Louis_CK',
  'sentence': "I was talking to my friend the other day about Jesus… uh, Christ, and, um… I don’t remember why, but I happened to mention that Jesus was Jewish and my friend said, “He was?” And I said, Yeah. Jesus was Jewish. And he said, “I don’t think so.” And I said, that’s okay, it already all happened. Doesn’t matter where you think. But he’d argued with me. He was like, “Dude, Jesus couldn’t be Jewish. Think"},
 {'comedian': 'Louis_CK_2',
  'sentence': "Dogs are so dumb, it’s fucking tragic."},
 {'comedian': 'Louis_CK_2',
  'sentence': "They’re in our lives and they know nothing about what’s happening."},
 {'comedian': 'Louis_CK_2',
  'sentence': "You ever been having, like, a dramatic moment in your family, like, you’re in the living room telling the kids that grandma died, and everybody’s crying, and the dog’s sitting there like…“I know you! Ha!”"},
 {'comedian': 'Louis_CK_2',
  'sentence': "They don’t even know their own lives, they don’t"},
 {'comedian': 'Nate_Bargatze',
  'sentence': "I love having a kid. We… I love when kids cry, it’s just innocent. They cry over a tag in their shirt. I mean, they bawl. They don’t like… It feels weird."},
 {'comedian': 'Nate_Bargatze',
  'sentence': "She’s on her iPad a lot. You know, that’s the hard part. You got to get these kids off… You don’t want technology all the time."},
 {'comedian': 'Nate_Bargatze',
  'sentence': "She watches these kid YouTube videos, and now she makes her own videos."},
 {'comedian': 'Nate_Bargatze_2',
  'sentence': "We’re doing homework too. Our daughter started bringing it home. Homework’s fun."},
 {'comedian': 'Nate_Bargatze_2',
  'sentence': "First and second grade was awesome. Third grade, you’re like, “Okay.” They throw some stuff in, you’re like, “Oh, all right. All right.”"},
 {'comedian': 'Nate_Bargatze_2',
  'sentence': "It’s, uh… “Okay, learning it earlier than we used to, huh?” I don’t even know if that’s true, but… She brought home Common Core math. That’s fun. It’s a new math they invented, no"},
 {'comedian': 'Nate_Bargatze_TK',
  'sentence': "My dad is a magician. He’s done that my entire life."},
 {'comedian': 'Nate_Bargatze_TK',
  'sentence':'He was a clown at the very beginning, just in case you’re like, “How do you get into something like that?”'},
 {'comedian': 'Nate_Bargatze_TK',
  'sentence':'It goes clown, then magic. There’s two steps. You can take them in either order.'},
 {'comedian': 'Nate_Bargatze_TK',
  'sentence':'I was born, he was a clown. It was never weird to me. I thought everybody’s dad was a clown.'},
 {'comedian': 'Nate_Bargatze_TK_2',
  'sentence': "It was right outside Louisville, Kentucky, where my parents are from."},
 {'comedian': 'Nate_Bargatze_TK_2',
  'sentence': 'My uncle, his daughter’s getting married. He has his tuxedo jacket, pants, cummerbund, bowtie. No shirt.'},
 {'comedian': 'Nate_Bargatze_TK_2',
  'sentence': 'He has a body that you’re like, “Put your jacket on backwards. Flip it around.”'},
 {'comedian': 'Nate_Bargatze_TK_2',
  'sentence': 'Yeah. Then you see his back, and you’re like, “Oof. All right, go back to regular way. Yeah, that’s my fault. I didn’t'},
 {'comedian': 'Russell_Peters',
  'sentence': "So my doctor says, hey, what else is wrong with you? I go, what do you mean what else is wrong with me? He goes, look, you’re a 48-year-old Indian man. I’m like, that’s really fucking racist, but since you asked, I have acid reflux."},
 {'comedian': 'Russell_Peters',
  'sentence': "First of all, you’re lying to me right now. Because there is no way you can be Indian and not have fucking acid reflux."},
 {'comedian': 'Russell_Peters_2',
    'sentence': "I started getting scared when I thought my girlfriend was going to have twins."}, 
  {'comedian': 'Russell_Peters_2',
  'sentence': 'If I have twin girls, these are going to be my daughters, it’s going to be Kate and Duplicate.'},
 {'comedian': 'Russell_Peters_2',
  'sentence': 'If I had twins with a black girl, this is Tyrone and Tyclone.'}, 
 {'comedian': 'Russell_Peters_2',
  'sentence': 'Look at how good my balls are. Look, look. Or because you’re Punjabi, look at how good my junk they are. Looks at this. Look at how good my balls are. My balls are so strong,'},
 {'comedian': 'Sam_Morril',
  'sentence': "I just told a room full of people I got roofied. I did not see one concerned face in here."}, 
 {'comedian': 'Sam_Morril',
  'sentence': 'I can’t, you guys. I can’t take all the credit, you know...'},
 {'comedian': 'Sam_Morril',
  'sentence': 'That’s always the crossroads I find myself at at 4:57 a.m.'},
 {'comedian': 'Sam_Morril',
  'sentence': 'Have you tried living? I’m happy and I want to get fucked up.'},
 {'comedian': 'Sam_Morril',
  'sentence': 'It’s always the same thing, too. It’s always some guy who’s like I heard voices in my head'},
 {'comedian': 'Sam_Morril_2',
  'sentence': "I did nothing all day, man. I watched the news. I saw an anti-smoking ad. It’s weird that they can do anti-smoking ads, but you can’t do pro-cigarette commercials."},
 {'comedian': 'Sam_Morril_2',
  'sentence': 'A hot girl walks up to a guy in the bar and asks to bum a cigarette, and he goes, oh, I don’t smoke. So then she goes outside and bums them from another guy, and they go home together and they fuck and that’s the whole'},
 {'comedian': 'Trevor_Noah',
  'sentence': "I’m afraid of the dark."},
 {'comedian': 'Trevor_Noah',
  'sentence': 'I feel safe, like even if there’s a monster under the bed, he’d be like, “Is that a Russian?”'},
  {'comedian': 'Trevor_Noah',
    'sentence': 'I know we can’t solve this tomorrow, but if you learn the Russian accent, half of those problems would disappear.'},
  {'comedian': 'Trevor_Noah',
  'sentence': 'I don’t want dance with you. Please!'},
  {'comedian': 'Trevor_Noah',
  'sentence': 'You want to see bitch?'},
 {'comedian': 'Trevor_Noah_2',
  'sentence': "He could not be a normal man with a voice like that. You are destined for greatness."},
 {'comedian': 'Trevor_Noah_2',
  'sentence': "You can’t be running in the streets: \'And a Kit Kat, please.\' No. No. It’s so unique."},
 {'comedian': 'Trevor_Noah_2',
  'sentence': "You can’t be silly. What are you making, prank phone calls? \'Who the hell is this?!\' \'I’m not telling you.\' [laughs]"},
 {'comedian': 'Tom_Segura',
  'sentence': "I was like, “Oh, shit. Yeah.”"}, 
 {'comedian': 'Tom_Segura', 'sentence': '“That’s what’s up.”'},
 {'comedian': 'Tom_Segura', 
  'sentence': 'Can we get a description before we agree to terms? How about a height and weight on old mom?'},
 {'comedian': 'Tom_Segura_2',
  'sentence': "By the way, is there any more satisfying feeling than letting an elevator door close on somebody?"},
 {'comedian': 'Tom_Segura_2',
  'sentence': 'I got such a warm rush through my body. It felt like the inside of my body hugged the outside of my body, you know?'},
 {'comedian': 'Tom_Segura_2',
  'sentence': 'I think it’s a taste of power. Like most of us, we have no power in our everyday lives. But if you’re alone in an elevator, -you are lord of the elevator shaft. You get to decide, like a king with his drawbridge.'}]

In [102]:
# Tabulate the quote records held in `r` and save them as CSV without the row index.
quotes_csv_path = '/home/ada/humor/data/stand_up_dataset/phi3_mini_quotes.csv'
quotes_df = pd.DataFrame(data=r)
quotes_df.to_csv(path_or_buf=quotes_csv_path, index=False)

#### LLAMA

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Llama 3 8B checkpoint. `tokenizer` and `model` are rebound here, so cells
# executed after this one generate with this checkpoint.
LLAMA_CHECKPOINT = "meta-llama/Meta-Llama-3-8B"

tokenizer = AutoTokenizer.from_pretrained(LLAMA_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(LLAMA_CHECKPOINT, device_map="auto")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:23<00:00,  5.76s/it]


In [5]:
# Preview a bounded number of rows; a bare `transcripts` asks the notebook to render the whole frame.
transcripts.head(10)

Unnamed: 0,comedian,transcript
0,Anthony_Jeselnik,"When I was a kid, I used to fantasize about ge..."
1,Anthony_Jeselnik_2,No one should ever ask me to speak at anyone’...
2,Ali_Wong,"The last time I was at home in San Francisco, ..."
3,Ali_Wong_2,I need to have children to keep me company whe...
4,Chelsea_Peretti,A friend of a friend just posted like 500 enga...
5,Chelsea_Peretti_2,If you really want to piss off a really hot gi...
6,Donald_Glover,"I was babysitting this kid once, this mean kid..."
7,Donald_Glover_2,I was hanging out in my neighborhood and this ...
8,Hasan_Minhaj,Have you seen the show called The Slap? This i...
9,Hasan_Minhaj_2,"Now, my AP Calc class was a group of overachie..."


In [31]:
# Single-transcript probe: ask the loaded model for the funny lines of the 'Donald_Glover' set.
transcript = transcripts.loc[transcripts['comedian'] == 'Donald_Glover', 'transcript'].values[0]
instruction = "Extract the key humorous lines and punchlines for this stand-up comedy transcript. Focus on the quotes highlighting the main comedic moments. List of the funny sentences:"

prompt = f"'''{transcript}'''\n\n{instruction}"
# Put the inputs on the model's own device (it is loaded with device_map="auto") rather than
# assuming "cuda", and hand generate() the attention mask together with the ids.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
input_ids = inputs.input_ids
outputs = model.generate(
    input_ids=input_ids,
    attention_mask=inputs.attention_mask,
    pad_token_id=tokenizer.eos_token_id,
    max_new_tokens=120,
)
# For a causal LM the returned sequence is the prompt tokens followed by the continuation.
# Slice the prompt off by length: stripping it with str.replace() silently leaves the whole
# prompt in the response whenever decode() does not reproduce the prompt text exactly.
new_tokens = outputs[0][input_ids.shape[-1]:]
response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
print(response)




In [28]:
# Single-transcript probe: ask the loaded model for the funny lines of the 'Donald_Glover_2' set.
transcript = transcripts.loc[transcripts['comedian'] == 'Donald_Glover_2', 'transcript'].values[0]
instruction = "Extract the key humorous lines and punchlines for this stand-up comedy transcript. Focus on the quotes highlighting the main comedic moments. List of sentences:"

prompt = f"'''{transcript}'''\n\n{instruction}"
# Put the inputs on the model's own device (it is loaded with device_map="auto") rather than
# assuming "cuda", and hand generate() the attention mask together with the ids.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
input_ids = inputs.input_ids
outputs = model.generate(
    input_ids=input_ids,
    attention_mask=inputs.attention_mask,
    pad_token_id=tokenizer.eos_token_id,
    max_new_tokens=120,
)
# For a causal LM the returned sequence is the prompt tokens followed by the continuation.
# Slice the prompt off by length: stripping it with str.replace() silently leaves the whole
# prompt in the response whenever decode() does not reproduce the prompt text exactly.
new_tokens = outputs[0][input_ids.shape[-1]:]
response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
print(response)

```
1. I was hanging out in my neighborhood and this girl was jogging and we started talking about a rape that had happened in the neighborhood, and we were talking about… “Oh, this is awful, this is so messed up. I can’t believe this happened so close to us.” And she goes, “Yeah, well, “if anybody ever tries to rape me, I’m just gonna shit on ’em.” “Yes, I will. I have no problem.” Uh… Two things, lady. Number one, he’s a rapist. So his tolerance for gross


In [None]:
# Quote records: one dict per extracted sentence, each with the keys 'comedian' and 'sentence'.
# An earlier cell turns a list bound to `r` into a DataFrame and writes it to phi3_mini_quotes.csv.
# NOTE(review): the Joe_List_2 sentence "I’m a rock and roll guy. ..." appears twice below — confirm
# whether the duplicate is intended.
# NOTE(review): several sentences stop mid-phrase; presumably they were copied from length-limited
# model output — verify against the generation cells.
r = [ 
 {'comedian': 'Anthony_Jeselnik',
  'sentence': "When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday."},
 {'comedian': 'Anthony_Jeselnik',
  'sentence': "I mean, don’t get me wrong, I loved my mother. She was my mom, of course I loved her. We fought a lot. My mom could be very racist. Very racist. And I do not tolerate racism. That’s ignorance and I hate that."},
 {'comedian': 'Anthony_Jeselnik_2',
  'sentence': "No one should ever ask me to speak at anyone’s funeral."},
 {'comedian': 'Anthony_Jeselnik_2',
  'sentence': "Was like, 'I’ve never talked to a group of people without getting paid a lot of money. How should I handle this?” “Anthony, just go up there and tell a story. Find one moment about you and your grandma you can share with everybody. And don’t tell a joke. Try not to.'"},
 {'comedian': 'Ali_Wong',
  'sentence':  "The last time I was at home in San Francisco, I was trying to help her get rid of shit. Don’t ever do that with your mom. It was like the worst experience of my life."},
 {'comedian': 'Ali_Wong',
  'sentence': "We were screaming and fighting and yelling and it all came to a climax when she refused to let go of a Texas Instruments TI-82… manual."},
 {'comedian': 'Ali_Wong',
  'sentence': "She don’t even know… where the calculator is."},
 {'comedian': 'Ali_Wong',
  'sentence': "Those of you under 25 probably don’t know what that calculator is. It was this calculator that bamboozled my generation"},
 {'comedian': 'Ali_Wong_2',
  'sentence': "I need children to be there for me when I’m older."}, 
{'comedian': 'Ali_Wong_2',
  'sentence': "It’s lonely. My mom is 80, going through a full blown mid-life crisis. ‘Cause she knows that she’s got a century more to go."}, 
 {'comedian': 'Ali_Wong_2',
  'sentence': 'All of her white friends, dead. Her Mexican friends, dead. Black friends, dead. I’m just kidding. She doesn’t have any black friends.'},
 {'comedian': 'Ali_Wong_2',
  'sentence': 'Life is not Rush Hour, the movie, OK?'}, 
 {'comedian': 'Chelsea_Peretti',
  'sentence': "It was a photoset of 500 photos. "}, 
 {'comedian': 'Chelsea_Peretti',
  'sentence': 'I’m just obsessed with the entire photoset.'},
 {'comedian': 'Chelsea_Peretti',
  'sentence': 'I’m just like, “Next, next, next, next, more, more, more, more.” '},
 {'comedian': 'Chelsea_Peretti',
  'sentence': 'This same girl on Facebook, all her posts and all her updates are about her husband.'},
 {'comedian': 'Chelsea_Peretti',
  'sentence': ' She calls him her “hubby,” which, to me is on par with the “N” word.'},
 {'comedian': 'Chelsea_Peretti_2',
  'sentence': 'I met a girl who did it, and she was like, “It worked. I did stop sweating out of my armpits, but I started sweating out of my butthole.”'}, 
 {'comedian': 'Chelsea_Peretti_2',
  'sentence': '“She is so fucking hot. Her armpits are bone-fucking-dry… just like I always dreamed of.” Then he’s like, “Whoa, whoa, whoa!” He’s like, “I’m so drawn to you, but I can’t get near you! This is a whole new kind of juicy booty.”'}, 
 
 {'comedian': 'Donald_Glover',
  'sentence': "I was like, what the fuck is wrong with this kid?"},
 {'comedian': 'Donald_Glover',
  'sentence': "He was just a mean-spirited kid, like, he watched HBO just a little too early and was just kind of a mean kid in general."},
 {'comedian': 'Donald_Glover', 'sentence': "He goes, \'Suck my d!ck!\'"},
 {'comedian': 'Donald_Glover',
  'sentence': "The lady goes, \'Someone betta get this little niglet away from me.\'"},
 {'comedian': 'Donald_Glover_2',
  'sentence': "I can’t do it, I can’t do it, I’m not… in love yet. That was me in a domestic dispute with my… with my butt."},
 {'comedian': 'Hasan_Minhaj',
  'sentence': "Have you seen the show called The Slap? This is a real show on NBC. This is a real show about a white kid that gets slapped at a birthday party."},
 {'comedian': 'Hasan_Minhaj',
  'sentence': "Are you fucking kidding? Thirteen episodes for this kid? Are you kidding me? Do you know when br0wn kids get slapped? Every br0wn birthday party."},
 {'comedian': 'Hasan_Minhaj',
  'sentence': "And usually it’s the kid whose birthday it is, and we stand there and point and laugh."},
 {'comedian': 'Hasan_Minhaj_2',
  'sentence': "I’m like, \'All 30 of us? We’re all going to prom? AP Calc? Me, Jehovah’s Witness girl, Korean exchange students, going to the prom? Thirty for 30? All of us?\'"},
 {'comedian': 'Hasan_Minhaj_2',
  'sentence': "He’s like, \'Hanson, this is not funny.\' He walks over to the board, he pulls it down. It’s March Madness for nerds."},
 {'comedian': 'Iliza_Shlesinger',
  'sentence': 'And you start talking, maybe you have some drinks, maybe you dance, maybe you exchange numbers, maybe you start to date. Now, I happen to think the very beginning of a new relationship is the most exciting part of the relationship.'},
 {'comedian': 'Iliza_Shlesinger',
  'sentence': 'Granted, I’m not married yet, so, technically, I’ve only had beginnings of relationships.'},
 {'comedian': 'Iliza_Shlesinger',
  'sentence': 'I don’t know. I can’t say empirically how fun being married is. I’ll tell you on the next comedy special what that’s all about.'},
 {'comedian': 'Iliza_Shlesinger_2',
  'sentence': 'It’s like, “Maybe she wants me. I don’t know. Oh, my god.”'},
 {'comedian': 'Iliza_Shlesinger_2',
  'sentence': "You don’t want to wear your civilian clothes, okay."},
 {'comedian': 'Iliza_Shlesinger_2', 'sentence': 'No means no.'},
 {'comedian': 'Iliza_Shlesinger_2',
  'sentence': 'You show up in that, he’ll be, like, “Oh, my god!”'},
 {'comedian': 'Jim_Gaffigan',
  'sentence': 'I am now at the size, when I go in clothing stores sales people look at me like, “We got nothing for you. And you can’t use our bathroom.”'},
 {'comedian': 'Jim_Gaffigan',
  'sentence': "When I go out to eat, if I order a salad, the waiter’s always like, “Aw. Look at you try.”"},
 {'comedian': 'Jim_Gaffigan',
  'sentence': "I don’t care. I like to eat. You know, when you like to eat, what’s weird is people assume you enjoy cooking."},
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': "Recently, I was invited to a surprise birthday party. It was a surprise birthday party for a dog."}, 
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': 'And to be fair, the dog was surprised. Didn’t suspect a thing.'},
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': 'The dog didn’t know it was his birthday. The dog didn’t know it had a birthday.'},
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': 'It was the dog’s third birthday, which in dog years doesn’t matter.'}, 
 {'comedian': 'Jim_Gaffigan_2',
  'sentence': 'Someone made that up and we just went along with it. Oh, one year equals seven for doggies?'},
 {'comedian': 'Joe_List',
  'sentence': 'So I went to the ear, nose, throat doctor, which is a cool doctor. It’s one doctor. He knows all three body parts.'},
 {'comedian': 'Joe_List',
  'sentence': "I was hoping it was three doctors saving money, by sharing a small office."},
 {'comedian': 'Joe_List', 'sentence': "Who do ya need?"},
 {'comedian': 'Joe_List',
  'sentence': "I went for my ear, I have an ear issue called, tinnitus, or tin-uh-dus."},
 {'comedian': 'Joe_List',
  'sentence': "It’s when you’re ears ring, or buzz all the time."},
 {'comedian': 'Joe_List_2',
  'sentence': "I’m a rock and roll guy. I love Guns N’ Roses, I’m cool."},
 {'comedian': 'Joe_List_2',
  'sentence': 'I was on a plane the other day. I hated the guy behind me.'},
 {'comedian': 'Joe_List_2',
  'sentence': 'I was like, hope this guy passes away on the flight.'},
 {'comedian': 'Joe_List_2',
  'sentence': 'I’m a rock and roll guy. I love Guns N’ Roses, I’m cool.'},
 {'comedian': 'John_Mulaney',
  'sentence': "I don’t mean to sound down on donating. [chuckles] It’s good to give to charities, you know."},
 {'comedian': 'John_Mulaney',
  'sentence': 'We made these big piles of clothes, we put the piles into these big boxes, then we put the boxes into the back of my car, and then they stayed there for four months.'},
 {'comedian': 'John_Mulaney',
  'sentence': 'And then one day my wife said, “Hey, you took that stuff to Goodwill, right?” And I said, “Of course I did! On an unrelated note, I’m going to walk out"'},
 {'comedian': 'John_Mulaney_2',
  'sentence': "The greatest assembly of them all, once a year, Stranger Danger. Yeah, the hottest ticket in town. The Bruno Mars of assemblies."},
 {'comedian': 'John_Mulaney_2',
  'sentence': 'Go ahead and laugh. His name is ridiculous. That was his name. It was JJ Bittenbinder.'},
 {'comedian': 'John_Mulaney_2',
  'sentence': 'Very sorry, Radio City, did that make you uncomfortable?'},
 {'comedian': 'John_Mulaney_2',
  'sentence': 'And he’s looking at you in the eye to tell you for the first time in your very young life that some adults find you incredibly attractive.'},
 {'comedian': 'Jimmy_Yang',
  'sentence': "Asian people, we don’t need Tinder anymore. We just go to BTS concerts. That’s how we do that parking lot pimping."},
 {'comedian': 'Jimmy_Yang',
  'sentence': 'I’ve been dating a lot of tall girls lately, because it makes me look successful. No, no. I think tall women are beautiful. But some of them like to wear heels. That’s just disrespect. Like, you’re already five inches taller than me. Why the fuck are you wearing heels?'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': "I’m like first generation."},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'But my parents, they’re like negative 9 generation, because they’re so frickin’ Chinese.'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'First of all, old Asian people, they don’t watch TV.'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'He’s just sitting there, arms folded, judging the TV like–'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'He’s made some random noises around the house.'},
 {'comedian': 'Jimmy_Yang_2',
  'sentence': 'Now whenever he sneezes, it’s never just a sneeze. It’s like a whole tsunami of sound waves that comes after'},
 {'comedian': 'Louis_CK',
  'sentence': "I was talking to my friend the other day about Jesus… uh, Christ, and, um… I don’t remember why, but I happened to mention that Jesus was Jewish and my friend said, “He was?” And I said, Yeah. Jesus was Jewish. And he said, “I don’t think so.” And I said, that’s okay, it already all happened. Doesn’t matter where you think. But he’d argued with me. He was like, “Dude, Jesus couldn’t be Jewish. Think"},
 {'comedian': 'Louis_CK_2',
  'sentence': "Dogs are so dumb, it’s fucking tragic."},
 {'comedian': 'Louis_CK_2',
  'sentence': "They’re in our lives and they know nothing about what’s happening."},
 {'comedian': 'Louis_CK_2',
  'sentence': "You ever been having, like, a dramatic moment in your family, like, you’re in the living room telling the kids that grandma died, and everybody’s crying, and the dog’s sitting there like…“I know you! Ha!”"},
 {'comedian': 'Louis_CK_2',
  'sentence': "They don’t even know their own lives, they don’t"},
 {'comedian': 'Nate_Bargatze',
  'sentence': "I love having a kid. We… I love when kids cry, it’s just innocent. They cry over a tag in their shirt. I mean, they bawl. They don’t like… It feels weird."},
 {'comedian': 'Nate_Bargatze',
  'sentence': "She’s on her iPad a lot. You know, that’s the hard part. You got to get these kids off… You don’t want technology all the time."},
 {'comedian': 'Nate_Bargatze',
  'sentence': "She watches these kid YouTube videos, and now she makes her own videos."},
 {'comedian': 'Nate_Bargatze_2',
  'sentence': "We’re doing homework too. Our daughter started bringing it home. Homework’s fun."},
 {'comedian': 'Nate_Bargatze_2',
  'sentence': "First and second grade was awesome. Third grade, you’re like, “Okay.” They throw some stuff in, you’re like, “Oh, all right. All right.”"},
 {'comedian': 'Nate_Bargatze_2',
  'sentence': "It’s, uh… “Okay, learning it earlier than we used to, huh?” I don’t even know if that’s true, but… She brought home Common Core math. That’s fun. It’s a new math they invented, no"},
 {'comedian': 'Nate_Bargatze_TK',
  'sentence': "My dad is a magician. He’s done that my entire life."},
 {'comedian': 'Nate_Bargatze_TK',
  'sentence':'He was a clown at the very beginning, just in case you’re like, “How do you get into something like that?”'},
 {'comedian': 'Nate_Bargatze_TK',
  'sentence':'It goes clown, then magic. There’s two steps. You can take them in either order.'},
 {'comedian': 'Nate_Bargatze_TK',
  'sentence':'I was born, he was a clown. It was never weird to me. I thought everybody’s dad was a clown.'},
 {'comedian': 'Nate_Bargatze_TK_2',
  'sentence': "It was right outside Louisville, Kentucky, where my parents are from."},
 {'comedian': 'Nate_Bargatze_TK_2',
  'sentence': 'My uncle, his daughter’s getting married. He has his tuxedo jacket, pants, cummerbund, bowtie. No shirt.'},
 {'comedian': 'Nate_Bargatze_TK_2',
  'sentence': 'He has a body that you’re like, “Put your jacket on backwards. Flip it around.”'},
 {'comedian': 'Nate_Bargatze_TK_2',
  'sentence': 'Yeah. Then you see his back, and you’re like, “Oof. All right, go back to regular way. Yeah, that’s my fault. I didn’t'},
 {'comedian': 'Russell_Peters',
  'sentence': "So my doctor says, hey, what else is wrong with you? I go, what do you mean what else is wrong with me? He goes, look, you’re a 48-year-old Indian man. I’m like, that’s really fucking racist, but since you asked, I have acid reflux."},
 {'comedian': 'Russell_Peters',
  'sentence': "First of all, you’re lying to me right now. Because there is no way you can be Indian and not have fucking acid reflux."},
 {'comedian': 'Russell_Peters_2',
    'sentence': "I started getting scared when I thought my girlfriend was going to have twins."}, 
  {'comedian': 'Russell_Peters_2',
  'sentence': 'If I have twin girls, these are going to be my daughters, it’s going to be Kate and Duplicate.'},
 {'comedian': 'Russell_Peters_2',
  'sentence': 'If I had twins with a black girl, this is Tyrone and Tyclone.'}, 
 {'comedian': 'Russell_Peters_2',
  'sentence': 'Look at how good my balls are. Look, look. Or because you’re Punjabi, look at how good my junk they are. Looks at this. Look at how good my balls are. My balls are so strong,'},
 {'comedian': 'Sam_Morril',
  'sentence': "I just told a room full of people I got roofied. I did not see one concerned face in here."}, 
 {'comedian': 'Sam_Morril',
  'sentence': 'I can’t, you guys. I can’t take all the credit, you know...'},
 {'comedian': 'Sam_Morril',
  'sentence': 'That’s always the crossroads I find myself at at 4:57 a.m.'},
 {'comedian': 'Sam_Morril',
  'sentence': 'Have you tried living? I’m happy and I want to get fucked up.'},
 {'comedian': 'Sam_Morril',
  'sentence': 'It’s always the same thing, too. It’s always some guy who’s like I heard voices in my head'},
 {'comedian': 'Sam_Morril_2',
  'sentence': "I did nothing all day, man. I watched the news. I saw an anti-smoking ad. It’s weird that they can do anti-smoking ads, but you can’t do pro-cigarette commercials."},
 {'comedian': 'Sam_Morril_2',
  'sentence': 'A hot girl walks up to a guy in the bar and asks to bum a cigarette, and he goes, oh, I don’t smoke. So then she goes outside and bums them from another guy, and they go home together and they fuck and that’s the whole'},
 {'comedian': 'Trevor_Noah',
  'sentence': "I’m afraid of the dark."},
 {'comedian': 'Trevor_Noah',
  'sentence': 'I feel safe, like even if there’s a monster under the bed, he’d be like, “Is that a Russian?”'},
  {'comedian': 'Trevor_Noah',
    'sentence': 'I know we can’t solve this tomorrow, but if you learn the Russian accent, half of those problems would disappear.'},
  {'comedian': 'Trevor_Noah',
  'sentence': 'I don’t want dance with you. Please!'},
  {'comedian': 'Trevor_Noah',
  'sentence': 'You want to see bitch?'},
 {'comedian': 'Trevor_Noah_2',
  'sentence': "He could not be a normal man with a voice like that. You are destined for greatness."},
 {'comedian': 'Trevor_Noah_2',
  'sentence': "You can’t be running in the streets: \'And a Kit Kat, please.\' No. No. It’s so unique."},
 {'comedian': 'Trevor_Noah_2',
  'sentence': "You can’t be silly. What are you making, prank phone calls? \'Who the hell is this?!\' \'I’m not telling you.\' [laughs]"},
 {'comedian': 'Tom_Segura',
  'sentence': "I was like, “Oh, shit. Yeah.”"}, 
 {'comedian': 'Tom_Segura', 'sentence': '“That’s what’s up.”'},
 {'comedian': 'Tom_Segura', 
  'sentence': 'Can we get a description before we agree to terms? How about a height and weight on old mom?'},
 {'comedian': 'Tom_Segura_2',
  'sentence': "By the way, is there any more satisfying feeling than letting an elevator door close on somebody?"},
 {'comedian': 'Tom_Segura_2',
  'sentence': 'I got such a warm rush through my body. It felt like the inside of my body hugged the outside of my body, you know?'},
 {'comedian': 'Tom_Segura_2',
  'sentence': 'I think it’s a taste of power. Like most of us, we have no power in our everyday lives. But if you’re alone in an elevator, -you are lord of the elevator shaft. You get to decide, like a king with his drawbridge.'}]

[{'comedian': 'Anthony_Jeselnik',
  'sentence': '"When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday. When I was a kid, we were poor. So poor I remember, just so I could go to my senior prom, just so I could go to my senior prom, I had to sell my U.S. passport on the street. Sold my passport on the street for 300 bucks to get to go to my prom. Of course this was before 9/11 so… my bad, everybody. Weird joke to clap for,'},
 {'comedian': 'Anthony_Jeselnik_2',
  'sentence': '1. "No one should ever ask me to speak at anyone’s funeral." 2. "I asked a friend for advice. Was like, \'I’ve never talked to a group of people without getting paid a lot of money. How should I handle this?\' \'Anthony, just go up there and tell a story. Find one moment about you and your grandma you can share with everybody. And don’t tell a joke. Try not to.\'" 3. "So I walked up and was like, \'You know what my favorite memory was

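#### Play Pretend

Ask the model to adopt a persona (stand-up comedian, comedy fan, comedy critic) before extracting quotes, and compare the results.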

In [23]:
# Persona prompt "stand-up comedian": collect one response per comedian key in `results_dict`.
results_dict = {}
instruction = "Pretend that you are a stand-up comedian reading the following stand-up comedy transcript. Extract the key humorous lines and punchlines for this stand-up comedy transcript. Focus on the quotes highlighting the main comedic moments. List of quotes:"
# Only two columns are needed, so iterate over them directly instead of building a Series per row.
for comedian, transcript in zip(transcripts['comedian'], transcripts['transcript']):
    prompt = f"'''{transcript}'''\n\n{instruction}"
    # Put the inputs on the model's own device rather than assuming "cuda", and pass the
    # attention mask along with the ids.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=120,
    )
    # For a causal LM the returned sequence is the prompt tokens followed by the continuation.
    # Slice the prompt off by length: stripping it with str.replace() silently leaves the whole
    # prompt in the response whenever decode() does not reproduce the prompt text exactly.
    prompt_length = inputs.input_ids.shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
    results_dict[comedian] = response

In [24]:
results_dict

{'Anthony_Jeselnik': '- "When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday."\n- "My mom actually should’ve been on one of the planes that crashed on 9/11."\n- "When I was a kid, like nine years old, I’d come home after school. Once in a while, I’d bring a friend over to play with me at my house. Once in a while, I’d bring a black friend over. And when I did that,',
 'Anthony_Jeselnik_2': '- "I\'ve never talked to a group of people without getting paid a lot of money. How should I handle this?"\n- "You know what my favorite memory was? When I was like four years old, before I learned to read, Grandma would curl up with me on the couch, she had this Southern accent, and she would read to me. She would read Mark Twain to me, and I loved it."\n- "And I know my grandma loved it too, because it combined her two favorite things: spending time with her grandchildren, and using the ‘N’',
 'Ali_Wong': '- "It was

In [17]:
# Persona prompt "stand-up comedy fan": collect one response per comedian key in `results_dict`.
results_dict = {}
instruction = "Pretend that you are a stand-up comedy fan reading the following stand-up comedy transcript. Extract the key humorous lines and punchlines for this stand-up comedy transcript. Focus on the quotes highlighting the main comedic moments. List of quotes:"
# Only two columns are needed, so iterate over them directly instead of building a Series per row.
for comedian, transcript in zip(transcripts['comedian'], transcripts['transcript']):
    prompt = f"'''{transcript}'''\n\n{instruction}"
    # Put the inputs on the model's own device rather than assuming "cuda", and pass the
    # attention mask along with the ids.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=120,
    )
    # For a causal LM the returned sequence is the prompt tokens followed by the continuation.
    # Slice the prompt off by length: stripping it with str.replace() silently leaves the whole
    # prompt in the response whenever decode() does not reproduce the prompt text exactly.
    prompt_length = inputs.input_ids.shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
    results_dict[comedian] = response

In [18]:
results_dict

{'Anthony_Jeselnik': '- "When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday."\n\n\n- "My mom actually should’ve been on one of the planes that crashed on 9/11."\n\n\n- "When I was a kid, like nine years old, I’d come home after school. Once in a while, I’d bring a friend over to play with me at my house. Once in a while, I’d bring a black friend over. And when I did that,',
 'Anthony_Jeselnik_2': '- "I\'ve never talked to a group of people without getting paid a lot of money. How should I handle this?"\n- "And I know my grandma loved it too, because it combined her two favorite things: spending time with her grandchildren, and using the ‘N’ word."\n- "I am a real comedian. I am a pure comedian. I think I\'m one of the best comedians of all time."\n\n**Key Humorous Lines and Punchlines:**\n\n- "When I was four years old, before I learned to read, Grandma would curl up',
 'Ali_Wong': '- "It was like the w

In [19]:
# Persona prompt "stand-up comedy fan", worded around what would make an audience member laugh:
# collect one response per comedian key in `results_dict`.
results_dict = {}
instruction = "Pretend that you are a stand-up comedy fan reading the following stand-up comedy transcript. Extract the key humorous lines and punchlines that would make you laugh as an audience member. Focus on the quotes highlighting the main comedic moments. List of quotes:"
# Only two columns are needed, so iterate over them directly instead of building a Series per row.
for comedian, transcript in zip(transcripts['comedian'], transcripts['transcript']):
    prompt = f"'''{transcript}'''\n\n{instruction}"
    # Put the inputs on the model's own device rather than assuming "cuda", and pass the
    # attention mask along with the ids.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=120,
    )
    # For a causal LM the returned sequence is the prompt tokens followed by the continuation.
    # Slice the prompt off by length: stripping it with str.replace() silently leaves the whole
    # prompt in the response whenever decode() does not reproduce the prompt text exactly.
    prompt_length = inputs.input_ids.shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
    results_dict[comedian] = response

In [20]:
results_dict

{'Anthony_Jeselnik': '- "When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday."\n- "My mom actually should’ve been on one of the planes that crashed on 9/11."\n- "When I was a kid, like nine years old, I’d come home after school. Once in a while, I’d bring a friend over to play with me at my house. Once in a while, I’d bring a black friend over. And when I did that,',
 'Anthony_Jeselnik_2': '- "I\'ve never talked to a group of people without getting paid a lot of money. How should I handle this?"\n- "And I know my grandma loved it too, because it combined her two favorite things: spending time with her grandchildren, and using the ‘N’ word."\n- "I am a real comedian. I am a pure comedian. I think I\'m one of the best comedians of all time."\n\n**Analysis:**\n\nThe speaker\'s comedic style is characterized by his use of personal anecdotes and humor derived from his childhood experiences. He seamlessly blen

In [21]:
# Persona prompt "stand-up comedy critic": collect one response per comedian key in `results_dict`.
results_dict = {}
instruction = "Pretend that you are a stand-up comedy critic reading the following stand-up comedy transcript. Extract the key humorous lines and punchlines for this stand-up comedy transcript. Focus on the quotes highlighting the main comedic moments. List of quotes:"
# Only two columns are needed, so iterate over them directly instead of building a Series per row.
for comedian, transcript in zip(transcripts['comedian'], transcripts['transcript']):
    prompt = f"'''{transcript}'''\n\n{instruction}"
    # Put the inputs on the model's own device rather than assuming "cuda", and pass the
    # attention mask along with the ids.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=120,
    )
    # For a causal LM the returned sequence is the prompt tokens followed by the continuation.
    # Slice the prompt off by length: stripping it with str.replace() silently leaves the whole
    # prompt in the response whenever decode() does not reproduce the prompt text exactly.
    prompt_length = inputs.input_ids.shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
    results_dict[comedian] = response

In [22]:
results_dict

{'Anthony_Jeselnik': '- "When I was a kid, I used to fantasize about getting older, growing up and having money, and buying my mom nice things for her birthday."\n- "My mom actually should’ve been on one of the planes that crashed on 9/11."\n- "When I was a kid, like nine years old, I’d come home after school. Once in a while, I’d bring a friend over to play with me at my house. Once in a while, I’d bring a black friend over. And when I did that,',
 'Anthony_Jeselnik_2': '- "I\'ve never talked to a group of people without getting paid a lot of money. How should I handle this?"\n- "And I know my grandma loved it too, because it combined her two favorite things: spending time with her grandchildren, and using the ‘N’ word."\n- "I am a real comedian. I am a pure comedian. I think I\'m one of the best comedians of all time."\n\n**Key Humorous Lines and Punchlines:**\n\n- "I\'ve never talked to a group of people without getting paid a lot of money',
 'Ali_Wong': '- "It was like the worst ex

In [None]:
# Persona prompt "stand-up comedy critic": collect one response per comedian key in `results_dict`.
# NOTE(review): the instruction text is identical to the earlier critic-persona cell — confirm
# whether this unexecuted cell is still needed.
results_dict = {}
instruction = "Pretend that you are a stand-up comedy critic reading the following stand-up comedy transcript. Extract the key humorous lines and punchlines for this stand-up comedy transcript. Focus on the quotes highlighting the main comedic moments. List of quotes:"
for index, row in transcripts.iterrows():
    comedian = row['comedian']
    transcript = row['transcript']

    prompt = f"'''{transcript}'''\n\n{instruction}"
    # Put the inputs on the model's own device rather than assuming "cuda", and pass the
    # attention mask along with the ids.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_ids = inputs.input_ids
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=120,
    )
    # For a causal LM the returned sequence is the prompt tokens followed by the continuation.
    # Slice the prompt off by length: stripping it with str.replace() silently leaves the whole
    # prompt in the response whenever decode() does not reproduce the prompt text exactly.
    generated_text = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    response = generated_text.strip()
    results_dict[comedian] = response