In [1]:
import os
import openai

import pandas as pd
import numpy as np

from rouge_score import rouge_scorer

openai.api_key = os.environ["OPENAI_API_KEY"]



# Load Data

In [2]:
df = pd.read_csv("../data/processed/processed.csv", nrows=1000) # contain costs with just 1k samples

In [3]:
df

Unnamed: 0,dialogue,ground_truth
0,Amanda: I baked cookies. Do you want some? ...,Amanda baked cookies and will bring Jerry some...
1,Olivia: Who are you voting for in this electio...,Olivia and Olivier are voting for liberals in ...
2,"Tim: Hi, what's up? Kim: Bad mood tbh, I was...",Kim may try the pomodoro technique recommended...
3,"Edward: Rachel, I think I'm in ove with Bella....",Edward thinks he is in love with Bella. Rachel...
4,Sam: hey overheard rick say something Sam: ...,"Sam is confused, because he overheard Rick com..."
...,...,...
995,"Hazel: Hello Brad. That's insane, we havent se...",Brad is visiting his parents today and cannot ...
996,"Kitty: I don’t have your account numbers, coul...",Kitty will convey money to Barb and Ken.
997,Max: I'll be late Sylvia: why: Max: beer w...,Max will be late because he is having a drink ...
998,"Karolina: hi darling, when are you back in Wa...",Karolina will be back in Warsaw this weekend t...


In [31]:
# Samples for one shot and few-shot

# dia: "Helen: What time is the show?   Alain: 6 p.m.   Eddie: Do you have the tickets?   Alain: I bought them last week    Eddie: Cool   Eddie: How much do I owe you?   Alain: 50$    Helen: I an pay you today   Alain: It's ok. Whenever it's convenient for you. "
# sum: 'The show starts at 6 p.m. Helen will pay Alain back 50$ today.'

# dia: "Karim: i'm sorry i'm running late!!!   Karim: please wait for me!!!!!   Karim: i don't want to go to the party by myself   Irma: don't worry we'll wait for you   Karim: THANK YOU!!! :-D"
# sum: "Karim doesn't want to go to the party by himself. Irma will wait for him."

# dia: "Tom: where are you?   Pitt: still at the supermarket, an enormous queue here   Benny: But we've everything!   Tom: very good"
# sum: 'Pitt and Benny are standing in an enormous queue in the supermarket.'

## Zero Shot Learning

In [4]:
zero_shot_summs = []

for i in range(len(df["dialogue"])):
    content = "Summarize this: " + df.iloc[i, 0]
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=content,
        max_tokens=32,
        temperature=0)
    zero_shot_summs.append(response.choices[0].text)

df["zero_shot_summaries"] = zero_shot_summs

In [11]:
df["zero_shot_summaries"] = df["zero_shot_summaries"].str.replace("\n\n", "   ")

In [30]:
df.to_csv("../data/processed/zero_shot.csv", index=False)

## One Shot Learning

In [2]:
df = pd.read_csv("../data/processed/zero_shot.csv")

In [3]:
example = "The following is a conversation: " + "Karim: i'm sorry i'm running late!!!   \
Karim: please wait for me!!!!!   Karim: i don't want to go to the party by myself   \
Irma: don't worry we'll wait for you   Karim: THANK YOU!!! :-D" + "   Here is a \
summary of that conversation: " + "Karim doesn't want to go to the party by himself. Irma will wait for him."

In [4]:
instruction = " Summarize the following conversation: "

In [5]:
one_shot_summs = []

for i in range(len(df["dialogue"])):
    content = example + instruction + df.iloc[i, 0]
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=content,
        max_tokens=32,
        temperature=0)
    one_shot_summs.append(response.choices[0].text)

df["one_shot_summaries"] = one_shot_summs

In [6]:
df

Unnamed: 0,dialogue,ground_truth,zero_shot_summaries,one_shot_summaries
0,Amanda: I baked cookies. Do you want some? ...,Amanda baked cookies and will bring Jerry some...,Amanda offered to bake cookies for Jerry an...,Jerry is accepting Amanda's offer of cookie...
1,Olivia: Who are you voting for in this electio...,Olivia and Olivier are voting for liberals in ...,minds think alike! Olivia and Oliver both p...,!\n\nOlivia and Oliver are both voting for the...
2,"Tim: Hi, what's up? Kim: Bad mood tbh, I was...",Kim may try the pomodoro technique recommended...,Tim and Kim are discussing Kim's bad mood a...,to keep track of tasks Kim: That's a great ...
3,"Edward: Rachel, I think I'm in ove with Bella....",Edward thinks he is in love with Bella. Rachel...,Rachel is outside Edward's door and tells h...,.. \n\nEdward has confessed to Rachel that he...
4,Sam: hey overheard rick say something Sam: ...,"Sam is confused, because he overheard Rick com...",Sam overheard Rick saying he was unhappy li...,"In this conversation, Sam overheard Rick sa..."
...,...,...,...,...
995,"Hazel: Hello Brad. That's insane, we havent se...",Brad is visiting his parents today and cannot ...,Hazel and Brad catch up after not seeing ea...,"Hazel: ok, no worries :D \n\nHazel and Bra..."
996,"Kitty: I don’t have your account numbers, coul...",Kitty will convey money to Barb and Ken.,Kitty asked Barb for her account numbers in...,!\n\nKitty needs Barb and Ken's account number...
997,Max: I'll be late Sylvia: why: Max: beer w...,Max will be late because he is having a drink ...,Max told Sylvia he would be late because he...,", just don't be too late Max: I won't \n\nM..."
998,"Karolina: hi darling, when are you back in Wa...",Karolina will be back in Warsaw this weekend t...,Karolina and Enid are discussing Enid's ret...,\n\nKarolina and Enid are discussing Enid's pl...


In [7]:
df["one_shot_summaries"] = df["one_shot_summaries"].str.replace("\n\n", "   ")

In [8]:
df.to_csv("../data/processed/one_shot.csv", index=False)

## Multi-Shot Learning

In [9]:
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL', 'rouge2'], use_stemmer=True)

In [10]:
scores = scorer.score(cand, ref)

In [11]:
scores

{'rouge1': Score(precision=0.8888888888888888, recall=0.47058823529411764, fmeasure=0.6153846153846153),
 'rougeL': Score(precision=0.7777777777777778, recall=0.4117647058823529, fmeasure=0.5384615384615384),
 'rouge2': Score(precision=0.25, recall=0.125, fmeasure=0.16666666666666666)}