In [83]:
import os
import openai

import pandas as pd
import numpy as np

from rouge_score import rouge_scorer
import scipy.stats as stats

openai.api_key = os.environ["OPENAI_API_KEY"]

# Load Data

In [2]:
df = pd.read_csv("../data/processed/processed.csv", nrows=1000) # contain costs with just 1k samples

In [3]:
df

Unnamed: 0,dialogue,ground_truth
0,Amanda: I baked cookies. Do you want some? ...,Amanda baked cookies and will bring Jerry some...
1,Olivia: Who are you voting for in this electio...,Olivia and Olivier are voting for liberals in ...
2,"Tim: Hi, what's up? Kim: Bad mood tbh, I was...",Kim may try the pomodoro technique recommended...
3,"Edward: Rachel, I think I'm in ove with Bella....",Edward thinks he is in love with Bella. Rachel...
4,Sam: hey overheard rick say something Sam: ...,"Sam is confused, because he overheard Rick com..."
...,...,...
995,"Hazel: Hello Brad. That's insane, we havent se...",Brad is visiting his parents today and cannot ...
996,"Kitty: I don’t have your account numbers, coul...",Kitty will convey money to Barb and Ken.
997,Max: I'll be late Sylvia: why: Max: beer w...,Max will be late because he is having a drink ...
998,"Karolina: hi darling, when are you back in Wa...",Karolina will be back in Warsaw this weekend t...


## Zero Shot Learning

In [4]:
zero_shot_summs = []

for i in range(len(df["dialogue"])):
    content = "Summarize this: " + df.iloc[i, 0]
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=content,
        max_tokens=32,
        temperature=0)
    zero_shot_summs.append(response.choices[0].text)

df["zero_shot_summaries"] = zero_shot_summs

In [11]:
df["zero_shot_summaries"] = df["zero_shot_summaries"].str.replace("\n\n", "   ")

In [30]:
df.to_csv("../data/processed/zero_shot.csv", index=False)

## One Shot Learning

In [2]:
df = pd.read_csv("../data/processed/zero_shot.csv")

In [3]:
example = "The following is a conversation: " + "Karim: i'm sorry i'm running late!!!   \
Karim: please wait for me!!!!!   Karim: i don't want to go to the party by myself   \
Irma: don't worry we'll wait for you   Karim: THANK YOU!!! :-D" + "   Here is a \
summary of that conversation: " + "Karim doesn't want to go to the party by himself. Irma will wait for him."

In [4]:
instruction = " Summarize the following conversation: "

In [5]:
one_shot_summs = []

for i in range(len(df["dialogue"])):
    content = example + instruction + df.iloc[i, 0]
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=content,
        max_tokens=32,
        temperature=0)
    one_shot_summs.append(response.choices[0].text)

df["one_shot_summaries"] = one_shot_summs

In [6]:
df

Unnamed: 0,dialogue,ground_truth,zero_shot_summaries,one_shot_summaries
0,Amanda: I baked cookies. Do you want some? ...,Amanda baked cookies and will bring Jerry some...,Amanda offered to bake cookies for Jerry an...,Jerry is accepting Amanda's offer of cookie...
1,Olivia: Who are you voting for in this electio...,Olivia and Olivier are voting for liberals in ...,minds think alike! Olivia and Oliver both p...,!\n\nOlivia and Oliver are both voting for the...
2,"Tim: Hi, what's up? Kim: Bad mood tbh, I was...",Kim may try the pomodoro technique recommended...,Tim and Kim are discussing Kim's bad mood a...,to keep track of tasks Kim: That's a great ...
3,"Edward: Rachel, I think I'm in ove with Bella....",Edward thinks he is in love with Bella. Rachel...,Rachel is outside Edward's door and tells h...,.. \n\nEdward has confessed to Rachel that he...
4,Sam: hey overheard rick say something Sam: ...,"Sam is confused, because he overheard Rick com...",Sam overheard Rick saying he was unhappy li...,"In this conversation, Sam overheard Rick sa..."
...,...,...,...,...
995,"Hazel: Hello Brad. That's insane, we havent se...",Brad is visiting his parents today and cannot ...,Hazel and Brad catch up after not seeing ea...,"Hazel: ok, no worries :D \n\nHazel and Bra..."
996,"Kitty: I don’t have your account numbers, coul...",Kitty will convey money to Barb and Ken.,Kitty asked Barb for her account numbers in...,!\n\nKitty needs Barb and Ken's account number...
997,Max: I'll be late Sylvia: why: Max: beer w...,Max will be late because he is having a drink ...,Max told Sylvia he would be late because he...,", just don't be too late Max: I won't \n\nM..."
998,"Karolina: hi darling, when are you back in Wa...",Karolina will be back in Warsaw this weekend t...,Karolina and Enid are discussing Enid's ret...,\n\nKarolina and Enid are discussing Enid's pl...


In [7]:
df["one_shot_summaries"] = df["one_shot_summaries"].str.replace("\n\n", "   ")

In [8]:
df.to_csv("../data/processed/one_shot.csv", index=False)

## Multi-Shot Learning

In [2]:
df = pd.read_csv("../data/processed/one_shot.csv")

In [3]:
df

Unnamed: 0,dialogue,ground_truth,zero_shot_summaries,one_shot_summaries
0,Amanda: I baked cookies. Do you want some? ...,Amanda baked cookies and will bring Jerry some...,Amanda offered to bake cookies for Jerry an...,Jerry is accepting Amanda's offer of cookie...
1,Olivia: Who are you voting for in this electio...,Olivia and Olivier are voting for liberals in ...,minds think alike! Olivia and Oliver both p...,! Olivia and Oliver are both voting for the ...
2,"Tim: Hi, what's up? Kim: Bad mood tbh, I was...",Kim may try the pomodoro technique recommended...,Tim and Kim are discussing Kim's bad mood a...,to keep track of tasks Kim: That's a great ...
3,"Edward: Rachel, I think I'm in ove with Bella....",Edward thinks he is in love with Bella. Rachel...,Rachel is outside Edward's door and tells h...,.. Edward has confessed to Rachel that he ...
4,Sam: hey overheard rick say something Sam: ...,"Sam is confused, because he overheard Rick com...",Sam overheard Rick saying he was unhappy li...,"In this conversation, Sam overheard Rick sa..."
...,...,...,...,...
995,"Hazel: Hello Brad. That's insane, we havent se...",Brad is visiting his parents today and cannot ...,Hazel and Brad catch up after not seeing ea...,"Hazel: ok, no worries :D Hazel and Brad..."
996,"Kitty: I don’t have your account numbers, coul...",Kitty will convey money to Barb and Ken.,Kitty asked Barb for her account numbers in...,! Kitty needs Barb and Ken's account numbers...
997,Max: I'll be late Sylvia: why: Max: beer w...,Max will be late because he is having a drink ...,Max told Sylvia he would be late because he...,", just don't be too late Max: I won't Ma..."
998,"Karolina: hi darling, when are you back in Wa...",Karolina will be back in Warsaw this weekend t...,Karolina and Enid are discussing Enid's ret...,Karolina and Enid are discussing Enid's pla...


In [4]:
example_1 = "The following is a conversation: " + "Helen: What time is the show?   Alain: 6 p.m.   \
Eddie: Do you have the tickets?   Alain: I bought them last week    Eddie: Cool   Eddie: How much do I owe you?   \
Alain: 50$    Helen: I an pay you today   Alain: It's ok. Whenever it's convenient for you. " + "   Here is a \
summary of that conversation: " + "The show starts at 6 p.m. Helen will pay Alain back 50$ today."

example_2 = "Here is another conversation: " + "Tom: where are you?   \
Pitt: still at the supermarket, an enormous queue here   \
Benny: But we've everything!   Tom: very good" + "   And here is a summary of that conversation: " + "Pitt and Benny \
are standing in an enormous queue in the supermarket."

In [5]:
instruction = " Summarize the following conversation: "

In [9]:
multi_shot_summs = []

for i in range(len(df["dialogue"])):
    content = example_1 + example_2 + instruction + df.iloc[i, 0]
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=content,
        max_tokens=32,
        temperature=0)
    multi_shot_summs.append(response.choices[0].text)

df["multi_shot_summaries"] = multi_shot_summs

In [10]:
df["multi_shot_summaries"] = df["multi_shot_summaries"].str.replace("\n\n", "   ")

In [11]:
df.to_csv("../data/processed/multi_shot.csv", index=False)

## Evaluation

In [21]:
df = pd.read_csv("../data/processed/multi_shot.csv")

In [22]:
df

Unnamed: 0,dialogue,ground_truth,zero_shot_summaries,one_shot_summaries,multi_shot_summaries
0,Amanda: I baked cookies. Do you want some? ...,Amanda baked cookies and will bring Jerry some...,Amanda offered to bake cookies for Jerry an...,Jerry is accepting Amanda's offer of cookie...,Jerry: Great! Amanda offered to bring Jer...
1,Olivia: Who are you voting for in this electio...,Olivia and Olivier are voting for liberals in ...,minds think alike! Olivia and Oliver both p...,! Olivia and Oliver are both voting for the ...,! Olivia and Oliver are both voting for the ...
2,"Tim: Hi, what's up? Kim: Bad mood tbh, I was...",Kim may try the pomodoro technique recommended...,Tim and Kim are discussing Kim's bad mood a...,to keep track of tasks Kim: That's a great ...,"to organize tasks Kim: That's a good idea, ..."
3,"Edward: Rachel, I think I'm in ove with Bella....",Edward thinks he is in love with Bella. Rachel...,Rachel is outside Edward's door and tells h...,.. Edward has confessed to Rachel that he ...,. Edward is in love with Bella and Rachel is...
4,Sam: hey overheard rick say something Sam: ...,"Sam is confused, because he overheard Rick com...",Sam overheard Rick saying he was unhappy li...,"In this conversation, Sam overheard Rick sa...",Here is a summary of the conversation: Sam ...
...,...,...,...,...,...
995,"Hazel: Hello Brad. That's insane, we havent se...",Brad is visiting his parents today and cannot ...,Hazel and Brad catch up after not seeing ea...,"Hazel: ok, no worries :D Hazel and Brad...","Hazel: ok, no worries :D A summary of t..."
996,"Kitty: I don’t have your account numbers, coul...",Kitty will convey money to Barb and Ken.,Kitty asked Barb for her account numbers in...,! Kitty needs Barb and Ken's account numbers...,Kitty asked Barb for her account numbers in...
997,Max: I'll be late Sylvia: why: Max: beer w...,Max will be late because he is having a drink ...,Max told Sylvia he would be late because he...,", just don't be too late Max: I won't Ma...",", see you later Max is going to be late be..."
998,"Karolina: hi darling, when are you back in Wa...",Karolina will be back in Warsaw this weekend t...,Karolina and Enid are discussing Enid's ret...,Karolina and Enid are discussing Enid's pla...,"In this conversation, Karolina and Enid are..."


In [50]:
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)

rouge_1_zero = []
rouge_2_zero = []
rouge_L_zero = []

rouge_1_one = []
rouge_2_one = []
rouge_L_one = []

rouge_1_multi = []
rouge_2_multi = []
rouge_L_multi = []

for i in range(len(df["ground_truth"])):
    cand = df.iloc[i, 1]
    
    rouge_1_zero.append(scorer.score(cand, df.iloc[i, 2])["rouge1"].fmeasure)
    rouge_2_zero.append(scorer.score(cand, df.iloc[i, 2])["rouge2"].fmeasure)
    rouge_L_zero.append(scorer.score(cand, df.iloc[i, 2])["rougeL"].fmeasure)
    
    rouge_1_one.append(scorer.score(cand, df.iloc[i, 3])["rouge1"].fmeasure)
    rouge_2_one.append(scorer.score(cand, df.iloc[i, 3])["rouge2"].fmeasure)
    rouge_L_one.append(scorer.score(cand, df.iloc[i, 3])["rougeL"].fmeasure)   
    
    rouge_1_multi.append(scorer.score(cand, df.iloc[i, 4])["rouge1"].fmeasure)
    rouge_2_multi.append(scorer.score(cand, df.iloc[i, 4])["rouge2"].fmeasure)
    rouge_L_multi.append(scorer.score(cand, df.iloc[i, 4])["rougeL"].fmeasure)
    
df["rouge_1_zero"] = rouge_1_zero
df["rouge_2_zero"] = rouge_2_zero
df["rouge_L_zero"] = rouge_L_zero

df["rouge_1_one"] = rouge_1_one
df["rouge_2_one"] = rouge_2_one
df["rouge_L_one"] = rouge_L_one

df["rouge_1_multi"] = rouge_1_multi
df["rouge_2_multi"] = rouge_2_multi
df["rouge_L_multi"] = rouge_L_multi

In [52]:
df.describe()

Unnamed: 0,rouge_1_zero,rouge_2_zero,rouge_L_zero,rouge_1_one,rouge_2_one,rouge_L_one,rouge_1_multi,rouge_2_multi,rouge_L_multi
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.399796,0.15317,0.313284,0.422746,0.172586,0.333395,0.430433,0.181741,0.338361
std,0.134444,0.124767,0.125632,0.138628,0.135187,0.135407,0.144704,0.142162,0.140745
min,0.058824,0.0,0.057143,0.0,0.0,0.0,0.047619,0.0,0.047619
25%,0.30303,0.059701,0.222222,0.333333,0.066393,0.235294,0.333333,0.067797,0.237177
50%,0.4,0.129734,0.297872,0.416667,0.148701,0.315789,0.425881,0.156863,0.318182
75%,0.486486,0.222222,0.379022,0.510638,0.25,0.411765,0.514286,0.266667,0.421053
max,0.869565,0.7,0.846154,0.923077,0.75,0.923077,0.923077,0.857143,0.923077


In [102]:
# Due to how data is structured, easier to do multiple pairwise 
# ind_ttests instead of one-way ANOVA with Tukey HSD

print("Zero vs 1 Shot, Rouge 1: " + str(stats.ttest_ind(df["rouge_1_zero"], df["rouge_1_one"]).pvalue.round(6)))
print("Zero vs Multi Shot, Rouge 1: " + str(stats.ttest_ind(df["rouge_1_zero"], df["rouge_1_multi"]).pvalue.round(6)))
print("One vs Multi, Rouge 1: " + str(stats.ttest_ind(df["rouge_1_one"], df["rouge_1_multi"]).pvalue.round(6)))
print("----------------------------------")
print("Zero vs 1 Shot, Rouge 2: " + str(stats.ttest_ind(df["rouge_2_zero"], df["rouge_2_one"]).pvalue.round(6)))
print("Zero vs Multi Shot, Rouge 2: " + str(stats.ttest_ind(df["rouge_2_zero"], df["rouge_2_multi"]).pvalue.round(6)))
print("One vs Multi, Rouge 2: " + str(stats.ttest_ind(df["rouge_2_one"], df["rouge_2_multi"]).pvalue.round(6)))
print("----------------------------------")
print("Zero vs 1 Shot, Rouge L: " + str(stats.ttest_ind(df["rouge_L_zero"], df["rouge_L_one"]).pvalue.round(6)))
print("Zero vs Multi Shot, Rouge L: " + str(stats.ttest_ind(df["rouge_L_zero"], df["rouge_L_multi"]).pvalue.round(6)))
print("One vs Multi, Rouge L: " + str(stats.ttest_ind(df["rouge_L_one"], df["rouge_L_multi"]).pvalue.round(6)))

Zero vs 1 Shot, Rouge 1: 0.000176
Zero vs Multi Shot, Rouge 1: 1e-06
One vs Multi, Rouge 1: 0.225237
----------------------------------
Zero vs 1 Shot, Rouge 2: 0.000861
Zero vs Multi Shot, Rouge 2: 2e-06
One vs Multi, Rouge 2: 0.140148
----------------------------------
Zero vs 1 Shot, Rouge L: 0.000587
Zero vs Multi Shot, Rouge L: 2.7e-05
One vs Multi, Rouge L: 0.421443
