# GPT end-to-end with Aveni data

In [2]:
!pip install --upgrade openai
!pip install rouge



In [None]:
import json
from sys import displayhook
from openai import OpenAI
import tiktoken
import nltk
nltk.download('punkt')
from nltk import word_tokenize
import numpy as np
from rouge import Rouge
from pprint import pprint

## utils

In [None]:
client = OpenAI(api_key='') # Insert OpenAI API key

In [3]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

# Following QMSum
def tokenize(sent):
    tokens = ' '.join(word_tokenize(sent.lower()))
    return tokens

# filter some noises caused by speech recognition
def clean_data(text):
    text = text.replace('{ vocalsound }', '')
    text = text.replace('{ disfmarker }', '')
    text = text.replace('a_m_i_', 'ami')
    text = text.replace('l_c_d_', 'lcd')
    text = text.replace('p_m_s', 'pms')
    text = text.replace('t_v_', 'tv')
    text = text.replace('{ pause }', '')
    text = text.replace('{ nonvocalsound }', '')
    text = text.replace('{ gap }', '')
    return text

[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Data loading

In [5]:
with open("../Data/Aveni/all/annotated_demo_08_11.json", "r") as demo: 
     demo_queries = json.load(demo)

# make lists with queries
topic_list = []
general_query_list = []
specific_query_list = []

for i in demo_queries['topic_list']:
    for key, val in i.items():
        if key == "topic":
              topic_list.append(val)
for i in demo_queries['general_query_list']:
    for key, val in i.items():
        if key == "query":
              general_query_list.append(val)
for i in demo_queries['specific_query_list']:
    for key, val in i.items():
        if key == "query":
              specific_query_list.append(val)

In [10]:
with open('../Data/Aveni/all/annotated_demo_08_11.json', 'r') as file:
    meeting_transcript = json.load(file)
    
text = []
for turn in meeting_transcript['meeting_transcripts']:
    cur_turn = turn['speaker'].lower() + ': '
    text.append(clean_data(' '.join(word_tokenize(cur_turn + turn['content'].lower()))))

## Querying the model

In [13]:
info = f"""You're the assistant for a financial advisor. Use the below meeting transcript to answer all questions. If the answer cannot be found, write "n/a"

Transcript:
\"\"\"
{text}
\"\"\"

"""

In [16]:
num_tokens_from_string(info, "cl100k_base")

11657

In [17]:
def ask(query: str,
    print_message: bool = False) -> str:
    #message = query_message(query, df, model=model, token_budget=token_budget)
    if print_message:
        print(message)
    messages = [
        {"role": "system", "content": info},
        {"role": "user", "content": query},
    ]
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        messages=messages,
        temperature=0
    )
    response_message = response.choices[0].message.content
    return response_message

In [18]:
general_answers = []
for question in general_query_list:
    response = ask(question)
    general_answers.append(tokenize(response))

In [19]:
specific_answers = []
for question in specific_query_list:
    response = ask(question)
    specific_answers.append(tokenize(response))

## Evaluation

In [20]:
with open('../Data/Aveni/all/annotated_demo_08_11.json') as refs:
    refs_data = json.load(refs)

ref_list = []
for i in refs_data['general_query_list']:
        for key, val in i.items():
            if key == "answer": 
                ref_list.append(tokenize(val))
for i in refs_data['specific_query_list']:
        for key, val in i.items():
            if key == "answer": 
                ref_list.append(tokenize(val))

In [21]:
hyp_list = general_answers+specific_answers
assert len(hyp_list) == len(ref_list)

Summary evaluation

In [23]:

data = []
for i in range(len(hyp_list)):
    data.append({'hyp': hyp_list[i], 'ref': ref_list[i]})

hyps, refs = map(list, zip(*[[d['hyp'], d['ref']] for d in data]))
rouge = Rouge()

scores = rouge.get_scores(hyps, refs)
# or
avg_scores = rouge.get_scores(hyps, refs, avg=True)

pprint(avg_scores)

{'rouge-1': {'f': 0.43126451081695266,
             'p': 0.4151616191643133,
             'r': 0.5051289982384921},
 'rouge-2': {'f': 0.17492208661575478,
             'p': 0.1652444655168414,
             'r': 0.22130857565430687},
 'rouge-l': {'f': 0.3994800070826092,
             'p': 0.3848458488109418,
             'r': 0.46941587001025675}}


In [24]:
with open('gpt_data.txt', 'w') as f:
    for line in data:
        f.write(f"{line}\n")
f.close()

Number of tokens

In [25]:
gen_tok = []
for hyp in hyp_list:
    gen_tok.append(num_tokens_from_string(hyp, "cl100k_base"))
    
print('average generated tokens GPT: ', np.mean(gen_tok))


average generated tokens GPT:  98.975
