In [3]:
import numpy as np
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
import difflib

## Notebook to annotate HLS speeches for relevance
### B: string-based labels

Codebooks:
- B1.0: zero shot
- B1.1: one shot
- B1.2: two shot

- B1.1.1: one shot with specific inclusion of context

Test for 5 different seeds
Batch of 20 sentences
5 iterations

Temperature: 0 - 0.2 - 0.6

Hypothesis: accuracy decreases with temperature

Model selection:
 As of 22-05-2024, gpt-4-turbo-2024-04-09 seems to be the only gpt-model that returns a fingerprint in addition to gpt-4o

  #model= "gpt-4-turbo-2024-04-09"
  #model = "gpt-3.5-turbo-0125"


### Import text to annotate
Select only relevant columns of the full dataframe, in this case:
RELEVANCE

In [None]:
# Import string based datafile
HLS_train = pd.read_csv('data/string/HLS_train_string.csv', index_col=False)

In [2]:
### Select only japan for testing purposes
HLS_train_japan = HLS_train[HLS_train['id']=='COP19_japan']
HLS_train.head()

NameError: name 'HLS_train' is not defined

In [4]:
# Select only columns containing relevance labels
HLS_relevance = HLS_train_japan[['Text', 'RELEVANCE']]

### Import necessary files
- codebooks
- API key
- import gpt_annotate_num

In [5]:
# Load codebook - zero shot
with open('codebooks/B1.0', 'r', encoding='utf-8') as file:
    B10 = file.read()

In [6]:
# OpenAI key
with open('gpt_api_key.txt', 'r') as f:
    key = f.read().strip()

In [7]:
import gpt_annotate_string

### Prepare data for annotation
Compares column names in HLS_relevance to the codes identified by GPT-4o in the codebook. Seed for this identification is set to 1234.

In [8]:
# Prepare dataframe for annotation
text_to_annotate = gpt_annotate_string.prepare_data(HLS_relevance, B10, key, prep_codebook=True)

ChatCompletion(id='chatcmpl-9TUEWFOtD82al8mXP98GEjaK0ItAx', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='RELEVANCE', role='assistant', function_call=None, tool_calls=None))], created=1716814756, model='gpt-4o-2024-05-13', object='chat.completion', system_fingerprint='fp_43dfabdef1', usage=CompletionUsage(completion_tokens=3, prompt_tokens=332, total_tokens=335))

Categories to annotate:
1) RELEVANCE


Data is ready to be annotated using gpt_annotate()!

Glimpse of your data:
Shape of data:  (37, 4)
   unique_id                                               text  \
0          0                         Thank you, Mr. President .   
1          1   On beha lf of the government of Japan , I wou...   
2          2   I would also like to expr ess my d eepest con...   
3          3   Mr. President:  A fair and effective framewor...   
4          4   In this regard, Japan firmly supports the est...   

             RELEVANCE               

Fingerprint used: fp_43dfabdef1

Seed of textpreparation is hardcoded into gpt_annotate. This to ensure that onlye the results of the same fingerprint for all seeds and all iterations. Essentially every time GPT-4o is called only results with this specific fingerprint are saved.

### Run gpt_annotate_num
Evaluation per seed -
5 different seeds
Batch of 20 sentences
5 iterations

Returns 2 outputs per seed;
1. all_iterations_{seed}.csv
2. fingerprints_all.csv

In [9]:
fingerprint = 'fp_43dfabdef1'
seeds = [3644,3441, 280, 5991, 7917]

In [11]:
# Annotate the data - T0
for seed in seeds:
    gpt_annotate_string.gpt_annotate(text_to_annotate, B10, key, seed,fingerprint, experiment="B1.0",  num_iterations=5, model="gpt-4o", temperature=0, batch_size=20, human_labels=True)

Seed: 3644: Iteration 1, batch 1: fingerprintmatch found!
Seed: 3644: Iteration 1, batch 2: fingerprintmatch found!
iteration:  1 completed
Seed: 3644: Iteration 2, batch 1: fingerprintmatch found!
Seed: 3644: Iteration 2, batch 2: fingerprintmatch found!
iteration:  2 completed
Seed: 3644: Iteration 3, batch 1: fingerprintmatch found!
Seed: 3644: Iteration 3, batch 2: fingerprintmatch found!
iteration:  3 completed
Seed: 3644: Iteration 4, batch 1: fingerprintmatch found!
Seed: 3644: Iteration 4, batch 2: fingerprintmatch found!
iteration:  4 completed
Seed: 3644: Iteration 5, batch 1: fingerprintmatch found!
Seed: 3644: Iteration 5, batch 2: fingerprintmatch found!
iteration:  5 completed
Seed: 3441: Iteration 1, batch 1: fingerprintmatch found!
Seed: 3441: Iteration 1, batch 2: fingerprintmatch found!
iteration:  1 completed
Seed: 3441: Iteration 2, batch 1: fingerprintmatch found!
Seed: 3441: Iteration 2, batch 2: fingerprintmatch found!
iteration:  2 completed
Seed: 3441: Iteratio

In [None]:
gpt_annotate_string.gpt_annotate(text_to_annotate, B10, key, fingerprint, seed,experiment='B1.0', num_iterations=5, model="gpt-4o", batch_size=20, human_labels=True, time_cost_warning=True)

## Evaluate accuracy of predictions
Based on similarity between predicted and true labels.
Currently not taking consistency into account - would be done by grouping

In [2]:
def get_similarity_score(Rx, Ry):
    # Ensure Rx and Ry are pandas Series and convert to strings with stripped whitespace
    Rx = Rx.astype(str).str.strip()
    Ry = Ry.astype(str).str.strip()

    # Calculate similarity scores
    similarity_scores = Rx.combine(Ry, lambda x, y: difflib.SequenceMatcher(None, x, y).ratio())

    # Apply the threshold - maybe put higher?
    similarity_scores = similarity_scores.apply(lambda x: x if x >= 0.9 else 0)

    # Return the mean similarity score as a percentage
    return similarity_scores.mean() * 100

In [3]:
# Iterate through each file in the directory
directory = 'STRING_RESULT/B1.0/all_iterations'
similarity_scores_all = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)
    Rx = df['RELEVANCE_x']
    Ry = df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    similarity_scores_all.append((filename, similarity_score))

similarity_all = pd.DataFrame(similarity_scores_all, columns=['filename', 'similarity ALL'])
similarity_all.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_all", index=False)

similarity_all

Unnamed: 0,filename,similarity ALL
0,all_iterations_string_280.csv,75.263158
1,all_iterations_string_3441.csv,75.789474
2,all_iterations_string_3644.csv,76.842105
3,all_iterations_string_5991.csv,75.263158
4,all_iterations_string_7917.csv,78.421053


In [4]:
similarity_scores_relevant = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    similarity_scores_relevant.append((filename, similarity_score))

# Convert the list to a DataFrame
similarity_relevant = pd.DataFrame(similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
similarity_relevant.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_relevant", index=False)

similarity_relevant

Unnamed: 0,filename,Similarity RELEVANT
0,all_iterations_string_280.csv,100.0
1,all_iterations_string_3441.csv,100.0
2,all_iterations_string_3644.csv,100.0
3,all_iterations_string_5991.csv,90.0
4,all_iterations_string_7917.csv,100.0


In [6]:
similarity_scores_SOI = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Statement of intent']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    similarity_scores_SOI.append((filename, similarity_score))

# Convert the list to a DataFrame
similarity_SOI = pd.DataFrame(similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
similarity_SOI.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_SOI", index=False)

similarity_SOI

Unnamed: 0,filename,Similarity SOI
0,all_iterations_string_280.csv,72.631579
1,all_iterations_string_3441.csv,73.684211
2,all_iterations_string_3644.csv,72.631579
3,all_iterations_string_5991.csv,71.578947
4,all_iterations_string_7917.csv,83.157895


In [8]:
similarity_scores_NR = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame
similarity_NR = pd.DataFrame(similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
similarity_NR.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_NR", index=False)

similarity_NR

Unnamed: 0,filename,Similarity SOI
0,all_iterations_string_280.csv,72.631579
1,all_iterations_string_3441.csv,73.684211
2,all_iterations_string_3644.csv,72.631579
3,all_iterations_string_5991.csv,71.578947
4,all_iterations_string_7917.csv,83.157895


### Evaluate all sentences
Get insights into full prediction. See where there are inconsistencies

In [2]:
full280 = pd.read_csv('STRING_RESULT/B1.0/all_iterations/all_iterations_string_280.csv')

full280

Unnamed: 0,unique_id,text,RELEVANCE_x,llm_query,RELEVANCE_y,iteration
0,0,"Thank you, Mr. President .",Not relevant,"0 Thank you, Mr. President .\n",Not relevant,1
1,0,"Thank you, Mr. President .",Not relevant,"0 Thank you, Mr. President .\n",Not relevant,2
2,0,"Thank you, Mr. President .",Not relevant,"0 Thank you, Mr. President .\n",Not relevant,3
3,0,"Thank you, Mr. President .",Not relevant,"0 Thank you, Mr. President .\n",Not relevant,4
4,0,"Thank you, Mr. President .",Not relevant,"0 Thank you, Mr. President .\n",Not relevant,5
...,...,...,...,...,...,...
185,36,Thank you for your kind attention.,Not relevant,36 Thank you for your kind attention.\n,Not relevant,1
186,36,Thank you for your kind attention.,Not relevant,36 Thank you for your kind attention.\n,Not relevant,2
187,36,Thank you for your kind attention.,Not relevant,36 Thank you for your kind attention.\n,Not relevant,3
188,36,Thank you for your kind attention.,Not relevant,36 Thank you for your kind attention.\n,Not relevant,4
