In [1]:
import numpy as np
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
import difflib

## Notebook to annotate HLS speeches for relevance
### B: string-based labels

Codebooks:
- B1.0: zero shot
- B1.1: one shot
- B1.2: two shot

- B1.1.1: one shot with specific inclusion of context

Test for 5 different seeds
Batch of 20 sentences
Original: 5 iterations; does take very long!
Set to 1 iteration; check for class imbalances between labelled elements. OOK: seed zou moeten zorgen dat iedere iteratie gelijk is - dit hoeft niet het geval te zijn, zeker niet als de temperature is aangepast. > kan wel inherent in model zitten

Main outcomes: T0 - I1
For testing purposes: T0 - T 0.2 - T 0.6 : I3

Temperature: 0 - 0.6
Temperature only focuses on the output probabilities - should thus be set to 0 to make the output as deterministic as possible.
> Do a single test to see what the influence of changing temperature is
> Top_p is set to 1; making temperature the primary factor.

Hypothesis: accuracy decreases with temperature

Model selection:
 As of 22-05-2024, gpt-4-turbo-2024-04-09 seems to be the only gpt-model that returns a fingerprint in addition to gpt-4o

  #model= "gpt-4-turbo-2024-04-09"
  #model = "gpt-3.5-turbo-0125"


### Import text to annotate
Select only relevant columns of the full dataframe, in this case:
RELEVANCE

In [2]:
# Import string based datafile
HLS_train = pd.read_csv('data/string/HLS_train_string.csv')

In [3]:
### Select only japan for testing purposes
#HLS_train_japan = HLS_train[HLS_train['id']=='COP19_japan']
HLS_train

Unnamed: 0,id,Text,Relevance,Principle,Topic,Unit,Shape,RELEVANCE,PRINCIPLE,TOPIC,UNIT,SHAPE
0,COP19_japan,"Thank you, Mr. President .",0,0,0,0,0,Not relevant,not evaluated,not evaluated,not evaluated,not evaluated
1,COP19_japan,"On beha lf of the government of Japan , I wou...",0,0,0,0,0,Not relevant,not evaluated,not evaluated,not evaluated,not evaluated
2,COP19_japan,I would also like to expr ess my d eepest con...,0,0,0,0,0,Not relevant,not evaluated,not evaluated,not evaluated,not evaluated
3,COP19_japan,Mr. President: A fair and effective framewor...,2,3,1,2,2,Relevant,utilitarian,new UNFCCC policy,responsibility,equality
4,COP19_japan,"In this regard, Japan firmly supports the est...",1,0,0,0,0,Statement of intent,not evaluated,not evaluated,not evaluated,not evaluated
...,...,...,...,...,...,...,...,...,...,...,...,...
1207,COP28_newzealand,New Zealand is proud to suppor t several impo...,0,0,0,0,0,Not relevant,not evaluated,not evaluated,not evaluated,not evaluated
1208,COP28_newzealand,"I am joined by New Zealand’s largest business,...",0,0,0,0,0,Not relevant,not evaluated,not evaluated,not evaluated,not evaluated
1209,COP28_newzealand,The commitment o f New Zealanders from across ...,0,0,0,0,0,Not relevant,not evaluated,not evaluated,not evaluated,not evaluated
1210,COP28_newzealand,Thank you Mr President.,0,0,0,0,0,Not relevant,not evaluated,not evaluated,not evaluated,not evaluated


In [4]:
# Select only columns containing relevance labels
HLS_relevance = HLS_train[['Text', 'RELEVANCE']]

### Import necessary files
- codebooks
- API key
- import gpt_annotate_num

In [5]:
# Load codebook - zero shot
with open('codebooks/B1.0', 'r', encoding='utf-8') as file:
    B10 = file.read()

In [6]:
# OpenAI key
with open('gpt_api_key.txt', 'r') as f:
    key = f.read().strip()

In [7]:
import gpt_annotate_string

### Prepare data for annotation
Compares column names in HLS_relevance to the codes identified by GPT-4o in the codebook. Seed for this identification is set to 1234.

In [8]:
# Prepare dataframe for annotation
text_to_annotate = gpt_annotate_string.prepare_data(HLS_relevance, B10, key, prep_codebook=True)

ChatCompletion(id='chatcmpl-9TqwPCRvN3vhQGbaq5TbVxnJsKZxW', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='RELEVANCE', role='assistant', function_call=None, tool_calls=None))], created=1716902045, model='gpt-4o-2024-05-13', object='chat.completion', system_fingerprint='fp_43dfabdef1', usage=CompletionUsage(completion_tokens=3, prompt_tokens=332, total_tokens=335))

Categories to annotate:
1) RELEVANCE


Data is ready to be annotated using gpt_annotate()!

Glimpse of your data:
Shape of data:  (1212, 4)
   unique_id                                               text  \
0          0                         Thank you, Mr. President .   
1          1   On beha lf of the government of Japan , I wou...   
2          2   I would also like to expr ess my d eepest con...   
3          3   Mr. President:  A fair and effective framewor...   
4          4   In this regard, Japan firmly supports the est...   

             RELEVANCE             

Fingerprint used: fp_43dfabdef1

Seed of textpreparation is hardcoded into gpt_annotate. This to ensure that onlye the results of the same fingerprint for all seeds and all iterations. Essentially every time GPT-4o is called only results with this specific fingerprint are saved.

### Run gpt_annotate_num
Evaluation per seed -
5 different seeds
Batch of 20 sentences
5 iterations

Returns 3 outputs:
1. all_iterations_{seed}.csv
2. fingerprints_all.csv
3. missed_batches.csv

In [9]:
fingerprint = 'fp_43dfabdef1'
seeds = [3644,3441, 280, 5991, 7917]

In [10]:
# Annotate the data - T0 - I1
for seed in seeds:
    gpt_annotate_string.gpt_annotate(text_to_annotate, B10, key, seed,fingerprint, experiment="B1.0",  num_iterations=1, model="gpt-4o", temperature=0, batch_size=20, human_labels=True)

3644 - iteration 1
3644 - I1 - B5 fingerprint does not match
3644 - I1 - B8 fingerprint does not match
3644 - I1 - B12 fingerprint does not match
3644 - I1 - B38 fingerprint does not match
iteration:  1 completed
3441 - iteration 1
3441 - I1 - B8 fingerprint does not match
3441 - I1 - B26 fingerprint does not match
3441 - I1 - B27 fingerprint does not match
3441 - I1 - B45 fingerprint does not match
3441 - I1 - B61 fingerprint does not match
iteration:  1 completed
280 - iteration 1
280 - I1 - B29 fingerprint does not match
280 - I1 - B30 fingerprint does not match
280 - I1 - B38 fingerprint does not match
280 - I1 - B58 fingerprint does not match
280 - I1 - B60 fingerprint does not match
iteration:  1 completed
5991 - iteration 1
5991 - I1 - B8 fingerprint does not match
iteration:  1 completed
7917 - iteration 1
7917 - I1 - B7 fingerprint does not match
7917 - I1 - B20 fingerprint does not match
7917 - I1 - B22 fingerprint does not match
7917 - I1 - B46 fingerprint does not match
791

### Evaluate with temperature 0.6

In [20]:
# Annotate the data - T0 - I1
for seed in seeds:
    gpt_annotate_string.gpt_annotate(text_to_annotate, B10, key, seed,fingerprint, experiment="B1.0",  num_iterations=1, model="gpt-4o", temperature=0.6, batch_size=20, human_labels=True)

3644 - iteration 1
3644 - I1 - B5 fingerprint does not match
3644 - I1 - B26 fingerprint does not match
3644 - I1 - B39 fingerprint does not match
3644 - I1 - B55 fingerprint does not match
iteration:  1 completed
3441 - iteration 1
3441 - I1 - B22 fingerprint does not match
3441 - I1 - B29 fingerprint does not match
iteration:  1 completed
280 - iteration 1
280 - I1 - B1 fingerprint does not match
280 - I1 - B33 fingerprint does not match
280 - I1 - B42 fingerprint does not match
280 - I1 - B54 fingerprint does not match
iteration:  1 completed
5991 - iteration 1
5991 - I1 - B32 fingerprint does not match
5991 - I1 - B43 fingerprint does not match
iteration:  1 completed
7917 - iteration 1
7917 - I1 - B1 fingerprint does not match
7917 - I1 - B12 fingerprint does not match
7917 - I1 - B32 fingerprint does not match
iteration:  1 completed


## Evaluate accuracy of predictions
Based on similarity between predicted and true labels.
Currently not taking consistency into account - would be done by grouping


Define function for similarity score

In [10]:
def get_similarity_score(Rx, Ry):
    # Ensure Rx and Ry are pandas Series and convert to strings with stripped whitespace
    Rx = Rx.astype(str).str.strip()
    Ry = Ry.astype(str).str.strip()

    # Calculate similarity scores
    similarity_scores = Rx.combine(Ry, lambda x, y: difflib.SequenceMatcher(None, x, y).ratio())

    # Apply the threshold - maybe put higher?
    similarity_scores = similarity_scores.apply(lambda x: x if x >= 0.9 else 0)

    # Return the mean similarity score as a percentage
    return similarity_scores.mean() * 100

### Evaluate performance

In [12]:
# Iterate through each file in the directory
directory = 'STRING_RESULT/B1.0/all_iterations'
similarity_scores_all = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)
    Rx = df['RELEVANCE_x']
    Ry = df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    similarity_scores_all.append((filename, similarity_score))

similarity_all = pd.DataFrame(similarity_scores_all, columns=['filename', 'similarity ALL'])
similarity_all.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_all", index=False)

similarity_all

Unnamed: 0,filename,similarity ALL
0,all_iterations_string_T0.6_280.csv,58.922261
1,all_iterations_string_T0.6_3441.csv,59.556314
2,all_iterations_string_T0.6_3644.csv,59.540636
3,all_iterations_string_T0.6_5991.csv,58.617747
4,all_iterations_string_T0.6_7917.csv,59.635417
5,all_iterations_string_T0_280.csv,59.442446
6,all_iterations_string_T0_3441.csv,60.089286
7,all_iterations_string_T0_3644.csv,59.628975
8,all_iterations_string_T0_5991.csv,60.822148
9,all_iterations_string_T0_7917.csv,60.982143


In [13]:
similarity_scores_relevant = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    similarity_scores_relevant.append((filename, similarity_score))

# Convert the list to a DataFrame
similarity_relevant = pd.DataFrame(similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
similarity_relevant.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_relevant", index=False)

similarity_relevant

Unnamed: 0,filename,Similarity RELEVANT
0,all_iterations_string_T0.6_280.csv,79.61165
1,all_iterations_string_T0.6_3441.csv,80.555556
2,all_iterations_string_T0.6_3644.csv,84.577114
3,all_iterations_string_T0.6_5991.csv,75.961538
4,all_iterations_string_T0.6_7917.csv,79.710145
5,all_iterations_string_T0_280.csv,76.076555
6,all_iterations_string_T0_3441.csv,81.122449
7,all_iterations_string_T0_3644.csv,79.104478
8,all_iterations_string_T0_5991.csv,81.042654
9,all_iterations_string_T0_7917.csv,77.272727


In [14]:
similarity_scores_SOI = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Statement of intent']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    similarity_scores_SOI.append((filename, similarity_score))

# Convert the list to a DataFrame
similarity_SOI = pd.DataFrame(similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
similarity_SOI.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_SOI", index=False)

similarity_SOI

Unnamed: 0,filename,Similarity SOI
0,all_iterations_string_T0.6_280.csv,53.571429
1,all_iterations_string_T0.6_3441.csv,52.604167
2,all_iterations_string_T0.6_3644.csv,49.746193
3,all_iterations_string_T0.6_5991.csv,52.040816
4,all_iterations_string_T0.6_7917.csv,55.384615
5,all_iterations_string_T0_280.csv,53.804348
6,all_iterations_string_T0_3441.csv,52.736318
7,all_iterations_string_T0_3644.csv,55.778894
8,all_iterations_string_T0_5991.csv,54.807692
9,all_iterations_string_T0_7917.csv,56.185567


In [15]:
similarity_scores_NR = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame
similarity_NR = pd.DataFrame(similarity_scores_SOI, columns=['filename', 'Similarity NR'])
similarity_NR.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_NR", index=False)

similarity_NR

Unnamed: 0,filename,Similarity NR
0,all_iterations_string_T0.6_280.csv,53.571429
1,all_iterations_string_T0.6_3441.csv,52.604167
2,all_iterations_string_T0.6_3644.csv,49.746193
3,all_iterations_string_T0.6_5991.csv,52.040816
4,all_iterations_string_T0.6_7917.csv,55.384615
5,all_iterations_string_T0_280.csv,53.804348
6,all_iterations_string_T0_3441.csv,52.736318
7,all_iterations_string_T0_3644.csv,55.778894
8,all_iterations_string_T0_5991.csv,54.807692
9,all_iterations_string_T0_7917.csv,56.185567


### Evaluation with context specified in text
codebook B1.0.1

In [29]:
# Load codebook - zero shot
with open('codebooks/B1.0.1', 'r', encoding='utf-8') as file:
    B101 = file.read()

In [30]:
# Annotate the data - T0 - I1
for seed in seeds:
    gpt_annotate_string.gpt_annotate(text_to_annotate, B101, key, seed,fingerprint, experiment="B1.0.1",  num_iterations=1, model="gpt-4o", temperature=0, batch_size=20, human_labels=True)

3644 - iteration 1
3644 - I1 - B39 fingerprint does not match
3644 - I1 - B42 fingerprint does not match
3644 - I1 - B57 fingerprint does not match
iteration:  1 completed
3441 - iteration 1
3441 - I1 - B3 fingerprint does not match
3441 - I1 - B59 fingerprint does not match
iteration:  1 completed
280 - iteration 1
280 - I1 - B12 fingerprint does not match
280 - I1 - B33 fingerprint does not match
280 - I1 - B46 fingerprint does not match
280 - I1 - B56 fingerprint does not match
iteration:  1 completed
5991 - iteration 1
5991 - I1 - B61 fingerprint does not match
iteration:  1 completed
7917 - iteration 1
7917 - I1 - B9 fingerprint does not match
7917 - I1 - B37 fingerprint does not match
7917 - I1 - B52 fingerprint does not match
7917 - I1 - B60 fingerprint does not match
iteration:  1 completed


### Evaluation B1.0 with context - B1.0.1

In [16]:
# Iterate through each file in the directory
directoryB101 = 'STRING_RESULT/B1.0.1/all_iterations'
B101similarity_scores_all = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)
    Rx = df['RELEVANCE_x']
    Ry = df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx, Ry)

    #Save the score in a dataframe
    B101similarity_scores_all.append((filename, similarity_score))

B101similarity_all = pd.DataFrame(B101similarity_scores_all, columns=['filename', 'similarity ALL'])
B101similarity_all.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_all", index=False)

B101similarity_all

Unnamed: 0,filename,similarity ALL
0,all_iterations_string_T0_280.csv,56.095406
1,all_iterations_string_T0_3441.csv,54.863481
2,all_iterations_string_T0_3644.csv,56.163194
3,all_iterations_string_T0_5991.csv,57.333333
4,all_iterations_string_T0_7917.csv,56.80212


In [17]:
B101similarity_scores_relevant = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B101similarity_scores_relevant.append((filename, similarity_score))

# Convert the list to a DataFrame
B101similarity_relevant = pd.DataFrame(B101similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
B101similarity_relevant.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_relevant", index=False)

B101similarity_relevant

Unnamed: 0,filename,Similarity RELEVANT
0,all_iterations_string_T0_280.csv,81.463415
1,all_iterations_string_T0_3441.csv,79.904306
2,all_iterations_string_T0_3644.csv,83.414634
3,all_iterations_string_T0_5991.csv,82.488479
4,all_iterations_string_T0_7917.csv,82.038835


In [18]:
B101similarity_scores_SOI = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Statement of intent']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B101similarity_scores_SOI.append((filename, similarity_score))

# Convert the list to a DataFrame
B101similarity_SOI = pd.DataFrame(B101similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
B101similarity_SOI.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_SOI", index=False)

B101similarity_SOI

Unnamed: 0,filename,Similarity SOI
0,all_iterations_string_T0_280.csv,50.515464
1,all_iterations_string_T0_3441.csv,54.411765
2,all_iterations_string_T0_3644.csv,50.246305
3,all_iterations_string_T0_5991.csv,53.140097
4,all_iterations_string_T0_7917.csv,53.88601


In [23]:
B101similarity_scores_NR = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B101similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame
B101similarity_NR = pd.DataFrame(B101similarity_scores_SOI, columns=['filename', 'Similarity NR'])
B101similarity_NR.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_NR", index=False)

B101similarity_NR

Unnamed: 0,filename,Similarity NR
0,all_iterations_string_T0_280.csv,50.515464
1,all_iterations_string_T0_3441.csv,54.411765
2,all_iterations_string_T0_3644.csv,50.246305
3,all_iterations_string_T0_5991.csv,53.140097
4,all_iterations_string_T0_7917.csv,53.88601


## B1.1 - Oneshot codebook
Evaluation with T=0 and 1 iteration

In [40]:
# Load codebook - zero shot
with open('codebooks/B1.1', 'r', encoding='utf-8') as file:
    B11 = file.read()

In [41]:
# Annotate the data - T0 - I1
for seed in seeds:
    gpt_annotate_string.gpt_annotate(text_to_annotate, B11, key, seed,fingerprint, experiment="B1.1",  num_iterations=1, model="gpt-4o", temperature=0, batch_size=20, human_labels=True)

3644 - iteration 1
3644 - I1 - B24 fingerprint does not match
iteration:  1 completed
3441 - iteration 1
3441 - I1 - B1 fingerprint does not match
3441 - I1 - B15 fingerprint does not match
3441 - I1 - B31 fingerprint does not match
3441 - I1 - B32 fingerprint does not match
3441 - I1 - B46 fingerprint does not match
iteration:  1 completed
280 - iteration 1
280 - I1 - B7 fingerprint does not match
280 - I1 - B20 fingerprint does not match
280 - I1 - B22 fingerprint does not match
280 - I1 - B23 fingerprint does not match
iteration:  1 completed
5991 - iteration 1
5991 - I1 - B1 fingerprint does not match
5991 - I1 - B4 fingerprint does not match
5991 - I1 - B9 fingerprint does not match
5991 - I1 - B20 fingerprint does not match
5991 - I1 - B22 fingerprint does not match
5991 - I1 - B32 fingerprint does not match
5991 - I1 - B35 fingerprint does not match
iteration:  1 completed
7917 - iteration 1
7917 - I1 - B2 fingerprint does not match
7917 - I1 - B10 fingerprint does not match
791

## Evaluation B1.1

In [19]:
# Iterate through each file in the directory
directoryB11 = 'STRING_RESULT/B1.1/all_iterations'
B11similarity_scores_all = []

for filename in os.listdir(directoryB11):
    file_path = os.path.join(directoryB11, filename)
    df = pd.read_csv(file_path)
    Rx = df['RELEVANCE_x']
    Ry = df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx, Ry)

    #Save the score in a dataframe
    B11similarity_scores_all.append((filename, similarity_score))

B11similarity_all = pd.DataFrame(B11similarity_scores_all, columns=['filename', 'similarity ALL'])
B11similarity_all.to_csv("STRING_RESULT/B1.1/T0_similarity_scores_all", index=False)

B11similarity_all

Unnamed: 0,filename,similarity ALL
0,all_iterations_string_T0_280.csv,64.045936
1,all_iterations_string_T0_3441.csv,64.478417
2,all_iterations_string_T0_3644.csv,63.758389
3,all_iterations_string_T0_5991.csv,63.80597
4,all_iterations_string_T0_7917.csv,63.392857


In [20]:
B11similarity_scores_relevant = []

for filename in os.listdir(directoryB11):
    file_path = os.path.join(directoryB11, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B11similarity_scores_relevant.append((filename, similarity_score))

# Convert the list to a DataFrame
B11similarity_relevant = pd.DataFrame(B11similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
B11similarity_relevant.to_csv("STRING_RESULT/B1.1/T0_similarity_scores_relevant", index=False)

B11similarity_relevant

Unnamed: 0,filename,Similarity RELEVANT
0,all_iterations_string_T0_280.csv,81.683168
1,all_iterations_string_T0_3441.csv,81.122449
2,all_iterations_string_T0_3644.csv,76.497696
3,all_iterations_string_T0_5991.csv,81.521739
4,all_iterations_string_T0_7917.csv,80.097087


In [21]:
B11similarity_scores_SOI = []

for filename in os.listdir(directoryB11):
    file_path = os.path.join(directoryB11, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Statement of intent']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B11similarity_scores_SOI.append((filename, similarity_score))

# Convert the list to a DataFrame
B11similarity_SOI = pd.DataFrame(B11similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
B11similarity_SOI.to_csv("STRING_RESULT/B1.1/T0_similarity_scores_SOI", index=False)

B11similarity_SOI

Unnamed: 0,filename,Similarity SOI
0,all_iterations_string_T0_280.csv,53.513514
1,all_iterations_string_T0_3441.csv,56.701031
2,all_iterations_string_T0_3644.csv,57.560976
3,all_iterations_string_T0_5991.csv,55.307263
4,all_iterations_string_T0_7917.csv,56.111111


In [23]:
B11similarity_scores_NR = []

for filename in os.listdir(directoryB11):
    file_path = os.path.join(directoryB11, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B11similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame
B11similarity_NR = pd.DataFrame(B11similarity_scores_SOI, columns=['filename', 'Similarity NR'])
B11similarity_NR.to_csv("STRING_RESULT/B1.1/T0_similarity_scores_NR", index=False)

B11similarity_NR

Unnamed: 0,filename,Similarity NR
0,all_iterations_string_T0_280.csv,53.513514
1,all_iterations_string_T0_3441.csv,56.701031
2,all_iterations_string_T0_3644.csv,57.560976
3,all_iterations_string_T0_5991.csv,55.307263
4,all_iterations_string_T0_7917.csv,56.111111


## B1.1.1 - Oneshot codebook WITH CONTEXT
Evaluation with T=0 and 1 iteration


In [24]:
# Load codebook - zero shot
with open('codebooks/B1.1.1', 'r', encoding='utf-8') as file:
    B111 = file.read()

In [25]:
# Annotate the data - T0 - I1
for seed in seeds:
    gpt_annotate_string.gpt_annotate(text_to_annotate, B111, key, seed,fingerprint, experiment="B1.1.1",  num_iterations=1, model="gpt-4o", temperature=0, batch_size=20, human_labels=True)

3644 - iteration 1
iteration:  1 completed
3441 - iteration 1
3441 - I1 - B4 fingerprint does not match
3441 - I1 - B5 fingerprint does not match
3441 - I1 - B6 fingerprint does not match
3441 - I1 - B27 fingerprint does not match
3441 - I1 - B30 fingerprint does not match
3441 - I1 - B37 fingerprint does not match
3441 - I1 - B45 fingerprint does not match
iteration:  1 completed
280 - iteration 1
280 - I1 - B13 fingerprint does not match
280 - I1 - B15 fingerprint does not match
280 - I1 - B38 fingerprint does not match
iteration:  1 completed
5991 - iteration 1
5991 - I1 - B59 fingerprint does not match
iteration:  1 completed
7917 - iteration 1
7917 - I1 - B30 fingerprint does not match
7917 - I1 - B56 fingerprint does not match
iteration:  1 completed


## Evaluate B111


In [26]:
# Iterate through each file in the directory
directoryB111 = 'STRING_RESULT/B1.1.1/all_iterations'
B111similarity_scores_all = []

for filename in os.listdir(directoryB111):
    file_path = os.path.join(directoryB111, filename)
    df = pd.read_csv(file_path)
    Rx = df['RELEVANCE_x']
    Ry = df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx, Ry)

    #Save the score in a dataframe
    B111similarity_scores_all.append((filename, similarity_score))

B111similarity_all = pd.DataFrame(B111similarity_scores_all, columns=['filename', 'similarity ALL'])
B111similarity_all.to_csv("STRING_RESULT/B1.1.1/T0_similarity_scores_all", index=False)

B111similarity_all

Unnamed: 0,filename,similarity ALL
0,all_iterations_string_T0_280.csv,62.152778
1,all_iterations_string_T0_3441.csv,61.692015
2,all_iterations_string_T0_3644.csv,61.409396
3,all_iterations_string_T0_5991.csv,62.5
4,all_iterations_string_T0_7917.csv,62.201365


In [27]:
B111similarity_scores_relevant = []

for filename in os.listdir(directoryB111):
    file_path = os.path.join(directoryB111, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B111similarity_scores_relevant.append((filename, similarity_score))

# Convert the list to a DataFrame
B111similarity_relevant = pd.DataFrame(B111similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
B111similarity_relevant.to_csv("STRING_RESULT/B1.1.1/T0_similarity_scores_relevant", index=False)

B111similarity_relevant

Unnamed: 0,filename,Similarity RELEVANT
0,all_iterations_string_T0_280.csv,85.365854
1,all_iterations_string_T0_3441.csv,85.082873
2,all_iterations_string_T0_3644.csv,82.54717
3,all_iterations_string_T0_5991.csv,82.790698
4,all_iterations_string_T0_7917.csv,81.132075


In [28]:
B111similarity_scores_SOI = []

for filename in os.listdir(directoryB111):
    file_path = os.path.join(directoryB111, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Statement of intent']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B111similarity_scores_SOI.append((filename, similarity_score))

# Convert the list to a DataFrame
B111similarity_SOI = pd.DataFrame(B111similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
B111similarity_SOI.to_csv("STRING_RESULT/B1.1.1/T0_similarity_scores_SOI", index=False)

B111similarity_SOI

Unnamed: 0,filename,Similarity SOI
0,all_iterations_string_T0_280.csv,53.960396
1,all_iterations_string_T0_3441.csv,51.041667
2,all_iterations_string_T0_3644.csv,53.140097
3,all_iterations_string_T0_5991.csv,54.146341
4,all_iterations_string_T0_7917.csv,55.276382


In [30]:
B111similarity_scores_NR = []

for filename in os.listdir(directoryB111):
    file_path = os.path.join(directoryB111, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    #Save the score in a dataframe
    B111similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame
B111similarity_NR = pd.DataFrame(B111similarity_scores_SOI, columns=['filename', 'Similarity NR'])
B111similarity_NR.to_csv("STRING_RESULT/B1.1.1/T0_similarity_scores_NR", index=False)

B111similarity_NR

Unnamed: 0,filename,Similarity NR
0,all_iterations_string_T0_280.csv,53.960396
1,all_iterations_string_T0_3441.csv,51.041667
2,all_iterations_string_T0_3644.csv,53.140097
3,all_iterations_string_T0_5991.csv,54.146341
4,all_iterations_string_T0_7917.csv,55.276382


## B1.2 - TWOshot codebook
Evaluation with T=0 and 1 iteration

Codebook is prepared. Currently not evaluated.
