In [1]:
import numpy as np
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
import difflib

## Notebook to annotate HLS speeches for principles
### B: string-based labels

Codebooks:
- B2.0: zero shot
- B2.1: one shot

Only apply on sentences in OG dataframe labelled as relevant
Temperature: 0
Iterations: 1

Model selection:
 As of 22-05-2024, gpt-4-turbo-2024-04-09 seems to be the only gpt-model that returns a fingerprint in addition to gpt-4o

  #model= "gpt-4-turbo-2024-04-09"
  #model = "gpt-3.5-turbo-0125"


### Import text to annotate
Select only the columns of the full dataframe that are needed, in this case:
Text and PRINCIPLE (RELEVANCE is used to filter rows)

In [2]:
# Load the string-labelled training data from disk
HLS_train = pd.read_csv(filepath_or_buffer='data/string/HLS_train_string.csv')

In [3]:
### Keep only the rows whose RELEVANCE label is 'Relevant'
# (single-speech subset kept for testing purposes)
#HLS_train_japan = HLS_train[HLS_train['id']=='COP19_japan']
HLS_train_relevant = HLS_train[HLS_train['RELEVANCE'].eq('Relevant')]

In [4]:
# Keep the sentence text together with its principle label
# NOTE(review): this selects from HLS_train, not from the HLS_train_relevant subset
# built earlier in the notebook — confirm whether only relevant sentences should be used.
principle_columns = ['Text', 'PRINCIPLE']
HLS_principle = HLS_train[principle_columns]
HLS_principle.head()

Unnamed: 0,Text,PRINCIPLE
0,"Thank you, Mr. President .",not evaluated
1,"On beha lf of the government of Japan , I wou...",not evaluated
2,I would also like to expr ess my d eepest con...,not evaluated
3,Mr. President: A fair and effective framewor...,utilitarian
4,"In this regard, Japan firmly supports the est...",not evaluated


### Import necessary files
- codebooks
- API key
- import gpt_annotate_string

In [5]:
# Read the zero-shot codebook (B2.0) into a single string
with open('codebooks/B2.0', mode='r', encoding='utf-8') as codebook_file:
    B20 = codebook_file.read()

In [6]:
# Read the OpenAI API key from a local text file, dropping surrounding whitespace
with open('gpt_api_key.txt', mode='r') as key_file:
    key = key_file.read().strip()

In [7]:
import gpt_annotate_string

### Prepare data for annotation
Compares the column names in HLS_principle to the codes identified by GPT-4o in the codebook. The seed for this identification is set to 1234.

In [8]:
# Build the dataframe that is handed to the annotator
# NOTE(review): the recorded output of this cell reports that the category names in
# the dataframe (['PRINCIPLE']) and in the codebook do not match exactly — confirm
# that this is expected for string-based labels.
text_to_annotate = gpt_annotate_string.prepare_data(
    HLS_principle,
    B20,
    key,
    prep_codebook=True,
)

ChatCompletion(id='chatcmpl-9TtFOSpeXilg3CpKwme9h5TjSrCCc', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='general normative statement, egalitarian, utilitarian, prioritarian, sufficientarian, libertarian', role='assistant', function_call=None, tool_calls=None))], created=1716910910, model='gpt-4o-2024-05-13', object='chat.completion', system_fingerprint='fp_43dfabdef1', usage=CompletionUsage(completion_tokens=18, prompt_tokens=572, total_tokens=590))
ERROR: Column names in codebook and text_to_annotate do not match exactly. Please note that order and capitalization matters.
Change order/spelling in codebook or text_to_annotate.

Exact order and spelling of category names in text_to_annotate:  ['PRINCIPLE']
Exact order and spelling of category names in codebook:  ['general normative statement', 'egalitarian', 'utilitarian', 'prioritarian', 'sufficientarian', 'libertarian']


Fingerprint used: fp_43dfabdef1

The seed for text preparation is hardcoded into gpt_annotate. This ensures that only results with the same fingerprint are kept across all seeds and all iterations: every time GPT-4o is called, only results with this specific fingerprint are saved.

### Run gpt_annotate_string
Evaluation per seed -
5 different seeds
Batch of 20 sentences
1 iteration per seed (5 runs in total)

Returns 3 outputs:
1. all_iterations_{seed}.csv
2. fingerprints_all.csv
3. missed_batches.csv

In [None]:
# Seeds passed to the annotation runs below, one run per seed
seeds = [
    3644,
    3441,
    280,
    5991,
    7917,
]
# System fingerprint passed to every annotation run
fingerprint = "fp_43dfabdef1"

In [None]:
# Annotate the data - T0 - I1
# Uses the zero-shot codebook loaded earlier as `B20`. The previous argument `B10`
# is never defined in this notebook, so the cell raised a NameError on a fresh kernel.
# NOTE(review): the experiment label was changed from "B1.0" to "B2.0" to match the
# codebook; the evaluation cells further down still read STRING_RESULT/B1.0 — confirm.
for seed in seeds:
    gpt_annotate_string.gpt_annotate(
        text_to_annotate, B20, key, seed, fingerprint,
        experiment="B2.0",
        num_iterations=1,
        model="gpt-4o",
        temperature=0,
        batch_size=20,
        human_labels=True,
    )

### Evaluate with temperature 0.6

In [None]:
# Annotate the data - T0.6 - I1
# Uses the zero-shot codebook loaded earlier as `B20`. The previous argument `B10`
# is never defined in this notebook, so the cell raised a NameError on a fresh kernel.
# NOTE(review): this run shares its experiment label with the temperature-0 run; if
# outputs are keyed by experiment and seed only, it may overwrite them — confirm.
for seed in seeds:
    gpt_annotate_string.gpt_annotate(
        text_to_annotate, B20, key, seed, fingerprint,
        experiment="B2.0",
        num_iterations=1,
        model="gpt-4o",
        temperature=0.6,
        batch_size=20,
        human_labels=True,
    )

## Evaluate accuracy of predictions
Based on similarity between predicted and true labels.
Currently not taking consistency into account - would be done by grouping


Define function for similarity score

In [None]:
def get_similarity_score(Rx, Ry, threshold=0.9):
    """Return the mean thresholded string similarity of two label Series, in percent.

    Every pair of labels is cast to ``str``, stripped of surrounding whitespace and
    compared with ``difflib.SequenceMatcher(None, x, y).ratio()``. Ratios below
    ``threshold`` are counted as 0; ratios at or above it keep their value.

    Parameters
    ----------
    Rx, Ry : pandas.Series
        Label series to compare; values are paired via ``Series.combine``.
    threshold : float, default 0.9
        Minimum ratio for a pair to contribute to the score. The default
        reproduces the previously hard-coded cut-off.

    Returns
    -------
    float
        Mean of the thresholded ratios multiplied by 100. For empty input this is
        whatever ``Series.mean`` returns for an empty Series.
    """
    # Normalise both sides to stripped strings so that stray whitespace and
    # non-string values do not break the comparison
    Rx = Rx.astype(str).str.strip()
    Ry = Ry.astype(str).str.strip()

    # Pairwise similarity ratio in [0, 1]
    similarity_scores = Rx.combine(Ry, lambda x, y: difflib.SequenceMatcher(None, x, y).ratio())

    # Pairs below the threshold count as a complete miss
    similarity_scores = similarity_scores.where(similarity_scores >= threshold, 0)

    # Return the mean similarity score as a percentage
    return similarity_scores.mean() * 100

### Evaluate performance

In [None]:
# Score every result file in the directory over all of its rows
# NOTE(review): these cells compare RELEVANCE_x / RELEVANCE_y, while this notebook
# annotates PRINCIPLE — confirm the column names in the result files.
directory = 'STRING_RESULT/B1.0/all_iterations'
similarity_scores_all = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)
    Rx, Ry = df['RELEVANCE_x'], df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    similarity_scores_all += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
similarity_all = pd.DataFrame(data=similarity_scores_all, columns=['filename', 'similarity ALL'])
similarity_all.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_all", index=False)

similarity_all

In [None]:
similarity_scores_relevant = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    # Restrict to rows whose RELEVANCE_x value is 'Relevant'
    relevant_df = df[df['RELEVANCE_x'].eq('Relevant')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    similarity_scores_relevant += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
similarity_relevant = pd.DataFrame(data=similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
similarity_relevant.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_relevant", index=False)

similarity_relevant

In [None]:
similarity_scores_SOI = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    # Restrict to rows whose RELEVANCE_x value is 'Statement of intent'
    relevant_df = df[df['RELEVANCE_x'].eq('Statement of intent')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    similarity_scores_SOI += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
similarity_SOI = pd.DataFrame(data=similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
similarity_SOI.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_SOI", index=False)

similarity_SOI

In [None]:
# Score every result file on the rows whose RELEVANCE_x value is 'Not relevant'
similarity_scores_NR = []

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    # Save the score as a (filename, score) record
    similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame.
# Built from the NR scores; the table was previously built from the SOI list,
# so the saved "NR" file contained the statement-of-intent scores.
similarity_NR = pd.DataFrame(similarity_scores_NR, columns=['filename', 'Similarity NR'])
similarity_NR.to_csv("STRING_RESULT/B1.0/T0_similarity_scores_NR", index=False)

similarity_NR

### Evaluation with context specified in text
codebook B1.0.1

In [None]:
# Read the zero-shot codebook with context (B1.0.1) into a single string
with open('codebooks/B1.0.1', mode='r', encoding='utf-8') as codebook_file:
    B101 = codebook_file.read()

In [None]:
# One annotation run per seed with codebook B1.0.1 - temperature 0, one iteration
for seed in seeds:
    gpt_annotate_string.gpt_annotate(
        text_to_annotate, B101, key, seed, fingerprint,
        experiment="B1.0.1",
        num_iterations=1,
        model="gpt-4o",
        temperature=0,
        batch_size=20,
        human_labels=True,
    )

### Evaluation B1.0 with context - B1.0.1

In [None]:
# Score every B1.0.1 result file over all of its rows
directoryB101 = 'STRING_RESULT/B1.0.1/all_iterations'
B101similarity_scores_all = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)
    Rx, Ry = df['RELEVANCE_x'], df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B101similarity_scores_all += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B101similarity_all = pd.DataFrame(data=B101similarity_scores_all, columns=['filename', 'similarity ALL'])
B101similarity_all.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_all", index=False)

B101similarity_all

In [None]:
B101similarity_scores_relevant = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)

    # Restrict to rows whose RELEVANCE_x value is 'Relevant'
    relevant_df = df[df['RELEVANCE_x'].eq('Relevant')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B101similarity_scores_relevant += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B101similarity_relevant = pd.DataFrame(data=B101similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
B101similarity_relevant.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_relevant", index=False)

B101similarity_relevant

In [None]:
B101similarity_scores_relevant_precision = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)

    # Here the filter is on RELEVANCE_y (not RELEVANCE_x) being 'Relevant'
    relevant_df = df[df['RELEVANCE_y'].eq('Relevant')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B101similarity_scores_relevant_precision += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B101similarity_relevant_precision = pd.DataFrame(data=B101similarity_scores_relevant_precision, columns=['filename', 'Similarity RELEVANT'])
B101similarity_relevant_precision.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_relevant_precision", index=False)

B101similarity_relevant_precision

In [None]:
B101similarity_scores_SOI = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)

    # Restrict to rows whose RELEVANCE_x value is 'Statement of intent'
    relevant_df = df[df['RELEVANCE_x'].eq('Statement of intent')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B101similarity_scores_SOI += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B101similarity_SOI = pd.DataFrame(data=B101similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
B101similarity_SOI.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_SOI", index=False)

B101similarity_SOI

In [None]:
# Score every B1.0.1 result file on the rows whose RELEVANCE_x value is 'Not relevant'
B101similarity_scores_NR = []

for filename in os.listdir(directoryB101):
    file_path = os.path.join(directoryB101, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    # Save the score as a (filename, score) record
    B101similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame.
# Built from the NR scores; the table was previously built from the SOI list,
# so the saved "NR" file contained the statement-of-intent scores.
B101similarity_NR = pd.DataFrame(B101similarity_scores_NR, columns=['filename', 'Similarity NR'])
B101similarity_NR.to_csv("STRING_RESULT/B1.0.1/T0_similarity_scores_NR", index=False)

B101similarity_NR

## B1.1 - Oneshot codebook
Evaluation with T=0 and 1 iteration

In [None]:
# Read the one-shot codebook (B1.1) into a single string
with open('codebooks/B1.1', mode='r', encoding='utf-8') as codebook_file:
    B11 = codebook_file.read()

In [None]:
# One annotation run per seed with codebook B1.1 - temperature 0, one iteration
for seed in seeds:
    gpt_annotate_string.gpt_annotate(
        text_to_annotate, B11, key, seed, fingerprint,
        experiment="B1.1",
        num_iterations=1,
        model="gpt-4o",
        temperature=0,
        batch_size=20,
        human_labels=True,
    )

## Evaluation B1.1

In [None]:
# Score every B1.1 result file over all of its rows
directoryB11 = 'STRING_RESULT/B1.1/all_iterations'
B11similarity_scores_all = []

for filename in os.listdir(directoryB11):
    file_path = os.path.join(directoryB11, filename)
    df = pd.read_csv(file_path)
    Rx, Ry = df['RELEVANCE_x'], df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B11similarity_scores_all += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B11similarity_all = pd.DataFrame(data=B11similarity_scores_all, columns=['filename', 'similarity ALL'])
B11similarity_all.to_csv("STRING_RESULT/B1.1/T0_similarity_scores_all", index=False)

B11similarity_all

In [None]:
B11similarity_scores_relevant = []

for filename in os.listdir(directoryB11):
    file_path = os.path.join(directoryB11, filename)
    df = pd.read_csv(file_path)

    # Restrict to rows whose RELEVANCE_x value is 'Relevant'
    relevant_df = df[df['RELEVANCE_x'].eq('Relevant')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B11similarity_scores_relevant += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B11similarity_relevant = pd.DataFrame(data=B11similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
B11similarity_relevant.to_csv("STRING_RESULT/B1.1/T0_similarity_scores_relevant", index=False)

B11similarity_relevant

In [None]:
B11similarity_scores_SOI = []

for filename in os.listdir(directoryB11):
    file_path = os.path.join(directoryB11, filename)
    df = pd.read_csv(file_path)

    # Restrict to rows whose RELEVANCE_x value is 'Statement of intent'
    relevant_df = df[df['RELEVANCE_x'].eq('Statement of intent')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B11similarity_scores_SOI += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B11similarity_SOI = pd.DataFrame(data=B11similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
B11similarity_SOI.to_csv("STRING_RESULT/B1.1/T0_similarity_scores_SOI", index=False)

B11similarity_SOI

In [None]:
# Score every B1.1 result file on the rows whose RELEVANCE_x value is 'Not relevant'
B11similarity_scores_NR = []

for filename in os.listdir(directoryB11):
    file_path = os.path.join(directoryB11, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    # Save the score as a (filename, score) record
    B11similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame.
# Built from the NR scores; the table was previously built from the SOI list,
# so the saved "NR" file contained the statement-of-intent scores.
B11similarity_NR = pd.DataFrame(B11similarity_scores_NR, columns=['filename', 'Similarity NR'])
B11similarity_NR.to_csv("STRING_RESULT/B1.1/T0_similarity_scores_NR", index=False)

B11similarity_NR

## B1.1.1 - Oneshot codebook WITH CONTEXT
Evaluation with T=0 and 1 iteration


In [None]:
# Read the one-shot codebook with context (B1.1.1) into a single string
with open('codebooks/B1.1.1', mode='r', encoding='utf-8') as codebook_file:
    B111 = codebook_file.read()

In [None]:
# One annotation run per seed with codebook B1.1.1 - temperature 0, one iteration
for seed in seeds:
    gpt_annotate_string.gpt_annotate(
        text_to_annotate, B111, key, seed, fingerprint,
        experiment="B1.1.1",
        num_iterations=1,
        model="gpt-4o",
        temperature=0,
        batch_size=20,
        human_labels=True,
    )

## Evaluate B111


In [None]:
# Score every B1.1.1 result file over all of its rows
directoryB111 = 'STRING_RESULT/B1.1.1/all_iterations'
B111similarity_scores_all = []

for filename in os.listdir(directoryB111):
    file_path = os.path.join(directoryB111, filename)
    df = pd.read_csv(file_path)
    Rx, Ry = df['RELEVANCE_x'], df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B111similarity_scores_all += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B111similarity_all = pd.DataFrame(data=B111similarity_scores_all, columns=['filename', 'similarity ALL'])
B111similarity_all.to_csv("STRING_RESULT/B1.1.1/T0_similarity_scores_all", index=False)

B111similarity_all

In [None]:
B111similarity_scores_relevant = []

for filename in os.listdir(directoryB111):
    file_path = os.path.join(directoryB111, filename)
    df = pd.read_csv(file_path)

    # Restrict to rows whose RELEVANCE_x value is 'Relevant'
    relevant_df = df[df['RELEVANCE_x'].eq('Relevant')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B111similarity_scores_relevant += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B111similarity_relevant = pd.DataFrame(data=B111similarity_scores_relevant, columns=['filename', 'Similarity RELEVANT'])
B111similarity_relevant.to_csv("STRING_RESULT/B1.1.1/T0_similarity_scores_relevant", index=False)

B111similarity_relevant

In [None]:
B111similarity_scores_SOI = []

for filename in os.listdir(directoryB111):
    file_path = os.path.join(directoryB111, filename)
    df = pd.read_csv(file_path)

    # Restrict to rows whose RELEVANCE_x value is 'Statement of intent'
    relevant_df = df[df['RELEVANCE_x'].eq('Statement of intent')]
    Rx, Ry = relevant_df['RELEVANCE_x'], relevant_df['RELEVANCE_y']

    # One (filename, score) record per result file
    similarity_score = get_similarity_score(Rx, Ry)
    B111similarity_scores_SOI += [(filename, similarity_score)]

# Tabulate the per-file scores and write them to disk
B111similarity_SOI = pd.DataFrame(data=B111similarity_scores_SOI, columns=['filename', 'Similarity SOI'])
B111similarity_SOI.to_csv("STRING_RESULT/B1.1.1/T0_similarity_scores_SOI", index=False)

B111similarity_SOI

In [None]:
# Score every B1.1.1 result file on the rows whose RELEVANCE_x value is 'Not relevant'
B111similarity_scores_NR = []

for filename in os.listdir(directoryB111):
    file_path = os.path.join(directoryB111, filename)
    df = pd.read_csv(file_path)

    relevant_df = df[df['RELEVANCE_x'] == 'Not relevant']

    Rx = relevant_df['RELEVANCE_x']
    Ry = relevant_df['RELEVANCE_y']

    similarity_score = get_similarity_score(Rx,Ry)

    # Save the score as a (filename, score) record
    B111similarity_scores_NR.append((filename, similarity_score))

# Convert the list to a DataFrame.
# Built from the NR scores; the table was previously built from the SOI list,
# so the saved "NR" file contained the statement-of-intent scores.
B111similarity_NR = pd.DataFrame(B111similarity_scores_NR, columns=['filename', 'Similarity NR'])
B111similarity_NR.to_csv("STRING_RESULT/B1.1.1/T0_similarity_scores_NR", index=False)

B111similarity_NR

## B1.2 - TWOshot codebook
Evaluation with T=0 and 1 iteration

Codebook is prepared. Currently not evaluated.
