## Imports

Importing the required modules for the project.

In [1]:
import os
from glob import glob

OpenAI API for interacting with ChatGPT.

In [2]:
from openai import OpenAI

The ```dotenv_values``` function from the dotenv module for accessing configuration file properties.

In [3]:
from dotenv import dotenv_values

Numpy and Pandas for advanced array and dataframe functionality, respectively.

In [4]:
import numpy as np
import pandas as pd

## Global Settings

Global settings and variables to be used throughout the project.

Defining a random number generator to be used throughout the project in order to ensure the same random results are obtained each time the project is run.

In [5]:
rng = np.random.default_rng(seed=101)

In [6]:
csv_files = glob(f"../output/*.csv")

In [7]:
dfs = [pd.read_csv(csv_file, index_col=0) for csv_file in csv_files]

In [8]:
for index, df in enumerate(dfs):
    print(csv_files[index])
    print(len(df))

../output/fbv_san_8_x.csv
91
../output/fbv_san_12.csv
91
../output/second_order_gpt4_4.csv
81
../output/second_order_gpt4_5.csv
81
../output/fbv_san_13.csv
91
../output/fbv_san_14_x.csv
91
../output/fbv_san_16_x.csv
91
../output/fbv_san_11.csv
91
../output/second_order_gpt4_7.csv
81
../output/second_order_8.csv
81
../output/second_order_9.csv
81
../output/second_order_gpt4_6.csv
81
../output/fbv_san_10.csv
91
../output/ullman_results_14_1.csv
20
../output/fbv_san_14.csv
91
../output/fixed.csv
1817
../output/second_order_gpt4_2.csv
81
../output/second_order_gpt4_3.csv
81
../output/fbv_san_15.csv
91
../output/fbv_san_12_x.csv
91
../output/ullman_results_14_2.csv
56
../output/fbv_san_17.csv
91
../output/fbv_san_10_x.csv
91
../output/second_order_gpt4_1.csv
81
../output/fbv_san_16.csv
91
../output/fbv_san_1.csv
91
../output/ullman_results_9.csv
170
../output/ullman_results_13.csv
170
../output/fbv_san_17_x.csv
91
../output/ullman_results_12.csv
170
../output/ullman_results_8.csv
170
../out

In [9]:
combined_df = pd.concat(dfs, ignore_index=True).reset_index(drop=True)

In [10]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12308 entries, 0 to 12307
Data columns (total 46 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   study_id                 12308 non-null  int64  
 1   study_name               12308 non-null  object 
 2   scenario_id              12308 non-null  float64
 3   scenario_code            12308 non-null  object 
 4   story_id                 12308 non-null  object 
 5   chat_id                  12308 non-null  float64
 6   story_common_id          12308 non-null  object 
 7   story_category           12308 non-null  object 
 8   story_name               12308 non-null  object 
 9   story_content            12308 non-null  object 
 10  story_language           12308 non-null  object 
 11  chat_name                12308 non-null  object 
 12  chat_language            12308 non-null  object 
 13  chat_has_fbv_zan         476 non-null    float64
 14  chat_has_fbv_san      

In [11]:
import dataframe_image as dfi

In [12]:
combined_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
study_id,12308.0,2.245775,0.696106,1.0,2.0,2.0,3.0,3.0
scenario_id,12308.0,88.61635,25.859614,1.0,76.0,93.0,110.0,116.0
chat_id,12308.0,39.94223,14.41883,1.0,28.0,45.0,50.0,56.0
chat_has_fbv_zan,476.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
chat_has_fbv_san,6202.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
system_message_id,12308.0,4.350179,1.467359,1.0,4.0,4.0,6.0,6.0
question_index,12308.0,1.846198,1.792109,0.0,0.0,2.0,3.0,7.0
question_common_id,12308.0,12.13739,11.20492,1.0,4.0,7.0,21.0,35.0
question_has_fbv_zan,272.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
question_has_fbv_san,3321.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0


In [13]:
combined_df.head(3)

Unnamed: 0,study_id,study_name,scenario_id,scenario_code,story_id,chat_id,story_common_id,story_category,story_name,story_content,...,completion_tokens,prompt_tokens,total_tokens,content,role,function_call,tool_calls,chat_has_fbv,question_has_fbv,index
0,3,Second Order,91.0,4B-TR/43,4B-TR,43.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,23,280,303,"Can, Gamze'nin balon almak için okula gittiğin...",assistant,,,,,
1,3,Second Order,91.0,4B-TR/43,4B-TR,43.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,52,336,388,"Can, Gamze'nin balon almak için okula gittiğin...",assistant,,,,,
2,3,Second Order,91.0,4B-TR/43,4B-TR,43.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,52,424,476,"Hayır, Can baloncunun Gamze'nin nereye gittiği...",assistant,,,,,


In [14]:
a = combined_df.groupby(["scenario_id"]).size().to_frame()

In [15]:
a

Unnamed: 0_level_0,0
scenario_id,Unnamed: 1_level_1
1.0,20
2.0,20
3.0,20
4.0,20
5.0,20
...,...
112.0,320
113.0,480
114.0,480
115.0,640


In [16]:
b = pd.read_csv("../notebooks/combined.csv")

In [17]:
b = b.groupby(["scenario_id"]).size().to_frame()

In [18]:
b

Unnamed: 0_level_0,0
scenario_id,Unnamed: 1_level_1
1.0,20
2.0,20
3.0,20
4.0,20
5.0,20
...,...
112.0,80
113.0,120
114.0,120
115.0,160


In [19]:
a.compare(b)

Unnamed: 0_level_0,0,0
Unnamed: 0_level_1,self,other
scenario_id,Unnamed: 1_level_2,Unnamed: 2_level_2
81.0,120.0,114.0
82.0,120.0,114.0
83.0,114.0,120.0
84.0,120.0,114.0
85.0,120.0,114.0
86.0,114.0,120.0
87.0,160.0,152.0
88.0,160.0,152.0
89.0,152.0,160.0
90.0,140.0,133.0


In [20]:
combined_df

Unnamed: 0,study_id,study_name,scenario_id,scenario_code,story_id,chat_id,story_common_id,story_category,story_name,story_content,...,completion_tokens,prompt_tokens,total_tokens,content,role,function_call,tool_calls,chat_has_fbv,question_has_fbv,index
0,3,Second Order,91.0,4B-TR/43,4B-TR,43.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,23,280,303,"Can, Gamze'nin balon almak için okula gittiğin...",assistant,,,,,
1,3,Second Order,91.0,4B-TR/43,4B-TR,43.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,52,336,388,"Can, Gamze'nin balon almak için okula gittiğin...",assistant,,,,,
2,3,Second Order,91.0,4B-TR/43,4B-TR,43.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,52,424,476,"Hayır, Can baloncunun Gamze'nin nereye gittiği...",assistant,,,,,
3,3,Second Order,91.0,4B-TR/43,4B-TR,43.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,18,502,520,Baloncu aslında alışveriş merkezine gitmiştir.,assistant,,,,,
4,3,Second Order,91.0,4B-TR/43,4B-TR,43.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,17,548,565,Gamze baloncunun parkta olduğunu düşünüyor.,assistant,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12303,3,Second Order,116.0,4B-TR/56,4B-TR,56.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,47,413,460,"Hayır, Can baloncunun Gamze'ye nereye gittiğin...",assistant,,,,,
12304,3,Second Order,116.0,4B-TR/56,4B-TR,56.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,18,486,504,Baloncu aslında alışveriş merkezine gitmiştir.,assistant,,,,,
12305,3,Second Order,116.0,4B-TR/56,4B-TR,56.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,17,530,547,Gamze baloncunun okula gittiğini sanıyor.,assistant,,,,,
12306,3,Second Order,116.0,4B-TR/56,4B-TR,56.0,4B,Unexpected Transfer,The Balloon Man,Gamze ve Can parkta oynuyorlar. Gamze baloncud...,...,29,577,606,Gamze'nin baloncuyu bulmak için gideceği yer a...,assistant,,,,,


In [21]:
problematic = a.compare(b).index

In [22]:
problemo = combined_df[combined_df["scenario_id"].isin(problematic)]

In [23]:
problemo[problemo["scenario_id"].isin([104.0, 105.0, 106.0])].to_csv("problemo.csv")

In [24]:
fixed = pd.concat([combined_df[~combined_df["scenario_id"].isin(problematic)], pd.read_csv("../output/fjxed_combined.csv", index_col=0)])

In [25]:
fixed = pd.concat([fixed[~fixed["scenario_id"].isin([104, 105, 106])], pd.read_csv("../output/prowow.csv")])

In [26]:
b.compare(fixed.groupby("scenario_id").size().to_frame())

ValueError: Can only compare identically-labeled (both index and columns) DataFrame objects

In [None]:
fixed.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6513 entries, 182 to 59
Data columns (total 44 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   study_id                 6513 non-null   int64  
 1   study_name               6513 non-null   object 
 2   scenario_id              6513 non-null   float64
 3   scenario_code            6513 non-null   object 
 4   story_id                 6513 non-null   object 
 5   chat_id                  6513 non-null   float64
 6   story_common_id          6513 non-null   object 
 7   story_category           6513 non-null   object 
 8   story_name               6513 non-null   object 
 9   story_content            6513 non-null   object 
 10  story_language           6513 non-null   object 
 11  chat_name                6513 non-null   object 
 12  chat_language            6513 non-null   object 
 13  chat_has_fbv_zan         1540 non-null   float64
 14  chat_has_fbv_san         1540

In [None]:
fixed.columns

Index(['study_id', 'study_name', 'scenario_id', 'scenario_code', 'story_id',
       'chat_id', 'story_common_id', 'story_category', 'story_name',
       'story_content', 'story_language', 'chat_name', 'chat_language',
       'chat_has_fbv_zan', 'chat_has_fbv_san', 'questions',
       'system_message_id', 'system_message_language',
       'system_message_content', 'question_index', 'question_id',
       'question_common_id', 'question_content', 'question_type',
       'question_language', 'question_has_fbv_zan', 'question_has_fbv_san',
       'question_tom_order', 'question_tom_type', 'answer_id',
       'answer_correct', 'id', 'created', 'model', 'object',
       'system_fingerprint', 'completion_tokens', 'prompt_tokens',
       'total_tokens', 'content', 'role', 'function_call', 'tool_calls',
       'chat_has_fbv', 'question_has_fbv', 'index', 'Unnamed: 0'],
      dtype='object')

In [None]:
fixed["question_has_fbv_zan"] = fixed["question_has_fbv_zan"].fillna(fixed["question_has_fbv"])

In [None]:
fixed["chat_has_fbv_zan"] = fixed["chat_has_fbv_zan"].fillna(fixed["chat_has_fbv"])

In [None]:
fixed = fixed.drop(columns=["index", "Unnamed: 0", "chat_has_fbv", "question_has_fbv"])

In [None]:
v = ((fixed.groupby("scenario_id")["questions"].apply(lambda x: x.apply(lambda text: text.count(","))) + 1).to_frame().droplevel(level=1) * 10)["questions"]

In [None]:
extra = pd.read_csv("../output/extra.csv", index_col=0)

In [None]:
new = pd.concat([fixed, extra])

In [None]:
#new["session_id"] = (new["question_index"] == 0).cumsum()

In [None]:
#new["initial_order"] = range(0, len(new))

In [None]:
fixed = pd.read_csv("./all_responses.csv", index_col=0)

In [None]:
new = pd.read_csv("./all_responses.csv", index_col=0)

In [None]:
new.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6560 entries, 0 to 6559
Data columns (total 46 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   initial_order            6560 non-null   int64  
 1   initial_index            6560 non-null   int64  
 2   session_id               6560 non-null   int64  
 3   study_id                 6560 non-null   int64  
 4   study_name               6560 non-null   object 
 5   scenario_id              6560 non-null   float64
 6   scenario_code            6560 non-null   object 
 7   story_id                 6560 non-null   object 
 8   chat_id                  6560 non-null   float64
 9   story_common_id          6560 non-null   object 
 10  story_category           6560 non-null   object 
 11  story_name               6560 non-null   object 
 12  story_content            6560 non-null   object 
 13  story_language           6560 non-null   object 
 14  chat_name                6560

In [None]:
new.columns

Index(['initial_order', 'initial_index', 'session_id', 'study_id',
       'study_name', 'scenario_id', 'scenario_code', 'story_id', 'chat_id',
       'story_common_id', 'story_category', 'story_name', 'story_content',
       'story_language', 'chat_name', 'chat_language', 'chat_has_fbv_zan',
       'chat_has_fbv_san', 'questions', 'system_message_id',
       'system_message_language', 'system_message_content', 'question_index',
       'question_id', 'question_common_id', 'question_content',
       'question_type', 'question_language', 'question_has_fbv_zan',
       'question_has_fbv_san', 'question_tom_order', 'question_tom_type',
       'answer_id', 'answer_correct', 'id', 'created', 'model', 'object',
       'system_fingerprint', 'completion_tokens', 'prompt_tokens',
       'total_tokens', 'content', 'role', 'function_call', 'tool_calls'],
      dtype='object')

In [None]:
new.insert(0, "initial_order", range(0, len(new)))
new.insert(1, "initial_index", new.index)
new.insert(2, "session_id", (new["question_index"] == 0).cumsum() - 1)

In [None]:
new = new.sort_values(["study_id", "scenario_id", "session_id", "question_index"])

In [None]:
new = new.reset_index(drop=True)

In [None]:
new

Unnamed: 0,initial_order,initial_index,session_id,study_id,study_name,scenario_id,scenario_code,story_id,chat_id,story_common_id,...,model,object,system_fingerprint,completion_tokens,prompt_tokens,total_tokens,content,role,function_call,tool_calls
0,679,3896,109,1,Ullman Replication,1.0,1-EN/1,1-EN,1.0,1,...,gpt-3.5-turbo-0613,chat.completion,,10,90,100,Sam believes that the bag is full of chocolate.,assistant,,
1,849,4066,189,1,Ullman Replication,1.0,1-EN/1,1-EN,1.0,1,...,gpt-4-1106-preview,chat.completion,fp_a24b4d720c,3,90,93,chocolate.,assistant,,
2,1019,4327,269,1,Ullman Replication,1.0,1-EN/1,1-EN,1.0,1,...,gpt-4-1106-preview,chat.completion,fp_a24b4d720c,3,90,93,chocolate.,assistant,,
3,1189,4497,349,1,Ullman Replication,1.0,1-EN/1,1-EN,1.0,1,...,gpt-3.5-turbo-0613,chat.completion,,10,90,100,Sam believes that the bag is full of chocolate.,assistant,,
4,1359,4667,429,1,Ullman Replication,1.0,1-EN/1,1-EN,1.0,1,...,gpt-4-1106-preview,chat.completion,fp_a24b4d720c,2,90,92,chocolate,assistant,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6555,6448,4107,2252,3,Second Order,116.0,4B-TR/56,4B-TR,56.0,4B,...,gpt-3.5-turbo-0613,chat.completion,,47,413,460,"Hayır, Can baloncunun Gamze'ye nereye gittiğin...",assistant,,
6556,6449,4108,2252,3,Second Order,116.0,4B-TR/56,4B-TR,56.0,4B,...,gpt-3.5-turbo-0613,chat.completion,,18,486,504,Baloncu aslında alışveriş merkezine gitmiştir.,assistant,,
6557,6450,4109,2252,3,Second Order,116.0,4B-TR/56,4B-TR,56.0,4B,...,gpt-3.5-turbo-0613,chat.completion,,17,530,547,Gamze baloncunun okula gittiğini sanıyor.,assistant,,
6558,6451,4110,2252,3,Second Order,116.0,4B-TR/56,4B-TR,56.0,4B,...,gpt-3.5-turbo-0613,chat.completion,,29,577,606,Gamze'nin baloncuyu bulmak için gideceği yer a...,assistant,,


In [None]:
new["question_tom_type"] = new["question_id"].map(mapping_dict).combine_first(new["question_tom_type"])

In [None]:
def create_new_column(row):
    language = row['story_language']
    
    if pd.notna(row['question_has_fbv_zan']):
        return f"{language}-zan"
    elif pd.notna(row['question_has_fbv_san']):
        return f"{language}-san"
    else:
        return f"{language}"

In [None]:
new["language_category"] = new.apply(create_new_column, axis=1)

In [None]:
new.to_csv("all_responses.csv")

In [None]:
questions = pd.read_excel("../input/parameters.xlsx", sheet_name="tbl_questions")

In [None]:
mapping_dict = dict(zip(questions['question_id'], questions['question_tom_type']))

In [None]:
answers = pd.read_csv("../input/answers.csv", index_col=0)

In [None]:
answers.columns

Index(['question_id', 'question_language', 'question_tom_type',
       'question_tom_order', 'question_type', 'answer_correct', 'content',
       'is_correct', 'has_correct_reasoning', 'is_beyond_scope',
       'has_grammatical_error', 'needs_debating'],
      dtype='object')

In [None]:
answers.drop_duplicates(subset=["question_id", "question_language", "question_tom_type", "question_tom_order", "question_type", "answer_correct", "content"])

Unnamed: 0_level_0,question_id,question_language,question_tom_type,question_tom_order,question_type,answer_correct,content,is_correct,has_correct_reasoning,is_beyond_scope,has_grammatical_error,needs_debating
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,1_EN,English,Belief,1,Closed-ended,chocolate,Sam believes that the bag is full of chocolate.,True,,,,
1,1_EN,English,Belief,1,Closed-ended,chocolate,chocolate.,True,,,,
2,1_EN,English,Belief,1,Closed-ended,chocolate,chocolate,True,,,,
3,1_EN,English,Belief,1,Closed-ended,popcorn,Sam believes that the bag is full of chocolate.,False,,,,
4,1_EN,English,Belief,1,Closed-ended,popcorn,chocolate.,False,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1347,31_TR,Turkish,Knowledge,0,Open-ended,"Hayır, bilmiyor.","Hayır, Can baloncunun Gamze'ye nereye gittiğin...",True,False,,,
1348,33_TR-3,Turkish,Thought,1,Closed-ended,okulda,Gamze baloncunun okulda olduğunu sanıyor.,True,,,,
1349,33_TR-3,Turkish,Thought,1,Closed-ended,okulda,Gamze baloncunun okula gittiğini sanıyor.,True,,,,
1350,35_TR-3,Turkish,Thought,1,Closed-ended,okulda,Can baloncunun parkta olduğunu sanıyor.,False,,,,


In [None]:
combined = new.merge(
    answers.drop_duplicates(
        subset=["question_id",
                "question_language",
                "question_tom_type",
                "question_tom_order",
                "question_type",
                "answer_correct",
                "content"]
        ),
        how="left",
        on=["question_id",
            "question_language",
            "question_tom_order",
            "question_type",
            "answer_correct", 
            "content"])

In [None]:
len(combined[combined["is_correct"].isna()])

342

In [None]:
combined = combined.drop(columns="question_tom_type_y")

In [None]:
combined = combined.rename(columns={"question_tom_type_x": "question_tom_type"})

In [None]:
combined[combined["study_id"] == 2]["is_correct"].sum()

1650

In [None]:
combined_2[combined_2["study_id"] == 2]["is_correct"].sum()

1644

In [None]:
combined.to_csv("all_responses_coded.csv")

In [None]:
combined.columns

Index(['initial_order', 'initial_index', 'session_id', 'study_id',
       'study_name', 'scenario_id', 'scenario_code', 'story_id', 'chat_id',
       'story_common_id', 'story_category', 'story_name', 'story_content',
       'story_language', 'chat_name', 'chat_language', 'chat_has_fbv_zan',
       'chat_has_fbv_san', 'questions', 'system_message_id',
       'system_message_language', 'system_message_content', 'question_index',
       'question_id', 'question_common_id', 'question_content',
       'question_type', 'question_language', 'question_has_fbv_zan',
       'question_has_fbv_san', 'question_tom_order', 'question_tom_type',
       'answer_id', 'answer_correct', 'id', 'created', 'model', 'object',
       'system_fingerprint', 'completion_tokens', 'prompt_tokens',
       'total_tokens', 'content', 'role', 'function_call', 'tool_calls',
       'language_category', 'is_correct', 'has_correct_reasoning',
       'is_beyond_scope', 'has_grammatical_error', 'needs_debating'],
      dtyp

In [None]:
def studify(study_id: int):
    study = combined.loc[combined["study_id"] == study_id]
    df = study.loc[:, ["initial_order", "initial_index", "session_id", "story_category", "story_id", "story_common_id", "question_id", "question_common_id", "story_language", "language_category", "model", "question_tom_type", "content", "is_correct"]]
    codes = dict(df[["story_category", "story_language", "language_category", "model", "question_tom_type"]].apply(lambda column: dict(enumerate(column.astype("category").cat.categories))))
    df.loc[:, ["story_category", "story_common_id", "story_language", "language_category", "model", "question_tom_type"]] = df[["story_category", "story_common_id", "story_language", "language_category", "model", "question_tom_type"]].apply(lambda column: column.astype("category").cat.codes)

    return (codes, df)

In [None]:
codes, df = studify(3)

In [None]:
codes

{'story_category': {0: 'Unexpected Contents', 1: 'Unexpected Transfer'},
 'story_language': {0: 'English', 1: 'Turkish'},
 'language_category': {0: 'English',
  1: 'Turkish',
  2: 'Turkish-san',
  3: 'Turkish-zan'},
 'model': {0: 'gpt-3.5-turbo-0613', 1: 'gpt-4-1106-preview'},
 'question_tom_type': {0: 'Action',
  1: 'Belief',
  2: 'Explanation',
  3: 'Reality',
  4: 'Thought'}}

In [None]:
df["is_correct"].sum()

1650

In [None]:
len(df[df["is_correct"].isna()])

302

In [27]:
combined_2 = pd.read_csv("combined.csv")

In [None]:
c_study_1 = combined[combined["study_id"] == 1]

In [28]:
study_1.sort_values("scenario_id")["is_correct"].compare(c_study_1.sort_values("scenario_id")["is_correct"]).query("(self != True) and (self != False)")

NameError: name 'study_1' is not defined

In [None]:
c_study_1["is_correct"].sum()

476

In [693]:
df.to_excel("study_1.xlsx")

In [424]:
study_1.loc[:, ["story_category", "story_id", "story_language", "language_category", "model", "question_tom_type", "content", "is_correct"]]

KeyError: "['language_category', 'is_correct'] not in index"

In [None]:
fixed.to_csv("all_responses.csv")

In [None]:
fixed["session_id"].

array([   1,    2,    3, ..., 2311, 2312, 2313])

In [None]:
v = fixed.drop_duplicates(subset="scenario_id").set_index("scenario_id")["questions"].str.count(",") + 1

In [None]:
v = v.sort_index()

In [None]:
v

scenario_id
1.0      1
2.0      1
3.0      1
4.0      1
5.0      1
        ..
112.0    4
113.0    6
114.0    6
115.0    8
116.0    7
Name: questions, Length: 116, dtype: int64

In [None]:
b = (fixed.groupby("scenario_id").size() / v / 2 / 10).to_frame()

In [None]:
b[b[0] != 1].to_clipboard()

In [None]:
(fixed.groupby("scenario_id").size().to_frame() / 2) / v

ValueError: cannot reindex on an axis with duplicate labels

In [None]:
fixed.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6513 entries, 182 to 59
Data columns (total 44 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   study_id                 6513 non-null   int64  
 1   study_name               6513 non-null   object 
 2   scenario_id              6513 non-null   float64
 3   scenario_code            6513 non-null   object 
 4   story_id                 6513 non-null   object 
 5   chat_id                  6513 non-null   float64
 6   story_common_id          6513 non-null   object 
 7   story_category           6513 non-null   object 
 8   story_name               6513 non-null   object 
 9   story_content            6513 non-null   object 
 10  story_language           6513 non-null   object 
 11  chat_name                6513 non-null   object 
 12  chat_language            6513 non-null   object 
 13  chat_has_fbv_zan         1540 non-null   float64
 14  chat_has_fbv_san         1540

In [None]:
fixed[fixed["scenario_id"] == 81].value_counts(["model", "question_id"])

model               question_id
gpt-4-1106-preview  10_EN          10
                    11_EN          10
                    12_EN          10
                    13_EN          10
                    14_EN          10
                    9_EN           10
gpt-3.5-turbo-0613  10_EN           9
                    11_EN           9
                    12_EN           9
                    13_EN           9
                    14_EN           9
                    9_EN            9
Name: count, dtype: int64

In [None]:
fixed.to_csv("to_fix.csv")

In [None]:
combined_df.to_csv("../output/combined.csv")

In [None]:
answers = combined_df.loc[:,
    [
        "question_id",
        "question_common_id",
        "question_language",
        "question_tom_type", 
        "question_tom_order", 
        "question_type", 
        "answer_correct", 
        "content"
    ]
    ].drop_duplicates(
        subset=["question_common_id", "answer_correct", "content"]
        ).reset_index(drop=True)

In [None]:
answers

Unnamed: 0,story_id,question_id,question_common_id,question_language,question_tom_type,question_tom_order,question_type,answer_correct,content
0,1-EN,1_EN,1.0,English,Belief,1.0,Closed-ended,chocolate,Sam believes that the bag is full of chocolate.
1,1-EN,1_EN,1.0,English,Belief,1.0,Closed-ended,chocolate,chocolate.
2,1-EN,1_EN,1.0,English,Belief,1.0,Closed-ended,chocolate,chocolate
3,1A-EN,1_EN,1.0,English,Belief,1.0,Closed-ended,popcorn,Sam believes that the bag is full of chocolate.
4,1A-EN,1_EN,1.0,English,Belief,1.0,Closed-ended,popcorn,chocolate.
...,...,...,...,...,...,...,...,...,...
1390,4B-TR,31_TR,31.0,Turkish,Knowledge,0.0,Open-ended,"Hayır, bilmiyor.","Hayır, Can baloncunun Gamze'ye nereye gittiğin..."
1391,4B-TR,33_TR-3,33.0,Turkish,Thought,1.0,Closed-ended,okulda,Gamze baloncunun okulda olduğunu sanıyor.
1392,4B-TR,33_TR-3,33.0,Turkish,Thought,1.0,Closed-ended,okulda,Gamze baloncunun okula gittiğini sanıyor.
1393,4B-TR,35_TR-3,35.0,Turkish,Thought,1.0,Closed-ended,okulda,Can baloncunun parkta olduğunu sanıyor.


In [None]:
from nltk.stem import SnowballStemmer
import nltk
import re
import snowballstemmer

In [None]:
nltk.download("stopwords")
nltk.download("punkt")
nltk.download("snowball_data")

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/onurbal101/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/onurbal101/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package snowball_data to
[nltk_data]     /Users/onurbal101/nltk_data...
[nltk_data]   Package snowball_data is already up-to-date!


True

In [None]:
def turkish_stemming(text):
    stemmer = snowballstemmer.stemmer("turkish")
    return " ".join([stemmer.stemWord(word) for word in re.findall(r'\b\w+\b', text)])

def english_stemming(text):
    stemmer = SnowballStemmer("english")
    return " ".join([stemmer.stem(word) for word in re.findall(r'\b\w+\b', text)])

In [None]:
#answers.loc[answers["answer_correct"].isna(), "answer_correct"] = "box"

In [None]:
#answers["answer_correct"] = answers["answer_correct"].astype("str")

In [None]:
answers['stemmed_content'] = answers.apply(lambda row: turkish_stemming(row['content']) if row['question_language'] == 'Turkish' else english_stemming(row['content']), axis=1)
answers['stemmed_answer_correct'] = answers.apply(lambda row: turkish_stemming(row['answer_correct']) if row['question_language'] == 'Turkish' else english_stemming(row['answer_correct']), axis=1)

In [None]:
answers.head(3)

Unnamed: 0,question_id,question_language,question_tom_type,question_tom_order,question_type,answer_correct,content,stemmed_content,stemmed_answer_correct
0,1_EN,English,Belief,1.0,Closed-ended,chocolate,Sam believes that the bag is full of chocolate.,sam believ that the bag is full of chocol,chocol
1,1_EN,English,Belief,1.0,Closed-ended,chocolate,chocolate.,chocol,chocol
2,1_EN,English,Belief,1.0,Closed-ended,chocolate,chocolate,chocol,chocol


In [None]:
answers['is_correct'] = answers.apply(lambda row: row["question_type"] == "Closed-ended" and row['stemmed_answer_correct'] in row['stemmed_content'], axis=1)

In [None]:
answers.to_csv("../output/processed/answers.csv")

In [None]:
import dataframe_image as dfi

In [None]:
samp = answers[answers["answer_type"] == "Closed-ended"][["stemmed_content", "stemmed_answer_correct", "is_correct"]].sample(n=10)
dfi.export(samp, "table.png")

In [None]:
#answers["similarity"] = answers.apply(lambda row: fuzz.ratio(row["answer_correct"], row["content"]))

In [None]:
answers.loc[answers["answer_type"] == "Open-ended", "is_correct"] = None

  answers.loc[answers["answer_type"] == "Open-ended", "is_correct"] = None


In [None]:
answers[answers["answer_type"] == "Open-ended"]

Unnamed: 0,question_language,answer_type,answer_correct,content,stemmed_content,stemmed_answer_correct,is_correct
52,English,Open-ended,Because the label says there is chocolate in t...,Because Sam loves the taste of chocolate.,becaus sam love the tast of chocol,becaus the label say there is chocol in the ba...,
56,English,Open-ended,Because the bag is made of clear plastic Sam s...,Because Sam loves the taste and texture of cho...,becaus sam love the tast and textur of chocol,becaus the bag is made of clear plastic sam sh...,
60,English,Open-ended,"Because Sam cannot read, and cannot see what i...",Because Sam loves the taste of chocolate.,becaus sam love the tast of chocol,becaus sam cannot read and cannot see what is ...,
64,English,Open-ended,Because Sam trusts her friend who told her tha...,Because Sam loves the taste and texture of pop...,becaus sam love the tast and textur of popcorn,becaus sam trust her friend who told her that ...,
68,English,Open-ended,"Because Sam filled the bag herself, even thoug...",Because she filled the bag with popcorn and sh...,becaus she fill the bag with popcorn and she e...,becaus sam fill the bag herself even though sh...,
...,...,...,...,...,...,...,...
152,Turkish,Open-ended,"Çünkü kedi kutunun üstünde, bu da Can'ın onu g...","Çünkü Can, odadan ayrıldığında kediyi sepetin ...",Çünkü Can oda ayrıldık kedi sepet üst koymuş v...,Çünkü kedi kut üst bu da Can ın on görebilecek...,
156,Turkish,Open-ended,"Çünkü kedi kutunun üstünde, bu da Can'ın onu g...","Çünkü Can, kediyi sepetin üstüne koyduktan son...",Çünkü Can kedi sepet üst koyduk sonra oda ayrı...,Çünkü kedi kut üst bu da Can ın on görebilecek...,
160,English,Open-ended,Because Mark had put the cat in the box and do...,Because John initially put the cat in the bask...,becaus john initi put the cat in the basket be...,becaus mark had put the cat in the box and doe...,
164,Turkish,Open-ended,Çünkü Mehmet kediyi kutuya koymuştu ve kendisi...,Çünkü Can kediyi sepetin içine koymuştu ve Meh...,Çünkü Can kedi sepet iç koymuş ve Mehmet de se...,Çünkü Mehmet kedi kutu koymuş ve kendis yok ne...,


In [None]:
from openai import OpenAI

In [None]:
openai_client = OpenAI(api_key=dotenv_values("../../.env")["OPENAI_API_KEY"])

In [None]:
def create_initial_messages(system_message: str) -> list[dict[str, str]]:

    messages: list[dict[str, str]] = [
        
        {
            "role": "system",
            "content": system_message
        }

    ]

    return messages

In [None]:
def get_chatgpt_response(
        *args, 
        messages: list[dict[str, str]],
        engine: str = "gpt-3.5-turbo",
        max_tokens: int = 500,
        temperature: float = 0.0,
        n: int = 1,
        **kwargs
        ):
    
    response = openai_client.chat.completions.create(
        messages=messages,
        model=engine,
        max_tokens=max_tokens,
        temperature=temperature,
        n=n,
        seed=42
    )

    return response

In [None]:
def get_chatgpt_responses(
        *args,
        system_message: str,
        prompts: list[str],
        engine: str = "gpt-3.5-turbo",
        max_tokens: int = 500,
        temperature: float = 0.0,
        n: int = 1,
        **kwargs
):
    
    if len(prompts) > 1:
        n = 1

    chat: dict[str, list] = {
        "messages": create_initial_messages(system_message),
        "responses": []
              }

    for prompt in prompts:
        
        chat["messages"].append({"role": "user", "content": prompt})
        
        response = get_chatgpt_response(
            messages=chat["messages"],
            engine=engine,
            max_tokens=max_tokens,
            temperature=temperature,
            n=n
            )
        
        response_message_content: str = response.choices[0].message.content

        chat["responses"].append(response)
        chat["messages"].append({"role": "assistant", "content": response_message_content})

    return chat

In [None]:
answers.reset_index(drop=True)

Unnamed: 0,question_language,answer_type,answer_correct,content,stemmed_content,stemmed_answer_correct,is_correct
0,English,Closed-ended,chocolate,chocolate.,chocol,chocol,True
1,English,Closed-ended,popcorn,chocolate,chocol,popcorn,False
2,English,Closed-ended,uncertainty,chocolate.,chocol,uncertainti,False
3,English,Closed-ended,popcorn,popcorn.,popcorn,popcorn,True
4,English,Closed-ended,popcorn,She believes that the bag is full of chocolate.,she believ that the bag is full of chocol,popcorn,False
...,...,...,...,...,...,...,...
1695,Turkish,Closed-ended,kutunun,kutu,kut,kut,True
1696,Turkish,Closed-ended,kutunun,kutu,kut,kut,True
1697,Turkish,Closed-ended,kutunun,sepet,sepet,kut,False
1698,Turkish,Open-ended,Çünkü Mehmet kediyi kutuya koymuştu ve kendisi...,Çünkü Can kediyi sepetin içine koymuştu ve Meh...,Çünkü Can kedi sepet iç koymuş ve Mehmet sepet...,Çünkü Mehmet kedi kutu koymuş ve kendis yok ne...,


In [None]:
answers["gpt_verdict"] = answers.apply(lambda row: get_chatgpt_responses(system_message="You will be given two sentences. Compare them and respond with 'True' if fundamentally they are arguing for the same position, and 'False' if they are arguing for different viewpoints.",
                      prompts=[
                          f"Sentence 1: {row["answer_correct"]}\nSentence 2: {row["content"]}"
                          ]) if row["answer_type"] == "Open-ended" else None, axis="columns")

In [None]:
answers.loc[answers["answer_type"] == "Open-ended", "gpt_verdict"] = answers[answers["answer_type"] == "Open-ended"]["gpt_verdict"].apply(lambda x: x["messages"][2]["content"])

In [None]:
answers[answers["answer_type"] == "Open-ended"][["answer_correct", "content", "gpt_verdict"]].to_csv("./correct_answers_open.csv")

In [None]:
response = get_chatgpt_responses(system_message="You will be given two sentences. Compare them and respond with 'True' if fundamentally they are arguing for the same position, and 'False' if they are arguing for different viewpoints.",
                      prompts=[
                          "Sentence 1: Çünkü Can, odadan ayrıldığında kediyi sepetin üstüne koymuştu ve dönüşünde kediyi sepetin üstünde beklemesini bekleyecektir.\nSentence 2: Çünkü kedi kutunun üstünde, bu da Can'ın onu görebileceği anlamına geliyor."
                          ])

In [None]:
response

{'messages': [{'role': 'system',
   'content': "You will be given two sentences. Compare them and respond with 'True' if fundamentally they are arguing for the same position, and 'False' if they are arguing for different viewpoints."},
  {'role': 'user',
   'content': "Sentence 1: Çünkü Can, odadan ayrıldığında kediyi sepetin üstüne koymuştu ve dönüşünde kediyi sepetin üstünde beklemesini bekleyecektir.\nSentence 2: Çünkü kedi kutunun üstünde, bu da Can'ın onu görebileceği anlamına geliyor."},
  {'role': 'assistant', 'content': 'False'}],
 'responses': [ChatCompletion(id='chatcmpl-8MePnzOysyZe8r6an8O7K5wFBXFNM', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='False', role='assistant', function_call=None, tool_calls=None))], created=1700409263, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=1, prompt_tokens=139, total_tokens=140))]}