# Re-prompting GPT - V3
The results from these runs are reported in the FT (fine-tuned) column of the paper.


### Conda environment: transformers

In [1]:
# List the available conda environments; the active one is marked with "*" in the output.
!conda env list

# conda environments:
#
base                     /Users/kylehamilton/opt/anaconda3
annotation               /Users/kylehamilton/opt/anaconda3/envs/annotation
mapping                  /Users/kylehamilton/opt/anaconda3/envs/mapping
mlflow                   /Users/kylehamilton/opt/anaconda3/envs/mlflow
nlp                      /Users/kylehamilton/opt/anaconda3/envs/nlp
pyg                      /Users/kylehamilton/opt/anaconda3/envs/pyg
si                       /Users/kylehamilton/opt/anaconda3/envs/si
torch                    /Users/kylehamilton/opt/anaconda3/envs/torch
transformers          *  /Users/kylehamilton/opt/anaconda3/envs/transformers



In [2]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter, OrderedDict, defaultdict
import features
import ast
import tiktoken
import os

from tqdm import tqdm
import backoff
import logging
import requests
import re
import utils
import importlib
importlib.reload(utils)

import GPT_V2
import GPT_V3

In [3]:
from openai import OpenAI

# OpenAI client. The key is read explicitly from OPENAI_API_KEY (the library
# would also pick it up by default), so a missing variable raises KeyError here.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

In [22]:
# Every feature name defined in features.f_od, in the order the mapping yields them.
fs = [feature_name for feature_name in features.f_od.keys()]

In [29]:
# Display the full list of feature names taken from features.f_od.
fs

['Aspect',
 'Emphasis',
 'Figures_of_argument',
 'Figures_of_word_choice',
 'Language_of_origin',
 'Language_varieties',
 'Lexical_and_semantic_fields',
 'Modifying_clauses',
 'Modifying_phrases',
 'Mood',
 'New_words_and_changing_uses',
 'Parallelism',
 'Phrases_built_on_nouns',
 'Phrases_built_on_verbs',
 'Predication',
 'Prosody_and_punctuation',
 'Sentence_architecture',
 'Series',
 'Subject_choices',
 'Tense',
 'Tropes',
 'Verb_choices']

In [47]:
# Features evaluated in this notebook. Compared with the full list from
# features.f_od, this omits 'Language_of_origin', 'Modifying_phrases' and
# 'Prosody_and_punctuation'.
fs = [
    'Aspect',
    'Emphasis',
    'Figures_of_argument',
    'Figures_of_word_choice',
    'Language_varieties',
    'Lexical_and_semantic_fields',
    'Modifying_clauses',
    'Mood',
    'New_words_and_changing_uses',
    'Parallelism',
    'Phrases_built_on_nouns',
    'Phrases_built_on_verbs',
    'Predication',
    'Sentence_architecture',
    'Series',
    'Subject_choices',
    'Tense',
    'Tropes',
    'Verb_choices',
]

In [10]:
# Fine-tuned gpt-3.5-turbo-1106 models: fine-tuned model id -> feature it was trained for.
# NOTE: the GPT-4o-mini cell assigns the same name, so whichever cell ran last wins.
models_dict = {
    "ft:gpt-3.5-turbo-1106:personal::8kHQUVJr": "Aspect",
    "ft:gpt-3.5-turbo-1106:personal::8kJH3MlY": "Emphasis",
    "ft:gpt-3.5-turbo-1106:personal::8kJNlyTL": "Figures_of_argument",
    "ft:gpt-3.5-turbo-1106:personal::8kJHdiWj": "Figures_of_word_choice",
    "ft:gpt-3.5-turbo-1106:personal::8kSbYB5Z": "Language_varieties",
    "ft:gpt-3.5-turbo-1106:personal::8kSXEMmV": "Lexical_and_semantic_fields",
    "ft:gpt-3.5-turbo-1106:personal::8kSZGelp": "Modifying_clauses",
    "ft:gpt-3.5-turbo-1106:personal::8krH6MwS": "Mood",
    "ft:gpt-3.5-turbo-1106:personal::8kTWojhH": "New_words_and_changing_uses",
    "ft:gpt-3.5-turbo-1106:personal::8kTceGAO": "Parallelism",
    "ft:gpt-3.5-turbo-1106:personal::8kVWQWJW": "Phrases_built_on_nouns",
    "ft:gpt-3.5-turbo-1106:personal::8kVFi1Q9": "Phrases_built_on_verbs",
    "ft:gpt-3.5-turbo-1106:personal::8kVdlK5k": "Predication",
    "ft:gpt-3.5-turbo-1106:personal::8kYKKQWJ": "Sentence_architecture",
    "ft:gpt-3.5-turbo-1106:personal::8kYKGlP0": "Series",
    "ft:gpt-3.5-turbo-1106:personal::8kYdJ0eq": "Subject_choices",
    "ft:gpt-3.5-turbo-1106:personal::8ka5hSbD": "Tense",
    "ft:gpt-3.5-turbo-1106:personal::8kaunpHX": "Tropes",
    "ft:gpt-3.5-turbo-1106:personal::8kZhtRxY": "Verb_choices",
}

In [71]:
# Fine-tuned gpt-4o-mini-2024-07-18 models: fine-tuned model id -> feature it was trained for.
# NOTE: this assigns the same name as the gpt-3.5 mapping, so whichever cell ran last wins.
# Insertion order is the processing order of the loops that iterate over this dict.
models_dict = {
    "ft:gpt-4o-mini-2024-07-18:personal:aspect:9obT66PC": "Aspect",
    "ft:gpt-4o-mini-2024-07-18:personal:emphasis:9obaWiHn": "Emphasis",
    "ft:gpt-4o-mini-2024-07-18:personal:fig-of-argument:9oc4TdDq": "Figures_of_argument",
    "ft:gpt-4o-mini-2024-07-18:personal:fig-of-wordchoice:9ocKMI6k": "Figures_of_word_choice",
    "ft:gpt-4o-mini-2024-07-18:personal::9ockWPGZ": "Language_varieties",
    "ft:gpt-4o-mini-2024-07-18:personal:lex-sem-fields:9otZKXdw": "Lexical_and_semantic_fields",
    "ft:gpt-4o-mini-2024-07-18:personal:mod-clauses:9oy9tJ5H": "Modifying_clauses",
    "ft:gpt-4o-mini-2024-07-18:personal:mood:9oyFktVS": "Mood",
    "ft:gpt-4o-mini-2024-07-18:personal:new-words:9ozqBKSc": "New_words_and_changing_uses",
    "ft:gpt-4o-mini-2024-07-18:personal:parallel:9ozuSX9K": "Parallelism",
    "ft:gpt-4o-mini-2024-07-18:personal:phrases-nouns:9p1UUoyi": "Phrases_built_on_nouns",
    "ft:gpt-4o-mini-2024-07-18:personal:phrases-verbs:9pGaHXJK": "Phrases_built_on_verbs",
    "ft:gpt-4o-mini-2024-07-18:personal:predication:9pKoOFeY": "Predication",
    "ft:gpt-4o-mini-2024-07-18:personal:sent-architecture:9pKjV2LY": "Sentence_architecture",
    "ft:gpt-4o-mini-2024-07-18:personal:series:9pO28rQJ": "Series",
    "ft:gpt-4o-mini-2024-07-18:personal:tense:9pO6FmBe": "Tense",
    "ft:gpt-4o-mini-2024-07-18:personal:subject-choices:9oWoIqLH": "Subject_choices",
    "ft:gpt-4o-mini-2024-07-18:personal:tropes:9pagEI0v": "Tropes",
    "ft:gpt-4o-mini-2024-07-18:personal:verb-choices:9pdQlxYi": "Verb_choices",
}



In [59]:
# Optional filename suffix for evaluating models trained on different features
# (e.g. "_FT_Maj"); left empty here so output files carry no extra suffix.
alt = ""

In [60]:
# Run configuration.
# MODEL is the model name handed to GPT_V3.GPT; other values tried here were
# "ft:gpt-4o-mini-2024-07-18:personal:subject-choices:9oWoIqLH" and "gpt-4o-mini-2024-07-18".
# (To restrict a run to one feature, fs can be narrowed, e.g. fs = ['Subject_choices'].)
MODEL = "gpt-4o"
version = "V4"
model_version = "_FT_Maj_gpt-4o-mini"
# Results for this run live under data/<version>/<model_version>/.
output_path = f"data/{version}/{model_version}/"

In [61]:
# Reload the local helper modules, then build the module-level GPT wrapper.

# Reloading lets edits to utils.py / GPT_V3.py take effect without restarting the kernel.
importlib.reload(utils)
importlib.reload(GPT_V3)
# Global instance used by parseRes, fixProperties and combine; run() creates its own local instance.
gpt = GPT_V3.GPT(MODEL)

def parseRes(x,_property):
    """Parse one raw yes/no model response, falling back to an empty list on failure.

    Delegates to ``gpt.parseYNResponse`` on the notebook-level ``gpt`` instance.

    Args:
        x: Raw response value for one row (as stored in the ``res...`` column).
        _property: The property the response refers to.

    Returns:
        Whatever ``gpt.parseYNResponse`` returns (presumably a list of
        property names -- confirm against GPT_V3), or ``[]`` when parsing
        raises.
    """
    try:
        return gpt.parseYNResponse(x,_property)
    except Exception:
        # The previous `except():` named an empty tuple of exception types, so
        # it caught nothing and a single malformed response aborted the whole
        # DataFrame.apply pass. Catch real parsing failures instead.
        return []


def fixProperties(s,feature):
    """Map every label in ``s`` to a property of ``feature``.

    ``s`` may be a sequence of labels or the string representation of one
    (in which case it is decoded with ``ast.literal_eval`` first). Each label
    is passed through ``gpt.mapToProperty(label, feature)`` and the results
    are returned as a new list, in the same order.
    """
    labels = ast.literal_eval(s) if type(s) == str else s
    return [gpt.mapToProperty(label, feature) for label in labels]



def run(data,FEATURE,temp,version,model_version,MODEL):
    """Query ``MODEL`` for every row of ``data`` and save raw and parsed responses.

    For each row, ``get_gpt_response`` is called with the row's ``text``,
    ``feature_id`` and ``sentence_id``. Every item it returns contributes one
    record ``[sentence_id, item[1], item[3]]``. The records are outer-merged
    onto ``data`` by ``sentence_id`` and written to
    ``output_path + version + "_" + FEATURE + alt + ".csv"``; ``output_path``
    and ``alt`` are notebook globals. The raw responses are then parsed with
    ``parseRes`` into an extra column and the same file is overwritten.

    Args:
        data: DataFrame with at least ``text``, ``feature_id`` and ``sentence_id`` columns.
        FEATURE: Feature name, used only in the output filename.
        temp: Sampling temperature; its ``str()`` form is embedded in column names.
        version: Run version tag, embedded in column names and the filename.
        model_version: Model tag, embedded in column names and forwarded to the API wrapper.
        MODEL: Model identifier used to construct ``GPT_V3.GPT``.

    Returns:
        None. Results are written to disk and the error count is printed.
    """
    temp_label = str(temp)

    # A wrapper dedicated to this model; its `errors` list is reported at the end.
    gpt = GPT_V3.GPT(MODEL)

    records = []
    for _, record in tqdm(data.iterrows()):
        sentence, feature, sid = record['text'], record['feature_id'], record['sentence_id']
        replies = gpt.get_gpt_response(sentence,feature,sid,float(temp_label),model_version)
        records.extend([sid, reply[1], reply[3]] for reply in replies)

    # Column names: property<model_version>, res<tag> and property<tag>,
    # where <tag> is "<model_version>_<temp>_<version>".
    property_col = 'property' + model_version
    run_tag = "_".join([model_version, temp_label, version])
    raw_col = 'res' + run_tag
    parsed_col = 'property' + run_tag

    responses_df = pd.DataFrame(records, columns=['sentence_id', property_col, raw_col])
    merged = data.merge(responses_df, how='outer', on='sentence_id')

    csv_path = output_path + version + "_" + FEATURE + alt + ".csv"

    # Written before parsing, so the raw responses are on disk even if parsing fails.
    merged.to_csv(csv_path, index=None)

    merged[parsed_col] = merged.apply(
        lambda r: parseRes(r[raw_col], r[property_col]),
        axis=1,
    )
    merged.to_csv(csv_path, index=None)

    print(f"There were {len(gpt.errors)} errors.")

In [69]:
def combine(prop, res):
    """Attach the property name to a response object for easier reading later.

    ``res`` is converted with ``gpt.responseToJson`` and the resulting object
    gets a ``'Property'`` entry set to ``prop`` before being returned.
    """
    response_obj = gpt.responseToJson(res)
    response_obj['Property'] = prop
    return response_obj

def removeErrors(s):
    """Decode a stringified property list, mapping failed or missing cells to ``[]``.

    Args:
        s: A cell value read back from CSV. Normally the string form of a
           Python literal (e.g. ``"['a', 'b']"``). Cells that contain
           ``"parse error"`` or ``"timeout"``, are empty/whitespace-only, or
           are missing (``None`` / float NaN, which is how pandas reads an
           empty CSV cell) are treated as "no properties".

    Returns:
        The value produced by ``ast.literal_eval(s)``, or ``[]`` for the
        error/missing cases above.

    Raises:
        ValueError / SyntaxError: from ``ast.literal_eval`` if ``s`` is any
        other value that is not a valid Python literal.
    """
    # Missing cell: previously `"parse error" in s` raised TypeError on NaN/None.
    if s is None or (isinstance(s, float) and s != s):
        return []

    if isinstance(s, str):
        # Empty text would make literal_eval raise SyntaxError; error markers
        # mean the response could not be used.
        if not s.strip() or "parse error" in s or "timeout" in s:
            return []

    return ast.literal_eval(s)

# Query every fine-tuned model on its feature's verified sentences, collapse
# the responses to one row per sentence and print the agreement rate.

# run() only writes its CSV after all sentences have been queried, so make sure
# the target directory exists before spending time (and money) on API calls.
os.makedirs(output_path, exist_ok=True)

# Columns whose value is taken from the first row of each sentence group.
# iloc[0] is used rather than "first" because "first" would skip NaN values.
first_value_cols = [
    'sentence_id',
    'technique',
    'text',
    'feature_id',
    'props_a20',
    'props_a21',
    'props_a22',
    'annotator_consistency',
    'props_gpt4_majority',
    'res_1.0_1',
    'gpt_props_1.0_1',
    'res_1.0_2',
    'gpt_props_1.0_2',
    'res_1.0_3',
    'gpt_props_1.0_3',
    'gpt3.5_1.0_consistency',
    'res_0.2_1',
    'gpt_props_0.2_1',
    'res_0.2_2',
    'gpt_props_0.2_2',
    'res_0.2_3',
    'gpt_props_0.2_3',
    'gpt3.5_0.2_consistency',
    'gpt3.5_0.2_majority',
    'humans isCorrect',
    'gpt isCorrect',
    'comments',
    'ground truth',
]

for k, v in models_dict.items():

    FEATURE = v
    MODEL = k

    # Column names produced by run() for temperature 0.0.
    prop_col = 'property'+model_version
    res_col = 'res'+model_version+'_0.0_'+version
    parsed_col = 'property'+model_version+'_0.0_'+version

    # File written by run(), and the per-sentence file derived from it.
    raw_csv = os.path.join(output_path, version+"_"+FEATURE+alt+".csv")
    grouped_csv = os.path.join(output_path, "_"+version+"_"+FEATURE+alt+".csv")

    df = pd.read_csv("data/human_gpt_verified/"+FEATURE+".csv")
    df = df[df["humans isCorrect"]>=0]

    # For a quick smoke test, subset the frame here (e.g. df = df[:2]).

    print(FEATURE, model_version, version, len(df))
    run(df,FEATURE,0.0,version,model_version,MODEL)

    # Combine: decode the parsed properties and tag each response with its property.
    df = pd.read_csv(raw_csv)
    df[parsed_col] = df[parsed_col].apply(removeErrors)
    df[res_col] = df.apply(lambda x: combine(x[prop_col],x[res_col]), axis=1)
    df['sentence_id'] = df['sentence_id'].apply(int)

    agg_spec = {col: (lambda x: x.iloc[0]) for col in first_value_cols}
    agg_spec[prop_col] = list
    agg_spec[res_col] = list
    # "sum" concatenates the per-response lists. The builtin `sum` used before
    # is currently mapped by pandas to the same groupby sum, but with a
    # FutureWarning (apparently the one in this cell's output) that a later
    # version will call the builtin directly, which fails on lists (0 + list).
    agg_spec[parsed_col] = "sum"

    df = df.groupby(['sentence_id']).agg(agg_spec)
    df = df.drop(prop_col,axis=1)

    # Round-trip through CSV so the columns have the same (string) form that
    # the later accuracy cells see when they read this file.
    df.to_csv(grouped_csv,index=False)
    df = pd.read_csv(grouped_csv)
    df = df[df["humans isCorrect"]>=0]
    df['agreement'] = df.apply(lambda x: utils.calcAgreement(x["ground truth"],x[parsed_col]), axis=1)

    # Share of sentences whose agreement value equals 1.
    print(Counter(df['agreement'])[1]/len(df))
    print("="*100)

Tropes _FT_Maj_gpt-4o-mini V4 32


32it [45:46, 85.81s/it] 


There were 0 errors.


  df = df.groupby(['sentence_id']).agg({


0.5
Verb_choices _FT_Maj_gpt-4o-mini V4 30


30it [16:41, 33.37s/it]

There were 0 errors.
0.7666666666666667



  df = df.groupby(['sentence_id']).agg({


# Get the accuracy scores from fine-tuned models

In [None]:
importlib.reload(utils)

# Run configuration, restated here for the per-sentence files produced earlier.
version = "V4"
model_version = "_FT_Maj_gpt-4o-mini"
output_path = f"data/{version}/{model_version}/"


for MODEL, FEATURE in models_dict.items():
    # Entries with an empty model id are skipped.
    if MODEL == "":
        continue

    df = pd.read_csv(f"{output_path}/_{version}_{FEATURE}{alt}.csv")
    df['agreement'] = df.apply(
        lambda row: utils.calcAgreement(
            row["ground truth"],
            row[f"property{model_version}_0.0_{version}"],
        ),
        axis=1,
    )

    # Share of sentences whose agreement value equals 1.
    print(FEATURE, model_version, version)
    print(Counter(df['agreement'])[1] / len(df))
    print("=" * 100)

In [52]:
# Agreement rate for the Subject_choices feature only.
model_version, version = "_FT_Maj_gpt-4o-mini", "V4"

df = pd.read_csv("data/V4/_FT_Maj_gpt-4o-mini/_V4_Subject_choices.csv")
df['agreement'] = df.apply(
    lambda row: utils.calcAgreement(
        row["ground truth"],
        row[f"property{model_version}_0.0_{version}"],
    ),
    axis=1,
)

# Share of sentences whose agreement value equals 1.
print(model_version, version)
print(Counter(df['agreement'])[1] / len(df))
print("=" * 100)

_FT_Maj_gpt-4o-mini V4
0.26666666666666666
