In [47]:
import polars as pl
import json
import statistics
import re
import random
from tqdm.auto import tqdm

from openai import OpenAI

##### Load data

In [4]:
data_dir = '/Volumes/PortableSSD/CSS/data/processed/'
#data_dir = '/users/ujan/Downloads/'

In [None]:
data_df = pl.read_csv(data_dir+'posts_2015-21_ps_min_2c_politics.csv')
data_df.head()

##### save positive posts to csv from the text files

In [42]:
data_dict = {'posts': [], 'exps': []}

with open(data_dir+'gpt3_pos_sample.txt') as f:
    for line in f.readlines():
        if line[0] == 'p':
            post = line.split('post : ')[-1].split('\n')[0]
            data_dict['posts'].append(post)
        elif line[0] == 'm':
            exp = line.split('meta : ')[-1].split('\n')[0]
            data_dict['exps'].append(exp)

pos_df = pl.from_dict(data_dict)
pos_df.write_csv(data_dir+'gpt3_pos_sample.csv', separator=",")

##### save negative posts to csv from the text files

In [43]:
data_dict = {'posts': []}

with open(data_dir+'gpt3_neg_sample.txt') as f:
    for line in f.readlines():
        if line[0] == 'p':
            post = line.split('post : ')[-1].split('\n')[0]
            data_dict['posts'].append(post)

neg_df = pl.from_dict(data_dict)
neg_df.write_csv(data_dir+'gpt3_neg_sample.csv', separator=",")

##### chatgpt to find metaphors on the samples

In [48]:
# gpt3.5 client
client = OpenAI(api_key="api_key")  # "api_key"

##### load data

In [54]:
pos_df = pl.read_csv(data_dir+'gpt3_pos_sample.csv')
posts = pos_df['posts'].to_list()

In [71]:
neg_df = pl.read_csv(data_dir+'gpt3_neg_sample.csv')
posts = neg_df['posts'].to_list()

In [72]:
temp = 0.5
all_responses = []

bar = tqdm(range(len(posts)))
for post in posts:
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_format={ "type": "json_object" },
        seed=20,
        temperature=temp,
        messages=[
            {
                "role": "system",
                "content": "You will be provided with a Reddit post title, and your task is to identify if the post contains a sports metaphor or not. Note that sports related words may be used in a nonmetaphorical way, do not label such cases as sports metaphors. If the text does contain a sports metaphor, identify the sports metaphor word or phrase and provide a max 10 word explanation. Provide the answer in a JSON format with the following keys, contains_sports_metaphor (true/false), sports_metaphor, explanation."
            },
            {
                "role": "user",
                "content": post
            }
        ],
    )
                
    obj = json.loads(response.json())
    resp_json = json.loads(obj["choices"][0]["message"]["content"])
    resp_json["post"] = post
    all_responses.append(resp_json)
    bar.update(1)

  0%|          | 0/201 [00:00<?, ?it/s]

In [73]:
all_responses

[{'contains_sports_metaphor': False,
  'sports_metaphor': None,
  'explanation': None,
  'post': 'How Donald Trump Bankrupted His Casinos, Left Contractors Unpaid, Ruined Investors &amp; Made Millions (video)'},
 {'contains_sports_metaphor': False,
  'post': 'Just getting back from the rally at University of New Hampshire...'},
 {'contains_sports_metaphor': False,
  'sports_metaphor': None,
  'explanation': None,
  'post': 'Trump privately blamed Black Americans for lacking initiative: report'},
 {'contains_sports_metaphor': False,
  'sports_metaphor': '',
  'explanation': '',
  'post': 'Shooting Survivor: CNN Gave Me "Scripted Question" After Denying Question About Armed Guards'},
 {'contains_sports_metaphor': False,
  'sports_metaphor': None,
  'explanation': None,
  'post': 'Are Trump supporters evil, or just wrong? Political scientists struggle with morality'},
 {'contains_sports_metaphor': False,
  'post': 'Leicester blast triggers police alert'},
 {'contains_sports_metaphor': Fal

In [74]:
neg_meta = []
neg_non_meta = []

for response in all_responses:
    if response['contains_sports_metaphor']:
        neg_meta.append({'post': response['post'], 'meta': response['sports_metaphor']})
    else:
        neg_non_meta.append({'post': response['post']})

In [75]:
len(neg_meta)

0

In [77]:
len(neg_non_meta)

201

In [59]:
meta = []
non_meta = []

for response in all_responses:
    if response['contains_sports_metaphor']:
        meta.append({'post': response['post'], 'meta': response['sports_metaphor']})
    else:
        non_meta.append({'post': response['post']})

In [60]:
len(non_meta)

29

In [67]:
len(meta)

172

##### added results after running gpt on positive and negative matches from earlier separately

In [78]:
meta = meta + neg_meta
non_meta = non_meta + neg_non_meta

In [79]:
len(meta)

172

In [85]:
len(non_meta)

230

In [82]:
with open(data_dir+'gpt3_pos_sample_5.txt', 'w') as f:
    for item in meta:
        f.write('post : {}'.format(item['post']))
        f.write('\n')
        f.write('meta : {}'.format(item['meta']))
        f.write('\n')
        f.write('\n')

In [87]:
with open(data_dir+'gpt3_neg_sample_5.txt', 'w') as f:
    for item in non_meta:
        f.write('post : {}'.format(item['post']))
        f.write('\n')
        f.write('\n')