### Code that asks ChatGPT to assess whether each Reddit comment is about crime / neighborhood safety in DC and, if so, to rate how safe the author feels (1 = very unsafe, 10 = very safe)

In [1]:
# load api key
# load library to get environmental files
import os
from dotenv import load_dotenv


# load keys from environment variables
load_dotenv() # .env file in cwd
gpt_key = os.environ.get("gpt_key")

# Fail fast when the key is missing: otherwise every request would be sent as
# "Bearer None", be rejected by the API, and only show up later as failed rows.
if not gpt_key:
    raise RuntimeError(
        'OpenAI API key not found: define "gpt_key" in the environment or in a .env file'
    )

In [3]:
import requests 
import pandas as pd
import numpy as np
from tqdm import tqdm

In [5]:
## function to query a response from Chat GPT
def ask_chat(question, api_key, timeout=60):
    """Send one single-turn prompt to the OpenAI chat completions API.

    Parameters
    ----------
    question : str
        Full prompt text; sent as a single "user" message.
    api_key : str
        OpenAI API key placed in the Bearer authorization header.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 60).
        Without a timeout a stalled connection would block the caller forever.

    Returns
    -------
    str
        The content of the first choice's message, stripped of surrounding
        whitespace.

    Raises
    ------
    requests.exceptions.RequestException
        On network failure, timeout, or a 4xx/5xx response from the API.
    KeyError, IndexError
        If a successful response does not contain the expected
        ``choices[0].message.content`` structure.
    """
    # define headers -- use the key passed by the caller, not a notebook global
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # temperature 0 keeps the replies as repeatable as the API allows
    info = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": question}],
        "temperature": 0,
    }
    ##Get the response
    response = requests.post(
        'https://api.openai.com/v1/chat/completions',
        headers=headers,
        json=info,
        timeout=timeout,
    )
    # Surface auth / rate-limit / server errors as HTTP errors instead of an
    # opaque KeyError on the missing 'choices' field.
    response.raise_for_status()
    ##Convert to json
    response_json = response.json()
    return response_json['choices'][0]['message']['content'].strip()

In [125]:
##open coded csv file
# Read a 150-row window of the parsed comments: the first 5050 lines of the
# file are skipped and the column names are supplied explicitly.
test_data = pd.read_csv(
    "./data/reddit/comments_parsed.csv",
    skiprows=5050,
    nrows=150,
    names=[
        "x",
        "y",
        "comment_id",
        "submission_id",
        "author",
        "body",
        "score",
        "year",
        "month",
        "subreddit",
        "type",
    ],
)
test_data.head()

Unnamed: 0,x,y,comment_id,submission_id,author,body,score,year,month,subreddit,type
0,5057,5156,l6ciaxn,1d474to,BlkNtvTerraFFVI,I'm fine with people talking about crime but p...,4,2024,5,1,1
1,5058,5157,l6csge0,1d474to,PanAmargo,DC was like 5th in the country for murders las...,3,2024,5,1,1
2,5059,5158,l6duoac,1d474to,IcyWillow1193,the worst of them pretty obviously live nowher...,5,2024,5,1,1
3,5060,5159,l6e3qxd,1d474to,douggie84,You made a throw-away account for this because...,2,2024,5,1,1
4,5061,5160,l6cdb6l,1d474to,little_bird_vagabond,"I live in nova, but I worked in DC until recen...",2,2024,5,1,1


In [127]:
# sanity check: expect 150 rows (nrows=150) and the 11 named columns
test_data.shape

(150, 11)

In [None]:
import time

# Prompt prefix, built once because it is identical for every comment.
# The runs of spaces inside the literals are intentional: they reproduce the
# original prompt text exactly, so the model sees the same input as before.
base_question = (
    "Is this text about neighborhood safety in DC? "
    "        Answer only with a number: 1 if about safety in DC, 0 if not. "
    "        If the answer is 1, how safe do you think the author feels? "
    "        Answer with only a number on a scale between 1 (very unsafe) and 10 (very safe) "
    "            Here is the text: "
)

##loop through the coded data to test gpt's accuracy with prompt
# `answer` gets exactly one entry per row of test_data, in row order, so it can
# be assigned back as a column afterwards.
answer = []
# Iterate over the values directly (positional order) rather than .loc[j, ...],
# which would only work if the index happens to be 0..n-1.
for text in tqdm(test_data["body"], total=len(test_data)):
    try:
        answer.append(ask_chat(base_question + str(text), gpt_key))
    except Exception as exc:
        # Record the failure as NaN to keep alignment, but report it instead of
        # hiding it. Catching Exception (not a bare except) lets Ctrl-C /
        # kernel interrupt stop the loop.
        tqdm.write(f"request failed, storing NaN: {exc!r}")
        answer.append(np.nan)
    # Pause between requests, including after a failure (e.g. a rate-limit error).
    time.sleep(1)

 91%|█████████████████████████████████████▏   | 136/150 [03:18<00:21,  1.54s/it]

In [117]:
# peek at the first 15 raw replies collected from the model
answer[0:15]

['0',
 '1  \n6',
 '0',
 '0',
 '0',
 '0',
 '1  \n3',
 '0',
 '0',
 '0',
 '1  \n3',
 '0',
 '0',
 '0',
 '0']

In [115]:
# should equal the number of rows in test_data before answer is used as a column
len(answer)

50

In [119]:
# attach the raw model replies (a string, or NaN where the request failed) as a new column;
# pandas raises ValueError here if len(answer) != len(test_data)
test_data['gpt_score'] = answer

In [121]:
# inspect the first 25 rows together with the new gpt_score column
test_data.head(25)

Unnamed: 0,x,y,comment_id,submission_id,author,body,score,year,month,subreddit,type,gpt_score
0,5007,5104,m0lfq81,1h7dozg,_RemyLeBeau_,The real reason is that the companies that are...,-1,2024,12,1,1,0
1,5008,5105,m0kv52k,1h7dozg,Derpolitik23,"Yes, Arlington’s crime rate is lower than DC's...",18,2024,12,1,1,1 \n6
2,5009,5106,m0lemna,1h7dozg,_RemyLeBeau_,"10-15 years ago, Clarendon/Courthouse was wild...",11,2024,12,1,1,0
3,5010,5107,m0l65h8,1h7dozg,Bohm81,Depends which direction. You can go an hour so...,10,2024,12,1,1,0
4,5011,5108,m0mrjjz,1h7dozg,the-bc5,I always enjoyed people telling me I lived in ...,2,2024,12,1,1,0
5,5012,5109,m0mpf1c,1h7dozg,lmboyer04,"They said navy yard not EOTR fwiw, but he’s a ...",1,2024,12,1,1,0
6,5013,5110,m0lqbqe,1h7dozg,lorddementor,I wonder where all the tax money goes every ye...,4,2024,12,1,1,1 \n3
7,5014,5111,m0ml5dm,1h7dozg,imissallofit,Possible to live car-free? Do you mean there a...,1,2024,12,1,1,0
8,5015,5112,m0m9j2x,1h7dozg,internet_emporium,Lmao,1,2024,12,1,1,0
9,5016,5113,m0m2sp1,1h7dozg,mega05,Arlington is DC's more successful younger brot...,4,2024,12,1,1,0


In [123]:
# save the scored comments; the DataFrame index is written as well (pandas default index=True)
test_data.to_csv("./data/reddit/gpt_output_3.csv")