### Code that asks ChatGPT to assess whether each Reddit comment is about neighborhood safety (crime) in DC and, if so, to rate how safe the author feels on a scale from 1 (very unsafe) to 10 (very safe)

In [1]:
# Load the API key
# `os` and `dotenv` are used to read environment variables
import os
from dotenv import load_dotenv


# load keys from environment variables
load_dotenv() # .env file in cwd
gpt_key = os.environ.get("gpt_key")
# Fail fast when the key is missing or empty: otherwise every request would be
# sent with the header "Bearer None" and only fail later, one row at a time.
if not gpt_key:
    raise RuntimeError(
        'API key "gpt_key" not found: define it in the environment or in a .env file.'
    )

In [13]:
import requests 
import pandas as pd
import numpy as np
from tqdm import tqdm

In [5]:
## function to query a response from Chat GPT
def ask_chat(question, api_key, timeout=60):
    """Send one user message to the OpenAI chat completions endpoint and return the reply text.

    Parameters
    ----------
    question : str
        Full prompt, sent as a single message with role "user".
    api_key : str
        Key placed in the ``Authorization: Bearer ...`` header.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 60), so a
        stalled connection cannot block the caller indefinitely.

    Returns
    -------
    str
        Content of the first choice in the response, with surrounding
        whitespace stripped.

    Raises
    ------
    requests.exceptions.RequestException
        On connection errors, timeouts, or a non-success HTTP status.
    KeyError, IndexError
        If the response body does not contain ``choices[0].message.content``.
    """
    # Use the key passed by the caller, not a module-level global.
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # temperature 0 is requested for every call.
    payload = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": question}],
        "temperature": 0,
    }

    response = requests.post(
        'https://api.openai.com/v1/chat/completions',
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Surface HTTP errors (bad key, rate limit, ...) as such instead of as an
    # unrelated KeyError on 'choices' further down.
    response.raise_for_status()

    response_json = response.json()
    return response_json['choices'][0]['message']['content'].strip()

In [7]:
## Load the parsed Reddit comments, keeping only the first 5,000 rows
test_data = pd.read_csv(
    "./data/reddit/comments_parsed.csv",
    nrows=5000,
)
test_data.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,comment_id,submission_id,author,body,score,year,month,subreddit,type
0,0,0,isl1exn,y57cbw,eeek0711,Or Cleveland Park in DC,4,2022,10,0,1
1,1,1,isl1dig,y57cbw,eeek0711,Takoma Park MD,3,2022,10,0,1
2,2,2,islh5rt,y57cbw,nonmimeticform,Baltimore,3,2022,10,0,1
3,3,3,isuelis,y57cbw,dcgirlsmallworld,If you are looking for a quiet neighborhood wi...,3,2022,10,0,1
4,4,4,isl1grp,y57cbw,eeek0711,I lived in the DeLano Apartments in Woodley Pa...,2,2022,10,0,1


In [9]:
# Quick sanity check on the loaded sample: (number of rows, number of columns)
test_data.shape

(5000, 11)

In [15]:
import time

## Prompt prepended to every comment. It does not change between rows, so it is
## built once, outside the loop. The runs of spaces inside the literal reproduce
## exactly what the earlier backslash-continued string contained, so replies
## stay comparable with previous runs.
base_question = (
    "Is this text about neighborhood safety in DC? "
    "        Answer only with a number: 1 if about safety in DC, 0 if not. "
    "        If the answer is 1, how safe do you think the author feels? "
    "        Answer with only a number on a scale between 1 (very unsafe) and 10 (very safe) "
    "            Here is the text: "
)

##loop through the coded data to test gpt's accuracy with prompt
## `answer` ends up with one entry per row of test_data, in row order:
## the raw reply string, or NaN when the request for that row failed.
answer = []
for j in tqdm(range(len(test_data))):
    try:
        # Positional lookup: j counts rows, so it must not depend on index labels.
        text = test_data["body"].iloc[j]
        full_question = base_question + str(text)
        answer.append(ask_chat(full_question, gpt_key))
        time.sleep(1)  # pause between successful requests
    except Exception as err:
        # Catch Exception rather than everything: a bare `except:` also swallows
        # KeyboardInterrupt, which makes this long-running loop impossible to stop.
        # Report the cause so failed rows are not silently turned into NaN.
        tqdm.write(f"row {j}: request failed ({err!r})")
        answer.append(np.nan)

100%|█████████████████████████████████████| 5000/5000 [2:01:11<00:00,  1.45s/it]


In [31]:
# Attach the collected replies as a new column. `answer` holds one entry per row,
# in row order: the raw reply text, or NaN where the request failed.
test_data['gpt_score'] = answer

In [35]:
# Inspect the first 25 rows to compare the replies in `gpt_score` with the comment text
test_data.head(25)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,comment_id,submission_id,author,body,score,year,month,subreddit,type,gpt_score
0,0,0,isl1exn,y57cbw,eeek0711,Or Cleveland Park in DC,4,2022,10,0,1,0
1,1,1,isl1dig,y57cbw,eeek0711,Takoma Park MD,3,2022,10,0,1,0
2,2,2,islh5rt,y57cbw,nonmimeticform,Baltimore,3,2022,10,0,1,0
3,3,3,isuelis,y57cbw,dcgirlsmallworld,If you are looking for a quiet neighborhood wi...,3,2022,10,0,1,0
4,4,4,isl1grp,y57cbw,eeek0711,I lived in the DeLano Apartments in Woodley Pa...,2,2022,10,0,1,0
5,5,5,ita0jq1,y57cbw,samthehaggis,Agreed to Cleveland Park and Woodley Park. The...,1,2022,10,0,1,1 \n8
6,6,6,itxtwh8,y57cbw,Adept-Pension-1312,Mt Pleasant,1,2022,10,0,1,0
7,7,8,iwbywh5,y57cbw,danimaymoj315,"Probably Capitol Hill, Cleveland Park, and Log...",1,2022,11,0,1,0
8,8,9,iszr3e7,y57cbw,zazaza05,"Thank you, very helpful because I was consider...",1,2022,10,0,1,0
9,9,10,i9bctxl,utr8aw,,Too much Fox news,8,2022,5,0,1,0


In [39]:
# Save the scored sample. index=False keeps the row index out of the file;
# writing it is what produces the extra "Unnamed: 0", "Unnamed: 0.1", ...
# columns seen in the input each time a CSV is saved and reloaded.
test_data.to_csv("./data/reddit/gpt_output_1.csv", index=False)