In [None]:
import pandas as pd
import numpy as np
from openai import OpenAI
import json
import csv

# Read the batch of reviews that will be sent to the model for annotation.
reviews = pd.read_csv('philly_reviews_1.csv')

# System prompt sent with every request. Adjacent string literals are
# concatenated by the parser, so the runtime value is one single-line string.
system_prompt = (
    "You are a restaurant review annotator. "
    "Categorize the following text so I can evaluate the restaurant accurately. "
    "Then for each category give the restaurant a score from 1-5, "
    "1 being bad and 5 being excellent for their performance in each category. "
    "Include every single dish mentioned in the review with a score. "
    'Return the output in JSON format like {"Service": score, "Pizza": score}.'
)

In [None]:
client = OpenAI()
annotated_reviews = {}  # review_id -> parsed JSON annotation returned by the model

# Send each review to the chat model and collect the parsed annotation.
for idx, review in reviews.iterrows():
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": review['text']},
        ],
        max_tokens=256
    )

    # The message content can be None; treat that as an empty reply so it is
    # reported as a parse failure below instead of raising AttributeError.
    raw_reply = response.choices[0].message.content or ''

    # Strip surrounding whitespace BEFORE removing the Markdown code fence:
    # removeprefix/removesuffix only match at the exact start/end of the string,
    # so a leading or trailing newline would otherwise leave the fence in place.
    enriched = (
        raw_reply.strip()
        .removeprefix('```json')
        .removeprefix('```')
        .removesuffix('```')
        .strip()
    )

    try:
        enriched = json.loads(enriched)
    except json.JSONDecodeError:
        # Only malformed JSON is skipped; anything else (including
        # KeyboardInterrupt) propagates instead of being silently swallowed.
        print(f'Error parsing JSON: {idx}')
        continue

    annotated_reviews[review['review_id']] = enriched

# Save annotated reviews as a single JSON object keyed by review_id
with open('annotated_reviews_1_final.json', 'w') as f:
    json.dump(annotated_reviews, f)

In [None]:
# Convert the batch-1 annotations from JSON to CSV.
# The file holds one JSON object mapping review_id -> annotation (it is written
# with json.dump), not JSON Lines, so it is parsed with the json module rather
# than pd.read_json(lines=True). This also handles an empty mapping and
# annotations that are not dicts.
with open('annotated_reviews_1_final.json', 'r') as f:
    annotations = json.load(f)

df = pd.DataFrame(
    # Re-serialize each annotation so the nested content fits in one CSV cell.
    [(review_id, json.dumps(annotation)) for review_id, annotation in annotations.items()],
    columns=['review_id', 'annotated'],
)
df.to_csv('enriched_final_1.csv', index=False, quoting=csv.QUOTE_ALL)  # double quote the nested content

final = pd.read_csv('enriched_final_1.csv', quoting=csv.QUOTE_ALL)  # read the csv file back

In [None]:
# Convert the batch-2 annotations from JSON to CSV.
# NOTE(review): annotated_reviews_2_final.json is not produced in this notebook;
# it is presumably a single JSON object mapping review_id -> annotation, like the
# batch-1 file — confirm. It is parsed with the json module rather than
# pd.read_json(lines=True), which also handles an empty mapping and annotations
# that are not dicts.
with open('annotated_reviews_2_final.json', 'r') as f:
    annotations = json.load(f)

df = pd.DataFrame(
    # Re-serialize each annotation so the nested content fits in one CSV cell.
    [(review_id, json.dumps(annotation)) for review_id, annotation in annotations.items()],
    columns=['review_id', 'annotated'],
)
df.to_csv('enriched_final_2.csv', index=False, quoting=csv.QUOTE_ALL)  # double quote the nested content

In [None]:
# Combine the two per-batch CSVs into one file.
# Both inputs are re-read from disk so this cell is idempotent: concatenating
# onto the existing `final` would append batch 2 again on every re-run.
final_1 = pd.read_csv('enriched_final_1.csv', quoting=csv.QUOTE_ALL)
final_2 = pd.read_csv('enriched_final_2.csv', quoting=csv.QUOTE_ALL)
final = pd.concat([final_1, final_2], ignore_index=True)  # combine the two dataframes

# index=False matches the per-batch CSVs and avoids writing an unnamed index column.
final.to_csv('enriched_final.csv', index=False, quoting=csv.QUOTE_ALL)

In [None]:
# Gather the per-batch annotation files into one JSON document.
# Each file's parsed content is appended as-is, so the output is a list with
# one entry per input file (not a single merged mapping).
json_files = ['annotated_reviews_1_final.json', 'annotated_reviews_2_final.json']
enriched_json = []
for json_path in json_files:
    with open(json_path, 'r') as source:
        batch = json.load(source)
    enriched_json.append(batch)

with open('enriched_final.json', 'w') as target:
    json.dump(enriched_json, target)