In [None]:
import os
import re
import sys
import time

import openai
import pandas as pd
from tenacity import retry, wait_random_exponential, stop_after_attempt
# Add the path to the constants file to the system path
sys.path.append('../../')
# GPT_MODEL_NAME and TEMPERATURE are the default arguments of predict_rating()
# and must be in scope when that function is defined.
# NOTE(review): assumes constants.py defines both names — confirm.
from constants import GPT_MODEL_NAME, OPENAI_API_KEY, RANDOM_STATE, TEMPERATURE

# OpenAI API Key
openai.api_key = OPENAI_API_KEY


In [None]:
# Helper functions for constructing file paths
def get_absolute_path(relative_path):
    """Join *relative_path* onto the directory that contains this file.

    When ``__file__`` is not defined (for example in a notebook or an
    interactive session) the current working directory is used as the base
    instead of failing with ``NameError``.

    Args:
        relative_path: Path relative to the base directory.

    Returns:
        The joined path. ``..`` components are not collapsed.
    """
    try:
        base_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # __file__ does not exist outside of a regular module/script run.
        base_dir = os.getcwd()
    return os.path.join(base_dir, relative_path)

def get_data_path(filename):
    """Return the path of *filename* inside the amazon-beauty data directory.

    The location is built relative to this file via ``get_absolute_path``.
    """
    relative_location = os.path.join('../../data/amazon-beauty', filename)
    return get_absolute_path(relative_location)

# Phrasings requested by the few-shot prompt: "<n> star(s)", "rating <n>",
# "would be <n>". Matched case-insensitively so "4 Stars" is accepted too.
_KEYWORD_RATING_RE = re.compile(
    r'(\d+(?:\.\d*)?)\s*(?=stars?)|rating\s*(\d+(?:\.\d*)?)|would be\s*(\d+(?:\.\d*)?)',
    re.IGNORECASE,
)
# A reply that consists of nothing but a number (optionally ending in a
# period), which is exactly what the zero-shot prompt asks for.
_BARE_RATING_RE = re.compile(r'(\d+(?:\.\d*)?)\.?')


def _parse_rating(rating_text):
    """Return the rating contained in *rating_text*, or None if there is none.

    A value outside the accepted range 0.5–5.0 is treated as "no rating".
    """
    keyword_match = _KEYWORD_RATING_RE.search(rating_text)
    if keyword_match:
        # Exactly one of the alternation's capture groups took part in the match.
        number = next(g for g in keyword_match.groups() if g is not None)
    else:
        bare_match = _BARE_RATING_RE.fullmatch(rating_text)
        if bare_match is None:
            return None
        number = bare_match.group(1)

    rating = float(number)
    return rating if 0.5 <= rating <= 5.0 else None


# Decorator for retrying function execution
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def predict_rating(title, model=GPT_MODEL_NAME, temperature=TEMPERATURE, rating_history=None):
    """Ask the chat model to predict a star rating for a product title.

    The call is retried with random exponential back-off (up to 6 attempts)
    whenever an exception escapes the function, e.g. from the API request.

    Args:
        title: Product title to be rated.
        model: Chat model name passed to the OpenAI API.
        temperature: Sampling temperature passed to the OpenAI API.
        rating_history: Optional text describing the user's earlier ratings.
            When truthy a few-shot prompt is built from it; otherwise a
            zero-shot prompt is used.

    Returns:
        The parsed rating as a float in the range 0.5–5.0, or 0 when no valid
        rating could be extracted from the model's reply.
    """
    if rating_history:
        # Few-shot prediction
        prompt = (f"Here is the user's rating history: {rating_history}. "
                  f"Based on the above rating history, how many stars would you rate the product: '{title}'? "
                  f"(Provide a number between 1 and 5, either followed by the word 'stars' or preceded by the words 'would be'). "
                  f"Attention! Keep the response concise.")
    else:
        # Zero-shot prediction
        prompt = f"How will users rate this product title: '{title}'? (1 being lowest and 5 being highest) Attention! Just give me back the exact whole number as a result, and you don't need a lot of text."

    response = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": "You are an Amazon Beauty products critic."},
            {"role": "user", "content": prompt}
        ]
    )

    rating_text = response.choices[0].message['content'].strip()
    rating = _parse_rating(rating_text)
    if rating is None:
        # Keep the sentinel 0 so downstream code can spot unparsable replies.
        print(f"Unexpected response for '{title}': {rating_text}")
        return 0
    return rating

def _zero_shot_predictions(data):
    """Predict one rating per unique title and join the result onto *data*."""
    titles = data['title'].unique()
    predictions = pd.DataFrame({
        'title': titles,
        'zero_shot_predicted_rating': [predict_rating(title) for title in titles],
    })
    return pd.merge(data, predictions, on='title')


def _few_shot_predictions(data):
    """Predict held-out ratings per user from four sampled ratings of that user."""
    predicted_ratings = []
    actual_ratings = []
    # sort=False keeps users in order of first appearance; one grouping pass
    # replaces a full boolean-mask scan of the frame for every user.
    for _, user_data in data.groupby('reviewerID', sort=False):
        if len(user_data) < 5:
            continue
        train_data = user_data.sample(4, random_state=RANDOM_STATE)
        test_data = user_data.drop(train_data.index)
        # The history depends only on the sampled rows, so build it once per user.
        rating_history_str = ', '.join(
            f"{row['title']} ({row['rating']} stars)" for _, row in train_data.iterrows()
        )
        for _, test_row in test_data.iterrows():
            predicted_ratings.append(
                predict_rating(test_row['title'], rating_history=rating_history_str)
            )
            actual_ratings.append(test_row['rating'])
    return pd.DataFrame({
        'few_shot_predicted_rating': predicted_ratings,
        'actual_rating': actual_ratings
    })


# Main function to execute the rating prediction and save results
def execute_rating_prediction(data, mode="zero-shot"):
    """Run rating prediction over *data* and write the results to a CSV file.

    Args:
        data: DataFrame of reviews. The zero-shot mode reads the ``title``
            column; the few-shot mode reads ``reviewerID``, ``title`` and
            ``rating``.
        mode: ``"zero-shot"`` predicts one rating per unique title and saves
            the input rows with the prediction attached to
            ``predictions_zero_shot.csv``. Any other value runs the few-shot
            path: for every user with at least five rows, four rows are
            sampled as rating history and the remaining rows are predicted;
            predicted and actual ratings are saved to
            ``large_predictions_few_shot.csv``.

    Returns:
        The DataFrame that was written to disk.
    """
    if mode == "zero-shot":
        results = _zero_shot_predictions(data)
        output_name = 'predictions_zero_shot.csv'
    else:
        results = _few_shot_predictions(data)
        output_name = 'large_predictions_few_shot.csv'

    results.to_csv(get_data_path(output_name), index=False)
    return results

# Example usage
# Guarded so that importing this module does not read the dataset or issue
# API requests; running it as a script or notebook cell behaves as before.
if __name__ == "__main__":
    data = pd.read_csv(get_data_path('large_merged_data.csv'))
    execute_rating_prediction(data, mode="zero-shot")
