In [1]:
import numpy as np
import openai
import pandas as pd
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
import sys
import re
import time
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Add the path to the constants file to the system path
sys.path.append('../../')
from constants import *
from evaluation_utils import *
from ChatCompletion_OpenAI_API import *

# OpenAI API Key
openai.api_key = OPENAI_API_KEY

# Get the current directory of the notebook
current_dir = os.path.dirname(os.path.abspath("../../data/amazon-beauty/rating_prediction.ipynb"))
print(f"current directory: {current_dir}")

current directory: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/amazon-beauty


# RMSE & MAE evaluation metrics

In [2]:
# Test
actual_ratings = [4, 4]  # Ground truth ratings
predicted_ratings = [3, 5]  # Predicted ratings
rmse, mae = calculate_rmse_and_mae(actual_ratings, predicted_ratings)
print("RMSE: ", rmse)
print("MAE: ", mae)


RMSE:  1.0
MAE:  1.0


# Data Overview

In [3]:
# Construct the path to data file
data_path = os.path.join(current_dir, 'large_merged_data.csv')
print(f'data path: {data_path}')

data path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/amazon-beauty/large_merged_data.csv


In [4]:
# Read the data
data = pd.read_csv(data_path)
# get sample data of NUM_SAMPLES rows
data.info()
# get neccessary columns
data = data[['title', 'rating', 'reviewText', 'reviewerID']]
data.head(3)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9767 entries, 0 to 9766
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   rating      9767 non-null   float64
 1   reviewerID  9767 non-null   object 
 2   asin        9767 non-null   object 
 3   reviewText  9759 non-null   object 
 4   summary     9759 non-null   object 
 5   category    9767 non-null   object 
 6   title       9767 non-null   object 
dtypes: float64(1), object(6)
memory usage: 534.3+ KB


Unnamed: 0,title,rating,reviewText,reviewerID
0,Jenna Jameson Heartbreaker Perfume for women 3...,1.0,"I use a lot of perfume, I go through a new bot...",A2RYSCZOPEXOCQ
1,Norelco 6885XL Deluxe Quadra Action Cord/Cordl...,5.0,"First, a little background. I've switched bet...",A141OPVE376YFI
2,Norelco 6885XL Deluxe Quadra Action Cord/Cordl...,5.0,"First, a little background. I've switched bet...",A141OPVE376YFI


# Zero-shot (OpenAI API)

+ We used the ``.drop_duplicates()`` method to get unique pairs of "title" and "reviewText". The predictions are then based on both the title and the corresponding review text for each unique pair.

In [5]:
%%time

# Function to predict rating using both title and reviewText
def predict_rating_zero_shot_with_review(title, review):
    return predict_rating_zero_shot_ChatCompletion(f"{title}. {review}")

# Iterate through the dataset and predict ratings
predicted_ratings = []
unique_pairs = data[['title', 'reviewText']].drop_duplicates().values
for idx, (title, review) in enumerate(unique_pairs):
    predicted_rating = predict_rating_zero_shot_with_review(title, review)
    print(f"Predicted rating for {title}: {predicted_rating}")
    predicted_ratings.append(predicted_rating)
    
    # Pause every PAUSE_EVERY_N_USERS rows
    if (idx + 1) % PAUSE_EVERY_N_USERS == 0:
        print(f"Pausing for {SLEEP_TIME} seconds...")
        time.sleep(SLEEP_TIME)

# Create a DataFrame with titles, reviewText and predicted ratings
predicted_ratings_df = pd.DataFrame({
    'title': unique_pairs[:, 0],
    'reviewText': unique_pairs[:, 1],
    'zero_shot_predicted_rating': predicted_ratings
})

# Merge the predicted ratings with the original data
merged_data_with_predictions = pd.merge(data, predicted_ratings_df, on=['title', 'reviewText'])

# Save the merged data with predictions to a new CSV file
merged_data_with_predictions.to_csv('../../data/amazon-beauty/reviewText_large_predictions_zero_shot.csv', index=False)


RetryError: RetryError[<Future at 0x131fe2450 state=finished raised APIError>]

In [None]:
# evaluate the rating prediction model

product_titles = merged_data_with_predictions['title']
actual_ratings = merged_data_with_predictions['rating']

# Remove None predictions if any
actual_ratings_filtered, predicted_ratings_filtered = zip(*[(actual, predicted) for actual, predicted in zip(actual_ratings, predicted_ratings) if predicted is not None])

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
print(f'Root Mean Squared Error (RMSE): {rmse}')

# Calculate MAE
mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
print(f'Mean Absolute Error (MAE): {mae}')


# Few-shot (OpenAI API)


+ For each user, we'll use 4 of their ratings as training data to predict ratings for the rest of their products. Finally, we'll evaluate the predictions against the actual ratings to calculate the overall RMSE and MAE.

+ The rating_history_str now includes both the title and the review text for each of the training data rows

In [None]:
%%time

# Function to predict rating using both title and reviewText with user's rating history
def predict_rating_few_shot_with_review(title, review, rating_history_str):
    return predict_rating_few_shot_ChatCompletion(f"{title}. {review}", rating_history_str)

predicted_ratings = []
actual_ratings = []

# For each user in the dataset
users = data['reviewerID'].unique()
for idx, reviewerID in enumerate(users):
    user_data = data[data['reviewerID'] == reviewerID]
    
    # Check if the user has at least 5 ratings
    if len(user_data) >= 5:
        train_data = user_data.sample(4, random_state=RANDOM_STATE)
        test_data = user_data.drop(train_data.index)

        # For each product in the testing set, use the training data to predict a rating
        for _, test_row in test_data.iterrows():
            rating_history_str = ', '.join([f"{row['title']} ({row['rating']} stars): {row['reviewText']}" for _, row in train_data.iterrows()])
            predicted_rating = predict_rating_few_shot_with_review(test_row['title'], test_row['reviewText'], rating_history_str)
            
            predicted_ratings.append(predicted_rating)
            actual_ratings.append(test_row['rating'])
    
    # Introduce a pause after processing every PAUSE_EVERY_N_USERS
    if (idx + 1) % PAUSE_EVERY_N_USERS == 0:
        print(f"Processed {idx + 1} users. Pausing for {SLEEP_TIME} seconds...")
        time.sleep(SLEEP_TIME)

# Save the predicted ratings to a new CSV file
predicted_ratings_df = pd.DataFrame({
    'few_shot_predicted_rating': predicted_ratings,
    'actual_rating': actual_ratings
})
predicted_ratings_df.to_csv('../../data/amazon-beauty/reviewText_large_predictions_few_shot.csv', index=False)

predicted_ratings_df.head(3)


In [None]:
# load data from ../../data/amazon-beauty/large_predictions_few_shot.csv file
large_predictions_few_shot = pd.read_csv('../../data/amazon-beauty/reviewText_large_predictions_few_shot.csv')
large_predictions_few_shot.head(NUM_EXAMPLES)

In [None]:
# convert few_shot_predicted_rating column to list
predicted_ratings = large_predictions_few_shot['few_shot_predicted_rating'].tolist()
# convert actual_rating column to list
actual_ratings = large_predictions_few_shot['actual_rating'].tolist()
filtered_list = [(actual, predicted) for actual, predicted in zip(actual_ratings, predicted_ratings) if predicted is not None]

if not filtered_list:
    print("No valid predictions available for evaluation.")
else:
    actual_ratings_filtered, predicted_ratings_filtered = zip(*filtered_list)
    # Evaluate the model's performance
    rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
    print(f'Root Mean Squared Error (RMSE): {rmse}')

    mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
    print(f'Mean Absolute Error (MAE): {mae}')


# 1 observation per reviewer - Few-shot OpenAI

In [None]:
%%time

# Function to predict rating using both title and reviewText with user's rating history
def predict_rating_few_shot_with_review(title, review, rating_history_str):
    return predict_rating_few_shot_ChatCompletion(f"{title}. {review}", rating_history_str)


predicted_ratings = []
actual_ratings = []

# For each user in the dataset
users = data['reviewerID'].unique()
for idx, reviewerID in enumerate(users):
    user_data = data[data['reviewerID'] == reviewerID]
    
    # Check if the user has at least 5 ratings
    if len(user_data) >= 5:
        # Sample one observation for the test set
        test_data = user_data.sample(1, random_state=RANDOM_STATE)
        
        # Use the remaining data for training
        train_data = user_data.drop(test_data.index)

        # For the single product in the testing set, use the training data to predict a rating
        for _, test_row in test_data.iterrows():
            rating_history_str = ', '.join([f"{row['title']} ({row['rating']} stars): {row['reviewText']}" for _, row in train_data.iterrows()])
            predicted_rating = predict_rating_few_shot_with_review(test_row['title'], test_row['reviewText'], rating_history_str)
            
            predicted_ratings.append(predicted_rating)
            actual_ratings.append(test_row['rating'])
    
    # Introduce a pause after processing every PAUSE_EVERY_N_USERS
    if (idx + 1) % PAUSE_EVERY_N_USERS == 0:
        print(f"Processed {idx + 1} users. Pausing for {SLEEP_TIME} seconds...")
        time.sleep(SLEEP_TIME)

# Save the predicted ratings to a new CSV file
predicted_ratings_df = pd.DataFrame({
    'few_shot_predicted_rating': predicted_ratings,
    'actual_rating': actual_ratings
})

predicted_ratings_df.to_csv('../../data/amazon-beauty/reviewText_large_1_test_predictions_few_shot.csv', index=False)

predicted_ratings_df.head(NUM_EXAMPLES)

In [None]:
large_predictions_few_shot = pd.read_csv('../../data/amazon-beauty/reviewText_large_1_test_predictions_few_shot.csv')
large_predictions_few_shot.head(NUM_EXAMPLES)
# convert few_shot_predicted_rating column to list
predicted_ratings = large_predictions_few_shot['few_shot_predicted_rating'].tolist()
# convert actual_rating column to list
actual_ratings = large_predictions_few_shot['actual_rating'].tolist()
filtered_list = [(actual, predicted) for actual, predicted in zip(actual_ratings, predicted_ratings) if predicted is not None]

if not filtered_list:
    print("No valid predictions available for evaluation.")
else:
    actual_ratings_filtered, predicted_ratings_filtered = zip(*filtered_list)
    # Evaluate the model's performance
    rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
    print(f'Root Mean Squared Error (RMSE): {rmse}')

    mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
    print(f'Mean Absolute Error (MAE): {mae}')


# Limitations:

The model might not fully understand the nuanced relationships between products based on titles alone. Additional context or features might be needed for more accurate predictions.
This approach might be computationally expensive and slower than traditional matrix factorization or deep learning-based recommendation models, especially for a large number of users.

# References

+ https://platform.openai.com/docs/api-reference/authentication