In [1]:
import numpy as np
import openai
import pandas as pd
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
import sys
import re
import time
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Add the path to the constants file to the system path
sys.path.append('../../')
from constants import *
from evaluation_utils import *
from ChatCompletion_OpenAI_API import *

# Hand the API key to the openai client.
# NOTE(review): OPENAI_API_KEY is not defined in this notebook; presumably it
# arrives via one of the star-imports above (likely `constants`) — confirm.
openai.api_key = OPENAI_API_KEY

# Absolute path of the amazon-beauty data folder. The relative path is resolved
# against the kernel's working directory (not the notebook file's own location),
# and the trailing notebook filename is stripped off to leave the directory.
current_dir = os.path.split(
    os.path.abspath("../../data/amazon-beauty/rating_prediction.ipynb")
)[0]
print(f"current directory: {current_dir}")

current directory: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/amazon-beauty


# Data Overview

In [2]:
# Full path of the merged review/metadata CSV inside the data directory
data_filename = 'large_merged_data.csv'
data_path = os.path.join(current_dir, data_filename)
print(f'data path: {data_path}')

data path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/amazon-beauty/large_merged_data.csv


In [3]:
# Load the merged dataset (all columns are kept) into a DataFrame
data = pd.read_csv(filepath_or_buffer=data_path)

# Print the column dtypes / non-null counts, then show the first three rows
data.info()
data.head(n=3)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9767 entries, 0 to 9766
Data columns (total 27 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   rating          9767 non-null   float64
 1   verified        9767 non-null   bool   
 2   reviewTime      9767 non-null   object 
 3   reviewerID      9767 non-null   object 
 4   asin            9767 non-null   object 
 5   reviewerName    9767 non-null   object 
 6   reviewText      9759 non-null   object 
 7   summary         9759 non-null   object 
 8   unixReviewTime  9767 non-null   object 
 9   vote            1487 non-null   object 
 10  style           6768 non-null   object 
 11  category        9767 non-null   object 
 12  tech1           2 non-null      object 
 13  description     9767 non-null   object 
 14  fit             0 non-null      float64
 15  title           9767 non-null   object 
 16  also_buy        9767 non-null   object 
 17  tech2           0 non-null      f

Unnamed: 0,rating,verified,reviewTime,reviewerID,asin,reviewerName,reviewText,summary,unixReviewTime,vote,...,tech2,brand,feature,rank,also_view,details,main_cat,similar_item,date,price
0,1.0,False,2015-08-25,A2RYSCZOPEXOCQ,9790787006,The Cat Next Door,"I use a lot of perfume, I go through a new bot...",This is not going to be my favorite scent.,2015-08-25,,...,,Jenna Jameson,[],298.0,"['B00357FTX8', 'B01NBID7FJ', 'B0017JT658']","{'Shipping Weight:': '12.8 ounces (', 'ASIN: '...",All Beauty,,,13.85
1,5.0,False,2001-06-08,A141OPVE376YFI,B000050B65,Paul G.,"First, a little background. I've switched bet...","Finally, a razor that lives up to the ads",2001-06-08,81.0,...,,Norelco,[],2.0,"['B01B1O9DOM', 'B00JITDVD2', 'B01KXV16DK', 'B0...",{},All Beauty,,,
2,5.0,False,2001-06-08,A141OPVE376YFI,B000050B65,Paul G.,"First, a little background. I've switched bet...","Finally, a razor that lives up to the ads",2001-06-08,81.0,...,,Norelco,[],2.0,"['B01B1O9DOM', 'B00JITDVD2', 'B01KXV16DK', 'B0...",{},All Beauty,,,


# Few-shot (OpenAI API)


+ For each user, we'll use 4 of their ratings as training data to predict ratings for the rest of their products. Finally, we'll evaluate the predictions against the actual ratings to calculate the overall RMSE and MAE.

+ The `rating_history_str` now includes both the title and the review text for each training row.

In [None]:
%%time

predict_ratings_few_shot_and_save(data,
                                      columns_for_training=ITEM_SIDE + USER_SIDE + INTERACTION_SIDE,
                                       columns_for_prediction=ITEM_SIDE,
                                       pause_every_n_users=PAUSE_EVERY_N_USERS,
                                       sleep_time=SLEEP_TIME,
                                       save_path='../../data/amazon-beauty/all_large_predictions_few_shot.csv')
# read csv file
merged_data_with_predictions = pd.read_csv('../../data/amazon-beauty/all_large_predictions_few_shot.csv')
merged_data_with_predictions.head(3)


In [None]:
# Evaluate the few-shot predictions loaded in `merged_data_with_predictions`
# by reporting RMSE and MAE over every row that has both ratings present.

# Pull the predicted and actual ratings out of the frame as plain lists
predicted_ratings = merged_data_with_predictions['few_shot_predicted_rating'].tolist()
actual_ratings = merged_data_with_predictions['actual_rating'].tolist()

# Keep only complete (actual, predicted) pairs. The frame was read back from
# CSV, so a missing prediction is NaN rather than None; an `is not None` test
# would let NaN through and the sklearn metrics would then reject the input.
# pd.notna() is False for both None and NaN, so either form is dropped.
filtered_list = [
    (actual, predicted)
    for actual, predicted in zip(actual_ratings, predicted_ratings)
    if pd.notna(actual) and pd.notna(predicted)
]

if not filtered_list:
    print("No valid predictions available for evaluation.")
else:
    # Unzip the surviving pairs back into two aligned sequences
    actual_ratings_filtered, predicted_ratings_filtered = zip(*filtered_list)

    # Evaluate the model's performance
    rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
    print(f'Root Mean Squared Error (RMSE): {rmse}')

    mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
    print(f'Mean Absolute Error (MAE): {mae}')


# 1 observation per reviewer - Few-shot OpenAI

In [5]:
%%time

predict_ratings_few_shot_and_save(data,
                                      columns_for_training=ITEM_SIDE + USER_SIDE + INTERACTION_SIDE,
                                       columns_for_prediction=ITEM_SIDE,
                                       obs_per_user=1,
                                       pause_every_n_users=PAUSE_EVERY_N_USERS,
                                       sleep_time=SLEEP_TIME,
                                       save_path='../../data/amazon-beauty/all_large_1_test_predictions_few_shot.csv')
# read csv file
merged_data_with_predictions = pd.read_csv('../../data/amazon-beauty/all_large_1_test_predictions_few_shot.csv')
merged_data_with_predictions.head(3)


Predicted rating for B0002564EE: 5.0
Predicted rating for B000050FDY: 5.0
Predicted rating for B000068PBO: 5.0
Predicted rating for B0017TZD7S: 5.0
Predicted rating for B00G5L867C: 5.0
Predicted rating for B0013NB7DW: 5.0
Predicted rating for B003X9YAL0: 4.0
Predicted rating for B0000530HU: 5.0
Predicted rating for B00L1BLGSW: 4.0
Predicted rating for B0013NB7DW: 5.0
Processed 10 users. Pausing for 60 seconds...
Predicted rating for B0168SXRR0: 3.0
Predicted rating for B000V5Z4J6: 5.0
Predicted rating for B0013NB7DW: 5.0
Predicted rating for B0002MQ9GK: 5.0
Predicted rating for B000H6A02A: 5.0


RetryError: RetryError[<Future at 0x134cb93d0 state=finished raised InvalidRequestError>]

In [None]:

# Evaluate the predictions held in `merged_data_with_predictions` by reporting
# RMSE and MAE over every row that has both ratings present.
# NOTE(review): the recorded run of the preceding prediction cell raised a
# RetryError before it reassigned `merged_data_with_predictions`, so on such a
# run this cell would score whatever frame an earlier cell left behind — verify
# the frame is the intended one before reading the metrics.

# Pull the predicted and actual ratings out of the frame as plain lists
predicted_ratings = merged_data_with_predictions['few_shot_predicted_rating'].tolist()
actual_ratings = merged_data_with_predictions['actual_rating'].tolist()

# Keep only complete (actual, predicted) pairs. The frame was read back from
# CSV, so a missing prediction is NaN rather than None; an `is not None` test
# would let NaN through and the sklearn metrics would then reject the input.
# pd.notna() is False for both None and NaN, so either form is dropped.
filtered_list = [
    (actual, predicted)
    for actual, predicted in zip(actual_ratings, predicted_ratings)
    if pd.notna(actual) and pd.notna(predicted)
]

if not filtered_list:
    print("No valid predictions available for evaluation.")
else:
    # Unzip the surviving pairs back into two aligned sequences
    actual_ratings_filtered, predicted_ratings_filtered = zip(*filtered_list)

    # Evaluate the model's performance
    rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
    print(f'Root Mean Squared Error (RMSE): {rmse}')

    mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
    print(f'Mean Absolute Error (MAE): {mae}')


# Limitations:

+ The model might not fully understand the nuanced relationships between products based on titles alone. Additional context or features might be needed for more accurate predictions.
+ This approach might be computationally expensive and slower than traditional matrix factorization or deep learning-based recommendation models, especially for a large number of users.

# References

+ https://platform.openai.com/docs/api-reference/authentication