In [1]:
import numpy as np
import openai
import pandas as pd
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
import sys
import re
import time
from tenacity import retry, wait_random_exponential, stop_after_attempt

# Add the path to the constants file to the system path
sys.path.append('../../')
from constants import *
from evaluation_utils import *
from ChatCompletion_OpenAI_API import *

# Configure the openai client with the API key.
# NOTE(review): OPENAI_API_KEY is not defined in this notebook; presumably it is
# provided by one of the star imports above (likely `constants`) — confirm.
openai.api_key = OPENAI_API_KEY

# Absolute path of the amazon-beauty data folder, resolved relative to the
# current working directory (the notebook file name is only used as an anchor
# whose parent directory is taken).
current_dir = os.path.split(os.path.abspath("../../data/amazon-beauty/rating_prediction.ipynb"))[0]
print(f"current directory: {current_dir}")

current directory: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/amazon-beauty


# RMSE & MAE evaluation metrics

In [None]:
# Smoke-test the metric helper on a two-item toy example:
# ground-truth ratings on the left, predicted ratings on the right.
actual_ratings, predicted_ratings = [4, 4], [3, 5]
rmse, mae = calculate_rmse_and_mae(actual_ratings, predicted_ratings)
for metric_label, metric_value in (("RMSE: ", rmse), ("MAE: ", mae)):
    print(metric_label, metric_value)


# Data Overview

In [2]:
# Build the full path to the merged CSV that lives in the dataset folder
data_file_name = 'merged_data.csv'
data_path = os.path.join(current_dir, data_file_name)
print(f'data path: {data_path}')

data path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/amazon-beauty/merged_data.csv


In [3]:
# Load the merged CSV and keep only the four columns used in this notebook
data = pd.read_csv(data_path).loc[:, ['title', 'rating', 'reviewText', 'reviewerID']]
# Print the schema summary, then preview the first three rows
data.info()
data.head(3)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34 entries, 0 to 33
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   title       34 non-null     object 
 1   rating      34 non-null     float64
 2   reviewText  34 non-null     object 
 3   reviewerID  34 non-null     object 
dtypes: float64(1), object(3)
memory usage: 1.2+ KB


Unnamed: 0,title,rating,reviewText,reviewerID
0,Reach Dentotape Waxed Dental Floss with Extra ...,5.0,best floss i've used. does not break as easily...,ANV9L0JU6BNL
1,Reach Dentotape Waxed Dental Floss with Extra ...,5.0,best floss i've used. does not break as easily...,ANV9L0JU6BNL
2,Citre Shine Moisture Burst Shampoo - 16 fl oz,2.0,Doesnt smell,A2TU781PWGS09X


# Zero-shot (OpenAI API)

+ We used the ``.drop_duplicates()`` method to get unique pairs of "title" and "reviewText". The predictions are then based on both the title and the corresponding review text for each unique pair.

In [4]:
%%time

# One variable for the path that is passed as save_path and then read back
zero_shot_save_path = '../../data/amazon-beauty/reviewText_small_predictions_zero_shot.csv'

predict_ratings_zero_shot_and_save(
    data,
    columns_for_training=['title', 'reviewText'],
    columns_for_prediction=['title'],
    pause_every_n_users=PAUSE_EVERY_N_USERS,
    sleep_time=SLEEP_TIME,
    save_path=zero_shot_save_path,
)

# Reload the saved predictions and preview the first three rows
merged_data_with_predictions = pd.read_csv(zero_shot_save_path)
merged_data_with_predictions.head(3)


Predicted rating for ['Reach Dentotape Waxed Dental Floss with Extra Wide Cleaning Surface for Large Spaces between Teeth, Unflavored, 100 Yards'
 "best floss i've used. does not break as easily as others, and i have tight teeth."]: 4.0
Predicted rating for ['Citre Shine Moisture Burst Shampoo - 16 fl oz' 'Doesnt smell']: 4.0
Predicted rating for ['Citre Shine Moisture Burst Shampoo - 16 fl oz' 'My favorite powder!']: 4.0
Predicted rating for ['Bonne Bell Smackers Bath and Body Starburst Collection' 'Doesnt smell']: 4.0
Predicted rating for ['Bonne Bell Smackers Bath and Body Starburst Collection'
 'My favorite powder!']: 4.0


In [None]:
# Evaluate the zero-shot rating predictions against the ground-truth ratings

product_titles = merged_data_with_predictions['title']
actual_ratings = merged_data_with_predictions['rating']
predicted_ratings = merged_data_with_predictions['predicted_rating']

# Drop rows without a prediction. After a round-trip through pd.read_csv a
# missing value is NaN (never None), so test with pd.notna rather than
# `is not None`; pd.notna also rejects None, so both cases are covered.
filtered_list = [
    (actual, predicted)
    for actual, predicted in zip(actual_ratings, predicted_ratings)
    if pd.notna(predicted)
]

if not filtered_list:
    # zip(*[]) cannot be unpacked into two names, so bail out explicitly
    print("No valid predictions available for evaluation.")
else:
    actual_ratings_filtered, predicted_ratings_filtered = zip(*filtered_list)

    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
    print(f'Root Mean Squared Error (RMSE): {rmse}')

    # Calculate MAE
    mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
    print(f'Mean Absolute Error (MAE): {mae}')


# Few-shot (OpenAI API)


+ For each user, we'll use 4 of their ratings as training data to predict ratings for the rest of their products. Finally, we'll evaluate the predictions against the actual ratings to calculate the overall RMSE and MAE.

+ The rating_history_str now includes both the title and the review text for each of the training data rows

In [None]:
%%time

# Use a single path for both saving and reloading so the two cannot drift apart.
# (The save path previously ended in '.csv.csv' while the reload used '.csv'.)
few_shot_save_path = '../../data/amazon-beauty/reviewText_small_predictions_few_shot.csv'

predict_ratings_few_shot_and_save(
    data,
    columns_for_training=['title', 'reviewText'],
    columns_for_prediction=['title'],
    obs_per_user=None,
    pause_every_n_users=PAUSE_EVERY_N_USERS,
    sleep_time=SLEEP_TIME,
    save_path=few_shot_save_path,
)

# Reload the saved few-shot predictions and preview the first NUM_EXAMPLES rows
small_predictions_few_shot = pd.read_csv(few_shot_save_path)
small_predictions_few_shot.head(NUM_EXAMPLES)


In [None]:
# Evaluate the few-shot predictions against the actual ratings

# convert few_shot_predicted_rating column to list
predicted_ratings = small_predictions_few_shot['few_shot_predicted_rating'].tolist()
# convert actual_rating column to list
actual_ratings = small_predictions_few_shot['actual_rating'].tolist()

# Keep only rows that have a prediction. Values loaded by pd.read_csv are NaN
# when missing (never None), so `is not None` would let them through;
# pd.notna rejects both NaN and None.
filtered_list = [
    (actual, predicted)
    for actual, predicted in zip(actual_ratings, predicted_ratings)
    if pd.notna(predicted)
]

if not filtered_list:
    print("No valid predictions available for evaluation.")
else:
    actual_ratings_filtered, predicted_ratings_filtered = zip(*filtered_list)
    # Evaluate the model's performance
    rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
    print(f'Root Mean Squared Error (RMSE): {rmse}')

    mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
    print(f'Mean Absolute Error (MAE): {mae}')


# 1 observation per reviewer - Few-shot OpenAI

In [None]:
%%time

# One variable for the path that is passed as save_path and then read back
one_obs_few_shot_save_path = '../../data/amazon-beauty/reviewText_small_1_test_predictions_few_shot.csv'

predict_ratings_few_shot_and_save(
    data,
    columns_for_training=['title', 'reviewText'],
    columns_for_prediction=['title'],
    obs_per_user=1,
    pause_every_n_users=PAUSE_EVERY_N_USERS,
    sleep_time=SLEEP_TIME,
    save_path=one_obs_few_shot_save_path,
)

# Reload the saved predictions and preview the first NUM_EXAMPLES rows
small_predictions_few_shot = pd.read_csv(one_obs_few_shot_save_path)
small_predictions_few_shot.head(NUM_EXAMPLES)

In [None]:

# Evaluate the one-observation-per-reviewer few-shot predictions

# convert few_shot_predicted_rating column to list
predicted_ratings = small_predictions_few_shot['few_shot_predicted_rating'].tolist()
# convert actual_rating column to list
actual_ratings = small_predictions_few_shot['actual_rating'].tolist()

# Keep only rows that have a prediction. Values loaded by pd.read_csv are NaN
# when missing (never None), so `is not None` would let them through;
# pd.notna rejects both NaN and None.
filtered_list = [
    (actual, predicted)
    for actual, predicted in zip(actual_ratings, predicted_ratings)
    if pd.notna(predicted)
]

if not filtered_list:
    print("No valid predictions available for evaluation.")
else:
    actual_ratings_filtered, predicted_ratings_filtered = zip(*filtered_list)
    # Evaluate the model's performance
    rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
    print(f'Root Mean Squared Error (RMSE): {rmse}')

    mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
    print(f'Mean Absolute Error (MAE): {mae}')


# Limitations:

The model might not fully understand the nuanced relationships between products based on titles alone. Additional context or features might be needed for more accurate predictions.
This approach might be computationally expensive and slower than traditional matrix factorization or deep learning-based recommendation models, especially for a large number of users, because predictions are requested from the API user by user.

# References

+ https://platform.openai.com/docs/api-reference/authentication