In [1]:
import numpy as np
import openai
import pandas as pd
import os
import sys
import time

# Put the directory two levels above the current working directory on the
# module search path so the project-local modules imported below resolve.
# NOTE(review): this is relative to the kernel's working directory, so the
# notebook must be started from its own folder — confirm.
sys.path.append('../../')
# NOTE(review): these star imports hide where names come from. The rest of the
# notebook relies on them for at least: OPENAI_API_KEY, NUM_EXAMPLES,
# PAUSE_EVERY_N_USERS, SLEEP_TIME, get_rec_sys_directory,
# predict_ratings_zero_shot_and_save, rerun_failed_zero_shot_predictions,
# predict_ratings_few_shot_and_save and evaluate_model_predictions_rmse_mae.
# Which module defines each name is not visible here — verify before
# converting to explicit imports.
from constants import *
from evaluation_utils import *
from path_utils import *
from ChatCompletion_OpenAI_API import *

# Hand the API key to the openai client module. OPENAI_API_KEY is supplied by
# one of the star imports above (presumably `constants` — TODO confirm).
openai.api_key = OPENAI_API_KEY



In [2]:
# --- Resolve every location this notebook reads from or writes to ---------
# Source-code folder of the project, as reported by the path helper.
rec_sys_dir = get_rec_sys_directory()
# Data folder that lives inside the source-code folder.
DATA_DIR = os.path.join(rec_sys_dir, 'data')
# Input: the merged ratings CSV.
data_path = os.path.join(DATA_DIR, 'movie-ml-latest-small/merged_data.csv')
# Output: where the zero-shot predictions are saved.
ZERO_SHOT_SAVE_PATH = os.path.join(DATA_DIR, 'movie-ml-latest-small/output/title_large_predictions_zero_shot.csv')
# Output: where the few-shot (1 observation per user) predictions are saved.
FEW_SHOT_1_OBS_SAVE_PATH = os.path.join(DATA_DIR, 'movie-ml-latest-small/output/title_large_1_test_predictions_few_shot.csv')

# --- Echo the resolved locations so a reader can verify them ---------------
print(f"Rec-sys directory: {rec_sys_dir}")
print(f"Data directory: {DATA_DIR}")
print(f'Data path: {data_path}')
print(f'Zero shot save path: {ZERO_SHOT_SAVE_PATH}')
print(f'Few shot save path: {FEW_SHOT_1_OBS_SAVE_PATH}')

Rec-sys directory: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys
Data directory: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data
Data path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/movie-ml-latest-small/merged_data.csv
Zero shot save path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/movie-ml-latest-small/output/title_large_predictions_zero_shot.csv
Few shot save path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/rec-sys/data/movie-ml-latest-small/output/title_large_1_test_predictions_few_shot.csv


# Data Overview

In [3]:
# Load the merged ratings CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=data_path)

# Print the column/dtype summary, then display the first NUM_EXAMPLES rows
# (the last expression is what the cell renders as its output).
data.info()
data.head(NUM_EXAMPLES)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3405 entries, 0 to 3404
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   movieId  3405 non-null   int64  
 1   imdbId   3405 non-null   int64  
 2   tmdbId   3405 non-null   float64
 3   title    3405 non-null   object 
 4   genres   3405 non-null   object 
 5   userId   3405 non-null   int64  
 6   rating   3405 non-null   float64
 7   tag      3405 non-null   object 
dtypes: float64(2), int64(3), object(3)
memory usage: 212.9+ KB


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag
0,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,336,4.0,pixar
1,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,474,4.0,pixar
2,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,567,3.5,fun
3,2,113497,8844.0,Jumanji (1995),Adventure|Children|Fantasy,62,4.0,fantasy
4,2,113497,8844.0,Jumanji (1995),Adventure|Children|Fantasy,62,4.0,magic board game


# Zero-shot (OpenAI API)

In [37]:
%%time

# Zero-shot run: the prompt is built from the movie title only, and the
# predictions are written to ZERO_SHOT_SAVE_PATH. Judging by the saved
# output, the helper prints each constructed prompt as it goes.
predict_ratings_zero_shot_and_save(
    data,
    columns_for_prediction=['title'],
    user_column_name='userId',
    title_column_name='title',
    asin_column_name='movieId',
    pause_every_n_users=PAUSE_EVERY_N_USERS,
    sleep_time=SLEEP_TIME,
    save_path=ZERO_SHOT_SAVE_PATH,
)

Constructed Prompt for zero-shot approach:

The prompt:
**********
How will user rate this title: Toy Story (1995)? (1 being lowest and 5 being highest) Attention! Just give me back the exact number as a result, and you don't need a lot of text.

Based on the above information, please predict user's rating for the product: (1 being lowest and 5 being highest, The output should be like: (x stars, xx%), do not explain the reason.)
**********

Unexpected Error: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Constructed Prompt for zero-shot approach:

The prompt:
**********
How will user rate this title: Toy Story (1995)? (1 being lowest and 5 being highest) Attention! Just give me back the exact number as a result, and you don't need a lot of text.

Based on the above information, please predict user's rating for the product: (1 being lowest and 5 being highest, The output should be like: (x stars, xx%), do not

In [4]:
import pandas as pd

# Load the saved zero-shot predictions into their own frame. A dedicated name
# is used (rather than rebinding `data`) so the ratings frame that the
# prediction cells pass in is not replaced by a frame with different columns.
zero_shot_predictions = pd.read_csv(ZERO_SHOT_SAVE_PATH)

# Flag rows whose predicted rating parses as a number. errors='coerce' turns
# anything unparseable (e.g. a stored API error message) into NaN, so
# notna() is True only for usable predictions.
zero_shot_predictions['is_rating_float'] = pd.to_numeric(
    zero_shot_predictions['predicted_rating'], errors='coerce'
).notna()

# Keep only the rows that failed the numeric conversion.
non_float_ratings = zero_shot_predictions[~zero_shot_predictions['is_rating_float']]

# Number of rows with non-float ratings
print(f"Number of rows with non-float ratings: {len(non_float_ratings)}")

# Display a small sample of the failing rows
print("Rows with non-float ratings:")
non_float_ratings.head(3)


Number of rows with non-float ratings: 7
Rows with non-float ratings:


Unnamed: 0,user_id,item_id,title,actual_rating,predicted_rating,is_rating_float
0,336,1,Toy Story (1995),4.0,"(None, ""Error communicating with OpenAI: ('Con...",False
617,599,924,2001: A Space Odyssey (1968),5.0,"(None, 'The server is overloaded or not ready ...",False
1309,474,2943,Indochine (1992),3.5,"(None, 'The server is overloaded or not ready ...",False


In [6]:
%%time

# Load the saved zero-shot predictions under a dedicated name so the ratings
# frame bound to `data` is not replaced by a frame with different columns.
zero_shot_predictions = pd.read_csv(ZERO_SHOT_SAVE_PATH)

# Rerun predictions for failed cases and save the updated data
rerun_save_path = os.path.join(DATA_DIR, 'movie-ml-latest-small/output/rerun_title_large_predictions_zero_shot.csv')
columns_for_prediction = ['title']
updated_data = rerun_failed_zero_shot_predictions(
    zero_shot_predictions,
    ZERO_SHOT_SAVE_PATH,
    rerun_save_path,
    columns_for_prediction,
    PAUSE_EVERY_N_USERS,
    SLEEP_TIME,
)

# Keep only rows whose prediction parses as a number; anything else
# (e.g. a stored error message) becomes NaN under errors='coerce'.
has_numeric_rating = pd.to_numeric(updated_data['predicted_rating'], errors='coerce').notna()
cleaned_data = updated_data[has_numeric_rating]

# Make the filtering visible: dropped rows are excluded from the metrics below.
print(f"Dropped {len(updated_data) - len(cleaned_data)} of {len(updated_data)} rows with non-numeric predictions")

# NOTE: this overwrites the zero-shot prediction file in place, so the dropped
# rows are no longer present in ZERO_SHOT_SAVE_PATH afterwards.
cleaned_data.to_csv(ZERO_SHOT_SAVE_PATH, index=False)

# Evaluate the model predictions
evaluate_model_predictions_rmse_mae(ZERO_SHOT_SAVE_PATH, NUM_EXAMPLES, 'actual_rating', 'predicted_rating')


RMSE: 2.1204 (95% CI: (2.0606, 2.1791)) ± 0.0006
MAE: 1.4233 (95% CI: (1.3714, 1.4757)) ± 0.0005

First few actual vs predicted ratings:
Actual: 4.0, Predicted: 5.0000
Actual: 3.5, Predicted: 5.0000
Actual: 4.0, Predicted: 4.0000
Actual: 4.0, Predicted: 4.0000
Actual: 4.0, Predicted: 4.0000
CPU times: user 15.2 s, sys: 12.7 ms, total: 15.2 s
Wall time: 15.4 s


# Few-shot (OpenAI API)


+ For each user, we'll use 4 of their ratings as training data to predict ratings for the rest of their products. Finally, we'll evaluate the predictions against the actual ratings to calculate the overall RMSE and MAE.

+ The rating history in the prompt includes only the title and the user's rating for each training row, because `columns_for_training=['title']`; no review text is used in this notebook.

# 1 observation per reviewer - Few-shot OpenAI

In [15]:
%%time

# Load the ratings fresh from data_path instead of relying on whatever `data`
# currently holds: the call below needs the merged ratings frame with the
# `userId` / `movieId` columns, and re-reading keeps this cell independent of
# the state left behind by earlier cells (safe under Restart & Run All).
ratings_data = pd.read_csv(data_path)

# Few-shot run: title-only rating history and title-only target, one test
# observation per user, predictions written to FEW_SHOT_1_OBS_SAVE_PATH.
predict_ratings_few_shot_and_save(
    ratings_data,
    columns_for_training=['title'],
    columns_for_prediction=['title'],
    user_column_name='userId',
    asin_column_name='movieId',
    obs_per_user=1,
    pause_every_n_users=PAUSE_EVERY_N_USERS,
    sleep_time=SLEEP_TIME,
    save_path=FEW_SHOT_1_OBS_SAVE_PATH,
)


Constructed Prompt for few-shot approach:

The prompt:
**********
How will user rate this title: Sin City (2005)? (1 being lowest and 5 being highest) Attention! Just give me back the exact number as a result, and you don't need a lot of text.

Here is user rating history:
* title: Corpse Bride (2005) - Rating: 2.5 stars
* title: Wedding Crashers (2005) - Rating: 4.0 stars
* title: Lord of War (2005) - Rating: 4.0 stars
* title: Cinderella Man (2005) - Rating: 4.5 stars

Based on the above information, please predict user's rating for the product: (1 being lowest and 5 being highest, The output should be like: (x stars, xx%), do not explain the reason.)
**********



System Fingerprint: fp_eeff13170a

API call response: "3.5 stars"
Extracted rating: 3.5
Constructed Prompt for few-shot approach:

The prompt:
**********
How will user rate this title: 101 Dalmatians (1996)? (1 being lowest and 5 being highest) Attention! Just give me back the exact number as a result, and you don't need a

In [23]:
import pandas as pd

# Load the saved few-shot predictions into their own frame instead of
# rebinding `data`, so re-running an earlier cell that expects the ratings
# frame in `data` does not pick up this differently-shaped table.
few_shot_predictions = pd.read_csv(FEW_SHOT_1_OBS_SAVE_PATH)

# Flag rows whose predicted rating parses as a number. errors='coerce' turns
# anything unparseable into NaN, so notna() is True only for usable values.
few_shot_predictions['is_rating_float'] = pd.to_numeric(
    few_shot_predictions['predicted_rating'], errors='coerce'
).notna()

# Keep only the rows that failed the numeric conversion.
non_float_ratings = few_shot_predictions[~few_shot_predictions['is_rating_float']]

# total number of rows with non-float ratings
print(f"Total number of rows with non-float ratings: {len(non_float_ratings)}")

# Display a small sample of the failing rows
print("Rows with non-float ratings:")
non_float_ratings.head(3)


Total number of rows with non-float ratings: 0
Rows with non-float ratings:


Unnamed: 0,user_id,item_id,title,actual_rating,predicted_rating,is_rating_float


In [24]:
# Score the saved few-shot predictions against the actual ratings. Per the
# saved output, this reports RMSE and MAE and lists a few actual/predicted pairs.
evaluate_model_predictions_rmse_mae(
    data_path=FEW_SHOT_1_OBS_SAVE_PATH, num_examples=NUM_EXAMPLES,
    actual_ratings_column='actual_rating', predicted_ratings_column='predicted_rating',
)

RMSE: 0.9587 (95% CI: (0.7071, 1.2107)) ± 0.0025
MAE: 0.7743 (95% CI: (0.5455, 1.0227)) ± 0.0024

First few actual vs predicted ratings:
Actual: 4.0, Predicted: 3.5000
Actual: 2.0, Predicted: 3.0000
Actual: 3.5, Predicted: 4.0000
Actual: 4.0, Predicted: 3.5000
Actual: 3.5, Predicted: 3.0000


# Limitations:

+ The model might not fully understand the nuanced relationships between movies based on titles alone. Additional context or features might be needed for more accurate predictions.
+ This approach might be computationally expensive and slower than traditional matrix factorization or deep learning-based recommendation models, especially for a large number of users.

# References

+ https://platform.openai.com/docs/api-reference/authentication