In [1]:
import numpy as np
import openai
import pandas as pd
import os
import sys
import time

# Add the path to the constants file to the system path
sys.path.append('../../../')
from constants import *
from evaluation_utils import *
from path_utils import *
from ChatCompletion_OpenAI_API import *

# Column names of the merged MovieLens frame that the helpers are told to use
USER_COLUMN_NAME = 'UserID'
ITEM_ID_COLUMN = 'MovieID'
TITLE_COLUMN_NAME = 'Title'
RATING_COLUMN_NAME = 'Rating'

# Value passed as `obs_per_user` to the few-shot helpers
TEST_OBSERVATION_PER_USER = 1

# Value passed as `system_content` to the prediction helpers
SYSTEM_CONTENT = MOVIELENS_CONTENT_SYSTEM

# Configure the OpenAI client with the key brought in by the star imports above
# (presumably defined in constants.py -- keep it out of version control)
openai.api_key = OPENAI_API_KEY




In [2]:
# Resolve every input/output location used by this notebook and echo it,
# so a reader can confirm where data is read from and written to.

# source code folder path
rec_sys_dir = get_rec_sys_directory()
print(f"Rec-sys directory: {rec_sys_dir}")

# data folder path (sibling of the source code folder)
DATA_DIR = os.path.join(rec_sys_dir, '../data')
print(f"Data directory: {DATA_DIR}")

# input data path
data_path = os.path.join(DATA_DIR, 'ml-1m/merged_data.dat')
print(f'Data path: {data_path}')

# zero-shot output
ZERO_SHOT_SAVE_PATH = os.path.join(DATA_DIR, 'ml-1m/output/title_zero_shot.dat')
print(f'ZERO_SHOT_SAVE_PATH path: {ZERO_SHOT_SAVE_PATH}')

# zero-shot rerun output (predictions re-requested for failed rows)
ZERO_SHOT_RERUN_PATH = os.path.join(DATA_DIR, 'ml-1m/output/rerun_title_zero_shot.dat')
print(f'ZERO_SHOT_RERUN_PATH path: {ZERO_SHOT_RERUN_PATH}')

# few shot save path
FEW_SHOT_1_OBS_SAVE_PATH = os.path.join(DATA_DIR, 'ml-1m/output/title_1_test_predictions_few_shot.csv')
print(f'Few shot save path: {FEW_SHOT_1_OBS_SAVE_PATH}')


# few shot rerun path
FEW_SHOT_1_OBS_RERUN_PATH = os.path.join(DATA_DIR, 'ml-1m/output/rerun_title_1_test_predictions_few_shot.csv')
# Print the rerun path itself (previously the save path was printed here by mistake)
print(f'Few shot rerun path: {FEW_SHOT_1_OBS_RERUN_PATH}')

Rec-sys directory: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/code/notebook
Data directory: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/code/notebook/../data
Data path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/code/notebook/../data/ml-1m/merged_data.dat
ZERO_SHOT_SAVE_PATH path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/code/notebook/../data/ml-1m/output/title_zero_shot.dat
ZERO_SHOT_RERUN_PATH path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/code/notebook/../data/ml-1m/output/rerun_title_zero_shot.dat
Few shot save path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/code/notebook/../data/ml-1m/output/title_1_test_predictions_few_shot.csv
Few shot rerun path: /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/code/notebook/../data/ml-1m/output/title_1_test_predictions_few_shot.csv


# Data Overview

In [3]:
# Load the merged ratings file into a DataFrame
data = pd.read_csv(data_path)

# Print the column/dtype summary, then display the first NUM_EXAMPLES rows
data.info()
data.head(n=NUM_EXAMPLES)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000209 entries, 0 to 1000208
Data columns (total 10 columns):
 #   Column      Non-Null Count    Dtype 
---  ------      --------------    ----- 
 0   UserID      1000209 non-null  int64 
 1   MovieID     1000209 non-null  int64 
 2   Rating      1000209 non-null  int64 
 3   Timestamp   1000209 non-null  int64 
 4   Gender      1000209 non-null  object
 5   Age         1000209 non-null  int64 
 6   Occupation  1000209 non-null  int64 
 7   Zip-code    1000209 non-null  object
 8   Title       1000209 non-null  object
 9   Genres      1000209 non-null  object
dtypes: int64(6), object(4)
memory usage: 76.3+ MB


Unnamed: 0,UserID,MovieID,Rating,Timestamp,Gender,Age,Occupation,Zip-code,Title,Genres
0,1,1193,5,978300760,F,1,10,48067,One Flew Over the Cuckoo's Nest (1975),Drama
1,2,1193,5,978298413,M,56,16,70072,One Flew Over the Cuckoo's Nest (1975),Drama
2,12,1193,4,978220179,M,25,12,32793,One Flew Over the Cuckoo's Nest (1975),Drama
3,15,1193,4,978199279,M,25,7,22903,One Flew Over the Cuckoo's Nest (1975),Drama
4,17,1193,5,978158471,M,50,1,95350,One Flew Over the Cuckoo's Nest (1975),Drama


# Zero-shot (OpenAI API)

In [5]:
%%time

predict_ratings_zero_shot_and_save(data,
                                       columns_for_prediction=[TITLE_COLUMN_NAME],
                                       user_column_name=USER_COLUMN_NAME,
                                       title_column_name=TITLE_COLUMN_NAME,
                                       asin_column_name=ITEM_ID_COLUMN,
                                       rating_column_name=RATING_COLUMN_NAME,
                                       pause_every_n_users=PAUSE_EVERY_N_USERS,
                                       sleep_time=SLEEP_TIME,
                                       save_path=ZERO_SHOT_SAVE_PATH,
                                       system_content=SYSTEM_CONTENT)

Constructed Prompt for zero-shot approach:

The prompt:
**********
How will user rate this product Title: Pleasantville (1998)? (1 being lowest and 5 being highest) Attention! Just give me back the exact number as a result, and you don't need a lot of text.
**********



System Fingerprint: fp_cbe4fa03fe

API call response: "4"
Extracted rating: 4.0
Constructed Prompt for zero-shot approach:

The prompt:
**********
How will user rate this product Title: GoodFellas (1990)? (1 being lowest and 5 being highest) Attention! Just give me back the exact number as a result, and you don't need a lot of text.
**********



System Fingerprint: fp_cbe4fa03fe

API call response: "4"
Extracted rating: 4.0
Constructed Prompt for zero-shot approach:

The prompt:
**********
How will user rate this product Title: Boat, The (Das Boot) (1981)? (1 being lowest and 5 being highest) Attention! Just give me back the exact number as a result, and you don't need a lot of text.
**********



System Fingerprint: 

Unnamed: 0,user_id,item_id,title,actual_rating,predicted_rating
0,1,2321,Pleasantville (1998),3,4.0
1,2,1213,GoodFellas (1990),2,4.0
2,12,1233,"Boat, The (Das Boot) (1981)",3,4.0
3,15,2997,Being John Malkovich (1999),2,4.0
4,17,2762,"Sixth Sense, The (1999)",5,4.0
...,...,...,...,...,...
6035,3537,3718,American Pimp (1999),1,4.0
6036,2908,1261,Evil Dead II (Dead By Dawn) (1987),5,4.0
6037,2982,177,Lord of Illusions (1995),1,4.0
6038,3893,3822,"Girl on the Bridge, The (La Fille sur le Pont)...",4,4.0


In [6]:
# Inspect the saved zero-shot predictions and collect the rows whose
# predicted rating is not numeric (e.g. an API error message was stored).

# Read the data
zeroshot_saved_data = pd.read_csv(ZERO_SHOT_SAVE_PATH)

# Display the original data types
print("Original Data Types:")
print(zeroshot_saved_data.dtypes)
print("\n")

# Flag rows whose predicted rating parses as a number; values that cannot be
# parsed are coerced to NaN and therefore flagged False
zeroshot_saved_data['is_rating_float'] = pd.to_numeric(zeroshot_saved_data['predicted_rating'], errors='coerce').notna()

# Keep only the rows where the rating is not numeric (boolean negation
# instead of comparing the mask with `== False`)
non_float_ratings = zeroshot_saved_data[~zeroshot_saved_data['is_rating_float']]

# total number of rows with non-float ratings
print(f"Total number of rows with non-float ratings: {len(non_float_ratings)}")

# rerun indices for non-float ratings
rerun_indices = non_float_ratings.index.tolist()
print(f"Rerun indices: {rerun_indices}")

# Display rows with non-float ratings
print("Rows with non-float ratings:")
non_float_ratings.head(3)


Original Data Types:
user_id              int64
item_id              int64
title               object
actual_rating        int64
predicted_rating    object
dtype: object


Total number of rows with non-float ratings: 1
Rerun indices: [5747]
Rows with non-float ratings:


Unnamed: 0,user_id,item_id,title,actual_rating,predicted_rating,is_rating_float
5747,2370,897,For Whom the Bell Tolls (1943),4,"(None, ""Request timed out: HTTPSConnectionPool...",False


In [7]:
%%time

zero_shot_saved_data = pd.read_csv(ZERO_SHOT_SAVE_PATH)

# Rerun predictions for failed cases and save the updated data
rerun_save_path = os.path.join(DATA_DIR, 'movie-ml-latest-small/output/rerun_title_large_predictions_zero_shot.csv')
columns_for_prediction = ['title']
updated_data = rerun_failed_zero_shot_predictions(zero_shot_saved_data, ZERO_SHOT_SAVE_PATH, rerun_save_path, columns_for_prediction, PAUSE_EVERY_N_USERS, SLEEP_TIME)

# Remove rows with non-float ratings and save the cleaned data
cleaned_data = updated_data[pd.to_numeric(updated_data['predicted_rating'], errors='coerce').notna()]
cleaned_data.to_csv(ZERO_SHOT_SAVE_PATH, index=False)

# Evaluate the model predictions
evaluate_model_predictions_rmse_mae(ZERO_SHOT_SAVE_PATH, NUM_EXAMPLES, 'actual_rating', 'predicted_rating')


Re-running predictions for 1 failed cases.
Predictions saved to /Users/tnathu-ai/VSCode/recommender-system/recommender-system-openAI/code/notebook/../data/movie-ml-latest-small/output/rerun_title_large_predictions_zero_shot.csv
RMSE: 1.1581 (95% CI: (1.1347, 1.1816)) ± 0.0002
MAE: 0.8363 (95% CI: (0.8164, 0.8564)) ± 0.0002

First few actual vs predicted ratings:
Actual: 3, Predicted: 4.0000
Actual: 2, Predicted: 4.0000
Actual: 3, Predicted: 4.0000
Actual: 2, Predicted: 4.0000
Actual: 5, Predicted: 4.0000
CPU times: user 26.4 s, sys: 14.6 ms, total: 26.4 s
Wall time: 26.6 s


(1.1581482097341116, 0.8363398244742506)

# Few-shot (OpenAI API)


+ For each user, we'll use 4 of their ratings as training data to predict ratings for the rest of their products. Finally, we'll evaluate the predictions against the actual ratings to calculate the overall RMSE and MAE.

+ The `rating_history_str` includes the title and the rating of each training row (this notebook trains on the `Title` column only; no review text is used).

# 1 observation per reviewer - Few-shot OpenAI

In [4]:
%%time

predict_ratings_few_shot_and_save(data,
                                      columns_for_training=[TITLE_COLUMN_NAME],
                                       columns_for_prediction=[TITLE_COLUMN_NAME],
                                       title_column_name=TITLE_COLUMN_NAME, 
                                       user_column_name=USER_COLUMN_NAME,
                                       asin_column_name=ITEM_ID_COLUMN,
                                       rating_column_name=RATING_COLUMN_NAME,
                                       obs_per_user=TEST_OBSERVATION_PER_USER,
                                       pause_every_n_users=PAUSE_EVERY_N_USERS,
                                       sleep_time=SLEEP_TIME,
                                       save_path=FEW_SHOT_1_OBS_SAVE_PATH,
                                       system_content=SYSTEM_CONTENT)


Predicting rating for: "Title: Pleasantville (1998)"
Rating history:
* Title: Antz (1998) - Rating: 4 stars
* Title: Dead Poets Society (1989) - Rating: 4 stars
* Title: Sixth Sense, The (1999) - Rating: 4 stars
* Title: Mary Poppins (1964) - Rating: 5 stars
Constructed Prompt for few-shot approach:

The prompt:
**********


Here is user rating history:
* Title: Antz (1998) - Rating: 4 stars
* Title: Dead Poets Society (1989) - Rating: 4 stars
* Title: Sixth Sense, The (1999) - Rating: 4 stars
* Title: Mary Poppins (1964) - Rating: 5 stars

Based on above rating history, please predict user's rating for the product Title: Pleasantville (1998), (1 being lowest and 5 being highest,The output should be like: (x stars, xx%), do not explain the reason.)
**********


System Fingerprint: fp_cbe4fa03fe

API call response: "Based on the user's rating history, the predicted rating for "Pleasantville (1998)" is 4 stars, 80%."
Extracted rating: 4.0



----------------------------------------------

In [None]:
# Inspect the saved few-shot predictions and collect the rows whose
# predicted rating is not numeric, so they can be re-requested.

# Read the data
fewshot_saved_data = pd.read_csv(FEW_SHOT_1_OBS_SAVE_PATH)

# Display the original data types
print("Original Data Types:")
print(fewshot_saved_data.dtypes)
print("\n")

# Flag rows whose predicted rating parses as a number; values that cannot be
# parsed are coerced to NaN and therefore flagged False
fewshot_saved_data['is_rating_float'] = pd.to_numeric(fewshot_saved_data['predicted_rating'], errors='coerce').notna()

# Keep only the rows where the rating is not numeric (boolean negation
# instead of comparing the mask with `== False`)
non_float_ratings = fewshot_saved_data[~fewshot_saved_data['is_rating_float']]

# total number of rows with non-float ratings
print(f"Total number of rows with non-float ratings: {len(non_float_ratings)}")

# rerun indices for non-float ratings (consumed by the rerun call that follows)
rerun_indices = non_float_ratings.index.tolist()
print(f"Rerun indices: {rerun_indices}")

# Display rows with non-float ratings
print("Rows with non-float ratings:")
non_float_ratings.head(3)


In [None]:
%%time 

rerun_failed_few_shot_predictions(data, 
                                  columns_for_training=[TITLE_COLUMN_NAME],
                                  columns_for_prediction=[TITLE_COLUMN_NAME],
                                  user_column_name=USER_COLUMN_NAME,
                                  title_column_name=TITLE_COLUMN_NAME,
                                  asin_column_name=ITEM_ID_COLUMN,
                                  rating_column_name=RATING_COLUMN_NAME,
                                  obs_per_user=TEST_OBSERVATION_PER_USER,
                                  pause_every_n_users=PAUSE_EVERY_N_USERS,
                                  sleep_time=SLEEP_TIME,
                                  save_path=FEW_SHOT_1_OBS_SAVE_PATH, 
                                  new_path=FEW_SHOT_1_OBS_RERUN_PATH,
                                  rerun_indices=rerun_indices,
                                  system_content=SYSTEM_CONTENT)

In [None]:
# Inspect the few-shot rerun output and report any rows whose predicted
# rating is still not numeric.

# Read the data
fewshot_saved_data = pd.read_csv(FEW_SHOT_1_OBS_RERUN_PATH)

# Display the original data types
print("Original Data Types:")
print(fewshot_saved_data.dtypes)
print("\n")

# Flag rows whose predicted rating parses as a number; values that cannot be
# parsed are coerced to NaN and therefore flagged False
fewshot_saved_data['is_rating_float'] = pd.to_numeric(fewshot_saved_data['predicted_rating'], errors='coerce').notna()

# Keep only the rows where the rating is not numeric (boolean negation
# instead of comparing the mask with `== False`)
non_float_ratings = fewshot_saved_data[~fewshot_saved_data['is_rating_float']]

# total number of rows with non-float ratings
print(f"Total number of rows with non-float ratings: {len(non_float_ratings)}")

# rerun indices for non-float ratings
rerun_indices = non_float_ratings.index.tolist()
print(f"Rerun indices: {rerun_indices}")

# Display rows with non-float ratings
print("Rows with non-float ratings:")
non_float_ratings.head(3)


In [None]:
# Score the few-shot rerun output: compare the 'actual_rating' and
# 'predicted_rating' columns of the file at FEW_SHOT_1_OBS_RERUN_PATH.
evaluate_model_predictions_rmse_mae(data_path=FEW_SHOT_1_OBS_RERUN_PATH,
                                    num_examples=NUM_EXAMPLES,
                                    actual_ratings_column='actual_rating',
                                    predicted_ratings_column='predicted_rating')

# Limitations:

+ The model might not fully understand the nuanced relationships between products based on titles alone. Additional context or features might be needed for more accurate predictions.
+ This approach might be computationally expensive and slower than traditional matrix factorization or deep learning-based recommendation models, especially for a large number of users.

# References

+ https://platform.openai.com/docs/api-reference/authentication