In [1]:
import numpy as np
import openai
import pandas as pd
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
# OpenAI API Key
openai.api_key = openai.api_key = os.getenv("OPENAI_API_KEY")




# RMSE & MAE evaluation metrics

In [2]:
# calculate RMSE and MAE manually
def calculate_rmse_and_mae(actual_ratings, predicted_ratings):
    differences = [actual - predicted for actual, predicted in zip(actual_ratings, predicted_ratings)]
    
    # RMSE
    squared_differences = [diff ** 2 for diff in differences]
    mean_squared_difference = sum(squared_differences) / len(squared_differences)
    rmse = mean_squared_difference ** 0.5

    # MAE
    absolute_differences = [abs(diff) for diff in differences]
    mae = sum(absolute_differences) / len(absolute_differences)

    return rmse, mae

# Test
actual_ratings = [4, 4]  # Ground truth ratings
predicted_ratings = [3, 5]  # Predicted ratings

rmse, mae = calculate_rmse_and_mae(actual_ratings, predicted_ratings)
print("RMSE: ", rmse)
print("MAE: ", mae)


RMSE:  1.0
MAE:  1.0


# Data Overview

In [3]:
# Read the dataset
dataset_path = "../data/ml-latest-small/merged_data.csv"
movie_data = pd.read_csv(dataset_path)
# get sample data of 100 rows
movie_data = movie_data.sample(100)
movie_data.info()
movie_data.head(3)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 944 to 2549
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   movieId  100 non-null    int64  
 1   imdbId   100 non-null    int64  
 2   tmdbId   100 non-null    float64
 3   title    100 non-null    object 
 4   genres   100 non-null    object 
 5   userId   100 non-null    int64  
 6   rating   100 non-null    float64
 7   tag      100 non-null    object 
dtypes: float64(2), int64(3), object(3)
memory usage: 7.0+ KB


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag
944,1423,116506,55146.0,Hearts and Minds (1996),Drama,474,4.0,In Netflix queue
3342,135536,1386697,297761.0,Suicide Squad (2016),Action|Crime|Sci-Fi,62,4.0,lack of plot
1183,2313,80678,1955.0,"Elephant Man, The (1980)",Drama,474,4.5,freaks


# Simple statistical methods (mean calculations) 

>> **Zero-Shot Prediction (zero_shot_predict function):**
This method calculates the average rating for a given movie title from the `movie_data` DataFrame.
It does not take into account any user-specific information and predicts the rating based on the average rating of the movie across all users.

>> **Few-Shot Prediction (few_shot_predict function):**
This method calculates the average rating for a given movie title by a specific user from the `movie_data` DataFrame.
It predicts the rating based on the average rating of the movie by that specific user, thus incorporating user-specific information.

In [4]:
%%time 

def zero_shot_predict(movie_title):
    # Get the average rating for the given title
    avg_rating = movie_data.loc[movie_data['title'] == movie_title, 'rating'].mean()
    return avg_rating

def few_shot_predict(movie_title, user_id):
    # Get the average rating for the given title by the specific user
    avg_rating = movie_data.loc[(movie_data['title'] == movie_title) & (movie_data['userId'] == user_id), 'rating'].mean()
    return avg_rating

# Applying predictions
movie_data['predicted_rating_zero_shot'] = movie_data['title'].apply(zero_shot_predict)
movie_data['predicted_rating_few_shot'] = [few_shot_predict(row['title'], row['userId']) for _, row in movie_data.iterrows()]

# Calculating RMSE and MAE for Zero-Shot
rmse_zero_shot = mean_squared_error(movie_data['rating'], movie_data['predicted_rating_zero_shot'], squared=False)
mae_zero_shot = mean_absolute_error(movie_data['rating'], movie_data['predicted_rating_zero_shot'])

# Calculating RMSE and MAE for Few-Shot
rmse_few_shot = mean_squared_error(movie_data['rating'], movie_data['predicted_rating_few_shot'], squared=False)
mae_few_shot = mean_absolute_error(movie_data['rating'], movie_data['predicted_rating_few_shot'])

print(f"Zero-Shot RMSE: {rmse_zero_shot}, MAE: {mae_zero_shot}")
print(f"Few-Shot RMSE: {rmse_few_shot}, MAE: {mae_few_shot}")

movie_data.head()



Zero-Shot RMSE: 0.035355339059327376, MAE: 0.005
Few-Shot RMSE: 0.0, MAE: 0.0
CPU times: user 50.8 ms, sys: 3.94 ms, total: 54.7 ms
Wall time: 51.7 ms


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag,predicted_rating_zero_shot,predicted_rating_few_shot
944,1423,116506,55146.0,Hearts and Minds (1996),Drama,474,4.0,In Netflix queue,4.0,4.0
3342,135536,1386697,297761.0,Suicide Squad (2016),Action|Crime|Sci-Fi,62,4.0,lack of plot,4.0,4.0
1183,2313,80678,1955.0,"Elephant Man, The (1980)",Drama,474,4.5,freaks,4.5,4.5
336,296,110912,680.0,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,599,5.0,psychological,5.0,5.0
2560,37729,121164,3933.0,Corpse Bride (2005),Animation|Comedy|Fantasy|Musical|Romance,336,2.5,animation,2.5,2.5


# Zero-shot (GPT-3.5-turbo)

In [5]:
data = movie_data

In [6]:
%%time

import openai

def predict_rating(title):
    prompt = f"How will users rate this movie title: '{title}'? (1 being lowest and 5 being highest) Attention! Just give me back the exact number as a result, and you don't need a lot of text."

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a movie critic."
            },
            {
                "role": "user",
                "content": prompt
            }
        ]
    )

    rating_text = response.choices[0].message['content'].strip()
    try:
        # Extract a float number from the response
        rating = float(rating_text)
        if rating < 0.5 or rating > 5.0:
            raise ValueError("Rating out of bounds")
    except ValueError:
        print(f"Unexpected response for '{title}': {rating_text}")
        rating = None

    return rating


# Iterate through the dataset and predict ratings
predicted_ratings = []
for title in data['title'].unique():
    predicted_rating = predict_rating(title)
    print(f"Predicted rating for {title}: {predicted_rating}")
    predicted_ratings.append(predicted_rating)

# Create a DataFrame with titles and predicted ratings
predicted_ratings_df = pd.DataFrame({
    'title': data['title'].unique(),
    'predicted_rating': predicted_ratings
})

# Merge the predicted ratings with the original data
merged_data_with_predictions = pd.merge(data, predicted_ratings_df, on='title')

# Save the merged data with predictions to a new CSV file
merged_data_with_predictions.to_csv('../data/ml-latest-small/merged_data_with_predictions.csv', index=False)


Unexpected response for 'Hearts and Minds (1996)': Unfortunately, as an AI text-based model, I don't have access to real-time user ratings.
Predicted rating for Hearts and Minds (1996): None
Predicted rating for Suicide Squad (2016): 3.0
Predicted rating for Elephant Man, The (1980): 4.0
Predicted rating for Pulp Fiction (1994): 4.0
Predicted rating for Corpse Bride (2005): 4.0
Predicted rating for Enigma (2001): 3.0
Predicted rating for Green Lantern (2011): 2.0
Predicted rating for Monster (2003): 4.5
Predicted rating for Ginger Snaps (2000): 4.0
Predicted rating for Clerks (1994): 4.0
Predicted rating for Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964): 4.0
Predicted rating for Fracture (2007): 4.5
Predicted rating for Father of the Bride Part II (1995): 4.0
Predicted rating for Inception (2010): 5.0
Predicted rating for Bug (2007): 3.5
Predicted rating for Twins (1988): 4.0
Predicted rating for South Park: Bigger, Longer and Uncut (1999): 4.5
Predicted r

In [7]:
# how many predicted_rating values are null? Why null output?
print(f"Number of null predicted_rating values: {merged_data_with_predictions['predicted_rating'].isnull().sum()}")

merged_data_with_predictions.head(3)

Number of null predicted_rating values: 1


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag,predicted_rating_zero_shot,predicted_rating_few_shot,predicted_rating
0,1423,116506,55146.0,Hearts and Minds (1996),Drama,474,4.0,In Netflix queue,4.0,4.0,
1,135536,1386697,297761.0,Suicide Squad (2016),Action|Crime|Sci-Fi,62,4.0,lack of plot,4.0,4.0,3.0
2,135536,1386697,297761.0,Suicide Squad (2016),Action|Crime|Sci-Fi,62,4.0,Ben Affleck,4.0,4.0,3.0


In [8]:
# evaluate the rating prediction model

movie_titles = merged_data_with_predictions['title']
actual_ratings = merged_data_with_predictions['rating']

# Remove None predictions if any
actual_ratings_filtered, predicted_ratings_filtered = zip(*[(actual, predicted) for actual, predicted in zip(actual_ratings, predicted_ratings) if predicted is not None])

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
print(f'Root Mean Squared Error (RMSE): {rmse}')

# Calculate MAE
mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
print(f'Mean Absolute Error (MAE): {mae}')


Root Mean Squared Error (RMSE): 1.132809969485744
Mean Absolute Error (MAE): 0.8640449438202248


In [9]:
# calculate RMSE and MAE manually using calculate_rmse_and_mae function
rmse, mae = calculate_rmse_and_mae(
    actual_ratings_filtered, predicted_ratings_filtered
)
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")


RMSE: 1.132809969485744
MAE: 0.8640449438202248


# Few-shot

In [10]:
%%time

import re

def predict_rating_few_shot(movie_title, rating_history):
    prompt = f"Here is user rating history: {rating_history}; Based on the above rating history, please predict user's rating for the movie: '{movie_title}', (1 being lowest and 5 being highest). Attention! Just give me back the exact number as a result, and you don't need a lot of text."

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a movie critic."
            },
            {
                "role": "user",
                "content": prompt
            }
        ]
    )

    rating_text = response.choices[0].message['content'].strip()
    try:
        # Extract the first numerical value from the response
        rating = float(re.search(r'\d+(\.\d+)?', rating_text).group())
        if rating < 0.5 or rating > 5.0:
            raise ValueError("Rating out of bounds")
    except (ValueError, AttributeError):
        print(f"Unexpected response for '{movie_title}': {rating_text}")
        rating = None

    return rating


predicted_ratings = []
for title in data['title'].unique():
    # Randomly sample 4 rows from the entire dataset
    rating_history_samples = data.sample(4)
    rating_history_str = ', '.join([f"{row['title']} ({row['rating']} stars)" for _, row in rating_history_samples.iterrows()])

    predicted_rating = predict_rating_few_shot(title, rating_history_str)
    print(f"Predicted rating for {title}: {predicted_rating}")
    predicted_ratings.append(predicted_rating)

# Create a DataFrame with titles and predicted ratings
predicted_ratings_df = pd.DataFrame({
    'title': data['title'].unique(),
    'predicted_rating_few_shot': predicted_ratings
})

# Merge the predicted ratings with the original data
merged_data_with_predictions = pd.merge(data, predicted_ratings_df, on='title')

# Save the merged data with predictions to a new CSV file
merged_data_with_predictions.to_csv('../data/ml-latest-small/merged_data_with_predictions.csv', index=False)


# evaluate the rating prediction model

movie_titles = merged_data_with_predictions['title']
actual_ratings = merged_data_with_predictions['rating']

# Remove None predictions if any
actual_ratings_filtered, predicted_ratings_filtered = zip(*[(actual, predicted) for actual, predicted in zip(actual_ratings, predicted_ratings) if predicted is not None])

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
print(f'Root Mean Squared Error (RMSE): {rmse}')
# Calculate MAE
mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
print(f'Mean Absolute Error (MAE): {mae}')



Unexpected response for 'Hearts and Minds (1996)': Based on the user's rating history, the predicted rating for the movie "Hearts and Minds (1996)" would be 4.0 stars.
Predicted rating for Hearts and Minds (1996): None
Predicted rating for Suicide Squad (2016): 4.0
Unexpected response for 'Elephant Man, The (1980)': Based on the user's rating history, the predicted rating for the movie "Elephant Man, The (1980)" would be 4.0 stars.
Predicted rating for Elephant Man, The (1980): None
Predicted rating for Pulp Fiction (1994): 5.0
Predicted rating for Corpse Bride (2005): 4.0
Predicted rating for Enigma (2001): 4.0
Predicted rating for Green Lantern (2011): 3.0
Predicted rating for Monster (2003): 4.0
Unexpected response for 'Ginger Snaps (2000)': Based on the user's rating history, I predict the user's rating for the movie "Ginger Snaps (2000)" would be 4.0 stars.
Predicted rating for Ginger Snaps (2000): None
Predicted rating for Clerks (1994): 4.5
Predicted rating for Dr. Strangelove o

In [11]:
# demonstrate the output of 4 random historical ratings
predicted_ratings = []
for title in data['title'].unique():
    # Randomly sample 4 rows from the entire dataset
    rating_history_samples = data.sample(4)
    rating_history_str = ', '.join([f"{row['title']} ({row['rating']} stars)" for _, row in rating_history_samples.iterrows()])

rating_history_str


"Imitation of Life (1959) (4.0 stars), Enigma (2001) (2.5 stars), Seven (a.k.a. Se7en) (1995) (5.0 stars), Angel's Egg (Tenshi no tamago) (1985) (3.5 stars)"

# References

+ https://platform.openai.com/docs/api-reference/authentication