In [1]:
import numpy as np
import openai
import pandas as pd
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
# OpenAI API Key
openai.api_key = openai.api_key = os.getenv("OPENAI_API_KEY")




# RMSE & MAE evaluation metrics

In [2]:
# calculate RMSE and MAE manually
def calculate_rmse_and_mae(actual_ratings, predicted_ratings):
    differences = [actual - predicted for actual, predicted in zip(actual_ratings, predicted_ratings)]
    
    # RMSE
    squared_differences = [diff ** 2 for diff in differences]
    mean_squared_difference = sum(squared_differences) / len(squared_differences)
    rmse = mean_squared_difference ** 0.5

    # MAE
    absolute_differences = [abs(diff) for diff in differences]
    mae = sum(absolute_differences) / len(absolute_differences)

    return rmse, mae

# Test
actual_ratings = [4, 4]  # Ground truth ratings
predicted_ratings = [3, 5]  # Predicted ratings

rmse, mae = calculate_rmse_and_mae(actual_ratings, predicted_ratings)
print("RMSE: ", rmse)
print("MAE: ", mae)


RMSE:  1.0
MAE:  1.0


# Data Overview

In [3]:
# Read the dataset
dataset_path = "../data/ml-latest-small/merged_data.csv"
movie_data = pd.read_csv(dataset_path)
movie_data.info()
movie_data.head(3)
# get sample of 100 rows

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3476 entries, 0 to 3475
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   movieId  3476 non-null   int64  
 1   imdbId   3476 non-null   int64  
 2   tmdbId   3476 non-null   float64
 3   title    3476 non-null   object 
 4   genres   3476 non-null   object 
 5   userId   3476 non-null   int64  
 6   rating   3476 non-null   float64
 7   tag      3476 non-null   object 
dtypes: float64(2), int64(3), object(3)
memory usage: 217.4+ KB


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag
0,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,336,4.0,pixar
1,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,474,4.0,pixar
2,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,567,3.5,fun


# Simple statistical methods (mean calculations) 

>> **Zero-Shot Prediction (zero_shot_predict function):**
This method calculates the average rating for a given movie title from the `movie_data` DataFrame.
It does not take into account any user-specific information and predicts the rating based on the average rating of the movie across all users.

>> **Few-Shot Prediction (few_shot_predict function):**
This method calculates the average rating for a given movie title by a specific user from the `movie_data` DataFrame.
It predicts the rating based on the average rating of the movie by that specific user, thus incorporating user-specific information.

In [4]:
%%time 

def zero_shot_predict(movie_title):
    # Get the average rating for the given title
    avg_rating = movie_data.loc[movie_data['title'] == movie_title, 'rating'].mean()
    return avg_rating

def few_shot_predict(movie_title, user_id):
    # Get the average rating for the given title by the specific user
    avg_rating = movie_data.loc[(movie_data['title'] == movie_title) & (movie_data['userId'] == user_id), 'rating'].mean()
    return avg_rating

# Applying predictions
movie_data['predicted_rating_zero_shot'] = movie_data['title'].apply(zero_shot_predict)
movie_data['predicted_rating_few_shot'] = [few_shot_predict(row['title'], row['userId']) for _, row in movie_data.iterrows()]

# Calculating RMSE and MAE for Zero-Shot
rmse_zero_shot = mean_squared_error(movie_data['rating'], movie_data['predicted_rating_zero_shot'], squared=False)
mae_zero_shot = mean_absolute_error(movie_data['rating'], movie_data['predicted_rating_zero_shot'])

# Calculating RMSE and MAE for Few-Shot
rmse_few_shot = mean_squared_error(movie_data['rating'], movie_data['predicted_rating_few_shot'], squared=False)
mae_few_shot = mean_absolute_error(movie_data['rating'], movie_data['predicted_rating_few_shot'])

print(f"Zero-Shot RMSE: {rmse_zero_shot}, MAE: {mae_zero_shot}")
print(f"Few-Shot RMSE: {rmse_few_shot}, MAE: {mae_few_shot}")

movie_data.head()



Zero-Shot RMSE: 0.2595373192744763, MAE: 0.0901704296495429
Few-Shot RMSE: 0.0, MAE: 0.0
CPU times: user 2.9 s, sys: 32 ms, total: 2.94 s
Wall time: 2.94 s


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag,predicted_rating_zero_shot,predicted_rating_few_shot
0,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,336,4.0,pixar,3.833333,4.0
1,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,474,4.0,pixar,3.833333,4.0
2,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,567,3.5,fun,3.833333,3.5
3,2,113497,8844.0,Jumanji (1995),Adventure|Children|Fantasy,62,4.0,fantasy,3.75,4.0
4,2,113497,8844.0,Jumanji (1995),Adventure|Children|Fantasy,62,4.0,magic board game,3.75,4.0


# Zero-shot (GPT-3.5-turbo)

In [5]:
data = movie_data

In [6]:
%%time

import openai

def predict_rating(title, model="gpt-3.5-turbo", temperature=0):
    prompt = f"How will users rate this movie title: '{title}'? (1 being lowest and 5 being highest. On a scale from 0.5 to 5, with intervals of 0.5) Attention! Just give me back the exact number as a result, and you don't need a lot of text."

    response = openai.ChatCompletion.create(
        model=model,
        temperature=temperature, # Higher temperature: more creative
        messages=[
            {
                "role": "system",
                "content": "You are a movie critic."
            },
            {
                "role": "user",
                "content": prompt
            }
        ]
    )
    # Extract the rating answer from the GPT's response
    rating_text = response.choices[0].message['content'].strip()
    try:
        # Extract a float number from the response
        rating = float(rating_text)
        if rating < 0.5 or rating > 5.0:
            raise ValueError("Rating out of bounds")
    except ValueError:
        print(f"Unexpected response for '{title}': {rating_text}")
        rating = None

    return rating


# Iterate through the dataset and predict ratings
predicted_ratings = []
for title in data['title'].unique():
    predicted_rating = predict_rating(title)
    print(f"Predicted rating for {title}: {predicted_rating}")
    predicted_ratings.append(predicted_rating)

# Create a DataFrame with titles and predicted ratings
predicted_ratings_df = pd.DataFrame({
    'title': data['title'].unique(),
    'predicted_rating': predicted_ratings
})

# Merge the predicted ratings with the original data
merged_data_with_predictions = pd.merge(data, predicted_ratings_df, on='title')

# Save the merged data with predictions to a new CSV file
merged_data_with_predictions.to_csv('../data/ml-latest-small/merged_data_with_predictions.csv', index=False)


Predicted rating for Toy Story (1995): 4.5
Predicted rating for Jumanji (1995): 4.5
Predicted rating for Grumpier Old Men (1995): 4.5
Predicted rating for Father of the Bride Part II (1995): 4.5
Predicted rating for Sabrina (1995): 4.5
Predicted rating for American President, The (1995): 4.5
Predicted rating for Nixon (1995): 3.5
Predicted rating for Casino (1995): 4.5
Predicted rating for Sense and Sensibility (1995): 4.5
Predicted rating for Get Shorty (1995): 4.5
Predicted rating for Copycat (1995): 4.5
Predicted rating for Leaving Las Vegas (1995): 4.5
Predicted rating for Othello (1995): 4.5


Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)

In [None]:
# flatten the output dataframes rather than nested lists and dictionaries
pd.json_normalize(response, "choices", ['id', 'object', 'created', 'model', 'usage'])

NameError: name 'pd' is not defined

In [None]:
# how many predicted_rating values are null? Why null output?
print(f"Number of null predicted_rating values: {merged_data_with_predictions['predicted_rating'].isnull().sum()}")

merged_data_with_predictions.head(3)

Number of null predicted_rating values: 58


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag,predicted_rating
0,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,336,4.0,pixar,4.5
1,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,474,4.0,pixar,4.5
2,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,567,3.5,fun,4.5


In [None]:
# evaluate the rating prediction model

movie_titles = merged_data_with_predictions['title']
actual_ratings = merged_data_with_predictions['rating']

# Remove None predictions if any
actual_ratings_filtered, predicted_ratings_filtered = zip(*[(actual, predicted) for actual, predicted in zip(actual_ratings, predicted_ratings) if predicted is not None])

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
print(f'Root Mean Squared Error (RMSE): {rmse}')

# Calculate MAE
mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
print(f'Mean Absolute Error (MAE): {mae}')


Root Mean Squared Error (RMSE): 0.9985242406994432
Mean Absolute Error (MAE): 0.7637057598889659


In [None]:
# calculate RMSE and MAE manually using calculate_rmse_and_mae function
rmse, mae = calculate_rmse_and_mae(
    actual_ratings_filtered, predicted_ratings_filtered
)
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")


RMSE: 0.9985242406994432
MAE: 0.7637057598889659


# Few-shot

In [None]:
%%time

import re

def predict_rating_few_shot(movie_title, rating_history, , model="gpt-3.5-turbo", temperature=0):
    prompt = f"Here is user rating history: {rating_history}; Based on the above rating history, please predict user's rating for the movie: '{movie_title}', (1 being lowest and 5 being highest. On a scale from 0.5 to 5, with intervals of 0.5). Attention! Just give me back the exact number as a result, and you don't need a lot of text."

    response = openai.ChatCompletion.create(
        model=model,
        temperature=temperature, # Higher temperature: more creative
        messages=[
            {
                "role": "system",
                "content": "You are a movie critic."
            },
            {
                "role": "user",
                "content": prompt
            }
        ]
    )

    rating_text = response.choices[0].message['content'].strip()
    try:
        # Extract the first numerical value from the response
        rating = float(re.search(r'\d+(\.\d+)?', rating_text).group())
        if rating < 0.5 or rating > 5.0:
            raise ValueError("Rating out of bounds")
    except (ValueError, AttributeError):
        print(f"Unexpected response for '{movie_title}': {rating_text}")
        rating = None

    return rating


predicted_ratings = []
for title in data['title'].unique():
    # Randomly sample 4 rows from the entire dataset
    rating_history_samples = data.sample(4)
    rating_history_str = ', '.join([f"{row['title']} ({row['rating']} stars)" for _, row in rating_history_samples.iterrows()])

    predicted_rating = predict_rating_few_shot(title, rating_history_str)
    print(f"Predicted rating for {title}: {predicted_rating}")
    predicted_ratings.append(predicted_rating)

# Create a DataFrame with titles and predicted ratings
predicted_ratings_df = pd.DataFrame({
    'title': data['title'].unique(),
    'predicted_rating_few_shot': predicted_ratings
})

# Merge the predicted ratings with the original data
merged_data_with_predictions = pd.merge(data, predicted_ratings_df, on='title')

# Save the merged data with predictions to a new CSV file
merged_data_with_predictions.to_csv('../data/ml-latest-small/merged_data_with_predictions.csv', index=False)


# evaluate the rating prediction model

movie_titles = merged_data_with_predictions['title']
actual_ratings = merged_data_with_predictions['rating']

# Remove None predictions if any
actual_ratings_filtered, predicted_ratings_filtered = zip(*[(actual, predicted) for actual, predicted in zip(actual_ratings, predicted_ratings) if predicted is not None])

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
print(f'Root Mean Squared Error (RMSE): {rmse}')
# Calculate MAE
mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
print(f'Mean Absolute Error (MAE): {mae}')



NameError: name 'data' is not defined

In [None]:
# demonstrate the output of 4 random historical ratings
predicted_ratings = []
for title in data['title'].unique():
    # Randomly sample 4 rows from the entire dataset
    rating_history_samples = data.sample(4)
    rating_history_str = ', '.join([f"{row['title']} ({row['rating']} stars)" for _, row in rating_history_samples.iterrows()])

rating_history_str


NameError: name 'data' is not defined

# References

+ https://platform.openai.com/docs/api-reference/authentication