In [39]:
import numpy as np
import openai
import pandas as pd
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
import re

# From the IPython.display package, import display and Markdown
from IPython.display import display, Markdown
# OpenAI API Key
openai.api_key = openai.api_key = os.getenv("OPENAI_API_KEY")

# Get the current working directory
current_directory = os.getcwd()

# Assuming the notebook is in the "notebook" directory, and you want to access the "data/ml-latest-small" directory
data_directory = os.path.join(current_directory, '..', 'data', 'ml-latest-small')


# RMSE & MAE evaluation metrics

In [40]:
# # calculate RMSE and MAE manually
# def calculate_rmse_and_mae(actual_ratings, predicted_ratings):
#     differences = [actual - predicted for actual, predicted in zip(actual_ratings, predicted_ratings)]
    
#     # RMSE
#     squared_differences = [diff ** 2 for diff in differences]
#     mean_squared_difference = sum(squared_differences) / len(squared_differences)
#     rmse = mean_squared_difference ** 0.5

#     # MAE
#     absolute_differences = [abs(diff) for diff in differences]
#     mae = sum(absolute_differences) / len(absolute_differences)

#     return rmse, mae

# # Test
# actual_ratings = [4, 4]  # Ground truth ratings
# predicted_ratings = [3, 5]  # Predicted ratings

# rmse, mae = calculate_rmse_and_mae(actual_ratings, predicted_ratings)
# print("RMSE: ", rmse)
# print("MAE: ", mae)

# Replaced for-loop with vectorized operation for performance.
def calculate_rmse_and_mae(actual_ratings, predicted_ratings):
    differences = np.array(actual_ratings) - np.array(predicted_ratings)
    
    # RMSE
    squared_differences = differences ** 2
    mean_squared_difference = squared_differences.mean()
    rmse = mean_squared_difference ** 0.5

    # MAE
    absolute_differences = np.abs(differences)
    mae = absolute_differences.mean()

    return rmse, mae

# Data Overview

In [41]:
# Read the dataset
dataset_path = "../data/ml-latest-small/merged_data.csv"
movie_data = pd.read_csv(dataset_path)
movie_data.info()
movie_data.head(3)
# # Updated to read only necessary columns using `usecols` parameter for memory optimization.
# movie_data = pd.read_csv(dataset_path, usecols=["userId", "movieId", "rating"])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3476 entries, 0 to 3475
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   movieId  3476 non-null   int64  
 1   imdbId   3476 non-null   int64  
 2   tmdbId   3476 non-null   float64
 3   title    3476 non-null   object 
 4   genres   3476 non-null   object 
 5   userId   3476 non-null   int64  
 6   rating   3476 non-null   float64
 7   tag      3476 non-null   object 
dtypes: float64(2), int64(3), object(3)
memory usage: 217.4+ KB


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag
0,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,336,4.0,pixar
1,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,474,4.0,pixar
2,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,567,3.5,fun


# Simple statistical methods (mean calculations) 

>> **Zero-Shot Prediction (zero_shot_predict function):**
This method calculates the average rating for a given movie title from the `movie_data` DataFrame.
It does not take into account any user-specific information and predicts the rating based on the average rating of the movie across all users.

>> **Few-Shot Prediction (few_shot_predict function):**
This method calculates the average rating for a given movie title by a specific user from the `movie_data` DataFrame.
It predicts the rating based on the average rating of the movie by that specific user, thus incorporating user-specific information.

In [42]:
%%time 

def zero_shot_predict(movie_title):
    # Get the average rating for the given title
    avg_rating = movie_data.loc[movie_data['title'] == movie_title, 'rating'].mean()
    return avg_rating

def few_shot_predict(movie_title, user_id):
    # Get the average rating for the given title by the specific user
    avg_rating = movie_data.loc[(movie_data['title'] == movie_title) & (movie_data['userId'] == user_id), 'rating'].mean()
    return avg_rating

# Applying predictions
movie_data['predicted_rating_zero_shot'] = movie_data['title'].apply(zero_shot_predict)
movie_data['predicted_rating_few_shot'] = [few_shot_predict(row['title'], row['userId']) for _, row in movie_data.iterrows()]

# Calculating RMSE and MAE for Zero-Shot
rmse_zero_shot = mean_squared_error(movie_data['rating'], movie_data['predicted_rating_zero_shot'], squared=False)
mae_zero_shot = mean_absolute_error(movie_data['rating'], movie_data['predicted_rating_zero_shot'])

# Calculating RMSE and MAE for Few-Shot
rmse_few_shot = mean_squared_error(movie_data['rating'], movie_data['predicted_rating_few_shot'], squared=False)
mae_few_shot = mean_absolute_error(movie_data['rating'], movie_data['predicted_rating_few_shot'])

print(f"Zero-Shot RMSE: {rmse_zero_shot}, MAE: {mae_zero_shot}")
print(f"Few-Shot RMSE: {rmse_few_shot}, MAE: {mae_few_shot}")

movie_data.head()

Zero-Shot RMSE: 0.2595373192744763, MAE: 0.0901704296495429
Few-Shot RMSE: 0.0, MAE: 0.0
CPU times: user 2.9 s, sys: 17.5 ms, total: 2.92 s
Wall time: 2.92 s


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag,predicted_rating_zero_shot,predicted_rating_few_shot
0,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,336,4.0,pixar,3.833333,4.0
1,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,474,4.0,pixar,3.833333,4.0
2,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,567,3.5,fun,3.833333,3.5
3,2,113497,8844.0,Jumanji (1995),Adventure|Children|Fantasy,62,4.0,fantasy,3.75,4.0
4,2,113497,8844.0,Jumanji (1995),Adventure|Children|Fantasy,62,4.0,magic board game,3.75,4.0


# Zero-shot (GPT-3.5-turbo)

In [46]:
data = movie_data

In [51]:
%%time

def get_movie_rating_from_gpt(title, model="gpt-3.5-turbo", temperature=0):
    """
    Make an API call to GPT to get a movie rating prediction.
    
    Parameters:
    - title (str): The title of the movie.
    - model (str): The GPT model version to use.
    - temperature (float): Sampling temperature for the model response. 
                           Higher values make the output more random, and lower values make it more deterministic.
    
    Returns:
    - str: The raw text response from GPT.
    """
    # Construct the prompt to ask the model
    prompt = (f"How will users rate this movie title: '{title}'? "
              "(1 being lowest and 5 being highest. On a scale from 0.5 to 5, with intervals of 0.5) "
              "Attention! Just give me back the exact number as a result, and you don't need a lot of text.")
    
    # Make the API call
    response = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": "You are a movie critic."},
            {"role": "user", "content": prompt}
        ]
    )
    
    return response.choices[0].message['content'].strip()

def validate_rating(rating_text, title):
    """
    Validate the rating returned from GPT.
    
    Parameters:
    - rating_text (str): The raw text response from GPT.
    - title (str): The title of the movie, used for error messages.
    
    Returns:
    - float: The validated movie rating, or None if invalid.
    """
    try:
        rating = float(rating_text)
        # Check if the rating is within the expected range
        if 0.5 <= rating <= 5.0:
            return rating
    except ValueError:
        pass
    
    # Print an error message for unexpected responses
    print(f"Unexpected response for '{title}': {rating_text}")
    return None

def predict_movie_ratings(data):
    """
    Predict ratings for all unique movie titles in the dataset.
    
    Parameters:
    - data (DataFrame): The dataset containing movie titles.
    
    Returns:
    - list: A list of predicted ratings for each movie title.
    """
    ratings = []
    for title in data['title'].unique():
        # Get the raw rating prediction from GPT
        rating_text = get_movie_rating_from_gpt(title)
        # Validate the rating
        rating = validate_rating(rating_text, title)
        ratings.append(rating)
        print(f"Predicted rating for {title}: {rating}")
    return ratings


# Predict ratings for all unique movie titles
predicted_ratings = predict_movie_ratings(movie_data)

# Create a new DataFrame with titles and their predicted ratings
predicted_ratings_df = pd.DataFrame({
    'title': data['title'].unique(),
    'predicted_rating': predicted_ratings
})

# Merge the predicted ratings with the original data
merged_data_with_predictions = pd.merge(data, predicted_ratings_df, on='title')

# Save the merged data with predictions to a new CSV file
file_path = os.path.join(data_directory, 'merged_data_with_predictions.csv')
merged_data_with_predictions.to_csv(file_path, index=False)


Predicted rating for Toy Story (1995): 4.5
Predicted rating for Jumanji (1995): 4.5
Predicted rating for Grumpier Old Men (1995): 4.5
Predicted rating for Father of the Bride Part II (1995): 4.5
Predicted rating for Sabrina (1995): 4.5
Predicted rating for American President, The (1995): 4.5
Predicted rating for Nixon (1995): 3.5
Predicted rating for Casino (1995): 4.5
Predicted rating for Sense and Sensibility (1995): 4.5
Predicted rating for Get Shorty (1995): 4.5
Predicted rating for Copycat (1995): 4.5
Predicted rating for Leaving Las Vegas (1995): 4.5
Predicted rating for Othello (1995): 4.5
Predicted rating for Persuasion (1995): 4.5
Predicted rating for City of Lost Children, The (Cité des enfants perdus, La) (1995): 4.5
Predicted rating for Dangerous Minds (1995): 4.5
Predicted rating for Twelve Monkeys (a.k.a. 12 Monkeys) (1995): 4.5
Predicted rating for Babe (1995): 4.5
Predicted rating for Dead Man Walking (1995): 4.5
Predicted rating for It Takes Two (1995): 3.5
Predicted r

APIError: Internal error {
    "error": {
        "message": "Internal error",
        "type": "internal_error",
        "param": null,
        "code": "internal_error"
    }
}
 500 {'error': {'message': 'Internal error', 'type': 'internal_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Thu, 07 Sep 2023 22:08:26 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '152', 'Connection': 'keep-alive', 'vary': 'Origin', 'x-ratelimit-limit-requests': '3500', 'x-ratelimit-limit-tokens': '90000', 'x-ratelimit-remaining-requests': '3499', 'x-ratelimit-remaining-tokens': '89916', 'x-ratelimit-reset-requests': '17ms', 'x-ratelimit-reset-tokens': '56ms', 'x-request-id': '7ec6c1d04159dda1a8bcac47e6a1eb75', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'CF-Cache-Status': 'DYNAMIC', 'Server': 'cloudflare', 'CF-RAY': '8032448a48855a9c-MEL', 'alt-svc': 'h3=":443"; ma=86400'}

In [None]:
# how many predicted_rating values are null? Why null output?
print(f"Number of null predicted_rating values: {merged_data_with_predictions['predicted_rating'].isnull().sum()}")

merged_data_with_predictions.head(3)

Number of null predicted_rating values: 58


Unnamed: 0,movieId,imdbId,tmdbId,title,genres,userId,rating,tag,predicted_rating
0,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,336,4.0,pixar,4.5
1,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,474,4.0,pixar,4.5
2,1,114709,862.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,567,3.5,fun,4.5


In [None]:
# evaluate the rating prediction model

movie_titles = merged_data_with_predictions['title']
actual_ratings = merged_data_with_predictions['rating']

# Remove None predictions if any
actual_ratings_filtered, predicted_ratings_filtered = zip(*[(actual, predicted) for actual, predicted in zip(actual_ratings, predicted_ratings) if predicted is not None])

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual_ratings_filtered, predicted_ratings_filtered))
print(f'Root Mean Squared Error (RMSE): {rmse}')

# Calculate MAE
mae = mean_absolute_error(actual_ratings_filtered, predicted_ratings_filtered)
print(f'Mean Absolute Error (MAE): {mae}')

# Replaced for-loop with vectorized operation for performance.

Root Mean Squared Error (RMSE): 0.9985242406994432
Mean Absolute Error (MAE): 0.7637057598889659


In [None]:
# calculate RMSE and MAE manually using calculate_rmse_and_mae function
rmse, mae = calculate_rmse_and_mae(
    actual_ratings_filtered, predicted_ratings_filtered
)
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")


RMSE: 0.9985242406994432
MAE: 0.7637057598889659


# Few-shot

In [53]:
%%time


def predict_rating_few_shot(movie_title, rating_history, model="gpt-3.5-turbo", temperature=0):
    """
    Predict the rating of a movie based on user's past rating history using the GPT model.
    
    Parameters:
    - movie_title (str): The title of the movie for which rating needs to be predicted.
    - rating_history (str): A string representation of user's past movie ratings.
    - model (str): The GPT model version to use.
    - temperature (float): Sampling temperature for the model response. 
    
    Returns:
    - float: Predicted rating for the movie or None if the response is not valid.
    """
    # Construct the prompt to ask the model
    prompt = (f"Here is user rating history: {rating_history}; "
              f"Based on the above rating history, please predict user's rating for the movie: '{movie_title}', "
              "(1 being lowest and 5 being highest. On a scale from 0.5 to 5, with intervals of 0.5). "
              "Attention! Just give me back the exact number as a result, and you don't need a lot of text.")
    
    # Make the API call
    response = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": "You are a movie critic."},
            {"role": "user", "content": prompt}
        ]
    )
    
    rating_text = response.choices[0].message['content'].strip()
    try:
        # Extract the first numerical value from the response
        rating = float(re.search(r'\d+(\.\d+)?', rating_text).group())
        if not (0.5 <= rating <= 5.0):
            raise ValueError("Rating out of bounds")
    except (ValueError, AttributeError):
        print(f"Unexpected response for '{movie_title}': {rating_text}")
        rating = None

    return rating

# Load the dataset
data = movie_data

predicted_ratings = []
for title in data['title'].unique():
    # Sample 4 rows from the dataset to represent user's past rating history
    rating_history_samples = data.sample(4)
    rating_history_str = ', '.join([f"{row['title']} ({row['rating']} stars)" for _, row in rating_history_samples.iterrows()])
    predicted_rating = predict_rating_few_shot(title, rating_history_str)
    print(f"Predicted rating for {title}: {predicted_rating}")
    predicted_ratings.append(predicted_rating)

# Create a new DataFrame with titles and their predicted ratings
predicted_ratings_df = pd.DataFrame({
    'title': data['title'].unique(),
    'predicted_rating_few_shot': predicted_ratings
})

# Merge the predicted ratings with the original data
merged_data_with_predictions = pd.merge(data, predicted_ratings_df, on='title')

# Save the merged data with predictions to a new CSV file
file_path = os.path.join(data_directory, 'merged_data_with_predictions.csv')
merged_data_with_predictions.to_csv(file_path, index=False)

# Evaluate the rating prediction model

# Filter out rows where the predicted rating is None
filtered_data = merged_data_with_predictions.dropna(subset=['predicted_rating_few_shot'])

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(filtered_data['rating'], filtered_data['predicted_rating_few_shot']))
print(f'Root Mean Squared Error (RMSE): {rmse}')

# Calculate MAE
mae = mean_absolute_error(filtered_data['rating'], filtered_data['predicted_rating_few_shot'])
print(f'Mean Absolute Error (MAE): {mae}')



Predicted rating for Toy Story (1995): 4.5
Predicted rating for Jumanji (1995): 4.0
Predicted rating for Grumpier Old Men (1995): 4.0
Predicted rating for Father of the Bride Part II (1995): 4.0
Predicted rating for Sabrina (1995): 4.0
Predicted rating for American President, The (1995): 3.5
Predicted rating for Nixon (1995): 4.0
Predicted rating for Casino (1995): 4.5
Predicted rating for Sense and Sensibility (1995): 4.0
Predicted rating for Get Shorty (1995): 4.5
Predicted rating for Copycat (1995): 3.5
Predicted rating for Leaving Las Vegas (1995): 3.5
Predicted rating for Othello (1995): 4.0
Predicted rating for Persuasion (1995): 4.0
Predicted rating for City of Lost Children, The (Cité des enfants perdus, La) (1995): 3.5
Predicted rating for Dangerous Minds (1995): 4.0
Predicted rating for Twelve Monkeys (a.k.a. 12 Monkeys) (1995): 4.0
Predicted rating for Babe (1995): 3.5
Predicted rating for Dead Man Walking (1995): 3.5
Predicted rating for It Takes Two (1995): 3.5
Predicted r

In [None]:
# demonstrate the output of 4 random historical ratings
predicted_ratings = []
for title in data['title'].unique():
    # Randomly sample 4 rows from the entire dataset
    rating_history_samples = data.sample(4)
    rating_history_str = ', '.join([f"{row['title']} ({row['rating']} stars)" for _, row in rating_history_samples.iterrows()])

rating_history_str

# Replaced for-loop with vectorized operation for performance.

NameError: name 'data' is not defined

## Use a helper function

You need to write a lot of repetitive boilerplate code to do these three simple things. Having a wrapper function to abstract away the boring bits is useful. That way we can focus on data science use cases.

Hopefully OpenAI will improve the interface to their Python package so this sort of thing is built-in. In the meantime, feel free to use this in your own code.

The function takes 2 arguments.

- `system`: A string containing the system message.
- `user_assistant`: An array of strings that alternate user message then assistant message.

The return value is the generated content.

### Instructions

- Run the next cell so you have access to the function.

In [None]:
def chat(system, user_assistant):
    assert isinstance(system, str), "`system` should be a string"
    assert isinstance(user_assistant, list), "`user_assistant` should be a list"
    system_msg = [{"role": "system", "content": system}]
    user_assistant_msgs = [
        {"role": "assistant", "content": user_assistant[i]} if i % 2 else {"role": "user", "content": user_assistant[i]} 
        for i in range(len(user_assistant))
    ]
    msgs = system_msg + user_assistant_msgs
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=msgs
    )
    status_code = response["choices"][0]["finish_reason"]
    assert status_code == "stop", f"The status code was {status_code}."
    return response["choices"][0]["message"]["content"]
        

Here is a check to make sure the function works.

In [None]:
response_fn_test = chat(
    "You are a movie critic.", 
    ["How will users rate this movie title: Barbie (2023)? (1 being lowest and 5 being highest. On a scale from 0.5 to 5, with intervals of 0.5) Attention! Just give me back the exact number as a result, and you don't need a lot of text."]
)
display(Markdown(response_fn_test))

3.5

# References

+ https://platform.openai.com/docs/api-reference/authentication