In [1]:
import pandas as pd
import random
from joblib import load


In [2]:
# One-hot encoding of a "|"-separated genre string over a fixed genre vocabulary
genre_list = [
    'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
    'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]


def one_hot_encoding(genres):
    """Return a 0/1 list marking which entries of ``genre_list`` occur in ``genres``.

    Args:
        genres: A string of genre names separated by "|". Whitespace around
            the whole string is stripped before splitting; whitespace around
            individual names is not.

    Returns:
        A list with one element per entry of ``genre_list``: 1 where that
        genre is one of the separated names, 0 otherwise.
    """
    present = genres.strip().split("|")
    return [1 if name in present else 0 for name in genre_list]

In [3]:
# Build the feature vector passed to the models
def get_model_input(user_input):
    """Turn ``[first_feature, genre_string]`` into a flat feature list.

    Args:
        user_input: A list whose first element is kept as-is and whose second
            element is a "|"-separated genre string.

    Returns:
        A new list: the first element of ``user_input`` followed by the
        one-hot genre encoding. ``user_input`` itself is not modified.
    """
    features = user_input[:1]  # slicing copies, so the caller's list is untouched
    features.extend(one_hot_encoding(user_input[1]))
    return features

In [4]:
# Load the ratings table relative to the working directory, using the first
# CSV column as the row index. Forward slashes avoid the invalid escape
# sequences ("\d", "\m") that a backslash path produces in a normal string
# literal, and keep the path usable outside Windows.
movie_ratings = pd.read_csv("./datasets/movie_ratings.csv", index_col=0)
#movie_ratings

In [5]:
# Randomly pick a userId (each ratings row is equally likely to be drawn).
# random.choice indexes its argument with seq[i]; on a pandas Series that is a
# label lookup rather than a positional one, so draw from a plain list instead.
test_user = random.choice(movie_ratings['userId'].tolist())
test_user

21219

In [6]:
# Movie ids the test user has already rated
rated_movies = set(movie_ratings.loc[movie_ratings['userId'] == test_user, 'movieId'].tolist())
# Filtering movies not rated by the test_user: keep only rows whose movieId is
# absent from the test user's ratings. Filtering on userId != test_user alone
# would keep any movie that some other user rated, including movies the test
# user rated as well.
movies = movie_ratings.loc[~movie_ratings['movieId'].isin(rated_movies), 'movieId']
# Find list of unrated movies (de-duplicated)
unrated_movies = set(movies.tolist())
pred_movies = list(unrated_movies)
#pred_movies

In [7]:
# Randomly choose a movieId from movies not rated by test user
predict_movie = random.choice(pred_movies)
# Re-read the ratings file without an index column, to look up movie features.
# NOTE(review): this used to be an absolute path on the author's machine ending
# in datasets\movie_ratings.csv; the project-relative path below is assumed to
# point at the same file — confirm.
movies = pd.read_csv("./datasets/movie_ratings.csv")

In [8]:
# Report the movieId that was drawn for the rating prediction
print("Movie to predict rating: ", predict_movie)

Movie to predict rating:  2158


In [9]:
# Get first row from the movie with the movieId chosen
movie_row = movies.loc[movies['movieId'] == predict_movie][:1]
if movie_row.empty:
    # Fail with a clear message instead of an opaque int() parsing error
    raise ValueError(f"movieId {predict_movie} not found in movies")
# Extract year and genres from the row to pass to model. Read the scalar cell
# values directly instead of parsing to_string() output, which breaks when the
# year column is stored as a float (e.g. "1995.0").
year = movie_row['year'].iloc[0]
# Spaces are removed from the genre string, as before
genre = str(movie_row['genres'].iloc[0]).replace(" ", "")
movie_features = [int(year), genre]
model_input = get_model_input(movie_features)

# Final rating prediction is given by the linear and ridge models
Based on the evaluation metrics RMSE and R-squared, these two models perform better than the lasso model.

In [10]:
# Linear regression: load the saved model and predict for the prepared features
model = load('model/reg_pred_model.joblib')
prediction = model.predict([model_input])
# The prediction is indexed twice: first the single input row, then its value
print("Rating prediction: ", round(prediction[0][0], ndigits=2))

Rating prediction:  4.22


In [11]:
# Ridge regression: load the saved model and predict for the prepared features
ridge_model = load('./model/ridge_pred_model.joblib')
prediction = ridge_model.predict([model_input])
# The prediction is indexed twice: first the single input row, then its value
print("Rating prediction: ", round(prediction[0][0], ndigits=2))

Rating prediction:  4.22


In [12]:
# Lasso regression: load the saved model and predict for the prepared features
lasso_model = load('./model/lasso_pred_model.joblib')
prediction = lasso_model.predict([model_input])
# Indexed only once here — presumably this model's predict returns a 1-D
# result for the single input row; confirm against the trained model.
print("Rating prediction: ", round(prediction[0], ndigits=2))

Rating prediction:  4.23
