In [4]:
### Loading necessary packages

import random
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader


### loading our custom model python files
from recommender.models import Recommender
from recommender.data_processing import get_context, pad_list, map_column, MASK, PAD


In [33]:
### Input file locations, given relative to the notebook's working directory

### CSV that is read into the `ratings` DataFrame
ratings_path = "../Data/ml-latest-small/ratings.csv"
### CSV that is read into the `movies` DataFrame
movies_path = "../Data/ml-latest-small/movies.csv"
### checkpoint file whose "state_dict" entry is loaded into the model
model_path = "../recommender_models/recommender.ckpt"

In [34]:
### Read the movies and ratings CSV files into pandas DataFrames
movies = pd.read_csv(filepath_or_buffer=movies_path)
ratings = pd.read_csv(filepath_or_buffer=ratings_path)

In [35]:
### Put the ratings in chronological order (ascending timestamp) so the rows
### follow the sequence in which the ratings were made

ratings = ratings.sort_values(by="timestamp")

In [36]:
### Run the project helper `map_column` on the movieId column. It returns the
### updated ratings frame plus two lookup tables; `mapping` is keyed by the raw
### movieId (it is indexed that way when movie_to_idx is built).
### NOTE(review): `inverse_mapping` is presumably the reverse lookup
### (model id -> raw movieId) -- confirm in recommender.data_processing.
### NOTE(review): this cell rebinds `ratings`, so running it twice would feed
### already-mapped ids back into map_column -- confirm that is harmless.
ratings, mapping, inverse_mapping = map_column(ratings, col_name="movieId")

### Group the ratings by user id
grp_by_train = ratings.groupby(by="userId")

In [37]:
### Show 10 randomly drawn user ids (the keys of the per-user groups);
### no seed is set in this cell, so the draw changes from run to run
random.sample(list(grp_by_train.groups.keys()), k=10)

[86, 51, 175, 65, 573, 577, 102, 283, 17, 90]

In [38]:
### Build the network from our custom Recommender class.
### vocab_size is one id per mapped movie plus 2 extra ids
### (presumably the PAD and MASK tokens -- confirm in recommender.data_processing).
model = Recommender(
    vocab_size=len(mapping) + 2,
    lr=1e-4,
    dropout=0.3,
)
### Inference only: put the model in evaluation mode
model.eval()

### Load the pretrained weights.
### map_location="cpu" makes the load work on machines without a GPU even when
### the checkpoint was saved from one, and keeps the weights on the CPU, which
### predict() needs because it feeds CPU tensors and calls .numpy() on the output.
model.load_state_dict(torch.load(model_path, map_location="cpu")["state_dict"])

<All keys matched successfully>

In [39]:
### Dictionary with movie titles as keys and model ids as values, limited to
### movies whose movieId is a key of `mapping`
movie_to_idx = {
    title: mapping[movie_id]
    for title, movie_id in zip(movies.title.tolist(), movies.movieId.tolist())
    if movie_id in mapping
}

### Dictionary with model ids as keys and movie titles as values.
### It is built straight from the movies table instead of by inverting
### movie_to_idx: if two movieIds share the same title, movie_to_idx keeps only
### the last one, and inverting it would leave the other id without a title.
idx_to_movie = {
    mapping[movie_id]: title
    for title, movie_id in zip(movies.title.tolist(), movies.movieId.tolist())
    if movie_id in mapping
}

In [53]:
### A simple function that takes the list of movies you watched and predicts
### the next movies (10 by default)

def predict(list_movies, model, movie_to_idx, idx_to_movie, top_k=10, max_len=120):
    """Return up to ``top_k`` recommended movie titles for a watch history.

    The history is encoded as ``[PAD] ... [PAD] <movie ids> [MASK]`` with exactly
    ``max_len`` entries, passed through ``model``, and the scores produced at the
    final ([MASK]) position are ranked from highest to lowest.

    Args:
        list_movies: Titles already watched (presumably oldest first -- the
            last title sits right before [MASK]). Every title must be a key
            of ``movie_to_idx``.
        model: Callable taking a ``(1, max_len)`` long tensor of ids and
            returning a tensor whose ``[0, -1]`` slice holds one score per id.
        movie_to_idx: Dict mapping a movie title to its model id.
        idx_to_movie: Dict mapping a model id to its movie title.
        top_k: Maximum number of titles to return. Defaults to 10.
        max_len: Length of the id sequence given to the model. Defaults to 120
            (presumably the sequence length used in training -- confirm before
            changing it).

    Returns:
        List of titles ordered from highest to lowest score. Watched movies,
        the special tokens and ids without a title are never returned. The
        list is shorter than ``top_k`` only when fewer candidates exist.

    Raises:
        KeyError: If a title in ``list_movies`` is not in ``movie_to_idx``.
        ValueError: If ``max_len`` is smaller than 1 or ``top_k`` is negative.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")
    if top_k < 0:
        raise ValueError("top_k must not be negative")

    # Fail with a readable message instead of a bare KeyError on the first miss.
    unknown_titles = [title for title in list_movies if title not in movie_to_idx]
    if unknown_titles:
        raise KeyError(f"Unknown movie title(s): {unknown_titles}")

    watched_ids = [movie_to_idx[title] for title in list_movies]

    # Keep only the most recent titles so that, together with the trailing
    # [MASK], the sequence is never longer than max_len; shorter histories are
    # left-padded with [PAD].
    n_history = max_len - 1
    recent_ids = watched_ids[-n_history:] if n_history > 0 else []
    ids = [PAD] * (n_history - len(recent_ids)) + recent_ids + [MASK]

    src = torch.tensor(ids, dtype=torch.long).unsqueeze(0)

    with torch.no_grad():
        prediction = model(src)

    # Scores at the [MASK] position, ranked best first.
    masked_pred = prediction[0, -1].numpy()
    ranked_ids = np.argsort(masked_pred).tolist()[::-1]

    # Set lookup instead of scanning a list once per vocabulary entry. The
    # whole history is excluded, including titles truncated from the input.
    excluded = set(watched_ids) | {PAD, MASK}

    # Filter BEFORE counting, so ids without a title do not use up a slot.
    titles = []
    for idx in ranked_ids:
        if len(titles) >= top_k:
            break
        if idx in excluded or idx not in idx_to_movie:
            continue
        titles.append(idx_to_movie[idx])
    return titles


### Scenario 1: Adventure/Fantasy 

In [54]:
### Watch history: the first four Harry Potter films
list_movies = ["Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
               "Harry Potter and the Chamber of Secrets (2002)",
               "Harry Potter and the Prisoner of Azkaban (2004)",
               "Harry Potter and the Goblet of Fire (2005)"]

### Ask the model for its next-movie suggestions and display them
top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Pulp Fiction (1994)',
 'Star Wars: Episode V - The Empire Strikes Back (1980)',
 'Fight Club (1999)',
 'Fugitive, The (1993)',
 'Silence of the Lambs, The (1991)',
 'Four Weddings and a Funeral (1994)',
 'Lord of the Rings: The Two Towers, The (2002)',
 'Die Hard: With a Vengeance (1995)',
 'Usual Suspects, The (1995)',
 'Inception (2010)']

### Scenario 2:  Action/Adventure

In [55]:
### Watch history: six titles, five of which were suggested for the Harry Potter history
### NOTE(review): the output recorded below starts with the remaining
### Harry Potter suggestions in unchanged order -- the ranking may depend
### little on the input; worth checking.
list_movies = ["Pulp Fiction (1994)",
               "Fight Club (1999)",
               "Silence of the Lambs, The (1991)",
               "Inception (2010)",
               "Star Wars: Episode V - The Empire Strikes Back (1980)",
               "Terminator 2: Judgment Day (1991)"]

### Ask the model for its next-movie suggestions and display them
top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Fugitive, The (1993)',
 'Four Weddings and a Funeral (1994)',
 'Lord of the Rings: The Two Towers, The (2002)',
 'Die Hard: With a Vengeance (1995)',
 'Usual Suspects, The (1995)',
 'Independence Day (a.k.a. ID4) (1996)',
 'Interview with the Vampire: The Vampire Chronicles (1994)',
 'Mask, The (1994)',
 'Star Wars: Episode VI - Return of the Jedi (1983)',
 'Net, The (1995)']

### Scenario 3: Comedy

In [56]:
### Watch history: seven animated / comedy titles
### NOTE(review): the output recorded below is the same ten titles, in the
### same order, as the output recorded for the Harry Potter history in the
### first scenario -- the model appears to return the same ranking regardless
### of the input; worth investigating before trusting these suggestions.
list_movies = ["Zootopia (2016)",
               "Toy Story 3 (2010)",
               "Finding Nemo (2003)",
               "Ratatouille (2007)",
               "The Lego Movie (2014)",
               "Ghostbusters (a.k.a. Ghost Busters) (1984)",
               "Ace Ventura: When Nature Calls (1995)"]
### Ask the model for its next-movie suggestions and display them
top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Pulp Fiction (1994)',
 'Star Wars: Episode V - The Empire Strikes Back (1980)',
 'Fight Club (1999)',
 'Fugitive, The (1993)',
 'Silence of the Lambs, The (1991)',
 'Four Weddings and a Funeral (1994)',
 'Lord of the Rings: The Two Towers, The (2002)',
 'Die Hard: With a Vengeance (1995)',
 'Usual Suspects, The (1995)',
 'Inception (2010)']