
### Jupyter Notebook in VS Code

From directory with virtual environment in it, use this terminal command to use Jupyter Notebook in VS Code:

jupyter notebook --NotebookApp.allow_origin='*' --NotebookApp.ip='0.0.0.0'

### Select Python Interpreter

Open Command Palette as follows: Ctrl+Shift+P

Then select from VS Code Command Palette: "Python: Select Interpreter" and choose the same Python interpreter that you used to install Jupyter.

If server not shown, enter server manually as follows: 
http://localhost:8888

Reminder:  pw: ""

### Reset and clear all variables if needed - waits for confirmation

In [1]:
# %reset  # clear all variables

### Load Libraries and Define Model Class Identcal to Model Class in Training Notebook 

In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

class RecSysModel(nn.Module):
    def __init__(self, n_users, n_movies, n_factors):
        super().__init__()
        self.user_embed = nn.Embedding(n_users, n_factors)
        self.movie_embed = nn.Embedding(n_movies, n_factors)
        self.out = nn.Linear(int(n_factors * 2), 1)

    def forward(self, users, movies):
        user_embeds = self.user_embed(users)
        movie_embeds = self.movie_embed(movies)
        output = torch.cat([user_embeds, movie_embeds], dim=1)
        output = self.out(output)
        return output

In [3]:
# set device to cuda if available, otherwise use cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)

cuda


### Load the MovieLens movies dataset for pairing with the ratings dataset to provide movie titles as recommendations

In [5]:
df_movies = pd.read_csv('../Sandbox/MovieLens/ml-latest-small/movies.csv')

### Load the model and label encoders for users and movies

In [7]:

# Load the model and label encoders from the trained model .pth file
checkpoint = torch.load('models/movie_rec_sys_r2.pth', weights_only=False)

# Extract the label encoders
lbl_user_loaded = checkpoint['lbl_user']
print(f"lbl_user_loaded: {lbl_user_loaded}")
lbl_movie_loaded = checkpoint['lbl_movie']
print(f"lbl_movie_loaded: {lbl_movie_loaded}")
user_embed = checkpoint['user_embed']
print(f"user_embed: {user_embed}")
movie_embed = checkpoint['movie_embed']
print(f"movie_embed: {movie_embed}")
n_factors = checkpoint['n_factors']
print(f"n_factors: {n_factors}")

# Define the model architecture
n_users = len(lbl_user_loaded.classes_)
n_movies = len(lbl_movie_loaded.classes_)

model_loaded = RecSysModel(n_users, n_movies, n_factors)

# Load the model's state dictionary
model_loaded.load_state_dict(checkpoint['model_state_dict'])

# Set the device to use for inference
# model_loaded.to(device)

# Set the model to evaluation mode
model_loaded.eval()

lbl_user_loaded: LabelEncoder()
lbl_movie_loaded: LabelEncoder()
user_embed: Embedding(624, 64)
movie_embed: Embedding(9724, 64)
n_factors: 64


RecSysModel(
  (user_embed): Embedding(624, 64)
  (movie_embed): Embedding(9724, 64)
  (out): Linear(in_features=128, out_features=1, bias=True)
)

### Recommend the top K movies for a given user

In [8]:
# Assuming the model is already defined and trained
# model = RecSysModel(n_users, n_movies, n_factors)


def recommend_movies_for_new_user(model, user_ids, movie_ids, top_k=10):
    # Get all movie ids
    movie_ids = torch.LongTensor(range(len(lbl_movie_loaded.classes_)))
    print(f'movie_ids device: {movie_ids.device}')
    print(f'movie_ids: {movie_ids}')
    # Use a placeholder user id (e.g., 0)
    user_id = 0
    user_ids = torch.LongTensor([user_id] * len(lbl_movie_loaded.classes_))
    print(f'user_ids device: {user_ids.device}')
    print(f'user_ids: {user_ids}')
    # Get predictions for all movies
    with torch.no_grad():
        all_predictions = model(user_ids, movie_ids)
        print()
        print(f'all_predictions device: {all_predictions.device}')
        print(f"all_predictions shape: {all_predictions.shape}")
        print(f'all_predictions: {all_predictions}')
    # Ensure top_k does not exceed the number of available movies
    top_k = min(top_k, len(all_predictions))
    print()
    print(f'top_k: {top_k}')
    print(f'len(all_predictions): {len(all_predictions)}')
    # Get top k movie predictions
    # top_k_predictions, top_k_indices = all_predictions.topk(top_k)
    top_k_predictions, top_k_indices = torch.topk(all_predictions, top_k, dim=0, largest=True, sorted=True)
    print()
    print(f'top_k_predictions device: {top_k_predictions.device}')
    print(f'top_k_predictions: {top_k_predictions}')
    print(f'top_k_indices device: {top_k_indices.device}')
    print(f'top_k_indices: {top_k_indices}')
    return top_k_predictions, top_k_indices


### Define ratings for a new user and generate recommendations

In [9]:
# Define ratings for a new user to be used for recommendations

new_user_ratings = {
    'movieId': [2959, 318, 1213, 1221, 858, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],  # Example movie IDs
    'rating': [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]   # Example ratings
}

# new_user_ratings = {
#     'movieId': [4896, 5816, 8368, 40815, 54001, 69844, 318], # Harry Potter Movies
#     'rating': [5, 5, 5, 5, 5, 5, 0]
# }

# Convert the new user ratings to a DataFrame
df_new_user_ratings = pd.DataFrame(new_user_ratings)
print(f'MovieId df_new_user_ratings.head(): {df_new_user_ratings.head()}')
# Encode the movie IDs using the loaded label encoder
df_new_user_ratings['movieId'] = lbl_movie_loaded.transform(df_new_user_ratings['movieId'])
print(f'Movie_label df_new_user_ratings.head(): {df_new_user_ratings.head()}')

# Encode the user ID using the loaded label encoder
user_id = 0  # Placeholder user ID

# Add unseen user_id to lbl_user_loaded
if user_id not in lbl_user_loaded.classes_:
    lbl_user_loaded.classes_ = np.append(lbl_user_loaded.classes_, user_id)

df_new_user_ratings['userId'] = lbl_user_loaded.transform([user_id] * len(df_new_user_ratings))
# print(f'df_new_user_ratings: {df_new_user_ratings}')

# Convert the DataFrame to tensors
user_tensor = torch.LongTensor(df_new_user_ratings['userId'].values)
print(f'user_tensor: {user_tensor}')
movie_tensor = torch.LongTensor(df_new_user_ratings['movieId'].values)
print(f'movie_tensor: {movie_tensor}')
# Convert ratings to tensor
ratings_tensor = torch.FloatTensor(df_new_user_ratings['rating'].values)
print(f'ratings_tensor: {ratings_tensor}')

# Get recommendations for the new user by passing the user and movie tensors to the model
top_k_predictions, top_k_indices = recommend_movies_for_new_user(model_loaded, user_tensor, movie_tensor, top_k=100)
print(f'top_k_predictions: {top_k_predictions}')
print(f'top_k_indices: {top_k_indices}')

# Convert top_k_indices to movie IDs
top_k_movie_ids = lbl_movie_loaded.inverse_transform(top_k_indices.numpy())
print(f'top_k_movie_ids: {top_k_movie_ids}')

# Ensure the top_k_movie_ids and top_k_predictions are 1D tensors
print(f"top_k_movie_ids shape: {top_k_movie_ids.shape}")
print(f"top_k_predictions shape: {top_k_predictions.shape}")
top_k_movie_ids = top_k_movie_ids.flatten() if len(top_k_movie_ids.shape) > 1 else top_k_movie_ids
top_k_predictions = top_k_predictions.flatten() if len(top_k_predictions.shape) > 1 else top_k_predictions
print(f"top_k_movie_ids shape: {top_k_movie_ids.shape}")
print(f"top_k_predictions shape: {top_k_predictions.shape}")

# Convert top_k_movie_ids to DataFrame
df_recommendations = pd.DataFrame({
    'movieId': top_k_movie_ids,
    'predicted_rating': top_k_predictions.numpy()
})
print(f'df_recommendations: {df_recommendations}')
# Merge with movie titles
df_recommendations = df_recommendations.merge(df_movies, on='movieId', how='left')
print(f'df_recommendations: {df_recommendations}')

MovieId df_new_user_ratings.head():    movieId  rating
0     2959       0
1      318       0
2     1213       0
3     1221       1
4      858       1
Movie_label df_new_user_ratings.head():    movieId  rating
0     2224       0
1      277       0
2      913       0
3      921       1
4      659       1
user_tensor: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
movie_tensor: tensor([2224,  277,  913,  921,  659,    5,    6,    7,    8,    9,   10,   11,
          12,   13,   14])
ratings_tensor: tensor([0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
movie_ids device: cpu
movie_ids: tensor([   0,    1,    2,  ..., 9721, 9722, 9723])
user_ids device: cpu
user_ids: tensor([0, 0, 0,  ..., 0, 0, 0])

all_predictions device: cpu
all_predictions shape: torch.Size([9724, 1])
all_predictions: tensor([[4.5570],
        [4.1197],
        [4.0453],
        ...,
        [3.8376],
        [3.8039],
        [4.1474]])

top_k: 100
len(all_predictions): 9724

top_k_predictions devi

  y = column_or_1d(y, warn=True)


### Display the top-k recommended movies

In [10]:
# Show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Display the top-k recommended movies
print("Top-k recommended movies:")
print(df_recommendations[['movieId', 'title', 'predicted_rating']].head(20))
# df_recommendations

Top-k recommended movies:
    movieId                                              title  \
0      3201                            Five Easy Pieces (1970)   
1      8477                                   Jetée, La (1962)   
2      3451                Guess Who's Coming to Dinner (1967)   
3     27397  Joint Security Area (Gongdong gyeongbi guyeok ...   
4      1248                               Touch of Evil (1958)   
5      2239  Swept Away (Travolti da un insolito destino ne...   
6     92535      Louis C.K.: Live at the Beacon Theater (2011)   
7      1217                                         Ran (1985)   
8     93838                        The Raid: Redemption (2011)   
9      7983                         Broadway Danny Rose (1984)   
10    55167           Tekkonkinkreet (Tekkon kinkurîto) (2006)   
11     6666  Discreet Charm of the Bourgeoisie, The (Charme...   
12     1208                              Apocalypse Now (1979)   
13     1046                             Beautiful 