In [653]:
# General libraries
import csv
import json
import math
import numpy as np
import os
from os.path import isfile, join
import pandas as pd
from pathlib import Path as Data_Path
import pickle
import random

import matplotlib.pyplot as plt
%matplotlib inline

import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn import Embedding, Linear, ModuleList
from typing import Optional, Union

from tqdm.notebook import tqdm

# PyTorch Geometric
import torch_geometric
import torch_geometric.nn as pyg_nn
from torch_geometric.data import Data
from torch_geometric.transforms import RandomLinkSplit
from torch_geometric.utils import negative_sampling, to_networkx
from torch_geometric.nn.conv import LGConv
from torch_geometric.typing import Adj, OptTensor, SparseTensor


#### Read Dataset

In [654]:
RATINGS_CSV = "./data/full-dataset/ml-explicit/ratings.csv"
MOVIES_CSV = "./data/full-dataset/ml-explicit/movies.csv"
ratings_df = pd.read_csv("./data/full-dataset/ml-explicit/ratings.csv")
movies_df = pd.read_csv("./data/full-dataset/ml-explicit/movies.csv")

In [655]:
# Merge the dataframes on 'movieId' column
full_ratings_df = pd.merge(ratings_df, movies_df[['movieId', 'title']], on='movieId', how='left')
# Rename the columns
full_ratings_df = full_ratings_df.rename(columns={'title': 'movieName'})
# Reorder the columns --> this is going to let me get movie information later
full_ratings_df = full_ratings_df[['userId', 'movieId', 'movieName', 'rating', 'timestamp']]

In [656]:
full_ratings_df.head(5)

Unnamed: 0,userId,movieId,movieName,rating,timestamp
0,1,17,Sense and Sensibility,4.0,944249077
1,1,25,Leaving Las Vegas,1.0,944250228
2,1,29,"City of Lost Children, The",2.0,943230976
3,1,30,Shanghai Triad,5.0,944249077
4,1,32,Twelve Monkeys,5.0,943228858


#### Graph Construction

In [657]:
""" 
  Defining classes to use in the graph. 
  Attempting to build a bipartite graph where the two node types are users and movies. 
"""

movies = []

class Movie:
  """
  Class for a movie, which contains:
    1. ID (unique to Movielens dataset)
    2. Name (the name of the movie)
  """

  def __init__(self, movielens_id):
    self.movielens_id = int(movielens_id)
    self.name = f"movie_{self.movielens_id}"
    self.title = str(IDS_TO_MOVIES[movielens_id])

  def __str__(self):
    return f"Movie {self.movielens_id}: {self.title}"

  def __repr__(self):
    return self.name

In [658]:
class User:
  """
  Class for a user, which contains:
    1. ID (user id on the movielens platform)
    2. Ratings (dictionary of movies rated and their ratings)
  """

  def __init__(self, user_id, ratings):
    self.id = int(user_id)
    self.name = f"user_{user_id}"
    self.ratings = {f"movie_{key}": value/5 for key,value in ratings.items()}

  def __str__(self):
    return f"User: {self.name}"

  def __repr__(self):
    return f"{self.name}"

In [661]:
users = []

grouped_df = full_ratings_df.groupby('userId')

# this is basically allowing me to control how many users I am constructing the graph for.
count = 0

for userID, group_df in grouped_df:
    if count >= 20000 and userID != 200952: #getting the first 10,000 users plus my user
        continue
    elif userID%10 != 0 and userID != 200952:
        continue
    # if userID <= 178745: #getting the last 20,000 users plus my user
    #     continue
    ratings_dict = dict(zip(group_df['movieId'], group_df['rating']))
    user = User(userID, ratings_dict)
    users.append(user)
    count += 1

# we have a list of users, now we just need a list of all the unique movies
movies = [Movie(x) for x in IDS_TO_MOVIES.keys()] 

print(len(users), len(movies))

20001 31271


In [662]:
import networkx as nx

MovielensGraph = nx.Graph()
MovielensGraph.add_nodes_from([
    (u.name, {'name':u.name, "node_type" : "user"}) for u in users
])
MovielensGraph.add_nodes_from([
    (m.name, {'name':m.name, "node_type" : "movies"}) for m in movies
])

edge_list = []
for user in users:
  for movie in user.ratings.keys():
    edge_list.append((user.name, movie))
MovielensGraph.add_edges_from(edge_list)

print('Num nodes:', MovielensGraph.number_of_nodes(), '. Num edges:', MovielensGraph.number_of_edges())

Num nodes: 51272 . Num edges: 3128042


In [663]:
# reduce the graph into a smaller subgraph for handling directly on the PC
kcore = 30
MovielensGraph = nx.k_core(MovielensGraph, kcore)
print('Num nodes:', MovielensGraph.number_of_nodes(), '. Num edges:', MovielensGraph.number_of_edges())

Num nodes: 24166 . Num edges: 2916980


In [664]:
if "user_200952" in nx.get_node_attributes(MovielensGraph, 'name').keys():
    print("Maria user detected") # making sure that my user is in the graph

Maria user detected


In [665]:
n_nodes, n_edges = MovielensGraph.number_of_nodes(), MovielensGraph.number_of_edges()

sorted_nodes = sorted(list(MovielensGraph.nodes()))
# these will be useful when accessing nodes and recommendations later
node2id = dict(zip(sorted_nodes, np.arange(n_nodes)))
id2node = dict(zip(np.arange(n_nodes), sorted_nodes))

# nodes need integer IDs for training with torch_geometric
MovielensGraph = nx.relabel_nodes(MovielensGraph, node2id)

In [666]:
if "user_200952" in nx.get_node_attributes(MovielensGraph, 'name').values():
    print("Maria user detected") # double-checking again just to make sure

Maria user detected


In [667]:
# keep track of how many users / movies we have and make the lists torch-compatible
users_idx = [i for i, v in enumerate(node2id.keys()) if "user" in v]
movies_idx = [i for i, v in enumerate(node2id.keys()) if "movie" in v]

n_users = len(users_idx)
n_movies = len(movies_idx)

users_idx = torch.Tensor(users_idx).type(torch.int64)
movies_idx =torch.Tensor(movies_idx).type(torch.int64)

n_users, n_movies

(16500, 7666)

In [668]:
num_nodes = n_users +  n_movies
edge_idx = torch.Tensor(np.array(MovielensGraph.edges()).T).type(torch.int64)
graph_data = Data(edge_index = edge_idx, num_nodes = num_nodes)

# convert to train/val/test splits
transform = RandomLinkSplit(
    is_undirected=True,
    add_negative_train_samples=False,
    neg_sampling_ratio=0,
    num_val=0.15,
    num_test=0.15
)
train_split, val_split, test_split = transform(graph_data)

#### Building the GNN Model

In [669]:
class RecommendationGCN(torch.nn.Module):
    def __init__(
        self,
        num_nodes: int,
        embedding_dim: int,
        num_layers: int,
        conv_layer = "LGC",
        name = None,
    ):
        super().__init__()
        # define hyperparameters
        self.num_nodes = num_nodes
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers
        self.embedding = Embedding(num_nodes, embedding_dim)

        self.alpha = 1. / (num_layers + 1) #this is the multiplicative factor that is used for summing layer embeddings

        # define layers (number of them will depend on hyperparameters)
        self.conv_layers = ModuleList (
            [ LGConv() for _ in range(num_layers) ]
        )
        torch.nn.init.xavier_uniform_(self.embedding.weight)
        for layer in self.conv_layers:
            layer.reset_parameters()


    def get_embedding(self, edge_index: Adj) -> Tensor:
        x = self.embedding.weight
        out = x 

        for i in range(self.num_layers):
            x = self.conv_layers[i](x, edge_index)
            out = out + (x * self.alpha)

        return out


    def get_pred_only(self, embed: Adj, edge_label_index: Adj) -> Tensor:

        user_embeddings = embed[edge_label_index[0]]
        movie_embeddings = embed[edge_label_index[1]]
        return (user_embeddings * movie_embeddings).sum(dim=-1)
    

    # I liked the idea that the authors had to make this part of the model class
    # in order to access the embedding weights more easily.
    def recommendation_loss(
        self,
        loss_fn,
        pos_edge_rank: Tensor,
        neg_edge_rank: Tensor,
    ) -> Tensor:
        return loss_fn(pos_edge_rank, neg_edge_rank, self.embedding.weight)


In [670]:
# BPRLoss NOTE this method was taken directly from https://medium.com/stanford-cs224w/spotify-track-neural-recommender-system-51d266e31e16 
class BPRLoss(_Loss):
    __constants__ = ['lambda_reg']
    lambda_reg: float

    def __init__(self, lambda_reg: float = 0):
        super().__init__(None, None, "sum")
        self.lambda_reg = lambda_reg

    def forward(
        self,
        positives: Tensor,
        negatives: Tensor,
        parameters: Tensor = None
    ) -> Tensor:

        n_pairs = positives.size(0)
        log_prob = F.logsigmoid(positives - negatives).sum()
        regularization = 0

        if self.lambda_reg != 0:
            regularization = self.lambda_reg * parameters.norm(p=2).pow(2)

        return (-log_prob + regularization) / n_pairs

In [671]:
# Train
def train(train_data, val_data, model, optimizer, args, name=""):
  stats = {
      'train': {
        'loss': [],
      },
      'val': {
        'loss': [],
      }
  }
  for epoch in range(args["epochs"]):
    model.train()
    optimizer.zero_grad()

    # STEP ONE: perform negative sampling
    neg_edge_index = negative_sampling(train_data.edge_label_index, num_nodes=(n_users, n_movies))
    neg_edge_label = torch.zeros(neg_edge_index.shape[1])

    # STEP TW0: get model embedding
    embed = model.get_embedding(train_data.edge_index)
    
    # STEP THREE: get predictions for positive and negative edges 
    # using the embedding obtained above
    pos_predictions = model.get_pred_only(embed, train_data.edge_label_index)
    neg_predictions = model.get_pred_only(embed, neg_edge_index)

    # STEP FOUR: loss calculations
    # BPRLoss
    lambda_reg = 1e-4  # loss regularization constant
    loss_fn = BPRLoss(lambda_reg)
    loss = model.recommendation_loss(loss_fn, pos_predictions, neg_predictions)

    loss.backward()
    optimizer.step()

    val_loss, val_neg_edge, val_neg_label = test(
        model, val_data, neg_edge_index=None, neg_edge_label=None
    )

    stats['train']['loss'].append(loss)
    stats['val']['loss'].append(val_loss)

    print(f"Epoch {epoch}; Train loss {loss}; Val loss {val_loss}")

  pickle.dump(stats, open(f"model_stats/LightGCN{name}.pkl", "wb"))
  return stats


In [672]:
# Test
def test(model, val_data, neg_edge_index = None, neg_edge_label = None):

  model.eval()
  with torch.no_grad():
    # STEP ONE: perform negative sampling
    neg_edge_index = negative_sampling(val_data.edge_label_index, num_nodes=(n_users, n_movies))
    neg_edge_label = torch.zeros(neg_edge_index.shape[1])
    # STEP TW0: get model embedding
    embed = model.get_embedding(val_data.edge_index)

    # STEP THREE: get predictions for positive and negative edges 
    # using the embedding obtained above
    pos_predictions = model.get_pred_only(embed, val_data.edge_label_index)
    neg_predictions = model.get_pred_only(embed, neg_edge_index)
    
    # STEP FOUR: loss calculations
    # BPRLoss
    lambda_reg = 1e-4  # loss regularization constant
    loss_fn = BPRLoss(lambda_reg)
    loss = model.recommendation_loss(loss_fn, pos_predictions, neg_predictions)

  return loss, neg_edge_index, neg_edge_label

In [673]:
# initialize model and and optimizer
hyperparams = {
    'num_layers' :  3,
    'emb_size' : 64,
    'weight_decay': 1e-5,
    'lr': 0.01,
    'epochs': 300
}
model = RecommendationGCN(
    num_nodes = num_nodes,
    num_layers = hyperparams['num_layers'],
    embedding_dim = hyperparams["emb_size"]
)
optimizer = torch.optim.Adam(model.parameters(), lr=hyperparams['lr'], weight_decay=hyperparams['weight_decay'])

In [674]:
# create directory to save model_stats
if not os.path.exists("model_stats"):
  os.makedirs("model_stats")

train(train_split, val_split, model, optimizer, hyperparams, name="20k-skip10-explicit")

Epoch 0; Train loss 0.6931394934654236; Val loss 0.692793607711792
Epoch 1; Train loss 0.6927223801612854; Val loss 0.6867653727531433
Epoch 2; Train loss 0.6866341829299927; Val loss 0.6708593964576721
Epoch 3; Train loss 0.6705849170684814; Val loss 0.6428906321525574
Epoch 4; Train loss 0.6423322558403015; Val loss 0.602729856967926
Epoch 5; Train loss 0.6019803881645203; Val loss 0.5525990128517151
Epoch 6; Train loss 0.5513163208961487; Val loss 0.49571284651756287
Epoch 7; Train loss 0.49418994784355164; Val loss 0.43738263845443726
Epoch 8; Train loss 0.43561244010925293; Val loss 0.38320794701576233
Epoch 9; Train loss 0.3807418942451477; Val loss 0.33669304847717285
Epoch 10; Train loss 0.333859384059906; Val loss 0.2998286783695221
Epoch 11; Train loss 0.2956826984882355; Val loss 0.2738643288612366
Epoch 12; Train loss 0.26840540766716003; Val loss 0.2549056112766266
Epoch 13; Train loss 0.24837303161621094; Val loss 0.24114108085632324
Epoch 14; Train loss 0.234107077121734

{'train': {'loss': [tensor(0.6931, grad_fn=<DivBackward0>),
   tensor(0.6927, grad_fn=<DivBackward0>),
   tensor(0.6866, grad_fn=<DivBackward0>),
   tensor(0.6706, grad_fn=<DivBackward0>),
   tensor(0.6423, grad_fn=<DivBackward0>),
   tensor(0.6020, grad_fn=<DivBackward0>),
   tensor(0.5513, grad_fn=<DivBackward0>),
   tensor(0.4942, grad_fn=<DivBackward0>),
   tensor(0.4356, grad_fn=<DivBackward0>),
   tensor(0.3807, grad_fn=<DivBackward0>),
   tensor(0.3339, grad_fn=<DivBackward0>),
   tensor(0.2957, grad_fn=<DivBackward0>),
   tensor(0.2684, grad_fn=<DivBackward0>),
   tensor(0.2484, grad_fn=<DivBackward0>),
   tensor(0.2341, grad_fn=<DivBackward0>),
   tensor(0.2254, grad_fn=<DivBackward0>),
   tensor(0.2190, grad_fn=<DivBackward0>),
   tensor(0.2152, grad_fn=<DivBackward0>),
   tensor(0.2116, grad_fn=<DivBackward0>),
   tensor(0.2087, grad_fn=<DivBackward0>),
   tensor(0.2060, grad_fn=<DivBackward0>),
   tensor(0.2032, grad_fn=<DivBackward0>),
   tensor(0.2016, grad_fn=<DivBackwar

#### Generating Predictions for My User

In [675]:
# get my user ID as it pertains to nodes in the graph
my_user_id = node2id["user_200952"]
my_user_obj = None
for obj in users:
    if obj.name == "user_200952":
        my_user_obj = obj
        break

In [676]:
def get_subgraph_for_user(graph, user_id):
    # get all the edges associated with this user id
    graph_nx = to_networkx(test_split)
    user_edges = graph_nx.edges(user_id)
    # create a subgraph out of the edges that we just extracted
    user_subgraph = graph_nx.edge_subgraph(user_edges)
    return user_subgraph

user_subgraph = get_subgraph_for_user(test_split, my_user_id)
user_edge_idx = torch.Tensor(np.array(user_subgraph.edges()).T).type(torch.int64)
user_edge_idx

Exception ignored in: <function tqdm.__del__ at 0x126244220>
Traceback (most recent call last):
  File "/Users/mariavmihu/Documents/MSCI 720/Final_Project/project-env/lib/python3.11/site-packages/tqdm/std.py", line 1148, in __del__
    self.close()
  File "/Users/mariavmihu/Documents/MSCI 720/Final_Project/project-env/lib/python3.11/site-packages/tqdm/notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
    ^^^^^^^^^
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


tensor([[16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857,
         16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857,
         16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857,
         16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857, 16857,
         16857, 16857, 16857, 16857],
        [ 1630,  4022,  3394,  6602,  6256,  1197,   135,  1201,  7605,  2758,
          1806,  2499,   480,  2854,  1786,  7247,  5666,  2663,  4355,   255,
            87,  3592,  6601,   330,  5698,  1127,   416,  1749,   295,  3000,
          6057,  1165,  6344,  7620,   101,  6891,  6255,  5191,  5763,  3379,
          1844,  2921,  1299,  1612]])

In [677]:
movies_for_validation = user_edge_idx[1].tolist()
movies_for_validation = [id2node[m] for m in movies_for_validation]

filename = "results/my_user.qrels"
sorted_movies = sorted(movies_for_validation, key=lambda m: my_user_obj.ratings[m], reverse=True)
with open(filename, 'w', newline='') as results_file:
    for m in sorted_movies:
        user = 200952
        movieID = m.split("movie_")[1]
        score = my_user_obj.ratings[m]

        results_file.write(f"{user} 0 {movieID} {score}\n")

In [678]:
# get the embedding of all users and items using the model we just trained
embeddings = model.get_embedding(graph_data.edge_index) #should be the full dataset
embeddings.shape 

torch.Size([24166, 64])

In [679]:
movie_embeddings = embeddings[:n_movies]
movie_embeddings.shape

torch.Size([7666, 64])

In [680]:
user_embeddings = embeddings[my_user_id]
user_embeddings = torch.unsqueeze(user_embeddings, 0)
user_embeddings.shape

torch.Size([1, 64])

In [681]:
predicted_ratings = torch.sigmoid(torch.matmul(user_embeddings, movie_embeddings.t()))
predicted_ratings.shape

torch.Size([1, 7666])

In [682]:
# this was taken from https://medium.com/stanford-cs224w/lightgcn-for-movie-recommendation-eb6d112f1e8
scores, top_movie_indices = torch.sort(predicted_ratings, descending=True)
movie_indices = top_movie_indices.squeeze().tolist()
scores = scores.squeeze().tolist()

In [683]:
k = 50
top_recommended_movies = movie_indices[:k]

In [684]:
# the movie IDs that are returned are the IDs as they appear in the graph, which means I need to do a bit of post-processing:
# 1. get the actual movielens IDs using node2id
# 2. get the name of the movie from movies_df
# 3. display the recs

print(f"Recommended for my user:")
for i, movie in enumerate(top_recommended_movies):
    movielensID = id2node[movie]
    movielensID = movielensID.split("movie_")[1]
    movie = movies_df[movies_df['movieId'] == int(movielensID)].iloc[0]
    movie_name = movie['title']
    movie_year = movie['year']
    print(f"{i+1}. {movie_name} ({movie_year})")

Recommended for my user:
1. Matrix, The  (1999)
2. Inception  (2010)
3. Fight Club  (1999)
4. Dark Knight, The  (2008)
5. Lord of the Rings: The Fellowship of the Ring, The  (2001)
6. Lord of the Rings: The Two Towers, The  (2002)
7. Lord of the Rings: The Return of the King, The  (2003)
8. Shawshank Redemption, The  (1994)
9. Pulp Fiction  (1994)
10. Shrek  (2001)
11. Star Wars: Episode IV - A New Hope  (1977)
12. Gladiator  (2000)
13. WALL·E  (2008)
14. Pirates of the Caribbean: The Curse of the Black Pearl  (2003)
15. Memento  (2000)
16. Silence of the Lambs, The  (1991)
17. Eternal Sunshine of the Spotless Mind  (2004)
18. Interstellar  (2014)
19. Monsters, Inc.  (2001)
20. Star Wars: Episode V - The Empire Strikes Back  (1980)
21. Raiders of the Lost Ark  (1981)
22. American Beauty  (1999)
23. Kill Bill: Vol. 1  (2003)
24. Up  (2009)
25. Finding Nemo  (2003)
26. Godfather, The  (1972)
27. Star Wars: Episode VI - Return of the Jedi  (1983)
28. Django Unchained  (2012)
29. Incredibl

#### Evaluation Metrics on My User

In [685]:
def write_qrels_to_file(file_name, scores, movies, userID, run):
    results = []
    with open( file_name, 'w', newline='' ) as results_file:
        for i, (score, movie) in enumerate(zip(scores, movies)):
            user_recs = {}
            user_recs['user'] = userID
            user_recs['Q0'] = 'Q0'
            user_recs['item'] = movie
            user_recs['rank'] = i
            user_recs['score'] = score
            user_recs['algorithm'] = run
            results.append(user_recs)
        
        writer = csv.DictWriter(results_file, fieldnames=['user', 'Q0', 'item', 'rank','score','algorithm'])
        writer.writerows(results)


In [686]:
# create directory to save model_stats
if not os.path.exists("results"):
  os.makedirs("results")

NAME_OF_RUN = "20k_explicit_nodes"
results_file_name = os.path.join("results", f"{NAME_OF_RUN}.csv")
write_qrels_to_file(results_file_name, scores, movie_indices, 200952, NAME_OF_RUN)

In [687]:
'''
    Code for NDCG
    citing my previous work in MSCI541 and MSCI720 assignment 3 for the overall logic in this snippet
'''
def compute_NDCG(k: int, qrels: dict, results: dict, metrics: dict, algo_name: str, NUM_USERS: int = 1011):

    NDCGs = []
    for_avg = 0

    # compute average precision
    for userid, recommendations in results.items():
        curr_dcg = 0
        curr_idcg = 0

        k_actual = min(k, len(recommendations))
        # compute DCG
        for i in range(k_actual):
            movie = recommendations[i]
            gain = 0 if movie not in qrels[userid].keys() else qrels[userid][movie]
            
            rank = i + 1
            curr_dcg += gain / (math.log2(1+rank))

        # figure out ideal search rankings
        #   1. get number of documents that are relevant
        rel_bar = len(qrels[userid].values())
        #   2. mimic ideal search results
        k_ideal = min(k, rel_bar)

        rank = 1
        for ground_truth_rating in qrels[userid].values():
            curr_idcg += ground_truth_rating / (math.log2(1+rank))
            rank += 1
            if rank > k_ideal:
                break

        # compute NDCG = DCG / IDCG
        curr_NCDG = curr_dcg / curr_idcg
        NDCGs.append(f"{userid},{curr_NCDG}")

        for_avg += curr_NCDG

    avg_NDCG = for_avg/NUM_USERS
    metric_name = f"NDCG_at_{k}"
    metrics[metric_name][algo_name] = avg_NDCG
    print(f"avg NDCG@{k}: {'{:.3f}'.format(round(avg_NDCG,6))}")
    return NDCGs


In [688]:
def get_qrels(qrels_path: str) -> dict:
    '''utility reading in QRELS file and saving it as a dictionary'''

    qrels_all_users = {}

    with open(qrels_path, 'r') as f:
        for line in f:
            parts_of_line = line.split()  

            userid, movie, rating = parts_of_line[0], parts_of_line[2], parts_of_line[3]
            if userid not in qrels_all_users.keys():
                qrels_all_users[userid] = {}
            
            qrels_all_users[userid][movie] = float(rating)

    return qrels_all_users

In [689]:
def get_results(results_path: str) -> tuple[dict, str]:
    '''
        Utility for reading in RESULTS file and saving it as a dictionary.
        The format of the dictionary is userid: [movieid, movieid, movieid].
            The results files are all sorted in order of rankings, so appending the movie IDs to a list should preserve the order for purposes of judging rank later.
    '''

    results_all_users = {}

    with open(results_path, 'r') as f:
        for line in f:
            parts_of_line = line.split(",")

            userid, movie, score = parts_of_line[0], parts_of_line[2], parts_of_line[3]
            algorithm = parts_of_line[5]
            if userid not in results_all_users.keys():
                results_all_users[userid] = []

            results_all_users[userid].append(movie)

    return results_all_users, algorithm

In [690]:
qrels_for_me = get_qrels(qrels_path="results/my_user.qrels")
qrels_for_me

{'200952': {'88163': 1.0,
  '356': 1.0,
  '60397': 1.0,
  '2953': 1.0,
  '40629': 0.9,
  '4447': 0.9,
  '109487': 0.9,
  '111': 0.9,
  '33493': 0.9,
  '33615': 0.8,
  '5995': 0.8,
  '116797': 0.8,
  '66203': 0.8,
  '164909': 0.8,
  '97921': 0.8,
  '80166': 0.8,
  '541': 0.8,
  '6155': 0.8,
  '286897': 0.8,
  '7361': 0.7,
  '6942': 0.7,
  '275243': 0.7,
  '260': 0.7,
  '2797': 0.7,
  '102903': 0.7,
  '112552': 0.7,
  '2': 0.7,
  '69406': 0.7,
  '204698': 0.7,
  '104211': 0.6,
  '1196': 0.6,
  '201646': 0.6,
  '2694': 0.6,
  '70293': 0.6,
  '1721': 0.6,
  '202439': 0.5,
  '193942': 0.4,
  '166528': 0.4,
  '166635': 0.4,
  '162590': 0.4,
  '192803': 0.4,
  '97188': 0.1,
  '7360': 0.1,
  '103249': 0.1}}

In [691]:
results, name_of_algo = get_results(results_path=results_file_name)

metrics = {
    "NDCG_at_10": {},
    "NDCG_at_100": {},
    "NDCG_at_1000": {},
    f"NDCG_at_{n_movies}": {},
}

# compute NDCG @ 10 for me
NDCGs_at_10 = compute_NDCG(k=10, qrels=qrels_for_me, results=results, metrics=metrics, algo_name=name_of_algo, NUM_USERS=1)
# compute NDCG @ 100 for me
NDCGs_at_100 = compute_NDCG(k=100, qrels=qrels_for_me, results=results, metrics=metrics, algo_name=name_of_algo, NUM_USERS=1)
# compute NDCG @ 1000 for me
NDCGs_at_1000 = compute_NDCG(k=1000, qrels=qrels_for_me, results=results, metrics=metrics, algo_name=name_of_algo, NUM_USERS=1)
# compute NDCG @ 7840 for me
NDCGs_at_n_movies = compute_NDCG(k=n_movies, qrels=qrels_for_me, results=results, metrics=metrics, algo_name=name_of_algo, NUM_USERS=1)

avg NDCG@10: 0.000
avg NDCG@100: 0.000
avg NDCG@1000: 0.012
avg NDCG@7666: 0.118
