In [1]:
!rm -r Glocal_K/
!rm -r group-movie-recommendation/
!git clone https://github.com/usydnlp/Glocal_K.git
!git clone https://github.com/sanjeevg15/group-movie-recommendation.git

rm: cannot remove 'Glocal_K/': No such file or directory
rm: cannot remove 'group-movie-recommendation/': No such file or directory
Cloning into 'Glocal_K'...
remote: Enumerating objects: 80, done.[K
remote: Counting objects: 100% (80/80), done.[K
remote: Compressing objects: 100% (76/76), done.[K
remote: Total 80 (delta 21), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (80/80), done.
Cloning into 'group-movie-recommendation'...
remote: Enumerating objects: 2702, done.[K
remote: Counting objects: 100% (2702/2702), done.[K
remote: Compressing objects: 100% (2486/2486), done.[K
remote: Total 2702 (delta 160), reused 2696 (delta 158), pack-reused 0[K
Receiving objects: 100% (2702/2702), 11.68 MiB | 18.84 MiB/s, done.
Resolving deltas: 100% (160/160), done.


In [2]:
from time import time
from scipy.sparse import csc_matrix
import numpy as np
import h5py
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.nn.parameter import Parameter
from tqdm.notebook import tqdm
import os

torch.manual_seed(1284)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print('device:', device)

device: cuda:0


# Data Loader Function

In [3]:
def load_data_1m(path='./', delimiter='::'):
    frac = 0.8
    total = np.loadtxt(path+'movielens_1m_dataset.dat', skiprows=0, delimiter=delimiter).astype('int32')
    samples = len(total)
    total = total[total[:, 1].argsort()]

    user_mapping = {} # old: new
    counter = 0
    for i in range(len(total)):
        train_example = total[i] 
        user_id = train_example[1]
        if user_id not in user_mapping: 
            counter += 1
            # print('User ID:', user_id, 'Counter: ', counter)
            if user_id != counter: 
                total[i][1] = counter
            user_mapping[user_id] = counter
        else: 
            total[i][1] = user_mapping[user_id]


    train_samples = np.floor(frac*samples).astype('int32')
    train = total[:train_samples]
    test = total[train_samples:]

    # total = np.concatenate((train, test), axis=0)

    n_u = np.unique(total[:,0]).size  # num of users
    n_m = np.unique(total[:,1]).size  # num of movies
    n_train = train.shape[0]  # num of training ratings
    n_test = test.shape[0]  # num of test ratings

    train_r = np.zeros((n_m, n_u), dtype='float32')
    test_r = np.zeros((n_m, n_u), dtype='float32')

    for i in range(n_train):
        train_r[train[i,1]-1, train[i,0]-1] = train[i,2]

    for i in range(n_test):
        test_r[test[i,1]-1, test[i,0]-1] = test[i,2]

    train_m = np.greater(train_r, 1e-12).astype('float32')  # masks indicating non-zero entries
    test_m = np.greater(test_r, 1e-12).astype('float32')

    print('data matrix loaded')
    print('num of users: {}'.format(n_u))
    print('num of movies: {}'.format(n_m))
    print('num of training ratings: {}'.format(n_train))
    print('num of test ratings: {}'.format(n_test))

    return n_m, n_u, train_r, train_m, test_r, test_m

# Load Data

In [5]:
# Insert the path of a data directory by yourself (e.g., '/content/.../data')
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
data_path = '/content/Glocal_K/data'
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._

In [6]:
# Data Load
try:
    path = data_path + '/MovieLens_1M/'
    n_m, n_u, train_r, train_m, test_r, test_m = load_data_1m(path=path, delimiter='::')

except Exception as e:
    print('Error: Unable to load data: ', e)

data matrix loaded
num of users: 6040
num of movies: 3706
num of training ratings: 800167
num of test ratings: 200042


In [7]:
# Common hyperparameter settings
n_hid = 500 # size of hidden layers
n_dim = 5 # inner AE embedding size
n_layers = 2 # number of hidden layers
gk_size = 3 # width=height of kernel for convolution

# Hyperparameters to tune for specific case
max_epoch_p = 500 # max number of epochs for pretraining
max_epoch_f = 1000 # max number of epochs for finetuning
patience_p = 10 # number of consecutive rounds of early stopping condition before actual stop for pretraining
patience_f = 10 # and finetuning
tol_p = 1e-4 # minimum threshold for the difference between consecutive values of train rmse, used for early stopping, for pretraining
tol_f = 1e-5 # and finetuning
lambda_2 = 70. # regularisation of number or parameters
lambda_s = 0.018 # regularisation of sparsity of the final matrix
dot_scale = 0.5 # dot product weight for global kernel

lambda_2 = 20
lambda_s = 0.0
iter_p = 50
iter_f = 10
epoch_p = 20
epoch_f = 30
dot_scale = 0.5


# Network Functions

In [8]:
def local_kernel(u, v):
    dist = torch.norm(u - v, p=2, dim=2)
    hat = torch.clamp(1. - dist**2, min=0.)
    return hat

class KernelLayer(nn.Module):
    def __init__(self, n_in, n_hid, n_dim, lambda_s, lambda_2, activation=nn.Sigmoid()):
      super().__init__()
      '''
        n_in (int): size of input layer
        n_hid (int): size of hidden layers 
        n_dim (int): inner AE embedding size
        lambda_s (float): hyperparameter - sparsity encouraging regularizer
        lambda_2 (float): hyperparameter - l2 regularizer
        activation () : Activtion Function 
      '''
      self.W = nn.Parameter(torch.randn(n_in, n_hid))
      self.u = nn.Parameter(torch.randn(n_in, 1, n_dim))
      self.v = nn.Parameter(torch.randn(1, n_hid, n_dim))
      self.b = nn.Parameter(torch.randn(n_hid))

      self.lambda_s = lambda_s
      self.lambda_2 = lambda_2

      nn.init.xavier_uniform_(self.W, gain=torch.nn.init.calculate_gain("relu"))
      nn.init.xavier_uniform_(self.u, gain=torch.nn.init.calculate_gain("relu"))
      nn.init.xavier_uniform_(self.v, gain=torch.nn.init.calculate_gain("relu"))
      nn.init.zeros_(self.b)
      self.activation = activation

    def forward(self, x):
      w_hat = local_kernel(self.u, self.v) # n_in x n_hid
    
      sparse_reg = torch.nn.functional.mse_loss(w_hat, torch.zeros_like(w_hat))
      sparse_reg_term = self.lambda_s * sparse_reg
      
      l2_reg = torch.nn.functional.mse_loss(self.W, torch.zeros_like(self.W))
      l2_reg_term = self.lambda_2 * l2_reg

      W_eff = self.W * w_hat  # Local kernelised weight matrix
      y = torch.matmul(x, W_eff) + self.b
      y = self.activation(y) # shape of y = (1 x n_hid)

      # Proposed append user side info to vector y
      # Let y1 = d_y1 x 1
      # y = torch.cat(y, y1, dim=0) Shape of y = (1 x (n_hid + d_y1))


      return y, sparse_reg_term + l2_reg_term

class KernelNet(nn.Module):
    def __init__(self, n_u, n_hid, n_dim, n_layers, lambda_s, lambda_2):
      super().__init__()
      layers = []
      for i in range(n_layers):
        if i == 0:
          layers.append(KernelLayer(n_u, n_hid, n_dim, lambda_s, lambda_2))
        else:
          layers.append(KernelLayer(n_hid, n_hid, n_dim, lambda_s, lambda_2))
          # layers.append(KernelLayer(n_hid + d_y1, n_hid, n_dim, lambda_s, lambda_2))
      layers.append(KernelLayer(n_hid, n_u, n_dim, lambda_s, lambda_2, activation=nn.Identity()))
      self.layers = nn.ModuleList(layers)
      self.dropout = nn.Dropout(0.33)

    def forward(self, x):
      total_reg = None
      for i, layer in enumerate(self.layers):
        x, reg = layer(x)
        if i < len(self.layers)-1:
          x = self.dropout(x)
        if total_reg is None:
          total_reg = reg
        else:
          total_reg += reg
      return x, total_reg

In [9]:
class CompleteNet(nn.Module):
    def __init__(self, kernel_net, n_u, n_m, n_hid, n_dim, n_layers, lambda_s, lambda_2, gk_size, dot_scale):
      super().__init__()
      self.gk_size = gk_size
      self.dot_scale = dot_scale
      self.local_kernel_net = kernel_net
      self.conv_kernel = torch.nn.Parameter(torch.randn(n_m, gk_size**2) * 0.1)
      nn.init.xavier_uniform_(self.conv_kernel, gain=torch.nn.init.calculate_gain("relu"))
      

    def forward(self, x, x_local):
      gk = self.global_kernel(x_local, self.gk_size, self.dot_scale)
      x = self.global_conv(x, gk)
      x, global_reg_loss = self.local_kernel_net(x)
      return x, global_reg_loss

    def global_kernel(self, input, gk_size, dot_scale):
      avg_pooling = torch.mean(input, dim=1)  # Item (axis=1) based average pooling
      avg_pooling = avg_pooling.view(1, -1)

      gk = torch.matmul(avg_pooling, self.conv_kernel) * dot_scale  # Scaled dot product
      gk = gk.view(1, 1, gk_size, gk_size)

      return gk

    def global_conv(self, input, W):
      input = input.unsqueeze(0).unsqueeze(0)
      conv2d = nn.LeakyReLU()(F.conv2d(input, W, stride=1, padding=1))
      return conv2d.squeeze(0).squeeze(0)

class Loss(nn.Module):
    def forward(self, pred_p, reg_loss, train_m, train_r):
      ''' Calculate L2 loss between pred_p and train_r
        pred_p: predictions
        reg_loss: regularization_loss
        train_m: mask for train data
        train_r: ground truth
      '''
      diff = train_m * (train_r - pred_p) # Calculate difference only where user ratings are available
      sqE = torch.nn.functional.mse_loss(diff, torch.zeros_like(diff))
      loss_p = sqE + reg_loss
      return loss_p

# Network Instantiation

## Pre-training

In [10]:
model = KernelNet(n_u, n_hid, n_dim, n_layers, lambda_s, lambda_2).double().to(device)

## Fine-tuning

In [11]:
complete_model = CompleteNet(model, n_u, n_m, n_hid, n_dim, n_layers, lambda_s, lambda_2, gk_size, dot_scale).double().to(device)

# Evaluation code

In [12]:
def dcg_k(score_label, k):
    dcg, i = 0., 0
    for s in score_label:
        if i < k:
            dcg += (2**s[1]-1) / np.log2(2+i)
            i += 1
    return dcg

In [13]:
def ndcg_k(y_hat, y, k):
    score_label = np.stack([y_hat, y], axis=1).tolist()
    score_label = sorted(score_label, key=lambda d:d[0], reverse=True)
    score_label_ = sorted(score_label, key=lambda d:d[1], reverse=True)
    norm, i = 0., 0
    for s in score_label_:
        if i < k:
            norm += (2**s[1]-1) / np.log2(2+i)
            i += 1
    dcg = dcg_k(score_label, k)
    return dcg / norm

In [14]:
def call_ndcg(y_hat, y):
    ndcg_sum, num = 0, 0
    y_hat, y = y_hat.T, y.T
    n_users = y.shape[0]

    for i in range(n_users):
        y_hat_i = y_hat[i][np.where(y[i])]
        y_i = y[i][np.where(y[i])]

        if y_i.shape[0] < 2:
            continue

        ndcg_sum += ndcg_k(y_hat_i, y_i, y_i.shape[0])  # user-wise calculation
        num += 1

    return ndcg_sum / num

# Training and Test Loop

In [15]:
best_rmse_ep, best_mae_ep, best_ndcg_ep = 0, 0, 0
best_rmse, best_mae, best_ndcg = float("inf"), float("inf"), 0

time_cumulative = 0
tic = time()

# Pre-Training
optimizer = torch.optim.AdamW(complete_model.local_kernel_net.parameters(), lr=0.001)

def closure():
  optimizer.zero_grad()
  x = torch.Tensor(train_r).double().to(device)
  m = torch.Tensor(train_m).double().to(device)
  complete_model.local_kernel_net.train()
  pred, reg = complete_model.local_kernel_net(x)
  loss = Loss().to(device)(pred, reg, m, x)
  loss.backward()
  return loss

last_rmse = np.inf
counter = 0

for i in tqdm(range(max_epoch_p)):
  optimizer.step(closure)
  complete_model.local_kernel_net.eval()
  t = time() - tic
  time_cumulative += t

  pre, _ = model(torch.Tensor(train_r).double().to(device))
  
  pre = pre.float().cpu().detach().numpy()
  
  error = (test_m * (np.clip(pre, 1., 5.) - test_r) ** 2).sum() / test_m.sum()  # test error
  test_rmse = np.sqrt(error)

  error_train = (train_m * (np.clip(pre, 1., 5.) - train_r) ** 2).sum() / train_m.sum()  # train error
  train_rmse = np.sqrt(error_train)
 
  if last_rmse-train_rmse < tol_p:
    counter += 1
  else:
    counter = 0

  last_rmse = train_rmse

  if patience_p == counter:
    print('.-^-._' * 12)
    print('PRE-TRAINING')
    print('Epoch:', i+1, 'test rmse:', test_rmse, 'train rmse:', train_rmse)
    print('Time:', t, 'seconds')
    print('Time cumulative:', time_cumulative, 'seconds')
    print('.-^-._' * 12)
    break


  if i % 50 != 0:
    continue
  print('.-^-._' * 12)
  print('PRE-TRAINING')
  print('Epoch:', i, 'test rmse:', test_rmse, 'train rmse:', train_rmse)
  print('Time:', t, 'seconds')
  print('Time cumulative:', time_cumulative, 'seconds')
  print('.-^-._' * 12)

  0%|          | 0/500 [00:00<?, ?it/s]

.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 0 test rmse: 2.7412589 train rmse: 2.8296566
Time: 2.767164707183838 seconds
Time cumulative: 2.767164707183838 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 50 test rmse: 1.0868257 train rmse: 0.9557076
Time: 61.12351942062378 seconds
Time cumulative: 1627.6038534641266 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 100 test rmse: 1.095899 train rmse: 0.90106815
Time: 118.91510510444641 seconds
Time cumulative: 6157.982696056366 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 150 test rmse: 1.1180272 train rmse: 0.88871

In [16]:
# Fine-Tuning

train_r_local = np.clip(pre, 1., 5.)

optimizer = torch.optim.AdamW(complete_model.parameters(), lr=0.001)

def closure():
  optimizer.zero_grad()
  x = torch.Tensor(train_r).double().to(device)
  x_local = torch.Tensor(train_r_local).double().to(device)
  m = torch.Tensor(train_m).double().to(device)
  complete_model.train()
  pred, reg = complete_model(x, x_local)
  loss = Loss().to(device)(pred, reg, m, x)
  loss.backward()
  return loss

last_rmse = np.inf
counter = 0

for i in tqdm(range(max_epoch_f)):
  optimizer.step(closure)
  complete_model.eval()
  t = time() - tic
  time_cumulative += t

  pre, _ = complete_model(torch.Tensor(train_r).double().to(device), torch.Tensor(train_r_local).double().to(device))
  
  pre = pre.float().cpu().detach().numpy()

  error = (test_m * (np.clip(pre, 1., 5.) - test_r) ** 2).sum() / test_m.sum()  # test error
  test_rmse = np.sqrt(error)

  error_train = (train_m * (np.clip(pre, 1., 5.) - train_r) ** 2).sum() / train_m.sum()  # train error
  train_rmse = np.sqrt(error_train)

  test_mae = (test_m * np.abs(np.clip(pre, 1., 5.) - test_r)).sum() / test_m.sum()
  train_mae = (train_m * np.abs(np.clip(pre, 1., 5.) - train_r)).sum() / train_m.sum()

  test_ndcg = call_ndcg(np.clip(pre, 1., 5.), test_r)
  train_ndcg = call_ndcg(np.clip(pre, 1., 5.), train_r)

  if test_rmse < best_rmse:
      best_rmse = test_rmse
      best_rmse_ep = i+1

  if test_mae < best_mae:
      best_mae = test_mae
      best_mae_ep = i+1

  if best_ndcg < test_ndcg:
      best_ndcg = test_ndcg
      best_ndcg_ep = i+1

  if last_rmse-train_rmse < tol_f:
    counter += 1
  else:
    counter = 0

  last_rmse = train_rmse

  if patience_f == counter:
    print('.-^-._' * 12)
    print('FINE-TUNING')
    print('Epoch:', i+1, 'test rmse:', test_rmse, 'test mae:', test_mae, 'test ndcg:', test_ndcg)
    print('Epoch:', i+1, 'train rmse:', train_rmse, 'train mae:', train_mae, 'train ndcg:', train_ndcg)
    print('Time:', t, 'seconds')
    print('Time cumulative:', time_cumulative, 'seconds')
    print('.-^-._' * 12)
    break


  if i % 50 != 0:
    continue

  print('.-^-._' * 12)
  print('FINE-TUNING')
  print('Epoch:', i, 'test rmse:', test_rmse, 'test mae:', test_mae, 'test ndcg:', test_ndcg)
  print('Epoch:', i, 'train rmse:', train_rmse, 'train mae:', train_mae, 'train ndcg:', train_ndcg)
  print('Time:', t, 'seconds')
  print('Time cumulative:', time_cumulative, 'seconds')
  print('.-^-._' * 12)

  0%|          | 0/1000 [00:00<?, ?it/s]

.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
FINE-TUNING
Epoch: 0 test rmse: 1.4179113 test mae: 1.2127701 test ndcg: 0.8332603408236293
Epoch: 0 train rmse: 1.3564273 train mae: 1.1475483 train ndcg: 0.8926810279202094
Time: 2832.5898683071136 seconds
Time cumulative: 148152.10799884796 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
FINE-TUNING
Epoch: 50 test rmse: 1.0984298 test mae: 0.90472007 test ndcg: 0.8300756276461214
Epoch: 50 train rmse: 0.8742117 train mae: 0.6914784 train ndcg: 0.9122153933790662
Time: 3171.8530082702637 seconds
Time cumulative: 298439.8807988167 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
FINE-TUNING
Epoch: 100 test rmse: 1.1022028 test mae: 0.9081849 test ndcg: 0.8324213420514246
Epoch: 100 train rmse: 0.8650851 tr

KeyboardInterrupt: ignored

In [43]:
error = (test_m * (np.clip(pre, 1., 5.) - test_r) ** 2).sum() / test_m.sum()
print(np.sqrt(error))

1.086739


In [17]:
# Save predictions
predictions_path = './Glocal_K/predictions.npy'
np.save(predictions_path, pre)

True


In [88]:
def get_random_group(n_users, group_size):
    if isinstance(group_size, int): 
        pass
    elif group_size == "small":
        group_size = np.random.randint(1, 4)
    elif group_size == "medium": 
        group_size = np.random.randint(4,7)
    elif group_size == "large":
        group_size = np.random.randint(7,10)

    return np.random.choice(n_users, group_size, replace=False)

In [62]:
def evaluate(ground_truth_matrix, prediction_matrix):
    if prediction_matrix.sum() == 0:
        return 0
    rmse_error = np.sqrt((prediction_matrix - ground_truth_matrix) ** 2).sum() / prediction_matrix.sum()
    return rmse_error

In [74]:
def generate_actual_recommendations(ratings, threshold):
    # non_eval_items = Group.non_testable_items(self.members, ratings)

    items = np.argwhere(np.logical_or(ratings >= threshold, ratings == 0)).flatten()
    fp = np.argwhere(np.logical_and(ratings > 0, ratings < threshold)).flatten()
    for member in group:
        cur_items = np.argwhere(np.logical_or(ratings >= threshold, ratings == 0)).flatten()
        fp = np.union1d(fp, np.argwhere(np.logical_and(ratings > 0, ratings < threshold)).flatten())
        items = np.intersect1d(items, cur_items)

    # items = np.setdiff1d(items, non_eval_items)

    actual_recos = items
    # print('Recos:', actual_recos)
    false_positive = fp
    # print('False positivess: ', false_positive)

    return actual_recos, false_positive

In [77]:
def get_precision_recall(actual_recos, reco_list, false_positive):
    tp = float(np.intersect1d(actual_recos, reco_list).size)
    fp = float(np.intersect1d(false_positive, reco_list).size)

    try:
        precision = tp / (tp + fp)
    except ZeroDivisionError:
        precision = np.NaN

    try:
        recall = tp / actual_recos.size
    except ZeroDivisionError:
        recall = np.NaN

    # print('tp: ', tp)
    # print('fp: ', fp)
    # print('precision_af: ', precision)
    # print('recall_af: ', recall)

    return precision, recall, tp, fp

In [75]:
predicted_recos, false_positive = generate_actual_recommendations(prediction_matrix.mean(axis=0), 4)
actual_recos, false_positive = generate_actual_recommendations(ground_truth_matrix, 4)
print('Predicted Recos: ', predicted_recos)
print('False Positives: ', false_positive)
print('Actual Recos: ', actual_recos)
get_precision_recall(actual_recos, predicted_recos, false_positive)

Predicted Recos:  [0]
False Positives:  [   1    3    4    5 2774 2775 2783 2788 2803 2807 2817 2819 2820 2825
 2826 2838 2848 2856 2857 2871 2873 2879 2891 2892 2897 2898 2914 2939
 2941 2944 2952 2958 2959 2976 2981 2993 3019 3026 3029 3030 3032 3035
 3037 3039 3043 3044 3045 3048 3050 3059 3104 3126 3130 3133 3155 3157
 3163 3165 3166 3167 3170 3186 3189 3203 3209 3216 3218 3228 3233 3236
 3238 3256 3294 3301 3302 3303 3310 3318 3341 3351 3358 3368 3374 3377
 3383 3392 3394 3397 3405 3444 3446 3451 3453 3455 3457 3459 3460 3461
 3462 3463 3468 3470 3478 3481 3488 3498 3503 3509 3523 3548 3559 3566
 3590 3591 3596 3615 3618 3622 3623 3647 3666 3701]
Actual Recos:  [   0    1    2 ... 3703 3704 3705]
tp:  1.0
fp:  0.0
precision_af:  1.0
recall_af:  0.0002698327037236913


(1.0, 0.0002698327037236913, 1.0, 0.0)

In [93]:
import math
from tqdm.notebook import tqdm
num_groups = 100
group_size = "small"
mean_precision = 0
mean_recall = 0
mean_rmse = 0
pr_counter = 0
re_counter = 0
rmse_counter = 0
for i in tqdm(range(num_groups)): 
    group = get_random_group(3706, group_size)
    ground_truth_matrix = test_r[:, group]
    prediction_matrix = np.clip(pre, 1., 5.)[:, group]
    curr_rmse = evaluate(ground_truth_matrix, prediction_matrix)
    if curr_rmse > 0:
        mean_rmse += curr_rmse 
        rmse_counter += 1

    predicted_recos, false_positive = generate_actual_recommendations(prediction_matrix.mean(axis=0), 4)
    actual_recos, false_positive = generate_actual_recommendations(ground_truth_matrix, 4)
    pr,re, _,_ = get_precision_recall(actual_recos, predicted_recos, false_positive)
    if not math.isnan(pr): 
        mean_precision += pr
        pr_counter += 1
    if not math.isnan(re):
        mean_recall += re
        re_counter += 1

if rmse_counter == 0: 
    mean_rmse = np.nan
else:
    mean_rmse /= rmse_counter
if pr_counter == 0:
    mean_precision = np.nan
else:
    mean_precision /= pr_counter

if re_counter == 0: 
    mean_recall = np.nan
else:
    mean_recall /= re_counter
print('Mean RMSE (' + group_size + '): ', mean_rmse)
print('Mean Precision (' + group_size + '): ', mean_precision)
print('Mean Recall (' + group_size + '): ', mean_recall)

  0%|          | 0/100 [00:00<?, ?it/s]

Mean RMSE (small):  0.9931189626455307
Mean Precision (small):  0.6666666666666666
Mean Recall (small):  8.095709405108104e-06


In [100]:
import math
from tqdm.notebook import tqdm
num_groups = 100
group_size = "medium"
mean_precision = 0
mean_recall = 0
mean_rmse = 0
pr_counter = 0
re_counter = 0
rmse_counter = 0
for i in tqdm(range(num_groups)): 
    group = get_random_group(3706, group_size)
    ground_truth_matrix = test_r[:, group]
    prediction_matrix = np.clip(pre, 1., 5.)[:, group]
    curr_rmse = evaluate(ground_truth_matrix, prediction_matrix)
    if curr_rmse > 0:
        mean_rmse += curr_rmse 
        rmse_counter += 1

    predicted_recos, false_positive = generate_actual_recommendations(prediction_matrix.mean(axis=0), 4)
    actual_recos, false_positive = generate_actual_recommendations(ground_truth_matrix, 4)
    pr,re, _,_ = get_precision_recall(actual_recos, predicted_recos, false_positive)
    if not math.isnan(pr): 
        mean_precision += pr
        pr_counter += 1
    if not math.isnan(re):
        mean_recall += re
        re_counter += 1

if rmse_counter == 0: 
    mean_rmse = np.nan
else:
    mean_rmse /= rmse_counter
if pr_counter == 0:
    mean_precision = np.nan
else:
    mean_precision /= pr_counter

if re_counter == 0: 
    mean_recall = np.nan
else:
    mean_recall /= re_counter
print('Mean RMSE (' + group_size + '): ', mean_rmse)
print('Mean Precision (' + group_size + '): ', mean_precision)
print('Mean Recall (' + group_size + '): ', mean_recall)

  0%|          | 0/100 [00:00<?, ?it/s]

Mean RMSE (medium):  0.9927968782186508
Mean Precision (medium):  0.5555555555555556
Mean Recall (medium):  2.428494333513222e-05


In [99]:
import math
from tqdm.notebook import tqdm
num_groups = 100
group_size = "large"
mean_precision = 0
mean_recall = 0
mean_rmse = 0
pr_counter = 0
re_counter = 0
rmse_counter = 0
for i in tqdm(range(num_groups)): 
    group = get_random_group(3706, group_size)
    ground_truth_matrix = test_r[:, group]
    prediction_matrix = np.clip(pre, 1., 5.)[:, group]
    curr_rmse = evaluate(ground_truth_matrix, prediction_matrix)
    if curr_rmse > 0:
        mean_rmse += curr_rmse 
        rmse_counter += 1

    predicted_recos, false_positive = generate_actual_recommendations(prediction_matrix.mean(axis=0), 4)
    actual_recos, false_positive = generate_actual_recommendations(ground_truth_matrix, 4)
    pr,re, _,_ = get_precision_recall(actual_recos, predicted_recos, false_positive)
    if not math.isnan(pr): 
        mean_precision += pr
        pr_counter += 1
    if not math.isnan(re):
        mean_recall += re
        re_counter += 1

if rmse_counter == 0: 
    mean_rmse = np.nan
else:
    mean_rmse /= rmse_counter
if pr_counter == 0:
    mean_precision = np.nan
else:
    mean_precision /= pr_counter

if re_counter == 0: 
    mean_recall = np.nan
else:
    mean_recall /= re_counter
print('Mean RMSE (' + group_size + '): ', mean_rmse)
print('Mean Precision (' + group_size + '): ', mean_precision)
print('Mean Recall (' + group_size + '): ', mean_recall)

  0%|          | 0/100 [00:00<?, ?it/s]

Mean RMSE (large):  0.9933498537540436
Mean Precision (large):  0.6333333333333333
Mean Recall (large):  4.047490555855371e-05
