# Libraries

In [1]:
import cornac
import pandas as pd
import numpy as np

from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking

from sklearn.model_selection import StratifiedKFold
import statistics


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# utils

In [3]:
def filter_dataset(dataframe, min_items, min_interactions):
  dataframe = dataframe.groupby("itemID").filter(lambda x: len(x) >= min_items)
  dataframe = dataframe.groupby("userID").filter(lambda x: len(x) >= min_interactions)
  return dataframe

In [4]:
def evaluate_recsys(model, train_set, train_, test_):
  model.fit(train_set);
  all_predictions = predict_ranking(model, train_, usercol='userID', itemcol='itemID', remove_seen=True)
  eval_ndcg = ndcg_at_k(test_, all_predictions, col_prediction='prediction', k=10)
  return eval_ndcg

In [5]:
# Split the data into four folds using stratified KFold

def cross_val_rec_sys(df, model):

  df = df.sample(frac=1).reset_index(drop=True)

  NDCGs = []
  skf = StratifiedKFold(n_splits=4)

  y = df['userID']
  X = df.drop(['userID'], axis = 1)

  for fold, (train_indices, test_indices) in enumerate(skf.split(X,y)):
      train_ = df.iloc[train_indices]
      test_ = df.iloc[test_indices]

      test_ = test_[test_["userID"].isin(train_["userID"].unique())]
      test_ = test_[test_["itemID"].isin(train_["itemID"].unique())]

      train_set = cornac.data.Dataset.from_uir(train_.itertuples(index=False), seed= 69)
      test_set = cornac.data.Dataset.from_uir(test_.itertuples(index=False), seed= 69)

      ndcg_score = evaluate_recsys(model, train_set, train_, test_)
      NDCGs.append(ndcg_score)

  return statistics.mean(NDCGs), NDCGs

# model

In [6]:
params = {
    'name' : 'nemo', 
    'k':30, 
    'encoder_structure' :[200], 
    'act_fn' : 'relu', 
    'likelihood' :'bern', 
    'n_epochs' :100, 
    'batch_size': 100, 
    'learning_rate': 0.001, 
    'beta_kl': 1.0, 
    'cap_priors': {'item': False, 'user': False}, 
    'trainable': True, 
    'verbose': False, 
    'seed': None, 
    'use_gpu': True
}

In [7]:
bivae_cfr = cornac.models.bivaecf.recom_bivaecf.BiVAECF(**params)

In [8]:
bivae_cfr

<cornac.models.bivaecf.recom_bivaecf.BiVAECF at 0x20cd9cdcf10>

# data processing

In [9]:
user_history_path = '../data/atrad_user_history_v2.csv'
df = pd.read_csv(user_history_path, names=["userID", "itemID"],dtype = {'userID':np.int32}, skiprows=1)
df = df.assign(rating=1)
df.head(3)

Unnamed: 0,userID,itemID,rating
0,3,CIC.N0000,1
1,3,LIOC.N0000,1
2,3,RICH.N0000,1


In [10]:
df = filter_dataset(
    df,
    min_items = 3,
    min_interactions = 10
)

df.head(3)

Unnamed: 0,userID,itemID,rating
10,39,ACL.N0000,1
11,39,BIL.N0000,1
12,39,BRWN.N0000,1


In [11]:
df.userID.nunique(), df.itemID.nunique()

(1494, 291)

# Training & Evaluation

In [12]:
avg_ndcg, history = cross_val_rec_sys(df, bivae_cfr)
print('avg ndcg : {}'.format(avg_ndcg))
print('ndcgs : {}'.format(history))



RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x290 and 291x200)

In [None]:
idk = df.itertuples(index = False)

In [None]:
next(idk)

Pandas(userID=39, itemID='ACL.N0000', rating=1)

In [None]:
from cornac.eval_methods.ratio_split import RatioSplit
from cornac.data import Dataset

ds = Dataset.from_uir(idk)

In [None]:
model = bivae_cfr.fit(ds)



In [None]:
cornac.models.recommender.is_ann_supported(model)

True

In [None]:
model.k

30

In [None]:
user_vectors = model.get_user_vectors()
user_vectors.shape

(1494, 30)