In [1]:
# Installation


In [2]:
import torch
if not torch.cuda.is_available():
    raise Exception("You should enable GPU runtime")

In [3]:
device = torch.device("cuda")

In [4]:
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-1.10.0+cu113.html
!pip install -q torch-geometric

In [5]:
# Checking problematic imports
from torch_geometric.nn import GCNConv, GATConv  
from torch_geometric.utils import from_scipy_sparse_matrix  

In [6]:
# installing tensorboard

In [2]:
%load_ext tensorboard 

In [8]:
 #%reload_ext tensorboard

In [9]:
import os
logs_base_dir = "runs2"
os.makedirs(logs_base_dir, exist_ok=True)

In [10]:
from torch.utils.tensorboard import SummaryWriter

tb_fm = SummaryWriter(log_dir=f'{logs_base_dir}/{logs_base_dir}_FM/')
tb_gcn = SummaryWriter(log_dir=f'{logs_base_dir}/{logs_base_dir}_GCN/')
tb_gcn_attention = SummaryWriter(log_dir=f'{logs_base_dir}/{logs_base_dir}_GCN_att/')

In [11]:
# Preparing imports

In [12]:
from torch.utils.data import DataLoader, Dataset
from IPython import embed
from sklearn.metrics import roc_auc_score
import pandas as pd
import numpy as np
import csv
import os
import scipy.sparse as sp
from tqdm import tqdm, trange

from torch_geometric.nn import GCNConv  

In [13]:
# load our data

In [14]:
#transactions = pd.read_csv("./data/transactions_ddup_2019-09-22_nart_5_ncust_20_ncustr_25000.csv")
#transactions = pd.read_csv("./data/transactions_ddup_2019-09-22_nart_5_ncust_20_ncustr_15000.csv")
transactions = pd.read_csv("./data/transactions_ddup_2019-09-22_nart_5_ncust_20_ncustr_10000.csv")
number_cust_file=10000

In [15]:
transactions.head()

Unnamed: 0.1,Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id
0,16856320,2019-09-22,032d6afde041a1f88cf96e7680e40bb7b03c425b7e409f...,733098009,0.016932,2
1,16856321,2019-09-22,032d6afde041a1f88cf96e7680e40bb7b03c425b7e409f...,337991001,0.025407,2
2,16856322,2019-09-22,032d6afde041a1f88cf96e7680e40bb7b03c425b7e409f...,752814002,0.033881,2
3,16856323,2019-09-22,032d6afde041a1f88cf96e7680e40bb7b03c425b7e409f...,808938001,0.050831,2
4,16856324,2019-09-22,032d6afde041a1f88cf96e7680e40bb7b03c425b7e409f...,778187002,0.06778,2


In [16]:
# transactions2=transactions[["customer_id","article_id", "t_dat"]]

In [17]:
# add label wih 1
transactions["label"]=1

In [18]:
transactions.nunique()

Unnamed: 0          407717
t_dat                  367
customer_id          10000
article_id           38349
price                 4693
sales_channel_id         2
label                    1
dtype: int64

In [19]:

transactions.head(3)

Unnamed: 0.1,Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id,label
0,16856320,2019-09-22,032d6afde041a1f88cf96e7680e40bb7b03c425b7e409f...,733098009,0.016932,2,1
1,16856321,2019-09-22,032d6afde041a1f88cf96e7680e40bb7b03c425b7e409f...,337991001,0.025407,2,1
2,16856322,2019-09-22,032d6afde041a1f88cf96e7680e40bb7b03c425b7e409f...,752814002,0.033881,2,1


In [20]:
# we need to remove test data
# Test data es for each customer its hast transaction
transactions=transactions.sort_values(['customer_id','t_dat'], \
              ascending = [True, False])


In [21]:
# we will create 2 dictionaries to save customer ID and article ID

In [22]:
customer_dict={}; article_dict={}
n_cust=1; n_art=1; debugi=0
for index, row in transactions.iterrows():
    customer=row["customer_id"]; article=row["article_id"]
    if (customer not in customer_dict):
        customer_dict[customer]=n_cust; n_cust+=1
    if (article not in article_dict):
        article_dict[article]=n_art; n_art+=1
    #debugi+=1
    #if (debugi==2000):
    #    break;
print ("Number of unique customers -> ", len(customer_dict))
print ("Number of unique articles  -> ", len(article_dict))

Number of unique customers ->  10000
Number of unique articles  ->  38349


In [23]:
aux_debug=0;
test_data_list=[]; data_list=[]; 
last_customer_id=-999; current_customer_id=-999
for index, row in transactions.iterrows():
    customer=row["customer_id"]; customer_id= customer_dict[customer]
    article= row["article_id"]; article_id= article_dict[article]
    timestamp = int (row["t_dat"].replace('-',''))
    if (last_customer_id != customer_id):
        current_customer_id=customer_id # for data_test_list
        last_customer_id= customer_id
        row= [current_customer_id, article_id, row["label"],timestamp ]
        test_data_list.append(row)
        aux_debug+=1
        if (aux_debug == 70000):
            print ("<ERROR>")
            break
    else:
        row= [current_customer_id, article_id, row["label"],timestamp ]
        data_list.append(row)
        


In [24]:
print (len (test_data_list))

10000


In [25]:
# create dataframes to save as movielens.train.rating
column_names = ["customer_id", "article_id" ,"label", "t_dat"]
aux_data= pd.DataFrame(data_list, columns = column_names)
aux_test_data=pd.DataFrame(test_data_list, columns = column_names)
aux_data.to_csv("./data/movielens.train.rating" , sep="\t", index=False,header=False) 
aux_test_data.to_csv("./data/movielens.test.rating" , sep="\t", index=False,header=False)

In [26]:
# importing library
import numpy
# customer_id, artilce_id, label, t_dat

data=numpy.array(data_list)
data


array([[       1,        2,        1, 20200519],
       [       1,        3,        1, 20200504],
       [       1,        4,        1, 20200504],
       ...,
       [   10000,     7760,        1, 20191220],
       [   10000,    14435,        1, 20191220],
       [   10000,    24119,        1, 20191220]])

In [27]:
# items is cero based no need to dcrecese 1
items = data[:, :2].astype(np.int) - 1  # -1 because ID begins from 1
items

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  items = data[:, :2].astype(np.int) - 1  # -1 because ID begins from 1


array([[    0,     1],
       [    0,     2],
       [    0,     3],
       ...,
       [ 9999,  7759],
       [ 9999, 14434],
       [ 9999, 24118]])

In [28]:
# I assume articles ID are fine, no need to add nothing but I will follow Paula flow
np.max(items, axis=0)[:2] + 1 



array([10000, 38349])

In [29]:
# We need each node to have a unique id
#
#
# notice 25000 comes from previous data
# number_cust_file number of cust in file
#
#
reindex_items = items.copy()
reindex_items[:, 1] = reindex_items[:, 1] + number_cust_file
reindex_items

array([[    0, 10001],
       [    0, 10002],
       [    0, 10003],
       ...,
       [ 9999, 17759],
       [ 9999, 24434],
       [ 9999, 34118]])

In [30]:
# not sure what does
field_dims = np.max(reindex_items, axis=0) + 1
field_dims

array([10000, 48349])

In [31]:
def build_adj_mx(dims, interactions):
    train_mat = sp.dok_matrix((dims, dims), dtype=np.float32)
    for x in tqdm(interactions, desc="BUILDING ADJACENCY MATRIX..."):
        train_mat[x[0], x[1]] = 1.0
        train_mat[x[1], x[0]] = 1.0

    return train_mat

In [32]:
train_mat = build_adj_mx(field_dims[-1], reindex_items.copy())
train_mat

BUILDING ADJACENCY MATRIX...: 100%|█████████████████████████████████████████████████████████████████| 397717/397717 [00:05<00:00, 73114.09it/s]


<48349x48349 sparse matrix of type '<class 'numpy.float32'>'
	with 749344 stored elements in Dictionary Of Keys format>

In [33]:
#
# this is for paulas, dont need it here 
# Check that we have (2*99057 = 198114) interactions...
99057*2

198114

##### *Checking we have just positive data:*

In [34]:
targets = data[:, 2]
targets

array([1, 1, 1, ..., 1, 1, 1])

In [35]:
np.unique(targets)

array([1])

##### *Example on performing negative data for a training sample: (u, i, j)*


In [36]:
data = np.c_[(reindex_items, targets)].astype(int)
data

array([[    0, 10001,     1],
       [    0, 10002,     1],
       [    0, 10003,     1],
       ...,
       [ 9999, 17759,     1],
       [ 9999, 24434,     1],
       [ 9999, 34118,     1]])

In [37]:
field_dims[:2]

array([10000, 48349])

In [38]:
# EXAMPLE interaction number 988 : user 6 - item 1470
x = data[988]
x


array([   20, 10898,     1])

In [39]:
neg_triplet = np.array([0,0,0])
neg_triplet[0] = x[0].copy()
neg_triplet

array([20,  0,  0])

In [40]:
# Example: We find item 1200 has no connection with user 6
j = 1200
neg_triplet[1] = j
neg_triplet

array([  20, 1200,    0])

##### *Define metrics:*

In [41]:
import math

def getHitRatio(recommend_list, gt_item):
    if gt_item in recommend_list:
        return 1
    else:
        return 0

def getNDCG(recommend_list, gt_item):
    idx = np.where(recommend_list == gt_item)[0]
    if len(idx) > 0:
        return math.log(2)/math.log(idx+2)
    else:
        return 0

##### *Build test dataset for evaluation*

In [42]:
# Paula
dataset_path = 'data/movielens'
#test_data = pd.read_csv(f'{dataset_path}.test.rating', sep='\t',
#                        header=None, names=colnames).to_numpy()
#test_data

In [43]:
test_data=numpy.array(test_data_list)
test_data


array([[       1,        1,        1, 20200519],
       [       2,       27,        1, 20200613],
       [       3,       56,        1, 20200528],
       ...,
       [    9998,    15939,        1, 20200901],
       [    9999,      676,        1, 20200908],
       [   10000,    12013,        1, 20200920]])

In [44]:
# Take number of users and items from reindex items from train set
users, items = np.max(reindex_items, axis=0)[:2] + 1 # [ 943, 1682])
print(users)
print(items)

10000
48349


In [45]:
# Reindex test items and substract 1
pairs_test = test_data[:, :2].astype(np.int) - 1    
pairs_test[:, 1] = pairs_test[:, 1] + users 
pairs_test

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  pairs_test = test_data[:, :2].astype(np.int) - 1


array([[    0, 10000],
       [    1, 10026],
       [    2, 10055],
       ...,
       [ 9997, 25938],
       [ 9998, 10675],
       [ 9999, 22012]])

In [46]:
# assert 74 + 943 - 1 == 1016

In [47]:
pair = pairs_test[0]
pair

array([    0, 10000])

In [48]:
# GENERATE TEST SET WITH NEGATIVE EXAMPLES TO EVALUATE
max_users, max_items = field_dims[:2] # number users (943), number items (2625)
negatives = []
for t in range(10):
    j = np.random.randint(max_users, max_items)
    while (pair[0], j) in train_mat or j == pair[1]:
        j = np.random.randint(max_users, max_items)
    negatives.append(j)
negatives

[16281, 35398, 32243, 22542, 22530, 30720, 30896, 32929, 23340, 31486]

In [49]:
single_user_test_set = np.vstack([pair, ] * (len(negatives)+1))
single_user_test_set

array([[    0, 10000],
       [    0, 10000],
       [    0, 10000],
       [    0, 10000],
       [    0, 10000],
       [    0, 10000],
       [    0, 10000],
       [    0, 10000],
       [    0, 10000],
       [    0, 10000],
       [    0, 10000]])

In [50]:
single_user_test_set[:, 1][1:] = negatives
single_user_test_set

array([[    0, 10000],
       [    0, 16281],
       [    0, 35398],
       [    0, 32243],
       [    0, 22542],
       [    0, 22530],
       [    0, 30720],
       [    0, 30896],
       [    0, 32929],
       [    0, 23340],
       [    0, 31486]])

#### **2. Building dataset and preparing data for the model ...**

In [51]:
#@title
import numpy as np
import pandas as pd
import torch.utils.data


class MovieLens100kDataset(torch.utils.data.Dataset):
    """
    MovieLens 100k Dataset

    Data preparation
        treat samples with a rating less than 3 as negative samples

    :param dataset_path: MovieLens dataset path

    """

    def __init__(self, dataset_path, num_negatives_train=4, num_negatives_test=100, sep='\t'):

        colnames = ["customer_id", 'article_id', 'label', 't_dat']
        data = pd.read_csv(f'{dataset_path}.train.rating', sep=sep, header=None, names=colnames).to_numpy()
        test_data = pd.read_csv(f'{dataset_path}.test.rating', sep=sep, header=None, names=colnames).to_numpy()

        # TAKE items, targets and test_items
        self.targets = data[:, 2]
        self.items = self.preprocess_items(data)

        # Save dimensions of max users and items and build training matrix
        self.field_dims = np.max(self.items, axis=0) + 1 # ([ 943, 2625])
        self.train_mat = build_adj_mx(self.field_dims[-1], self.items.copy())

        # Generate train interactions with 4 negative samples for each positive
        self.negative_sampling(num_negatives=num_negatives_train)
        
        # Build test set by passing as input the test item interactions
        self.test_set = self.build_test_set(self.preprocess_items(test_data),
                                            num_neg_samples_test = num_negatives_test)

    def __len__(self):
        return self.targets.shape[0]

    def __getitem__(self, index):
        return self.interactions[index]
    
    def preprocess_items(self, data, users=number_cust_file): # users=25000):
        reindexed_items = data[:, :2].astype(np.int) - 1  # -1 because ID begins from 1
        #users, items = np.max(reindexed_items, axis=0)[:2] + 1 # [ 943, 1682])
        # Reindex items (we need to have [users + items] nodes with unique idx)
        reindexed_items[:, 1] = reindexed_items[:, 1] + users

        return reindexed_items

    def negative_sampling(self, num_negatives=4):
        self.interactions = []
        data = np.c_[(self.items, self.targets)].astype(int)
        max_users, max_items = self.field_dims[:2] # number users (943), number items (2625)

        for x in tqdm(data, desc="Performing negative sampling on test data..."):  # x are triplets (u, i , 1) 
            # Append positive interaction
            self.interactions.append(x)
            # Copy user and maintain last position to 0. Now we will need to update neg_triplet[1] with j
            neg_triplet = np.vstack([x, ] * (num_negatives))
            neg_triplet[:, 2] = np.zeros(num_negatives)

            # Generate num_negatives negative interactions
            for idx in range(num_negatives):
                j = np.random.randint(max_users, max_items)
                # IDEA: Loop to exclude true interactions (set to 1 in adj_train) user - item
                while (x[0], j) in self.train_mat:
                    j = np.random.randint(max_users, max_items)
                neg_triplet[:, 1][idx] = j
            self.interactions.append(neg_triplet.copy())

        self.interactions = np.vstack(self.interactions)
    
    def build_test_set(self, gt_test_interactions, num_neg_samples_test=99):
        max_users, max_items = self.field_dims[:2] # number users (943), number items (2625)
        test_set = []
        for pair in tqdm(gt_test_interactions, desc="BUILDING TEST SET..."):
            negatives = []
            for t in range(num_neg_samples_test):
                j = np.random.randint(max_users, max_items)
                while (pair[0], j) in self.train_mat or j == pair[1]:
                    j = np.random.randint(max_users, max_items)
                negatives.append(j)
            #APPEND TEST SETS FOR SINGLE USER
            single_user_test_set = np.vstack([pair, ] * (len(negatives)+1))
            single_user_test_set[:, 1][1:] = negatives
            test_set.append(single_user_test_set.copy())
        return test_set

In [52]:
print (dataset_path)

data/movielens


In [53]:
full_dataset= MovieLens100kDataset( \
                                   dataset_path, num_negatives_train=4, num_negatives_test=99)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  reindexed_items = data[:, :2].astype(np.int) - 1  # -1 because ID begins from 1
BUILDING ADJACENCY MATRIX...: 100%|█████████████████████████████████████████████████████████████████| 397717/397717 [00:05<00:00, 73981.20it/s]
Performing negative sampling on test data...: 100%|█████████████████████████████████████████████████| 397717/397717 [00:08<00:00, 44403.79it/s]
BUILDING TEST SET...: 100%|████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:03<00:00, 3198.27it/s]


In [54]:
# 90570 interactions with pairs of index that have interacted + 4*90570 negative
full_dataset.interactions

array([[    0, 10001,     1],
       [    0, 26658,     0],
       [    0, 20053,     0],
       ...,
       [ 9999, 17116,     0],
       [ 9999, 32984,     0],
       [ 9999, 18622,     0]])

In [55]:
full_dataset.interactions[:20]

array([[    0, 10001,     1],
       [    0, 26658,     0],
       [    0, 20053,     0],
       [    0, 13869,     0],
       [    0, 11827,     0],
       [    0, 10002,     1],
       [    0, 35862,     0],
       [    0, 41424,     0],
       [    0, 15524,     0],
       [    0, 43613,     0],
       [    0, 10003,     1],
       [    0, 31870,     0],
       [    0, 28245,     0],
       [    0, 15138,     0],
       [    0, 30822,     0],
       [    0, 10004,     1],
       [    0, 14985,     0],
       [    0, 19500,     0],
       [    0, 37273,     0],
       [    0, 21466,     0]])

In [56]:
## We had 99057 interactions in training_matrix --> now we have 99057 positive plus 4*99057 negative
# assert 5*99057 == full_dataset.interactions.shape[0]

In [57]:
# For test set, we keep the size (one interaction per user) but we append 99 negative samples for evaluation
print(len(full_dataset.test_set))

10000


In [58]:
len(full_dataset.test_set[0]) # --> [gt_pair + 99_neg_samples]

100

In [59]:
full_dataset.test_set[0]

array([[    0, 10000],
       [    0, 21389],
       [    0, 36712],
       [    0, 20489],
       [    0, 31409],
       [    0, 28020],
       [    0, 32606],
       [    0, 27487],
       [    0, 31931],
       [    0, 44568],
       [    0, 28857],
       [    0, 39062],
       [    0, 35802],
       [    0, 30037],
       [    0, 23601],
       [    0, 13516],
       [    0, 41483],
       [    0, 48254],
       [    0, 43162],
       [    0, 25568],
       [    0, 38834],
       [    0, 10373],
       [    0, 38832],
       [    0, 15171],
       [    0, 45629],
       [    0, 42978],
       [    0, 18580],
       [    0, 47518],
       [    0, 30466],
       [    0, 36122],
       [    0, 43177],
       [    0, 47095],
       [    0, 24160],
       [    0, 32216],
       [    0, 14517],
       [    0, 10288],
       [    0, 42680],
       [    0, 33262],
       [    0, 22557],
       [    0, 42851],
       [    0, 35897],
       [    0, 11681],
       [    0, 35086],
       [   

Sampling 4 negative samples for each positive, will also work as a type of normalization.

In [60]:
data_loader = DataLoader(full_dataset, batch_size=256, shuffle=True, num_workers=0)

In [61]:
for i, (interactions) in enumerate(data_loader):
    if i == 0:
        print(interactions.shape)
    else:
        break

torch.Size([256, 3])


### **Building Factorization Machines model**


Our training matrix is now even sparser: Of all 237,746,250 values (90,570*2,625), only 181,140 are non-zero (90,570*2). In other words, the matrix is 99.92% sparse. Storing this as a dense matrix would be a massive waste of both storage and computing power!
To avoid this, let’s use a scipy.lil_matrix sparse matrix for samples and a numpy array for labels.

<div>
<center><img src="https://d2908q01vomqb2.cloudfront.net/f1f836cb4ea6efb2a0b1b99f41ad8b103eff4b59/2019/04/03/sagemaker-factorization-1.gif" width="400"/></center>
</div>

##### **LAYERS:** Linear and FM part of the equation

In [62]:
# EMBEDDING PYTORCH: https://pytorch.org/docs/stable/nn.html?highlight=embedding#torch.nn.Embedding

In [63]:
# Linear part of the equation
class FeaturesLinear(torch.nn.Module):

    def __init__(self, field_dims, output_dim=1):
        super().__init__()

        self.fc = torch.nn.Embedding(field_dims, output_dim)
        self.bias = torch.nn.Parameter(torch.zeros((output_dim,)))

    def forward(self, x):
        """
        :param x: Long tensor of size ``(batch_size, num_fields)``
        """
        # self.fc(x).shape --> [batch_size, num_fields, 1]
        # torch.sum(self.fc(x), dim=1).shape --> ([batch_size, 1])
        return torch.sum(self.fc(x), dim=1) + self.bias

In [64]:
# FM part of the equation
class FM_operation(torch.nn.Module):

    def __init__(self, reduce_sum=True):
        super().__init__()
        self.reduce_sum = reduce_sum

    def forward(self, x):
        """
        :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)``
        """
        square_of_sum = torch.sum(x, dim=1) ** 2
        sum_of_square = torch.sum(x ** 2, dim=1)
        ix = square_of_sum - sum_of_square
        if self.reduce_sum:
            ix = torch.sum(ix, dim=1, keepdim=True)
        return 0.5 * ix


##### MODEL

In [65]:
class FactorizationMachineModel(torch.nn.Module):
    """
    A pytorch implementation of Factorization Machine.

    Reference:
        S Rendle, Factorization Machines, 2010.
    """

    def __init__(self, field_dims, embed_dim):
        super().__init__()
        # field_dims == total of nodes (sum users + context)
        # self.linear = torch.nn.Linear(field_dims, 1, bias=True)
        self.linear = FeaturesLinear(field_dims)
        self.embedding = torch.nn.Embedding(field_dims, embed_dim, sparse=False)
        self.fm = FM_operation(reduce_sum=True)

        torch.nn.init.xavier_uniform_(self.embedding.weight.data)

    def forward(self, interaction_pairs):
        """
        :param interaction_pairs: Long tensor of size ``(batch_size, num_fields)``
        """
        out = self.linear(interaction_pairs) + self.fm(self.embedding(interaction_pairs))
        
        return out.squeeze(1)
        
    def predict(self, interactions, device):
        # return the score, inputs are numpy arrays, outputs are tensors
        test_interactions = torch.from_numpy(interactions).to(dtype=torch.long, device=device)
        output_scores = self.forward(test_interactions)
        return output_scores
    


### **Workflow for FM with usual embeddings ...**Ç

#### **Train**

In [66]:
from statistics import mean

def train_one_epoch(model, optimizer, data_loader, criterion, device, log_interval=100):
    model.train()
    total_loss = []

    for i, (interactions) in enumerate(data_loader):
        interactions = interactions.to(device)
        targets = interactions[:,2]
        predictions = model(interactions[:,:2])
        
        loss = criterion(predictions, targets.float())
        model.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss.append(loss.item())

    return mean(total_loss)

#### **Evaluation**

##### **Understanding evaluation ...**

In [67]:
len(full_dataset.test_set)

10000

In [68]:
user_test = full_dataset.test_set[0]
user_test.shape

(100, 2)

In [69]:
user_test

array([[    0, 10000],
       [    0, 21389],
       [    0, 36712],
       [    0, 20489],
       [    0, 31409],
       [    0, 28020],
       [    0, 32606],
       [    0, 27487],
       [    0, 31931],
       [    0, 44568],
       [    0, 28857],
       [    0, 39062],
       [    0, 35802],
       [    0, 30037],
       [    0, 23601],
       [    0, 13516],
       [    0, 41483],
       [    0, 48254],
       [    0, 43162],
       [    0, 25568],
       [    0, 38834],
       [    0, 10373],
       [    0, 38832],
       [    0, 15171],
       [    0, 45629],
       [    0, 42978],
       [    0, 18580],
       [    0, 47518],
       [    0, 30466],
       [    0, 36122],
       [    0, 43177],
       [    0, 47095],
       [    0, 24160],
       [    0, 32216],
       [    0, 14517],
       [    0, 10288],
       [    0, 42680],
       [    0, 33262],
       [    0, 22557],
       [    0, 42851],
       [    0, 35897],
       [    0, 11681],
       [    0, 35086],
       [   

In [70]:
gt_pair = user_test[0]
neg_items = user_test[1:]
print(f'gt_pair: {gt_pair}')
print(f'lenght neg_items: {len(neg_items)}')

gt_pair: [    0 10000]
lenght neg_items: 99


In [71]:
# DEFINE GT_ITEM
gt_item = user_test[0][1]
gt_item

10000

In [76]:
# Defining dummy model with 8 embedding dimensions
dummy_model = FactorizationMachineModel(full_dataset.field_dims[-1], 8).to(device)
out = dummy_model.predict(user_test, device)
out.shape

torch.Size([100])

In [74]:
#del out
#torch.cuda.empty_cache()
#dump_tensors()
#del dummy_model
#import gc
#gc.collect()

In [72]:
def pretty_size(size):
    """Pretty prints a torch.Size object"""
    assert(isinstance(size, torch.Size))
    return " × ".join(map(str, size))

def dump_tensors(gpu_only=True):
    """Prints a list of the Tensors being tracked by the garbage collector."""
    import gc
    total_size = 0
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                if not gpu_only or obj.is_cuda:
                    print("%s:%s%s %s" % (type(obj).__name__, 
                                          " GPU" if obj.is_cuda else "",
                                          " pinned" if obj.is_pinned else "",
                                          pretty_size(obj.size())))
                    total_size += obj.numel()
            elif hasattr(obj, "data") and torch.is_tensor(obj.data):
                if not gpu_only or obj.is_cuda:
                    print("%s → %s:%s%s%s%s %s" % (type(obj).__name__, 
                                                   type(obj.data).__name__, 
                                                   " GPU" if obj.is_cuda else "",
                                                   " pinned" if obj.data.is_pinned else "",
                                                   " grad" if obj.requires_grad else "", 
                                                   " volatile" if obj.volatile else "",
                                                   pretty_size(obj.data.size())))
                    total_size += obj.data.numel()
        except Exception as e:
            pass        
    print("Total size:", total_size)

In [73]:
#dump_tensors()

In [77]:
# Print first 10 predictions, where 1st one is the one for the GT
out[:10]

tensor([ 0.0735,  0.2419,  0.6175,  0.6738,  0.5040,  0.3190,  1.7319,  0.4291,
        -0.3535,  1.6929], device='cuda:0', grad_fn=<SliceBackward0>)

In [78]:
values, indices = torch.topk(out, 10)
print(values)
print(indices.cpu().detach().numpy())

tensor([4.0175, 2.7743, 2.6291, 2.5063, 2.3616, 2.3297, 2.2282, 2.2028, 1.8737,
        1.8242], device='cuda:0', grad_fn=<TopkBackward0>)
[86 41 78 57 31 36 89 66 85 87]


In [79]:
user_test[0]

array([    0, 10000])

In [80]:
# RANKING LIST TO RECOMMEND
recommend_list = user_test[indices.cpu().detach().numpy()][:, 1]
recommend_list

array([31274, 11681, 46069, 47507, 47095, 42680, 43823, 38287, 34956,
       40908])

In [81]:
gt_item in recommend_list

False

##### **Defining test function...**

In [82]:
def test(model, full_dataset, device, topk=10):
    # Test the HR and NDCG for the model @topK
    model.eval()

    HR, NDCG = [], []

    for user_test in full_dataset.test_set:
        gt_item = user_test[0][1]

        predictions = model.predict(user_test, device)
        _, indices = torch.topk(predictions, topk)
        recommend_list = user_test[indices.cpu().detach().numpy()][:, 1]

        HR.append(getHitRatio(recommend_list, gt_item))
        NDCG.append(getNDCG(recommend_list, gt_item))
    return mean(HR), mean(NDCG)

#### **Model, loss and optimizer definition**

In [83]:
model = FactorizationMachineModel(full_dataset.field_dims[-1], 32).to(device)

In [84]:
criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)


#### **Random evaluation**

In [85]:
topk = 10

# Check Init performance
hr, ndcg = test(model, full_dataset, device, topk=topk)
print("initial HR: ", hr)
print("initial NDCG: ", ndcg)


initial HR:  0.102
initial NDCG:  0.04740044522753258


#### **Start training the model**

In [86]:
# DO EPOCHS NOW
tb = True
topk = 10
for epoch_i in range(20):
    #data_loader.dataset.negative_sampling()
    train_loss = train_one_epoch(model, optimizer, data_loader, criterion, device)
    hr, ndcg = test(model, full_dataset, device, topk=topk)

    print('\n')

    print(f'epoch {epoch_i}:')
    print(f'training loss = {train_loss:.4f} | Eval: HR@{topk} = {hr:.4f}, NDCG@{topk} = {ndcg:.4f} ')
    print('\n')
    if tb:
        tb_fm.add_scalar('train/loss', train_loss, epoch_i)
        tb_fm.add_scalar('eval/HR@{topk}', hr, epoch_i)
        tb_fm.add_scalar('eval/NDCG@{topk}', ndcg, epoch_i)



epoch 0:
training loss = 0.7095 | Eval: HR@10 = 0.1113, NDCG@10 = 0.0520 




epoch 1:
training loss = 0.5176 | Eval: HR@10 = 0.1507, NDCG@10 = 0.0764 




epoch 2:
training loss = 0.3981 | Eval: HR@10 = 0.1646, NDCG@10 = 0.0848 




epoch 3:
training loss = 0.3322 | Eval: HR@10 = 0.1701, NDCG@10 = 0.0877 




epoch 4:
training loss = 0.2788 | Eval: HR@10 = 0.1737, NDCG@10 = 0.0895 




epoch 5:
training loss = 0.2292 | Eval: HR@10 = 0.1764, NDCG@10 = 0.0903 




epoch 6:
training loss = 0.1843 | Eval: HR@10 = 0.1781, NDCG@10 = 0.0912 




epoch 7:
training loss = 0.1455 | Eval: HR@10 = 0.1793, NDCG@10 = 0.0918 




epoch 8:
training loss = 0.1134 | Eval: HR@10 = 0.1828, NDCG@10 = 0.0932 




epoch 9:
training loss = 0.0874 | Eval: HR@10 = 0.1828, NDCG@10 = 0.0938 




epoch 10:
training loss = 0.0667 | Eval: HR@10 = 0.1843, NDCG@10 = 0.0941 




epoch 11:
training loss = 0.0505 | Eval: HR@10 = 0.1853, NDCG@10 = 0.0947 




epoch 12:
training loss = 0.0379 | Eval: HR@10 = 0.1867, NDC

In [87]:
#%tensorboard --logdir runs2

### **Computing embeddings with GCN instead ...**

In [88]:
from scipy.sparse import identity
from torch_geometric.utils import from_scipy_sparse_matrix


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """ Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

# BUILD FEATURES X (identity tensor for the moment)
# https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html?highlight=sparse#torch_geometric.utils.from_scipy_sparse_matrix
X = sparse_mx_to_torch_sparse_tensor(identity(full_dataset.train_mat.shape[0]))
X


tensor(indices=tensor([[    0,     1,     2,  ..., 48346, 48347, 48348],
                       [    0,     1,     2,  ..., 48346, 48347, 48348]]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]),
       size=(48349, 48349), nnz=48349, layout=torch.sparse_coo)

In [89]:
# get TRAIN MX FROM DATASET
full_dataset.train_mat

<48349x48349 sparse matrix of type '<class 'numpy.float32'>'
	with 749344 stored elements in Dictionary Of Keys format>

In [90]:
# BUILD EDGE INDEX from training matrix (identity tensor for the moment)
edge_idx, edge_attr = from_scipy_sparse_matrix(full_dataset.train_mat)
edge_idx

tensor([[    0, 10001,     0,  ..., 24434,  9999, 34118],
        [10001,     0, 10002,  ...,  9999, 34118,  9999]])

In [91]:
from torch_geometric.nn import GCNConv # https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html?highlight=GCNConv#torch_geometric.nn.conv.GCNConv
from torch_geometric.utils import from_scipy_sparse_matrix

class GraphModel(torch.nn.Module):
    def __init__(self, field_dims, embed_dim, features, train_mat, attention=False):

        super().__init__()

        self.A = train_mat
        self.features = features
        if attention:
            self.GCN_module = GATConv(int(field_dims), embed_dim, heads=8, dropout=0.6)
        else:  
            self.GCN_module = GCNConv(field_dims, embed_dim)

    def forward(self, x):
        """
        :param x: Long tensor of size ``(batch_size, num_fields)``
        """
        return self.GCN_module(self.features, self.A)[x]


In [92]:
class FactorizationMachineModel_withGCN(torch.nn.Module):
    """
    A pytorch implementation of Factorization Machine.

    Reference:
        S Rendle, Factorization Machines, 2010.
    """

    def __init__(self, field_dims, embed_dim, X, A, attention=False):
        super().__init__()

        self.linear = FeaturesLinear(field_dims)
        #self.embedding = torch.nn.Embedding(field_dims, embed_dim, sparse=False)
        self.embedding = GraphModel(field_dims, embed_dim, X, A, attention=attention)
        self.fm = FM_operation(reduce_sum=True)

        #torch.nn.init.xavier_uniform_(self.embedding.weight.data)

    def forward(self, interaction_pairs):
        """
        :param interaction_pairs: Long tensor of size ``(batch_size, num_fields)``
        """
        out = self.linear(interaction_pairs) + self.fm(self.embedding(interaction_pairs))
        return out.squeeze(1)
        
    def predict(self, interactions, device):
        # return the score, inputs are numpy arrays, outputs are tensors
 
        test_interactions = torch.from_numpy(interactions).to(dtype=torch.long, device=device)
        output_scores = self.forward(test_interactions)
        return output_scores
    

#### **Using GCN regular layer ...**

In [93]:
# torch.device("cuda:1")
model_gcn = FactorizationMachineModel_withGCN(full_dataset.field_dims[-1],
                                              64,
                                              X.to(device),
                                              edge_idx.to(device),
                                              ).to(device)

In [94]:
criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
optimizer = torch.optim.Adam(params=model_gcn.parameters(), lr=0.001)


In [95]:
topk = 10

# Check Init performance (already higher than init performance with FM and usual embeddings)
hr, ndcg = test(model_gcn, full_dataset, device, topk=topk)
print("initial HR: ", hr)
print("initial NDCG: ", ndcg)

initial HR:  0.1042
initial NDCG:  0.04732804796209067


In [96]:
# DO EPOCHS NOW
tb = True
for epoch_i in range(20):
    #data_loader.dataset.negative_sampling()
    train_loss = train_one_epoch(model_gcn, optimizer, data_loader, criterion, device)
    hr, ndcg = test(model_gcn, full_dataset, device, topk=topk)

    print('\n')

    print(f'epoch {epoch_i}:')
    print(f'training loss = {train_loss:.4f} | Eval: HR@{topk} = {hr:.4f}, NDCG@{topk} = {ndcg:.4f} ')
    print('\n')
    if tb:
        tb_gcn.add_scalar('train/loss', train_loss, epoch_i)
        tb_gcn.add_scalar('eval/HR@{topk}', hr, epoch_i)
        tb_gcn.add_scalar('eval/NDCG@{topk}', ndcg, epoch_i)



epoch 0:
training loss = 0.5893 | Eval: HR@10 = 0.2360, NDCG@10 = 0.1252 




epoch 1:
training loss = 0.4291 | Eval: HR@10 = 0.2827, NDCG@10 = 0.1521 




epoch 2:
training loss = 0.3856 | Eval: HR@10 = 0.2907, NDCG@10 = 0.1552 




epoch 3:
training loss = 0.3617 | Eval: HR@10 = 0.3014, NDCG@10 = 0.1596 




epoch 4:
training loss = 0.3417 | Eval: HR@10 = 0.3072, NDCG@10 = 0.1632 




epoch 5:
training loss = 0.3208 | Eval: HR@10 = 0.3130, NDCG@10 = 0.1671 




epoch 6:
training loss = 0.2978 | Eval: HR@10 = 0.3189, NDCG@10 = 0.1724 




epoch 7:
training loss = 0.2730 | Eval: HR@10 = 0.3229, NDCG@10 = 0.1753 




epoch 8:
training loss = 0.2473 | Eval: HR@10 = 0.3291, NDCG@10 = 0.1800 




epoch 9:
training loss = 0.2216 | Eval: HR@10 = 0.3357, NDCG@10 = 0.1856 




epoch 10:
training loss = 0.1967 | Eval: HR@10 = 0.3403, NDCG@10 = 0.1892 




epoch 11:
training loss = 0.1732 | Eval: HR@10 = 0.3464, NDCG@10 = 0.1940 




epoch 12:
training loss = 0.1515 | Eval: HR@10 = 0.3481, NDC

#### **Using GCN ATTENTION layer ...**

In [97]:

del model_gcn
del hr
del ndcg
del criterion
del optimizer
del model_gcn_att

NameError: name 'model_gcn_att' is not defined

In [98]:
dump_tensors()
#device2 = torch.device("cuda:1")
torch.cuda.empty_cache()
dump_tensors()
torch.cuda.empty_cache()

Tensor: GPU pinned 100 × 2
Tensor: GPU pinned 100 × 2 × 8
Tensor: GPU pinned 100 × 8
Tensor: GPU pinned 100 × 1
Tensor: GPU pinned 100
Tensor: GPU pinned 10
Tensor: GPU pinned 10
Tensor: GPU pinned 10
Tensor: GPU pinned 1
Tensor: GPU pinned 48349 × 1
Tensor: GPU pinned 48349 × 32
Parameter: GPU pinned 1
Parameter: GPU pinned 48349 × 1
Parameter: GPU pinned 48349 × 32
Parameter: GPU pinned 1
Parameter: GPU pinned 48349 × 1
Parameter: GPU pinned 48349 × 8
Tensor: GPU pinned 64
Tensor: GPU pinned 64 × 48349
Parameter: GPU pinned 64
Parameter: GPU pinned 64 × 48349




Total size: 9817808
Tensor: GPU pinned 100 × 2
Tensor: GPU pinned 100 × 2 × 8
Tensor: GPU pinned 100 × 8
Tensor: GPU pinned 100 × 1
Tensor: GPU pinned 100
Tensor: GPU pinned 10
Tensor: GPU pinned 10
Tensor: GPU pinned 10
Tensor: GPU pinned 1
Tensor: GPU pinned 48349 × 1
Tensor: GPU pinned 48349 × 32
Parameter: GPU pinned 1
Parameter: GPU pinned 48349 × 1
Parameter: GPU pinned 48349 × 32
Parameter: GPU pinned 1
Parameter: GPU pinned 48349 × 1
Parameter: GPU pinned 48349 × 8
Tensor: GPU pinned 64
Tensor: GPU pinned 64 × 48349
Parameter: GPU pinned 64 × 48349
Parameter: GPU pinned 64
Total size: 9817808




In [99]:
model_gcn_att = FactorizationMachineModel_withGCN(full_dataset.field_dims[-1],
                                                  64,
                                                  X.to(device),
                                                  edge_idx.to(device),
                                                  attention=True
                                                  ).to(device)

In [100]:
criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
optimizer = torch.optim.Adam(params=model_gcn_att.parameters(), lr=0.001)


In [101]:
topk = 10

# Check Init performance (already higher than init performance with FM and usual embeddings)
hr, ndcg = test(model_gcn_att, full_dataset, device, topk=topk)
print("initial HR: ", hr)
print("initial NDCG: ", ndcg)

initial HR:  0.0931
initial NDCG:  0.04210229620104079


In [102]:
# DO EPOCHS NOW
tb = True
for epoch_i in range(20):
    #data_loader.dataset.negative_sampling()
    train_loss = train_one_epoch(model_gcn_att, optimizer, data_loader, criterion, device)
    hr, ndcg = test(model_gcn_att, full_dataset, device, topk=topk)

    print('\n')

    print(f'epoch {epoch_i}:')
    print(f'training loss = {train_loss:.4f} | Eval: HR@{topk} = {hr:.4f}, NDCG@{topk} = {ndcg:.4f} ')
    print('\n')
    if tb:
        tb_gcn_attention.add_scalar('train/loss', train_loss, epoch_i)
        tb_gcn_attention.add_scalar('eval/HR@{topk}', hr, epoch_i)
        tb_gcn_attention.add_scalar('eval/NDCG@{topk}', ndcg, epoch_i)



epoch 0:
training loss = 0.5800 | Eval: HR@10 = 0.2712, NDCG@10 = 0.1351 




epoch 1:
training loss = 0.4235 | Eval: HR@10 = 0.3081, NDCG@10 = 0.1582 




epoch 2:
training loss = 0.3777 | Eval: HR@10 = 0.3208, NDCG@10 = 0.1678 




epoch 3:
training loss = 0.3420 | Eval: HR@10 = 0.3384, NDCG@10 = 0.1800 




epoch 4:
training loss = 0.3066 | Eval: HR@10 = 0.3449, NDCG@10 = 0.1840 




epoch 5:
training loss = 0.2710 | Eval: HR@10 = 0.3577, NDCG@10 = 0.1942 




epoch 6:
training loss = 0.2360 | Eval: HR@10 = 0.3634, NDCG@10 = 0.1997 




epoch 7:
training loss = 0.2035 | Eval: HR@10 = 0.3747, NDCG@10 = 0.2093 




epoch 8:
training loss = 0.1753 | Eval: HR@10 = 0.3818, NDCG@10 = 0.2147 




epoch 9:
training loss = 0.1494 | Eval: HR@10 = 0.3814, NDCG@10 = 0.2178 




epoch 10:
training loss = 0.1280 | Eval: HR@10 = 0.3846, NDCG@10 = 0.2207 




epoch 11:
training loss = 0.1097 | Eval: HR@10 = 0.3862, NDCG@10 = 0.2232 




epoch 12:
training loss = 0.0935 | Eval: HR@10 = 0.3962, NDC

## **VISUALIZING RESULTS**

Once we have trained both models (*fm with usual embbedding layers* vs *fm with embeddings from gcn*), we can observe both metrics and loss in the same graphic in order to compare:

In [120]:
#from tensorboard import notebook
#notebook.list() # View open TensorBoard instances

In [3]:
%tensorboard --logdir runs2

In [None]:

transactions.to_csv("./data/tmp.csv")