In [1]:
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.sparse as sp

In [21]:
class GMF(nn.Module):
    ''' Generalized Matrix Factorization model.

    The prediction for a (user, item) pair is the sigmoid of the dot product
    of the user embedding and the item embedding.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        embeddings: size of each embedding vector.

    The model is placed on the GPU when CUDA is available and on the CPU
    otherwise. The torch RNG is re-seeded with 0 on construction so that
    every instance starts from the same weights.
    '''

    def __init__(self, num_users=6040, num_items=3952, embeddings=64):
        torch.manual_seed(0)
        super().__init__()

        # user and item embedding layers
        self.user_embedding = nn.Embedding(num_users, embeddings)
        self.item_embedding = nn.Embedding(num_items, embeddings)

        # Use the GPU when there is one, but do not crash on CPU-only hosts
        self.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))


    def forward(self, user, item):
        ''' Scores (user, item) pairs.

        Args:
            user: LongTensor of 0-based user indices; the training and test
                code in this notebook passes shape (batch, 1).
            item: LongTensor of 0-based item indices, same shape as `user`.

        Returns:
            Tensor of shape (batch, 1) with values between 0 and 1.
        '''
        # map to embeddings; squeeze drops the singleton index dimension
        user_vec = self.user_embedding(user).squeeze(1)
        item_vec = self.item_embedding(item).squeeze(1)

        # Elementwise multiplication summed over the embedding dimension
        score = (user_vec * item_vec).sum(dim=1, keepdim=True)

        # output between 0 and 1
        return torch.sigmoid(score)

class MLP(nn.Module):
    ''' Multi-Layer Perceptron recommender.

    User and item embeddings are concatenated and passed through three
    ReLU layers of shrinking width (embeddings, embeddings/2, embeddings/4)
    followed by a bias-free linear layer and a sigmoid.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        embeddings: size of each embedding vector.
        dropout: dropout probability applied after every hidden layer;
            0 disables dropout (no dropout module is created).

    The model is placed on the GPU when CUDA is available and on the CPU
    otherwise. The torch RNG is re-seeded with 0 on construction.
    '''

    def __init__(self, num_users=6040, num_items=3952, embeddings=64, dropout=0):
        torch.manual_seed(0)
        super().__init__()

        # user and item embedding layers
        self.user_embedding = nn.Embedding(num_users, embeddings)
        self.item_embedding = nn.Embedding(num_items, embeddings)

        # MLP layers
        self.l1 = nn.Linear(embeddings*2, embeddings)
        self.l2 = nn.Linear(embeddings, int(embeddings/2))
        self.l3 = nn.Linear(int(embeddings/2), int(embeddings/4))
        self.l4 = nn.Linear(int(embeddings/4), 1, bias=False)

        # Dropout layer
        self.dropout = dropout
        if self.dropout > 0:
            self.drop = nn.Dropout(p=self.dropout)

        # Use the GPU when there is one, but do not crash on CPU-only hosts
        self.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    def forward(self, user, item):
        ''' Scores (user, item) pairs.

        Args:
            user: LongTensor of 0-based user indices; the training and test
                code in this notebook passes shape (batch, 1).
            item: LongTensor of 0-based item indices, same shape as `user`.

        Returns:
            Tensor of shape (batch, 1) with values between 0 and 1.
        '''
        # map to embeddings; squeeze drops the singleton index dimension
        user_vec = self.user_embedding(user).squeeze(1)
        item_vec = self.item_embedding(item).squeeze(1)

        # Concatenation of the embedding layers
        out = torch.cat((user_vec, item_vec), 1)

        # feed through the hidden layers, each followed by optional dropout
        for layer in (self.l1, self.l2, self.l3):
            out = F.relu(layer(out))
            if self.dropout > 0:
                out = self.drop(out)

        # output between 0 and 1
        return torch.sigmoid(self.l4(out))

class NeuMF(nn.Module):
    ''' Neural Matrix Factorization model.

    Combines a GMF branch (elementwise product of user/item embeddings) with
    an MLP branch (concatenated user/item embeddings passed through three
    ReLU layers). The two branch outputs are concatenated and mapped to a
    single sigmoid output by a bias-free linear layer. Each branch has its
    own embedding tables.

    Args:
        num_users: number of rows in the user embedding tables.
        num_items: number of rows in the item embedding tables.
        embeddings: size of each embedding vector.
        dropout: dropout probability applied after every hidden MLP layer;
            0 disables dropout (no dropout module is created).

    The model is placed on the GPU when CUDA is available and on the CPU
    otherwise. The torch RNG is re-seeded with 0 on construction.
    '''

    def __init__(self, num_users=6040, num_items=3952, embeddings=64, dropout=0):
        torch.manual_seed(0)
        super().__init__()

        # GMF layers
        self.GMF_user_embedding = nn.Embedding(num_users, embeddings)
        self.GMF_item_embedding = nn.Embedding(num_items, embeddings)

        # MLP layers
        self.MLP_user_embedding = nn.Embedding(num_users, embeddings)
        # The item table must have one row per item (it was sized by num_users)
        self.MLP_item_embedding = nn.Embedding(num_items, embeddings)
        self.l1 = nn.Linear(embeddings*2, embeddings)
        self.l2 = nn.Linear(embeddings, int(embeddings/2))
        self.l3 = nn.Linear(int(embeddings/2), int(embeddings/4))

        # Dropout layer
        self.dropout = dropout
        if self.dropout > 0:
            self.drop = nn.Dropout(p=self.dropout)

        # output layer: takes the GMF vector and the last MLP layer side by side
        self.out = nn.Linear(int(embeddings/4 + embeddings), 1, bias=False)

        # Use the GPU when there is one, but do not crash on CPU-only hosts
        self.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    def forward(self, user, item):
        ''' Scores (user, item) pairs.

        Args:
            user: LongTensor of 0-based user indices; the training and test
                code in this notebook passes shape (batch, 1).
            item: LongTensor of 0-based item indices, same shape as `user`.

        Returns:
            Tensor of shape (batch, 1) with values between 0 and 1.
        '''
        # GMF forward
        gmf_user = self.GMF_user_embedding(user).squeeze(1)
        gmf_item = self.GMF_item_embedding(item).squeeze(1)
        gmf_out = gmf_user * gmf_item

        # MLP forward
        mlp_user = self.MLP_user_embedding(user).squeeze(1)
        mlp_item = self.MLP_item_embedding(item).squeeze(1)
        mlp_out = torch.cat((mlp_user, mlp_item), 1)
        for layer in (self.l1, self.l2, self.l3):
            mlp_out = F.relu(layer(mlp_out))
            if self.dropout > 0:
                mlp_out = self.drop(mlp_out)

        # Fusion
        fused = torch.cat((gmf_out, mlp_out), 1)
        return torch.sigmoid(self.out(fused))

In [3]:
class RatingsData(Dataset):
    """Training set of rated (positive) and sampled (negative) user/item pairs.

    The ratings file is read with "::" as separator and the columns
    user_id, movie_id, rating, timestamp. For every rated pair,
    `num_negatives` items the user has not rated are sampled as negatives.
    The last interaction(s) of every user are held out (see
    get_train_instances) and the held-out item ids are stored in `self.test`.

    Args:
        csv_file: path of the ratings file.
        num_negatives: negatives sampled per positive instance.
        validation: hold out two interactions per user instead of one.
        num_users: largest user id; ids are used 1-based as matrix rows.
        num_items: largest item id; negatives are drawn from 1..num_items.
        alfa: strength of the rating-dependent loss weight (see __getitem__).

    The numpy RNG is re-seeded with 0 on construction so the sampled
    negatives are reproducible.
    """

    def __init__(self, csv_file, num_negatives=4, validation=True, num_users=6040, num_items=3952, alfa=0.2):
        np.random.seed(0)
        self.num_users = num_users
        self.num_items = num_items
        self.alfa = alfa
        # Reads the data from file
        r = pd.read_table(csv_file, sep="::",
                          names=["user_id", "movie_id", "rating", "timestamp"], engine='python')

        self.length = len(r) # Number of interactions in the data set
        self.ratings = self.load_as_matrix(r) # Interactions as a matrix structured as ((user,item) rating)
        self.num_negatives = num_negatives # Number of negative instances per positive instance
        # Lists of the users and items to train and test on
        self.user_input, self.item_input, self.rating, self.test = self.get_train_instances(validation)


    def __len__(self):
        """Number of training instances (positives plus sampled negatives)."""
        return len(self.user_input)


    def __getitem__(self, idx):
        """Returns the idx-th training instance as four tensors of shape (1,).

        Returns:
            user:  LongTensor, 0-based user index.
            movie: LongTensor, 0-based item index.
            y:     float label, 1 for a rated pair and 0 for a sampled negative.
            w:     float loss weight, 1 + alfa * (rating - 3) for positives
                   (clamped at 0) and 1 for negatives.
        """
        rating = self.rating[idx]
        # 1 for a rated pair (ratings are >= 1), 0 for a sampled negative
        label = min(1, rating)

        # -1 so that indexing starts from 0
        user = torch.LongTensor([self.user_input[idx] - 1])
        movie = torch.LongTensor([self.item_input[idx] - 1])

        # Larger weight for higher ratings. For alfa > 0.5 the raw value is
        # negative for ratings 1 and 2; a negative weight turns the BCE term
        # into a reward for wrong predictions, so it is clamped at 0.
        weight = 1 + self.alfa * label * (rating - 3)
        weight = max(0.0, float(weight))

        # Explicit float32 so a numpy-typed alfa cannot promote the tensors
        y = torch.tensor([float(label)], dtype=torch.float32)
        w = torch.tensor([weight], dtype=torch.float32)

        return user, movie, y, w


    def set_alfa(self, alfa):
        """Changes the loss-weight strength used by subsequent __getitem__ calls."""
        self.alfa = alfa


    def load_as_matrix(self, ratings):
        """Builds a sparse (num_users + 1) x (num_items + 1) matrix of ratings.

        Row/column 0 is unused because ids are stored 1-based. Rows of
        `ratings` with a rating of 0 or less are skipped.
        """
        # Interactions as dictionary of keys (matrix)
        mat = sp.dok_matrix((self.num_users + 1, self.num_items + 1), dtype=np.float32)

        # Walk the three columns in lockstep instead of per-row label lookups
        columns = zip(ratings["user_id"], ratings["movie_id"], ratings["rating"])
        for user, item, rating in columns:
            user, item, rating = int(user), int(item), int(rating)
            if rating > 0:
                mat[user, item] = rating

        return mat


    @staticmethod
    def _pop_held_out(user_input, item_input, labels, count):
        """Removes the last `count` instances and returns the last item popped."""
        item = None
        for _ in range(count):
            user_input.pop()
            item = item_input.pop()
            labels.pop()
        return item


    # if validation is True the last two interaction for each user will not be
    # part of the training instances, and the penultimate will be used as test
    # if False only the last interaction for each user will be left out and used as test
    def get_train_instances(self, validation = True):
        """Builds the training lists and the per-user held-out test items.

        Returns:
            (user_input, item_input, labels, test_item): three parallel lists
            of 1-based user ids, 1-based item ids and ratings (0 for sampled
            negatives), plus one held-out item id per user.
        """
        train = self.ratings
        user_input, item_input, labels, test_item = [], [], [], []
        # NOTE(review): assumes the matrix keys come grouped by user in
        # ascending order and that the first user id is 1 - verify against
        # the input file.
        last_u = 1
        skip = 2 if validation else 1
        # Every interaction contributes one positive plus num_negatives negatives
        held_out_count = skip*(self.num_negatives + 1)

        for (u, i) in train.keys():
            if u > last_u: # Reaches the next user
                # Remove the previous user's last instances, keep the item for testing
                test_item.append(
                    self._pop_held_out(user_input, item_input, labels, held_out_count))

            # positive instance
            user_input.append(u)
            item_input.append(i)
            labels.append(int(train[u, i]))

            # Generate negative instances
            for t in range(self.num_negatives):
                j = np.random.randint(1, self.num_items+1)
                # Keep generating items if the item has been interacted with
                while (u, j) in train:
                    j = np.random.randint(1, self.num_items+1)
                user_input.append(u)
                item_input.append(j)
                labels.append(0)
            last_u = u # Keep track on who the user was

        # removing the last data points (held-out instances of the final user)
        test_item.append(
            self._pop_held_out(user_input, item_input, labels, held_out_count))

        return user_input, item_input, labels, test_item

In [14]:
def test_negatives(filename):

    with open(filename, "r") as f:
        line = f.readline()
        negatives = []
        while line != None and line != "":
            arr = line.split("\t")
            for x in arr:
                if x != "\n":
                    negatives.append(int(x))
            line = f.readline()

    return negatives


def percentile(l, item):
    """Returns the 1-based position of the first element of `l` greater than `item`.

    When no element is greater, the number of elements in `l` is returned
    (0 for an empty `l`).
    """
    position = 0
    for position, element in enumerate(l, start=1):
        if element > item:
            break
    return position


def get_test_tensor(user, test_item, test_neg, num_negatives=100):
    """Builds the model inputs used to rank one user's held-out item.

    Args:
        user: 1-based user id.
        test_item: sequence of held-out 1-based item ids, indexed by user - 1.
        test_neg: flat sequence of 1-based negative item ids holding
            `num_negatives` consecutive entries per user, in user order.
        num_negatives: number of negatives per user in `test_neg`.

    Returns:
        (user_test, item_test): two LongTensors of shape
        (num_negatives + 1, 1). `user_test` repeats the 0-based user index;
        `item_test` holds the 0-based held-out item in row 0 followed by the
        0-based negatives.

    Raises:
        IndexError: if `test_neg` holds fewer than `num_negatives` entries
            for this user.
    """
    user_index = int(user) - 1
    first = num_negatives * user_index
    negatives = test_neg[first:first + num_negatives]
    if len(negatives) < num_negatives:
        raise IndexError(
            "expected %d test negatives for user %d, found %d"
            % (num_negatives, user, len(negatives)))

    # Held-out item first, then the negatives; -1 converts ids to 0-based indices
    item_ids = [int(test_item[user_index]) - 1]
    item_ids.extend(int(negative) - 1 for negative in negatives)

    # Build both columns in one shot instead of growing them with torch.cat
    item_test = torch.tensor(item_ids, dtype=torch.long).unsqueeze(1)
    user_test = torch.full((len(item_ids), 1), user_index, dtype=torch.long)

    return user_test, item_test


def evaluate_model(model, validation=True, num_users=6040):
    """Computes the hit rate of `model` on the held-out item of every user.

    For each user the held-out item is scored together with that user's
    negatives from the negatives file; the user counts as a hit when the
    held-out item's position among the ascending-sorted negative scores
    (see percentile) is above 90.

    Reads the module-level globals `data` (for `data.test`) and `dir`
    (directory of the negatives files).

    Args:
        model: module called as model(user_tensor, item_tensor).
        validation: read "validation_negatives.csv" when True, otherwise
            "test_negatives.csv".
        num_users: users 1..num_users are evaluated.

    Returns:
        hits / num_users as a float (0.0 when num_users is not positive).
    """
    if num_users <= 0:
        return 0.0

    # Evaluate on whatever device the model lives on
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    test_items = data.test
    if validation:
        test_neg = test_negatives(dir + "validation_negatives.csv")
    else:
        test_neg = test_negatives(dir + "test_negatives.csv")
    hits = 0

    # Dropout must be off while scoring; the previous mode is restored
    # afterwards so a surrounding training loop is not affected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():  # no gradients needed for ranking
            for user in range(1, num_users+1):
                user_test, item_test = get_test_tensor(user, test_items, test_neg)
                scores = model(user_test.to(device), item_test.to(device))
                scores = scores.flatten().tolist()

                # Row 0 is the held-out item, the rest are the negatives
                positive_score = scores[0]
                negative_scores = sorted(scores[1:])

                # NOTE(review): "> 90" admits up to 10 higher-scored negatives,
                # i.e. rank 11 of 101 - confirm this is the intended HR@10 cut-off.
                if percentile(negative_scores, positive_score) > 90:
                    hits += 1
    finally:
        model.train(was_training)

    # Divide by the number of users actually evaluated
    return hits/num_users


def fit(model, data, batch_size, epochs, lr, verbose = True, eval_fn = None):
    """Trains `model` with Adam on weighted binary cross-entropy.

    Args:
        model: module called as model(user_batch, item_batch) that returns
            values between 0 and 1 with the same shape as the labels.
        data: dataset yielding (user, item, label, loss_weight) tensors.
        batch_size: mini-batch size.
        epochs: number of passes over `data`.
        lr: Adam learning rate.
        verbose: print progress roughly every 10% of an epoch.
        eval_fn: optional callable taking the model and returning the metric
            printed after every epoch; defaults to
            evaluate_model(model, validation=True).

    Prints the epoch time, the mean batch loss and the evaluation metric
    after every epoch. Returns None.
    """
    if eval_fn is None:
        # Resolved lazily so the default only needs to exist when it is called
        def eval_fn(trained_model):
            return evaluate_model(trained_model, validation=True)

    # Defining optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Train on whatever device the model lives on
    device = next(model.parameters()).device
    print('Using device:', device.type)

    dataloader = DataLoader(data, batch_size=batch_size,
                          shuffle=True, num_workers=0)

    it_per_epoch = len(data)/batch_size
    # Progress is reported every `report_every` batches (about 10% of an epoch)
    report_every = int(1+it_per_epoch/10)

    # Start training loop
    for e in range(epochs):
        print("Starting epoch ", e+1)
        model.train()
        t1 = time.time()
        tot_loss = 0.0
        i = 0
        for batch in dataloader:
            # Load tensors of users, movies, outputs and loss weights
            u, m, y, w = batch
            # move tensors to the model's device
            u = u.to(device)
            m = m.to(device)
            y = y.to(device)
            w = w.to(device)

            # make predictions
            y_pred = model(u, m)

            # Calculate mean loss; the per-sample weights change every batch
            loss_fn = torch.nn.BCELoss(weight=w ,reduction = "mean")
            loss = loss_fn(y_pred, y)
            # Accumulate a plain float: adding the tensor itself would keep
            # every batch's autograd graph alive until the end of the epoch
            tot_loss += loss.item()

            # Backpropagate the output and updates model parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            i+=1

            # Print progress
            if i % report_every == 0 and verbose:
                print("Progress: ", int(100*i/it_per_epoch), "%")

        # Epoch metrics
        t2 = time.time()
        print("Epoch time:", int(t2-t1), "seconds")
        # max() guards the division when the data set is empty
        print("Loss:", tot_loss/max(i, 1))
        print("Evaluating model...")
        print("Hit rate @10:", eval_fn(model))
        print()

    print("Done")


In [6]:
# Directory prefix for the ratings file; evaluate_model and the search cells
# also read this global to locate the negatives files and write results.
# (The name shadows the built-in dir().)
dir = "data/"
print("Processing data...")
# Builds the training set with 4 sampled negatives per rating and two
# interactions per user held out; evaluate_model reads this global as `data`.
data = RatingsData(dir + "ratings.dat", num_negatives=4, validation=True)
print("Done")

Processing data...
Done


In [27]:
# --- "Grid Search" ---
# Tune the learning rate of GMF with the loss-weight parameter alfa fixed at 0.
# Each candidate is trained for one epoch and compared on validation HR@10.
print("11 ITERATIONS OF GRID SEARCH")

lrates = [pow(10, -1), pow(10, -1.2), pow(10, -1.4), pow(10, -1.6),
          pow(10, -1.8,), pow(10, -2), pow(10, -2.2), pow(10, -2.4),
          pow(10, -2.6), pow(10, -2.8), pow(10, -3)]

print(lrates)

# alfa is not searched here; keep it in a local name so the value written to
# the result file is the one actually used, not a leftover from another cell
alfa = 0
data.set_alfa(alfa)
best_hr = 0
best_lr = 0

for search_no, lr in enumerate(lrates, start=1):
    print("Starting search", search_no)
    model = GMF()
    # train
    fit(model=model, data=data, batch_size=256, epochs=1, lr=lr, verbose=False)
    hr = evaluate_model(model, validation=True)

    print("RESULT")
    print("HR@10:", hr)

    if hr > best_hr:
        best_hr = hr
        best_lr = lr

print("The best HR@10 was:", best_hr)
print("Using lr:", best_lr)

# `with` closes the file even if a write fails
with open(dir + "hyper_MF.txt", "w+") as result_file:
    result_file.write("MF \n")
    result_file.write("lr \t" + str(best_lr) + "\n")
    result_file.write("alfa \t" + str(alfa))




11 ITERATIONS OF GRID SEARCH
[0.1, 0.06309573444801933, 0.039810717055349734, 0.025118864315095794, 0.015848931924611134, 0.01, 0.00630957344480193, 0.003981071705534973, 0.0025118864315095794, 0.001584893192461114, 0.001]
Starting search 1
Using device: cuda
Starting epoch  1
Epoch time: 296 seconds
Loss: 19.964172603626942
Validating model...
Hit rate @10: 0.669205298013245

Done


NameError: name 'validate_model' is not defined

In [7]:
# --- Random search ---
#
# Hyperparameters tuned here (for GMF):
#
# 1. learning rate (lr), logarithmically between 0.1 and 0.001
# 2. weight parameter in loss function (alfa), linearly between 0 and 1
#
# Hyperparameters NOT tuned:
# 1. number of negatives = 4 (set when `data` was built)
# 2. embeddings = 64 (GMF default, GMF() is called without arguments)
# 3. number of layers and weight initializations
# 4. batch size = 256
# 5. epochs = 8

# lr = 10 ** -r with r drawn uniformly from [lr_min, lr_max)
lr_min = 1
lr_max = 3
best_hr = 0
best_lr = 0
best_alfa = 0

print("15 ITERATIONS OF RANDOM SEARCH")
print()

for i in range(15):
    print()
    print("Starting search", i+1)
    model = GMF()
    # Random value for learning rate; the width of the exponent range is
    # lr_max - lr_min (it was written as lr_max - 1, which only matches
    # while lr_min == 1)
    r1 = np.random.rand(1)[0]*(lr_max-lr_min)+lr_min
    lr = pow(10,-1*r1)
    # Random value for alfa
    # NOTE(review): the weight formula in RatingsData.__getitem__ is
    # 1 + alfa * (rating - 3) for positives, which is negative for ratings
    # 1-2 once alfa > 0.5 - confirm that range is intended.
    alfa = np.random.rand(1)[0]

    # set value of alfa
    data.set_alfa(alfa=alfa)

    # train
    fit(model=model, data=data, batch_size=256, epochs=8, lr=lr, verbose=False)
    # evaluate_model is the function defined in this notebook
    # (validate_model does not exist and raised NameError)
    hr = evaluate_model(model, validation=True)
    print("RESULT")
    print("With lr=", lr, "and alfa=", alfa)
    print("HR@10:", hr)

    if hr > best_hr:
        best_hr = hr
        best_lr = lr
        best_alfa = alfa


print("The best HR@10 was:", best_hr)
print("Using lr:", best_lr)
print("Using alfa:", best_alfa)

# `with` closes the file even if a write fails
with open(dir + "hyper_GMF.txt", "w+") as result_file:
    result_file.write("GMF \n")
    result_file.write("lr \t" + str(best_lr) + "\n")
    result_file.write("alfa \t" + str(best_alfa))

15 ITERATIONS OF RANDOM SEARCH


Starting search 1


RuntimeError: CUDA error: device-side assert triggered