### Used ignite to take regularization function into account , which was throwing some error in pytorch 

In [160]:
import torch
import numpy as np
import pandas as pd
from torch import nn
from time import time
from ignite.metrics import Loss
import torch.nn.functional as F
from sklearn.utils import shuffle
from torch.utils.data import DataLoader
from ignite.metrics import MeanSquaredError
from sklearn.model_selection import train_test_split
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator

In [132]:
print('reading rating data...')
tic = time()
data = np.loadtxt('./ml-1m/ratings.dat', skiprows=0,  delimiter='::').astype('int32')
print("reading user data...")
datContent = [i.strip().split('::') for i in open("./ml-1m/users.dat",encoding="ISO-8859-1").readlines()]
user_data = pd.DataFrame(datContent,columns=['userId','gender','occupation','age','zip'])
print('data read in', time() - tic, 'seconds')

reading rating data...
reading user data...
data read in 5.757378816604614 seconds


In [133]:
df =pd.DataFrame(data)
df.columns=['userId','movieId','rating','timestampe']

users = df.userId.unique()
movies = df.movieId.unique()

userid2idx = {o:i for i,o in enumerate(users)}
movieid2idx = {o:i for i,o in enumerate(movies)}


df['userId'] = df['userId'].apply(lambda x: userid2idx[x])
df['movieId'] = df['movieId'].apply(lambda x: movieid2idx[x])

In [134]:
class Loader():
    current = 0
    def __init__(self, x, y, batchsize=1024, do_shuffle=True):
        self.shuffle = shuffle
        self.x = x
        self.y = y
        self.batchsize = batchsize
        self.batches = range(0, len(self.y), batchsize)
        if do_shuffle:
            self.x, self.y = shuffle(self.x, self.y)

    def __iter__(self):
        # Reset & return a new iterator
        self.x, self.y = shuffle(self.x, self.y, random_state=0)
        self.current = 0
        return self

    def __len__(self):
        # Return the number of batches
        return int(len(self.x) / self.batchsize)

    def __next__(self):
        # Update iterator and stop iteration until the batch size is out of range
        n = self.batchsize
        if self.current + n >= len(self.y):
            raise StopIteration
        i = self.current

        # Transform NumPy arrays to PyTorch tensors
        xs = torch.from_numpy(self.x[i:i + n])
        ys = torch.from_numpy(self.y[i:i + n])
        self.current += n
        return xs, ys

In [135]:
batch_size=1024*4

In [137]:
train,valid = train_test_split(df,random_state=123)

In [139]:
train_loader = Loader(train.values ,train.values[:,2] , batchsize=batch_size , do_shuffle=False)
test_loader = Loader(valid.values ,valid.values[:,2], batchsize=batch_size , do_shuffle=False )

In [141]:
def l2_regularize(array):
    """
    Function to do L2 regularization
    """
    loss = torch.sum(array ** 2.0)
    return loss
lr = 1e-2  # Learning Rate
k = 10  # Number of dimensions per user, item
c_vector = 1e-6  # regularization constant

class MF(nn.Module):
    itr = 0
    def __init__(self,n_user, n_item, k=10, c_vector=1e-6 , writer=None):
        super(MF , self).__init__()
        self.n_user = n_user
        self.n_item = n_item
        self.c_vector = c_vector
        
        self.userEmbedding = nn.Embedding(num_embeddings=self.n_user , embedding_dim=k)
        self.itemEmbedding = nn.Embedding(num_embeddings=self.n_item , embedding_dim=k)        
    def forward(self,x):
        user= x[:,0]
        item= x[:,1]
        
        vector_user = self.userEmbedding(user)
        vector_item = self.itemEmbedding(item)
        ui_interaction = torch.sum(vector_user * vector_item, dim=1)
        return ui_interaction
    
    def loss(self,prediction, target):
        loss_mse = F.mse_loss(prediction, target.squeeze().type(torch.FloatTensor))

        prior_user = l2_regularize(self.userEmbedding.weight)*self.c_vector
        prior_item = l2_regularize(self.itemEmbedding.weight)*self.c_vector        

        loss_total = loss_mse+prior_user+prior_item
        return loss_total


In [150]:
users = df.userId.unique()
movies = df.movieId.unique()
n_user=len(users)
n_item = len(movies)

In [151]:
model = MF(n_user , n_item , k=10,c_vector=1e-6)

In [152]:
n_epoch =10
lr = 0.01
loss_func = torch.nn.MSELoss()

In [153]:
optimizer = torch.optim.Adam(model.parameters(),lr)

In [154]:
trainer = create_supervised_trainer(model, optimizer, loss_fn=model.loss)

In [155]:
metrics = {'evaluation': MeanSquaredError()}
evaluator = create_supervised_evaluator(model, metrics=metrics)

In [156]:
def log_training_loss(engine, log_interval=500):
    """
    Function to log the training loss
    """
    model.itr = engine.state.iteration  # Keep track of iterations
    if model.itr % log_interval == 0:
        fmt = "Epoch[{}] Iteration[{}/{}] Loss: {:.4f}"
        # Keep track of epochs and outputs
        msg = fmt.format(engine.state.epoch, engine.state.iteration, len(train_loader), engine.state.output)
        print(msg)


trainer.add_event_handler(event_name=Events.ITERATION_COMPLETED, handler=log_training_loss)


def log_validation_results(engine):
    evaluator.run(test_loader)
    avg_loss = evaluator.state.metrics['evaluation']
    print("Epoch[{}] Validation MSE: {:.4f} ".format(engine.state.epoch, avg_loss))


trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED, handler=log_validation_results)


<ignite.engine.events.RemovableEventHandle at 0x7fd10c1da220>

In [149]:
trainer.run(train_loader, max_epochs=50)

Epoch[1] Validation MSE: 16.6346 
Epoch[2] Validation MSE: 11.3862 
Epoch[3] Iteration[500/183] Loss: 2.9799
Epoch[3] Validation MSE: 2.3249 
Epoch[4] Validation MSE: 1.3322 
Epoch[5] Validation MSE: 1.1113 
Epoch[6] Iteration[1000/183] Loss: 0.9088
Epoch[6] Validation MSE: 1.0228 
Epoch[7] Validation MSE: 0.9777 
Epoch[8] Validation MSE: 0.9521 
Epoch[9] Iteration[1500/183] Loss: 0.8530
Epoch[9] Validation MSE: 0.9339 
Epoch[10] Validation MSE: 0.9229 
Epoch[11] Iteration[2000/183] Loss: 0.7727
Epoch[11] Validation MSE: 0.9140 
Epoch[12] Validation MSE: 0.9063 
Epoch[13] Validation MSE: 0.8986 
Epoch[14] Iteration[2500/183] Loss: 0.7912
Epoch[14] Validation MSE: 0.8924 
Epoch[15] Validation MSE: 0.8866 
Epoch[16] Validation MSE: 0.8814 
Epoch[17] Iteration[3000/183] Loss: 0.6980
Epoch[17] Validation MSE: 0.8777 
Epoch[18] Validation MSE: 0.8754 
Epoch[19] Validation MSE: 0.8705 
Epoch[20] Iteration[3500/183] Loss: 0.6850
Epoch[20] Validation MSE: 0.8692 
Epoch[21] Validation MSE: 0.86

State:
	iteration: 9150
	epoch: 50
	epoch_length: 183
	max_epochs: 50
	output: 0.6598857641220093
	batch: <class 'tuple'>
	metrics: <class 'dict'>
	dataloader: <class '__main__.Loader'>
	seed: <class 'NoneType'>
	times: <class 'dict'>

In [157]:
trainer.run(train_loader, max_epochs=50)

Epoch[1] Validation MSE: 16.2995 
Epoch[2] Validation MSE: 8.4738 
Epoch[3] Iteration[500/183] Loss: 2.3184
Epoch[3] Validation MSE: 1.8756 
Epoch[4] Validation MSE: 1.2480 
Epoch[5] Validation MSE: 1.0757 
Epoch[6] Iteration[1000/183] Loss: 0.9939
Epoch[6] Validation MSE: 0.9998 
Epoch[7] Validation MSE: 0.9591 
Epoch[8] Validation MSE: 0.9344 
Epoch[9] Iteration[1500/183] Loss: 0.8466
Epoch[9] Validation MSE: 0.9178 
Epoch[10] Validation MSE: 0.9041 
Epoch[11] Iteration[2000/183] Loss: 0.8323
Epoch[11] Validation MSE: 0.8925 
Epoch[12] Validation MSE: 0.8819 
Epoch[13] Validation MSE: 0.8730 
Epoch[14] Iteration[2500/183] Loss: 0.8349
Epoch[14] Validation MSE: 0.8634 
Epoch[15] Validation MSE: 0.8567 
Epoch[16] Validation MSE: 0.8510 
Epoch[17] Iteration[3000/183] Loss: 0.7769
Epoch[17] Validation MSE: 0.8456 
Epoch[18] Validation MSE: 0.8425 
Epoch[19] Validation MSE: 0.8368 
Epoch[20] Iteration[3500/183] Loss: 0.7517
Epoch[20] Validation MSE: 0.8342 
Epoch[21] Validation MSE: 0.829

State:
	iteration: 9150
	epoch: 50
	epoch_length: 183
	max_epochs: 50
	output: 0.6985897421836853
	batch: <class 'tuple'>
	metrics: <class 'dict'>
	dataloader: <class '__main__.Loader'>
	seed: <class 'NoneType'>
	times: <class 'dict'>

In [162]:
model.itemEmbedding.weight

Parameter containing:
tensor([[-5.7700e-01, -6.8974e-01, -3.4236e-01,  ...,  1.0896e+00,
          1.9021e-01,  9.5048e-01],
        [ 8.8600e-02, -6.9858e-02, -8.7543e-01,  ...,  1.0177e+00,
         -4.7718e-01,  4.0930e-01],
        [-6.9491e-01, -2.0536e-01, -9.7252e-01,  ...,  1.3297e+00,
          4.7568e-01,  2.3300e-02],
        ...,
        [ 1.0639e-01, -3.1469e-01,  1.1228e-01,  ...,  1.6115e-01,
         -2.3457e-02,  7.2122e-02],
        [-2.7231e-39, -1.1830e-38, -3.0027e-38,  ..., -1.2503e-38,
         -2.5294e-38,  4.1984e-39],
        [-3.3379e-02, -2.4302e-01, -7.2781e-01,  ...,  5.1424e-01,
          2.9769e-01,  8.4839e-01]], requires_grad=True)