In [1]:
import numpy as np
import pandas as pd
from time import time
from torch.utils.data import DataLoader


In [2]:
print('reading rating data...')
tic = time()
data = np.loadtxt('./ml-1m/ratings.dat', skiprows=0,  delimiter='::').astype('int32')
print("reading user data...")
datContent = [i.strip().split('::') for i in open("./ml-1m/users.dat",encoding="ISO-8859-1").readlines()]
user_data = pd.DataFrame(datContent,columns=['userId','gender','occupation','age','zip'])
print('data read in', time() - tic, 'seconds')

reading rating data...
reading user data...
data read in 5.755181074142456 seconds


In [3]:
df =pd.DataFrame(data)
df.columns=['userId','movieId','rating','timestampe']

users = df.userId.unique()
movies = df.movieId.unique()

userid2idx = {o:i for i,o in enumerate(users)}
movieid2idx = {o:i for i,o in enumerate(movies)}


df['userId'] = df['userId'].apply(lambda x: userid2idx[x])
df['movieId'] = df['movieId'].apply(lambda x: movieid2idx[x])

In [4]:
batch_size=1024*4

In [5]:
split = np.random.rand(len(df)) < 0.8
train = df[split]
valid = df[~split]


In [6]:
train_loader = DataLoader(train.values , batch_size=batch_size , shuffle=True ,)
valid_loader = DataLoader(valid.values , batch_size=batch_size , shuffle=True )

In [7]:
import torch
from torch import nn
import torch.nn.functional as F


In [8]:
def l2_regularize(array):
    """
    Function to do L2 regularization
    """
    loss = torch.sum(array ** 2.0)
    return loss

class MF(nn.Module):
    itr = 0
    def __init__(self,n_user, n_item, k=10, c_vector=1.0, writer=None):
        super(MF , self).__init__()
        self.n_user = n_user
        self.n_item = n_item
        self.c_vector = c_vector
        
        self.userEmbedding = nn.Embedding(num_embeddings=self.n_user , embedding_dim=k)
        self.itemEmbedding = nn.Embedding(num_embeddings=self.n_item , embedding_dim=k)        
    def forward(self,x):
        user= x[:,0]
        item= x[:,1]
        
        vector_user = self.userEmbedding(user)
        vector_item = self.itemEmbedding(item)
        ui_interaction = torch.sum(vector_user * vector_item, dim=1)
        return ui_interaction
    
    def loss(self,prediction, target):
        loss_mse = F.mse_loss(prediction, target.squeeze())

        prior_user = l2_regularize(self.userEmbedding.weight)*self.c_vector
        prior_item = l2_regularize(self.itemEmbedding.weight)*self.c_vector        

        loss_total = loss_mse+prior_user+prior_item
        return loss


In [9]:
users = df.userId.unique()
movies = df.movieId.unique()
n_user=len(users)
n_item = len(movies)

In [10]:
model = MF(n_user , n_item , k=10)

In [11]:
n_epoch =10
lr = 0.01
loss_func = torch.nn.MSELoss()

In [12]:
optimizer = torch.optim.Adam(model.parameters(),lr)

In [20]:
def my_loss(output, target):
    loss = torch.mean((output - target)**2)
    return loss

In [21]:
def checkAccuracy(loader , model ,epoch):
    model.eval()
    with torch.no_grad():
        mse=[]
        for valid_ in loader:
            scores = model(valid_)
            mse.append(loss_func(scores , valid_[:,2]))
    model.train()
    print("RMSE of sample is at epoch {} is".format(epoch), sum(mse)/len(mse) )

In [23]:
for epoch in range(n_epoch):
    for batch_index , (train_) in enumerate(train_loader):
        
        scores = model(train_)
        loss = my_loss(scores ,train_[:,2].type(torch.FloatTensor))
        ##backward
        optimizer.zero_grad()
        loss.backward()
        ##gradient
        optimizer.step()
    print(loss)
    checkAccuracy(valid_loader,model,epoch)

tensor(3.0559, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 0 is tensor(3.3633)
tensor(1.2913, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 1 is tensor(1.4451)
tensor(1.0470, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 2 is tensor(1.1359)
tensor(0.9487, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 3 is tensor(1.0252)
tensor(0.8627, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 4 is tensor(0.9709)
tensor(0.8632, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 5 is tensor(0.9411)
tensor(0.7571, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 6 is tensor(0.9215)
tensor(0.7827, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 7 is tensor(0.9073)
tensor(0.8433, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 8 is tensor(0.8954)
tensor(0.7787, grad_fn=<MeanBackward0>)
RMSE of sample is at epoch 9 is tensor(0.8848)
