In [1]:
import numpy as np
from scipy.sparse import rand as sprand
from scipy.sparse import lil_matrix
import torch
from torch.autograd import Variable
import pandas as pd

In [2]:
names = ['user_id', 'item_id', 'rating', 'timestamp']
df_train = pd.read_csv('ml-10M100K/r1.train', sep='::', names=names,engine='python')
# df_train2 = pd.read_csv('ml-10M100K/r2.train', sep='::', names=names,engine='python')
# df_test = pd.read_csv('ml-10M100K/r1.test', sep='::', names=names,engine='python')

In [3]:
def get_movielens_ratings(df):
    n_users = max(df.user_id.unique())
    n_items = max(df.item_id.unique())

    interactions = lil_matrix( (n_users,n_items), dtype=float) #np.zeros((n_users, n_items))
    for row in df.itertuples():
        interactions[row[1] - 1, row[2] - 1] = row[3]
    return interactions

In [4]:
ratings = get_movielens_ratings(df_train)
ratings.shape

(71567, 65133)

In [18]:
class MatrixFactorization(torch.nn.Module):
    
    def __init__(self, n_users, n_items, n_factors=5):
        super().__init__()
        self.user_factors = torch.nn.Embedding(n_users, 
                                               n_factors,
                                               sparse=False)
        self.item_factors = torch.nn.Embedding(n_items, 
                                               n_factors,
                                               sparse=False)
        # Also should consider fitting overall bias (self.mu term) and both user and item bias vectors
        # Mu is 1x1, user_bias is 1xn_users. item_bias is 1xn_items
    
    # For convenience when we want to predict a sinble user-item pair. 
    def predict(self, user, item):
        # Need to fit bias factors
        return (pred + self.user_factors(user) * self.item_factors(item)).sum(1)
    
    # Much more efficient batch operator. This should be used for training purposes
    def forward(self, users, items):
        # Need to fit bias factors
        self.user_biases = torch.nn.Embedding(ratings.shape[0], 1,sparse=False)
        self.item_biases = torch.nn.Embedding(ratings.shape[1], 1,sparse=False)
        print(self.user_biases(users).shape)
        print(self.item_biases(items).shape)
        print(torch.mm(self.user_factors(users),torch.transpose(self.item_factors(items),0,1)).shape)
        return torch.mm(self.user_factors(users),torch.transpose(self.item_factors(items),0,1))

In [19]:
model = MatrixFactorization(ratings.shape[0], ratings.shape[1], n_factors=2)

In [7]:
loss_func = torch.nn.MSELoss()

In [8]:
reg_loss_func = torch.optim.SGD(model.parameters(), lr=1e-6, weight_decay=1e-3)

In [9]:
def get_batch(batch_size,ratings):
    # Sort our data and scramble it
    rows, cols = ratings.shape
    p = np.random.permutation(rows)
    
    # create batches
    sindex = 0
    eindex = batch_size
    while eindex < rows:
        batch = p[sindex:eindex]
        temp = eindex
        eindex = eindex + batch_size
        sindex = temp
        yield batch
    
    if eindex >= rows:
        batch = range(sindex,rows)
        yield batch    

In [10]:
EPOCH = 100
BATCH_SIZE = 1000 #50
LR = 0.001

In [11]:
def run_epoch():
    for i,batch in enumerate(get_batch(BATCH_SIZE, ratings)):
        # Set gradients to zero
        reg_loss_func.zero_grad()
        
        # Turn data into variables
        interactions = Variable(torch.FloatTensor(ratings[batch, :].toarray()))
        rows = Variable(torch.LongTensor(batch))
        cols = Variable(torch.LongTensor(np.arange(ratings.shape[1])))
    
        # Predict and calculate loss
        predictions = model(rows, cols)
        loss = loss_func(predictions, interactions)
    
        # Backpropagate
        loss.backward()
    
        # Update the parameters
        reg_loss_func.step()

In [20]:
for i in range(EPOCH):
    print(i)
    run_epoch()

0
torch.Size([1000, 1])
torch.Size([65133, 1])
torch.Size([1000, 65133])
torch.Size([1000, 1])
torch.Size([65133, 1])
torch.Size([1000, 65133])
torch.Size([1000, 1])
torch.Size([65133, 1])
torch.Size([1000, 65133])
torch.Size([1000, 1])
torch.Size([65133, 1])
torch.Size([1000, 65133])


KeyboardInterrupt: 