# RecSys MovieLens PyTorch MatrixFactorization

In [None]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Load the MovieLens ratings table; the preview below shows the columns
# userId, movieId, rating and timestamp (plus a saved index column).
# NOTE(review): the name `data` is rebound to `torch.utils.data` by an import
# further down in this notebook, so re-run this cell before reusing the frame.
data = pd.read_csv("./data/MovieLens_LatestSmall_ratings.csv.csv")
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


Data encoding

In [None]:
# Random row-wise split: each rating goes to train with probability 0.8,
# otherwise to validation. The fixed seed makes the mask reproducible.
np.random.seed(3)
msk = np.random.rand(len(data)) < 0.8
# .copy() detaches the subsets from `data` so later edits do not touch it.
train = data[msk].copy()
valid = data[~msk].copy()

In [None]:
# here is a handy function modified from fast.ai
def proc_col(col, train_col=None):
    """Encode a pandas column with contiguous integer ids.

    The id vocabulary is built from ``train_col`` when it is given, otherwise
    from ``col`` itself. Ids are assigned in order of first appearance.

    Returns a tuple ``(name2idx, codes, n_unique)`` where ``name2idx`` maps
    each original value to its id, ``codes`` is ``col`` encoded as a NumPy
    array (values missing from the vocabulary become -1), and ``n_unique`` is
    the vocabulary size.
    """
    vocab_source = col if train_col is None else train_col
    categories = vocab_source.unique()
    name2idx = dict(zip(categories, range(len(categories))))
    codes = np.array([name2idx.get(value, -1) for value in col])
    return name2idx, codes, len(categories)

In [None]:
def encode_data(df, train=None):
    """Return a copy of ``df`` with contiguous integer user and movie ids.

    When ``train`` is given, the id vocabulary comes from ``train`` and any
    row of ``df`` whose user or movie does not occur in ``train`` is dropped.
    The input frame is left untouched.
    """
    encoded = df.copy()
    for col_name in ("userId", "movieId"):
        reference = train[col_name] if train is not None else None
        codes = proc_col(encoded[col_name], reference)[1]
        encoded[col_name] = codes
        # proc_col marks values outside the vocabulary with -1; drop those rows.
        encoded = encoded[encoded[col_name] >= 0]
    return encoded

In [None]:
# Encode the train and validation data.
# The training set defines the id vocabulary; validation rows whose user or
# movie never appears in training are dropped by encode_data.
df_train = encode_data(train)
df_valid = encode_data(valid, train)

Matrix factorization model

In [None]:
class MF(nn.Module):
    """Plain matrix factorization: a rating is predicted as the dot product
    of a user embedding and an item embedding."""

    def __init__(self, num_users, num_items, emb_size=100):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)
        # Replace the default initialization with small non-negative values
        # drawn uniformly from [0, 0.05).
        for table in (self.user_emb, self.item_emb):
            table.weight.data.uniform_(0, 0.05)

    def forward(self, u, v):
        """Return one prediction per (user id, item id) pair in ``u``/``v``."""
        user_vecs = self.user_emb(u)
        item_vecs = self.item_emb(v)
        # Row-wise dot product: multiply element-wise, then sum over factors.
        return (user_vecs * item_vecs).sum(1)

In [None]:
# Sanity check of the embedding arithmetic used by MF.forward, on 5 ratings.

# Re-encoding the 5-row sample gives user/movie ids in the range 0..4, which
# fits the 5-row embedding tables created below.
sample = encode_data(train.sample(5))
display(sample)

num_users = 5
num_items = 5
emb_size = 3

# Stand-alone embedding tables with PyTorch's default initialization.
user_emb = nn.Embedding(num_users, emb_size)
item_emb = nn.Embedding(num_items, emb_size)
users = torch.LongTensor(sample.userId.values)
items = torch.LongTensor(sample.movieId.values)

# One embedding row per sampled rating: both are (5, emb_size).
U = user_emb(users)
V = item_emb(items)

display(U)

display(U*V) # element wise multiplication

# Summing over the factor axis yields one dot product per rating.
display((U*V).sum(1))

Unnamed: 0,userId,movieId,rating,timestamp
32802,0,0,4.0,1391349194
92302,1,1,5.0,940420585
5142,2,2,5.0,939646939
41616,3,3,4.0,1378497540
5050,2,4,3.0,939647873


tensor([[-0.3881, -1.4092, -0.8383],
        [ 0.9562,  1.0932,  2.2158],
        [-0.1165,  0.3107,  1.0649],
        [-0.2075, -1.0847,  0.6501],
        [-0.1165,  0.3107,  1.0649]], grad_fn=<EmbeddingBackward>)

tensor([[ 0.6496,  0.5128,  1.2918],
        [-0.9693,  0.2661, -0.3521],
        [-0.0077,  0.0638, -0.1173],
        [-0.1759, -2.0299,  0.5365],
        [ 0.0655,  0.0866, -0.0490]], grad_fn=<MulBackward0>)

tensor([ 2.4543, -1.0553, -0.0611, -1.6692,  0.1030], grad_fn=<SumBackward1>)

Model training

In [None]:
# Embedding table sizes: the number of distinct encoded users and movies in
# the training set (ids are contiguous, so this is also max id + 1).
num_users = len(df_train.userId.unique())
num_items = len(df_train.movieId.unique())
print(num_users, num_items)

610 8998


In [None]:
# Plain matrix-factorization model sized to the training vocabulary.
model = MF(num_users, num_items, emb_size=100) # .cuda() if you have a GPU

In [None]:
def train_epocs(model, epochs=10, lr=0.01, wd=0.0, unsqueeze=False):
    """Train ``model`` on the global ``df_train`` with full-batch Adam steps.

    Every epoch is a single gradient step on the whole training set. The MSE
    training loss is printed after each step, and the validation loss is
    printed once at the end via ``test_loss``.

    Parameters
    ----------
    model : nn.Module
        Called as ``model(users, items)``; trained in place.
    epochs : int
        Number of full-batch optimization steps.
    lr : float
        Adam learning rate.
    wd : float
        Adam weight decay (L2 penalty).
    unsqueeze : bool
        Reshape the targets from ``(N,)`` to ``(N, 1)``. Use this for models
        whose output has a trailing dimension of size 1.

    A fresh optimizer is created on every call, so Adam's moment estimates
    are not carried over between successive calls.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    # The full training set is the batch, so the input tensors are the same
    # in every epoch: build them once instead of once per iteration.
    users = torch.LongTensor(df_train.userId.values) # .cuda()
    items = torch.LongTensor(df_train.movieId.values) #.cuda()
    ratings = torch.FloatTensor(df_train.rating.values) #.cuda()
    if unsqueeze:
        ratings = ratings.unsqueeze(1)

    model.train()
    for _ in range(epochs):
        y_hat = model(users, items)
        loss = F.mse_loss(y_hat, ratings)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(loss.item())
    test_loss(model, unsqueeze)

In [None]:
def test_loss(model, unsqueeze=False):
    """Print the MSE of ``model`` on the global validation frame ``df_valid``.

    Parameters
    ----------
    model : nn.Module
        Called as ``model(users, items)``. It is switched to eval mode and
        left in that mode.
    unsqueeze : bool
        Reshape the targets from ``(N,)`` to ``(N, 1)`` to match models whose
        output has a trailing dimension of size 1.
    """
    model.eval()
    users = torch.LongTensor(df_valid.userId.values) #.cuda()
    items = torch.LongTensor(df_valid.movieId.values) #.cuda()
    ratings = torch.FloatTensor(df_valid.rating.values) #.cuda()
    if unsqueeze:
        ratings = ratings.unsqueeze(1)
    # Evaluation only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        y_hat = model(users, items)
        loss = F.mse_loss(y_hat, ratings)
    print("test loss %.3f " % loss.item())

In [None]:
# First pass: 10 full-batch Adam steps at a high learning rate (0.1).
# Prints the training MSE per epoch, then the validation MSE.
train_epocs(model, epochs=10, lr=0.1)

12.911455154418945
4.8501667976379395
2.5946691036224365
3.097404718399048
0.8478994369506836
1.820838212966919
2.6556942462921143
2.134251594543457
1.0896706581115723
0.9762216210365295
test loss 1.850 


In [None]:
# Second pass on the same model with a 10x smaller learning rate.
# train_epocs creates a new Adam optimizer on each call, so optimizer state
# from the previous pass is not reused.
train_epocs(model, epochs=15, lr=0.01)

1.6420538425445557
1.004096508026123
0.7115985751152039
0.6606449484825134
0.7254654169082642
0.8037557005882263
0.84348064661026
0.835419237613678
0.7931983470916748
0.7376593351364136
0.6877322793006897
0.655558168888092
0.6444454193115234
0.6495035290718079
0.6609548926353455
test loss 0.821 


In [None]:
# Third pass: 15 more epochs at the same learning rate (0.01).
train_epocs(model, epochs=15, lr=0.01)

0.6689958572387695
0.6311114430427551
0.6389520168304443
0.614119827747345
0.6051469445228577
0.6136897206306458
0.6114951968193054
0.5966076850891113
0.5845629572868347
0.5827490091323853
0.5838581919670105
0.578920841217041
0.5681834816932678
0.557807207107544
0.5514382719993591
test loss 0.759 


MF with bias

In [None]:
class MF_bias(nn.Module):
    """Matrix factorization with per-user and per-item bias terms.

    prediction = dot(user vector, item vector) + user bias + item bias
    """

    def __init__(self, num_users, num_items, emb_size=100):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.item_bias = nn.Embedding(num_items, 1)
        # Factors start small and non-negative, biases start close to zero.
        for factors in (self.user_emb, self.item_emb):
            factors.weight.data.uniform_(0, 0.05)
        for bias in (self.user_bias, self.item_bias):
            bias.weight.data.uniform_(-0.01, 0.01)

    def forward(self, u, v):
        """Return one prediction per (user id, item id) pair in ``u``/``v``."""
        # Bias lookups come back as (N, 1); squeeze drops the size-1 axis.
        user_offset = self.user_bias(u).squeeze()
        item_offset = self.item_bias(v).squeeze()
        interaction = (self.user_emb(u) * self.item_emb(v)).sum(1)
        return interaction + user_offset + item_offset

In [None]:
# Replace `model` with a freshly initialized bias-augmented factorization.
model = MF_bias(num_users, num_items, emb_size=100) #.cuda()

In [None]:
# First pass for the bias model: lr=0.05 with a small weight decay (1e-5).
train_epocs(model, epochs=10, lr=0.05, wd=1e-5)

12.912150382995605
9.152730941772461
4.3859100341796875
1.1582977771759033
2.4690911769866943
3.7431256771087646
2.4485058784484863
1.0784105062484741
0.81607985496521
1.318210244178772
test loss 2.069 


In [None]:
# Second pass: learning rate lowered to 0.01, same weight decay.
train_epocs(model, epochs=10, lr=0.01, wd=1e-5)

1.8935295343399048
1.325158715248108
0.9352132678031921
0.7449430227279663
0.722395658493042
0.777296781539917
0.8227252960205078
0.8217465281486511
0.7813524603843689
0.7275670766830444
test loss 0.798 


In [None]:
# Third pass: fine-tune at lr=0.001, same weight decay.
train_epocs(model, epochs=10, lr=0.001, wd=1e-5)

0.6853445172309875
0.6711350679397583
0.6592451930046082
0.6494942903518677
0.6416682600975037
0.6355280876159668
0.6308267712593079
0.6273204684257507
0.6247788071632385
0.6229932308197021
test loss 0.751 


Note that these models are sensitive to weight initialization, the choice of optimization algorithm, and regularization.



### Neural Network Model
Note that this model concatenates the user and item embeddings instead of multiplying them, so the two embeddings could potentially have different sizes. Better results may be possible with further tuning of the regularization.

In [None]:
class CollabFNet(nn.Module):
    """Neural collaborative-filtering model.

    The user and item embeddings are concatenated and passed through a
    one-hidden-layer MLP that outputs a single rating per pair.
    """

    def __init__(self, num_users, num_items, emb_size=100, n_hidden=10):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)
        # The first layer sees both embeddings side by side.
        self.lin1 = nn.Linear(2 * emb_size, n_hidden)
        self.lin2 = nn.Linear(n_hidden, 1)
        self.drop1 = nn.Dropout(0.1)

    def forward(self, u, v):
        """Return predictions of shape (N, 1) for the id pairs in ``u``/``v``."""
        pair = torch.cat([self.user_emb(u), self.item_emb(v)], dim=1)
        # ReLU on the raw embeddings, then dropout, then the hidden layer.
        hidden = self.lin1(self.drop1(F.relu(pair)))
        return self.lin2(F.relu(hidden))

In [None]:
# Replace `model` with the neural model (default hidden size of 10).
model = CollabFNet(num_users, num_items, emb_size=100) #.cuda()

In [None]:
# CollabFNet outputs shape (N, 1), so unsqueeze=True reshapes the targets to
# match before the MSE is computed.
train_epocs(model, epochs=15, lr=0.05, wd=1e-6, unsqueeze=True)

14.083020210266113
3.1649768352508545
1.2708535194396973
1.793819546699524
1.3548780679702759
1.0356323719024658
1.287137508392334
1.0545822381973267
0.8228969573974609
0.9276160597801208
0.9486841559410095
0.7886616587638855
0.7197826504707336
0.813225269317627
0.7996787428855896
test loss 0.779 


In [None]:
# Fine-tune the same network at a much lower learning rate (0.001).
train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)

0.696186363697052
0.6877015829086304
0.6879603862762451
0.6886828541755676
0.6885420083999634
0.6869543194770813
0.6834055185317993
0.6828559637069702
0.6811237931251526
0.6824584603309631
test loss 0.763 


In [None]:
# Ten more fine-tuning epochs with unchanged settings.
train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)

0.6813802123069763
0.6804739236831665
0.68137127161026
0.6783655285835266
0.6785134673118591
0.6774048805236816
0.6747931838035583
0.6761834025382996
0.673831045627594
0.673130214214325
test loss 0.755 


### Neural network model - different approach
Ref - [T Abhishek](https://youtu.be/MVB1cbe923A)

In [None]:
!pip install tez

In [None]:
import pandas as pd
import tez

In [None]:
# Load the same ratings CSV again, this time under the name `df`.
df = pd.read_csv("./data/MovieLens_LatestSmall_ratings.csv.csv")
df.head()

Rest of the code is already written by the author and will be available soon.

### Ethan Rosenthal

Ref - https://github.com/EthanRosenthal/torchmf

In [None]:
import os
import requests
import zipfile
import collections

import numpy as np
import pandas as pd
import scipy.sparse as sp
from sklearn.metrics import roc_auc_score

import torch
from torch import nn
import torch.multiprocessing as mp
import torch.utils.data as data
from tqdm import tqdm

In [None]:
def _get_data_path():
    """
    Get path to the movielens dataset file.
    """
    data_path = '/content/data'
    if not os.path.exists(data_path):
        print('Making data path')
        os.mkdir(data_path)
    return data_path


def _download_movielens(dest_path):
    """
    Download the MovieLens 100k archive into ``dest_path`` and extract it.

    Parameters
    ----------
    dest_path : str
        Existing directory that receives ``ml-100k.zip`` and its contents.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.RequestException
        On connection problems or when the timeout expires.
    """

    url = 'http://files.grouplens.org/datasets/movielens/ml-100k.zip'
    archive_path = os.path.join(dest_path, 'ml-100k.zip')
    # Download under a temporary name so an interrupted transfer never leaves
    # a truncated 'ml-100k.zip' that a later run would mistake for complete.
    partial_path = archive_path + '.part'

    print('Downloading MovieLens data')

    with requests.get(url, stream=True, timeout=60) as req:
        # Fail loudly instead of saving an HTML error page as the archive.
        req.raise_for_status()
        with open(partial_path, 'wb') as fd:
            for chunk in req.iter_content(chunk_size=1 << 20):
                fd.write(chunk)
    os.replace(partial_path, archive_path)

    with zipfile.ZipFile(archive_path, 'r') as z:
        z.extractall(dest_path)

In [None]:
def read_movielens_df():
    """
    Load the MovieLens 100k ratings file into a DataFrame.

    The archive is downloaded first when ``ml-100k.zip`` is not present in
    the data directory.

    Returns
    -------
    df : pd.DataFrame
        Columns ``user_id``, ``item_id``, ``rating`` and ``timestamp``.
    """
    data_dir = _get_data_path()
    archive = os.path.join(data_dir, 'ml-100k.zip')
    if not os.path.isfile(archive):
        _download_movielens(data_dir)
    # u.data is tab-separated and has no header row.
    ratings_file = os.path.join(data_dir, 'ml-100k', 'u.data')
    return pd.read_csv(ratings_file, sep='\t',
                       names=['user_id', 'item_id', 'rating', 'timestamp'])


def get_movielens_interactions():
    """
    Build the dense user x item rating matrix from the MovieLens ratings.

    Returns
    -------
    interactions : np.ndarray
        Float matrix of shape (n_users, n_items). Entry ``[u - 1, i - 1]``
        holds the rating user ``u`` gave item ``i``; unrated pairs are 0.
    """
    df = read_movielens_df()

    # The matrix is sized by the number of distinct ids and indexed by
    # ``id - 1``, so ids are assumed to run contiguously from 1
    # (NOTE(review): confirm for other datasets).
    n_users = df.user_id.nunique()
    n_items = df.item_id.nunique()

    interactions = np.zeros((n_users, n_items))
    # One vectorized assignment instead of a Python loop over every rating.
    interactions[df.user_id.values - 1, df.item_id.values - 1] = df.rating.values
    return interactions


def train_test_split(interactions, n=10):
    """
    Hold out ``n`` random interactions per user as a test set.

    Users with ``n`` or fewer non-zero interactions contribute nothing to the
    test matrix; all of their interactions stay in train.

    Parameters
    ----------
    interactions : np.ndarray
        Dense user x item matrix; zero means "no interaction".
    n : int (default=10)
        Number of items per row to move into test.

    Returns
    -------
    train : np.ndarray
        Copy of ``interactions`` with the held-out entries zeroed.
    test : np.ndarray
        Matrix of the same shape containing only the held-out entries.
    """
    train = interactions.copy()
    test = np.zeros(interactions.shape)
    n_rows = interactions.shape[0]
    for user in range(n_rows):
        rated = interactions[user, :].nonzero()[0]
        if rated.shape[0] <= n:
            # Too few interactions to spare any for testing.
            continue
        held_out = np.random.choice(rated, size=n, replace=False)
        test[user, held_out] = interactions[user, held_out]
        train[user, held_out] = 0.

    # No entry may be present in both matrices.
    assert(np.all((train * test) == 0))
    return train, test


def get_movielens_train_test_split(implicit=False):
    """
    Load MovieLens and return sparse (COO) train and test matrices.

    With ``implicit=True`` the ratings are binarized first: ratings of 4 or
    more become 1.0 and everything else, unrated pairs included, becomes 0.0.
    """
    ratings = get_movielens_interactions()
    matrix = (ratings >= 4).astype(np.float32) if implicit else ratings
    dense_train, dense_test = train_test_split(matrix)
    return sp.coo_matrix(dense_train), sp.coo_matrix(dense_test)

In [None]:
%%writefile metrics.py

import numpy as np
from sklearn.metrics import roc_auc_score
from torch import multiprocessing as mp
import torch

def get_row_indices(row, interactions):
    """Return the stored column indices of ``row``.

    ``interactions`` must expose CSR-style ``indptr`` and ``indices`` arrays.
    """
    lo, hi = interactions.indptr[row], interactions.indptr[row + 1]
    return interactions.indices[lo:hi]


def auc(model, interactions, num_workers=1):
    """Mean per-user ROC AUC of ``model``, computed in worker processes.

    Parameters
    ----------
    model
        Object with a ``predict(users, items)`` method; it is handed to each
        worker process.
    interactions
        Matrix of true interactions. Its ``shape`` is read here; the workers
        read its CSR-style ``indptr``/``indices`` through ``batch_auc``.
    num_workers : int (default=1)
        Number of worker processes. Must be at least 1: a value of 0 raises
        ZeroDivisionError when the per-worker batch size is computed.

    Returns
    -------
    Mean of the AUC values reported by the workers. Users without any
    interaction are skipped by ``batch_auc`` and do not contribute.
    """
    aucs = []
    processes = []
    n_users = interactions.shape[0]
    # Rows per worker, rounded up so every user is assigned to some worker.
    mp_batch = int(np.ceil(n_users / num_workers))

    queue = mp.Queue()
    rows = np.arange(n_users)
    # Shuffle so each worker receives a random subset of the users.
    np.random.shuffle(rows)
    for rank in range(num_workers):
        start = rank * mp_batch
        end = np.min((start + mp_batch,  n_users))
        p = mp.Process(target=batch_auc,
                       args=(queue, rows[start:end], interactions, model))
        p.start()
        processes.append(p)

    # Poll (busy-wait) until all workers have exited and the queue is empty.
    # Results are drained while workers are still running, and the join only
    # happens after the queue has been emptied.
    while True:
        is_alive = False
        for p in processes:
            if p.is_alive():
                is_alive = True
                break
        if not is_alive and queue.empty():
            break

        while not queue.empty():
            aucs.append(queue.get())

    queue.close()
    for p in processes:
        p.join()
    # NOTE(review): if no user produced a score, this is the mean of an empty
    # list, which NumPy evaluates to nan.
    return np.mean(aucs)


def batch_auc(queue, rows, interactions, model):
    """Worker: put one ROC AUC score on ``queue`` for each scorable user.

    Parameters
    ----------
    queue
        Queue that receives one AUC value per scored user.
    rows
        Iterable of user (row) indices to evaluate.
    interactions
        Matrix with CSR-style ``indptr``/``indices``; the stored columns of a
        row are treated as that user's positive items.
    model
        Object with ``predict(users, items)`` returning one score per item.

    Users with no positive item, or with every item positive, are skipped:
    AUC is undefined when only one class is present and ``roc_auc_score``
    would raise, ending the worker before the remaining users are scored.
    """
    n_items = interactions.shape[1]
    items = torch.arange(0, n_items).long()
    # Reused buffer, refilled with the current user id on every iteration.
    users_init = torch.ones(n_items).long()
    for row in rows:
        row = int(row)
        actuals = get_row_indices(row, interactions)
        if len(actuals) == 0:
            continue

        y_test = np.zeros(n_items)
        y_test[actuals] = 1
        if y_test.all():
            # No negative item to rank against.
            continue

        users = users_init.fill_(row)
        # Scoring only: no gradients are needed.
        with torch.no_grad():
            preds = model.predict(users, items)
        queue.put(roc_auc_score(y_test, preds.detach().numpy()))


def patk(model, interactions, num_workers=1, k=5):
    """Mean per-user precision@k of ``model``, computed in worker processes.

    Parameters
    ----------
    model
        Object with a ``predict(users, items)`` method; it is handed to each
        worker process.
    interactions
        Matrix of true interactions. Its ``shape`` is read here; the workers
        read its CSR-style ``indptr``/``indices`` through ``batch_patk``.
    num_workers : int (default=1)
        Number of worker processes. Must be at least 1: a value of 0 raises
        ZeroDivisionError when the per-worker batch size is computed.
    k : int (default=5)
        Number of top-scored items considered per user.

    Returns
    -------
    Mean of the precision@k values reported by the workers. Users without
    any interaction are skipped by ``batch_patk`` and do not contribute.
    """
    patks = []
    processes = []
    n_users = interactions.shape[0]
    # Rows per worker, rounded up so every user is assigned to some worker.
    mp_batch = int(np.ceil(n_users / num_workers))

    queue = mp.Queue()
    rows = np.arange(n_users)
    # Shuffle so each worker receives a random subset of the users.
    np.random.shuffle(rows)
    for rank in range(num_workers):
        start = rank * mp_batch
        end = np.min((start + mp_batch, n_users))
        p = mp.Process(target=batch_patk,
                       args=(queue, rows[start:end], interactions, model),
                       kwargs={'k': k})
        p.start()
        processes.append(p)

    # Poll (busy-wait) until all workers have exited and the queue is empty.
    # Results are drained while workers are still running, and the join only
    # happens after the queue has been emptied.
    while True:
        is_alive = False
        for p in processes:
            if p.is_alive():
                is_alive = True
                break
        if not is_alive and queue.empty():
            break

        while not queue.empty():
            patks.append(queue.get())

    queue.close()
    for p in processes:
        p.join()
    # NOTE(review): if no user produced a score, this is the mean of an empty
    # list, which NumPy evaluates to nan.
    return np.mean(patks)


def batch_patk(queue, rows, interactions, model, k=5):
    """Worker: put one precision@k value on ``queue`` for each scorable user.

    Parameters
    ----------
    queue
        Queue that receives one precision value per scored user.
    rows
        Iterable of user (row) indices to evaluate.
    interactions
        Matrix with CSR-style ``indptr``/``indices``; the stored columns of a
        row are treated as that user's positive items.
    model
        Object with ``predict(users, items)`` returning one score per item.
    k : int (default=5)
        Number of top-scored items to compare against the positives. The
        result is always divided by ``k``, even when there are fewer items.

    Users with no positive item are skipped.
    """
    n_items = interactions.shape[1]

    items = torch.arange(0, n_items).long()
    # Reused buffer, refilled with the current user id on every iteration.
    users_init = torch.ones(n_items).long()
    for row in rows:
        row = int(row)
        actuals = get_row_indices(row, interactions)
        if len(actuals) == 0:
            continue

        users = users_init.fill_(row)
        # Scoring only: no gradients are needed.
        with torch.no_grad():
            preds = model.predict(users, items)

        # Negate so that the smallest values are the highest scores.
        neg_scores = -np.squeeze(preds.detach().numpy())
        if k < n_items:
            # argpartition places the k smallest entries first (unordered).
            top_k = np.argpartition(neg_scores, k)[:k]
        else:
            # argpartition rejects kth >= n_items; every item is "top k".
            top_k = np.arange(n_items)

        hits = set(top_k) & set(actuals)
        queue.put(len(hits) / float(k))

Overwriting metrics.py


In [None]:
import metrics
import importlib
importlib.reload(metrics)

<module 'metrics' from '/content/metrics.py'>

In [None]:
class Interactions(data.Dataset):
    """
    Dataset over the stored entries of an interactions matrix.

    Rows are users, columns are items, and each stored value is the rating a
    user gave an item. One sample corresponds to one stored entry.
    """

    def __init__(self, mat):
        # Keep a float32 COO copy so row/col/data can be indexed in parallel.
        self.mat = mat.astype(np.float32).tocoo()
        self.n_users, self.n_items = self.mat.shape

    def __len__(self):
        return self.mat.nnz

    def __getitem__(self, index):
        """Return ``((user, item), rating)`` for the ``index``-th entry."""
        coo = self.mat
        return (coo.row[index], coo.col[index]), coo.data[index]


class PairwiseInteractions(data.Dataset):
    """
    Sample data from an interactions matrix in a pairwise fashion. The row is
    treated as the main dimension, and the columns are sampled pairwise: each
    sample pairs one stored (positive) column with one randomly drawn column
    that has no stored entry in the same row.
    """

    def __init__(self, mat):
        self.mat = mat.astype(np.float32).tocoo()

        self.n_users = self.mat.shape[0]
        self.n_items = self.mat.shape[1]

        # CSR copy with sorted column indices, required by the binary search
        # in ``not_rated``.
        self.mat_csr = self.mat.tocsr()
        if not self.mat_csr.has_sorted_indices:
            self.mat_csr.sort_indices()

    def __getitem__(self, index):
        """
        Return ``((row, (pos_col, neg_col)), val)`` for the ``index``-th
        stored entry, where ``neg_col`` is drawn uniformly at random from the
        columns without a stored entry in ``row``.

        Raises
        ------
        ValueError
            If the row has a stored entry in every column, so no negative
            column exists (rejection sampling would never terminate).
        """
        row = self.mat.row[index]
        indptr = self.mat_csr.indptr
        indices = self.mat_csr.indices

        if indptr[row + 1] - indptr[row] >= self.n_items:
            raise ValueError(
                'Row {} has an entry in every column; cannot sample a '
                'negative item'.format(row))

        # Rejection sampling: redraw until the column is unrated in this row.
        while True:
            neg_col = np.random.randint(self.n_items)
            if self.not_rated(row, neg_col, indptr, indices):
                break

        pos_col = self.mat.col[index]
        val = self.mat.data[index]

        return (row, (pos_col, neg_col)), val

    def __len__(self):
        return self.mat.nnz

    @staticmethod
    def not_rated(row, col, indptr, indices):
        """
        Return True when ``col`` has no stored entry in ``row``.

        ``indptr``/``indices`` are CSR arrays whose per-row column indices
        are sorted.
        """
        # similar to use of bsearch in lightfm
        start = indptr[row]
        end = indptr[row + 1]
        row_items = indices[start:end]
        # Leftmost insertion point: if ``col`` is present it sits exactly
        # there. The lookup is done on the row slice, so the position is
        # relative to the slice, not to the full ``indices`` array.
        pos = np.searchsorted(row_items, col, 'left')
        found = pos < row_items.shape[0] and row_items[pos] == col
        return not found

    def get_row_indices(self, row):
        """Return the sorted stored column indices of ``row``."""
        start = self.mat_csr.indptr[row]
        end = self.mat_csr.indptr[row + 1]
        return self.mat_csr.indices[start:end]


class BaseModule(nn.Module):
    """
    Base module for explicit matrix factorization.

    A prediction is ``user_bias + item_bias + dot(user_factors, item_factors)``.
    """

    def __init__(self,
                 n_users,
                 n_items,
                 n_factors=40,
                 dropout_p=0,
                 sparse=False):
        """

        Parameters
        ----------
        n_users : int
            Number of users
        n_items : int
            Number of items
        n_factors : int
            Number of latent factors (or embeddings or whatever you want to
            call it).
        dropout_p : float
            p in nn.Dropout module. Probability of dropout.
        sparse : bool
            Whether or not to treat embeddings as sparse. NOTE: cannot use
            weight decay on the optimizer if sparse=True. Also, can only use
            Adagrad.
        """
        super().__init__()
        self.n_users = n_users
        self.n_items = n_items
        self.n_factors = n_factors
        self.user_biases = nn.Embedding(n_users, 1, sparse=sparse)
        self.item_biases = nn.Embedding(n_items, 1, sparse=sparse)
        self.user_embeddings = nn.Embedding(n_users, n_factors, sparse=sparse)
        self.item_embeddings = nn.Embedding(n_items, n_factors, sparse=sparse)

        self.dropout_p = dropout_p
        self.dropout = nn.Dropout(p=self.dropout_p)

        self.sparse = sparse

    # No ``__call__`` override: instances are invoked through
    # ``nn.Module.__call__``, which dispatches to ``forward`` and also runs
    # any registered hooks.

    def forward(self, users, items):
        """
        Forward pass through the model. For a single user and item, this
        looks like:

        user_bias + item_bias + user_embeddings.dot(item_embeddings)

        Parameters
        ----------
        users : torch.LongTensor
            1-D tensor of user indices
        items : torch.LongTensor
            1-D tensor of item indices, same length as ``users``

        Returns
        -------
        preds : torch.Tensor
            Predicted ratings, one per (user, item) pair, with shape (N,).

        """
        ues = self.user_embeddings(users)
        uis = self.item_embeddings(items)

        # Dropout is applied to each factor matrix independently.
        dot = (self.dropout(ues) * self.dropout(uis)).sum(dim=1, keepdim=True)
        preds = self.user_biases(users) + self.item_biases(items) + dot

        # Drop only the trailing size-1 axis. A bare squeeze() would also
        # collapse a batch of one to a 0-d tensor, which no longer matches
        # the shape of the target tensor.
        return preds.squeeze(dim=1)

    def predict(self, users, items):
        """Score (user, item) pairs; identical to calling ``forward``."""
        return self.forward(users, items)


def bpr_loss(preds, vals):
    """Pairwise ranking loss: the sum of ``(1 - sigmoid(preds)) ** 2``.

    ``preds`` holds positive-minus-negative score differences. ``vals`` is
    accepted only so the signature matches ``loss_function(preds, vals)``;
    it is not used.
    """
    shortfall = 1.0 - torch.sigmoid(preds)
    return (shortfall ** 2).sum()


class BPRModule(nn.Module):
    """
    Pairwise wrapper around a scoring model: the forward pass returns the
    score of the positive item minus the score of the negative item.
    """

    def __init__(self,
                 n_users,
                 n_items,
                 n_factors=40,
                 dropout_p=0,
                 sparse=False,
                 model=BaseModule):
        super().__init__()

        self.n_users = n_users
        self.n_items = n_items
        self.n_factors = n_factors
        self.dropout_p = dropout_p
        self.sparse = sparse
        # The wrapped model scores individual (user, item) pairs.
        self.pred_model = model(n_users,
                                n_items,
                                n_factors=n_factors,
                                dropout_p=dropout_p,
                                sparse=sparse)

    def forward(self, users, items):
        """Return ``score(users, pos_items) - score(users, neg_items)``.

        ``items`` must be a ``(pos_items, neg_items)`` tuple.
        """
        assert isinstance(items, tuple), \
            'Must pass in items as (pos_items, neg_items)'
        pos_items, neg_items = items
        scorer = self.pred_model
        return scorer(users, pos_items) - scorer(users, neg_items)

    def predict(self, users, items):
        """Score plain (user, item) pairs with the wrapped model."""
        return self.pred_model(users, items)


class BasePipeline:
    """
    Class defining a training pipeline. Instantiates data loaders, model,
    and optimizer. Handles training for multiple epochs and keeping track of
    train and test loss.

    Per-epoch results are collected in ``self.losses`` under the keys
    ``'train'``, ``'test'``, ``'eval-<metric>'`` and ``'epoch'``. Loss values
    are plain Python floats, normalized by the number of stored interactions.
    """

    def __init__(self,
                 train,
                 test=None,
                 model=BaseModule,
                 n_factors=40,
                 batch_size=32,
                 dropout_p=0.02,
                 sparse=False,
                 lr=0.01,
                 weight_decay=0.,
                 optimizer=torch.optim.Adam,
                 loss_function=nn.MSELoss(reduction='sum'),
                 n_epochs=10,
                 verbose=False,
                 random_seed=None,
                 interaction_class=Interactions,
                 hogwild=False,
                 num_workers=0,
                 eval_metrics=None,
                 k=5):
        """
        Parameters
        ----------
        train, test : scipy sparse matrices
            Interaction matrices (``shape`` and ``nnz`` are read); ``test``
            is optional.
        model : callable
            Model class, built as ``model(n_users, n_items, n_factors=...,
            dropout_p=..., sparse=...)``.
        optimizer : callable
            Optimizer class, built with ``lr`` and ``weight_decay``.
        loss_function : callable
            Called as ``loss_function(preds, val)``; expected to return a
            summed (not averaged) loss, since totals are divided by ``nnz``.
        interaction_class : callable
            Dataset class wrapping ``train``/``test`` for the data loaders.
        hogwild : bool
            Train each epoch in ``num_workers`` processes sharing the model.
        num_workers : int
            Data-loader workers (non-hogwild) or training processes (hogwild).
        eval_metrics : iterable of str, optional
            Names of functions in the ``metrics`` module to run every epoch.
        random_seed : int, optional
            Seed for torch and NumPy; offset by the process id when
            ``hogwild`` is set.
        """
        self.train = train
        self.test = test
        self.verbose = verbose
        self.hogwild = hogwild

        # Seed BEFORE the model is built so that the initial weights are
        # reproducible as well; seeding afterwards only fixed the shuffling.
        if random_seed is not None:
            if self.hogwild:
                random_seed += os.getpid()
            torch.manual_seed(random_seed)
            np.random.seed(random_seed)

        # With hogwild the parallelism comes from the training processes, so
        # the loaders themselves load data in-process.
        if hogwild:
            num_loader_workers = 0
        else:
            num_loader_workers = num_workers
        self.train_loader = data.DataLoader(
            interaction_class(train), batch_size=batch_size, shuffle=True,
            num_workers=num_loader_workers)
        if self.test is not None:
            self.test_loader = data.DataLoader(
                interaction_class(test), batch_size=batch_size, shuffle=True,
                num_workers=num_loader_workers)
        self.num_workers = num_workers
        self.n_users = self.train.shape[0]
        self.n_items = self.train.shape[1]
        self.n_factors = n_factors
        self.batch_size = batch_size
        self.dropout_p = dropout_p
        self.lr = lr
        self.weight_decay = weight_decay
        self.loss_function = loss_function
        self.n_epochs = n_epochs
        if sparse:
            # Sparse embeddings cannot be combined with weight decay.
            assert weight_decay == 0.0
        self.model = model(self.n_users,
                           self.n_items,
                           n_factors=self.n_factors,
                           dropout_p=self.dropout_p,
                           sparse=sparse)
        self.optimizer = optimizer(self.model.parameters(),
                                   lr=self.lr,
                                   weight_decay=self.weight_decay)
        self.warm_start = False
        self.losses = collections.defaultdict(list)

        if eval_metrics is None:
            eval_metrics = []
        self.eval_metrics = eval_metrics
        # NOTE(review): ``k`` is stored but not forwarded to the metric
        # functions in ``fit``; they run with their own default.
        self.k = k

    def break_grads(self):
        """Give every parameter its own copy of its gradient tensor."""
        for param in self.model.parameters():
            # Break gradient sharing
            if param.grad is not None:
                param.grad.data = param.grad.data.clone()

    def fit(self):
        """Train for ``n_epochs`` epochs, recording losses and metrics."""
        for epoch in range(1, self.n_epochs + 1):

            if self.hogwild:
                train_loss = self._fit_epoch_hogwild(epoch)
            else:
                train_loss = self._fit_epoch(epoch)

            self.losses['train'].append(train_loss)
            row = 'Epoch: {0:^3}  train: {1:^10.5f}'.format(epoch, self.losses['train'][-1])
            if self.test is not None:
                self.losses['test'].append(self._validation_loss())
                row += 'val: {0:^10.5f}'.format(self.losses['test'][-1])
                for metric in self.eval_metrics:
                    func = getattr(metrics, metric)
                    # The metric helpers divide by num_workers, so never hand
                    # them 0 (the default of this pipeline).
                    res = func(self.model, self.test_loader.dataset.mat_csr,
                               num_workers=max(1, self.num_workers))
                    self.losses['eval-{}'.format(metric)].append(res)
                    row += 'eval-{0}: {1:^10.5f}'.format(metric, res)
            self.losses['epoch'].append(epoch)
            if self.verbose:
                print(row)

    def _fit_epoch_hogwild(self, epoch):
        """Run one epoch in ``num_workers`` processes; return the mean loss."""
        self.model.share_memory()
        queue = mp.Queue()
        processes = []
        for _ in range(self.num_workers):
            p = mp.Process(target=self._fit_epoch,
                           kwargs={'epoch': epoch,
                                   'queue': queue})
            p.start()
            processes.append(p)

        # Drain the queue while the workers run and join only afterwards, the
        # same order the metric helpers use. Joining first can block on a
        # worker that is still flushing its result into the queue.
        train_losses = []
        while True:
            is_alive = any(p.is_alive() for p in processes)
            if not is_alive and queue.empty():
                break

            while not queue.empty():
                train_losses.append(queue.get())

        for p in processes:
            p.join()
        queue.close()
        return np.mean(train_losses)

    def _fit_epoch(self, epoch=1, queue=None):
        """
        Train for one pass over ``train_loader``.

        Returns the summed loss divided by ``train.nnz`` as a float, or puts
        that value on ``queue`` (and returns None) when a queue is given.
        """
        if self.hogwild:
            self.break_grads()

        self.model.train()
        total_loss = 0.0
        pbar = tqdm(enumerate(self.train_loader),
                    total=len(self.train_loader),
                    desc='({0:^3})'.format(epoch))
        for batch_idx, ((row, col), val) in pbar:
            self.optimizer.zero_grad()

            row = row.long()
            # TODO: turn this into a collate_fn like the data_loader
            # Pairwise datasets yield (pos, neg) columns, collated to a list.
            if isinstance(col, list):
                col = tuple(c.long() for c in col)
            else:
                col = col.long()
            val = val.float()

            preds = self.model(row, col)
            loss = self.loss_function(preds, val)
            loss.backward()

            self.optimizer.step()

            batch_total = loss.item()
            total_loss += batch_total
            pbar.set_postfix(train_loss=batch_total / row.size()[0])
        total_loss /= self.train.nnz
        if queue is not None:
            # A plain float crosses the process boundary without relying on
            # shared tensor storage that the sender would have to keep alive.
            queue.put(total_loss)
        else:
            return total_loss

    def _validation_loss(self):
        """Return the summed test loss divided by ``test.nnz`` as a float."""
        self.model.eval()
        total_loss = 0.0
        # Evaluation only: no gradients are needed.
        with torch.no_grad():
            for batch_idx, ((row, col), val) in enumerate(self.test_loader):
                row = row.long()
                if isinstance(col, list):
                    col = tuple(c.long() for c in col)
                else:
                    col = col.long()
                val = val.float()

                preds = self.model(row, col)
                loss = self.loss_function(preds, val)
                total_loss += loss.item()

        total_loss /= self.test.nnz
        return total_loss

In [None]:
def explicit():
    """Train the explicit-feedback factorization on MovieLens ratings using
    Adam and the pipeline's default loss."""
    train, test = get_movielens_train_test_split()
    settings = dict(
        test=test,
        model=BaseModule,
        n_factors=10,
        batch_size=1024,
        dropout_p=0.02,
        lr=0.02,
        weight_decay=0.1,
        optimizer=torch.optim.Adam,
        n_epochs=40,
        verbose=True,
        random_seed=2017,
    )
    BasePipeline(train, **settings).fit()


def implicit():
    """Train the pairwise (BPR-style) model on binarized MovieLens data with
    SGD and sparse embeddings, reporting AUC and precision@k every epoch."""
    train, test = get_movielens_train_test_split(implicit=True)

    settings = dict(
        test=test,
        verbose=True,
        batch_size=1024,
        num_workers=4,
        n_factors=20,
        weight_decay=0,
        dropout_p=0.,
        lr=.2,
        sparse=True,
        optimizer=torch.optim.SGD,
        n_epochs=40,
        random_seed=2017,
        loss_function=bpr_loss,
        model=BPRModule,
        interaction_class=PairwiseInteractions,
        eval_metrics=('auc', 'patk'),
    )
    BasePipeline(train, **settings).fit()


def hogwild():
    """Fit the implicit-feedback ``BPRModule`` pipeline with ``hogwild=True``.

    Every setting matches ``implicit()`` -- ``bpr_loss``,
    ``PairwiseInteractions``, sparse SGD at learning rate 0.2, 20 factors,
    batches of 1024 with 4 workers, 40 epochs, seed 2017, ``'auc'`` and
    ``'patk'`` metrics -- except that ``hogwild=True`` is also passed to
    ``BasePipeline``. What that flag changes is decided by ``BasePipeline``
    (presumably multi-process training -- confirm there). Nothing is returned.
    """
    train_split, test_split = get_movielens_train_test_split(implicit=True)
    settings = dict(
        verbose=True,
        batch_size=1024,
        num_workers=4,
        n_factors=20,
        weight_decay=0,
        dropout_p=0.0,
        lr=0.2,
        sparse=True,
        optimizer=torch.optim.SGD,
        n_epochs=40,
        random_seed=2017,
        loss_function=bpr_loss,
        model=BPRModule,
        hogwild=True,
        interaction_class=PairwiseInteractions,
        eval_metrics=('auc', 'patk'),
    )
    BasePipeline(train_split, test=test_split, **settings).fit()

In [None]:
# Run the explicit-feedback configuration; the per-epoch train/val losses it prints follow.
explicit()

Making data path
Downloading MovieLens data


( 1 ): 100%|██████████| 89/89 [00:01<00:00, 53.63it/s, train_loss=6.88]
( 2 ):   7%|▋         | 6/89 [00:00<00:01, 57.03it/s, train_loss=6.06]

Epoch:  1   train:  14.42120 val:  8.68083  


( 2 ): 100%|██████████| 89/89 [00:01<00:00, 63.13it/s, train_loss=2.27]
( 3 ):   8%|▊         | 7/89 [00:00<00:01, 62.84it/s, train_loss=2.23]

Epoch:  2   train:  4.15028  val:  3.99969  


( 3 ): 100%|██████████| 89/89 [00:01<00:00, 59.57it/s, train_loss=1.67]
( 4 ):   7%|▋         | 6/89 [00:00<00:01, 59.43it/s, train_loss=1.33]

Epoch:  3   train:  1.84903  val:  2.41240  


( 4 ): 100%|██████████| 89/89 [00:01<00:00, 59.96it/s, train_loss=1.05]
( 5 ):   8%|▊         | 7/89 [00:00<00:01, 61.59it/s, train_loss=0.982]

Epoch:  4   train:  1.20266  val:  1.78271  


( 5 ): 100%|██████████| 89/89 [00:01<00:00, 57.47it/s, train_loss=0.917]
( 6 ):   8%|▊         | 7/89 [00:00<00:01, 62.99it/s, train_loss=0.861]

Epoch:  5   train:  0.98022  val:  1.48147  


( 6 ): 100%|██████████| 89/89 [00:01<00:00, 61.39it/s, train_loss=0.9]
( 7 ):   8%|▊         | 7/89 [00:00<00:01, 65.11it/s, train_loss=0.77] 

Epoch:  6   train:  0.88477  val:  1.32482  


( 7 ): 100%|██████████| 89/89 [00:01<00:00, 62.83it/s, train_loss=0.806]
( 8 ):   7%|▋         | 6/89 [00:00<00:01, 54.86it/s, train_loss=0.766]

Epoch:  7   train:  0.83306  val:  1.22818  


( 8 ): 100%|██████████| 89/89 [00:01<00:00, 58.63it/s, train_loss=0.776]
( 9 ):   3%|▎         | 3/89 [00:00<00:03, 25.32it/s, train_loss=0.722]

Epoch:  8   train:  0.80015  val:  1.16457  


( 9 ): 100%|██████████| 89/89 [00:01<00:00, 59.21it/s, train_loss=0.871]
(10 ):   2%|▏         | 2/89 [00:00<00:04, 19.07it/s, train_loss=0.708]

Epoch:  9   train:  0.77529  val:  1.12250  


(10 ): 100%|██████████| 89/89 [00:01<00:00, 60.45it/s, train_loss=0.749]
(11 ):   2%|▏         | 2/89 [00:00<00:04, 19.87it/s, train_loss=0.735]

Epoch: 10   train:  0.75322  val:  1.09408  


(11 ): 100%|██████████| 89/89 [00:01<00:00, 60.82it/s, train_loss=0.728]
(12 ):   8%|▊         | 7/89 [00:00<00:01, 62.74it/s, train_loss=0.655]

Epoch: 11   train:  0.73431  val:  1.06755  


(12 ): 100%|██████████| 89/89 [00:01<00:00, 64.48it/s, train_loss=0.729]
(13 ):   8%|▊         | 7/89 [00:00<00:01, 61.52it/s, train_loss=0.706]

Epoch: 12   train:  0.71816  val:  1.05441  


(13 ): 100%|██████████| 89/89 [00:01<00:00, 63.59it/s, train_loss=0.804]
(14 ):   7%|▋         | 6/89 [00:00<00:01, 57.44it/s, train_loss=0.658]

Epoch: 13   train:  0.70331  val:  1.04291  


(14 ): 100%|██████████| 89/89 [00:01<00:00, 62.10it/s, train_loss=0.648]
(15 ):   7%|▋         | 6/89 [00:00<00:01, 55.63it/s, train_loss=0.662]

Epoch: 14   train:  0.69230  val:  1.03409  


(15 ): 100%|██████████| 89/89 [00:01<00:00, 59.82it/s, train_loss=0.71]
(16 ):   8%|▊         | 7/89 [00:00<00:01, 63.50it/s, train_loss=0.648]

Epoch: 15   train:  0.68174  val:  1.02946  


(16 ): 100%|██████████| 89/89 [00:01<00:00, 63.41it/s, train_loss=0.762]
(17 ):   8%|▊         | 7/89 [00:00<00:01, 66.62it/s, train_loss=0.6]  

Epoch: 16   train:  0.67185  val:  1.02574  


(17 ): 100%|██████████| 89/89 [00:01<00:00, 61.57it/s, train_loss=0.709]
(18 ):   7%|▋         | 6/89 [00:00<00:01, 59.98it/s, train_loss=0.647]

Epoch: 17   train:  0.66559  val:  1.01690  


(18 ): 100%|██████████| 89/89 [00:01<00:00, 59.60it/s, train_loss=0.657]
(19 ):   7%|▋         | 6/89 [00:00<00:01, 58.13it/s, train_loss=0.609]

Epoch: 18   train:  0.65754  val:  1.01814  


(19 ): 100%|██████████| 89/89 [00:01<00:00, 58.23it/s, train_loss=0.609]
(20 ):   8%|▊         | 7/89 [00:00<00:01, 64.70it/s, train_loss=0.636]

Epoch: 19   train:  0.65179  val:  1.01196  


(20 ): 100%|██████████| 89/89 [00:01<00:00, 58.38it/s, train_loss=0.693]
(21 ):   8%|▊         | 7/89 [00:00<00:01, 68.79it/s, train_loss=0.607]

Epoch: 20   train:  0.64911  val:  1.00926  


(21 ): 100%|██████████| 89/89 [00:01<00:00, 60.85it/s, train_loss=0.75]
(22 ):   7%|▋         | 6/89 [00:00<00:01, 52.77it/s, train_loss=0.635]

Epoch: 21   train:  0.64537  val:  1.01296  


(22 ): 100%|██████████| 89/89 [00:01<00:00, 59.46it/s, train_loss=0.702]
(23 ):   4%|▍         | 4/89 [00:00<00:02, 39.91it/s, train_loss=0.588]

Epoch: 22   train:  0.64303  val:  1.00838  


(23 ): 100%|██████████| 89/89 [00:01<00:00, 56.49it/s, train_loss=0.683]
(24 ):   7%|▋         | 6/89 [00:00<00:01, 59.61it/s, train_loss=0.633]

Epoch: 23   train:  0.63932  val:  0.99910  


(24 ): 100%|██████████| 89/89 [00:01<00:00, 58.42it/s, train_loss=0.709]
(25 ):   7%|▋         | 6/89 [00:00<00:01, 52.67it/s, train_loss=0.594]

Epoch: 24   train:  0.63549  val:  1.01004  


(25 ): 100%|██████████| 89/89 [00:01<00:00, 57.48it/s, train_loss=0.786]
(26 ):   7%|▋         | 6/89 [00:00<00:01, 58.84it/s, train_loss=0.59] 

Epoch: 25   train:  0.63468  val:  1.00146  


(26 ): 100%|██████████| 89/89 [00:01<00:00, 55.84it/s, train_loss=0.64]
(27 ):   7%|▋         | 6/89 [00:00<00:01, 58.98it/s, train_loss=0.603]

Epoch: 26   train:  0.63316  val:  1.00257  


(27 ): 100%|██████████| 89/89 [00:01<00:00, 60.23it/s, train_loss=0.682]
(28 ):   8%|▊         | 7/89 [00:00<00:01, 67.37it/s, train_loss=0.584]

Epoch: 27   train:  0.63269  val:  1.00099  


(28 ): 100%|██████████| 89/89 [00:01<00:00, 59.51it/s, train_loss=0.721]
(29 ):   7%|▋         | 6/89 [00:00<00:01, 57.41it/s, train_loss=0.573]

Epoch: 28   train:  0.63194  val:  0.99549  


(29 ): 100%|██████████| 89/89 [00:01<00:00, 58.52it/s, train_loss=0.759]
(30 ):   7%|▋         | 6/89 [00:00<00:01, 58.95it/s, train_loss=0.564]

Epoch: 29   train:  0.63050  val:  1.00029  


(30 ): 100%|██████████| 89/89 [00:01<00:00, 59.03it/s, train_loss=0.718]
(31 ):   8%|▊         | 7/89 [00:00<00:01, 65.42it/s, train_loss=0.563]

Epoch: 30   train:  0.63016  val:  0.99232  


(31 ): 100%|██████████| 89/89 [00:01<00:00, 57.36it/s, train_loss=0.699]
(32 ):   8%|▊         | 7/89 [00:00<00:01, 62.85it/s, train_loss=0.58] 

Epoch: 31   train:  0.63022  val:  0.99609  


(32 ): 100%|██████████| 89/89 [00:01<00:00, 56.56it/s, train_loss=0.743]
(33 ):   7%|▋         | 6/89 [00:00<00:01, 59.53it/s, train_loss=0.576]

Epoch: 32   train:  0.63043  val:  0.99635  


(33 ): 100%|██████████| 89/89 [00:01<00:00, 57.91it/s, train_loss=0.643]
(34 ):   8%|▊         | 7/89 [00:00<00:01, 64.98it/s, train_loss=0.625]

Epoch: 33   train:  0.63210  val:  0.99697  


(34 ): 100%|██████████| 89/89 [00:01<00:00, 58.12it/s, train_loss=0.641]
(35 ):   6%|▌         | 5/89 [00:00<00:01, 49.84it/s, train_loss=0.546]

Epoch: 34   train:  0.63177  val:  0.99458  


(35 ): 100%|██████████| 89/89 [00:01<00:00, 54.93it/s, train_loss=0.654]
(36 ):   7%|▋         | 6/89 [00:00<00:01, 57.96it/s, train_loss=0.543]

Epoch: 35   train:  0.63137  val:  1.00267  


(36 ): 100%|██████████| 89/89 [00:01<00:00, 58.59it/s, train_loss=0.742]
(37 ):   7%|▋         | 6/89 [00:00<00:01, 59.93it/s, train_loss=0.553]

Epoch: 36   train:  0.63002  val:  0.99718  


(37 ): 100%|██████████| 89/89 [00:01<00:00, 58.76it/s, train_loss=0.733]
(38 ):   7%|▋         | 6/89 [00:00<00:01, 57.61it/s, train_loss=0.56]

Epoch: 37   train:  0.62959  val:  0.99938  


(38 ): 100%|██████████| 89/89 [00:01<00:00, 59.98it/s, train_loss=0.638]
(39 ):   8%|▊         | 7/89 [00:00<00:01, 61.75it/s, train_loss=0.599]

Epoch: 38   train:  0.63083  val:  1.00133  


(39 ): 100%|██████████| 89/89 [00:01<00:00, 61.77it/s, train_loss=0.724]
(40 ):   8%|▊         | 7/89 [00:00<00:01, 60.35it/s, train_loss=0.573]

Epoch: 39   train:  0.63185  val:  0.99541  


(40 ): 100%|██████████| 89/89 [00:01<00:00, 61.02it/s, train_loss=0.69]


Epoch: 40   train:  0.63168  val:  0.99467  


In [None]:
# Run the implicit-feedback (bpr_loss) configuration; each epoch line also reports the 'auc' and 'patk' metrics.
implicit()

  cpuset_checked))
( 1 ): 100%|██████████| 46/46 [00:02<00:00, 21.50it/s, train_loss=0.361]


Epoch:  1   train:  0.42040  val:  0.40008  eval-auc:  0.55278  eval-patk:  0.00776  


( 2 ): 100%|██████████| 46/46 [00:02<00:00, 22.72it/s, train_loss=0.298]


Epoch:  2   train:  0.34066  val:  0.35044  eval-auc:  0.60807  eval-patk:  0.01164  


( 3 ): 100%|██████████| 46/46 [00:02<00:00, 22.89it/s, train_loss=0.303]


Epoch:  3   train:  0.27492  val:  0.31180  eval-auc:  0.65543  eval-patk:  0.01804  


( 4 ): 100%|██████████| 46/46 [00:01<00:00, 23.75it/s, train_loss=0.192]


Epoch:  4   train:  0.22703  val:  0.29160  eval-auc:  0.69006  eval-patk:  0.02694  


( 5 ): 100%|██████████| 46/46 [00:02<00:00, 21.58it/s, train_loss=0.17]


Epoch:  5   train:  0.19465  val:  0.27365  eval-auc:  0.71412  eval-patk:  0.03265  


( 6 ): 100%|██████████| 46/46 [00:02<00:00, 22.30it/s, train_loss=0.176]


Epoch:  6   train:  0.17487  val:  0.25775  eval-auc:  0.73276  eval-patk:  0.03973  


( 7 ): 100%|██████████| 46/46 [00:02<00:00, 22.14it/s, train_loss=0.202]


Epoch:  7   train:  0.16267  val:  0.25430  eval-auc:  0.74666  eval-patk:  0.04201  


( 8 ): 100%|██████████| 46/46 [00:02<00:00, 22.22it/s, train_loss=0.17]


Epoch:  8   train:  0.15176  val:  0.24547  eval-auc:  0.75858  eval-patk:  0.04429  


( 9 ): 100%|██████████| 46/46 [00:02<00:00, 22.55it/s, train_loss=0.141]


Epoch:  9   train:  0.14359  val:  0.23771  eval-auc:  0.76822  eval-patk:  0.04589  


(10 ): 100%|██████████| 46/46 [00:01<00:00, 23.32it/s, train_loss=0.151]


Epoch: 10   train:  0.13715  val:  0.22593  eval-auc:  0.77713  eval-patk:  0.04361  


(11 ): 100%|██████████| 46/46 [00:01<00:00, 23.04it/s, train_loss=0.115]


Epoch: 11   train:  0.13167  val:  0.22131  eval-auc:  0.78402  eval-patk:  0.04772  


(12 ): 100%|██████████| 46/46 [00:02<00:00, 22.63it/s, train_loss=0.134]


Epoch: 12   train:  0.12781  val:  0.22118  eval-auc:  0.79055  eval-patk:  0.04749  


(13 ): 100%|██████████| 46/46 [00:01<00:00, 23.33it/s, train_loss=0.128]


Epoch: 13   train:  0.12185  val:  0.21263  eval-auc:  0.79726  eval-patk:  0.05228  


(14 ): 100%|██████████| 46/46 [00:02<00:00, 22.32it/s, train_loss=0.109]


Epoch: 14   train:  0.11865  val:  0.20135  eval-auc:  0.80326  eval-patk:  0.04977  


(15 ): 100%|██████████| 46/46 [00:01<00:00, 23.13it/s, train_loss=0.117]


Epoch: 15   train:  0.11352  val:  0.20501  eval-auc:  0.80805  eval-patk:  0.05434  


(16 ): 100%|██████████| 46/46 [00:01<00:00, 23.17it/s, train_loss=0.113]


Epoch: 16   train:  0.11156  val:  0.20189  eval-auc:  0.81208  eval-patk:  0.05753  


(17 ): 100%|██████████| 46/46 [00:02<00:00, 22.15it/s, train_loss=0.127]


Epoch: 17   train:  0.10898  val:  0.19678  eval-auc:  0.81534  eval-patk:  0.05936  


(18 ): 100%|██████████| 46/46 [00:01<00:00, 23.03it/s, train_loss=0.13]


Epoch: 18   train:  0.10363  val:  0.19250  eval-auc:  0.81967  eval-patk:  0.05890  


(19 ): 100%|██████████| 46/46 [00:02<00:00, 22.78it/s, train_loss=0.121]


Epoch: 19   train:  0.10260  val:  0.18791  eval-auc:  0.82216  eval-patk:  0.06416  


(20 ): 100%|██████████| 46/46 [00:02<00:00, 22.97it/s, train_loss=0.121]


Epoch: 20   train:  0.10081  val:  0.18382  eval-auc:  0.82357  eval-patk:  0.06370  


(21 ): 100%|██████████| 46/46 [00:02<00:00, 22.89it/s, train_loss=0.0978]


Epoch: 21   train:  0.09957  val:  0.18360  eval-auc:  0.82604  eval-patk:  0.06667  


(22 ): 100%|██████████| 46/46 [00:02<00:00, 22.88it/s, train_loss=0.105]


Epoch: 22   train:  0.09936  val:  0.17989  eval-auc:  0.82805  eval-patk:  0.06667  


(23 ): 100%|██████████| 46/46 [00:01<00:00, 23.03it/s, train_loss=0.102]


Epoch: 23   train:  0.09896  val:  0.17684  eval-auc:  0.83031  eval-patk:  0.07123  


(24 ): 100%|██████████| 46/46 [00:01<00:00, 23.09it/s, train_loss=0.116]


Epoch: 24   train:  0.09503  val:  0.18290  eval-auc:  0.83277  eval-patk:  0.06758  


(25 ): 100%|██████████| 46/46 [00:02<00:00, 22.64it/s, train_loss=0.081]


Epoch: 25   train:  0.09565  val:  0.17506  eval-auc:  0.83462  eval-patk:  0.07511  


(26 ): 100%|██████████| 46/46 [00:02<00:00, 22.48it/s, train_loss=0.102]


Epoch: 26   train:  0.09337  val:  0.17530  eval-auc:  0.83571  eval-patk:  0.07169  


(27 ): 100%|██████████| 46/46 [00:02<00:00, 21.46it/s, train_loss=0.0837]


Epoch: 27   train:  0.09035  val:  0.17689  eval-auc:  0.83655  eval-patk:  0.07420  


(28 ): 100%|██████████| 46/46 [00:02<00:00, 20.81it/s, train_loss=0.0846]


Epoch: 28   train:  0.08635  val:  0.17874  eval-auc:  0.83849  eval-patk:  0.07420  


(29 ): 100%|██████████| 46/46 [00:02<00:00, 21.13it/s, train_loss=0.107]


Epoch: 29   train:  0.08961  val:  0.17910  eval-auc:  0.83905  eval-patk:  0.07237  


(30 ): 100%|██████████| 46/46 [00:02<00:00, 21.09it/s, train_loss=0.0935]


Epoch: 30   train:  0.08822  val:  0.17294  eval-auc:  0.84065  eval-patk:  0.07717  


(31 ): 100%|██████████| 46/46 [00:02<00:00, 21.52it/s, train_loss=0.0926]


Epoch: 31   train:  0.08964  val:  0.16762  eval-auc:  0.84098  eval-patk:  0.07466  


(32 ): 100%|██████████| 46/46 [00:02<00:00, 21.57it/s, train_loss=0.0708]


Epoch: 32   train:  0.08982  val:  0.16215  eval-auc:  0.84217  eval-patk:  0.07055  


(33 ): 100%|██████████| 46/46 [00:02<00:00, 20.14it/s, train_loss=0.106]


Epoch: 33   train:  0.08753  val:  0.16941  eval-auc:  0.84282  eval-patk:  0.07352  


(34 ): 100%|██████████| 46/46 [00:02<00:00, 20.73it/s, train_loss=0.0781]


Epoch: 34   train:  0.08659  val:  0.17334  eval-auc:  0.84284  eval-patk:  0.07489  


(35 ): 100%|██████████| 46/46 [00:02<00:00, 20.66it/s, train_loss=0.0971]


Epoch: 35   train:  0.08623  val:  0.17476  eval-auc:  0.84393  eval-patk:  0.07443  


(36 ): 100%|██████████| 46/46 [00:02<00:00, 20.77it/s, train_loss=0.0864]


Epoch: 36   train:  0.08559  val:  0.17291  eval-auc:  0.84470  eval-patk:  0.07397  


(37 ): 100%|██████████| 46/46 [00:02<00:00, 20.11it/s, train_loss=0.0751]


Epoch: 37   train:  0.08506  val:  0.16872  eval-auc:  0.84690  eval-patk:  0.07648  


(38 ): 100%|██████████| 46/46 [00:02<00:00, 18.27it/s, train_loss=0.0964]


Epoch: 38   train:  0.08522  val:  0.16541  eval-auc:  0.84715  eval-patk:  0.07991  


(39 ): 100%|██████████| 46/46 [00:02<00:00, 19.55it/s, train_loss=0.0962]


Epoch: 39   train:  0.08316  val:  0.16021  eval-auc:  0.84812  eval-patk:  0.07991  


(40 ): 100%|██████████| 46/46 [00:02<00:00, 19.17it/s, train_loss=0.0943]


Epoch: 40   train:  0.08459  val:  0.16542  eval-auc:  0.84809  eval-patk:  0.07237  
