In [None]:
# default_exp models.nmf

# NMF
> Neural Matrix Factorization.

In [None]:
#hide
from nbdev.showdoc import *
from fastcore.nb_imports import *
from fastcore.test import *

In [None]:
#export
from typing import Any, Iterable, List, Optional, Tuple, Union, Callable
import os

import torch
from torch import nn

from recohut.models.bases.common import PointModel

In [None]:
#export
class NMF(PointModel):
    """Neural matrix factorization scorer for (user, item) pairs.

    Two branches with separate embedding tables are combined:

    * an MLP branch that concatenates the user and item embeddings and
      passes them through three linear layers (ReLU + dropout after the
      first two), and
    * a GMF branch that multiplies the user and item embeddings
      element-wise and projects the product with one linear layer.

    Each branch ends with ``embedding_dim // 2`` features; the two halves
    are concatenated and mapped to a single raw score per pair.

    Args:
        n_users: number of rows in the user embedding tables.
        n_items: number of rows in the item embedding tables.
        embedding_dim: width of every embedding vector.
        dropout: dropout probability used inside the MLP branch.
    """

    def __init__(self, n_users, n_items, embedding_dim, dropout=0.1):
        super().__init__()

        # Output width of each branch just before fusion.
        branch_dim = embedding_dim // 2

        # Embeddings feeding the MLP branch.
        self.user_embedding = nn.Embedding(
            num_embeddings=n_users, embedding_dim=embedding_dim
        )
        self.item_embedding = nn.Embedding(
            num_embeddings=n_items, embedding_dim=embedding_dim
        )

        # Independent embeddings feeding the GMF branch.
        self.user_embedding_gmf = nn.Embedding(
            num_embeddings=n_users, embedding_dim=embedding_dim
        )
        self.item_embedding_gmf = nn.Embedding(
            num_embeddings=n_items, embedding_dim=embedding_dim
        )

        self.gmf = nn.Linear(embedding_dim, branch_dim)

        self.fc1 = nn.Linear(embedding_dim * 2, embedding_dim)
        self.fc2 = nn.Linear(embedding_dim, embedding_dim)
        self.fc3 = nn.Linear(embedding_dim, branch_dim)

        # The fused vector is 2 * branch_dim wide. That equals embedding_dim
        # when it is even; sizing from branch_dim keeps odd values working too.
        self.fc_final = nn.Linear(2 * branch_dim, 1)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, users, items):
        """Score each (user, item) pair.

        Args:
            users: 1-D integer tensor of user indices.
            items: 1-D integer tensor of item indices, same length as ``users``.

        Returns:
            1-D tensor with one raw (un-activated) score per pair.
        """
        # MLP branch: concatenate along the feature axis.
        mlp_input = torch.cat(
            [self.user_embedding(users), self.item_embedding(items)], dim=1
        )
        # GMF branch: element-wise product of the two embeddings.
        gmf_input = self.user_embedding_gmf(users) * self.item_embedding_gmf(items)

        gmf_out = self.gmf(gmf_input)

        hidden = self.dropout(torch.relu(self.fc1(mlp_input)))
        hidden = self.dropout(torch.relu(self.fc2(hidden)))
        mlp_out = self.fc3(hidden)

        fused = torch.cat([mlp_out, gmf_out], dim=1)
        scores = self.fc_final(fused)

        # Drop only the trailing singleton feature axis so a batch of one
        # still comes back as shape (1,) rather than a 0-d tensor.
        return scores.squeeze(-1)

In [None]:
# Smoke test: build a tiny model and score two (user, item) pairs.
model = NMF(n_users=5, n_items=5, embedding_dim=4)
# Call the module itself rather than .forward() so registered hooks are honoured.
model(users=torch.tensor([0,1]), items=torch.tensor([1,3]))

tensor([-0.4212, -0.6745], grad_fn=<SqueezeBackward0>)

In [None]:
#export
class NMFv2(nn.Module):
    """Neural matrix factorization with a configurable MLP tower.

    A matrix-factorisation branch (element-wise product of user and item
    embeddings) and an MLP branch (concatenated user and item embeddings
    passed through ``Linear`` + ``ReLU`` stages) are concatenated and mapped
    through one affine layer followed by a sigmoid.

    Args:
        args: object exposing
            ``factor_num`` (int, width of each MF embedding),
            ``layers`` (sequence of ints, MLP widths; ``layers[0]`` is the
            width of the concatenated user+item MLP input and must be even),
            ``dropout`` (stored on the instance; not applied in ``forward``).
        num_users: number of rows in the user embedding tables.
        num_items: number of rows in the item embedding tables.

    Raises:
        ValueError: if ``args.layers[0]`` cannot be split evenly between the
            user and the item MLP embedding.
    """

    def __init__(self, args, num_users, num_items):
        super().__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.factor_num_mf = args.factor_num
        self.factor_num_mlp = int(args.layers[0]) // 2
        # The first Linear layer expects exactly layers[0] inputs, which is
        # the user and item MLP embeddings side by side. Reject odd widths
        # here instead of failing later with a shape error in forward().
        if 2 * self.factor_num_mlp != args.layers[0]:
            raise ValueError(
                f"args.layers[0] must be an even integer, got {args.layers[0]!r}"
            )
        self.layers = args.layers
        self.dropout = args.dropout

        self.embedding_user_mlp = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.factor_num_mlp)
        self.embedding_item_mlp = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.factor_num_mlp)

        self.embedding_user_mf = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.factor_num_mf)
        self.embedding_item_mf = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.factor_num_mf)

        # One Linear + ReLU pair for each consecutive pair of widths.
        self.fc_layers = nn.ModuleList()
        for in_size, out_size in zip(args.layers[:-1], args.layers[1:]):
            self.fc_layers.append(nn.Linear(in_size, out_size))
            self.fc_layers.append(nn.ReLU())

        self.affine_output = nn.Linear(in_features=args.layers[-1] + self.factor_num_mf, out_features=1)
        self.logistic = nn.Sigmoid()
        self.init_weight()

    def init_weight(self):
        """Initialise embeddings (normal, std 0.01), linear weights (Xavier uniform) and zero all biases."""
        nn.init.normal_(self.embedding_user_mlp.weight, std=0.01)
        nn.init.normal_(self.embedding_item_mlp.weight, std=0.01)
        nn.init.normal_(self.embedding_user_mf.weight, std=0.01)
        nn.init.normal_(self.embedding_item_mf.weight, std=0.01)

        for m in self.fc_layers:
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)

        nn.init.xavier_uniform_(self.affine_output.weight)

        for m in self.modules():
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.zeros_(m.bias)

    def forward(self, user_indices, item_indices):
        """Predict an interaction probability for each (user, item) pair.

        Args:
            user_indices: integer tensor of user indices.
            item_indices: integer tensor of item indices, same shape.

        Returns:
            Tensor of sigmoid outputs with the same shape as the index
            tensors (a batch of one yields shape ``(1,)``).
        """
        user_embedding_mlp = self.embedding_user_mlp(user_indices)
        item_embedding_mlp = self.embedding_item_mlp(item_indices)

        user_embedding_mf = self.embedding_user_mf(user_indices)
        item_embedding_mf = self.embedding_item_mf(item_indices)

        mlp_vector = torch.cat([user_embedding_mlp, item_embedding_mlp], dim=-1)
        mf_vector = torch.mul(user_embedding_mf, item_embedding_mf)

        for layer in self.fc_layers:
            mlp_vector = layer(mlp_vector)

        vector = torch.cat([mlp_vector, mf_vector], dim=-1)
        logits = self.affine_output(vector)
        rating = self.logistic(logits)
        # Remove only the trailing size-1 feature axis; a bare squeeze()
        # would also drop the batch axis when a single pair is scored.
        return rating.squeeze(-1)

In [None]:
class Args:
    """Minimal hyper-parameter holder for the NMFv2 smoke test in this cell."""
    dropout = 0.2
    factor_num = 4      # width of each matrix-factorisation embedding
    layers = [8,4,2]    # MLP widths; layers[0] is shared by the user and item halves
args = Args()

model = NMFv2(args, num_users=5, num_items=5)
# Call the module itself rather than .forward() so registered hooks are honoured.
model(torch.tensor([0,1]), torch.tensor([1,3]))

tensor([0.5000, 0.5000], grad_fn=<SqueezeBackward0>)

## Dataset

In [None]:
import numpy as np
import pandas as pd

from recohut.utils.common_utils import *
from recohut.datasets.bases.interactions import InteractionsDataset, InteractionsDataModule

import warnings
warnings.filterwarnings('ignore')


class ML1mDataset(InteractionsDataset):
    """MovieLens-1M ratings as an ``InteractionsDataset``.

    Supplies the three dataset-specific pieces: the name of the raw file,
    how to download it, and how to load it into a ratings DataFrame.
    """

    url = "http://files.grouplens.org/datasets/movielens/ml-1m.zip"

    @property
    def raw_file_names(self):
        """Name of the single raw file kept in ``self.raw_dir``."""
        return 'ratings.dat'

    def download(self):
        """Download the archive and leave only the ratings file in ``self.raw_dir``."""
        from shutil import move, rmtree

        path = download_url(self.url, self.raw_dir)
        extract_zip(path, self.raw_dir)

        # The archive unpacks into an 'ml-1m' folder: lift the ratings file
        # out of it, then discard the folder and the downloaded zip.
        extracted_dir = os.path.join(self.raw_dir, 'ml-1m')
        move(os.path.join(extracted_dir, self.raw_file_names), self.raw_dir)
        rmtree(extracted_dir)
        os.unlink(path)

    def load_ratings_df(self):
        """Read the raw ratings file into a DataFrame.

        Returns:
            DataFrame with columns ``uid``, ``sid``, ``rating``, ``timestamp``
            holding one row per (uid, sid) pair. When a pair occurs more than
            once, the row with the greatest timestamp is kept. Rows stay in
            their original file order.
        """
        df = pd.read_csv(self.raw_paths[0], sep='::', header=None, engine='python')
        df.columns = ['uid', 'sid', 'rating', 'timestamp']
        # Order by time before de-duplicating so that keep='last' really
        # selects the most recent rating, not merely the last row in the file.
        # mergesort is stable, so equal timestamps keep their file order.
        # sort_index() then restores the original row order of the survivors.
        return (
            df.sort_values('timestamp', kind='mergesort')
            .drop_duplicates(subset=['uid', 'sid'], keep='last')
            .sort_index()
        )


class ML1mDataModule(InteractionsDataModule):
    """``InteractionsDataModule`` whose dataset class is ``ML1mDataset``."""
    # Dataset class attribute set for the base data module.
    dataset_cls = ML1mDataset

## Trainer

In [None]:
class Args:
    """Settings for the MovieLens-1M training run.

    Every entry becomes an instance attribute. The notebook later unpacks
    the instance dictionary into ``ML1mDataModule`` and reads ``log_dir``,
    ``model_dir``, ``embedding_dim`` and ``max_epochs`` directly.
    """

    def __init__(self):
        settings = {
            # data location and preprocessing thresholds
            'data_dir': '/content/data',
            'min_rating': 4,
            'num_negative_samples': 99,
            'min_uc': 5,
            'min_sc': 5,
            # output locations
            'log_dir': '/content/logs',
            'model_dir': '/content/models',
            # splitting and data-loading options
            'val_p': 0.2,
            'test_p': 0.2,
            'num_workers': 2,
            'normalize': False,
            'batch_size': 32,
            'seed': 42,
            'shuffle': True,
            'pin_memory': True,
            'drop_last': False,
            'split_type': 'stratified',
            # model size and training length
            'embedding_dim': 20,
            'max_epochs': 5,
        }
        for name, value in settings.items():
            setattr(self, name, value)


args = Args()

In [None]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

# Every configuration attribute is forwarded to the data module as a keyword argument.
ds = ML1mDataModule(**vars(args))

# TensorBoard logs are written under the configured log directory.
logger = TensorBoardLogger(save_dir=args.log_dir)

# Checkpoint callback: watches "valid_loss" (lower is better) and writes
# to <model_dir>/recommender.
checkpoint_callback = ModelCheckpoint(
    dirpath=args.model_dir,
    filename="recommender",
    monitor="valid_loss",
    mode="min",
)

def pl_trainer(model, datamodule, check_val_every_n_epoch=10):
    """Fit ``model`` on ``datamodule`` and return the test results.

    Relies on the notebook-level ``args``, ``logger`` and
    ``checkpoint_callback`` objects.

    Args:
        model: the Lightning module to train.
        datamodule: the data module supplying the train/valid/test loaders.
        check_val_every_n_epoch: desired number of training epochs between
            validation runs. It is capped at ``args.max_epochs`` so that
            validation happens at least once per fit.

    Returns:
        The value returned by ``Trainer.test``.
    """
    max_epochs = args.max_epochs

    # A validation interval longer than the whole run means no validation
    # epoch ever executes, so the checkpoint callback monitoring
    # "valid_loss" would never get a value to act on. Cap the interval.
    val_every = check_val_every_n_epoch
    if isinstance(max_epochs, int) and max_epochs > 0:
        val_every = max(1, min(val_every, max_epochs))

    # `gpus=None` was the Trainer default and is omitted here.
    trainer = Trainer(
        max_epochs=max_epochs,
        logger=logger,
        check_val_every_n_epoch=val_every,
        callbacks=[checkpoint_callback],
    )

    trainer.fit(model, datamodule=datamodule)
    test_result = trainer.test(model, datamodule=datamodule)
    return test_result

In [None]:
# Run the data module's preparation step; its progress log is the cell output below.
ds.prepare_data()

Processing...


Turning into implicit ratings
Filtering triplets
Densifying index


Done!


In [None]:
# Size the embedding tables from the prepared dataset's user and item counts.
model = NMF(n_items=ds.data.num_items, n_users=ds.data.num_users, embedding_dim=args.embedding_dim)

# Fit the model, then evaluate on the test split; the returned metrics are displayed as the cell output.
pl_trainer(model, ds)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name               | Type      | Params
-------------------------------------------------
0 | user_embedding     | Embedding | 120 K 
1 | item_embedding     | Embedding | 62.5 K
2 | user_embedding_gmf | Embedding | 120 K 
3 | item_embedding_gmf | Embedding | 62.5 K
4 | gmf                | Linear    | 210   
5 | fc1                | Linear    | 820   
6 | fc2                | Linear    | 420   
7 | fc3                | Linear    | 210   
8 | fc_final           | Linear    | 21    
9 | dropout            | Dropout   | 0     
-------------------------------------------------
368 K     Trainable params
0         Non-trainable params
368 K     Total params
1.472     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'apak': tensor(0.0392),
                  'hr': tensor(0.1308),
                  'loss': tensor(0.2274),
                  'ncdg': tensor(0.0602)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'apak': tensor(0.0392),
   'hr': tensor(0.1308),
   'loss': tensor(0.2274),
   'ncdg': tensor(0.0602)}}]

In [None]:
!tree -h --du -C "{args.data_dir}"

[01;34m/content/data[00m
├── [ 11M]  [01;34mprocessed[00m
│   ├── [2.3M]  data_test_neg.pt
│   ├── [ 95K]  data_test_pos.pt
│   ├── [6.5M]  data_train.pt
│   ├── [2.3M]  data_valid_neg.pt
│   └── [ 95K]  data_valid_pos.pt
└── [ 23M]  [01;34mraw[00m
    └── [ 23M]  ratings.dat

  35M used in 2 directories, 6 files


In [None]:
#hide
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d -p recohut,pytorch_lightning

Author: Sparsh A.

Last updated: 2022-01-10 09:09:05

recohut          : 0.0.10
pytorch_lightning: 1.5.8

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.4.144+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 2
Architecture: 64bit

numpy  : 1.19.5
pandas : 1.1.5
torch  : 1.10.0+cu111
IPython: 5.5.0

