In [1]:
import os
import numpy as np
import random

import torch
import torch.nn as nn

from model import *

import arguments

In [2]:
import utils.load_dataset
import utils.data_loader
import utils.metrics
from utils.early_stop import EarlyStopping, Stop_args

In [52]:
# Experiment configuration: dataset name, per-model optimizer settings,
# training schedule, and the random seed shared by the cells below.
dataset = 'yahooR3'

base_model_args = {
  'emb_dim': 10,
  'learning_rate': 0.01,
  'imputaion_lambda': 0.01,  # key spelling is kept exactly as-is; it is a runtime lookup key
  'weight_decay': 1,
}
weight1_model_args = {
  'learning_rate': 0.1,
  'weight_decay': 0.001,
}
weight2_model_args = {
  'learning_rate': 1e-3,
  'weight_decay': 1e-2,
}
imputation_model_args = {
  'learning_rate': 1e-1,
  'weight_decay': 1e-4,
}
training_args = {
  'batch_size': 1024,
  'epochs': 100,
  'patience': 20,
  'block_batch': [1000, 100],  # read below as [u_batch_size, i_batch_size]
}

uniform_ratio = 0.05
seed = 0

In [4]:
# Apply the configured `seed` through the project's helper.
# NOTE(review): presumably seeds the Python/NumPy/torch RNGs — confirm in train_implicit.
from train_implicit import setup_seed
setup_seed(seed)

In [5]:
# Prefer the GPU when one is visible to torch; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)


cpu


In [6]:
# Load the splits for `dataset` in implicit mode; the loader's four return values are unpacked in order.
# NOTE(review): `unif_train` is presumably the uniformly-collected subset — confirm in utils.load_dataset.
train, unif_train, validation, test = utils.load_dataset.load_dataset(data_name=dataset, type='implicit', seed=seed, device=device)

## Load Dataset

### Indices: 
`When working with sparse tensors, the indices refer to the positions of non-zero elements in the tensor. In PyTorch's COO format it is a tensor of shape (D, N), where D corresponds to the number of dimensions or axes of the tensor and N represents the number of non-zero elements. Each column in the indices tensor holds the coordinates of one non-zero element in the sparse tensor (in the output below, the first row holds the user indices and the second row the item indices).`

### Value:
`The value refers to the actual non-zero values associated with the indices in a sparse tensor. It is a tensor of shape (N,), where N is the number of non-zero elements. Each element in the value tensor corresponds to the value of a non-zero element in the sparse tensor.`

### nnz:
`nnz stands for "number of non-zero elements." It represents the count of non-zero elements present in a sparse tensor. In other words, it denotes the length of the indices and value tensors.`

### Layout:
`The layout of a sparse tensor defines how the indices and values are stored in memory. Torch supports different sparse tensor layouts, such as "torch.sparse_coo", "torch.sparse_csr", and "torch.sparse_csc". Each layout has its own advantages and is suited for specific operations and computations. For example, the "torch.sparse_coo" layout stores the indices and values as separate tensors, while the "torch.sparse_csr" and "torch.sparse_csc" layouts store them in a compressed format.`

In [33]:
# Display the sparse training matrix; the trailing comment is an alternative way to inspect the uniform split.
train # print(unif_train._indices(), unif_train._values())

tensor(indices=tensor([[    0,     0,     0,  ..., 15399, 15399, 15399],
                       [   13,   152,   169,  ...,   563,   636,   948]]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]),
       size=(15400, 1000), nnz=125077, layout=torch.sparse_coo)

In [34]:
# Number of total and non-zero elements in the tensor.
# Coalesce once and reuse the result rather than coalescing the sparse tensor twice.
train_coalesced = train.coalesce()
print(train_coalesced.numel(), train_coalesced._nnz())

15400000 125077


In [27]:
# Print the shape of every split side by side for a quick comparison.
print(train.shape, unif_train.shape, validation.shape, test.shape)

torch.Size([15400, 1000]) torch.Size([15400, 1000]) torch.Size([15400, 1000]) torch.Size([15400, 1000])


## Train & Eval

In [None]:
# train_and_eval(train, 
#               unif_train,
#               validation,
#               test,
#               device, 
#               base_model_args=base_model_args, 
#               weight1_model_args=weight1_model_args, 
#               weight2_model_args=weight2_model_args, 
#               imputation_model_args=imputation_model_args, 
#               training_args=training_args)

In [49]:
# Materialize the sparse training matrix as a dense tensor (one entry per user-item pair);
# `train_data` is just another name for `train`, no copy is made by the assignment.
train_data = train
train_dense = train_data.to_dense()
train_dense

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [50]:
unif_train_data = unif_train
# Unpacking the indices tensor along its first axis yields its first row (user coordinates)
# and its second row (item coordinates); the values are the matching entries.
users_unif, items_unif = unif_train_data._indices()
y_unif = unif_train_data._values()

In [53]:
# Build the training data loader (block-matrix loader): `block_batch` supplies the
# user-side batch size first and the item-side batch size second.
# NOTE(review): presumably each batch is a (users x items) block of the matrix — confirm in utils.data_loader.Block.

train_loader = utils.data_loader.Block(train_data,
                                       u_batch_size=training_args['block_batch'][0],
                                       i_batch_size=training_args['block_batch'][1],
                                       device=device)

In [58]:
val_data, test_data = validation, test

# Both evaluation loaders use identical settings (fixed order, no worker processes),
# so they are built by one expression: validation first, then test.
val_loader, test_loader = (
  utils.data_loader.DataLoader(
    utils.data_loader.Interactions(split),
    batch_size=training_args['batch_size'],
    shuffle=False,
    num_workers=0,
  )
  for split in (val_data, test_data)
)

In [60]:
# Data shape: the first axis of the training matrix indexes users, the second indexes items.
n_user, n_item = train_data.shape
print(n_user, n_item)

15400 1000


## Models

In [None]:
# class MF(nn.Module):
#   """
#   Base module for matrix factorization
#   """
#   def __init__(self, n_user, n_item, dim=40, dropout=0, init=None)

register_buffer 로 layer를 등록하면 어떤 특징이 있는가?

1. optimizer가 업데이트하지 않는다.

2. 그러나 값은 존재한다(하나의 layer로써 작용한다고 보면 된다.)

3. state_dict()로 확인이 가능하다.

4. GPU연산이 가능하다.

 

따라서 네트워크를 구성함에 있어서 네트워크를 end2end로 학습시키고 싶은데 중간에 업데이트를 하지않는 일반 layer를 넣고 싶을 때 사용할 수 있다.

In [70]:
def to_var(x, requires_grad=True):
  """Return `x` as a leaf tensor with gradient tracking set as requested.

  Args:
    x: tensor to wrap; it is moved to the GPU first when CUDA is available.
    requires_grad: whether autograd should track operations on the result.

  Returns:
    A tensor detached from any existing graph that shares storage with `x`
    (after the optional device move) and has `requires_grad` set accordingly.
  """
  if torch.cuda.is_available():
    x = x.cuda()
  # Plain tensors track gradients through `requires_grad`, so wrapping in the
  # deprecated `Variable` is unnecessary (and that name is not imported
  # explicitly here). `detach()` gives a fresh leaf without touching the
  # caller's tensor flags.
  return x.detach().requires_grad_(requires_grad)


In [71]:
from model import MetaModule, MetaEmbed
class MetaEmbed(MetaModule):
  """
  Embedding table whose weight is registered as a buffer instead of an nn.Parameter.

  Note: this local definition replaces the `MetaEmbed` name imported from `model`.
  """
  def __init__(self, dim_1, dim_2):
    super().__init__()
    # A throwaway nn.Embedding is created only to borrow its initial weight values.
    template = nn.Embedding(dim_1, dim_2)
    initial_weight = to_var(template.weight.data, requires_grad=True)

    self.register_buffer('weight', initial_weight)
    self.register_buffer('bias', None)

  def forward(self):
    """Return the whole weight table; no index lookup is performed here."""
    return self.weight

  def named_leaves(self):
    """Return (name, tensor) pairs for the two registered buffers; 'bias' is registered as None."""
    leaves = [('weight', self.weight)]
    leaves.append(('bias', self.bias))
    return leaves

In [None]:
# class MetaMF(MetaModule):
#   """ 
#   Base module for matrix factorization
#   """
  
#   def __init__(self, n_user, n_item, dim=40, dropout=0, init=None):
#     super().__init__()
    
#     self.user_latent = MetaEmbed(n_user, dim)

In [64]:
class OneLinear(nn.Module):
  """
  linear model: r

  Holds one learnable scalar per index value and returns the scalar selected
  by each input index.
  """
  def __init__(self, n):
    """
    Args:
      n: number of distinct index values (rows of the bias table).
    """
    super().__init__()

    self.bias = nn.Embedding(n, 1)
    self.init_embedding()

  def init_embedding(self):
    """Scale the default embedding initialization down by a factor of 0.01."""
    # In-place update under no_grad instead of going through `.data`.
    with torch.no_grad():
      self.bias.weight.mul_(0.01)

  def forward(self, values):
    """
    Args:
      values: integer index tensor with entries in [0, n).

    Returns:
      Tensor with the same shape as `values` holding the looked-up scalars.
    """
    d_bias = self.bias(values)
    # Drop only the trailing embedding axis: a bare squeeze() would also
    # collapse a batch of one element into a 0-d tensor.
    return d_bias.squeeze(-1)


In [65]:
class TwoLinear(nn.Module):
  """
  linear model: u + i

  Sums one learnable scalar per user and one learnable scalar per item.
  """
  def __init__(self, n_user, n_item):
    """
    Args:
      n_user: number of users (rows of the user bias table).
      n_item: number of items (rows of the item bias table).
    """
    super().__init__()

    self.user_bias = nn.Embedding(n_user, 1)
    self.item_bias = nn.Embedding(n_item, 1)

    self.init_embedding(0)

  def init_embedding(self, init):
    """Re-initialize both bias tables with Kaiming-normal values; `init` is passed as `a`."""
    nn.init.kaiming_normal_(self.user_bias.weight, mode='fan_out', a=init)
    nn.init.kaiming_normal_(self.item_bias.weight, mode='fan_out', a=init)

  def forward(self, users, items):
    """
    Args:
      users: integer user-index tensor.
      items: integer item-index tensor, same shape as `users`.

    Returns:
      Tensor with the same shape as the inputs: user bias plus item bias per pair.
    """
    u_bias = self.user_bias(users)
    i_bias = self.item_bias(items)
    preds = u_bias + i_bias

    # Drop only the trailing embedding axis: a bare squeeze() would also
    # collapse a batch of one pair into a 0-d tensor.
    return preds.squeeze(-1)

In [66]:
class ThreeLinear(nn.Module):
  """
  linear model: u + i + r / o

  Sums one learnable scalar per user, one per item, and one per data value.
  """
  def __init__(self, n_user, n_item, n):
    """
    Args:
      n_user: number of users (rows of the user bias table).
      n_item: number of items (rows of the item bias table).
      n: number of distinct data values (rows of the data bias table).
    """
    super().__init__()

    self.user_bias = nn.Embedding(n_user, 1)
    self.item_bias = nn.Embedding(n_item, 1)
    self.data_bias = nn.Embedding(n, 1)
    self.init_embedding(0)

  def init_embedding(self, init):
    """Kaiming-normal init for all three tables; the data bias is then scaled by 0.001."""
    nn.init.kaiming_normal_(self.user_bias.weight, mode='fan_out', a=init)
    nn.init.kaiming_normal_(self.item_bias.weight, mode='fan_out', a=init)
    nn.init.kaiming_normal_(self.data_bias.weight, mode='fan_out', a=init)
    # In-place update under no_grad instead of going through `.data`.
    with torch.no_grad():
      self.data_bias.weight.mul_(0.001)

  def forward(self, users, items, values):
    """
    Args:
      users: integer user-index tensor.
      items: integer item-index tensor, same shape as `users`.
      values: integer data-value index tensor, same shape as `users`.

    Returns:
      Tensor with the same shape as the inputs: sum of the three looked-up biases.
    """
    u_bias = self.user_bias(users)
    i_bias = self.item_bias(items)
    d_bias = self.data_bias(values)

    preds = u_bias + i_bias + d_bias
    # Drop only the trailing embedding axis: a bare squeeze() would also
    # collapse a batch of one triple into a 0-d tensor.
    return preds.squeeze(-1)

In [67]:
# Weight model over (user, item) pairs and its Adam optimizer.
weight1_model = TwoLinear(n_user, n_item).to(device)
# Use weight1's own hyper-parameters from `weight1_model_args`, matching how the
# weight2 and imputation optimizers read theirs (not the base model's settings).
weight1_optimizer = torch.optim.Adam(weight1_model.parameters(),
                                     lr=weight1_model_args['learning_rate'],
                                     weight_decay=weight1_model_args['weight_decay'])

In [68]:
# Weight model over (user, item, value) triples; the data-bias table has 2 rows.
weight2_model = ThreeLinear(n_user, n_item, 2).to(device)
weight2_optimizer = torch.optim.Adam(
  weight2_model.parameters(),
  lr=weight2_model_args['learning_rate'],
  weight_decay=weight2_model_args['weight_decay'],
)

In [69]:
# Imputation model: a 2-row bias table, optimized with its own Adam settings.
imputation_model = OneLinear(2).to(device)
imputation_optimizer = torch.optim.Adam(
  imputation_model.parameters(),
  lr=imputation_model_args['learning_rate'],
  weight_decay=imputation_model_args['weight_decay'],
)

In [None]:
# Squared-error criteria: one summed over all elements, one left element-wise.
sum_criterion, none_criterion = (
  nn.MSELoss(reduction=mode) for mode in ('sum', 'none')
)