# Training Process of IGMC (Inductive Graph based Matrix Completion)

### 연세대학교 응용통계학과 DSL 추천시스템 2팀

Data : MovieLens-1M Dataset  
Source Code : https://github.com/muhanzhang/IGMC  
Implemented by : App.Stat. 김선우  
Co-Worker : App.Stat. 김경훈 송재용 UIC 홍석현

In [None]:
import numpy as np
import pandas as pd
## 아래 얘는 따로 놀긴 하는데, 위에 애들이 반환하는 csr matrix로 갖고 논다

from data_utils import *
from preprocessing import *
from util_functions import *
from prediction_file import *
from model import *
from train import *
from torch import torch
import multiprocessing as mp

In [None]:
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear, Conv1d
from torch_geometric.nn import GCNConv, RGCNConv, global_sort_pool, global_add_pool
from torch_geometric.utils import dropout_adj
from util_functions import *
import pdb
import time

### Step 1. Re-Ordering the index and values

In [None]:
# Read the MovieLens-1M ratings CSV from the author's local disk and preview
# its first ten rows.
ratings_csv_path = "C:\\Users\\kswoo\\OneDrive\\바탕 화면\\IGMC\\ml-1m\\new_ratings.csv"
data = pd.read_csv(ratings_csv_path)
data.head(10)

In [None]:
# Unpack the nine values load_data returns for the ratings table.
(
    n_users,
    n_items,
    u_nodes,
    v_nodes,
    ratings,
    u_f,
    v_f,
    u_dict,
    v_dict,
) = load_data(data)

In [None]:
# Report the user and item counts.
print('Number of Users :', n_users)
print('Number of Items :', n_items)

In [None]:
# Seed both the CPU and CUDA random number generators with the same value.
SEED = 971120
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Value passed as max_nodes_per_hop when the subgraph datasets are built.
max_nodes_per_hop = 1000

### Step 2. Generate Input Subgraph

In [None]:
## Use this cell when a validation split is wanted.
## The call then returns train, validation and test labels/indices.

(
    csm,
    train_label, u_train_idx, v_train_idx,
    val_label, u_val_idx, v_val_idx,
    test_label, u_test_idx, v_test_idx,
    class_values,
) = create_trainvaltest_split(data, validation=True, testing=False)

In [None]:
## Use this cell when no validation split is wanted.
## The call then returns only train and test labels/indices.

(
    csm,
    train_label, u_train_idx, v_train_idx,
    test_label, u_test_idx, v_test_idx,
    class_values,
) = create_trainvaltest_split(data, validation=False, testing=False)

In [None]:
# Build the training dataset from the training (u, v) index pairs and labels.
trl = (u_train_idx, v_train_idx)
train_graphs = MyDynamicDataset(
    root=None,  # original note: not important
    A=csm,  # adjacency matrix
    links=trl,  # the edges, given as the (u, v) index pair above
    labels=train_label,  # target value for each edge
    h=1,  # number of hops; original note: 1 is appropriate
    sample_ratio=1,  # sample ratio
    max_nodes_per_hop=max_nodes_per_hop,  # original note: not needed
    u_features=None,  # side features would go here; none are used
    v_features=None,  # side features would go here; none are used
    class_values=class_values,  # original note: how many rating values there are
    max_num=None,  # original note: not needed
)

In [None]:
# Only run this cell if the validation split was created.

# Build the validation dataset from the validation (u, v) index pairs and labels.
trl = (u_val_idx, v_val_idx)
val_graphs = MyDynamicDataset(
    root=None,  # original note: not important
    A=csm,  # adjacency matrix
    links=trl,  # the edges, given as the (u, v) index pair above
    labels=val_label,  # target value for each edge
    h=1,  # number of hops; original note: 1 is appropriate
    sample_ratio=1,  # sample ratio
    max_nodes_per_hop=max_nodes_per_hop,  # original note: not needed
    u_features=None,  # side features would go here; none are used
    v_features=None,  # side features would go here; none are used
    class_values=class_values,  # original note: how many rating values there are
    max_num=None,  # original note: not needed
)

In [None]:
# Build the test dataset from the test (u, v) index pairs and labels.
trl = (u_test_idx, v_test_idx)
test_graphs = MyDynamicDataset(
    root=None,  # original note: not important
    A=csm,  # adjacency matrix
    links=trl,  # the edges, given as the (u, v) index pair above
    labels=test_label,  # target value for each edge
    h=1,  # number of hops; original note: 1 is appropriate
    sample_ratio=1,  # sample ratio
    # Pass the same value the train/validation cells pass (previously None
    # here), so test subgraphs are built with the same setting as the
    # subgraphs the model is trained on.
    max_nodes_per_hop=max_nodes_per_hop,
    u_features=None,  # side features would go here; none are used
    v_features=None,  # side features would go here; none are used
    class_values=class_values,  # original note: how many rating values there are
)

In [None]:
# Constant kept alongside the model settings.
multiply_by = 1
# Number of entries in class_values.
num_relations = len(class_values)

In [None]:
# Show the train and test dataset summaries, one per line.
print(train_graphs, test_graphs, sep="\n")

### Step 3. Define Model and Start Training

In [None]:
# Instantiate the IGMC model on the training dataset.
model = IGMC(
    train_graphs,
    latent_dim=[32, 32, 32, 32],  # feature dimension of each layer
    # Use the count derived from class_values rather than a hard-coded 5, so
    # the model follows the data if the set of rating values changes.
    num_relations=num_relations,
    num_bases=3,
    regression=True,
    adj_dropout=0,
)

In [None]:
# Count parameters with one numel() call per parameter tensor. The previous
# nested loop iterated over the rows of every tensor, which is slower and
# raises on 0-dimensional parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f'학습할 parameter의 수는 {total_params}')

In [None]:
# Warning hook that also prints the call stack (optional debugging aid).
def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
    """Write the current call stack followed by the formatted warning.

    The parameters mirror ``warnings.showwarning``, so the function can be
    installed as a replacement for it.

    Args:
        message: The warning message (or warning instance).
        category: The warning class.
        filename: File in which the warning was raised.
        lineno: Line number at which the warning was raised.
        file: Optional writable stream; anything without a ``write``
            attribute (including ``None``) falls back to ``sys.stderr``.
        line: Optional source line text forwarded to ``formatwarning``.
    """
    # Imported locally so the hook does not depend on these modules leaking
    # into the namespace through the notebook's star imports.
    import sys
    import traceback
    import warnings

    log = file if hasattr(file, 'write') else sys.stderr
    traceback.print_stack(file=log)
    log.write(warnings.formatwarning(message, category, filename, lineno, line))

# Append per-epoch metrics to a log file and periodically save checkpoints.
def logger(info, model, optimizer, directory, save_interval):
    """Log one epoch's metrics and, every ``save_interval`` epochs, checkpoint.

    Args:
        info: Mapping with the keys ``'epoch'``, ``'train_loss'`` and
            ``'test_rmse'``.
        model: Object whose ``state_dict()`` is saved, or ``None`` to skip.
        optimizer: Object whose ``state_dict()`` is saved, or ``None`` to skip.
        directory: Folder receiving ``log.txt`` and the checkpoint files;
            it is created if it does not exist yet.
        save_interval: Checkpoints are written when ``epoch`` is an int and
            divisible by this value.
    """
    # Imported locally so the function does not rely on `os` arriving via
    # the notebook's star imports.
    import os

    epoch, train_loss, test_rmse = info['epoch'], info['train_loss'], info['test_rmse']

    # Without this, the first open() fails when the result folder is missing.
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, 'log.txt'), 'a') as f:
        f.write("Epoch {}, train loss {:.4f}, test rmse {:.6f}\n".format(
            epoch, train_loss, test_rmse))

    # Non-integer epoch values are logged but never trigger a checkpoint.
    if not (isinstance(epoch, int) and epoch % save_interval == 0):
        return

    print('Saving Model States')
    model_name = os.path.join(directory, 'model_checkpoint{}.pth'.format(epoch))
    optimizer_name = os.path.join(
        directory, 'optimizer_checkpoint{}.pth'.format(epoch))
    if model is not None:
        torch.save(model.state_dict(), model_name)
    if optimizer is not None:
        torch.save(optimizer.state_dict(), optimizer_name)

In [None]:
# Keyword settings for the training run, gathered in one place.
train_kwargs = dict(
    epochs=50,
    batch_size=40,
    lr=0.001,
    lr_decay_factor=0.1,
    lr_decay_step_size=20,
    weight_decay=0,
    ARR=0.002,
    test_freq=1,
    logger=logger,
    continue_from=None,
    res_dir="C:\\Users\\kswoo\\OneDrive\\바탕 화면\\IGMC\\processing\\processing\\model_save",
    save_interval=4,
    multiple_gpu=False,
)

# Start training on the train dataset, evaluating on the test dataset.
train_multiple_epochs(train_graphs, test_graphs, model, **train_kwargs)

바로 위 셀의 `res_dir` 인자에 입력한 경로에 따라서 model과 optimizer가 저장됨.