# Explicit Factor Model [Zhang et al.]

In [1]:
import pickle
import time
from collections import deque

from sklearn.utils import shuffle
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score

import numpy as np
import networkx as nx

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
from multiprocessing import Pool

from scipy.sparse import lil_matrix, csr_matrix, csc_matrix
from scipy.sparse import vstack

import sys
import importlib

import models
import evaluate
import iterate_train

torch.manual_seed(1)  # fixed seed so repeated runs start from the same RNG state

# Runs are pinned to the CPU; set FORCE_CPU to False to pick CUDA when it is
# available instead.
FORCE_CPU = True
if FORCE_CPU:
    device = 'cpu'
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## データ読み込み

- positive (`posi`) and negative (`nega`) samples  
- 80/20 train/test split

In [2]:
# Load the positive and negative samples.
# NOTE: unpickling can execute arbitrary code; only load trusted dataset files.
with open('./dataset/posi_data.pickle', 'rb') as f:
    posi_data = pickle.load(f)
with open('./dataset/nega_data.pickle', 'rb') as f:
    nega_data = pickle.load(f)

# 80/20 train/test split. The cut index is computed from the positive set and
# reused for the negative set.
# NOTE(review): presumably posi_data and nega_data have the same length - confirm.
train_num = int(0.8 * len(posi_data))
train_posi, test_posi = posi_data[:train_num], posi_data[train_num:]
train_nega, test_nega = nega_data[:train_num], nega_data[train_num:]

# Training inputs: positives first, then negatives.
train_data = np.concatenate([train_posi, train_nega])

In [3]:
# Binary labels aligned with the positives-then-negatives ordering:
# 1 for every positive sample, 0 for every negative sample.
target_train = np.array([1] * len(train_posi) + [0] * len(train_nega))
target_test = np.array([1] * len(test_posi) + [0] * len(test_nega))

- user/item aspect dictionaries

In [4]:
# Load the per-user and per-item aspect dictionaries.
# NOTE: unpickling can execute arbitrary code; only load trusted dataset files.
with open('./dataset/aspect/user_aspect_dict.pickle', 'rb') as f:
    user_aspect_dict = pickle.load(f)
with open('./dataset/aspect/item_aspect_dict.pickle', 'rb') as f:
    item_aspect_dict = pickle.load(f)

- user/item index dictionaries (`user2idx`, `item2idx`)

In [5]:
# Load the user and item index dictionaries.
# NOTE: unpickling can execute arbitrary code; only load trusted dataset files.
with open('./dataset/user_item_dict/user2idx.pickle', 'rb') as f:
    user2idx = pickle.load(f)
with open('./dataset/user_item_dict/item2idx.pickle', 'rb') as f:
    item2idx = pickle.load(f)

In [None]:
# Train the Zhang model for a fixed number of epochs. Every `evaluate_every`
# epochs (starting with the first) the model is scored on the held-out
# positive/negative samples; every `decay_every` epochs the learning rate is
# multiplied by 0.8. The accumulated loss curve is plotted after each epoch.
if __name__ == '__main__':
    # Sizes
    batch_size = 128

    # Model definition
    # NOTE(review): the "+ 1" presumably reserves one extra index (e.g. for
    # padding/unknown ids) - confirm against the models module.
    user_num = len(user2idx) + 1
    item_num = len(item2idx) + 1
    aspect_num = 6
    embed_size1 = 32
    embed_size2 = 32
    model = models.Zhang(user_num, item_num, aspect_num, embed_size1, embed_size2).to(device)
    itrter = iterate_train.Iterater()
    evlt = evaluate.Evalueter(user_num)

    # Training schedule
    epoch = 1000
    evaluate_every = 5
    # Number of batches per epoch (floor division).
    n_iter = len(train_data) // batch_size
    loss_list = []
    ndcg_list = []  # not populated in this cell
    lr = 0.001
    decay_every = 10

    for i in range(epoch):
        print('epoch: {}'.format(i+1))
        epoch_losses = itrter.iterate_train(
            model,
            train_data,
            target_train,
            user_aspect_dict,
            item_aspect_dict,
            n_iter=n_iter,
            batch_size=batch_size,
            learning_rate=lr,
            print_every=100,
            plot_every=100,
        )
        loss_list.extend(epoch_losses)

        # Evaluate on epochs 1, 1 + evaluate_every, 1 + 2 * evaluate_every, ...
        if i % evaluate_every == 0:
            pr_auc, roc_auc = evlt.evaluate_ranking(model, test_posi, test_nega, metric='map')
            print('pr_auc :{}'.format(pr_auc))
            print('roc_auc :{}'.format(roc_auc))

            # Save the model (disabled):
            # model_name = 'gru_epoch' + str(i) + '.pickle'
            # with open('./rnn_atten_model/' + model_name, 'wb') as f:
            #    pickle.dump(model, f)

        # Show the loss curve accumulated so far.
        plt.plot(loss_list)
        plt.show()

        # Step decay of the learning rate.
        if (i+1) % decay_every == 0:
            lr = lr * 0.8

        print('-'*40)