# Explicit Factor Model (EFM) [Zhang et al., 2014]

In [1]:
import pickle
import time
from collections import deque

from sklearn.utils import shuffle
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_auc_score

import numpy as np
import networkx as nx

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
from multiprocessing import Pool

from scipy.sparse import lil_matrix, csr_matrix, csc_matrix
from scipy.sparse import vstack

import sys

# Fix the PyTorch RNG seed. NOTE: this does not seed NumPy's RNG.
torch.manual_seed(1)
# Pick CUDA when it is available ...
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ... but the very next line overrides that choice, so `device` always ends up
# as the plain string 'cpu'.
device = 'cpu'

## ネットワーク読み込み, implicit feedback, user/item辞書

In [30]:
# Load the knowledge graph from its pickle and print its edge / node counts.
# The file is opened in a `with` block so the handle is closed right after
# loading instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles from a trusted source.
with open('../knowledge_graph/trip_ad/trip_ad_graph.pickle', 'rb') as f:
    KG = pickle.load(f)
print(len(KG.edges()))
print(len(KG.nodes()))

30191
8074


In [67]:
# Implicit feedback: keep every graph edge that has a user node (a node whose
# name starts with 'u_') at either end.
edges = list(KG.edges())
implicit_feedback = [
    edge
    for edge in edges
    if edge[0][0:2] == 'u_' or edge[1][0:2] == 'u_'
]

print(len(implicit_feedback))

23029


In [55]:
# Build the user / item index dictionaries from the implicit-feedback edges.
#   user2idx  / item2idx  : node name with its 2-character prefix stripped -> index
#   user2idx2 / item2idx2 : full (prefixed) node name -> the same index
user2idx = {}
item2idx = {}
user2idx2 = {}
item2idx2 = {}

for f in implicit_feedback:
    # An edge can be stored in either orientation; the end whose name starts
    # with 'u_' is the user, the other end is treated as the item.
    if f[0][0:2] == 'u_':
        user, item = f[0], f[1]
    else:
        user, item = f[1], f[0]

    if user[2:] not in user2idx:
        # Take the index *before* inserting, so that both dictionaries store
        # the same value (previously user2idx2 was off by one).
        user_idx = len(user2idx)
        user2idx[user[2:]] = user_idx
        user2idx2[user] = user_idx
    if item[2:] not in item2idx:
        item_idx = len(item2idx)
        item2idx[item[2:]] = item_idx
        item2idx2[item] = item_idx

# Names in insertion order; position i in *_list and *_list2 refers to the
# same node (stripped name vs. prefixed name).
user_list = list(user2idx)
item_list = list(item2idx)
user_list2 = list(user2idx2)
item_list2 = list(item2idx2)

print('user :{}'.format(len(user_list)))
print('item :{}'.format(len(item_list)))

user :6134
item :1763


In [41]:
# Persist both name -> index dictionaries as pickle files in the working directory.
for out_path, payload in (('user2idx.pickle', user2idx),
                          ('item2idx.pickle', item2idx)):
    with open(out_path, 'wb') as f:
        pickle.dump(payload, f)

## aspectの行列

In [33]:
# Load the per-user and per-item aspect feature dictionaries.
# `with` guarantees the file handles are closed once loading finishes.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles from a trusted source.
with open('../trip_ad_test/aspect_word_feature/user_aspect_feat.pickle', 'rb') as f:
    user_aspect_dict = pickle.load(f)
with open('../trip_ad_test/aspect_word_feature/item_aspect_feat.pickle', 'rb') as f:
    item_aspect_dict = pickle.load(f)

In [57]:
# Re-key the aspect features by integer index instead of by name.
# Raises KeyError if an aspect-dict key is missing from user2idx / item2idx.
user_aspect_dict2 = {
    user2idx[name]: feat for name, feat in user_aspect_dict.items()
}

item_aspect_dict2 = {
    item2idx[name]: feat for name, feat in item_aspect_dict.items()
}

In [59]:
# Persist the index-keyed aspect dictionaries as pickle files.
for out_path, payload in (('user_aspect_dict.pickle', user_aspect_dict2),
                          ('item_aspect_dict.pickle', item_aspect_dict2)):
    with open(out_path, 'wb') as f:
        pickle.dump(payload, f)

In [58]:
# Sanity check: print the sizes of the index and aspect dictionaries,
# items first and then users, so they can be compared by eye.
for table in (item2idx, item_aspect_dict, item_aspect_dict2):
    print(len(table))

for table in (user2idx, user_aspect_dict2, user_aspect_dict):
    print(len(table))

1763
1763
1763
6134
6134
6134


## Negaデータづくり

In [69]:
# Draw as many negative (user, item) pairs as there are positive edges.
#
# The positive edges are first normalised to (user, item) order with their
# full prefixed names and stored in a set. The previous membership test
# compared prefix-stripped names against the raw edge list, so it could never
# match and observed interactions were not excluded. The set also makes each
# test O(1) instead of a scan over the whole edge list.
posi_pairs = set()
for e in implicit_feedback:
    if e[0][0:2] == 'u_':
        posi_pairs.add((e[0], e[1]))
    else:
        posi_pairs.add((e[1], e[0]))

nega_data = []

# NOTE: NumPy's RNG is not seeded here, so the sample differs between runs.
# Duplicate negative pairs are allowed, as before.
while len(nega_data) < len(implicit_feedback):
    idx_u = np.random.randint(0, len(user_list2))
    idx_i = np.random.randint(0, len(item_list2))

    candidate = (user_list2[idx_u], item_list2[idx_i])
    if candidate in posi_pairs:
        # This pair is an observed interaction; draw again.
        continue
    nega_data.append(candidate)
    

## NegaとPosi(Implicit Feedback)の中身を全てIDにする

In [60]:
def swap_u_i(datas):
    """Convert node-name pairs into ``[item_index, user_index]`` rows.

    Each entry of *datas* holds one user node name (prefix ``u_``) and one
    item node name, in either order. The entry is read as (item, user), with
    the two swapped when the first element is the user. The two-character
    prefix is then stripped from both names, which are looked up in the
    module-level ``item2idx`` and ``user2idx`` dictionaries.

    The input sequence and its entries are left unmodified.

    Args:
        datas: Sequence of pairs (tuples or lists) of node-name strings.

    Returns:
        A list of ``[item_index, user_index]`` lists, in input order.

    Raises:
        KeyError: If a stripped name is missing from ``item2idx`` /
            ``user2idx``.
    """
    idx_list = []
    for pair in datas:
        item_name, user_name = pair[0], pair[1]
        # Same 'u_' prefix convention as the rest of the file: if the first
        # element is the user, the pair is stored as (user, item) -> swap.
        if item_name.startswith('u_'):
            item_name, user_name = user_name, item_name

        idx_list.append([item2idx[item_name[2:]], user2idx[user_name[2:]]])

    return idx_list

In [72]:
# Turn the positive and negative name pairs into integer index arrays
# (one [item_index, user_index] row per pair).
posi_rows = swap_u_i(implicit_feedback)
posi_data = np.array(posi_rows)

nega_rows = swap_u_i(nega_data)
nega_data = np.array(nega_rows)

[('u_chufa', 'i_579210'),
 ('u_ElleSheri', 'i_222959'),
 ('u_Dan-Guthrie', 'i_530096'),
 ('u_sxa93', 'i_312457'),
 ('u_BenVanHalen', 'i_310303'),
 ('u_ssjj2007', 'i_590144'),
 ('u_roar_03', 'i_112412'),
 ('u_KalleGrabowski', 'i_77264'),
 ('u_hoteljunkies', 'i_77714'),
 ('u_JLR', 'i_601289'),
 ('u_MC-7', 'i_77866'),
 ('u_fc1', 'i_271939'),
 ('u_QueenRabbit', 'i_81042'),
 ('u_iamsiam', 'i_256666'),
 ('u_Sam39', 'i_292142'),
 ('u_kiwi400', 'i_558796'),
 ('u_Sharpette', 'i_305365'),
 ('u_Alexcat', 'i_302175'),
 ('u_bg148', 'i_208454'),
 ('u_SMTORD', 'i_259330'),
 ('u_mango_eater', 'i_556595'),
 ('u_inkwave17', 'i_112328'),
 ('u_cornish007', 'i_302434'),
 ('u_beach47', 'i_897108'),
 ('u_viajera79', 'i_217844'),
 ('u_mjhill1004', 'i_228483'),
 ('u_vermeer', 'i_150855'),
 ('u_cruztopless', 'i_256540'),
 ('u_nene-nono', 'i_218100'),
 ('u_Christophe_be', 'i_305294'),
 ('u_guyinAZ', 'i_98675'),
 ('u_naven', 'i_607470'),
 ('u_jeckel13', 'i_238569'),
 ('u_CnsteFr', 'i_231505'),
 ('u_aber-fan', 'i_

In [40]:
# Persist the positive and negative index arrays as pickle files.
for out_path, payload in (('posi_data.pickle', posi_data),
                          ('nega_data.pickle', nega_data)):
    with open(out_path, 'wb') as f:
        pickle.dump(payload, f)