In [None]:
import tensorflow as tf
import random
import os
import pickle
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
def seed_everything(seed: int = 42):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's global seed, and writes ``PYTHONHASHSEED`` into the
    process environment.

    Note: ``PYTHONHASHSEED`` is read when the interpreter starts, so the
    assignment below does not change string hashing in the kernel that is
    already running.

    Args:
        seed: Integer seed handed to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)


seed_everything(seed=42)

In [None]:
# Mount Google Drive at /content/drive so the files under that prefix can be
# read and written by the cells below (requires the Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Directory on the mounted Drive that holds the pickled splits. The trailing
# '/' matters: file names are appended to it by plain string concatenation.
path = '/content/drive/MyDrive/Colab Notebooks/캡스톤/data/'

In [None]:
def _read_id_split(file_name):
    """Unpickle one id-data file located under ``path``.

    pickle can run arbitrary code while loading, so only files produced by
    this project should be opened here.
    """
    with open(path + file_name, "rb") as fr:
        return pickle.load(fr)


# Model inputs for the train / validation / test splits.
train_data = _read_id_split("train_id_data.pickle")
valid_data = _read_id_split("valid_id_data.pickle")
test_data = _read_id_split("test_id_data.pickle")

In [None]:
def _read_target_split(file_name):
    """Unpickle one target file located under ``path``.

    pickle can run arbitrary code while loading, so only files produced by
    this project should be opened here.
    """
    with open(path + file_name, "rb") as fr:
        return pickle.load(fr)


# Regression targets for the train / validation / test splits.
train_target = _read_target_split("train_target.pickle")
valid_target = _read_target_split("valid_target.pickle")
test_target = _read_target_split("test_target.pickle")

In [None]:
# ID inputs: replace the first two entries of `train_data` with float32
# tensors, in place. Only the training split is converted here.
# NOTE(review): presumably entry 0 feeds `input_1` and entry 1 feeds
# `input_2` of the model built below — confirm against the pickled layout.
for input_idx in (0, 1):
    train_data[input_idx] = tf.convert_to_tensor(train_data[input_idx],
                                                 dtype=tf.float32)

## Model

In [None]:
from tensorflow import keras
from keras.layers import Input, Embedding, LSTM, Dense, Lambda, Multiply, Bidirectional, Flatten, BatchNormalization, Add
# Conv1D, MaxPool1D and TimeDistributed come from the public `keras.layers`
# namespace. The internal module paths used previously
# (keras.layers.convolutional / .pooling / .wrappers) are not part of the
# documented API and are not present in every Keras release.
from keras.layers import Conv1D, MaxPool1D, TimeDistributed
from keras.models import Model
import numpy as np
import keras.backend as K
from random import randint

import os
# Restrict TensorFlow to the first GPU, ordered by PCI bus id.
# NOTE(review): TensorFlow has already been imported and used by earlier
# cells, so these variables may be set too late to take effect — confirm.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Number of sequences per sample on the first model input; also the repeat
# count used by the `repeat` / `repeat2` helpers.
description_num = 5
#current_path ='data/'
#local_path = 'data/'

In [None]:
# Model hyperparameters.
# V is the Embedding `input_dim` (number of distinct ids). The other figures
# kept in these comments (142381, 226928, 33531, 6158) look like sizes used
# for other data variants — TODO confirm.
# V = 142381
# V = 226928
V = 48456  # alternatives seen: 33531, 6158, 48456
# Width of each embedding vector (Embedding `output_dim`).
embedding_dim = 50
# Length of every input sequence; also used as the MaxPool1D pool size.
max_len = 1  # alternatives seen: 1945, 77, 1
# Conv1D settings. With max_len = 1 and padding='same' the size-3 kernel only
# ever covers a single real timestep.
filters_num = 64
kernel_size = 3

In [None]:
# Two inputs: `input_1` carries `description_num` sequences of length
# `max_len` per sample, `input_2` carries a single sequence of that length.
input_1 = Input(shape=(description_num, max_len))
input_2 = Input(shape=(max_len,))
# Every layer below is instantiated once and applied to both inputs, so the
# two branches share the same weights.
embedding = Embedding(input_dim=V,
                      output_dim=embedding_dim,
                      input_length=max_len)
conv1d = Conv1D(filters=filters_num,
                kernel_size=kernel_size,
                activation='relu',
                padding='same')
maxpool1d = MaxPool1D(max_len)
dense = Dense(15)

# Each stage runs twice: wrapped in TimeDistributed for `input_1` (applied to
# every one of the `description_num` slots) and directly for `input_2`.
# The shapes quoted are the ones printed by the shape check in this notebook.

# Embedding -> (None, 5, 1, 50) and (None, 1, 50).
input_vector = TimeDistributed(embedding)(input_1)
validation_vector = embedding(input_2)

# Convolution with ReLU -> (None, 5, 1, 64) and (None, 1, 64).
convolutional_vector = TimeDistributed(conv1d)(input_vector)
validation_conv = conv1d(validation_vector)

# Max pooling over the sequence axis -> (None, 5, 1, 64) and (None, 1, 64).
maxpooling_vector = TimeDistributed(maxpool1d)(convolutional_vector)
validation_maxpooling = maxpool1d(validation_conv)

# Dense projection to 15 features -> (None, 5, 1, 15) and (None, 1, 15).
middle_output = TimeDistributed(dense)(maxpooling_vector)
middle_validation = dense(validation_maxpooling)

In [None]:
# Shape check after every shared stage; tensors are listed pairwise as
# (input_1 branch, input_2 branch) and printed on a single line.
stage_outputs = (
    input_vector, validation_vector,
    convolutional_vector, validation_conv,
    maxpooling_vector, validation_maxpooling,
    middle_output, middle_validation,
)
print(*(stage_tensor.shape for stage_tensor in stage_outputs))

(None, 5, 1, 50) (None, 1, 50) (None, 5, 1, 64) (None, 1, 64) (None, 5, 1, 64) (None, 1, 64) (None, 5, 1, 15) (None, 1, 15)


In [None]:
def change_dim_1(X):
    """Remove axis 1 of ``X`` (that axis must have size 1)."""
    return K.squeeze(X, axis=1)

def change_dim_2(X):
    """Remove axis 2 of ``X`` (that axis must have size 1)."""
    return K.squeeze(X, axis=2)

def repeat(X):
    """Repeat the elements of ``X`` ``description_num`` times along axis 1."""
    return K.repeat_elements(X, rep=description_num, axis=1)

def repeat1(X):
    """Repeat the elements of ``X`` 15 times along axis 2.

    The count 15 is hard-coded; it equals the unit count of the ``Dense(15)``
    layer defined in this notebook.
    """
    return K.repeat_elements(X, rep=15, axis=2)

def repeat2(X):
    """Repeat the elements of ``X`` ``description_num`` times along axis 1.

    Performs the same operation as ``repeat``.
    """
    return K.repeat_elements(X, rep=description_num, axis=1)

def dot(X, Y):
    """Return the backend dot product of ``X`` and ``Y``."""
    product = K.dot(X, Y)
    return product

def sum_item(X):
    """Sum ``X`` over axis 2, dropping that axis."""
    summed = K.sum(X, axis=2)
    return summed

def sqrt_item(X):
    """Return the element-wise square root of ``X``."""
    root = K.sqrt(X)
    return root

def cal_denominator(X):
    """Return the element-wise reciprocal of ``X``, guarded against zero.

    In this notebook ``X`` is the product of two vector norms, so it is
    non-negative. A zero norm would make a plain ``1 / X`` infinite, and the
    following multiplication by a zero numerator would yield NaN. Values
    below ``K.epsilon()`` are therefore raised to ``K.epsilon()`` before the
    division; every other value gives exactly the same result as ``1 / X``.

    Args:
        X: Non-negative tensor of denominators.

    Returns:
        Tensor of the same shape as ``X`` holding the guarded reciprocals.
    """
    return 1 / K.maximum(X, K.epsilon())

def expand_item(X):
    """Insert a new size-1 axis at position 2 of ``X``."""
    return K.expand_dims(X, axis=2)

def expand_rate(X):
    """Insert a new size-1 axis at position 1 of ``X``."""
    return K.expand_dims(X, axis=1)

def sum_user(X):
    """Sum ``X`` over axis 1, dropping that axis."""
    return K.sum(X, axis=1)

def sum_rate(X):
    """Sum ``X`` over axis 1, dropping that axis."""
    return K.sum(X, axis=1)

def single_exp(X):
    """Return the element-wise exponential of ``X``."""
    exponentiated = K.exp(X)
    return exponentiated

def sum_exp_denominator(X):
    """Return ``1 / sum(exp(X))`` with the sum taken over axis 1."""
    exp_total = K.sum(K.exp(X), 1)
    return 1 / (exp_total)

In [None]:
# Build the rating head: cosine similarity between each of the
# `description_num` vectors from the `input_1` branch and the single vector
# from the `input_2` branch, then a similarity-weighted sum and a dot product.
# Shapes assume description_num = 5 and the 15-unit dense layer.

# Bring both branches to (batch, 5, 15).
middle_output_final = Lambda(change_dim_2)(middle_output)  # (batch, 5, 1, 15) -> (batch, 5, 15)
middle_validation_final = Lambda(repeat)(middle_validation)  # (batch, 1, 15) -> (batch, 5, 15)
# Dot product per slot -> (batch, 5).
# NOTE(review): `molecule` presumably means "numerator" — confirm.
molecule = Multiply()([middle_output_final, middle_validation_final])
molecule = Lambda(sum_item)(molecule)

# L2 norm of each `input_1` vector -> (batch, 5).
denominator1 = Multiply()([middle_output_final, middle_output_final])
denominator1 = Lambda(sum_item)(denominator1)
denominator1 = Lambda(sqrt_item)(denominator1)

# L2 norm of the (repeated) `input_2` vector -> (batch, 5).
denominator2 = Multiply()([middle_validation_final, middle_validation_final])
denominator2 = Lambda(sum_item)(denominator2)
denominator2 = Lambda(sqrt_item)(denominator2)


# Reciprocal of the product of the two norms -> (batch, 5).
denominator = Multiply()([denominator1, denominator2])
denominator = Lambda(cal_denominator)(denominator)

# Cosine similarity per slot, then broadcast across the 15 features:
# (batch, 5) -> (batch, 5, 1) -> (batch, 5, 15).
similarity = Multiply()([molecule, denominator])
similarity = Lambda(expand_item)(similarity)
similarity = Lambda(repeat1)(similarity)


# Similarity-weighted sum of the `input_1` vectors -> (batch, 15).
user = Multiply()([similarity, middle_output_final])
user = Lambda(sum_user)(user)


# The `input_2` vector without its size-1 axis -> (batch, 15).
item = Lambda(change_dim_1)(middle_validation)


# Predicted rating: dot product of `user` and `item`, reshaped to (batch, 1).
rate_hat = Multiply()([user, item])
rate_hat = Lambda(sum_rate)(rate_hat)
rate_hat = Lambda(expand_rate)(rate_hat)

In [None]:
# Wrap the graph into a trainable model that maps the two inputs to the
# predicted rating.
model = Model(
    inputs=[input_1, input_2],
    outputs=rate_hat,
)
# Mean squared error is the training loss; it is also listed among the
# metrics next to MAE and MAPE, so it is reported twice.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['mse', 'mae', 'mape'],
)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 5, 1)]       0           []                               
                                                                                                  
 input_5 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 time_distributed_4 (TimeDistri  (None, 5, 1, 50)    2422800     ['input_4[0][0]']                
 buted)                                                                                           
                                                                                                  
 embedding_1 (Embedding)        (None, 1, 50)        2422800     ['input_5[0][0]']          

In [None]:
# Train the model, running a validation pass on the validation split after
# every epoch. The returned History object is kept in `hist`.
# NOTE(review): no batch_size is passed; with in-memory inputs Keras may
# derive it from the step counts below — confirm for the installed version.
steps_per_pass = 256  # same step count for the training and validation passes
num_epochs = 30
hist = model.fit(
    x=train_data,
    y=train_target,
    validation_data=(valid_data, valid_target),
    steps_per_epoch=steps_per_pass,
    validation_steps=steps_per_pass,
    epochs=num_epochs,
    verbose=1,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Earlier generator-based evaluation calls, kept for reference:
# scores = model.evaluate_generator(_evaluate_generator('train_whole_list_5s.txt','test_whole_list_5s.txt'),steps=134103)
# scores = model.evaluate_generator(_evaluate_generator('train_whole_list_10.txt','test_whole_list_10.txt'),steps=34266)
# scores = model.evaluate_generator(_evaluate_generator('train_whole_list_6s.txt','test_whole_list_6s.txt'),steps=134103)

# Evaluate on the test split. `scores` lists the loss followed by the
# compiled metrics, i.e. [loss, mse, mae, mape].
scores = model.evaluate(x=test_data, y=test_target, steps=2500)
print(scores)

[2.6005020141601562, 2.6005020141601562, 1.2390682697296143, 19.76276206970215]


In [None]:
# Write the trained model to Google Drive.
# NOTE(review): this directory contains an extra '학교수업/' component compared
# with the data `path` used for loading — confirm that is intended.
model.save('/content/drive/MyDrive/Colab Notebooks/학교수업/캡스톤/model/Anime_id_30')

INFO:tensorflow:Assets written to: /content/drive/MyDrive/Colab Notebooks/학교수업/캡스톤/model/Anime_id_30/assets
