# import

In [None]:
import copy
import csv
import gc
import math
import os
import random as rn
import re
from collections import deque

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from gensim.models import KeyedVectors, word2vec
from IPython.display import HTML, SVG
from keras import backend as K
from keras import (constraints, initializers, layers, models, optimizers,
                   regularizers)
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.engine.topology import Layer
from keras.layers import (LSTM, Activation, Average, Bidirectional, Dense,
                          Dropout, Embedding, Flatten, Input, Lambda, Masking,
                          Permute, Reshape, merge, multiply)
from keras.models import Model, Sequential, load_model, model_from_json
from keras.preprocessing.sequence import pad_sequences
from keras.utils.vis_utils import model_to_dot, plot_model
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from torch.nn.utils.rnn import pack_padded_sequence
from tqdm import tqdm

pd.set_option('display.max_columns', 100)

In [None]:
torch.cuda.is_available()

In [None]:
# Select the compute device: the first CUDA GPU when one is available,
# otherwise the CPU.
if torch.cuda.is_available():

    # Tell PyTorch to use the first GPU.
    device = torch.device("cuda:0")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    # Report the name of the device that was actually selected. A hard-coded
    # index such as 2 names a different GPU and raises on machines that have
    # fewer than three GPUs.
    print('We will use the GPU:', torch.cuda.get_device_name(device))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

# ハイパーパラメータ

In [None]:
# Training hyperparameters.
# NOTE(review): in the visible part of this notebook LEARNING_RATE and
# LSTM_UNITS are never read -- the optimizer is built with lr = 1e-3 and
# hidden_size is set to 64 directly. Confirm which values are intended.
LEARNING_RATE = 0.01
LSTM_UNITS = 64
BATCH_SIZE = 16  # batch size passed to both DataLoaders
DROPOUT = 0.1  # dropout probability passed to the model
EPOCHS = 200  # number of passes of the training loop

# データサイズ

In [None]:
# Number of recipes taken from each split. The train/dev sizes are used below
# to slice the prepared lists; test_recipe_size is not read in the visible
# part of the notebook.
train_recipe_size = 10000
dev_recipe_size = 5000
test_recipe_size = 10000

In [None]:
# Load the saved gensim Word2Vec model from word2vec_path.
word2vec_path = '/hoge/hoge.model'
word2vec_matrix = word2vec.Word2Vec.load(word2vec_path)

In [None]:
import os
import numpy as np
import tensorflow.python.keras.backend as K
import tensorflow as tf
import random as rn
import json

from sklearn.metrics import roc_curve, auc
from keras.preprocessing import sequence
from keras.backend.tensorflow_backend import set_session

# Fix the random seeds.
def seed(seed):
    """Seed the Python, NumPy and TensorFlow RNGs and install a TF session.

    A single session is created whose config both restricts TensorFlow to one
    intra-op and one inter-op thread and enables on-demand GPU memory growth.
    Creating two sessions in a row would leave only the last one installed in
    the Keras backend, dropping the options of the first and leaking it.

    PyTorch is not seeded here; callers use ``torch.manual_seed`` for that.

    Args:
        seed: Integer seed applied to ``random``, ``numpy.random`` and
            TensorFlow's graph-level seed.
    """
    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(seed)
    rn.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    session_conf = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    # Allocate GPU memory on demand instead of grabbing it all up front.
    session_conf.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(
        graph=tf.compat.v1.get_default_graph(), config=session_conf)
    tf.compat.v1.keras.backend.set_session(session)

# 時間呼び出し

In [None]:
# Paths of the per-split CSV files that hold the cooking times.
train_path = '/hoge/hoge.csv'
dev_path = '/hoge/hoge.csv'
test_path = '/hoge/hoge.csv'
# Load each split; later cells read the 'id' and 'time' columns of these frames.
df_id_process_time_turn_divide_time_train = pd.read_csv(train_path)
df_id_process_time_turn_divide_time_dev = pd.read_csv(dev_path)
df_id_process_time_turn_divide_time_test = pd.read_csv(test_path)

## 時間をlogに変換

In [None]:
def realtime_to_logtime(x):
    """Return the natural logarithm of ``x``; an input equal to 0 maps to 0."""
    return math.log(x) if x != 0 else 0

In [None]:
# Collapse the per-row times into one log cooking time per recipe.
def process_log_time(df):
    """Return the log time of the first row of every run of equal ids.

    Rows are scanned in order; whenever the 'id' value differs from the
    previous row's id, the 'time' of that row is converted with
    ``realtime_to_logtime`` and collected.

    Args:
        df: DataFrame with an 'id' and a 'time' column.

    Returns:
        List with one log time per run of consecutive rows sharing an id.
    """
    log_time_list = []
    previous_id = None
    seen_first_row = False
    # Iterating the two columns directly avoids building a Series per row.
    for recipe_id, cooking_time in zip(df['id'], df['time']):
        # The explicit first-row flag replaces a numeric start value, which
        # would silently skip a first recipe whose id equals that value.
        if not seen_first_row or recipe_id != previous_id:
            seen_first_row = True
            previous_id = recipe_id
            log_time_list.append(realtime_to_logtime(cooking_time))
    return log_time_list

In [None]:
# One log cooking time per training recipe, kept as a list and as a NumPy array.
log_time_list_train = process_log_time(df_id_process_time_turn_divide_time_train)
log_time_array_train = np.array(log_time_list_train)

In [None]:
# One log cooking time per dev recipe, kept as a list and as a NumPy array.
log_time_list_dev = process_log_time(df_id_process_time_turn_divide_time_dev)
log_time_array_dev = np.array(log_time_list_dev)

In [None]:
# One log cooking time per test recipe, kept as a list and as a NumPy array.
log_time_list_test = process_log_time(df_id_process_time_turn_divide_time_test)
log_time_array_test = np.array(log_time_list_test)

## レシピの最大手順をリストにする

In [None]:
# Paths of the CSV files that are loaded below into the max_turn_*_list variables.
train_max_turn_path = '/hoge/hoge.csv'
dev_max_turn_path = '/hoge/hoge.csv'
test_max_turn_path = '/hoge/hoge.csv'

In [None]:
df_max_turn_train= pd.read_csv(train_max_turn_path, sep=",")

# Each CSV row becomes one inner list; later cells read element [0] of a row
# as that recipe's step count.
max_turn_train_list = df_max_turn_train.values.tolist()

In [None]:
df_max_turn_dev= pd.read_csv(dev_max_turn_path, sep=",")

# Each CSV row becomes one inner list; later cells read element [0] of a row
# as that recipe's step count.
max_turn_dev_list = df_max_turn_dev.values.tolist()

In [None]:
df_max_turn_test= pd.read_csv(test_max_turn_path, sep=",")

# Each CSV row becomes one inner list; later cells read element [0] of a row
# as that recipe's step count.
max_turn_test_list = df_max_turn_test.values.tolist()

In [None]:
# The rows are now held as plain lists, so drop the DataFrames and run the
# garbage collector.
del df_max_turn_train
del df_max_turn_dev
del df_max_turn_test
gc.collect()

# 単語にインデックスを割り当てる

In [None]:
# Vocabulary of the loaded Word2Vec model, taken from its `index2word` list.
# NOTE(review): `index2word` looks like the pre-4.0 gensim attribute name --
# confirm against the installed gensim version.
word_list = word2vec_matrix.wv.index2word

In [None]:
# Word indices start at 1; index 0 is reserved for padding.
word_index_dic = {}
for word in word_list:
    # setdefault leaves a word that is already registered untouched, so the
    # next free index is always "number of registered words + 1".
    word_index_dic.setdefault(word, len(word_index_dic) + 1)
# <unk> receives the index right after the last vocabulary word.
index = len(word_index_dic) + 1
word_index_dic['<unk>'] = index

## mecabしたレシピを対応づける

In [None]:
# Paths of the CSV files holding the tokenized steps of each split
# (one CSV row per step, one cell per token, as read by the cells below).
train_mecab_path = '/hoge/hoge.csv'
dev_mecab_path = '/hoge/hoge.csv'
test_mecab_path = '/hoge/hoge.csv'

In [None]:
# Read the tokenized training steps: every CSV row becomes one list of tokens.
# newline='' hands newline handling to the csv module, as its documentation
# requires for files passed to csv.reader.
with open(train_mecab_path, newline='') as f:
    reader = csv.reader(f)
    mecab_procedure_words_train_list = list(reader)

In [None]:
# Read the tokenized dev steps: every CSV row becomes one list of tokens.
# newline='' hands newline handling to the csv module, as its documentation
# requires for files passed to csv.reader.
with open(dev_mecab_path, newline='') as f:
    reader = csv.reader(f)
    mecab_procedure_words_dev_list = list(reader)

In [None]:
# Read the tokenized test steps: every CSV row becomes one list of tokens.
# newline='' hands newline handling to the csv module, as its documentation
# requires for files passed to csv.reader.
with open(test_mecab_path, newline='') as f:
    reader = csv.reader(f)
    mecab_procedure_words_test_list = list(reader)

In [None]:
def get_words_index_list(mecab_procedure_words_list,max_turn_list):
    """Convert tokenized steps into lists of vocabulary indices.

    Args:
        mecab_procedure_words_list: Iterable of steps, each an iterable of
            tokens that are keys of the module-level ``word_index_dic``.
        max_turn_list: Not used by the conversion; kept so that existing
            callers keep working.

    Returns:
        A list with one list of indices per step, in input order.

    Raises:
        KeyError: If a token is missing from ``word_index_dic``.
    """
    return [
        [word_index_dic[word] for word in procedure_words_list]
        for procedure_words_list in mecab_procedure_words_list
    ]

In [None]:
# Vocabulary indices of every training step (one inner list per step).
word_index_train_list = get_words_index_list(mecab_procedure_words_train_list,max_turn_train_list)

In [None]:
# Vocabulary indices of every dev step (one inner list per step).
word_index_dev_list = get_words_index_list(mecab_procedure_words_dev_list,max_turn_dev_list)

In [None]:
# The test tokens were left in their original form (unknown words had not been
# replaced by <unk>), so map every out-of-vocabulary word to '<unk>' here.
# A set gives constant-time membership tests; scanning word_list for every
# token is linear in the vocabulary size.
known_words = set(word_list)
processes_test_list = []
for process in mecab_procedure_words_test_list:
    process_test_list = []
    for word in process:
        process_test_list.append(word if word in known_words else '<unk>')
    processes_test_list.append(process_test_list)

In [None]:
# Vocabulary indices of every test step, built from the <unk>-mapped tokens.
word_index_test_list = get_words_index_list(processes_test_list,max_turn_test_list)

# trainだけベクトルを持つembedding_matrixの作成

In [None]:
# Look up the keys that map to a given value.
def get_keys_from_value(d, val):
    """Return every key of ``d`` whose value equals ``val``, in dict order."""
    matching_keys = []
    for key in d:
        if d[key] == val:
            matching_keys.append(key)
    return matching_keys

In [None]:
# Embedding dimensionality, measured as the length of the first word's vector.
size = len(word2vec_matrix[word_list[0]].tolist())

In [None]:
# All-zero vector of length `size`; assigned to <unk> when the embedding matrix is built.
unk_list = np.zeros(size).tolist()

In [None]:
# The matrix has one row per word of the whole vocabulary (train, dev and
# test), but only indices that occur in the training steps receive their
# word2vec vector. Every other row, including <unk>, stays all-zero.
size = len(word2vec_matrix[word_list[0]].tolist())
embedding_matrix = np.zeros((len(word_index_dic)+1, size))
# Reverse lookup built once instead of scanning the dictionary for every
# index; setdefault keeps the first word registered for an index.
index_word_dic = {}
for word, index in word_index_dic.items():
    index_word_dic.setdefault(index, word)
# A set makes the "row already filled" check constant-time.
used_index = set()
for procedure_index in word_index_train_list:
    for index in procedure_index:
        if index in used_index:
            continue
        used_index.add(index)
        word = index_word_dic[index]
        if word == '<unk>':
            embedding_matrix[index] = unk_list
        else:
            embedding_matrix[index] = word2vec_matrix[word].tolist()

In [None]:
# used_index was only needed while filling embedding_matrix.
del used_index
gc.collect()

## 最大長を見つける

In [None]:
# Longest step (in tokens) of each split.
max_len_train = max(map(len, word_index_train_list))
max_len_dev = max(map(len, word_index_dev_list))
max_len_test = max(map(len, word_index_test_list))

In [None]:
# Longest step over all three splits. A chain of strict ">" comparisons falls
# through to the test length whenever two splits tie for the maximum, which
# can pick a value smaller than the real maximum; max() has no such gap.
max_len = max(max_len_train, max_len_dev, max_len_test)

In [None]:
# print(max_len_train,max_len_dev,max_len_test)

# 最大手順を探す

In [None]:
# First column of every row of max_turn_train_list (the per-recipe step count).
turn_train_list = [i[0] for i in max_turn_train_list]

In [None]:
# First column of every row of max_turn_dev_list (the per-recipe step count).
turn_dev_list = [i[0] for i in max_turn_dev_list]

In [None]:
# First column of every row of max_turn_test_list (the per-recipe step count).
turn_test_list = [i[0] for i in max_turn_test_list]

In [None]:
# Largest step count over all three splits; recipes are padded to this many steps.
max_turn = int(max(max(turn_train_list),max(turn_dev_list),max(turn_test_list)))

In [None]:
# max_turn

# パディング

## 単語数のパディング

In [None]:
# Word index used to pad short steps (index 0 is not assigned to any word).
index_for_padding=0

In [None]:
def padding_index_list(num):
    """Return a list of ``num`` padding indices (empty when ``num`` <= 0)."""
    return [index_for_padding for _ in range(num)]

In [None]:
copy_index_train_list = copy.deepcopy(word_index_train_list)

# Right-pad every training step with the padding index up to max_len. The
# lengths are read from the snapshot while the original lists are extended.
for index in range(len(copy_index_train_list)):
    word_index_list = copy_index_train_list[index]
    pad_num = max_len - len(word_index_list)
    word_index_train_list[index] += padding_index_list(pad_num)

In [None]:
copy_index_dev_list = copy.deepcopy(word_index_dev_list)

# Right-pad every dev step with the padding index up to max_len. The lengths
# are read from the snapshot while the original lists are extended.
for index in range(len(copy_index_dev_list)):
    word_index_list = copy_index_dev_list[index]
    pad_num = max_len - len(word_index_list)
    word_index_dev_list[index] += padding_index_list(pad_num)

In [None]:
# The snapshots were only needed to read the unpadded lengths.
del copy_index_train_list
del copy_index_dev_list

gc.collect()

## 単語数のpaddingを表すone-hot-vector

In [None]:
# Length of the first step of each split (measured after the word padding above).
row_train = len(word_index_train_list[0])
row_dev = len(word_index_dev_list[0])

# Width of the innermost mask entry.
column = 1

# Number of recipes the word-level masks are allocated for.
batch_train = train_recipe_size
batch_dev = dev_recipe_size

In [None]:
# Initialise the word-level masks as nested lists of shape
# (batch, max_turn, row, column) filled with False.
paded_one_hot_vector_train_list = [[[[False] * column for i in range(row_train)] for j in range(max_turn)] for k in range(batch_train)]
# word_len_train_list = [len(i) for i in mecab_procedure_words_train_list]

paded_one_hot_vector_dev_list = [[[[False] * column for i in range(row_dev)] for j in range(max_turn)] for k in range(batch_dev)]
# word_len_dev_list = [len(i) for i in mecab_procedure_words_dev_list]

In [None]:
def recipe_words_len(word_list,max_turn_list):
    """Group the token counts of consecutive steps by recipe.

    Args:
        word_list: Flat list of steps (each a sequence of tokens), with the
            steps of all recipes concatenated in order.
        max_turn_list: One row per recipe; element ``[0]`` of a row is the
            number of steps that recipe owns.

    Returns:
        A list with one entry per recipe, each a list holding the token count
        of every step of that recipe.
    """
    recipe_words_len_list = []
    index_ = 0
    for max_turn in max_turn_list:
        max_turn = int(max_turn[0])
        process_words_len_list = [len(i) for i in word_list[index_:index_+max_turn]]
        recipe_words_len_list.append(process_words_len_list)
        # Move to the first step of the next recipe; without this every
        # recipe would be measured on the steps at the start of word_list.
        index_ += max_turn
    return recipe_words_len_list

In [None]:
# Token counts of the training steps, grouped per recipe.
word_len_train_list = recipe_words_len(mecab_procedure_words_train_list,max_turn_train_list)

In [None]:
# Token counts of the dev steps, grouped per recipe.
word_len_dev_list = recipe_words_len(mecab_procedure_words_dev_list,max_turn_dev_list)

In [None]:
# Release the raw token lists of the train and dev splits and run the garbage collector.
del mecab_procedure_words_train_list
del mecab_procedure_words_dev_list
gc.collect()

In [None]:
def padding_procedure(paded_list,word_len_list):
    """Mark the real (non-padding) word positions as True, in place.

    For recipe ``r`` and step ``s`` with ``n`` tokens, the entries
    ``paded_list[r][s][0..n-1]`` are replaced by ``[True] * column``.

    Args:
        paded_list: Nested mask list indexed as [recipe][step][word].
        word_len_list: Per recipe, the token count of each of its steps.
    """
    for recipe_no, step_lengths in enumerate(word_len_list):
        # Original note: the step count should be fed in here.
        for step_no, token_count in enumerate(step_lengths):
            for word_no in range(token_count):
                paded_list[recipe_no][step_no][word_no] = [True] * column

In [None]:
# paded_one_hot_vector_train_list[0]

In [None]:
#procedureのmask

In [None]:
# Fill the word-level mask for the first train_recipe_size training recipes.
padding_procedure(paded_one_hot_vector_train_list,word_len_train_list[:train_recipe_size])

In [None]:
# Fill the word-level mask for the first dev_recipe_size dev recipes.
padding_procedure(paded_one_hot_vector_dev_list,word_len_dev_list[:dev_recipe_size])

## 手順数のパディング

In [None]:
# Word index that fills the all-padding steps appended to short recipes.
index_for_turn = 0

In [None]:
def padding_index_turn_list(num):
    """Return the ``max_turn - num`` all-padding steps that complete a recipe.

    Each padding step is a fresh list of ``max_len`` copies of ``index_for_turn``.
    """
    missing_steps = max_turn - num
    return [[index_for_turn for _ in range(max_len)] for _ in range(missing_steps)]

In [None]:
def padding_max_turn(word_index_list,max_turn_list):
    """Group the flat list of steps by recipe and pad each recipe with empty steps.

    Args:
        word_index_list: Flat list of steps (lists of word indices) with the
            steps of all recipes concatenated in order. It is deep-copied, so
            the result shares no lists with it.
        max_turn_list: One row per recipe; element ``[0]`` of a row is the
            number of steps that recipe owns.

    Returns:
        One list per recipe holding its own steps followed by the padding
        steps produced by ``padding_index_turn_list``.
    """
    copy_index_list = copy.deepcopy(word_index_list)
    process_padding_index_list = []
    start = 0
    for turn_row in max_turn_list:
        step_count = int(turn_row[0])
        stop = start + step_count
        recipe_steps = copy_index_list[start:stop] + padding_index_turn_list(step_count)
        process_padding_index_list.append(recipe_steps)
        start = stop
    return process_padding_index_list

In [None]:
# Nested list indexed as (recipe, step, word index).
process_padding_index_train_list = padding_max_turn(word_index_train_list,max_turn_train_list)

In [None]:
# Nested list indexed as (recipe, step, word index) for the dev split.
process_padding_index_dev_list = padding_max_turn(word_index_dev_list,max_turn_dev_list)

In [None]:
# The flat per-step lists are no longer needed once grouped per recipe.
del word_index_train_list
del word_index_dev_list
gc.collect()

## 手順数のパディングを表すflagリスト

In [None]:
# Number of (padded) steps in the first recipe of each split.
row_turn_train = len(process_padding_index_train_list[0])
row_turn_dev = len(process_padding_index_dev_list[0])

# Width of the innermost flag entry.
column = 1

# Number of recipes in each split.
batch_turn_train = len(process_padding_index_train_list)
batch_turn_dev = len(process_padding_index_dev_list)

In [None]:
# Initialise the step-level flag lists, shape (batch, row, column), with False.
paded_flag_train_list = [[[False] * column for i in range(row_turn_train)] for j in range(batch_turn_train)]
# word_len_train_list = [len(i) for i in mecab_process_words_train_list]

paded_flag_dev_list = [[[False] * column for i in range(row_turn_dev)] for j in range(batch_turn_dev)]
# word_len_dev_list = [len(i) for i in mecab_process_words_dev_list]

In [None]:
def flag_list(paded_flag_list,max_turn_list):
    """Set the flags of the real steps of every recipe to ``[True]``, in place.

    Args:
        paded_flag_list: Nested flag list indexed as [recipe][step].
        max_turn_list: One row per recipe; element ``[0]`` of a row is the
            number of leading steps to flag.
    """
    for recipe_no, turn_row in enumerate(max_turn_list):
        real_steps = int(turn_row[0])
        step_no = 0
        while step_no < real_steps:
            paded_flag_list[recipe_no][step_no] = [True]
            step_no += 1

In [None]:
#processのmask

In [None]:
# Fill the step-level (process) flags of the training split.
flag_list(paded_flag_train_list,max_turn_train_list)

In [None]:
# Fill the step-level (process) flags of the dev split.
flag_list(paded_flag_dev_list,max_turn_dev_list)

# Dataset

In [None]:
def worker_init_fn(worker_id):
    """Re-seed NumPy's global RNG with a seed derived from ``worker_id``.

    The new seed is the first word of the current NumPy RNG state plus
    ``worker_id``, reduced modulo 2**32 because ``np.random.seed`` only
    accepts integers in ``[0, 2**32 - 1]``.

    Args:
        worker_id: Integer added to the first state word.
    """
    base_seed = int(np.random.get_state()[1][0])
    np.random.seed((base_seed + worker_id) % 2**32)

In [None]:
class Recipe_Dataset(torch.utils.data.Dataset):
    """Dataset that serves one recipe per item as four tensors.

    Args:
        data: Per-recipe nested lists of word indices.
        procedure_mask: Per-recipe word-level mask.
        process_mask: Per-recipe step-level mask.
        label: Per-recipe regression target.
        transform: Stored on the instance; not applied by ``__getitem__``.
    """

    def __init__(self, data, procedure_mask, process_mask, label, transform=None):
        self.data = data
        self.label = label
        self.procedure_mask = procedure_mask
        self.process_mask = process_mask
        self.transform = transform
        # The item count is fixed at construction time.
        self.data_num = len(data)

    def __len__(self):
        """Return the number of recipes."""
        return self.data_num

    def __getitem__(self, idx):
        """Return ``(data, process_mask, procedure_mask, label)`` for item ``idx``.

        Note the order: the process mask comes before the procedure mask, and
        the label is converted to a float32 tensor.
        """
        sample = torch.tensor(self.data[idx])
        process_mask = torch.tensor(self.process_mask[idx])
        procedure_mask = torch.tensor(self.procedure_mask[idx])
        target = torch.tensor(self.label[idx], dtype=torch.float32)
        return sample, process_mask, procedure_mask, target

In [None]:
# Training dataset. Argument order follows Recipe_Dataset.__init__:
# data, procedure_mask (word-level mask), process_mask (step-level flags), label.
dataset_train = Recipe_Dataset(process_padding_index_train_list[:train_recipe_size],
                               paded_one_hot_vector_train_list[:train_recipe_size],
                               paded_flag_train_list[:train_recipe_size],
                               log_time_array_train[:train_recipe_size])

In [None]:
# Dev dataset. Argument order follows Recipe_Dataset.__init__:
# data, procedure_mask (word-level mask), process_mask (step-level flags), label.
dataset_dev = Recipe_Dataset(process_padding_index_dev_list[:dev_recipe_size],
                               paded_one_hot_vector_dev_list[:dev_recipe_size],
                               paded_flag_dev_list[:dev_recipe_size],
                               log_time_array_dev[:dev_recipe_size])

In [None]:
# Drop the module-level names of the lists that were passed (sliced) to the
# datasets, then run the garbage collector. The log-time arrays are kept.
del process_padding_index_train_list
del paded_one_hot_vector_train_list
del paded_flag_train_list
#del log_time_array_train

del process_padding_index_dev_list
del paded_one_hot_vector_dev_list
del paded_flag_dev_list
# del log_time_array_dev

gc.collect()

# モデルの構築

In [None]:
class ProcedureAttention(nn.Module):
    """Word-level encoder with masked attention pooling.

    Every step is embedded, passed through a bidirectional LSTM, and its
    outputs are pooled into a single vector by an attention whose weights are
    zeroed on masked word positions.

    Args:
        vocab_size: Number of rows of the embedding table.
        emb_size: Embedding dimensionality.
        hidden_size: LSTM hidden size per direction.
        word_att_size: Half the width of the attention projection.
        procedure_size: Number of steps per recipe the inputs are padded to.
        word_size: Number of words per step the inputs are padded to.
        epsilon: Small constant added to the attention normaliser.
        dropout: Dropout probability applied to the word embeddings.
    """

    def __init__(self, vocab_size, emb_size, hidden_size, word_att_size, procedure_size, word_size, epsilon, dropout):
        super(ProcedureAttention, self).__init__()
        # Embedding layer
        self.embeddings = nn.Embedding(vocab_size, emb_size)
        # Bidirectional procedure-level RNN
        self.procedure_rnn = nn.LSTM(
            emb_size, hidden_size, bidirectional=True, batch_first=True)
        # Dropout
        self.dropout = nn.Dropout(dropout)

        self.linear_ = nn.Linear(2 * hidden_size, 2 * word_att_size)
        nn.init.xavier_uniform_(self.linear_.weight)
        self.tanh_ = nn.Tanh()
        self.softmax_ = nn.Softmax(dim=1)
        # The context vector multiplies the output of linear_, so its length
        # must be 2 * word_att_size; sizing it by hidden_size only works when
        # the two sizes happen to be equal.
        self.u_a = nn.Parameter(torch.Tensor(2 * word_att_size, 1))
        nn.init.xavier_uniform_(self.u_a)

        self.procedure_size = procedure_size
        self.word_size = word_size
        self.emb_size = emb_size
        self.epsilon = epsilon

    def init_embeddings(self, weights):
        """Replace the embedding table with ``weights`` (a NumPy array) and freeze it."""
        self.embeddings.weight = nn.Parameter(torch.from_numpy(weights))

        # Do not update the embedding weights during training.
        self.embeddings.weight.requires_grad = False

    def forward(self, procedure, procedure_mask):
        """Encode every step of every recipe into one vector.

        In the shape comments B is the batch size, P is ``procedure_size``
        and W is ``word_size``.

        Args:
            procedure: Word indices; embedded and then viewed as (B*P, W, emb_size).
            procedure_mask: Word-level mask, viewed as (B*P, W, 1); positions
                with a zero/False entry receive zero attention weight.

        Returns:
            Tensor viewed as (B, P, -1) holding one attention-pooled LSTM
            output per step.
        """
        # The last batch of an epoch may be smaller than the configured size.
        batch_size = procedure.size()[0]

        # procedure_mask: (B, P, W, 1) -> (B*P, W, 1)
        procedure_mask = procedure_mask.view(
            batch_size*self.procedure_size, self.word_size, 1)

        word_vector = self.embeddings(procedure).float()

        # word_vector: (B, P, W, emb_size) -> (B*P, W, emb_size)
        word_vector = word_vector.view(
            batch_size*self.procedure_size, self.word_size, self.emb_size)

        word_vector = self.dropout(word_vector)

        rnn_out, _ = self.procedure_rnn(word_vector)

        h = self.tanh_(self.linear_(rnn_out))  # h = tanh(W x + b)
        dot = torch.matmul(h, self.u_a)
        exp_ = torch.exp(dot)

        # Masking: (B*P, W, 1) * (B*P, W, 1) -> (B*P, W, 1)
        exp_ = exp_ * procedure_mask

        exp_sum = torch.sum(exp_, dim=1)  # (B*P, 1)
        # Keeps the division below defined when every position is masked.
        exp_sum += self.epsilon

        x_dim = exp_sum.size()[0]
        y_dim = exp_sum.size()[1]
        exp_sum_add_dim = exp_sum.view(x_dim, y_dim, 1)  # (B*P, 1, 1)

        alpha = torch.div(exp_, exp_sum_add_dim)
        procedure_vector = torch.mul(rnn_out, alpha).sum(dim=1)  # (B*P, 2*hidden_size)

        # (B*P, 2*hidden_size) -> (B, P, 2*hidden_size)
        procedure_vector_reshape = procedure_vector.view(
            batch_size, self.procedure_size, -1)

        return procedure_vector_reshape

In [None]:
class ProcessAttention(nn.Module):
    """Step-level encoder with masked attention pooling.

    The step vectors produced by ``ProcedureAttention`` are passed through a
    bidirectional LSTM and pooled into one recipe vector by an attention
    whose weights are zeroed on masked steps.

    Args:
        vocab_size, emb_size, hidden_size, word_att_size, procedure_size,
        word_size, epsilon, dropout: Forwarded unchanged to
            ``ProcedureAttention``; ``hidden_size``, ``word_att_size``,
            ``epsilon`` and ``dropout`` also configure this level.
    """

    def __init__(self, vocab_size, emb_size, hidden_size, word_att_size, procedure_size, word_size, epsilon, dropout):
        super(ProcessAttention, self).__init__()
        self.procedure_attention = ProcedureAttention(
            vocab_size, emb_size, hidden_size, word_att_size, procedure_size, word_size, epsilon, dropout)
        # Bidirectional process-level RNN
        self.process_rnn = nn.LSTM(
            2 * hidden_size, hidden_size, bidirectional=True, batch_first=True)
        # Dropout
        self.dropout = nn.Dropout(dropout)

        self.linear_ = nn.Linear(2 * hidden_size, 2 * word_att_size)
        nn.init.xavier_uniform_(self.linear_.weight)
        self.tanh_ = nn.Tanh()
        self.softmax_ = nn.Softmax(dim=1)
        # The context vector multiplies the output of linear_, so its length
        # must be 2 * word_att_size; sizing it by hidden_size only works when
        # the two sizes happen to be equal.
        self.u_a = nn.Parameter(torch.Tensor(2 * word_att_size, 1))
        nn.init.xavier_uniform_(self.u_a)

        self.epsilon = epsilon

    def forward(self, procedure, process_mask, procedure_mask):
        """Encode every recipe into one vector.

        In the shape comments B is the batch size and P the number of steps.

        Args:
            procedure: Word indices, forwarded to the word-level encoder.
            process_mask: Step-level mask multiplied onto the (B, P, 1)
                attention scores; masked steps receive zero weight.
            procedure_mask: Word-level mask, forwarded to the word-level encoder.

        Returns:
            Tuple ``(process_vector, procedure_attention_vector)``: the pooled
            recipe vector and the attention-weighted LSTM outputs before the
            sum over steps.
        """
        procedure_vector = self.procedure_attention(procedure, procedure_mask)

        procedure_vector = self.dropout(procedure_vector)

        rnn_out, _ = self.process_rnn(procedure_vector)  # (B, P, 2*hidden_size)

        h = self.tanh_(self.linear_(rnn_out))  # h = tanh(W x + b)
        dot = torch.matmul(h, self.u_a)
        exp_ = torch.exp(dot)

        # Masking: (B, P, 1) * (B, P, 1)
        exp_ = exp_ * process_mask

        exp_sum = torch.sum(exp_, dim=1)  # (B, 1)
        # Keeps the division below defined when every step is masked.
        exp_sum += self.epsilon

        x_dim = exp_sum.size()[0]
        y_dim = exp_sum.size()[1]
        exp_sum_add_dim = exp_sum.view(x_dim, y_dim, 1)  # (B, 1, 1)

        alpha = torch.div(exp_, exp_sum_add_dim)  # (B, P, 1)
        procedure_attention_vector = torch.mul(rnn_out,alpha)  # (B, P, 2*hidden_size)
        process_vector = torch.sum(procedure_attention_vector,dim=1)  # (B, 2*hidden_size)

        return process_vector,procedure_attention_vector

In [None]:
class HierarchialAttentionNetwork(nn.Module):
    """Regression model: ``ProcessAttention`` followed by a linear layer with one output.

    All constructor arguments are forwarded unchanged to ``ProcessAttention``;
    ``hidden_size`` additionally fixes the input width of the regression layer.
    """

    def __init__(self, vocab_size, emb_size, hidden_size, word_att_size, procedure_size, word_size, epsilon, dropout):
        super().__init__()
        # Submodules are created in this order so parameter initialisation
        # consumes the random number generator in a fixed sequence.
        self.process_attention = ProcessAttention(
            vocab_size, emb_size, hidden_size, word_att_size,
            procedure_size, word_size, epsilon, dropout)
        # Regression head: one scalar per recipe vector.
        self.regression_linear = nn.Linear(2 * hidden_size, 1)
        nn.init.xavier_uniform_(self.regression_linear.weight)

    def forward(self, process, process_mask, procedure_mask):
        """Return ``(output, procedure_attention_vector)``.

        ``output`` is the regression result flattened to one dimension; the
        second element is passed through from ``ProcessAttention``.
        """
        recipe_vector, step_vectors = self.process_attention(
            process, process_mask, procedure_mask)
        prediction = self.regression_linear(recipe_vector).view(-1)
        return prediction, step_vectors

# 最適化,損失関数

In [None]:
# Values passed to the model constructor and the optimizer below.
vocab_size = len(word_index_dic)  # the model is later built with vocab_size+1 rows (index 0 = padding)
emb_size = size #200
hidden_size = 64  # NOTE(review): same value as LSTM_UNITS, which is not read here -- confirm intent
word_att_size = 64
dropout=DROPOUT
batch_size=BATCH_SIZE
procedure_size=max_turn  # steps per recipe after padding
word_size=max_len  # words per step after padding
epsilon = 1e-7
lr = 1e-3  # NOTE(review): LEARNING_RATE (0.01) is defined above but this value is what the optimizer receives
epochs = EPOCHS

# データローダー

In [None]:
# Seed PyTorch before the loaders are created.
torch.manual_seed(1)
# Training batches are shuffled; the worker_init_fn argument is commented out for this loader.
train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)#,worker_init_fn=worker_init_fn)

# Dev batches keep the dataset order.
dev_loader = torch.utils.data.DataLoader(dataset_dev, batch_size=BATCH_SIZE, shuffle=False, num_workers=4,worker_init_fn=worker_init_fn)

In [None]:
def train(train_loader,model,criterion,optimizer,train_total_loss):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: Iterable of batches ``(processes, process_mask,
            procedure_mask, labels)``.
        model: Called as ``model(processes, process_mask, procedure_mask)``
            and expected to return ``(prediction, attention_vectors)``.
        criterion: Loss function applied to ``(prediction, labels)``.
        optimizer: Optimizer stepped once per batch.
        train_total_loss: Starting value the batch losses are added to.

    Returns:
        Tuple of the accumulated loss and a list holding the attention
        vectors of every sample, converted to nested Python lists.
    """
    model.train()
    procedure_attention_vectors_list = []
    for batch in train_loader:
        # Move every tensor of the batch to the selected device.
        train_processes, train_process_mask, train_procedure_mask, train_labels = (
            tensor.to(device) for tensor in batch)

        # Forward pass and loss.
        pred_y, procedure_attention_vector = model(
            train_processes, train_process_mask, train_procedure_mask)
        loss = criterion(pred_y, train_labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the scalar loss value.
        train_total_loss += loss.item()

        procedure_attention_vectors_list.extend(
            procedure_attention_vector.detach().cpu().numpy().tolist())

    return train_total_loss, procedure_attention_vectors_list

In [None]:
def dev(dev_loader, model, criterion, dev_total_loss):
    """Evaluate ``model`` on ``dev_loader`` without updating it.

    Args:
        dev_loader: Iterable of batches ``(processes, process_mask,
            procedure_mask, labels)``.
        model: Called as ``model(processes, process_mask, procedure_mask)``;
            only the first element of its result is used.
        criterion: Loss function applied to ``(prediction, labels)``.
        dev_total_loss: Starting value the batch losses are added to.

    Returns:
        Tuple of the accumulated loss and the list of predictions for all
        samples, in loader order.
    """
    model.eval()
    pred_dev_list = []
    # No gradients are needed for evaluation; disabling them avoids building
    # an autograd graph for every batch.
    with torch.no_grad():
        for i, batch in enumerate(dev_loader):

            # Move every tensor of the batch to the selected device.
            batch = tuple(t.to(device) for t in batch)
            dev_processes,dev_process_mask,dev_procedure_mask, dev_labels = batch

            pred_y,_ = model(dev_processes,dev_process_mask, dev_procedure_mask)

            loss = criterion(pred_y, dev_labels)

            dev_total_loss += float(loss)

            pred_y_list = pred_y.to('cpu').detach().numpy().copy().tolist()
            pred_dev_list.extend(pred_y_list)

    return dev_total_loss,pred_dev_list

In [None]:
# Number of batches per epoch; used below to average the summed batch losses.
train_batch_len = len(train_loader)
dev_batch_len = len(dev_loader)

In [None]:
# train_batch_len,dev_batch_len,test_batch_len

In [None]:
# model.state_dict()['process_attention.u_a']

# 学習

In [None]:
def time_procedure_to_process(log_time_series,max_turn_list):
    """Reduce a per-step series to one value per recipe.

    The series is consumed in order; for a recipe with ``n`` steps the value
    of its ``n``-th (last) row is kept.

    Args:
        log_time_series: Iterable with one value per step, recipes concatenated.
        max_turn_list: One row per recipe; element ``[0]`` of a row is the
            number of steps that recipe owns.

    Returns:
        List with the value found at the last step of every recipe. Rows that
        remain after the last recipe has been completed are ignored.
    """
    process_log_time_list = []

    remaining_recipes = iter(max_turn_list)
    # None marks "no recipe left"; unlike a numeric sentinel it is never
    # subscripted, so surplus rows cannot raise.
    current_recipe = next(remaining_recipes, None)
    count = 0

    for log_time in log_time_series:
        if current_recipe is None:
            break
        count += 1
        if current_recipe[0] == count:
            process_log_time_list.append(log_time)
            count = 0
            current_recipe = next(remaining_recipes, None)

    return process_log_time_list

In [None]:
# Raw (not log-transformed) 'time' values of the dev split, reduced to one value per recipe.
time_dev_list = df_id_process_time_turn_divide_time_dev['time'].tolist()
time_dev_list = time_procedure_to_process(time_dev_list,max_turn_dev_list)
# Class boundaries used by strict_acc: midpoints in log space between the
# adjacent values 5, 10, 15, 30 and 60.
a1=(math.log(5)+math.log(10))/2
a2=(math.log(10)+math.log(15))/2
a3=(math.log(15)+math.log(30))/2
a4=(math.log(30)+math.log(60))/2

In [None]:
def strict_acc(pred_dev):
    """Return the fraction of predictions that fall into the bin of the true time.

    Predictions are paired in order with the module-level ``time_dev_list``.
    A true time of 5, 10, 15, 30 or 60 owns the log-space interval
    ``(-inf, a1]``, ``(a1, a2]``, ``(a2, a3]``, ``(a3, a4]`` or ``(a4, inf)``
    respectively; any other true time never counts as a hit. The number of
    hits is divided by ``len(pred_dev)``.
    """
    def lands_in_bin(ans_time, pred_time):
        # One interval test per known class; unknown classes never match.
        if ans_time == 5:
            return pred_time <= a1
        if ans_time == 10:
            return a1 < pred_time <= a2
        if ans_time == 15:
            return a2 < pred_time <= a3
        if ans_time == 30:
            return a3 < pred_time <= a4
        if ans_time == 60:
            return a4 < pred_time
        return False

    hits = sum(
        1 for ans_time, pred_time in zip(time_dev_list, pred_dev)
        if lands_in_bin(ans_time, pred_time)
    )
    return hits/len(pred_dev)

In [None]:
# Best dev accuracy seen so far and the (1-based) epoch that produced it.
max_ACC = 0
best_epoch = 1
save_path = '/hoge/hoge.pth'
# Seed PyTorch and the Python/NumPy/TensorFlow RNGs before the model is built,
# so parameter initialisation starts from fixed seeds.
torch.manual_seed(1)
seed(1)
# vocab_size+1 rows: one extra row for the padding index 0.
model = HierarchialAttentionNetwork(vocab_size=vocab_size+1,
                                    emb_size=emb_size,
                                    hidden_size=hidden_size,
                                    word_att_size=word_att_size,
                                    procedure_size=procedure_size,
                                    word_size=word_size,
                                    epsilon=epsilon,
                                    dropout=dropout)

# Load the prepared embedding matrix; init_embeddings also freezes it.
model.process_attention.procedure_attention.init_embeddings(embedding_matrix)
model.to(device)
# Only parameters that still require gradients are optimised, which leaves
# out the frozen embedding table.
optimizer = optim.Adam(params=filter(
    lambda p: p.requires_grad, model.parameters()), lr=lr)
criterion = nn.MSELoss()

for epoch in range(epochs):
    # Loss accumulators start from zero every epoch.
    train_total_loss = 0
    dev_total_loss = 0
    print('###########')
    # The per-sample attention vectors returned by train() are not used here.
    train_loss_total, _ = train(
        train_loader=train_loader,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        train_total_loss=train_total_loss
    )

    dev_loss_total,pred_dev_list = dev(
        dev_loader=dev_loader,
        model=model,
        criterion=criterion,
        dev_total_loss=dev_total_loss
    )

    acc_dev = strict_acc(pred_dev_list)
    
    # Average the summed batch losses over the number of batches.
    train_loss = train_loss_total/train_batch_len
    dev_loss = dev_loss_total/dev_batch_len
    print('%d回目' % (epoch+1))
    print('train_loss:%f\ndev_loss:%f\nacc_dev%f\n' %
          (train_loss, dev_loss,acc_dev))

    # Save a checkpoint whenever the dev accuracy strictly improves.
    if max_ACC < acc_dev:
        max_ACC = acc_dev
        best_epoch = epoch + 1
        torch.save(
            model.state_dict(),
            save_path
        )
    print('%d回目 max_ACC:%f' % (best_epoch, max_ACC))