In [2]:
from keras import models
from keras.models import Sequential
from keras.models import Model
from keras.models import load_model
from keras import backend as K

from keras import layers
from keras.layers import Layer
from keras.layers import Input,Dense,Flatten,Embedding,Permute,Dot,Reshape
from keras.layers.convolutional import Conv1D,MaxPooling1D,MaxPooling2D
from keras.layers import Dropout
from keras.layers import LSTM,GRU

from keras.preprocessing import sequence
from keras.utils import np_utils


from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

import pandas as pd
from pandas import DataFrame, Series
import numpy as np

import re

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

import copy

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the token table and discard the leftover "Unnamed: 0" column
# (presumably the index written out by an earlier DataFrame.to_csv — confirm).
rawdata = pd.read_csv(
    "최종리스트_맥주 키워드 제거.csv",
    engine="python",
    encoding="utf-8",
)
del rawdata["Unnamed: 0"]

# One token list per row; cells that are NaN in a row are dropped, so the
# lists can have different lengths.
morphs = [list(rawdata.loc[row_idx, :].dropna()) for row_idx in range(0, len(rawdata))]

In [3]:
# Read the label table that is later split and one-hot encoded.
target_encoded = pd.read_csv(
    "타겟.csv",
    engine="python",
    encoding="utf-8",
)

In [4]:
# Remove the leftover "Unnamed: 0" column. Using drop(..., errors="ignore")
# instead of `del` keeps this cell idempotent: re-running it after the column
# is already gone no longer raises KeyError.
target_encoded = target_encoded.drop("Unnamed: 0", axis=1, errors="ignore")

In [None]:
morphsVectored = []

# The index dictionary is stored as a CSV; only its first row is used, giving
# a {column name: value} mapping (presumably token -> integer index — confirm).
vocabulary = pd.read_csv(
    "색인사전_맥주 키워드 제거.csv",
    engine="python",
    encoding="utf-8",
)
del vocabulary["Unnamed: 0"]
vocabulary = vocabulary.to_dict(orient="records")[0]

# Encode every token list; tokens missing from the dictionary become 0.
for token_list in morphs:
    morphsVectored.append([vocabulary.get(token, 0) for token in token_list])


In [None]:
# Bring every encoded sequence to a fixed length of 50, the length the model's
# Input layer is declared with.
vectorized_seq = sequence.pad_sequences(
    sequences=morphsVectored,
    maxlen=50,
)

In [7]:
# Hold out part of the data. A fixed random_state makes the split, and hence
# every metric reported further down, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    vectorized_seq,
    target_encoded,
    random_state=42,
)

# One-hot encode both splits with the SAME width. Left to itself,
# to_categorical infers the width from the largest label present in the array
# it is given, so a split that happens to miss the highest class would come
# out one column short and no longer match the model's output layer.
num_classes = int(np.max(target_encoded.values)) + 1
y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

In [4]:
import tensorflow as tf

In [8]:
# Start from a clean graph so that re-running this cell does not keep stacking
# new copies of the layers onto the old graph. The original line only
# referenced `tf.reset_default_graph` without calling it, so nothing was
# reset; K.clear_session() does the reset through the Keras backend.
K.clear_session()

## Pre-processing stack in front of the GRU (embedding -> convolution -> max-pooling)

inputs = Input(shape=[50])  # 50 matches the maxlen used when padding the sequences
# 6000 is the embedding's input_dim: every token index fed in must be < 6000.
# NOTE(review): confirm this covers the largest index in the index dictionary.
embed = Embedding(6000,128,input_length = 50)(inputs)
model = Dropout(0.2)(embed)
# kernel size 3 with "valid" padding: 50 steps -> 48 steps, 256 channels
model = Conv1D(256,3,padding="valid",activation="relu",strides=1)(model)
# pool size 4: 48 steps -> 12 steps
model = MaxPooling1D(pool_size = 4)(model)

## Encoder

# return_sequences gives the per-step outputs (used below as the attention
# matrix); return_state additionally gives the final hidden state.
Encoder = GRU(128,return_sequences = True, return_state = True,name="Encoder")
attention_matrix,h_state = Encoder(model)

# Repeat the 2D final state (h_state) once so that it becomes a 3D tensor.
Reshape_3D_for_decoder = layers.RepeatVector(1,name="Reshape_3D_for_decoder")(h_state)

## Decoder

Decoder = GRU(128,name = "Decoder")
# The decoder consumes the 3D version of h_state as a length-1 sequence.
Decoder_output = Decoder(Reshape_3D_for_decoder)

# Repeat the decoder's output ("intent vector") once: 2D tensor -> 3D tensor.
Reshape_3D_for_dot = layers.RepeatVector(1,name="Reshape_3D_for_dot")(Decoder_output)


## Attention mechanism

# Dot product over the feature axis with normalize=True, i.e. the cosine
# similarity between the decoder output and every encoder step.
Cosine_similarity = layers.dot([Reshape_3D_for_dot,attention_matrix],axes = -1,normalize=True,name="Cosine_similarity")

# Pass the similarities through a softmax so they sum to 1; the result is
# called attention_score.
attention_score_layer = layers.Softmax(axis=-1,name="attention_score_from_Softmax")
attention_score = attention_score_layer(Cosine_similarity)

# Weight the original attention_matrix by attention_score. The matrix is
# transposed to (features, steps) so the element-wise multiply can broadcast
# the (1, steps) scores across all feature rows.
Transpose_attention_matrix = layers.Permute((2,1),name = "Transpose_attention_matrix")(attention_matrix)
weighted_attention_matrix = layers.multiply([attention_score,Transpose_attention_matrix],name="weighted_attention_matrix")

# Sum the weighted matrix over the step axis to obtain the context vector
# (one 128-dimensional vector per sample).
context_vector = layers.Lambda(lambda x: K.sum(x, axis=-1),name="Making_context_vector")(weighted_attention_matrix)

# 13-way softmax classifier on top of the context vector.
predicts = Dense(13,activation="softmax")(context_vector)

GRUs = Model(inputs = [inputs], outputs = [predicts])
GRUs.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 50)           0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 50, 128)      768000      input_2[0][0]                    
__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 50, 128)      0           embedding_2[0][0]                
__________________________________________________________________________________________________
conv1d_2 (Conv1D)               (None, 48, 256)      98560       dropout_2[0][0]                  
__________________________________________________________________________________________________
max_poolin

In [9]:
# Configure training: categorical cross-entropy against the one-hot labels,
# Adam optimizer, accuracy reported as the metric.
GRUs.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [35]:
# Train for 10 epochs with mini-batches of 64.
# NOTE(review): the held-out test split is also used as validation data here,
# so the validation numbers are not an independent estimate — confirm intended.
GRUs.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, y_test),
)

Train on 18430 samples, validate on 6144 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1bebb0d8cf8>

In [7]:
# Write the trained model to an .h5 file in the working directory.
GRUs.save(filepath="어텐션.h5")

### 아래부턴 참고용 코드들

In [None]:
from keras import backend as K
from keras.layers import Layer

class MyLayer(Layer):
    """Experimental custom layer that scores ``x[0]`` against ``y``.

    ``call`` returns ``matmul(x[0], transpose(y))`` divided by the product of
    the norms of ``x[0]`` and ``y`` (a cosine-similarity style ratio).

    NOTE(review): ``build`` creates a trainable ``kernel`` that ``call`` never
    reads, and ``compute_output_shape`` reports ``(batch, output_dim)`` without
    looking at ``y`` — confirm whether either still reflects what ``call``
    produces.
    NOTE(review): ``call`` needs a second positional argument ``y``; confirm
    how callers are expected to supply it.
    """

    def __init__(self, output_dim, **kwargs):
        """Remember ``output_dim`` and forward the remaining kwargs to ``Layer``."""
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable ``kernel`` of shape ``(input_shape[1], output_dim)``."""
        kernel_shape = (input_shape[1], self.output_dim)
        self.kernel = self.add_weight(name='kernel',
                                      shape=kernel_shape,
                                      initializer='uniform',
                                      trainable=True)
        # Be sure to call this at the end. super() makes sure the shared parent
        # class is only invoked once.
        super(MyLayer, self).build(input_shape)

    def call(self, x, y):
        """Return ``matmul(x[0], y^T) / (norm(x[0]) * norm(y))``."""
        numerator = tf.matmul(x[0], tf.transpose(y))
        denominator = tf.norm(x[0]) * tf.norm(y)
        return numerator / denominator

    def compute_output_shape(self, input_shape):
        """Report the output shape as ``(input_shape[0], output_dim)``."""
        return (input_shape[0], self.output_dim)

In [None]:
# Reference wiring of a GRU encoder/decoder with an additive-style attention
# block between them.
# NOTE(review): max_encoder_seq_length, max_decoder_seq_length,
# num_encoder_tokens, num_decoder_tokens, latent_dim and RepeatVectorLayer are
# not defined anywhere in the visible notebook; they must exist before this
# cell can run.

# Build the encoder.
encoder_inputs = layers.Input(shape=(max_encoder_seq_length, num_encoder_tokens)) 
encoder = layers.GRU(latent_dim, return_sequences=True, return_state=True) 
encoder_outputs, state_h = encoder(encoder_inputs) 

# Build the decoder; it starts from the encoder's final state.
decoder_inputs = layers.Input(shape=(max_decoder_seq_length, num_decoder_tokens)) 
decoder = layers.GRU(latent_dim, return_sequences=True, return_state=True) 
decoder_outputs, _ = decoder(decoder_inputs, initial_state=state_h)

# Original author's note: "max_encoder_seq_length etc. are the number of
# training data".
# NOTE(review): the names and their use in Input(shape=...) suggest sequence
# lengths / token counts rather than sample counts — confirm.

# RepeatVectorLayer is an opaque helper here; presumably it repeats its input
# N times along the given axis — verify against its definition.
repeat_d_layer = RepeatVectorLayer(max_encoder_seq_length, 2) 
repeat_d = repeat_d_layer(decoder_outputs) 

repeat_e_layer = RepeatVectorLayer(max_decoder_seq_length, 1) 
repeat_e = repeat_e_layer(encoder_outputs) 

## Concatenate the decoder outputs with the encoder outputs.
concat_for_score_layer = layers.Concatenate(axis=-1) 
concat_for_score = concat_for_score_layer([repeat_d, repeat_e]) 

# Two stacked Dense layers (tanh, then a single unit), each wrapped in
# TimeDistributed, turn the concatenation into one raw score per position.
dense1_t_score_layer = layers.Dense(latent_dim // 2, activation='tanh') 
dense1_score_layer = layers.TimeDistributed(dense1_t_score_layer) 
dense1_score = dense1_score_layer(concat_for_score) 

dense2_t_score_layer = layers.Dense(1) 
dense2_score_layer = layers.TimeDistributed(dense2_t_score_layer) 
dense2_score = dense2_score_layer(dense1_score) 
dense2_score = layers.Reshape((max_decoder_seq_length, max_encoder_seq_length))(dense2_score) 

# Softmax over the last axis of the reshaped scores.
softmax_score_layer = layers.Softmax(axis=-1) 
softmax_score = softmax_score_layer(dense2_score) 

repeat_score_layer = RepeatVectorLayer(latent_dim, 2) 
repeat_score = repeat_score_layer(softmax_score) 

# repeat_e_layer / repeat_e are rebound here: from this point on they refer to
# the repeated *transposed* encoder outputs, not the ones used for scoring.
permute_e = layers.Permute((2, 1))(encoder_outputs) 
repeat_e_layer = RepeatVectorLayer(max_decoder_seq_length, 1) 
repeat_e = repeat_e_layer(permute_e) 

# Element-wise weighting of the encoder outputs by the attention scores.
attended_mat_layer = layers.Multiply() 
attended_mat = attended_mat_layer([repeat_score, repeat_e]) 

# Sum over the last axis; the second lambda is the output-shape function
# (the input shape with its last dimension removed).
context_layer = layers.Lambda(lambda x: K.sum(x, axis=-1), lambda x: tuple(x[:-1])) 
context = context_layer(attended_mat) 

# Combine the context with the decoder outputs and project with a tanh Dense.
concat_context_layer = layers.Concatenate(axis=-1) 
concat_context = concat_context_layer([context, decoder_outputs]) 

attention_dense_output_layer = layers.Dense(latent_dim, activation='tanh') 
attention_output_layer = layers.TimeDistributed(attention_dense_output_layer) 
attention_output = attention_output_layer(concat_context) 

# Final softmax over the decoder tokens; note that decoder_outputs is rebound
# to the prediction tensor here.
decoder_dense = layers.Dense(num_decoder_tokens, activation='softmax') 
decoder_outputs = decoder_dense(attention_output) 



In [None]:
# Scratch experiment on two slices of `attention_matrix` (indices [0][0] and
# [0][11]): each slice is reshaped to (-1, 1), then the pair is dotted along
# axis 0 once without and once with normalize=True, and the two results are
# divided element-wise via tf.map_fn.
# NOTE(review): this relies on `attention_matrix` from the model cell and on
# Keras layers accepting sliced tensors — confirm it runs before reusing it.
a = layers.Reshape((-1,1))(attention_matrix[0][0])
b = layers.Reshape((-1,1))(attention_matrix[0][11])
aa = layers.dot([a,b],axes=0)
bb = layers.dot([a,b],axes=0,normalize = True)
tf.map_fn(lambda x : x[0]/x[1],(aa,bb))

In [None]:
def attention_3d_block(inputs):
    """Apply a learned soft attention mask to a 3D sequence tensor.

    The input is transposed to (features, time), a softmax Dense layer of
    width TIME_STEPS produces one weight per time step for every feature, and
    the weights are transposed back and multiplied element-wise with `inputs`.

    Reads two module-level globals that this notebook does not define in any
    visible cell and that must therefore be set by the caller:
    TIME_STEPS (number of time steps in `inputs`) and SINGLE_ATTENTION_VECTOR
    (when truthy, the weights are averaged over the feature axis and the
    resulting single vector is shared by all features).

    Args:
        inputs: tensor of shape (batch_size, time_steps, input_dim).

    Returns:
        Tensor with the same shape as `inputs`, weighted by the attention mask.
    """
    # inputs.shape = (batch_size, time_steps, input_dim)
    input_dim = int(inputs.shape[2])
    a = Permute((2, 1))(inputs)
    a = Reshape((input_dim, TIME_STEPS))(a) # this line is not useful. It's just to know which dimension is what.
    a = Dense(TIME_STEPS, activation='softmax')(a)
    if SINGLE_ATTENTION_VECTOR:
        # Lambda / RepeatVector are reached through `layers`, which the
        # notebook imports; the bare names were never imported.
        a = layers.Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = layers.RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    # layers.multiply replaces the legacy `merge(..., mode='mul')` call, which
    # is not imported here and belongs to the old Keras 1 merge API.
    output_attention_mul = layers.multiply([inputs, a_probs], name='attention_mul')
    return output_attention_mul


In [None]:
# Reference snippet: soft attention over LSTM outputs.
# NOTE(review): `units` and `embedded` are not defined in this cell; they have
# to exist in the kernel before it runs.
# Fixed typo: the result was bound to `ctivations`, leaving the `activations`
# name used below undefined.
activations = LSTM(units, return_sequences=True)(embedded)

# compute importance for each step
attention = Dense(1, activation='tanh')(activations)
attention = Flatten()(attention)
# Activation / RepeatVector are reached through `layers`, which the notebook
# imports; the bare names were never imported.
attention = layers.Activation('softmax')(attention)
attention = layers.RepeatVector(units)(attention)
attention = Permute([2, 1])(attention)

# Element-wise weighting; replaces the legacy `merge(..., mode='mul')` call.
sent_representation = layers.multiply([activations, attention])

In [None]:
# Reference snippet: embedding -> LSTM -> soft attention -> 3-way softmax.
# NOTE(review): `units` is not defined in this cell; it has to exist in the
# kernel before it runs.
# NOTE(review): Input(shape=[3,3]) followed by Embedding yields a 4D tensor,
# while LSTM expects a 3D one — confirm the intended input shape.
_input = Input(shape=[3,3], dtype='int32')

# get the embedding layer
embedded = Embedding(
        input_dim = 50,
        output_dim=40,
        trainable=False,
        mask_zero=False
    )(_input)

activations = LSTM(units, return_sequences=True)(embedded)

# compute importance for each step
attention = Dense(1, activation='tanh')(activations)
attention = Flatten()(attention)
# Activation / RepeatVector / Lambda are reached through `layers`, which the
# notebook imports; the bare names were never imported.
attention = layers.Activation('softmax')(attention)
attention = layers.RepeatVector(units)(attention)
attention = Permute([2, 1])(attention)


# Element-wise weighting; replaces the legacy `merge(..., mode='mul')` call.
sent_representation = layers.multiply([activations, attention])
# Sum the weighted outputs over the time axis (axis -2).
sent_representation = layers.Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(units,))(sent_representation)

probabilities = Dense(3, activation='softmax')(sent_representation)

In [None]:
# Scratch cell: rebuilds the embedding -> dropout -> conv -> max-pool front end
# of the main model and tries to put MyLayer on top of it.
# Note that this rebinds the notebook-level names `inputs`, `embed` and `model`.
inputs = Input(shape=[50])
embed = Embedding(6000,128,input_length = 50)(inputs)
model = Dropout(0.2)(embed)
model = Conv1D(256,3,padding="valid",activation="relu",strides=1)(model)
model = MaxPooling1D(pool_size = 4)(model)
# NOTE(review): MyLayer.call is declared as call(self, x, y) but only one
# argument is passed here, and the tensor's shape object is passed as
# `output_dim` — as written this line cannot succeed; confirm the intent.
model = MyLayer(model.shape).call(model)

# The Model instance is created but not bound to a name.
Model(inputs = [inputs],outputs = [model])