In [2]:
import pandas as pd

def _read_articles(csv_path):
    """Open ``csv_path`` and parse it into a DataFrame with pandas."""
    with open(csv_path) as handle:
        return pd.read_csv(handle)


# The notebook may be started from two different working directories, so the
# deeper relative path is tried first and the shallower one is the fallback.
try:
    articles = _read_articles('../../server/articles.csv')
except FileNotFoundError:
    articles = _read_articles('../server/articles.csv')
print(articles.head())

# Plain Python lists of the two text columns used by the later cells.
titles = articles['title'].tolist()
contents = articles['content'].tolist()

OSError: [Errno 5] Input/output error: '../../server/articles.csv'

In [5]:
from util.preprocessing import Preprocessor

# Preprocessing pipeline for article bodies (`contents`) and titles (`titles`).
# One Preprocessor instance is created per text field.
content_preprocessor = Preprocessor()
title_preprocessor = Preprocessor()

# Split every text into words. Titles are processed with add_token=True
# (presumably adds start/end marker tokens - TODO confirm in util.preprocessing).
content_separated = content_preprocessor.separate_to_words(contents)
title_separated = title_preprocessor.separate_to_words(titles, add_token=True)

# Pad the word lists (presumably to a common length per field - TODO confirm).
content_separated = content_preprocessor.padding(content_separated)
title_separated = title_preprocessor.padding(title_separated)

# fit_tokenizer is called on the class, not on an instance, once per field.
# NOTE(review): this suggests a tokenizer shared by both fields; if so, both
# fits must happen before either tokenize() call below - keep this order.
Preprocessor.fit_tokenizer(content_separated)
Preprocessor.fit_tokenizer(title_separated)

# Convert the padded word lists to sequences and then to NumPy arrays
# (tokenize() returns something with .to_numpy(), presumably a pandas object).
content_sequence = content_preprocessor.tokenize(content_separated).to_numpy()
title_sequence = title_preprocessor.tokenize(title_separated).to_numpy()

# Per-field sequence lengths; `title_len` is read as a global by the
# Transformer class defined in a later cell.
content_len = content_preprocessor.get_sequence_len()
title_len = title_preprocessor.get_sequence_len()

# Class-level word count (presumably the vocabulary size - TODO confirm).
num_words = Preprocessor.get_num_words()

ModuleNotFoundError: No module named 'util'

In [16]:
import tensorflow as tf

class PositionalEncoding(tf.keras.layers.Layer):
    """Adds a sinusoidal positional encoding to a ``(batch, seq, dim)`` tensor.

    For position ``pos`` and feature index ``i`` the added value is
    ``sin(pos / scale ** (i / dim))`` for even ``i`` and
    ``cos(pos / scale ** ((i - 1) / dim))`` for odd ``i``.

    Args:
        input_dim: Sequence length. Overridden by the static input shape in
            ``build`` whenever that shape is known.
        dim_model: Feature (embedding) width. Overridden like ``input_dim``.
        scale: Base of the geometric frequency progression.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, input_dim=None, dim_model=None, scale=10000, **kwargs):
        super(PositionalEncoding, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.dim_model = dim_model
        self.scale = scale

    def build(self, input_shape):
        """Record sequence length and feature width from the input shape."""
        # Axis 0 is the batch axis; the sequence and feature axes are the
        # last two. Keep the constructor values when a dimension is unknown.
        if input_shape[-2] is not None:
            self.input_dim = int(input_shape[-2])
        if input_shape[-1] is not None:
            self.dim_model = int(input_shape[-1])
        super(PositionalEncoding, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Return ``inputs`` plus the positional encoding (same shape/dtype)."""
        # Fall back to the dynamic shape if a dimension was never resolved.
        seq_len = self.input_dim if self.input_dim is not None else tf.shape(inputs)[-2]
        width = self.dim_model if self.dim_model is not None else tf.shape(inputs)[-1]

        positions = tf.cast(tf.range(seq_len), tf.float32)[:, tf.newaxis]  # (seq, 1)
        feature_idx = tf.range(width)[tf.newaxis, :]                      # (1, dim)
        parity = feature_idx % 2

        # Odd indices share the exponent of the preceding even index. Tensors
        # are immutable, so this is computed arithmetically instead of by
        # slice assignment.
        paired_idx = tf.cast(feature_idx - parity, tf.float32)
        angles = positions / tf.pow(
            tf.cast(self.scale, tf.float32),
            paired_idx / tf.cast(width, tf.float32),
        )

        # sin on even feature indices, cos on odd ones.
        encoding = tf.where(tf.equal(parity, 0), tf.sin(angles), tf.cos(angles))
        return inputs + tf.cast(encoding, inputs.dtype)

In [17]:
def scaled_dot_product_attention(query, key, value, mask):
    """Compute softmax(Q K^T / sqrt(d_k)) V.

    Args:
        query, key, value: Tensors whose last two axes are multiplied with
            ``tf.matmul`` (``key`` is transposed on those axes).
        mask: Optional tensor added to the scores as ``mask * -1e9`` before
            the softmax, or ``None`` to skip masking.

    Returns:
        A tuple ``(output, attention_weights)``.
    """
    key_depth = tf.cast(tf.shape(key)[-1], tf.float32)
    scores = tf.matmul(query, key, transpose_b=True) / tf.math.sqrt(key_depth)

    # Positions where mask is 1 receive a large negative score, so the
    # softmax assigns them (almost) zero weight.
    if mask is not None:
        scores = scores + (mask * -1e9)

    attention_weights = tf.nn.softmax(scores, axis=-1)
    return tf.matmul(attention_weights, value), attention_weights

class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product attention.

    Args:
        model_dim: Width of the query/key/value/output projections. Must be
            divisible by ``num_heads``.
        num_heads: Number of attention heads.
        name: Layer name.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.

    The layer is called with a dict holding ``'query'`` and ``'key'`` and,
    optionally, ``'value'`` (defaults to the key) and ``'mask'`` (defaults to
    no masking).
    """

    def __init__(self, model_dim, num_heads, name="multi_head_attention", **kwargs):
        super(MultiHeadAttention, self).__init__(name=name, **kwargs)
        self.num_heads = num_heads
        self.d_model = model_dim

        assert model_dim % self.num_heads == 0, \
            "model_dim must be divisible by num_heads"

        # Per-head feature width.
        self.depth = model_dim // self.num_heads

        self.query_dense = tf.keras.layers.Dense(units=model_dim)  # WQ
        self.key_dense = tf.keras.layers.Dense(units=model_dim)  # WK
        self.value_dense = tf.keras.layers.Dense(units=model_dim)  # WV

        self.dense = tf.keras.layers.Dense(units=model_dim)  # WO

    def split_heads(self, inputs, batch_size):
        """Reshape (batch, seq, d_model) to (batch, num_heads, seq, depth)."""
        inputs = tf.reshape(
            inputs, shape=(batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(inputs, perm=[0, 2, 1, 3])

    def call(self, inputs, **kwargs):
        """Apply attention and return a (batch, seq_len, d_model) tensor."""
        # Read the optional entries without writing back into the caller's dict.
        query = inputs['query']
        key = inputs['key']
        value = inputs.get('value')
        if value is None:
            value = key
        mask = inputs.get('mask')
        batch_size = tf.shape(query)[0]

        query = self.query_dense(query)  # (batch_size, seq_len, d_model)
        key = self.key_dense(key)  # (batch_size, seq_len, d_model)
        value = self.value_dense(value)  # (batch_size, seq_len, d_model)

        query = self.split_heads(query, batch_size)  # (batch_size, num_heads, seq_len, depth)
        key = self.split_heads(key, batch_size)  # (batch_size, num_heads, seq_len, depth)
        value = self.split_heads(value, batch_size)  # (batch_size, num_heads, seq_len, depth)

        # scaled_dot_product_attention returns (output, attention_weights);
        # only the attended output is needed here.
        scaled_attention, _ = scaled_dot_product_attention(query, key, value, mask)
        # Back to (batch_size, seq_len, num_heads, depth) so heads can be merged.
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])

        concat_attention = tf.reshape(scaled_attention,
                                      (batch_size, -1, self.d_model))

        outputs = self.dense(concat_attention)

        return outputs

In [18]:
from tensorflow.keras.layers import Input, Embedding, Dense
from tensorflow.keras.activations import softmax, relu
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

class Transformer:
    """Builds and trains an attention-based encoder-decoder Keras model.

    Args:
        input_dim: Length of the encoder input sequences.
        output_dim: Passed to the decoder positional encoding as its
            sequence length.
        sequence_len: Used as ``input_dim`` of both Embedding layers, i.e. the
            number of distinct token ids the embeddings can look up.
        embedding_dim: Width of embeddings and attention projections.
        hidden_size: Stored but not used by the current architecture.
        num_heads: Attention heads per attention layer.
        num_layers: Number of attention layers in encoder and decoder.

    After ``compile()`` the full model is in ``self.model``; its encoder and
    decoder halves are exposed as ``self.model.encoder`` and
    ``self.model.decoder``.
    """

    def __init__(self, input_dim, output_dim, sequence_len,
                 embedding_dim=256, hidden_size=1024, num_heads=8, num_layers=6):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.sequence_len = sequence_len
        self.embedding_dim = embedding_dim
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.model = None

    def compile(self, optimizer='adam', loss='sparse_categorical_crossentropy',
                metrics=None):
        """Build the model graph and compile it for training.

        Args:
            optimizer: Passed to ``Model.compile``.
            loss: Passed to ``Model.compile``. The default assumes the targets
                are integer token ids - NOTE(review): confirm against the data.
            metrics: Passed to ``Model.compile``.
        """
        # ---- encoder ----
        encoder_inputs = Input((self.input_dim,), name='encoder_inputs')

        encoder_hidden = Embedding(
            input_dim=self.sequence_len, output_dim=self.embedding_dim, name='encoder_embedding_layer'
        )(encoder_inputs)

        encoder_hidden = PositionalEncoding(
            input_dim=self.input_dim, dim_model=self.embedding_dim, name='encoder_positional_encoding_layer'
        )(encoder_hidden)

        for i in range(self.num_layers):
            encoder_attention = MultiHeadAttention(
                model_dim=self.embedding_dim, num_heads=self.num_heads,
                name='encoder_self_attention_%d' % i
            )(inputs={'query': encoder_hidden, 'key': encoder_hidden})
            # Residual connection around the attention layer.
            encoder_hidden = encoder_hidden + encoder_attention

        encoder_outputs = Dense(
            units=self.embedding_dim, activation=relu, name='encoder_outputs'
        )(encoder_hidden)

        # ---- decoder ----
        # NOTE(review): `title_len` is a notebook-level global, not an
        # attribute of this class; it must exist before compile() is called.
        decoder_inputs = Input((title_len - 1,), name='decoder_inputs')

        decoder_embedded = Embedding(
            input_dim=self.sequence_len, output_dim=self.embedding_dim, name='decoder_embedding_layer'
        )(decoder_inputs)

        decoder_embedded = PositionalEncoding(
            input_dim=self.output_dim, dim_model=self.embedding_dim, name='decoder_positional_encoding_layer'
        )(decoder_embedded)

        # The decoder layers are instantiated once so that the full model and
        # the stand-alone decoder model below share the same weights.
        # NOTE(review): the self-attention layers are named "masked" but no
        # look-ahead mask is passed to them - confirm whether one is intended.
        decoder_blocks = [
            (
                MultiHeadAttention(
                    model_dim=self.embedding_dim, num_heads=self.num_heads,
                    name='decoder_masked_self_attention_%d' % i
                ),
                MultiHeadAttention(
                    model_dim=self.embedding_dim, num_heads=self.num_heads,
                    name='encoder_decoder_attention_%d' % i
                ),
            )
            for i in range(self.num_layers)
        ]
        decoder_dense = Dense(
            units=self.embedding_dim, activation=relu, name='decoder_outputs'
        )
        # NOTE(review): the output width is the global `title_len`; if the
        # targets are token ids this probably has to be the vocabulary size.
        output_dense = Dense(
            units=title_len, activation=softmax, name='outputs'
        )

        def decode(memory):
            """Run the decoder stack against the given encoder representation."""
            hidden = decoder_embedded
            for self_attention, cross_attention in decoder_blocks:
                hidden = hidden + self_attention(
                    inputs={'query': hidden, 'key': hidden})
                hidden = hidden + cross_attention(
                    inputs={'query': hidden, 'key': memory})
            return output_dense(decoder_dense(hidden))

        outputs = decode(encoder_outputs)

        self.model = Model(
            inputs=[encoder_inputs, decoder_inputs],
            outputs=outputs,
            name='body_to_title_transformer_model'
        )
        # Without this call Model.fit refuses to train.
        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

        self.model.encoder = Model(
            inputs=encoder_inputs, outputs=encoder_outputs,
            name='body_to_title_transformer_encoder_model'
        )

        # The stand-alone decoder consumes a precomputed encoder output of
        # shape (input_dim, embedding_dim) per sample. Its output has to be
        # rebuilt from that placeholder: reusing `outputs` would leave the
        # graph disconnected, because `outputs` depends on `encoder_inputs`.
        decoder_param = Input(
            shape=(self.input_dim, self.embedding_dim), name='encoder_outputs')
        self.model.decoder = Model(
            inputs=[decoder_param, decoder_inputs],
            outputs=decode(decoder_param),
            name='body_to_title_transformer_decoder_model'
        )

    def fit(self, x_data, y_data, epochs=50, batch_size=1, test_size=0.2,
            random_state=None):
        """Train with teacher forcing on a random train/validation split.

        The decoder is fed ``y[:, :-1]`` and trained to predict ``y[:, 1:]``.

        Args:
            x_data: Encoder input sequences.
            y_data: Target sequences, indexable as ``y[:, a:b]``.
            epochs: Number of training epochs.
            batch_size: Mini-batch size.
            test_size: Fraction of the data held out for validation.
            random_state: Optional seed for a reproducible split.

        Returns:
            The object returned by ``Model.fit`` (the training history).
        """
        # Build and compile lazily if the caller skipped compile().
        if self.model is None:
            self.compile()

        x_train, x_test, y_train, y_test = train_test_split(
            x_data, y_data, test_size=test_size, random_state=random_state
        )
        return self.model.fit(
            [x_train, y_train[:, :-1]], y_train[:, 1:],
            epochs=epochs, batch_size=batch_size,
            validation_data=([x_test, y_test[:, :-1]], y_test[:, 1:])
        )
