In [1]:
# Load the required libraries.
# NOTE(review): `Model`, `layers` and `Input` come from the standalone `keras`
# package, while other cells use `tf.keras` / `tensorflow.keras` — confirm both
# resolve to the same Keras implementation in this environment.
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras import layers
from keras import Input
import numpy as np
import pickle
import pandas as pd
import matplotlib.pyplot as plt

# Fix TensorFlow's global random seed so that random ops (noise, weight init,
# dropout) are repeatable from run to run.
tf.random.set_seed(7)

In [2]:
import os
# Expose only GPU index 0 to CUDA.
# NOTE(review): this is set after TensorFlow has already been imported and
# seeded in the previous cell; it presumably still takes effect because no GPU
# op has run yet — confirm, or move it above the TensorFlow import.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
# Image inputs for the CNN branch: MNIST digits (labels discarded here) with
# additive Gaussian noise.
(x_image_train, _), (x_image_test, _) = keras.datasets.mnist.load_data()

# Convert to float32, divide by 255, and append a trailing channel axis.
x_image_train = (x_image_train.astype("float32") / 255)[..., np.newaxis]
x_image_test = (x_image_test.astype("float32") / 255)[..., np.newaxis]

# GaussianNoise only perturbs its input when called with training=True, so the
# flag is passed explicitly for both splits (train first, then test).
sample = layers.GaussianNoise(0.9)
x_image_train = sample(x_image_train, training=True)
x_image_test = sample(x_image_test, training=True)

print(x_image_train.shape, x_image_test.shape, sep="\n")

(60000, 28, 28, 1)
(10000, 28, 28, 1)


2022-08-13 13:46:14.202196: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-13 13:46:14.568505: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22360 MB memory:  -> device: 0, name: NVIDIA RTX A5000, pci bus id: 0000:17:00.0, compute capability: 8.6


In [4]:
# Sequence inputs for the recurrent / transformer branches, stored as .npy
# arrays one directory above the notebook.
x_sequence_train_tf = np.load('../x_sequence_train_noise_ver3.npy')
x_sequence_test_tf = np.load('../x_sequence_test_noise_ver3.npy')

print(x_sequence_train_tf.shape, x_sequence_test_tf.shape, sep='\n')

(60000, 472)
(10000, 472)


In [5]:
# Build the targets: read the label files and one-hot encode them (10 classes).
# NOTE(review): read_csv infers a header row by default — confirm the label
# files start with a header line, otherwise the first label is consumed as the
# column name.
train_label_textfile = pd.read_csv('../trainlabels.txt', index_col=False)
test_label_textfile = pd.read_csv('../testlabels.txt', index_col=False)

train_label = np.array(train_label_textfile)
test_label = np.array(test_label_textfile)

y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (train_label, test_label)
)

In [6]:
# 그래프 그려주는 함수
from matplotlib import pyplot as plt

def graph(history):
    """Plot training vs. validation curves (accuracy, then loss) of a fit run.

    Args:
        history: object whose ``history`` attribute is a dict holding the
            per-epoch lists under the keys 'acc', 'val_acc', 'loss' and
            'val_loss' (as produced by ``model.fit`` when the model is
            compiled with ``metrics=['acc']`` and trained with validation data).

    Raises:
        KeyError: if any of the four keys is missing from ``history.history``.
    """
    metrics = history.history
    acc = metrics['acc']
    val_acc = metrics['val_acc']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(acc) + 1)

    # Figure 1: accuracy curves.
    plt.plot(epochs, acc, 'r', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.show()

    # Figure 2: loss curves. These values were previously read but never
    # plotted; start a fresh figure so they do not overlay the accuracy plot.
    plt.figure()
    plt.plot(epochs, loss, 'r', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='upper right')
    plt.show()

## 3-1. CNN1 + LSTM

In [7]:
# Build the CNN branch (image input: 28x28, one channel).
image_input = Input(shape = (28,28,1), dtype = 'float32', name = 'image_mnist')

# Two stages of 3x3 ReLU convolution + 2x2 max pooling (32, then 64 filters).
x = layers.Conv2D(32, kernel_size = (3,3), activation = 'relu')(image_input)
x = layers.MaxPooling2D(pool_size = (2,2))(x)
x = layers.Conv2D(64, kernel_size = (3,3), activation = 'relu')(x)
x = layers.MaxPooling2D(pool_size = (2,2))(x)
# Flatten the feature maps and apply dropout (rate 0.5); this is the image
# feature vector that gets merged with the sequence branch.
x = layers.Flatten()(x)
image_output = layers.Dropout(0.5)(x)

In [8]:
# Build the RNN branch (sequence input of length 472).

# `shape` must be a tuple: `(472)` is just the int 472, so spell it `(472,)`.
sequence_input = Input(shape = (472,), dtype = 'int32', name = 'sequence_mnist')
# Embed the integer tokens (input_dim=512, output_dim=128), then summarise the
# sequence with the final output of a 128-unit LSTM.
y = layers.Embedding(512,128)(sequence_input)
sequence_output = layers.LSTM(128)(y)

In [9]:
# Concatenate the image and sequence feature vectors.
concatenated = layers.concatenate([image_output, sequence_output])

# Add the softmax classifier (10 output classes).
answer = layers.Dense(10, activation = 'softmax')(concatenated)

In [10]:
# Two-input functional model (image + sequence) with a single softmax output.
model3_1 = Model(inputs=[image_input, sequence_input], outputs=answer)
model3_1.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['acc'],  # logged as 'acc' / 'val_acc', the keys graph() reads
)

In [11]:
# Print the layer-by-layer architecture of model 3-1.
model3_1.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image_mnist (InputLayer)        [(None, 28, 28, 1)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 26, 26, 32)   320         image_mnist[0][0]                
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 13, 13, 32)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 11, 11, 64)   18496       max_pooling2d[0][0]              
______________________________________________________________________________________________

In [None]:
# Train model 3-1 on the paired (image, sequence) inputs; a 0.2 fraction of
# the training data is held out for validation.
history3_1 = model3_1.fit(
    [x_image_train, x_sequence_train_tf],
    y_train,
    epochs=80,
    batch_size=128,
    validation_split=0.2,
)

2022-08-13 13:46:15.478480: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/80


2022-08-13 13:46:17.059074: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8201
2022-08-13 13:46:19.802589: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80

In [None]:
# Visualize the training history of model 3-1.
graph(history3_1)

In [None]:
# Evaluate model 3-1 on the test inputs; the returned metrics are displayed as
# the cell output.
model3_1.evaluate(
    [x_image_test, x_sequence_test_tf],
    y_test,
    verbose=2,
)

## 3-2. CNN2 + biLSTM

In [None]:
# Build the CNN branch, LeNet-style (image input: 28x28, one channel).
image_input = Input(shape = (28,28,1), dtype = 'float32', name = 'image_mnist')

# Two stages of 5x5 'same'-padded convolution -> ReLU -> 2x2 max pooling
# (20, then 50 filters).
x = layers.Conv2D(20, kernel_size = 5, padding = 'same')(image_input)
x = layers.Activation("relu")(x)
x = layers.MaxPooling2D(pool_size = (2,2), strides = (2,2))(x)
x = layers.Conv2D(50, kernel_size = 5, padding = 'same')(x)
x = layers.Activation("relu")(x)
x = layers.MaxPooling2D(pool_size = (2,2), strides = (2,2))(x)
# Flatten, project to 500 units with ReLU, then apply dropout (rate 0.5).
x = layers.Flatten()(x)
x = layers.Dense(500)(x)
x = layers.Activation("relu")(x)
image_output = layers.Dropout(0.5)(x)

In [None]:
class BahdanauAttention(tf.keras.Model):
  """Additive attention: scores each time step as V(tanh(W1(values) + W2(query))).

  Args:
    units: width of the two projection layers W1 and W2.
  """

  def __init__(self, units):
    super().__init__()
    self.W1 = layers.Dense(units)  # projects the values (keys == values here)
    self.W2 = layers.Dense(units)  # projects the query
    self.V = layers.Dense(1)       # collapses each time step to one score

  def call(self, values, query):
    """Return ``(context_vector, attention_weights)`` for the given inputs.

    Per the original author's notes, ``values`` is
    (batch_size, max_length, hidden size) and ``query`` is
    (batch_size, hidden size); keys and values are the same tensor.
    """
    # Insert a time axis so the query broadcasts against every time step of
    # the values in the addition below: (batch_size, 1, hidden size).
    query_with_time_axis = tf.expand_dims(query, 1)

    # Before self.V the tensor is (batch_size, max_length, units); self.V maps
    # the last axis to 1, giving a score of (batch_size, max_length, 1).
    projected = self.W1(values) + self.W2(query_with_time_axis)
    score = self.V(tf.nn.tanh(projected))

    # Normalise the scores over the time axis (axis 1).
    attention_weights = tf.nn.softmax(score, axis=1)

    # Weighted sum of the values over time -> (batch_size, hidden_size).
    context_vector = tf.reduce_sum(attention_weights * values, axis=1)

    return context_vector, attention_weights

In [None]:
# Sequence branch: embedding -> two stacked bidirectional LSTMs -> additive
# attention over the LSTM outputs.

# `shape` must be a tuple: `(118*4)` is just the int 472, so add the comma.
sequence_input_lstm = Input(shape = (118*4,), dtype = 'int32', name = 'sequence_mnist_lstm')
y = layers.Embedding(512,32)(sequence_input_lstm)
lstm = layers.Bidirectional(layers.LSTM(32, dropout=0.5, return_sequences = True))(y)
# return_state=True additionally yields the final hidden/cell state of each direction.
lstm, forward_h, forward_c, backward_h, backward_c = layers.Bidirectional(layers.LSTM(64, dropout=0.5, return_sequences=True, return_state=True))(lstm)

# Join the forward and backward final states.
state_h = layers.Concatenate()([forward_h, backward_h])
# NOTE(review): state_c is not used below; it is kept so the auto-generated
# names of later layers do not shift.
state_c = layers.Concatenate()([forward_c, backward_c])

# Attend over all LSTM outputs, using the final hidden state as the query.
attention = BahdanauAttention(64) # size of the attention projection layers
context_vector, attention_weights = attention(lstm, state_h)

# Dense -> BatchNormalization -> ReLU activation, in that order.
dense = layers.Dense(20)(context_vector)
bn = layers.BatchNormalization()(dense)
activation = layers.Activation(activation = 'relu')(bn)

sequence_output = layers.Dropout(0.5)(activation)

In [None]:
# Concatenate the image and sequence feature vectors.
concatenated = layers.concatenate([image_output, sequence_output])

# Add the softmax classifier (10 output classes).
answer = layers.Dense(10, activation = 'softmax')(concatenated)

In [None]:
# Two-input functional model (image + sequence) with a single softmax output.
model3_2 = Model(inputs=[image_input, sequence_input_lstm], outputs=answer)
model3_2.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['acc'],  # logged as 'acc' / 'val_acc', the keys graph() reads
)

In [None]:
# Print the layer-by-layer architecture of model 3-2.
model3_2.summary()

In [None]:
# Train model 3-2 on the paired (image, sequence) inputs; a 0.2 fraction of
# the training data is held out for validation.
history3_2 = model3_2.fit(
    [x_image_train, x_sequence_train_tf],
    y_train,
    epochs=80,
    batch_size=128,
    validation_split=0.2,
)

In [None]:
# Visualize the training history of model 3-2.
graph(history3_2)

In [None]:
# Evaluate model 3-2 on the test inputs; the returned metrics are displayed as
# the cell output.
model3_2.evaluate(
    [x_image_test, x_sequence_test_tf],
    y_test,
    verbose=2,
)

## 3-3. CNN3 + Transformer

In [None]:
# Build the CNN branch, LeNet-style but deeper (image input: 28x28, one channel).
image_input = Input(shape = (28,28,1), dtype = 'float32', name = 'image_mnist')

# Three stages of 5x5 'same'-padded convolution -> ReLU -> 2x2 max pooling
# (20, 50, then 100 filters).
x = layers.Conv2D(20, kernel_size = 5, padding = 'same')(image_input)
x = layers.Activation("relu")(x)
x = layers.MaxPooling2D(pool_size = (2,2), strides = (2,2))(x)
x = layers.Conv2D(50, kernel_size = 5, padding = 'same')(x)
x = layers.Activation("relu")(x)
x = layers.MaxPooling2D(pool_size = (2,2), strides = (2,2))(x)
x = layers.Conv2D(100, kernel_size = 5, padding = 'same')(x)
x = layers.Activation("relu")(x)
x = layers.MaxPooling2D(pool_size = (2,2), strides = (2,2))(x)
# Flatten, then three ReLU dense layers (500 -> 1000 -> 250 units) followed by
# dropout (rate 0.5).
x = layers.Flatten()(x)
x = layers.Dense(500)(x)
x = layers.Activation("relu")(x)
x = layers.Dense(1000)(x)
x = layers.Activation("relu")(x)
x = layers.Dense(250)(x)
x = layers.Activation("relu")(x)
image_output = layers.Dropout(0.5)(x)

In [None]:
class TransformerBlock(layers.Layer):
    """Self-attention block with a position-wise feed-forward network.

    Each of the two sub-layers (multi-head self-attention, feed-forward) is
    followed by dropout, a residual addition and layer normalization.

    Args:
        embed_dim: size of the last axis of the input; also used as the
            attention ``key_dim`` and as the feed-forward output width.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network.
        rate: dropout rate applied after each sub-layer.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs`` and return a tensor of the same shape.

        ``training`` defaults to None so the block can be invoked without the
        flag (it used to be a required positional argument, which fails with a
        TypeError whenever the caller does not supply it). With None, the
        Dropout layers fall back to Keras' own training-phase resolution.
        """
        # Self-attention: query and value are both the block input.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [None]:
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: number of distinct positions the position table can index.
        vocab_size: number of distinct token ids the token table can index.
        embed_dim: width of both embedding tables.
    """

    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position index."""
        # Positions 0 .. L-1, where L is the size of the last axis of x.
        seq_len = tf.shape(x)[-1]
        position_ids = tf.range(0, seq_len, 1)
        position_vectors = self.pos_emb(position_ids)
        token_vectors = self.token_emb(x)
        # The position vectors broadcast across the leading axes of the tokens.
        return token_vectors + position_vectors

In [None]:
# Hyperparameters of the transformer sequence branch.
embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer
maxlen = 118*4
# NOTE(review): the LSTM branches use Embedding(512, ...) while this branch
# uses a vocabulary of 1000 — confirm which bound matches the sequence data.
vocab_size = 1000

# NOTE(review): no dtype is given here, unlike the int32 sequence inputs of
# the other models — confirm the default dtype is intended for token ids.
sequence_input_lstm = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(sequence_input_lstm)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
# NOTE(review): the same TransformerBlock instance is applied twice, so both
# passes share one set of weights — confirm this is intended rather than two
# independent blocks.
x = transformer_block(x)
x = transformer_block(x)
# Average over the sequence axis, then a small dense head with dropout.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(20, activation="relu")(x)
sequence_output = layers.Dropout(0.1)(x)

In [None]:
# Concatenate the image and sequence feature vectors.
concatenated = layers.concatenate([image_output, sequence_output])

# Add the softmax classifier (10 output classes).
answer = layers.Dense(10, activation = 'softmax')(concatenated)

In [None]:
# Two-input functional model (image + sequence) with a single softmax output.
model3_3 = Model(inputs=[image_input, sequence_input_lstm], outputs=answer)
model3_3.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['acc'],  # logged as 'acc' / 'val_acc', the keys graph() reads
)

In [None]:
# Print the layer-by-layer architecture of model 3-3.
model3_3.summary()

In [None]:
# Train model 3-3 on the paired (image, sequence) inputs; a 0.2 fraction of
# the training data is held out for validation.
history3_3 = model3_3.fit(
    [x_image_train, x_sequence_train_tf],
    y_train,
    epochs=80,
    batch_size=128,
    validation_split=0.2,
)

In [None]:
# Visualize the training history of model 3-3.
graph(history3_3)

In [None]:
# Evaluate model 3-3 on the test inputs; the returned metrics are displayed as
# the cell output.
model3_3.evaluate(
    [x_image_test, x_sequence_test_tf],
    y_test,
    verbose=2,
)