In [None]:
!nvidia-smi

Mon Sep  7 08:21:14 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    24W / 300W |      0MiB / 16130MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Mount Google Drive at /content/gdrive; the CSV paths used later in this notebook live under it.
from google.colab import drive
drive.mount('/content/gdrive',force_remount=True)

Mounted at /content/gdrive


In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

%tensorflow_version 2.x
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import layers, Sequential

from sklearn.model_selection import train_test_split
import string

# Root folder of the competition data on the mounted Google Drive.
path = '/content/gdrive/My Drive/Dacon/ComputerVision/'

# CSV locations derived from the root folder.
path_train = path + 'train.csv'
path_test = path + 'test.csv'
path_submission = path + 'submission.csv'

# Stop immediately unless TensorFlow reports a GPU as '/device:GPU:0'.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [None]:
# Seed TensorFlow's and NumPy's global random generators.
seed = 0
tf.random.set_seed(seed)
np.random.seed(seed)

# ImageGen_coeff: ComputerVision_Dataset draws augmented batches up to a limit of 64 * ImageGen_coeff.
# epochs_num / verbose: passed to Model.fit as `epochs` and `verbose`.
ImageGen_coeff = 10
epochs_num = 30
verbose = 2

In [None]:
def normalization(df):
    """Min-max scale every row of ``df`` to the range [0, 1].

    Each row is rescaled independently as ``(x - row_min) / (row_max - row_min)``.
    A row whose maximum equals its minimum is replaced by zeros. The work is
    done with vectorised pandas operations instead of a per-row Python loop,
    and the input frame is left untouched (callers pass slices of a larger
    frame, so writing into it would trigger chained-assignment warnings).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame with one sample per row.

    Returns
    -------
    pandas.DataFrame
        New float frame with the same index and columns as ``df``.
    """
    row_min = df.min(axis=1)
    row_span = df.max(axis=1) - row_min
    is_flat = (row_span == 0).to_numpy()

    # Divide flat rows by 1 instead of 0; they are overwritten with zeros below.
    safe_span = row_span.mask(row_span == 0, 1)
    scaled = df.sub(row_min, axis=0).div(safe_span, axis=0).astype(float)
    scaled.iloc[is_flat] = 0.0

    # Sanity report: printed only when every value is inside [0, 1] and no NaN is present.
    x = scaled.values
    if not np.any(x > 1.0) and not np.any(x < 0) and not np.any(np.isnan(x)):
        print('Boundary Clear')
    return scaled

def ComputerVision_Dataset(n_dim=56, train_ratio=0.9):
    """Load the labelled data, augment the training part and build model inputs.

    ``train.csv`` supplies the labels (column 1 is used as the 10-class target,
    column 2 as the letter) and ``train_bicubic<n_dim>.csv`` supplies the pixel
    values. Rows are split by position: the first ``train_ratio`` share is
    augmented with ``ImageDataGenerator`` and used for training, the remainder
    is kept unaugmented for validation.

    Parameters
    ----------
    n_dim : int
        Side length of the square images stored in the pixel CSV.
    train_ratio : float
        Fraction of rows (from the top of the file) used for training.

    Returns
    -------
    tuple
        ``(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid)``
        where the ``*_label`` arrays are 26-wide one-hot letters, the
        ``*_pixel`` arrays have shape ``(-1, n_dim, n_dim, 1)`` and the ``Y_*``
        arrays are one-hot over 10 classes.
    """
    data_dir = '/content/gdrive/My Drive/Dacon/ComputerVision/'
    train = pd.read_csv(data_dir + 'train.csv')
    train_pixel = pd.read_csv(data_dir + 'train_bicubic' + str(n_dim) + '.csv')

    point_to = int(len(train) * train_ratio)
    train_data, test_data = train.iloc[:point_to], train.iloc[point_to:]
    train_data_pixel, test_data_pixel = train_pixel.iloc[:point_to], train_pixel.iloc[point_to:]

    # 'A'..'Z' -> 26-element one-hot list.
    letter_hash = dict(zip(string.ascii_uppercase, np.eye(26, dtype=int).tolist()))

    # The first column of the pixel CSV is dropped (presumably an id column - TODO confirm).
    # .copy() hands normalization an independent frame rather than a view of a slice.
    pix = normalization(train_data_pixel.iloc[:, 1:].copy()).values.reshape(-1, n_dim, n_dim, 1)
    fix = train_data.iloc[:, 1:3].values

    datagenerator = ImageDataGenerator(rotation_range=10, zoom_range=0.10, width_shift_range=0.1,
                                       height_shift_range=0.1)
    gen = datagenerator.flow(pix, fix, shuffle=False, batch_size=32)

    # The original loop ran while `batch_index <= limit` starting from 0, i.e. it
    # drew limit + 1 batches; that count is preserved here.
    limit = 64 * ImageGen_coeff
    pixel, fixed = [], []
    for _ in range(limit + 1):
        try:
            batch_pixels, batch_labels = next(gen)
        except Exception as err:
            # Best effort: keep what was generated so far, but say why generation stopped.
            print(f"ImageGeneratorError: {err!r}")
            break
        pixel.extend(batch_pixels)
        fixed.extend(batch_labels)
    fixed_train = np.asarray(fixed)
    X_train_pixel = np.asarray(pixel)

    Y_train = to_categorical(fixed_train[:, 0], 10)
    X_train_label = np.asarray([letter_hash[letter] for letter in fixed_train[:, 1]])

    X_valid_pixel = normalization(test_data_pixel.iloc[:, 1:].copy()).values.reshape(-1, n_dim, n_dim, 1)
    X_valid_label = np.array([letter_hash[letter] for letter in test_data.iloc[:, 2]])
    Y_valid = to_categorical(test_data.iloc[:, 1], 10)

    return X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid


X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid = ComputerVision_Dataset()

Boundary Clear
Boundary Clear


In [None]:
def train_test_model(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid,
                     backbone=None, backbone_name='RESNET50'):
    """Train a two-input classifier and print its best train/validation accuracy.

    The image goes through an untrained ``tf.keras.applications`` backbone
    followed by global average pooling and a 128-unit dense layer; the 26-wide
    letter vector goes through two 52-unit dense layers. Both branches are
    concatenated and mapped to a 10-way softmax.

    Parameters
    ----------
    X_train_label, X_valid_label : array-like
        Inputs for the 26-wide dense branch.
    X_train_pixel, X_valid_pixel : numpy.ndarray
        Image inputs; the backbone input shape is ``X_train_pixel.shape[1:]``.
    Y_train, Y_valid : array-like
        Targets for the 10-way softmax output.
    backbone : callable, optional
        Constructor from ``tf.keras.applications``; defaults to ``ResNet50``.
    backbone_name : str
        Name shown in the printed banner.
    """
    if backbone is None:
        backbone = tf.keras.applications.ResNet50
    input_shape = tuple(X_train_pixel.shape[1:])
    print(f'{backbone_name} + BICUBIC{input_shape[0]}')

    # Image branch: backbone without its classification head, trained from scratch.
    cnn = backbone(include_top=False, weights=None, input_tensor=None, input_shape=input_shape, pooling=None)
    cnn_mid = layers.GlobalAveragePooling2D()(cnn.output)
    cnn_out = layers.Dense(128, activation='relu')(cnn_mid)

    # Letter branch.
    dense_input = layers.Input(shape=(26,))
    dense_mid = layers.Dense(52, activation='relu')(dense_input)
    dense_output = layers.Dense(52, activation='relu')(dense_mid)

    concatenated = layers.concatenate([cnn_out, dense_output])
    concatenated = layers.Dense(32, activation='relu')(concatenated)
    concat_output = layers.Dense(10, activation='softmax')(concatenated)
    concat_model = tf.keras.models.Model([cnn.input, dense_input], concat_output)
    concat_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    with tf.device('/device:GPU:0'):
        history = concat_model.fit([X_train_pixel, X_train_label], Y_train, epochs=epochs_num,
                                   validation_data=([X_valid_pixel, X_valid_label], Y_valid),
                                   verbose=verbose)
    print(
        f"CNN: Epochs={epochs_num:d}, " +
        f"Train accuracy={max(history.history['accuracy']):.5f}, " +
        f"Validation accuracy={max(history.history['val_accuracy']):.5f}"
    )


# Same experiment, one run per backbone (previously one copy-pasted function per backbone).
for backbone_name, backbone in [
    ('RESNET50', tf.keras.applications.ResNet50),
    ('RESNET101V2', tf.keras.applications.ResNet101V2),
]:
    train_test_model(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid,
                     backbone=backbone, backbone_name=backbone_name)

RESNET50 + BICUBIC56
Epoch 1/30
637/637 - 32s - loss: 1.4935 - accuracy: 0.4855 - val_loss: 3.2753 - val_accuracy: 0.3805
Epoch 2/30
637/637 - 30s - loss: 0.6386 - accuracy: 0.7908 - val_loss: 1.0318 - val_accuracy: 0.6878
Epoch 3/30
637/637 - 31s - loss: 0.3858 - accuracy: 0.8741 - val_loss: 0.7961 - val_accuracy: 0.7805
Epoch 4/30
637/637 - 31s - loss: 0.2867 - accuracy: 0.9047 - val_loss: 0.7269 - val_accuracy: 0.7951
Epoch 5/30
637/637 - 31s - loss: 0.2142 - accuracy: 0.9292 - val_loss: 0.7150 - val_accuracy: 0.8146
Epoch 6/30
637/637 - 31s - loss: 0.1852 - accuracy: 0.9382 - val_loss: 1.0670 - val_accuracy: 0.7366
Epoch 7/30
637/637 - 31s - loss: 0.1527 - accuracy: 0.9511 - val_loss: 0.6766 - val_accuracy: 0.8293
Epoch 8/30
637/637 - 31s - loss: 0.1295 - accuracy: 0.9583 - val_loss: 0.8370 - val_accuracy: 0.8146
Epoch 9/30
637/637 - 31s - loss: 0.1140 - accuracy: 0.9623 - val_loss: 0.6067 - val_accuracy: 0.8488
Epoch 10/30
637/637 - 31s - loss: 0.1152 - accuracy: 0.9632 - val_loss

In [None]:
def normalization(df):
    """Min-max scale every row of ``df`` to the range [0, 1].

    Each row is rescaled independently as ``(x - row_min) / (row_max - row_min)``.
    A row whose maximum equals its minimum is replaced by zeros. The work is
    done with vectorised pandas operations instead of a per-row Python loop,
    and the input frame is left untouched (callers pass slices of a larger
    frame, so writing into it would trigger chained-assignment warnings).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame with one sample per row.

    Returns
    -------
    pandas.DataFrame
        New float frame with the same index and columns as ``df``.
    """
    row_min = df.min(axis=1)
    row_span = df.max(axis=1) - row_min
    is_flat = (row_span == 0).to_numpy()

    # Divide flat rows by 1 instead of 0; they are overwritten with zeros below.
    safe_span = row_span.mask(row_span == 0, 1)
    scaled = df.sub(row_min, axis=0).div(safe_span, axis=0).astype(float)
    scaled.iloc[is_flat] = 0.0

    # Sanity report: printed only when every value is inside [0, 1] and no NaN is present.
    x = scaled.values
    if not np.any(x > 1.0) and not np.any(x < 0) and not np.any(np.isnan(x)):
        print('Boundary Clear')
    return scaled

def ComputerVision_Dataset(n_dim=112, train_ratio=0.9):
    """Load the labelled data, augment the training part and build model inputs.

    ``train.csv`` supplies the labels (column 1 is used as the 10-class target,
    column 2 as the letter) and ``train_bicubic<n_dim>.csv`` supplies the pixel
    values. Rows are split by position: the first ``train_ratio`` share is
    augmented with ``ImageDataGenerator`` and used for training, the remainder
    is kept unaugmented for validation.

    Parameters
    ----------
    n_dim : int
        Side length of the square images stored in the pixel CSV.
    train_ratio : float
        Fraction of rows (from the top of the file) used for training.

    Returns
    -------
    tuple
        ``(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid)``
        where the ``*_label`` arrays are 26-wide one-hot letters, the
        ``*_pixel`` arrays have shape ``(-1, n_dim, n_dim, 1)`` and the ``Y_*``
        arrays are one-hot over 10 classes.
    """
    data_dir = '/content/gdrive/My Drive/Dacon/ComputerVision/'
    train = pd.read_csv(data_dir + 'train.csv')
    train_pixel = pd.read_csv(data_dir + 'train_bicubic' + str(n_dim) + '.csv')

    point_to = int(len(train) * train_ratio)
    train_data, test_data = train.iloc[:point_to], train.iloc[point_to:]
    train_data_pixel, test_data_pixel = train_pixel.iloc[:point_to], train_pixel.iloc[point_to:]

    # 'A'..'Z' -> 26-element one-hot list.
    letter_hash = dict(zip(string.ascii_uppercase, np.eye(26, dtype=int).tolist()))

    # The first column of the pixel CSV is dropped (presumably an id column - TODO confirm).
    # .copy() hands normalization an independent frame rather than a view of a slice.
    pix = normalization(train_data_pixel.iloc[:, 1:].copy()).values.reshape(-1, n_dim, n_dim, 1)
    fix = train_data.iloc[:, 1:3].values

    datagenerator = ImageDataGenerator(rotation_range=10, zoom_range=0.10, width_shift_range=0.1,
                                       height_shift_range=0.1)
    gen = datagenerator.flow(pix, fix, shuffle=False, batch_size=32)

    # The original loop ran while `batch_index <= limit` starting from 0, i.e. it
    # drew limit + 1 batches; that count is preserved here.
    limit = 64 * ImageGen_coeff
    pixel, fixed = [], []
    for _ in range(limit + 1):
        try:
            batch_pixels, batch_labels = next(gen)
        except Exception as err:
            # Best effort: keep what was generated so far, but say why generation stopped.
            print(f"ImageGeneratorError: {err!r}")
            break
        pixel.extend(batch_pixels)
        fixed.extend(batch_labels)
    fixed_train = np.asarray(fixed)
    X_train_pixel = np.asarray(pixel)

    Y_train = to_categorical(fixed_train[:, 0], 10)
    X_train_label = np.asarray([letter_hash[letter] for letter in fixed_train[:, 1]])

    X_valid_pixel = normalization(test_data_pixel.iloc[:, 1:].copy()).values.reshape(-1, n_dim, n_dim, 1)
    X_valid_label = np.array([letter_hash[letter] for letter in test_data.iloc[:, 2]])
    Y_valid = to_categorical(test_data.iloc[:, 1], 10)

    return X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid


X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid = ComputerVision_Dataset()

Boundary Clear
Boundary Clear


In [None]:
def train_test_model(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid,
                     backbone=None, backbone_name='RESNET50'):
    """Train a two-input classifier and print its best train/validation accuracy.

    The image goes through an untrained ``tf.keras.applications`` backbone
    followed by global average pooling and a 128-unit dense layer; the 26-wide
    letter vector goes through two 52-unit dense layers. Both branches are
    concatenated and mapped to a 10-way softmax.

    Parameters
    ----------
    X_train_label, X_valid_label : array-like
        Inputs for the 26-wide dense branch.
    X_train_pixel, X_valid_pixel : numpy.ndarray
        Image inputs; the backbone input shape is ``X_train_pixel.shape[1:]``.
    Y_train, Y_valid : array-like
        Targets for the 10-way softmax output.
    backbone : callable, optional
        Constructor from ``tf.keras.applications``; defaults to ``ResNet50``.
    backbone_name : str
        Name shown in the printed banner.
    """
    if backbone is None:
        backbone = tf.keras.applications.ResNet50
    input_shape = tuple(X_train_pixel.shape[1:])
    print(f'{backbone_name} + BICUBIC{input_shape[0]}')

    # Image branch: backbone without its classification head, trained from scratch.
    cnn = backbone(include_top=False, weights=None, input_tensor=None, input_shape=input_shape, pooling=None)
    cnn_mid = layers.GlobalAveragePooling2D()(cnn.output)
    cnn_out = layers.Dense(128, activation='relu')(cnn_mid)

    # Letter branch.
    dense_input = layers.Input(shape=(26,))
    dense_mid = layers.Dense(52, activation='relu')(dense_input)
    dense_output = layers.Dense(52, activation='relu')(dense_mid)

    concatenated = layers.concatenate([cnn_out, dense_output])
    concatenated = layers.Dense(32, activation='relu')(concatenated)
    concat_output = layers.Dense(10, activation='softmax')(concatenated)
    concat_model = tf.keras.models.Model([cnn.input, dense_input], concat_output)
    concat_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    with tf.device('/device:GPU:0'):
        history = concat_model.fit([X_train_pixel, X_train_label], Y_train, epochs=epochs_num,
                                   validation_data=([X_valid_pixel, X_valid_label], Y_valid),
                                   verbose=verbose)
    print(
        f"CNN: Epochs={epochs_num:d}, " +
        f"Train accuracy={max(history.history['accuracy']):.5f}, " +
        f"Validation accuracy={max(history.history['val_accuracy']):.5f}"
    )


# Same experiment, one run per backbone (previously one copy-pasted function per backbone).
for backbone_name, backbone in [
    ('RESNET50', tf.keras.applications.ResNet50),
    ('RESNET101V2', tf.keras.applications.ResNet101V2),
    ('XCEPTION', tf.keras.applications.Xception),
    ('INCEPTIONRESNETV2', tf.keras.applications.InceptionResNetV2),
]:
    train_test_model(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid,
                     backbone=backbone, backbone_name=backbone_name)

RESNET50 + BICUBIC112
Epoch 1/30
637/637 - 67s - loss: 1.2578 - accuracy: 0.5765 - val_loss: 1.4671 - val_accuracy: 0.5951
Epoch 2/30
637/637 - 65s - loss: 0.4711 - accuracy: 0.8493 - val_loss: 1.0938 - val_accuracy: 0.7171
Epoch 3/30
637/637 - 65s - loss: 0.2853 - accuracy: 0.9059 - val_loss: 0.9259 - val_accuracy: 0.7854
Epoch 4/30
637/637 - 65s - loss: 0.2116 - accuracy: 0.9311 - val_loss: 0.4464 - val_accuracy: 0.8927
Epoch 5/30
637/637 - 65s - loss: 0.1860 - accuracy: 0.9405 - val_loss: 0.5499 - val_accuracy: 0.8683
Epoch 6/30
637/637 - 65s - loss: 0.1447 - accuracy: 0.9500 - val_loss: 0.9283 - val_accuracy: 0.8341
Epoch 7/30
637/637 - 65s - loss: 0.1273 - accuracy: 0.9585 - val_loss: 0.5418 - val_accuracy: 0.8780
Epoch 8/30
637/637 - 65s - loss: 0.1152 - accuracy: 0.9615 - val_loss: 0.6323 - val_accuracy: 0.8634
Epoch 9/30
637/637 - 65s - loss: 0.1030 - accuracy: 0.9663 - val_loss: 0.7388 - val_accuracy: 0.8780
Epoch 10/30
637/637 - 65s - loss: 0.0839 - accuracy: 0.9728 - val_los

bicubic 224 test
---------------

In [None]:
# Re-seed TensorFlow's and NumPy's global random generators for this section.
seed = 0
tf.random.set_seed(seed)
np.random.seed(seed)

# ImageGen_coeff: ComputerVision_Dataset draws augmented batches up to a limit of 64 * ImageGen_coeff.
# epochs_num / verbose: passed to Model.fit as `epochs` and `verbose`.
ImageGen_coeff = 10
epochs_num = 30
verbose = 2

In [None]:
# Side length of the square images used in this section.
N_dim = 224

def normalization(df):
    """Min-max scale every row of ``df`` to the range [0, 1].

    Each row is rescaled independently as ``(x - row_min) / (row_max - row_min)``.
    A row whose maximum equals its minimum is replaced by zeros. The work is
    done with vectorised pandas operations instead of a per-row Python loop,
    and the input frame is left untouched (callers pass slices of a larger
    frame, so writing into it would trigger chained-assignment warnings).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame with one sample per row.

    Returns
    -------
    pandas.DataFrame
        New float frame with the same index and columns as ``df``.
    """
    row_min = df.min(axis=1)
    row_span = df.max(axis=1) - row_min
    is_flat = (row_span == 0).to_numpy()

    # Divide flat rows by 1 instead of 0; they are overwritten with zeros below.
    safe_span = row_span.mask(row_span == 0, 1)
    scaled = df.sub(row_min, axis=0).div(safe_span, axis=0).astype(float)
    scaled.iloc[is_flat] = 0.0

    # Sanity report: printed only when every value is inside [0, 1] and no NaN is present.
    x = scaled.values
    if not np.any(x > 1.0) and not np.any(x < 0) and not np.any(np.isnan(x)):
        print('Boundary Clear')
    return scaled

def ComputerVision_Dataset(n_dim=None, train_ratio=0.9):
    """Load the labelled data, augment the training part and build model inputs.

    ``train.csv`` supplies the labels (column 1 is used as the 10-class target,
    column 2 as the letter) and ``train_bicubic<n_dim>.csv`` supplies the pixel
    values. Rows are split by position: the first ``train_ratio`` share is
    augmented with ``ImageDataGenerator`` and used for training, the remainder
    is kept unaugmented for validation.

    Parameters
    ----------
    n_dim : int, optional
        Side length of the square images stored in the pixel CSV. Defaults to
        the module-level ``N_dim``.
    train_ratio : float
        Fraction of rows (from the top of the file) used for training.

    Returns
    -------
    tuple
        ``(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid)``
        where the ``*_label`` arrays are 26-wide one-hot letters, the
        ``*_pixel`` arrays have shape ``(-1, n_dim, n_dim, 1)`` and the ``Y_*``
        arrays are one-hot over 10 classes.
    """
    if n_dim is None:
        n_dim = N_dim

    data_dir = '/content/gdrive/My Drive/Dacon/ComputerVision/'
    train = pd.read_csv(data_dir + 'train.csv')
    train_pixel = pd.read_csv(data_dir + 'train_bicubic' + str(n_dim) + '.csv')

    point_to = int(len(train) * train_ratio)
    train_data, test_data = train.iloc[:point_to], train.iloc[point_to:]
    train_data_pixel, test_data_pixel = train_pixel.iloc[:point_to], train_pixel.iloc[point_to:]

    # 'A'..'Z' -> 26-element one-hot list.
    letter_hash = dict(zip(string.ascii_uppercase, np.eye(26, dtype=int).tolist()))

    # The first column of the pixel CSV is dropped (presumably an id column - TODO confirm).
    # .copy() hands normalization an independent frame rather than a view of a slice.
    pix = normalization(train_data_pixel.iloc[:, 1:].copy()).values.reshape(-1, n_dim, n_dim, 1)
    fix = train_data.iloc[:, 1:3].values

    datagenerator = ImageDataGenerator(rotation_range=10, zoom_range=0.10, width_shift_range=0.1,
                                       height_shift_range=0.1)
    gen = datagenerator.flow(pix, fix, shuffle=False, batch_size=32)

    # The original loop ran while `batch_index <= limit` starting from 0, i.e. it
    # drew limit + 1 batches; that count is preserved here.
    limit = 64 * ImageGen_coeff
    pixel, fixed = [], []
    for _ in range(limit + 1):
        try:
            batch_pixels, batch_labels = next(gen)
        except Exception as err:
            # Best effort: keep what was generated so far, but say why generation stopped.
            print(f"ImageGeneratorError: {err!r}")
            break
        pixel.extend(batch_pixels)
        fixed.extend(batch_labels)
    fixed_train = np.asarray(fixed)
    X_train_pixel = np.asarray(pixel)

    Y_train = to_categorical(fixed_train[:, 0], 10)
    X_train_label = np.asarray([letter_hash[letter] for letter in fixed_train[:, 1]])

    X_valid_pixel = normalization(test_data_pixel.iloc[:, 1:].copy()).values.reshape(-1, n_dim, n_dim, 1)
    X_valid_label = np.array([letter_hash[letter] for letter in test_data.iloc[:, 2]])
    Y_valid = to_categorical(test_data.iloc[:, 1], 10)

    return X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid


X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid = ComputerVision_Dataset()

Boundary Clear
Boundary Clear


In [None]:
def train_test_model(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid,
                     backbone=None, backbone_name='RESNET50'):
    """Train a two-input classifier and print its best train/validation accuracy.

    The image goes through an untrained ``tf.keras.applications`` backbone
    followed by global average pooling and a 128-unit dense layer; the 26-wide
    letter vector goes through two 52-unit dense layers. Both branches are
    concatenated and mapped to a 10-way softmax.

    The banner resolution is read from the data. The previous copies printed
    'BICUBIC112' although this section trains on ``N_dim = 224`` images.

    Parameters
    ----------
    X_train_label, X_valid_label : array-like
        Inputs for the 26-wide dense branch.
    X_train_pixel, X_valid_pixel : numpy.ndarray
        Image inputs; the backbone input shape is ``X_train_pixel.shape[1:]``.
    Y_train, Y_valid : array-like
        Targets for the 10-way softmax output.
    backbone : callable, optional
        Constructor from ``tf.keras.applications``; defaults to ``ResNet50``.
    backbone_name : str
        Name shown in the printed banner.
    """
    if backbone is None:
        backbone = tf.keras.applications.ResNet50
    input_shape = tuple(X_train_pixel.shape[1:])
    print(f'{backbone_name} + BICUBIC{input_shape[0]}')

    # Image branch: backbone without its classification head, trained from scratch.
    cnn = backbone(include_top=False, weights=None, input_tensor=None, input_shape=input_shape, pooling=None)
    cnn_mid = layers.GlobalAveragePooling2D()(cnn.output)
    cnn_out = layers.Dense(128, activation='relu')(cnn_mid)

    # Letter branch.
    dense_input = layers.Input(shape=(26,))
    dense_mid = layers.Dense(52, activation='relu')(dense_input)
    dense_output = layers.Dense(52, activation='relu')(dense_mid)

    concatenated = layers.concatenate([cnn_out, dense_output])
    concatenated = layers.Dense(32, activation='relu')(concatenated)
    concat_output = layers.Dense(10, activation='softmax')(concatenated)
    concat_model = tf.keras.models.Model([cnn.input, dense_input], concat_output)
    concat_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    with tf.device('/device:GPU:0'):
        history = concat_model.fit([X_train_pixel, X_train_label], Y_train, epochs=epochs_num,
                                   validation_data=([X_valid_pixel, X_valid_label], Y_valid),
                                   verbose=verbose)
    print(
        f"CNN: Epochs={epochs_num:d}, " +
        f"Train accuracy={max(history.history['accuracy']):.5f}, " +
        f"Validation accuracy={max(history.history['val_accuracy']):.5f}"
    )


# Same experiment, one run per backbone (previously one copy-pasted function per backbone).
for backbone_name, backbone in [
    ('RESNET50', tf.keras.applications.ResNet50),
    ('RESNET101V2', tf.keras.applications.ResNet101V2),
    ('XCEPTION', tf.keras.applications.Xception),
    ('INCEPTIONRESNETV2', tf.keras.applications.InceptionResNetV2),
]:
    train_test_model(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid,
                     backbone=backbone, backbone_name=backbone_name)

RESNET50 + BICUBIC112
Epoch 1/30
637/637 - 106s - loss: 1.2263 - accuracy: 0.5755 - val_loss: 1.6606 - val_accuracy: 0.6098
Epoch 2/30
637/637 - 104s - loss: 0.4229 - accuracy: 0.8634 - val_loss: 0.7325 - val_accuracy: 0.8049
Epoch 3/30
637/637 - 104s - loss: 0.2636 - accuracy: 0.9117 - val_loss: 0.5220 - val_accuracy: 0.8488
Epoch 4/30
637/637 - 104s - loss: 0.1956 - accuracy: 0.9331 - val_loss: 1.3291 - val_accuracy: 0.7366
Epoch 5/30
637/637 - 104s - loss: 0.1525 - accuracy: 0.9480 - val_loss: 0.9488 - val_accuracy: 0.8000
Epoch 6/30
637/637 - 104s - loss: 0.1318 - accuracy: 0.9564 - val_loss: 0.5332 - val_accuracy: 0.8780
Epoch 7/30
637/637 - 104s - loss: 0.0947 - accuracy: 0.9683 - val_loss: 0.6883 - val_accuracy: 0.8878
Epoch 8/30
637/637 - 104s - loss: 0.0930 - accuracy: 0.9690 - val_loss: 0.6965 - val_accuracy: 0.8439
Epoch 9/30
637/637 - 104s - loss: 0.0791 - accuracy: 0.9746 - val_loss: 0.6005 - val_accuracy: 0.8829
Epoch 10/30
637/637 - 104s - loss: 0.0752 - accuracy: 0.9750

letter 가 필요한 것인지에 대한 고찰
-----------

In [None]:
# Re-seed TensorFlow's and NumPy's global random generators for this section.
seed = 0
tf.random.set_seed(seed)
np.random.seed(seed)

# ImageGen_coeff: ComputerVision_Dataset draws augmented batches up to a limit of 64 * ImageGen_coeff.
# epochs_num / verbose: passed to Model.fit as `epochs` and `verbose`.
ImageGen_coeff = 10
epochs_num = 30
verbose = 2

In [None]:
# Side length of the square images used in this section.
N_dim = 224

def normalization(df):
    """Min-max scale every row of ``df`` to the range [0, 1].

    Each row is rescaled independently as ``(x - row_min) / (row_max - row_min)``.
    A row whose maximum equals its minimum is replaced by zeros. The work is
    done with vectorised pandas operations instead of a per-row Python loop,
    and the input frame is left untouched (callers pass slices of a larger
    frame, so writing into it would trigger chained-assignment warnings).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame with one sample per row.

    Returns
    -------
    pandas.DataFrame
        New float frame with the same index and columns as ``df``.
    """
    row_min = df.min(axis=1)
    row_span = df.max(axis=1) - row_min
    is_flat = (row_span == 0).to_numpy()

    # Divide flat rows by 1 instead of 0; they are overwritten with zeros below.
    safe_span = row_span.mask(row_span == 0, 1)
    scaled = df.sub(row_min, axis=0).div(safe_span, axis=0).astype(float)
    scaled.iloc[is_flat] = 0.0

    # Sanity report: printed only when every value is inside [0, 1] and no NaN is present.
    x = scaled.values
    if not np.any(x > 1.0) and not np.any(x < 0) and not np.any(np.isnan(x)):
        print('Boundary Clear')
    return scaled

def ComputerVision_Dataset(n_dim=None, train_ratio=0.9):
    """Load the labelled data, augment the training part and build model inputs.

    ``train.csv`` supplies the labels (column 1 is used as the 10-class target,
    column 2 as the letter) and ``train_bicubic<n_dim>.csv`` supplies the pixel
    values. Rows are split by position: the first ``train_ratio`` share is
    augmented with ``ImageDataGenerator`` and used for training, the remainder
    is kept unaugmented for validation.

    Parameters
    ----------
    n_dim : int, optional
        Side length of the square images stored in the pixel CSV. Defaults to
        the module-level ``N_dim``.
    train_ratio : float
        Fraction of rows (from the top of the file) used for training.

    Returns
    -------
    tuple
        ``(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid)``
        where the ``*_label`` arrays are 26-wide one-hot letters, the
        ``*_pixel`` arrays have shape ``(-1, n_dim, n_dim, 1)`` and the ``Y_*``
        arrays are one-hot over 10 classes.
    """
    if n_dim is None:
        n_dim = N_dim

    data_dir = '/content/gdrive/My Drive/Dacon/ComputerVision/'
    train = pd.read_csv(data_dir + 'train.csv')
    train_pixel = pd.read_csv(data_dir + 'train_bicubic' + str(n_dim) + '.csv')

    point_to = int(len(train) * train_ratio)
    train_data, test_data = train.iloc[:point_to], train.iloc[point_to:]
    train_data_pixel, test_data_pixel = train_pixel.iloc[:point_to], train_pixel.iloc[point_to:]

    # 'A'..'Z' -> 26-element one-hot list.
    letter_hash = dict(zip(string.ascii_uppercase, np.eye(26, dtype=int).tolist()))

    # The first column of the pixel CSV is dropped (presumably an id column - TODO confirm).
    # .copy() hands normalization an independent frame rather than a view of a slice.
    pix = normalization(train_data_pixel.iloc[:, 1:].copy()).values.reshape(-1, n_dim, n_dim, 1)
    fix = train_data.iloc[:, 1:3].values

    datagenerator = ImageDataGenerator(rotation_range=10, zoom_range=0.10, width_shift_range=0.1,
                                       height_shift_range=0.1)
    gen = datagenerator.flow(pix, fix, shuffle=False, batch_size=32)

    # The original loop ran while `batch_index <= limit` starting from 0, i.e. it
    # drew limit + 1 batches; that count is preserved here.
    limit = 64 * ImageGen_coeff
    pixel, fixed = [], []
    for _ in range(limit + 1):
        try:
            batch_pixels, batch_labels = next(gen)
        except Exception as err:
            # Best effort: keep what was generated so far, but say why generation stopped.
            print(f"ImageGeneratorError: {err!r}")
            break
        pixel.extend(batch_pixels)
        fixed.extend(batch_labels)
    fixed_train = np.asarray(fixed)
    X_train_pixel = np.asarray(pixel)

    Y_train = to_categorical(fixed_train[:, 0], 10)
    X_train_label = np.asarray([letter_hash[letter] for letter in fixed_train[:, 1]])

    X_valid_pixel = normalization(test_data_pixel.iloc[:, 1:].copy()).values.reshape(-1, n_dim, n_dim, 1)
    X_valid_label = np.array([letter_hash[letter] for letter in test_data.iloc[:, 2]])
    Y_valid = to_categorical(test_data.iloc[:, 1], 10)

    return X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid


X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid = ComputerVision_Dataset()

Boundary Clear
Boundary Clear


In [None]:
def train_test_model(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid):
    """Train a randomly initialised ResNet50 digit classifier and print its best accuracies.

    Args:
        X_train_label: accepted for call-site symmetry; not used by this model.
        X_train_pixel: training images, fed straight into the network.
        Y_train: one-hot training targets (the head has 10 outputs).
        X_valid_label: accepted for call-site symmetry; not used by this model.
        X_valid_pixel: validation images.
        Y_valid: one-hot validation targets.

    Reads the globals N_dim (input side length), epochs_num and verbose.
    Returns nothing; the fitted model is discarded once the summary is printed.
    """
    print('RESNET50 + BICUBIC112')

    # Single-channel input, no pretrained weights, no classification top.
    backbone = tf.keras.applications.ResNet50(
        include_top=False,
        weights=None,
        input_tensor=None,
        input_shape=(N_dim, N_dim, 1),
        pooling=None,
    )
    features = layers.GlobalAveragePooling2D()(backbone.output)
    for width in (128, 32):
        features = layers.Dense(width, activation='relu')(features)
    digit_probs = layers.Dense(10, activation='softmax')(features)

    classifier = tf.keras.models.Model(backbone.input, digit_probs)
    classifier.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

    with tf.device('/device:GPU:0'):
        history = classifier.fit(
            X_train_pixel,
            Y_train,
            validation_data=(X_valid_pixel, Y_valid),
            epochs=epochs_num,
            verbose=verbose,
        )

    # Report the best epoch of each metric, not the last one.
    best_train = max(history.history['accuracy'])
    best_valid = max(history.history['val_accuracy'])
    print(f"CNN: Epochs={epochs_num:d}, Train accuracy={best_train:.5f}, Validation accuracy={best_valid:.5f}")

# Train on the arrays built above; the two *_label arguments are accepted but unused by the function.
train_test_model(X_train_label, X_train_pixel, Y_train, X_valid_label, X_valid_pixel, Y_valid)

RESNET50 + BICUBIC112
Epoch 1/30
637/637 - 106s - loss: 1.2474 - accuracy: 0.5725 - val_loss: 1.9998 - val_accuracy: 0.5561
Epoch 2/30
637/637 - 105s - loss: 0.4909 - accuracy: 0.8416 - val_loss: 1.4988 - val_accuracy: 0.6878
Epoch 3/30
637/637 - 105s - loss: 0.3088 - accuracy: 0.8962 - val_loss: 0.5912 - val_accuracy: 0.8146
Epoch 4/30
637/637 - 105s - loss: 0.2205 - accuracy: 0.9255 - val_loss: 0.5640 - val_accuracy: 0.8780
Epoch 5/30
637/637 - 105s - loss: 0.1771 - accuracy: 0.9384 - val_loss: 0.5258 - val_accuracy: 0.8439
Epoch 6/30
637/637 - 105s - loss: 0.1296 - accuracy: 0.9573 - val_loss: 0.5797 - val_accuracy: 0.8390
Epoch 7/30
637/637 - 105s - loss: 0.1259 - accuracy: 0.9593 - val_loss: 0.7757 - val_accuracy: 0.8244
Epoch 8/30
637/637 - 105s - loss: 0.1050 - accuracy: 0.9647 - val_loss: 0.8051 - val_accuracy: 0.8488
Epoch 9/30
637/637 - 105s - loss: 0.1034 - accuracy: 0.9662 - val_loss: 0.4480 - val_accuracy: 0.8878
Epoch 10/30
637/637 - 105s - loss: 0.0553 - accuracy: 0.9814

KeyboardInterrupt: ignored

letter field를 one-hot encoding하지 않고 이미지 평균으로 처리하는 방법에 대하여 - 1
--------

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import math

%tensorflow_version 2.x
import tensorflow as tf
from tensorflow.image import per_image_standardization, resize
from tensorflow import keras
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers

import string

# All dataset and artifact paths are derived from this folder on the mounted Drive.
path = '/content/gdrive/My Drive/Dacon/ComputerVision/'
path_train = path + 'train.csv'
path_test = path + 'test.csv'
path_submission = path + 'submission.csv'

# Abort early unless TensorFlow reports a GPU under exactly this name;
# later cells pin their work to it with tf.device('/device:GPU:0').
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [None]:
# Seed TensorFlow's and NumPy's global random generators.
seed = 0
tf.random.set_seed(seed)
np.random.seed(seed)

## Global Variables ##
train_val_ratio = 0.9   # fraction of train.csv rows used for training; the remainder is validation
ImageGen_coeff = 10     # multiplier on the number of augmented batches drawn in prepare_datasets
N_dim = 224             # side length the 28x28 images are resized to
epochs_num = 50         # epochs passed to fit()
verbose = 1             # verbosity passed to fit()
batch_size = 32         # NOTE(review): not referenced by the cells of this section (flow() hard-codes 32)
ModelName = 'InceptionResNetV2_224_labelchanged'  # stem of the saved .h5 file

In [None]:
def conc_and_get_letter_hash_function(train_raw,test_raw):
    """Map every uppercase letter to the standardized mean image of that letter.

    The `letter` column and the pixel columns of both frames are stacked, the
    pixels of all samples sharing a letter are averaged, and the 28x28 mean is
    bicubic-resized to (N_dim, N_dim) and per-image standardized.

    Args:
        train_raw: training frame; columns from position 2 onward are taken as
            `letter` followed by the 784 pixel columns.
        test_raw: test frame; columns from position 1 onward are taken the same way.

    Returns:
        dict: letter -> numpy array of shape (N_dim, N_dim, 1).
    """
    # Drop the leading columns so both frames line up as [letter, pixel columns...].
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    df = pd.concat([train_raw.iloc[:,2:], test_raw.iloc[:,1:]], ignore_index=True)
    letter_hash = {}
    for letter in string.ascii_uppercase:
        # Slice COLUMNS, not rows: keep every sample of this letter and drop only
        # the non-numeric `letter` column before averaging.
        letter_pixels = df.loc[df['letter'] == letter].iloc[:, 1:]
        targ = letter_pixels.mean(axis=0).values.reshape(1,28,28,1)
        resized = resize(targ, [N_dim,N_dim], method = tf.image.ResizeMethod.BICUBIC)
        letter_hash[letter] = per_image_standardization(resized).numpy().reshape(N_dim,N_dim,1)
    return letter_hash

def prepare_datasets():
    '''
    Load train/test CSVs and build the augmented training set, a hold-out validation set and the raw test set.

    Globals read:
        train_val_ratio : fraction of the shuffled train rows used for training; the rest is validation.
        ImageGen_coeff  : scales how many augmented batches are drawn (64 * ImageGen_coeff + 1 batches of 32).
        N_dim           : train/validation images are resized from (28,28) to (N_dim,N_dim).

    Train and validation pixels are bicubic-resized and per-image standardized here.
    test_pixel is returned at 28x28, neither resized nor standardized, because the
    resized test set does not fit in RAM; it has to be resized and standardized
    later, batch by batch.

    Returns:
        train_pixel, train_label, train_answer, test_pixel, test_label,
        validation_pixel, validation_label, validation_answer
        where *_label holds the mean-letter image of each sample's letter and
        *_answer the one-hot target over 10 classes.
    '''
    train_raw = pd.read_csv(path_train)
    test_raw = pd.read_csv(path_test)

    letter_hash = conc_and_get_letter_hash_function(train_raw,test_raw)

    def letters_to_images(letters):
        # One (N_dim, N_dim, 1) mean-letter image per input letter.
        return np.asarray([letter_hash[letter] for letter in letters])

    ## only for validation
    point_to = int(len(train_raw) * train_val_ratio)
    train_raw = train_raw.sample(frac=1)  # shuffle before splitting
    validation_raw = train_raw.iloc[point_to:,:]
    train_raw = train_raw.iloc[:point_to,:]
    with tf.device('/device:GPU:0'):
        validation_pixel = resize(validation_raw.iloc[:,3:].values.astype('int32').reshape(-1,28,28,1), [N_dim,N_dim], method = tf.image.ResizeMethod.BICUBIC)
        validation_pixel = per_image_standardization(validation_pixel).numpy()
    validation_label = letters_to_images(validation_raw.iloc[:,2].values)
    validation_answer = to_categorical(validation_raw.iloc[:,1].values, 10)
    ##

    pix = train_raw.iloc[:,3:].values.astype('int32').reshape(-1,28,28,1)
    fix = train_raw.iloc[:,1:3].values  # columns 1 and 2 travel with each image through augmentation
    datagenerator = ImageDataGenerator(rotation_range=10, zoom_range=0.10, width_shift_range=0.1,height_shift_range=0.1)
    gen = datagenerator.flow(pix, fix, shuffle=False, batch_size=32)
    pixel, fixed = [], []
    # The generator cycles over the training rows; draw 64 * ImageGen_coeff + 1 batches.
    for _ in range(64 * ImageGen_coeff + 1):
        try:
            batch_pixel, batch_fixed = next(gen)
        except Exception as err:
            # Best effort: keep what was generated so far, but say what went wrong.
            # KeyboardInterrupt is deliberately not caught.
            print("ImageGeneratorError:", repr(err))
            break
        pixel += list(batch_pixel)
        fixed += list(batch_fixed)
    fixed = np.asarray(fixed)
    with tf.device('/device:GPU:0'):
        train_pixel = resize(np.asarray(pixel), [N_dim,N_dim], method = tf.image.ResizeMethod.BICUBIC)
        train_pixel = per_image_standardization(train_pixel).numpy()
    train_label = letters_to_images(fixed[:,1])
    train_answer = to_categorical(fixed[:,0], 10)

    test_pixel = test_raw.iloc[:,2:].values.astype('int32').reshape(-1,28,28,1)
    test_label = letters_to_images(test_raw.iloc[:,1].values)
    return train_pixel, train_label, train_answer, test_pixel, test_label, validation_pixel, validation_label, validation_answer

# Build every array once, then print the shapes as a sanity check.
train_pixel, train_label, train_answer, test_pixel, test_label, validation_pixel, validation_label, validation_answer = prepare_datasets()
print(train_pixel.shape, train_label.shape, train_answer.shape, test_pixel.shape, test_label.shape, validation_pixel.shape, validation_label.shape, validation_answer.shape)

(20369, 224, 224, 1) (20369, 224, 224, 1) (20369, 10) (20480, 28, 28, 1) (20480, 224, 224, 1) (205, 224, 224, 1) (205, 224, 224, 1) (205, 10)


In [None]:
def model(pixel, label, answer, pixel_val, label_val, answer_val):
    """Fit a two-branch CNN (InceptionResNetV2 + ResNet50) and save it as an .h5 file.

    Args:
        pixel, label: training inputs for the InceptionResNetV2 and ResNet50 branch respectively.
        answer: one-hot training targets (the head has 10 outputs).
        pixel_val, label_val, answer_val: the matching validation arrays.

    Reads the globals N_dim, epochs_num, verbose, path and ModelName.

    Returns:
        The fitted Keras model, also written to path + ModelName + '.h5'.
    """
    def build_branch(architecture, width):
        # Untrained single-channel backbone -> global average pool -> Dense(width).
        backbone = architecture(
            include_top=False,
            weights=None,
            input_tensor=None,
            input_shape=(N_dim, N_dim, 1),
            pooling=None,
        )
        pooled = layers.GlobalAveragePooling2D()(backbone.output)
        return backbone.input, layers.Dense(width, activation='relu')(pooled)

    with tf.device('/device:GPU:0'):
        pixel_in, pixel_feat = build_branch(tf.keras.applications.InceptionResNetV2, 256)
        label_in, label_feat = build_branch(tf.keras.applications.ResNet50, 256)

        merged = layers.concatenate([pixel_feat, label_feat])
        for width in (256, 128):
            merged = layers.Dense(width, activation='relu')(merged)
        digit_probs = layers.Dense(10, activation='softmax')(merged)

        concat_model = tf.keras.models.Model([pixel_in, label_in], digit_probs)
        concat_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
        history = concat_model.fit(
            [pixel, label],
            answer,
            validation_data=([pixel_val, label_val], answer_val),
            epochs=epochs_num,
            verbose=verbose,
        )

    concat_model.save(path + ModelName + '.h5')
    best_train = max(history.history['accuracy'])
    print(f"CNN: Epochs={epochs_num:d}, Train accuracy={best_train:.5f}, ")
    return concat_model

# NOTE: this rebinds the name `model` from the builder function to the trained Keras model,
# so the `def model` above has to run again before another call (re-running the whole cell does that).
model = model(train_pixel, train_label, train_answer,validation_pixel, validation_label, validation_answer)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50

KeyboardInterrupt: ignored

letter field를 one-hot encoding하지 않고 이미지 평균으로 처리하는 방법에 대하여 - 2
--------

In [None]:
# Seed TensorFlow's and NumPy's global random generators.
seed = 0
tf.random.set_seed(seed)
np.random.seed(seed)

## Global Variables ##
train_val_ratio = 0.9   # fraction of train.csv rows used for training; the remainder is validation
ImageGen_coeff = 10     # multiplier on the number of augmented batches drawn in prepare_datasets
N_dim = 224             # side length the 28x28 images are resized to
epochs_num = 50         # epochs passed to fit()
verbose = 1             # verbosity passed to fit()
batch_size = 32         # NOTE(review): not referenced by the cells of this section (flow() hard-codes 32)
ModelName = 'InceptionResNetV2_224_labelchanged_2'  # stem of the saved .h5 file

In [None]:
def conc_and_get_letter_hash_function(train_raw,test_raw):
    """Map every uppercase letter to the standardized mean image of that letter.

    The `letter` column and the pixel columns of both frames are stacked, the
    pixels of all samples sharing a letter are averaged, and the 28x28 mean is
    bicubic-resized to (N_dim, N_dim) and per-image standardized.

    Args:
        train_raw: training frame; columns from position 2 onward are taken as
            `letter` followed by the 784 pixel columns.
        test_raw: test frame; columns from position 1 onward are taken the same way.

    Returns:
        dict: letter -> numpy array of shape (N_dim, N_dim, 1).
    """
    # Drop the leading columns so both frames line up as [letter, pixel columns...].
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    df = pd.concat([train_raw.iloc[:,2:], test_raw.iloc[:,1:]], ignore_index=True)
    letter_hash = {}
    for letter in string.ascii_uppercase:
        # Slice COLUMNS, not rows: keep every sample of this letter and drop only
        # the non-numeric `letter` column before averaging.
        letter_pixels = df.loc[df['letter'] == letter].iloc[:, 1:]
        targ = letter_pixels.mean(axis=0).values.reshape(1,28,28,1)
        resized = resize(targ, [N_dim,N_dim], method = tf.image.ResizeMethod.BICUBIC)
        letter_hash[letter] = per_image_standardization(resized).numpy().reshape(N_dim,N_dim,1)
    return letter_hash

def prepare_datasets():
    '''
    Load train/test CSVs and build the augmented training set, a hold-out validation set and the raw test set.

    Each letter is encoded twice: as its mean-letter image (*_label) and as a
    26-way one-hot vector (*_label_onehot).

    Globals read:
        train_val_ratio : fraction of the shuffled train rows used for training; the rest is validation.
        ImageGen_coeff  : scales how many augmented batches are drawn (64 * ImageGen_coeff + 1 batches of 32).
        N_dim           : train/validation images are resized from (28,28) to (N_dim,N_dim).

    Train and validation pixels are bicubic-resized and per-image standardized here.
    test_pixel is returned at 28x28, neither resized nor standardized, because the
    resized test set does not fit in RAM; it has to be resized and standardized
    later, batch by batch.

    Returns:
        train_pixel, train_label, train_label_onehot, train_answer,
        test_pixel, test_label, test_label_onehot,
        validation_pixel, validation_label, validation_label_onehot, validation_answer
        where *_answer is the one-hot target over 10 classes.
    '''
    train_raw = pd.read_csv(path_train)
    test_raw = pd.read_csv(path_test)

    letter_hash_onehot = dict(zip(string.ascii_uppercase, [[1 if i == j else 0 for j in range(26)] for i in range(26)]))
    letter_hash = conc_and_get_letter_hash_function(train_raw,test_raw)

    def letters_to_onehot(letters):
        # One 26-element 0/1 row per input letter.
        return np.asarray([letter_hash_onehot[letter] for letter in letters])

    def letters_to_images(letters):
        # One (N_dim, N_dim, 1) mean-letter image per input letter.
        return np.asarray([letter_hash[letter] for letter in letters])

    ## only for validation
    point_to = int(len(train_raw) * train_val_ratio)
    train_raw = train_raw.sample(frac=1)  # shuffle before splitting
    validation_raw = train_raw.iloc[point_to:,:]
    train_raw = train_raw.iloc[:point_to,:]
    with tf.device('/device:GPU:0'):
        validation_pixel = resize(validation_raw.iloc[:,3:].values.astype('int32').reshape(-1,28,28,1), [N_dim,N_dim], method = tf.image.ResizeMethod.BICUBIC)
        validation_pixel = per_image_standardization(validation_pixel).numpy()
    validation_label = letters_to_images(validation_raw.iloc[:,2].values)
    validation_label_onehot = letters_to_onehot(validation_raw.iloc[:,2].values)
    validation_answer = to_categorical(validation_raw.iloc[:,1].values, 10)
    ##

    pix = train_raw.iloc[:,3:].values.astype('int32').reshape(-1,28,28,1)
    fix = train_raw.iloc[:,1:3].values  # columns 1 and 2 travel with each image through augmentation
    datagenerator = ImageDataGenerator(rotation_range=10, zoom_range=0.10, width_shift_range=0.1,height_shift_range=0.1)
    gen = datagenerator.flow(pix, fix, shuffle=False, batch_size=32)
    pixel, fixed = [], []
    # The generator cycles over the training rows; draw 64 * ImageGen_coeff + 1 batches.
    for _ in range(64 * ImageGen_coeff + 1):
        try:
            batch_pixel, batch_fixed = next(gen)
        except Exception as err:
            # Best effort: keep what was generated so far, but say what went wrong.
            # KeyboardInterrupt is deliberately not caught.
            print("ImageGeneratorError:", repr(err))
            break
        pixel += list(batch_pixel)
        fixed += list(batch_fixed)
    fixed = np.asarray(fixed)
    with tf.device('/device:GPU:0'):
        train_pixel = resize(np.asarray(pixel), [N_dim,N_dim], method = tf.image.ResizeMethod.BICUBIC)
        train_pixel = per_image_standardization(train_pixel).numpy()
    train_label = letters_to_images(fixed[:,1])
    train_label_onehot = letters_to_onehot(fixed[:,1])
    train_answer = to_categorical(fixed[:,0], 10)

    test_pixel = test_raw.iloc[:,2:].values.astype('int32').reshape(-1,28,28,1)
    test_label = letters_to_images(test_raw.iloc[:,1].values)
    test_label_onehot = letters_to_onehot(test_raw.iloc[:,1].values)
    return train_pixel, train_label, train_label_onehot, train_answer, test_pixel, test_label, test_label_onehot, validation_pixel, validation_label, validation_label_onehot, validation_answer

# Build every array once, then print the shapes (train / test / validation) as a sanity check.
train_pixel, train_label, train_label_onehot, train_answer, test_pixel, test_label, test_label_onehot, validation_pixel, validation_label, validation_label_onehot, validation_answer = prepare_datasets()
print(train_pixel.shape, train_label.shape, train_label_onehot.shape, train_answer.shape) 
print(test_pixel.shape, test_label.shape, test_label_onehot.shape)
print(validation_pixel.shape, validation_label.shape, validation_label_onehot.shape, validation_answer.shape)

(20369, 224, 224, 1) (20369, 224, 224, 1) (20369, 26) (20369, 10)
(20480, 28, 28, 1) (20480, 224, 224, 1) (20480, 26)
(205, 224, 224, 1) (205, 224, 224, 1) (205, 26) (205, 10)


In [None]:
def model(pixel, label, label_onehot, answer, pixel_val, label_val, label_val_onehot, answer_val):
    """Fit a three-branch network (two CNNs plus a dense one-hot branch) and save it as an .h5 file.

    Args:
        pixel, label: training inputs for the InceptionResNetV2 and ResNet50 branch respectively.
        label_onehot: 26-wide training input for the dense branch.
        answer: one-hot training targets (the head has 10 outputs).
        pixel_val, label_val, label_val_onehot, answer_val: the matching validation arrays.

    Reads the globals N_dim, epochs_num, verbose, path and ModelName.

    Returns:
        The fitted Keras model, also written to path + ModelName + '.h5'.
    """
    def build_branch(architecture, width):
        # Untrained single-channel backbone -> global average pool -> Dense(width).
        backbone = architecture(
            include_top=False,
            weights=None,
            input_tensor=None,
            input_shape=(N_dim, N_dim, 1),
            pooling=None,
        )
        pooled = layers.GlobalAveragePooling2D()(backbone.output)
        return backbone.input, layers.Dense(width, activation='relu')(pooled)

    with tf.device('/device:GPU:0'):
        pixel_in, pixel_feat = build_branch(tf.keras.applications.InceptionResNetV2, 256)
        label_in, label_feat = build_branch(tf.keras.applications.ResNet50, 128)

        # Small MLP over the one-hot letter.
        dense_input = layers.Input(shape=(26,))
        onehot_feat = dense_input
        for _ in range(2):
            onehot_feat = layers.Dense(52, activation='relu')(onehot_feat)

        merged = layers.concatenate([pixel_feat, label_feat, onehot_feat])
        for width in (256, 128):
            merged = layers.Dense(width, activation='relu')(merged)
        digit_probs = layers.Dense(10, activation='softmax')(merged)

        concat_model = tf.keras.models.Model([pixel_in, label_in, dense_input], digit_probs)
        concat_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
        history = concat_model.fit(
            [pixel, label, label_onehot],
            answer,
            validation_data=([pixel_val, label_val, label_val_onehot], answer_val),
            epochs=epochs_num,
            verbose=verbose,
        )

    concat_model.save(path + ModelName + '.h5')
    best_train = max(history.history['accuracy'])
    print(f"CNN: Epochs={epochs_num:d}, Train accuracy={best_train:.5f}, ")
    return concat_model

# NOTE: this rebinds the name `model` from the builder function to the trained Keras model,
# so the `def model` above has to run again before another call (re-running the whole cell does that).
model = model(train_pixel, train_label, train_label_onehot, train_answer, validation_pixel, validation_label, validation_label_onehot, validation_answer)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
CNN: Epochs=50, Train accuracy=0.99828, 


k-means를 이용한 대소문자 구분
----------------

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import math
from sklearn.model_selection import train_test_split
from sklearn import cluster
from skimage import feature
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow.image import resize
from tensorflow import keras
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import string

# All dataset and artifact paths are derived from this folder on the mounted Drive.
path = '/content/gdrive/My Drive/Dacon/ComputerVision/'
path_train = path + 'train.csv'
path_test = path + 'test.csv'
path_submission = path + 'submission.csv'

# Abort early unless TensorFlow reports a GPU under exactly this name;
# later cells pin their work to it with tf.device('/device:GPU:0').
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

In [None]:
## Global Variables ##
ImageGen_coeff = 10     # multiplier on the number of augmented batches drawn in prepare_datasets
N_dim = 224             # side length the 28x28 images are resized to
epochs_num = 80         # epochs passed to the main fit() call
verbose = 2             # verbosity passed to fit()
batch_size = 32         # NOTE(review): not referenced by the cells of this section (flow() hard-codes 32)
train_val_ratio = 0.9   # fraction of train.csv rows used for training; the remainder is validation
def get_progressbar_str(progress):
    """Render a fixed-width text progress bar, e.g. ``Progress:[=====>    ] 50.0%``.

    Args:
        progress: completed fraction, nominally in [0, 1].

    Returns:
        str: 'Progress:[' + a bar that is always 30 characters wide + '] ' +
        the percentage with one decimal place.
    """
    MAX_LEN = 30
    # Clamp only the drawn length so out-of-range input cannot distort the bar;
    # the printed percentage still reflects the value that was passed in.
    BAR_LEN = min(MAX_LEN, max(0, int(MAX_LEN * progress)))
    bar = '=' * BAR_LEN
    if BAR_LEN < MAX_LEN:
        # The '>' head occupies one of the MAX_LEN slots, so pad one space fewer.
        bar += '>' + ' ' * (MAX_LEN - BAR_LEN - 1)
    return 'Progress:[' + bar + '] %.1f%%' % (progress * 100.)

In [None]:
def image_updown_clustering(df):
    """Split every uppercase letter class into two sub-classes with 2-means on HOG features.

    For each letter A-Z, the 28x28 images carrying that letter are described by
    their HOG features and clustered into two groups; rows falling into cluster 0
    get their `letter` rewritten to the lowercase form.

    Works for both frame layouts used in this notebook (`letter` at column
    position 2 in the train frame, position 1 in the test frame): the pixel
    columns are taken to be every column after `letter`.

    NOTE(review): which cluster gets id 0 is arbitrary for each KMeans fit, so
    "lowercase" is only a second-cluster tag and is not guaranteed to mean the same
    thing across separate calls (e.g. train vs. test) — confirm this is acceptable.

    Args:
        df: frame with a `letter` column followed by 784 pixel columns.

    Returns:
        The same frame object, modified in place.
    """
    # Locate the pixel block by name rather than by a fixed position.
    first_pixel_pos = df.columns.get_loc('letter') + 1
    for alphabet_large in string.ascii_uppercase:
        images_alphabet = df.loc[df['letter'] == alphabet_large].iloc[:, first_pixel_pos:]
        if len(images_alphabet) < 2:
            # KMeans(n_clusters=2) cannot be fitted on fewer than two samples.
            continue
        pixels = images_alphabet.values.reshape(-1,28,28)
        img_converted = np.asarray([feature.hog(pixel) for pixel in pixels])
        k_means = cluster.KMeans(n_clusters = 2)
        k_means.fit(img_converted)
        # Relabel all cluster-0 rows of this letter in one assignment.
        lowercase_idx = images_alphabet.index[k_means.labels_ == 0]
        df.loc[lowercase_idx, 'letter'] = alphabet_large.lower()

    return df

def prepare_datasets():
    '''
    Load train.csv, split upper/lower-case letter sub-classes, and build augmented train and validation sets.

    Globals read:
        train_val_ratio : fraction of the shuffled train rows used for training; the rest is validation.
        ImageGen_coeff  : scales how many augmented batches are drawn per split
                          (int(len(split) / 32 * ImageGen_coeff) + 1 batches of 32).
        N_dim           : images are bicubic-resized from (28,28) to (N_dim,N_dim), then divided by 255.

    Both the validation split and the training split are passed through the same
    random augmentation (validation first). Test data is not handled here.

    Returns:
        train_pixel, train_label, train_answer, validation_pixel, validation_label, validation_answer
        where *_label is a 52-way one-hot of the (possibly lowercased) letter and
        *_answer the one-hot target over 10 classes.
    '''
    def augment(images, fix):
        # Draw randomly transformed batches from `images`, carrying the rows of `fix` along with them.
        datagenerator = ImageDataGenerator(rotation_range=10, zoom_range=0.10, width_shift_range=0.1,height_shift_range=0.1)
        gen = datagenerator.flow(images, fix, shuffle=False, batch_size=32)
        pixel, fixed = [], []
        for _ in range(int((len(images) / 32) * ImageGen_coeff) + 1):
            try:
                batch_pixel, batch_fixed = next(gen)
            except Exception as err:
                # Best effort: keep what was generated so far, but say what went wrong.
                # KeyboardInterrupt is deliberately not caught.
                print("ImageGeneratorError:", repr(err))
                break
            pixel += list(batch_pixel)
            fixed += list(batch_fixed)
        return np.asarray(pixel), np.asarray(fixed)

    train_raw = pd.read_csv(path_train)
    train_raw = image_updown_clustering(train_raw)

    letter_hash = dict(zip(string.ascii_uppercase + string.ascii_lowercase, [[1 if i == j else 0 for j in range(52)] for i in range(52)]))

    def letters_to_onehot(letters):
        # One 52-element 0/1 row per input letter.
        return np.asarray([letter_hash[letter] for letter in letters])

    ### only for validation
    point_to = int(len(train_raw) * train_val_ratio)
    train_raw = train_raw.sample(frac=1)  # shuffle before splitting
    validation_raw = train_raw.iloc[point_to:,:]
    train_raw = train_raw.iloc[:point_to,:]

    with tf.device('/device:GPU:0'):
        validation_pixel = resize(validation_raw.iloc[:,3:].values.astype('int32').reshape(-1,28,28,1), [N_dim,N_dim], method = tf.image.ResizeMethod.BICUBIC).numpy()
    validation_pixel = validation_pixel / 255.0
    validation_fix = validation_raw.iloc[:,1:3].values
    validation_pixel, validation_fixed = augment(validation_pixel, validation_fix)
    validation_label = letters_to_onehot(validation_fixed[:,1])
    validation_answer = to_categorical(validation_fixed[:,0], 10)
    ###

    pix = train_raw.iloc[:,3:].values.astype('int32').reshape(-1,28,28,1)
    with tf.device('/device:GPU:0'):
        pix = resize(pix, [N_dim,N_dim], method = tf.image.ResizeMethod.BICUBIC).numpy()
    pix = pix / 255.0
    fix = train_raw.iloc[:,1:3].values
    train_pixel, train_fixed = augment(pix, fix)
    train_label = letters_to_onehot(train_fixed[:,1])
    train_answer = to_categorical(train_fixed[:,0], 10)

    return train_pixel, train_label, train_answer, validation_pixel, validation_label, validation_answer

def prepare_testset():
    """Load test.csv and return its raw pixels plus a 52-way one-hot letter encoding.

    The letters are first passed through image_updown_clustering, so some of
    them may come back lowercased.

    Returns:
        test_pixel: int32 array of shape (n, 28, 28, 1), not resized or rescaled.
        test_label: float32 array with one 52-element one-hot row per sample.
    """
    alphabet = string.ascii_uppercase + string.ascii_lowercase
    one_hot_rows = {
        symbol: [1 if position == column else 0 for column in range(52)]
        for position, symbol in enumerate(alphabet)
    }

    test_raw = image_updown_clustering(pd.read_csv(path_test))

    # Column 1 holds the letter; every column from position 2 on is a pixel.
    letters = test_raw.iloc[:,1].values
    pixel_values = test_raw.iloc[:,2:].values

    test_pixel = pixel_values.astype('int32').reshape(-1,28,28,1)
    test_label = np.asarray([one_hot_rows[symbol] for symbol in letters]).astype('float32')
    return test_pixel, test_label

# Build the test arrays once up front and print their shapes as a sanity check.
test_pixel, test_label = prepare_testset()
print(test_pixel.shape, test_label.shape)

In [None]:
def train_test_model(i):
    """Train one InceptionResNetV2 + one-hot-letter model on a fresh shuffle and save it.

    The dataset is rebuilt (and therefore reshuffled) on every call so that the
    saved models differ from one another. After the early-stopped main fit, the
    model is trained for 5 more epochs on the validation set before being saved.

    Args:
        i: run index; printed and used as the suffix of the saved .h5 file.

    Reads the globals N_dim, epochs_num, verbose and path.
    """
    tf.keras.backend.clear_session()

    (train_pixel, train_label, train_answer,
     valid_pixel, valid_label, valid_answer) = prepare_datasets()

    early_stop = tf.keras.callbacks.EarlyStopping(monitor = 'val_accuracy', patience = 15, restore_best_weights = True)
    lr_decay = ReduceLROnPlateau(monitor = 'loss', patience = 5)
    callbacks1 = [early_stop, lr_decay]

    ModelName = 'InceptionResnetV2_bicubic224_val_kmean'
    print(f"{ModelName}  {i!s}")

    with tf.device('/device:GPU:0'):
        # Image branch: untrained single-channel backbone -> pooled -> Dense(128).
        cnn = tf.keras.applications.InceptionResNetV2(
            include_top=False,
            weights=None,
            input_tensor=None,
            input_shape=(N_dim, N_dim, 1),
            pooling=None,
        )
        cnn_out = layers.Dense(128, activation = 'relu')(layers.GlobalAveragePooling2D()(cnn.output))

        # Letter branch: 52-way one-hot -> two Dense(104) layers.
        dense_input = layers.Input(shape=(52,))
        hidden = layers.Dense(104, activation = 'relu')(dense_input)
        hidden = layers.Dense(104, activation = 'relu')(hidden)
        dense_model = tf.keras.Model(inputs=dense_input, outputs=hidden)

        merged = layers.concatenate([cnn_out, dense_model.output])
        merged = layers.Dense(32, activation='relu')(merged)
        digit_probs = layers.Dense(10, activation='softmax')(merged)

        concat_model = tf.keras.models.Model([cnn.input, dense_input], digit_probs)
        concat_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
        history = concat_model.fit(
            [train_pixel, train_label],
            train_answer,
            validation_data=([valid_pixel, valid_label], valid_answer),
            epochs=epochs_num,
            verbose=verbose,
            callbacks = callbacks1,
        )

    best_train = max(history.history['accuracy'])
    best_valid = max(history.history['val_accuracy'])
    print(f"Train accuracy={best_train:.5f}, Validation accuracy={best_valid:.5f}")

    # Final short pass over the validation data itself before saving.
    with tf.device('/device:GPU:0'):
        concat_model.fit([valid_pixel, valid_label], valid_answer, epochs=5, verbose=verbose)
    concat_model.save(f"{path}{ModelName}_{i!s}.h5")

# Train and save five models (file suffixes _0 .. _4); each call rebuilds and reshuffles the dataset.
for i in range(5):
    train_test_model(i)