In [1]:
# Fix the NumPy and TensorFlow global random seeds so reruns are comparable.
from numpy.random import seed
seed(42)
try:
    # TensorFlow 1.x exposes the seed setter at the package root.
    from tensorflow import set_random_seed
except ImportError:
    # TensorFlow 2.x dropped the root-level name; tf.random.set_seed replaces it.
    import tensorflow as tf
    set_random_seed = tf.random.set_seed
set_random_seed(42)

In [2]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split

%matplotlib inline

In [3]:
# Workaround kept from the original notebook: might help if the notebook
# kernel dies.
# NOTE(review): presumably this suppresses the duplicate OpenMP runtime
# abort — confirm it is still needed in the target environment.
import os
os.environ.update(KMP_DUPLICATE_LIB_OK='True')

# Подготовка данных

In [4]:
# Read the train and test CSV files from the working directory.
train_set, test_set = map(
    pd.read_csv,
    ('fashion-mnist_train.csv', 'fashion-mnist_test.csv'),
)

In [5]:
# Feature frames: every column except the 'label' target column.
X_train = train_set.loc[:, ~train_set.columns.isin(['label'])]
X_test = test_set.loc[:, ~test_set.columns.isin(['label'])]

In [6]:
# Targets: the 'label' column of each split.
y_train, y_test = train_set['label'], test_set['label']

In [7]:
# Cast the features to 32-bit floats ahead of scaling.
X_train, X_test = X_train.astype('float32'), X_test.astype('float32')

In [8]:
# Scale the features by 1/255.
# NOTE(review): assumes the raw values are 0-255 pixel intensities — confirm.
# Re-running this cell without re-running the cast cell scales the data again.
X_train = X_train / 255
X_test = X_test / 255

In [9]:
# One-hot encode the integer class labels for categorical cross-entropy.
num_classes = 10
y_train, y_test = (
    tf.keras.utils.to_categorical(y_train, num_classes=num_classes),
    tf.keras.utils.to_categorical(y_test, num_classes=num_classes),
)

In [10]:
# Hold out 10% of the training rows as a validation split (seeded shuffle).
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=42,
)

In [11]:
# Training hyperparameters shared by every model in this notebook.
batch_size, epochs = 512, 20

# Logistic regression

In [12]:
# TensorBoard callback writing this model's logs to its own sub-directory.
model_name = 'log_regression'
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'./logs/{model_name}',
)

In [13]:
# Multinomial logistic regression: one softmax layer over the 784 input features.
lr_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(num_classes, activation='softmax', input_shape=(784,)),
])

In [14]:
# Train with plain SGD on categorical cross-entropy and report accuracy.
lr_model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

In [15]:
# Fit on the training split; the validation split is scored after each epoch
# and the run is logged to TensorBoard.
lr_model.fit(
    x=X_train, y=y_train,
    validation_data=(X_val, y_val),
    epochs=epochs, batch_size=batch_size,
    callbacks=[tensorboard],
    verbose=1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x108ef52e8>

In [16]:
# Score the logistic regression on the validation split (X_val / y_val).
# The labels say "Validation" because these are not test-set metrics.
loss, accuracy = lr_model.evaluate(X_val, y_val, verbose=0)
print('Validation loss:', loss)
print('Validation accuracy:', accuracy)

Test loss: 0.6085037322044372
Test accuracy: 0.8048333333333333


# Fully Connected Neural Network

In [17]:
# Fully connected baseline: the model from the 5.2.6 lecture but without
# drop-out — two 512-unit ReLU layers and a softmax classifier.
model_name = 'NN_1'
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'./logs/{model_name}',
)

NN_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [18]:
# Compile with Adam (default settings) and categorical cross-entropy.
NN_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split, scoring the validation split after each epoch.
NN_model.fit(
    x=X_train, y=y_train,
    validation_data=(X_val, y_val),
    epochs=epochs, batch_size=batch_size,
    callbacks=[tensorboard],
    verbose=1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1a38fa8390>

In [19]:
# Score NN_1 on the validation split (X_val / y_val).
# The labels say "Validation" because these are not test-set metrics.
loss, accuracy = NN_model.evaluate(X_val, y_val, verbose=0)
print('Validation loss:', loss)
print('Validation accuracy:', accuracy)

Test loss: 0.35368617677688596
Test accuracy: 0.8826666666666667


In [20]:
# Same fully connected network with drop-out (rate 0.2) after each hidden layer.
model_name = 'NN_2'
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'./logs/{model_name}',
)

NN_model2 = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [21]:
# Compile with Adam (default settings) and categorical cross-entropy.
NN_model2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split, scoring the validation split after each epoch.
NN_model2.fit(
    x=X_train, y=y_train,
    validation_data=(X_val, y_val),
    epochs=epochs, batch_size=batch_size,
    callbacks=[tensorboard],
    verbose=1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1a4ea9def0>

In [22]:
# Score NN_2 on the validation split (X_val / y_val).
# The labels say "Validation" because these are not test-set metrics.
loss, accuracy = NN_model2.evaluate(X_val, y_val, verbose=0)
print('Validation loss:', loss)
print('Validation accuracy:', accuracy)

Test loss: 0.2798681590259075
Test accuracy: 0.898


### Вывод

Полносвязная нейросеть работает лучше, чем логистическая регрессия, т.к. это более сложная модель, которая позволяет обнаружить больше зависимостей в данных.

# Convolutional Neural Network

In [23]:
# Convert the feature frames to NumPy arrays for the convolutional models.
# `.values` replaces DataFrame.as_matrix(), which is deprecated and was
# removed in pandas 1.0; both yield the frame's underlying ndarray.
CNN_X_train = X_train.values
CNN_X_val = X_val.values
CNN_X_test = X_test.values

# Reshape each 784-value row into a 28x28 single-channel image
# (rows, height, width, channels), matching input_shape=(28, 28, 1) below.
CNN_X_train = CNN_X_train.reshape(X_train.shape[0], 28, 28, 1)
CNN_X_val = CNN_X_val.reshape(X_val.shape[0], 28, 28, 1)
CNN_X_test = CNN_X_test.reshape(X_test.shape[0], 28, 28, 1)

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until


In [24]:
# Baseline CNN (model from task 5.2): two conv + max-pool stages, then a
# 64-unit dense layer and a softmax classifier.
model_name = 'CNN_1'
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'./logs/{model_name}',
)

CNN_model = tf.keras.models.Sequential([
    tf.keras.layers.Convolution2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Convolution2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [25]:
# Compile with Adam (default settings) and categorical cross-entropy.
CNN_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the image-shaped training data, scoring the validation split after
# each epoch.
CNN_model.fit(
    x=CNN_X_train, y=y_train,
    validation_data=(CNN_X_val, y_val),
    epochs=epochs, batch_size=batch_size,
    callbacks=[tensorboard],
    verbose=1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1a51b58dd8>

In [26]:
# Score CNN_1 on the validation split (CNN_X_val / y_val).
# The labels say "Validation" because these are not test-set metrics.
loss, accuracy = CNN_model.evaluate(CNN_X_val, y_val, verbose=0)
print('Validation loss:', loss)
print('Validation accuracy:', accuracy)

Test loss: 0.25830628256003063
Test accuracy: 0.905


In [27]:
# Deeper CNN: two more conv + max-pool stages (four in total).
# padding='same' was added because without it the author got a
# 'Negative dimension size...' error.
model_name = 'CNN_2'
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'./logs/{model_name}',
)

CNN_model2 = tf.keras.models.Sequential([
    tf.keras.layers.Convolution2D(32, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [28]:
# Compile with Adam (default settings) and categorical cross-entropy.
CNN_model2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the image-shaped training data, scoring the validation split after
# each epoch.
CNN_model2.fit(
    x=CNN_X_train, y=y_train,
    validation_data=(CNN_X_val, y_val),
    epochs=epochs, batch_size=batch_size,
    callbacks=[tensorboard],
    verbose=1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1a51b581d0>

In [29]:
# Score CNN_2 on the validation split (CNN_X_val / y_val).
# The labels say "Validation" because these are not test-set metrics.
loss, accuracy = CNN_model2.evaluate(CNN_X_val, y_val, verbose=0)
print('Validation loss:', loss)
print('Validation accuracy:', accuracy)

Test loss: 0.24838692478338878
Test accuracy: 0.9138333333333334


In [30]:
# CNN_2 architecture with batch normalization after each convolution layer.
model_name = 'CNN_3'
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'./logs/{model_name}',
)

CNN_model3 = tf.keras.models.Sequential([
    tf.keras.layers.Convolution2D(32, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [31]:
# Compile with Adam (default settings) and categorical cross-entropy.
CNN_model3.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the image-shaped training data, scoring the validation split after
# each epoch.
CNN_model3.fit(
    x=CNN_X_train, y=y_train,
    validation_data=(CNN_X_val, y_val),
    epochs=epochs, batch_size=batch_size,
    callbacks=[tensorboard],
    verbose=1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1a8d7cea58>

In [42]:
# Score CNN_3 on the validation split (CNN_X_val / y_val).
# The labels say "Validation" because these are not test-set metrics.
loss, accuracy = CNN_model3.evaluate(CNN_X_val, y_val, verbose=0)
print('Validation loss:', loss)
print('Validation accuracy:', accuracy)

Test loss: 0.4267293870647748
Test accuracy: 0.9036666666666666


In [33]:
# CNN_3 architecture with drop-out (rate 0.2) after each max pooling layer.
model_name = 'CNN_4'
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'./logs/{model_name}',
)

CNN_model4 = tf.keras.models.Sequential([
    tf.keras.layers.Convolution2D(32, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [34]:
# Compile with Adam (default settings) and categorical cross-entropy.
CNN_model4.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the image-shaped training data, scoring the validation split after
# each epoch.
CNN_model4.fit(
    x=CNN_X_train, y=y_train,
    validation_data=(CNN_X_val, y_val),
    epochs=epochs, batch_size=batch_size,
    callbacks=[tensorboard],
    verbose=1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1a957b3390>

In [41]:
# Score CNN_4 on the validation split (CNN_X_val / y_val).
# The labels say "Validation" because these are not test-set metrics.
loss, accuracy = CNN_model4.evaluate(CNN_X_val, y_val, verbose=0)
print('Validation loss:', loss)
print('Validation accuracy:', accuracy)

Test loss: 0.20755504260460536
Test accuracy: 0.921


In [37]:
# Variant: four conv + max-pool stages (no batch normalization or drop-out in
# the conv stack) with batch normalization after the fully connected layer.
model_name = 'CNN_5'
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'./logs/{model_name}',
)

CNN_model5 = tf.keras.models.Sequential([
    tf.keras.layers.Convolution2D(32, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Convolution2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [38]:
# Compile with Adam (default settings) and categorical cross-entropy.
CNN_model5.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the image-shaped training data, scoring the validation split after
# each epoch.
CNN_model5.fit(
    x=CNN_X_train, y=y_train,
    validation_data=(CNN_X_val, y_val),
    epochs=epochs, batch_size=batch_size,
    callbacks=[tensorboard],
    verbose=1,
)

Train on 54000 samples, validate on 6000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1a957b2ac8>

In [39]:
# Score CNN_5 on the validation split (CNN_X_val / y_val).
# The labels say "Validation" because these are not test-set metrics.
loss, accuracy = CNN_model5.evaluate(CNN_X_val, y_val, verbose=0)
print('Validation loss:', loss)
print('Validation accuracy:', accuracy)

Test loss: 0.4377724414467812
Test accuracy: 0.8905


### Вывод

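Сверточная нейронная сеть не дает большого прироста качества по сравнению с полносвязной нейросетью (всего около 0.01 пункта accuracy). Увеличение числа слоев также дает совсем небольшой прирост (0.905 → 0.914), а batch normalization сама по себе не улучшила accuracy на валидации (0.904 у CNN_3 и 0.89 у CNN_5); лучший результат (0.921) получен при сочетании batch normalization и drop-out (CNN_4).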

# Выбор лучшей модели

Судя по значению метрики accuracy на валидационных данных, наилучший результат показывает модель CNN_4 (уже на 9-й эпохе обучения) — 0.92. Посмотрим, как эта модель поведет себя на новых данных, посчитав метрику accuracy на тестовом сете.

In [40]:
# Final check: score the selected model (CNN_4) on the test split.
loss, accuracy = CNN_model4.evaluate(x=CNN_X_test, y=y_test, verbose=0)
print('Test loss:', loss)
print('Test accuracy:', accuracy)

Test loss: 0.21060755120813845
Test accuracy: 0.9214
