In this notebook, we use some tensorflow tools build a model that can identify a piece of clothing from a low-reslution image

# Imports

In [1]:
#display
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

#machine learning libraries
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout


In [2]:
img_rows, img_cols = 28, 28
num_classes = 10
def prep_data(raw):
    y = raw[:, 0]
    out_y = keras.utils.to_categorical(y, num_classes)
    
    x = raw[:,1:]
    num_images = raw.shape[0]
    out_x = x.reshape(num_images, img_rows, img_cols, 1)
    out_x = out_x / 255
    return out_x, out_y

fashion_file = "fashionmnist/fashion-mnist_train.csv"
fashion_data = np.loadtxt(fashion_file, skiprows=1, delimiter=',')
x, y = prep_data(fashion_data)

In [3]:
def buildModel(act='relu'):
    fashion_model = Sequential()
    fashion_model.add(Conv2D(20,
                    activation=act,
                    kernel_size=3,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Conv2D(20,
                    activation=act,
                    kernel_size=3,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Conv2D(20,
                    activation=act,
                    kernel_size=3,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Conv2D(40,
                    activation=act,
                    kernel_size=3,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(100, activation=act))
    fashion_model.add(Dense(10, activation='softmax'))

    return fashion_model

In [7]:
def compileModel(loss="categorical_crossentropy",optimizer='adam',metrics= ['accuracy'], build_model = buildModel()):
    fashion_model = build_model
    fashion_model.compile(loss = loss,
                      optimizer = optimizer,
                      metrics = metrics) 
    return fashion_model

In [5]:
def fitModel(batch_size = 100,epochs = 4,validation_split = 0.2,build_model = buildModel()):
    fashion_model = compileModel()
    fashion_model.fit(x,y, batch_size = batch_size,
                  epochs = epochs,
                  validation_split = validation_split)
    return fashion_model    

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [None]:
fitModel()

when run, the model works quite slowly but accurately, to speed up the model will increase the stride size and add a dropout layer to help decrease the amount of overfitting

In [8]:
def buildwithStridesAndDropout(act='relu'):
    fashion_model = Sequential()
    fashion_model.add(Conv2D(20,
                    activation=act,
                    kernel_size=3, 
                    strides = 2,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Dropout(0.5))
    fashion_model.add(Conv2D(20,
                    activation=act,
                    kernel_size=3,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Dropout(0.5))
    fashion_model.add(Conv2D(20,
                    activation=act,
                    kernel_size=3,
                    strides = 2,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Dropout(0.5))
    fashion_model.add(Conv2D(40,
                    activation=act,
                    kernel_size=3,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Conv2D(40,
                    activation=act,
                    kernel_size=3,
                    input_shape = (img_rows, img_cols, 1)))
    fashion_model.add(Flatten())
    fashion_model.add(Dense(128, activation=act))
    fashion_model.add(Dense(10, activation='softmax'))

    return fashion_model

fitModel(build_model = buildwithStridesAndDropout())

Train on 48000 samples, validate on 12000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.engine.sequential.Sequential at 0x20a37b675f8>

as seen, the value is about them same, but the speed is much faster. Does changing the activiation function make much difference?

In [9]:
fitModel(build_model = buildwithStridesAndDropout(act='elu'))

Train on 48000 samples, validate on 12000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.engine.sequential.Sequential at 0x20a37b675f8>

In [10]:
fitModel(build_model = buildwithStridesAndDropout(act='selu'))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 48000 samples, validate on 12000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.engine.sequential.Sequential at 0x20a37b675f8>

there is very little difference, but the selu function seems to the model that suffers the least from overfitting, as, unlike the other two, the valiation accuracy continually increases.