In [2]:
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam,RMSprop
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization, Activation, MaxPooling2D, GlobalAveragePooling2D, Add,Flatten,Dropout
from tensorflow.keras import Model
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.model_selection import train_test_split



#### Load data

In [3]:
file_path = "../train_valid.csv"
df = pd.read_csv(file_path,header=None)

#### Random sampling Non-1 images

In [4]:
y_count = Counter(df.iloc[:,784]) #counter for labels
max_val = y_count[1] #shown in task 1
for key in y_count.keys():
    if key != 1.0:
        num_sample = max_val - y_count[key]
        df_key = df[df[784] == key]
        sample_df = df_key.sample(n = num_sample, replace=True)
        df = df.append(sample_df)
    else:
        continue

#### New distribution on labels

In [5]:
print(Counter(df.iloc[:,784]))

Counter({0.0: 1405, 1.0: 1405, 2.0: 1405, 3.0: 1405, 4.0: 1405, 5.0: 1405, 6.0: 1405, 7.0: 1405, 8.0: 1405, 9.0: 1405})


#### Split data

In [6]:
df = df.sample(frac=1) #shuffle data

X,y = np.array(df.iloc[:,0:784]),np.array(df[784])

X = X.reshape(len(X),28,28,1)
y = to_categorical(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(11240, 28, 28, 1) (2810, 28, 28, 1) (11240, 10) (2810, 10)


### Build Model

In [12]:
model = Sequential()

model.add(Conv2D(32,kernel_size=3,activation='elu',input_shape=(28,28,1)))
model.add(MaxPooling2D())
model.add(BatchNormalization())
model.add(Dropout(0.15))

model.add(Conv2D(32,kernel_size=3,activation='elu'))
model.add(MaxPooling2D())
model.add(BatchNormalization())
model.add(Dropout(0.15))

model.add(Conv2D(64,kernel_size=5,padding='same',activation='elu'))
model.add(MaxPooling2D())
model.add(BatchNormalization())
model.add(Dropout(0.15))


model.add(GlobalAveragePooling2D())


model.add(Dense(128, activation='elu'))
model.add(BatchNormalization())

model.add(Dense(64, activation='elu'))
model.add(BatchNormalization())

model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 13, 13, 32)        128       
_________________________________________________________________
dropout_4 (Dropout)          (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 11, 11, 32)        9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
batch_normalization_6 (Batch (None, 5, 5, 32)         

In [13]:
def step_decay(epoch):
    """
    a function to decay the learning rate 0.94 every 2 epoch
    """
    initial_lrate = 0.01
    drop = 0.94
    epochs_drop = 2
    end_lr = 0.0001
    lrate = initial_lrate * np.power(drop,  
        np.floor((1+epoch)/epochs_drop))
    if lrate > end_lr:
        return lrate
    else:
        return end_lr
lr_scheduler = LearningRateScheduler(step_decay)

#parameter setting
epochs = 100
batch_size = 256
optimizer = RMSprop(0.01)

#model compile
model.compile(optimizer,loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
model.fit(X_train,y_train,epochs = epochs, batch_size = batch_size,validation_data=(X_test,y_test),callbacks=[lr_scheduler],verbose = 2)

Epoch 1/100
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
44/44 - 2s - loss: 2.2600 - accuracy: 0.2262 - val_loss: 4.2541 - val_accuracy: 0.1263
Epoch 2/100
44/44 - 2s - loss: 1.4926 - accuracy: 0.4727 - val_loss: 6.9388 - val_accuracy: 0.1100
Epoch 3/100
44/44 - 2s - loss: 1.0902 - accuracy: 0.6303 - val_loss: 8.9878 - val_accuracy: 0.0868
Epoch 4/100
44/44 - 2s - loss: 0.8597 - accuracy: 0.7071 - val_loss: 8.7259 - val_accuracy: 0.0972
Epoch 5/100
44/44 - 2s - loss: 0.7410 - accuracy: 0.7491 - val_loss: 9.1739 - val_accuracy: 0.0986
Epoch 6/100
44/44 - 2s - loss: 0.6444 - accuracy: 0.7855 - val_loss: 6.6552 - val_accuracy: 0.1157
Epoch 7/100
44/44 - 2s - loss: 0.5742 - accuracy: 0.8097 - val_loss: 4.3441 - val_accuracy: 0.1811
Epoch 8/100
44/44 - 2s - loss: 0.5462 - accuracy: 0.8164 - val_loss: 5.6957 - val_accuracy: 0.19

<tensorflow.python.keras.callbacks.History at 0x17fdf2280>

#### Since model is good on val data, train it on the full training set and save 

In [15]:
#can use the parameters on the partial training set
model.fit(X,y,epochs = epochs, batch_size = batch_size,validation_data=(X_test,y_test),callbacks=[lr_scheduler],verbose = 2)

Epoch 1/100
55/55 - 2s - loss: 0.3410 - accuracy: 0.8955 - val_loss: 0.6348 - val_accuracy: 0.8217
Epoch 2/100
55/55 - 2s - loss: 0.2903 - accuracy: 0.9065 - val_loss: 0.4057 - val_accuracy: 0.8886
Epoch 3/100
55/55 - 2s - loss: 0.2971 - accuracy: 0.9014 - val_loss: 0.2264 - val_accuracy: 0.9352
Epoch 4/100
55/55 - 2s - loss: 0.2883 - accuracy: 0.9061 - val_loss: 0.2179 - val_accuracy: 0.9338
Epoch 5/100
55/55 - 2s - loss: 0.2745 - accuracy: 0.9107 - val_loss: 0.2306 - val_accuracy: 0.9217
Epoch 6/100
55/55 - 2s - loss: 0.2506 - accuracy: 0.9175 - val_loss: 0.3552 - val_accuracy: 0.8964
Epoch 7/100
55/55 - 2s - loss: 0.2536 - accuracy: 0.9168 - val_loss: 0.1524 - val_accuracy: 0.9537
Epoch 8/100
55/55 - 2s - loss: 0.2421 - accuracy: 0.9221 - val_loss: 0.1301 - val_accuracy: 0.9580
Epoch 9/100
55/55 - 2s - loss: 0.2336 - accuracy: 0.9226 - val_loss: 0.1705 - val_accuracy: 0.9470
Epoch 10/100
55/55 - 2s - loss: 0.2280 - accuracy: 0.9238 - val_loss: 0.1277 - val_accuracy: 0.9580
Epoch 11/

<tensorflow.python.keras.callbacks.History at 0x2d2c94250>

In [16]:
model.save("../Q3_output/model.h5")