In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import keras
import itertools 
import time

from ipynb.fs.defs.Additional_metrics import *
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, precision_recall_curve
from sklearn.model_selection import GridSearchCV
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
import numpy as np

In [7]:
train_data = pd.read_csv("datasets/fashion-mnist_train.csv")
X_train = np.array(train_data.iloc[:,1:])
print(X_train)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


## Trained on 30% samples of the original training dataset

In [2]:
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

train_data = pd.read_csv("datasets/fashion-mnist_train.csv")
X_train = np.array(train_data.iloc[:,1:])
y_train = to_categorical(np.array(train_data.iloc[:,0]))

X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_train = X_train.astype('float32')

test_data = pd.read_csv("datasets/fashion-mnist_test.csv")
X_test = np.array(test_data.iloc[:,1:])
y_test = to_categorical(np.array(test_data.iloc[:,0]))

X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
X_test = X_test.astype('float32')

X_train_std = (X_train - X_train.mean())/X_train.std()
X_test_std = (X_test - X_test.mean())/X_test.std()

In [3]:
X_train_30, X_test_30, y_train_30, y_test_30 = train_test_split(X_train_std, y_train, test_size=0.7, stratify=y_train, random_state=42)

In [4]:
X_train_301, X_test_301, y_train_301, y_test_301 = train_test_split(X_train_std, y_train, test_size=0.2, stratify=y_train, random_state=42)

In [5]:
acc_per_fold = []
loss_per_fold = []

inputs = np.concatenate((X_train_301, X_test_301), axis=0)
targets = np.concatenate((y_train_301, y_test_301), axis=0)

kfold = KFold(n_splits=5, shuffle=True)

fold_no = 1

time_tmp = []

for train, test in kfold.split(inputs, targets):

    cnn1 = Sequential()
    cnn1.add(Conv2D(32, kernel_size=(5,5), activation='relu', input_shape=input_shape))
    cnn1.add(MaxPooling2D(pool_size=(2, 2)))
    cnn1.add(Dropout(0.2))

    cnn1.add(Flatten())

    cnn1.add(Dense(128, activation='relu'))
    cnn1.add(Dense(10, activation='softmax'))

    cnn1.compile(loss=keras.losses.categorical_crossentropy,
                     optimizer=keras.optimizers.Adam(),
                     metrics=['accuracy'])

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    start = time.time()

    history = cnn1.fit(inputs[train], targets[train],
                  batch_size=256,
                  epochs=10,
                  verbose=1)

    scores = cnn1.evaluate(inputs[test], targets[test], verbose=0)

    end = time.time()

    time_tmp.append(end-start)

    print(f'Score for fold {fold_no}: {cnn1.metrics_names[0]} of {scores[0]}; {cnn1.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1

print('')
print('Average execution time: ', round(np.mean(time_tmp),8), sep='')    

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 1: loss of 0.2541258633136749; accuracy of 91.24166369438171%
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 2: loss of 0.25623631477355957; accuracy of 91.00000262260437%
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 3: loss of 0.2511812448501587; accuracy of 91.05833172798157%
------------------------------------------------------------------------
Training for fold 4 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epo

In [6]:
print('------------------------------------------------------------------------')
print('Score per fold')
for i in range(0, len(acc_per_fold)):
  print('------------------------------------------------------------------------')
  print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')

------------------------------------------------------------------------
Score per fold
------------------------------------------------------------------------
> Fold 1 - Loss: 0.2541258633136749 - Accuracy: 91.24166369438171%
------------------------------------------------------------------------
> Fold 2 - Loss: 0.25623631477355957 - Accuracy: 91.00000262260437%
------------------------------------------------------------------------
> Fold 3 - Loss: 0.2511812448501587 - Accuracy: 91.05833172798157%
------------------------------------------------------------------------
> Fold 4 - Loss: 0.24466632306575775 - Accuracy: 91.24166369438171%
------------------------------------------------------------------------
> Fold 5 - Loss: 0.2452380508184433 - Accuracy: 91.41666889190674%
------------------------------------------------------------------------
Average scores for all folds:
> Accuracy: 91.19166612625122 (+- 0.14841746432457012)
> Loss: 0.25028955936431885
------------------------

#### Training on 30% training samples, test on test dataset

In [15]:
acc_per_fold = []
loss_per_fold = []

time_tmp = []


cnn1 = Sequential()
cnn1.add(Conv2D(32, kernel_size=(5,5), activation='relu', input_shape=input_shape))
cnn1.add(MaxPooling2D(pool_size=(2, 2)))
cnn1.add(Dropout(0.2))

cnn1.add(Flatten())

cnn1.add(Dense(128, activation='relu'))
cnn1.add(Dense(10, activation='softmax'))

cnn1.compile(loss=keras.losses.categorical_crossentropy,
                     optimizer=keras.optimizers.Adam(),
                     metrics=['accuracy'])

start = time.time()
history = cnn1.fit(X_train_30, y_train_30,
                  batch_size=256,
                  epochs=10,
                  verbose=1)

scores = cnn1.evaluate(X_test_std, y_test, verbose=0)

end = time.time()

time_tmp.append(end-start)

print(f'Score for fold {fold_no}: {cnn1.metrics_names[0]} of {scores[0]}; {cnn1.metrics_names[1]} of {scores[1]*100}%')
acc_per_fold.append(scores[1] * 100)
loss_per_fold.append(scores[0])


print('')
print('Average execution time: ', round(np.mean(time_tmp),8), sep='')    

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 6: loss of 0.30254828929901123; accuracy of 89.17999863624573%

Average execution time: 24.60630941


## Trained on 100% samples of the original training dataset

In [9]:
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

train_data = pd.read_csv("datasets/fashion-mnist_train.csv")
X_train = np.array(train_data.iloc[:,1:])
y_train = to_categorical(np.array(train_data.iloc[:,0]))

X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_train = X_train.astype('float32')

test_data = pd.read_csv("datasets/fashion-mnist_test.csv")
X_test = np.array(test_data.iloc[:,1:])
y_test = to_categorical(np.array(test_data.iloc[:,0]))

X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
X_test = X_test.astype('float32')

X_train_std = (X_train - X_train.mean())/X_train.std()
X_test_std = (X_test - X_test.mean())/X_test.std()

In [10]:
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(X_train_std, y_train, test_size=0.2, stratify=y_train, random_state=42)

In [11]:
acc_per_fold = []
loss_per_fold = []

inputs = np.concatenate((X_train_1, X_test_1), axis=0)
targets = np.concatenate((y_train_1, y_test_1), axis=0)

kfold = KFold(n_splits=5, shuffle=True)

fold_no = 1

time_tmp = []

for train, test in kfold.split(inputs, targets):

    cnn1 = Sequential()
    cnn1.add(Conv2D(32, kernel_size=(5,5), activation='relu', input_shape=input_shape))
    cnn1.add(MaxPooling2D(pool_size=(2, 2)))
    cnn1.add(Dropout(0.2))

    cnn1.add(Flatten())

    cnn1.add(Dense(128, activation='relu'))
    cnn1.add(Dense(10, activation='softmax'))

    cnn1.compile(loss=keras.losses.categorical_crossentropy,
                     optimizer=keras.optimizers.Adam(),
                     metrics=['accuracy'])

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    start = time.time()

    history = cnn1.fit(inputs[train], targets[train],
                  batch_size=256,
                  epochs=10,
                  verbose=1)

    scores = cnn1.evaluate(inputs[test], targets[test], verbose=0)

    end = time.time()

    time_tmp.append(end-start)

    print(f'Score for fold {fold_no}: {cnn1.metrics_names[0]} of {scores[0]}; {cnn1.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    # Increase fold number
    fold_no = fold_no + 1

print('')
print('Average execution time: ', round(np.mean(time_tmp),8), sep='')    

------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 1: loss of 0.24414268136024475; accuracy of 91.49166941642761%
------------------------------------------------------------------------
Training for fold 2 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 2: loss of 0.23866590857505798; accuracy of 91.44166707992554%
------------------------------------------------------------------------
Training for fold 3 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 3: loss of 0.24370573461055756; accuracy of 91.14999771118164%
------------------------------------------------------------------------
Training for fold 4 ...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
E

In [12]:
print('------------------------------------------------------------------------')
print('Score per fold')
for i in range(0, len(acc_per_fold)):
  print('------------------------------------------------------------------------')
  print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')

------------------------------------------------------------------------
Score per fold
------------------------------------------------------------------------
> Fold 1 - Loss: 0.24414268136024475 - Accuracy: 91.49166941642761%
------------------------------------------------------------------------
> Fold 2 - Loss: 0.23866590857505798 - Accuracy: 91.44166707992554%
------------------------------------------------------------------------
> Fold 3 - Loss: 0.24370573461055756 - Accuracy: 91.14999771118164%
------------------------------------------------------------------------
> Fold 4 - Loss: 0.23662619292736053 - Accuracy: 91.8416678905487%
------------------------------------------------------------------------
> Fold 5 - Loss: 0.2500353455543518 - Accuracy: 91.09166860580444%
------------------------------------------------------------------------
Average scores for all folds:
> Accuracy: 91.40333414077759 (+- 0.26934133124599136)
> Loss: 0.24263517260551454
-----------------------

In [13]:
acc_per_fold = []
loss_per_fold = []

time_tmp = []


cnn1 = Sequential()
cnn1.add(Conv2D(32, kernel_size=(5,5), activation='relu', input_shape=input_shape))
cnn1.add(MaxPooling2D(pool_size=(2, 2)))
cnn1.add(Dropout(0.2))

cnn1.add(Flatten())

cnn1.add(Dense(128, activation='relu'))
cnn1.add(Dense(10, activation='softmax'))

cnn1.compile(loss=keras.losses.categorical_crossentropy,
                     optimizer=keras.optimizers.Adam(),
                     metrics=['accuracy'])

start = time.time()
history = cnn1.fit(X_train_std, y_train,
                  batch_size=256,
                  epochs=10,
                  verbose=1)

scores = cnn1.evaluate(X_test_std, y_test, verbose=0)

end = time.time()

time_tmp.append(end-start)

print(f'Score for fold {fold_no}: {cnn1.metrics_names[0]} of {scores[0]}; {cnn1.metrics_names[1]} of {scores[1]*100}%')
acc_per_fold.append(scores[1] * 100)
loss_per_fold.append(scores[0])


print('')
print('Average execution time: ', round(np.mean(time_tmp),8), sep='')    

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 6: loss of 0.23493218421936035; accuracy of 91.39999747276306%

Average execution time: 77.93272161


## Trained on 100% samples with PCA reduction

In [None]:
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

train_data = pd.read_csv("datasets/fashion-mnist_train.csv")
X_train = np.array(train_data.iloc[:,1:])
y_train = to_categorical(np.array(train_data.iloc[:,0]))

X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_train = X_train.astype('float32')

test_data = pd.read_csv("datasets/fashion-mnist_test.csv")
X_test = np.array(test_data.iloc[:,1:])
y_test = to_categorical(np.array(test_data.iloc[:,0]))

X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
X_test = X_test.astype('float32')

X_train_std = (X_train - X_train.mean())/X_train.std()
X_test_std = (X_test - X_test.mean())/X_test.std()

In [None]:
pca = PCA(n_components=pc)
                    pca.fit(X_trainCV3)
                    X_train_r = pca.transform(X_trainCV3)
                    X_test_r = pca.transform(X_testCV3)