# Contents

# 1. Fashion MNIST
1. [Load Library](#ch0)
1. [Read Data](#ch1)
1. [Train Test Validation Split](#ch2)
1. [Normalization](#ch3)
1. [Convert Inputs for NNs](#ch4)



# 2. Models
1. [Pipeline](#ch6)
2. [deneme](#ch6)

<a id="ch0"></a>
# Load Library

In [None]:
import os
import numpy as np 
import pandas as pd 

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras.utils import to_categorical
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
import keras


from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

import matplotlib.pyplot as plt
%matplotlib inline 

import sys
sys.path.insert(0,'..')
from utils import util, cv_models
from utils.random_eraser import get_random_eraser

from skimage.color import rgb2gray
from skimage import transform

#import keras.backend as K
#K.set_floatx('float16')
#K.set_epsilon(1e-4) 

<a id="ch1"></a>
# Read Data

In [None]:
# Load the Fashion-MNIST train and test CSV files from the shared Data folder.
# Later cells read column 0 as the class id and the remaining columns as pixels.
data_train, data_test = (
    pd.read_csv('../Data/fashion-mnist_train.csv'),
    pd.read_csv('../Data/fashion-mnist_test.csv'),
)

# Human-readable name for each integer class id (0-9), keyed by that id.
target_names = dict(enumerate([
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]))

<a id="ch2"></a>
# Train Test Validation Split

In [None]:
# Training CSV: column 0 is the integer class id, every other column is a pixel.
# Labels are one-hot encoded because the models are trained with categorical
# cross-entropy.
X = np.array(data_train.iloc[:, 1:])
y = to_categorical(np.array(data_train.iloc[:, 0]))

# Hold out 20% of the training data as a validation set used to monitor the
# classifier during training; random_state is fixed for reproducible splits.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=13)
X_train, X_val = X_train.astype('float32'), X_val.astype('float32')

# Test data, prepared the same way as the training data.
X_test = np.array(data_test.iloc[:, 1:]).astype('float32')
y_test = to_categorical(np.array(data_test.iloc[:, 0]))

# Integer (non one-hot) labels, used for metrics and plotting.
# Note: y_test_true is a pandas Series while y_train_true is a NumPy array.
y_test_true  = data_test.iloc[:, 0]
y_train_true = y_train.argmax(axis=1)

<a id="ch3"></a>
# Normalization

In [None]:
# Divide every pixel by 255 in place (the arrays are already float32).
# Presumably the raw pixels are 8-bit intensities, which maps them to [0, 1].
np.divide(X_train, 255, out=X_train)
np.divide(X_test, 255, out=X_test)
np.divide(X_val, 255, out=X_val)

# Number of distinct class ids present in the training labels.
num_classes = np.unique(data_train.iloc[:, 0]).size

<a id="ch4"></a>
# Convert Inputs For NNs

In [None]:
def resize_32x32(imgs):
    """Upscale a batch of 28x28 single-channel images to 32x32.

    Args:
        imgs: NumPy array that can be reshaped to ``(-1, 28, 28, 1)``
            (for example flat 784-pixel rows).

    Returns:
        A new array of shape ``(n_images, 32, 32, 1)`` allocated with
        ``np.zeros`` (NumPy's default float dtype), where each image has been
        resized independently with ``skimage.transform.resize``.
    """
    batch = imgs.reshape((-1, 28, 28, 1))
    upscaled = np.zeros((batch.shape[0], 32, 32, 1))
    for idx, img in enumerate(batch):
        # Drop the channel axis for skimage, then write back into channel 0.
        upscaled[idx, ..., 0] = transform.resize(img[..., 0], (32, 32))
    return upscaled

# Flat Input
# Flat input: keep references to the normalised arrays as they are.
X_train_flat, X_test_flat, X_val_flat = X_train, X_test, X_val

# 28x28 single-channel images for the simple CNNs.
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

X_train28 = X_train.reshape(len(X_train), *input_shape)
X_test28  = X_test.reshape(len(X_test), *input_shape)
X_val28   = X_val.reshape(len(X_val), *input_shape)

# 32x32 single-channel images for the deeper architectures.
X_train32 = resize_32x32(X_train)
X_val32   = resize_32x32(X_val)
X_test32  = resize_32x32(X_test)

<a id="ch6"></a>
# Pipeline

In [None]:
# File names of the models to train in this run. Entries are toggled by
# (un)commenting them and must stay in the same order as `models` below,
# because the training loops pair the two lists with zip().
names = [
    # 'mlp.h5',
    # 'simpleCNN.h5',
    # 'CNNDropout.h5',
    # 'CNNBatchNorm.h5',
    # 'simpleVGG.h5',
    # 'simpleInception.h5',
    # 'simpleResnet.h5',
    'wideResnet.h5',
    'mobileNetV2.h5',
    # 'Resnet50.h5',
    # 'NASNet.h5',
]

# Input-format groups: the training loops look a model name up in these lists
# to decide which version of the data (flat, 28x28 or 32x32) to feed it.
flat = ['mlp.h5']

inp28 = [
    'simpleCNN.h5',
    'CNNDropout.h5',
    'CNNBatchNorm.h5',
]

inp32 = [
    'Resnet50.h5',
    'mobileNetV2.h5',
    'wideResnet.h5',
    'NASNet.h5',
    'simpleVGG.h5',
    'simpleInception.h5',
    'simpleResnet.h5',
]


# Model instances, in the same order (and with the same toggles) as `names`.
# The meaning of the build_wide_resnet arguments is defined in utils.cv_models.
models = [
    # cv_models.buildMLP(),
    # cv_models.buildSimpleCNN(),
    # cv_models.buildCNNDropout(),
    # cv_models.buildCNNBatchNorm(),
    # cv_models.buildSimpleVGG(),
    # cv_models.buildSimpleInception(),
    # cv_models.buildSimpleResnet(),
    cv_models.build_wide_resnet(16, 4),
    cv_models.buildMobileNetV2(),
    # cv_models.buildResNet50(),
    # cv_models.buildNASNet(),
]

In [None]:
# Per-model outputs, keyed by model file name.
test_pred_df  = pd.DataFrame()   # predicted class ids on the test set
train_pred_df = pd.DataFrame()   # predicted class ids on the training set
metrics_dict  = {}               # test-set metrics
history_dict  = {}               # Keras History objects returned by fit()

# zip() silently drops the surplus entries when the lists differ in length,
# which would pair models with the wrong names after a one-sided toggle.
if len(models) != len(names):
    raise ValueError(
        "models and names must have the same length, got {} and {}".format(
            len(models), len(names)))

# Train each model.
for model, model_name in zip(models, names):
    # Select the data representation that matches the model's input layer.
    if model_name in flat:
        X_train, X_test, X_val = X_train_flat, X_test_flat, X_val_flat
    elif model_name in inp28:
        X_train, X_test, X_val = X_train28, X_test28, X_val28
    elif model_name in inp32:
        X_train, X_test, X_val = X_train32, X_test32, X_val32
    else:
        # Without this guard the arrays chosen for the previous model (or the
        # notebook's current globals) would be reused silently.
        raise ValueError(
            "No input format registered for model {!r}".format(model_name))

    print("----------------------------")
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        batch_size=64, epochs=20,
                        verbose=2,
                        validation_data=(X_val, y_val))

    # Evaluation
    score = model.evaluate(X_test, y_test, verbose=0)
    y_pred_test, metrics = util.get_pred_and_metrics(model,
                                                     X_test,
                                                     y_test_true,
                                                     target_names)

    # The train-set metrics get their own name so that they no longer
    # overwrite the test-set metrics stored in metrics_dict below.
    y_pred_train, train_metrics = util.get_pred_and_metrics(model,
                                                            X_train,
                                                            y_train_true,
                                                            target_names)

    train_pred_df[model_name] = y_pred_train
    test_pred_df[model_name]  = y_pred_test
    metrics_dict[model_name]  = metrics
    history_dict[model_name]  = history

    print(model_name)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Save the model, then plot accuracy/loss curves and the confusion matrix
    # under the model name without its file extension.
    util.save_model(model, model_name)
    plot_name = model_name.split(".")[0]
    util.plot_accuracy_and_loss(history, plot_name)
    util.plot_confussion_matrix(y_test_true, y_pred_test, target_names, plot_name)

In [None]:
import pickle

# Keras History callbacks keep a reference to the model they were attached to,
# so pickling them directly either fails or embeds the whole model, depending
# on the Keras version. Only the per-epoch log dict (`.history`) is stored.
# Entries that are already plain dicts are passed through unchanged.
history_logs = {name: getattr(hist, 'history', hist)
                for name, hist in history_dict.items()}

with open('history8_9.pickle', 'wb') as f:
    pickle.dump(history_logs, f)

with open('metrics8_9.pickle', 'wb') as f:
    pickle.dump(metrics_dict, f)

# Majority Voting

In [None]:
pred_df = test_pred_df
y_true  = y_test_true

# Each column of pred_df holds one model's predicted class ids. For every row,
# count the votes per class and keep the most-voted class; argmax returns the
# first maximum, so ties go to the smallest class id.
y_pred = np.asarray([
    np.bincount(pred_df.loc[row, :]).argmax()
    for row in range(len(pred_df))
])

# Per-class precision / recall / F-score / support as a DataFrame.
prfs = util.precision_recall_fscore_support(y_true, y_pred)
df   = util.accuracyMetrics2df(prfs, target_names=target_names.values())

# Overall accuracy of the voted prediction.
acc = accuracy_score(y_true, y_pred)
print(acc)
df

# Stacking

In [None]:
import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation

# Meta-learner: a small fully connected network whose inputs are the class ids
# predicted by the base models (one column per base model).
model = Sequential([
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


# NOTE: this rebinds the notebook-level X_train / X_test to the base-model
# prediction matrices.
X_train = np.array(train_pred_df)
X_test  = np.array(test_pred_df)

history = model.fit(
    X_train, y_train,
    batch_size=64,
    epochs=10,
    verbose=0,
)

# Evaluation on the stacked test features.
score = model.evaluate(X_test, y_test, verbose=0)
y_pred, metrics = util.get_pred_and_metrics(
    model, X_test, y_test_true, target_names)

In [None]:
# Display the result of the most recent model.evaluate(...) call. The training
# cells print score[0] as the test loss and score[1] as the test accuracy.
score

# Data Augmentation and CallBacks

In [None]:
# Augmentation: small random zooms and shifts plus the project's random-eraser
# preprocessing function; all normalisation, rotation and flipping is disabled.
datagen = ImageDataGenerator(
    # dataset-level / sample-level normalisation: all off
    featurewise_center=False,
    featurewise_std_normalization=False,
    samplewise_center=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # geometric transforms
    rotation_range=0,
    zoom_range=0.02,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=False,
    vertical_flip=False,
    # see utils.random_eraser for the meaning of v_l / v_h / pixel_level
    preprocessing_function=get_random_eraser(v_l=0, v_h=1, pixel_level=False),
)


# Callbacks
# NOTE(review): with patience=10, neither callback can trigger during a
# 10-epoch run such as the one in the next cell - confirm this is intended.

# Halve the learning rate (never below 1e-5) when 'val_acc' stops improving.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=10,
    min_lr=0.00001,
    verbose=0,
)

# Stop training when the validation loss has not decreased for 10 epochs.
early = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=10,
)

In [None]:
# Per-model outputs, keyed by model file name (reset for the augmented run).
test_pred_df  = pd.DataFrame()   # predicted class ids on the test set
train_pred_df = pd.DataFrame()   # predicted class ids on the training set
metrics_dict  = {}               # test-set metrics
history_dict  = {}               # Keras History objects returned by fit_generator()

batch_size = 64

# zip() silently drops the surplus entries when the lists differ in length,
# which would pair models with the wrong names after a one-sided toggle.
if len(models) != len(names):
    raise ValueError(
        "models and names must have the same length, got {} and {}".format(
            len(models), len(names)))

# Train each model on augmented data.
for model, model_name in zip(models, names):
    if model_name in flat:
        # Flat-input models are skipped in this run; the image augmentation
        # pipeline below is only applied to the 28x28 / 32x32 image inputs.
        continue

    # Select the image representation that matches the model's input layer.
    if model_name in inp28:
        X_train, X_test, X_val = X_train28, X_test28, X_val28
    elif model_name in inp32:
        X_train, X_test, X_val = X_train32, X_test32, X_val32
    else:
        # Without this guard the arrays chosen for the previous model (or the
        # notebook's current globals) would be reused silently.
        raise ValueError(
            "No input format registered for model {!r}".format(model_name))

    datagen.fit(X_train)

    print("----------------------------")
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])

    # steps_per_epoch must be a whole number of batches; round up so that the
    # final, possibly smaller, batch is still included in every epoch.
    steps_per_epoch = int(np.ceil(len(X_train) / float(batch_size)))

    history = model.fit_generator(
        datagen.flow(X_train, y_train, batch_size=batch_size),
        steps_per_epoch=steps_per_epoch,
        epochs=10,
        verbose=0,
        validation_data=(X_val, y_val),
        callbacks=[early, learning_rate_reduction])

    # Evaluation
    score = model.evaluate(X_test, y_test, verbose=0)
    y_pred_test, metrics = util.get_pred_and_metrics(model,
                                                     X_test,
                                                     y_test_true,
                                                     target_names)

    # The train-set metrics get their own name so that they no longer
    # overwrite the test-set metrics stored in metrics_dict below.
    y_pred_train, train_metrics = util.get_pred_and_metrics(model,
                                                            X_train,
                                                            y_train_true,
                                                            target_names)

    train_pred_df[model_name] = y_pred_train
    test_pred_df[model_name]  = y_pred_test
    metrics_dict[model_name]  = metrics
    history_dict[model_name]  = history

    print(model_name)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    #print(metrics_df)

    # Save the model; the accuracy/loss and confusion-matrix plots are
    # disabled for this run.
    util.save_model(model, model_name)
    #util.plot_accuracy_and_loss(history)
    #util.plot_confussion_matrix(y_true, y_pred, target_names)

# Drawing

In [None]:
y_pred = list(y_pred)

# Element-wise comparison of predictions against the true labels, then the
# positions where they agree / disagree.
hit_mask  = (y_pred == y_true)
miss_mask = (y_pred != y_true)

correct   = np.nonzero(hit_mask)[0]
incorrect = np.nonzero(miss_mask)[0]

# Plot Correctly Classified

In [None]:
# Show the first nine correctly classified test images in a 3x3 grid.
# The loop variable must not be called `correct`: that would rebind the index
# array to its last element, so re-running this cell would fail.
# Assumes X_test currently holds the 32x32 images.
for i, idx in enumerate(correct[:9]):
    plt.subplot(3, 3, i + 1)
    plt.imshow(X_test[idx].reshape(32, 32), cmap='gray', interpolation='none')
    plt.title("Predicted {}, Class {}".format(y_pred[idx], y_true[idx]))
# Adjust the spacing once, after all subplots exist.
plt.tight_layout()

# Plot Incorrectly Classified

In [None]:
# Show the first nine misclassified test images in a 3x3 grid.
# The loop variable must not be called `incorrect`: that would rebind the index
# array to its last element, so re-running this cell would fail.
# Assumes X_test currently holds the 32x32 images.
for i, idx in enumerate(incorrect[:9]):
    plt.subplot(3, 3, i + 1)
    plt.imshow(X_test[idx].reshape(32, 32), cmap='gray', interpolation='none')
    plt.title("Predicted {}, Class {}".format(y_pred[idx], y_true[idx]))
# Adjust the spacing once, after all subplots exist.
plt.tight_layout()