# Contents

# 1. Predict and Plot History
1. [Library](#ch0)
1. [Load Data for Ensemble and Stacking](#ch1)
1. [Load Pickles and Predict](#ch2)
1. [Calculate Test Accuracy](#ch3)
1. [Majority Voting](#ch4)
1. [Stacking](#ch5)
1. [Order By Accuracy](#ch6)
1. [Plot History](#ch7)
1. [Cohen Kappa Scores](#ch8)

<a id="ch0"></a>
# Library

In [1]:
import glob
import pickle
import pandas as pd
import numpy as np

import sys
sys.path.insert(0,'..')

from utils import util, cv_models
from keras.utils import to_categorical
from keras import backend as K

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Put plotly into offline notebook mode so the iplot() calls below render in-cell.
init_notebook_mode(connected=True)
# Render matplotlib figures inline in the cell output.
%matplotlib inline 


# Paths of every pickle file under ../Models.
# NOTE(review): this list is not referenced again in the visible cells.
pickles = glob.glob('../Models/*.pickle')

Using TensorFlow backend.


Couldn't import dot_parser, loading of dot files will not be possible.


<a id="ch1"></a>
# Load Data for Ensemble and Stacking

In [2]:
# Read the Fashion-MNIST train and test splits from CSV. The next cell treats
# the first column as the class label and the remaining columns as features.
data_train, data_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../Data/fashion-mnist_train.csv',
                     '../Data/fashion-mnist_test.csv')
)

# Human-readable name for each integer class id (0-9), in id order.
target_names = dict(enumerate([
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]))

In [3]:
# Column 0 of each CSV is the class label; every other column is a feature.
X = data_train.iloc[:, 1:].values
y = to_categorical(data_train.iloc[:, 0].values)

# Hold out 20% of the training rows for validation. The fixed random_state
# keeps the split reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=13
)

# Test data
X_test = data_test.iloc[:, 1:].values
y_test = to_categorical(data_test.iloc[:, 0].values)

# Cast to float32 and divide by 255
# (presumably 8-bit pixel intensities being rescaled to [0, 1] - confirm).
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
X_val = X_val.astype('float32') / 255

# Integer class ids (instead of one-hot rows) for scoring and plotting.
y_test_true = data_test.iloc[:, 0]
y_train_true = y_train.argmax(axis=1)

In [4]:
# Base-model names in three groups.
# NOTE(review): the variable names suggest grouping by expected input layout
# (flat vector / 28x28 / 32x32) - confirm against the training notebook.
flat = ['mlp']

inp28 = ['simpleCNN', 'CNNDropout', 'CNNBatchNorm']

inp32 = [
    'Resnet50',
    'mobileNetV2',
    'wideResnet',
    'NASNet',
    'simpleVGG',
    'simpleInception',
    'simpleResnet',
]

<a id="ch2"></a>
# Load Pickles and Predict

In [5]:
def _load_pickle(path):
    """Open ``path`` in binary mode and return the unpickled object.

    Unpickling can execute arbitrary code, so only use this on files
    produced by this project.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Artifacts written by the training runs.
dfs = _load_pickle("../Models/all_prfs.pickle")
train_pred_df = _load_pickle("../Models/pred_train.pickle")
test_pred_df = _load_pickle("../Models/pred_test.pickle")
val_pred_df = _load_pickle("../Models/pred_val.pickle")
history_dict = _load_pickle("../Models/all_history.pickle")

<a id="ch3"></a>
# Calculate Test Accuracy

In [6]:
# Map each column of test_pred_df (one per base model) to its accuracy
# against the true test labels.
test_scores = {
    model_col: accuracy_score(y_test_true, test_pred_df[model_col])
    for model_col in test_pred_df
}

<a id="ch4"></a>
# Majority Voting

In [7]:
# Combine the base models' test predictions with the project's majority-vote
# helper, then score the combined prediction like any other model.
y_pred = util.majorityVoting(test_pred_df, y_test_true, target_names)
test_scores["majority_voting"] = acc = accuracy_score(y_test_true, y_pred)

acc

0.9429

<a id="ch5"></a>
# Stacking

In [8]:
# Meta-features for the stacking model: one column per base model's predicted
# labels. They get their own names so the normalised pixel arrays
# X_train / X_test built in the data-preparation cell are not overwritten.
X_train_stack = np.array(train_pred_df)
X_test_stack = np.array(test_pred_df)

# NOTE(review): assumes the rows of train_pred_df are in the same order as
# y_train - confirm against the notebook that produced pred_train.pickle.
# NOTE(review): no random seed is set here, so the reported numbers can vary
# from run to run.
stacking = cv_models.buildStacking()
history = stacking.fit(X_train_stack,
                       y_train,
                       batch_size=128,
                       epochs=10,
                       verbose=0)

# Evaluation
# score[0] is the loss and score[1] the first compiled metric
# (reported below as accuracy - confirm in cv_models.buildStacking).
score = stacking.evaluate(X_test_stack, y_test, verbose=0)
y_pred, metrics = util.get_pred_and_metrics(stacking,
                                            X_test_stack,
                                            y_test_true,
                                            target_names)

test_scores["stacking"] = score[1]
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Test loss: 0.6228298036262393
Test accuracy: 0.9204


<a id="ch6"></a>
# Order By Accuracy

In [9]:
# Build the leaderboard under its own name so `test_scores` stays a dict.
# Rebinding it to a DataFrame made this cell fail on a second run and broke
# re-running the earlier cells that do `test_scores[name] = acc`.
# list(...) materialises the items view, which older pandas versions do not
# accept directly in the DataFrame constructor.
test_scores_df = (
    pd.DataFrame(list(test_scores.items()), columns=["model_name", "test_acc"])
    .sort_values("test_acc", ascending=False)
    .reset_index(drop=True)
)
test_scores_df

Unnamed: 0,model_name,test_acc
0,simpleVGG_hist_dataAug,0.9439
1,majority_voting,0.9429
2,simpleVGG_hist,0.9366
3,wideResnet_hist_dataAug,0.9333
4,CNNBatchNorm_hist,0.9333
5,wideResnet_hist,0.927
6,simpleResnet_hist_dataAug,0.925
7,CNNDropout_hist,0.921
8,stacking,0.9204
9,simpleResnet_hist,0.9181


<a id="ch7"></a>
# Plot History

In [10]:
# One accuracy/loss figure per stored training history, labelled by model name.
for model_name, model_history in history_dict.items():
    print(model_name)
    util.plot_accuracy_and_loss(model_history)

CNNDropout_hist_dataAug
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



mobileNetV2_hist_dataAug
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



mobileNetV2_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



mlp_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



simpleVGG_hist_dataAug
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



CNNDropout_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



wideResnet_hist_dataAug
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



CNNBatchNorm_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



CNNBatchNorm_hist_dataAug
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



simpleCNN_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



simpleCNN_hist_dataAug
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



simpleResnet_hist_dataAug
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



wideResnet_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



simpleInception_hist_dataAug
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



simpleResnet_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



simpleVGG_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



simpleInception_hist
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]



<a id="ch8"></a>
# Cohen Kappa Scores

In [11]:
from sklearn.metrics import cohen_kappa_score


# Pairwise Cohen's kappa between the base models' test predictions.
# Only the upper triangle (i < j) is filled; the diagonal and lower triangle
# stay at 0. Descriptive loop names are used so the label array `y` from the
# data-preparation cell is not overwritten.
model_columns = list(test_pred_df.columns)
n_models = len(model_columns)
kohen_matrix = np.zeros((n_models, n_models))

for i, first_model in enumerate(model_columns):
    # Start at i + 1: visits each unordered pair once and skips self-pairs.
    for j in range(i + 1, n_models):
        second_model = model_columns[j]
        kappa = cohen_kappa_score(test_pred_df[first_model],
                                  test_pred_df[second_model])
        kohen_matrix[i, j] = round(kappa, 3)

In [12]:
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import tools

# Axis labels: one per base model, in the column order used for kohen_matrix.
model_labels = list(test_pred_df.columns)

# Annotated heatmap of the pairwise kappa scores (upper triangle only; the
# remaining cells are 0).
fig = ff.create_annotated_heatmap(kohen_matrix, x=model_labels, y=model_labels)

# Set the title on the figure's own layout. The previous version used
# `model_name`, a variable left over from the history-plot loop, so the title
# was the name of the last model rather than a description of this chart.
# It also re-wrapped the figure with a separate Layout, which risks dropping
# either the title or the cell annotations.
fig['layout'].update(title="Cohen Kappa Scores")
iplot(fig)