## Train Test Split

### Simple train test split

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

# Toy dataset: 9 two-feature samples with labels drawn from 3 classes (3 samples each).
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [5, 6], [6, 7], [8, 9], [9, 10], [11, 12]])
Y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])

# Hold out 30% of the samples for testing. random_state pins the shuffle so the
# split printed below is the same on every Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)
print(X_train)
print(Y_train)
print(X_test)
print(Y_test)

### KFold train test split

In [2]:
from sklearn.model_selection import KFold
import numpy as np

# Toy dataset: 9 samples, 3 per class, stored in class order.
X = np.array([
    [1, 2], [3, 4], [1, 2],
    [3, 4], [5, 6], [6, 7],
    [8, 9], [9, 10], [11, 12],
])
Y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])

# Three folds with the default settings.
kf = KFold(n_splits=3)
print(kf)

# Each iteration yields the index arrays of one train/test partition.
for train_index, test_index in kf.split(X, Y):
    print("train indices:", train_index, "test indices:", test_index)
    X_train, Y_train = X[train_index], Y[train_index]
    X_test, Y_test = X[test_index], Y[test_index]

KFold(n_splits=3, random_state=None, shuffle=False)
train indices: [3 4 5 6 7 8] test indices: [0 1 2]
train indices: [0 1 2 6 7 8] test indices: [3 4 5]
train indices: [0 1 2 3 4 5] test indices: [6 7 8]


### Stratified KFold split: each fold preserves the class proportions

In [3]:
from sklearn.model_selection import StratifiedKFold
# Toy dataset: 9 samples, 3 per class, stored in class order.
X = np.array([
    [1, 2], [3, 4], [1, 2],
    [3, 4], [5, 6], [6, 7],
    [8, 9], [9, 10], [11, 12],
])
Y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])

# Stratified folds use the labels in Y to decide which samples go in each fold.
skf = StratifiedKFold(n_splits=3)
print(skf)

# Each iteration yields the index arrays of one train/test partition.
for train_index, test_index in skf.split(X, Y):
    print("train indices:", train_index, "test indices:", test_index)
    X_train, Y_train = X[train_index], Y[train_index]
    X_test, Y_test = X[test_index], Y[test_index]


StratifiedKFold(n_splits=3, random_state=None, shuffle=False)
train indices: [1 2 4 5 7 8] test indices: [0 3 6]
train indices: [0 2 3 5 6 8] test indices: [1 4 7]
train indices: [0 1 3 4 6 7] test indices: [2 5 8]


## Hyperparameter Tuning

### Varying neural network hyperparameters

In [None]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.wrappers.scikit_learn import KerasClassifier
from keras.datasets import mnist
from keras.optimizers import SGD
from keras.utils import np_utils

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
# NOTE(review): only NumPy is seeded here; whether this also fixes the Keras
# backend's weight initialisation depends on the backend in use -- confirm.
seed = 1
np.random.seed(seed)

def create_model():
    """Build and compile the fixed MNIST classifier used by the grid search.

    Architecture: 784 inputs -> Dense(40, relu) -> Dense(20, relu)
    -> Dense(10, softmax), compiled with SGD and categorical cross-entropy.

    Returns:
        The compiled ``Sequential`` model. Its summary is printed as a side
        effect each time the model is built.
    """
    model = Sequential()
    model.add(Dense(40, activation='relu', input_shape=(28*28,)))
    model.add(Dense(20, activation='relu'))
    # softmax (not sigmoid): the labels are one-hot vectors over 10 mutually
    # exclusive classes, and categorical cross-entropy expects the outputs to
    # form a probability distribution over those classes.
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(),
                  metrics=['accuracy'])
    model.summary()
    return model

# Load MNIST, flatten every 28x28 image into a 784-vector, convert to float32
# and divide the pixel values by 255.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 28*28).astype('float32') / 255
X_test = X_test.reshape(10000, 28*28).astype('float32') / 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# One-hot encode the digit labels (10 classes).
Y_train = np_utils.to_categorical(Y_train, 10)
Y_test = np_utils.to_categorical(Y_test, 10)

# The grid search is run on the training split only.
X, Y = X_train, Y_train

# Wrap the Keras model builder so scikit-learn can use it as an estimator.
model = KerasClassifier(build_fn=create_model, verbose=0)

# Search space: every combination of batch size and epoch count (3 x 3 = 9 candidates).
batch_size = [32, 50, 100]
epochs = [5, 7, 10]
param_grid = {'batch_size': batch_size, 'epochs': epochs}

# 3-fold cross-validation over the grid, using all available cores.
# NOTE(review): the other grid search in this notebook uses n_jobs=1; confirm
# that parallel workers behave correctly with the Keras backend in use.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, n_jobs=-1)
grid_result = grid.fit(X, Y)

# Summarize results: best candidate first, then mean/std test score of every candidate.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for row in zip(means, stds, params):
    # row is (mean, std, params), matching the three placeholders in order.
    print("%f (%f) with: %r" % row)

### Varying neural network's layers and neurons

In [None]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.wrappers.scikit_learn import KerasClassifier
from keras.datasets import mnist
from keras.optimizers import SGD, Adam
from keras.utils import np_utils

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
# NOTE(review): only NumPy is seeded here; whether this also fixes the Keras
# backend's weight initialisation depends on the backend in use -- confirm.
seed = 1
np.random.seed(seed)

def create_model(model_conf):
    """Build and compile an MNIST classifier from a configuration dict.

    Args:
        model_conf: dict with two keys:
            ``'hidden_layers'`` -- list of ``{'neurons': int, 'activation': str}``
            dicts, one per hidden Dense layer, in order (may be empty);
            ``'optimizer'`` -- the optimizer passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model. Its summary is printed as a side
        effect each time the model is built.
    """
    model = Sequential()

    # Only the first layer of the network declares the input shape. Tracking it
    # in a kwargs dict lets the same code handle an empty hidden-layer list, in
    # which case the output layer becomes the first layer.
    first_layer_kwargs = {'input_shape': (28*28,)}
    for layer in model_conf['hidden_layers']:
        model.add(Dense(layer['neurons'],
                        activation=layer['activation'],
                        **first_layer_kwargs))
        first_layer_kwargs = {}

    # Output layer: softmax (not sigmoid), because the labels are one-hot
    # vectors over 10 mutually exclusive classes and categorical cross-entropy
    # expects the outputs to form a probability distribution.
    model.add(Dense(10, activation='softmax', **first_layer_kwargs))

    model.compile(loss='categorical_crossentropy',
                  optimizer=model_conf['optimizer'],
                  metrics=['accuracy'])

    model.summary()
    return model


# Load MNIST, flatten every 28x28 image into a 784-vector, convert to float32
# and divide the pixel values by 255.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 28*28).astype('float32') / 255
X_test = X_test.reshape(10000, 28*28).astype('float32') / 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# One-hot encode the digit labels (10 classes).
Y_train = np_utils.to_categorical(Y_train, 10)
Y_test = np_utils.to_categorical(Y_test, 10)

# The grid search is run on the training split only.
X, Y = X_train, Y_train

# Wrap the Keras model builder so scikit-learn can use it as an estimator.
model = KerasClassifier(build_fn=create_model, verbose=0)


def _relu_layers(*widths):
    """Return hidden-layer specs for a stack of ReLU layers, one per width."""
    return [{"neurons": width, "activation": "relu"} for width in widths]


# Grid-search parameters: batch size and epochs are fixed, so the only thing
# that varies between candidates is the architecture/optimizer configuration.
batch_size = [32]
epochs = [5]
model_conf = [
    {"hidden_layers": _relu_layers(40, 20), "optimizer": SGD(lr=0.005)},
    {"hidden_layers": _relu_layers(30, 15), "optimizer": SGD(lr=0.005)},
    {"hidden_layers": _relu_layers(60, 50, 40, 30, 20), "optimizer": SGD()},
    {"hidden_layers": _relu_layers(40, 20), "optimizer": Adam()},
]

param_grid = {
    "batch_size": batch_size,
    "epochs": epochs,
    "model_conf": model_conf,
}

# 2-fold cross-validation, single process, no final refit on the full data.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2, n_jobs=1, refit=False)
grid_result = grid.fit(X, Y)


# Summarize results: best candidate first, then mean/std test score of every candidate.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for row in zip(means, stds, params):
    # row is (mean, std, params), matching the three placeholders in order.
    print("%f (%f) with: %r" % row)

## Feature Normalization

In [None]:
from sklearn import preprocessing
import numpy as np

# Three samples whose features live on very different scales
# (thousands, single digits, ten-thousandths).
X = np.array([
    [1000, 2, 0.0001],
    [2000, 4, 0.0003],
    [3000, 2, 0.0005],
])

# Standardize the features and show the rescaled matrix.
scaled_X = preprocessing.scale(X)
print(scaled_X)

## Evaluation Metrics

In [None]:
#Mean Squared Error for Regression
from sklearn.metrics import mean_squared_error
# Ground-truth targets and the corresponding predictions for four samples.
Y = [100, 200, 300, 400]
Y_hat = [102, 198, 300, 405]

# Mean of the squared residuals: (2**2 + 2**2 + 0**2 + 5**2) / 4 = 8.25
mse = mean_squared_error(y_true=Y, y_pred=Y_hat)
print(mse)

In [None]:
#Accuracy for Classification Problems
from sklearn.metrics import accuracy_score
# True labels and predictions; only the last sample is misclassified.
Y = [1, 1, 0, 0, 1]
Y_hat = [1, 1, 0, 0, 0]

# Fraction of predictions that match the true label: 4 / 5 = 0.8
accuracy = accuracy_score(y_true=Y, y_pred=Y_hat)
print(accuracy)

In [None]:
#Evaluation metrics for skewed classes
from sklearn.metrics import accuracy_score
# Class 0 is the normal class; class 1 is the rare, outlier-like class
# (1000 samples of class 0 followed by 10 samples of class 1).
Y = [0] * 1000 + [1] * 10

def dumb_classifier_predict(N):
    """Return ``N`` predictions that are class 0 except at a few fixed positions.

    Positions 0, 5 and 1002 are predicted as class 1. Positions that fall
    outside ``range(N)`` are skipped, so the function works for any ``N``
    instead of raising IndexError when ``N <= 1002``.

    Args:
        N: number of predictions to produce.

    Returns:
        A list of ``N`` ints, each 0 or 1.
    """
    res = [0] * N
    for index in (0, 5, 1002):
        # Guard the hard-coded positions against short outputs.
        if index < N:
            res[index] = 1
    return res

from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score

Y_hat = dumb_classifier_predict(1010)

# Even a dumb classifier can reach high accuracy when the classes are skewed...
accuracy = accuracy_score(y_true=Y, y_pred=Y_hat)
print('accuracy=', accuracy)

# ...so look at precision and recall instead.
precision = precision_score(y_true=Y, y_pred=Y_hat)
print('precision =', precision)
recall = recall_score(y_true=Y, y_pred=Y_hat)
print('recall =', recall)

# For a single-valued metric on skewed classes use the F1 score,
# which combines precision and recall.
f1 = f1_score(y_true=Y, y_pred=Y_hat)
print('f1 score =',f1)

# Confusion matrix
print(confusion_matrix(y_true=Y, y_pred=Y_hat))



In [None]:
#Confusion matrix for multiclass classification
import random
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

# Seed the stdlib RNG so the random "predictions" -- and therefore the matrix
# and scores printed below -- are the same on every run.
random.seed(1)

# Five balanced classes (0..4), 1000 samples each, stored in class order.
Y = [label for label in range(5) for _ in range(1000)]
# Predictions are uniform random guesses over the same five classes.
Y_hat = [random.randint(0, 4) for _ in range(5000)]

# Confusion matrix for multiclass classification
print(confusion_matrix(Y, Y_hat))

# Precision, recall, F1 score and support for each class
print(precision_recall_fscore_support(Y, Y_hat))