# Predicting Mines or Rocks with Keras and TensorFlow

Using the Keras library with a TensorFlow backend, three multilayer perceptron models were built and evaluated using K-fold cross-validation.
```
1. Model 1 - Input (60) - Hidden (60) - Output (1)
2. Model 2 - Input (60) - Hidden (30) - Output (1)
3. Model 3 - Input (60) - Hidden (60) - Hidden (60) - Output (1)
```

While training and evaluating the models, standardization is learned on the training folds of each split and then applied to the held-out validation fold. This is achieved by placing a `StandardScaler()` from `sklearn.preprocessing` in front of the classifier using sklearn's `make_pipeline` function.

### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.pipeline import make_pipeline

In [None]:
from numpy.random import seed
from tensorflow.random import set_seed

# Single seed shared by NumPy, TensorFlow and the cross-validation splitters.
SEED = 42

# Seed both random number generators with the same value.
seed(SEED)
set_seed(SEED)

### Loading the dataset 

In [None]:
# Read the header-less CSV and keep only the underlying array of values.
df = pd.read_csv('../input/mines-vs-rocks/sonar.all-data.csv', header=None).values

# Columns 0-59 are used as float features; column 60 is used as the label.
X = df[:, :60].astype(float)
Y = df[:, 60]

print('X Shape :', X.shape)
print('Y Shape :', Y.shape)
print('Number of Unique Values in Y:', set(Y))

### Prepping Y

In [None]:
from sklearn.preprocessing import LabelEncoder

# Turn the raw labels in Y into integer codes.
encoder = LabelEncoder()
Y_encoded = encoder.fit_transform(Y).astype(int)

# Distinct codes, computed once and reused by both reports below.
unique_codes = list(set(Y_encoded))

print('Shape of Y_encoded :', len(Y_encoded))
print('Unique values in Y_encoded :', unique_codes)
print('Inverse transforming : ', encoder.inverse_transform(unique_codes))

### Prepping X

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the full feature matrix, then apply them
# to that same matrix.
ss = StandardScaler().fit(X)
X_transformed = ss.transform(X)

# Last expression of the cell: the notebook displays the resulting shape.
X_transformed.shape

In [None]:
def baseline_model():
    """Build and compile the baseline network.

    The model is a `Sequential` stack of a 60-unit ReLU `Dense` layer that
    takes 60 inputs, followed by a single sigmoid output unit. It is compiled
    with binary cross-entropy loss, the Adam optimizer and accuracy as metric.

    Returns:
        The compiled model.
    """
    net = Sequential([
        Dense(60, input_dim=60, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return net

### Building a single model

In [None]:
single_model = baseline_model()
%time history = single_model.fit(X_transformed, Y_encoded, epochs = 200, batch_size = 8, verbose = 0, validation_split = 0.1)

In [None]:
import matplotlib.pyplot as plt

# One figure per metric: the training curve first, then the validation curve.
for metric in ('loss', 'accuracy'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.show()

### Evaluating Model using K-Fold Crossvalidation

In [None]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Settings shared by every cross-validated run below.
EPOCHS = 50      # passed as `epochs` to KerasClassifier
BATCH_SIZE = 8   # passed as `batch_size` to KerasClassifier
VERBOSE = 0      # passed as `verbose` to KerasClassifier
FOLDS = 10       # number of splits for StratifiedKFold

In [None]:
# Shuffled, stratified folds with a fixed seed.
kfold = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)

# The scaler sits inside the pipeline, so it is re-fitted for every split.
baseline_clf = KerasClassifier(
    build_fn=baseline_model,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=VERBOSE,
)
estimators = make_pipeline(StandardScaler(), baseline_clf)

results = cross_val_score(estimators, X, Y_encoded, cv=kfold)
mean_pct = round(results.mean()*100, 2)
std_pct = round(results.std()*100, 2)
print(f'Mean Accuracy : {mean_pct} %, Std. dev : {std_pct}%')

### Trying out a small network
Reducing the size of the hidden layer from 60 to 30 units puts pressure on the network to extract the most important structure from the data.

In [None]:
%%time 
def small_model():
    """Build and compile the smaller network.

    The model is a `Sequential` stack of a 30-unit ReLU `Dense` layer that
    takes 60 inputs, followed by a single sigmoid output unit. It is compiled
    with binary cross-entropy loss, the Adam optimizer and accuracy as metric.

    Returns:
        The compiled model.
    """
    hidden = Dense(30, input_dim=60, activation='relu')
    output = Dense(1, activation='sigmoid')

    net = Sequential([hidden, output])
    net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return net

# Same fold configuration and pipeline layout as the baseline run, but with
# the smaller network as the estimator.
kfold = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
small_clf = KerasClassifier(
    build_fn=small_model,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=VERBOSE,
)
estimators = make_pipeline(StandardScaler(), small_clf)

results = cross_val_score(estimators, X, Y_encoded, cv=kfold)
mean_pct = round(results.mean()*100, 2)
std_pct = round(results.std()*100, 2)
print(f'Mean Accuracy : {mean_pct} %, Std. dev : {std_pct}%')

We got an equally good model with a smaller network

### Evaluating a larger network

A neural network topology with more layers offers more opportunity for the network to extract key features and combine them in useful non-linear ways.

In [None]:
%%time 
def large_model():
    """Build and compile the larger network.

    The model is a `Sequential` stack of two 60-unit ReLU `Dense` layers (the
    first one taking 60 inputs), followed by a single sigmoid output unit. It
    is compiled with binary cross-entropy loss, the Adam optimizer and
    accuracy as metric.

    Returns:
        The compiled model.
    """
    net = Sequential([
        Dense(60, input_dim=60, activation='relu'),
        Dense(60, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return net

# Same fold configuration and pipeline layout as the earlier runs, but with
# the larger network as the estimator.
kfold = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
large_clf = KerasClassifier(
    build_fn=large_model,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=VERBOSE,
)
estimators = make_pipeline(StandardScaler(), large_clf)

results = cross_val_score(estimators, X, Y_encoded, cv=kfold)
mean_pct = round(results.mean()*100, 2)
std_pct = round(results.std()*100, 2)
print(f'Mean Accuracy : {mean_pct} %, Std. dev : {std_pct}%')

The accuracy improved with the additional hidden layer.