In [2]:
import numpy as np
import pandas as pd
import keras 
import tensorflow as tf
from keras import layers
from keras import models
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder 
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline



Using TensorFlow backend.


#Next, we can initialize the random number generator to ensure that we always get the same results when executing this code. This will help if we are debugging :

In [0]:
seed = 7
np.random.seed(seed)


#Loading the Sonar.csv data

In [4]:
device_name = tf.test.gpu_device_name()
!apt-get install graphviz -y
from google.colab import files
uploaded = files.upload()
import pandas as pd
import io
dataframe = pd.read_csv('sonar.csv', header=None)

Reading package lists... Done
Building dependency tree       
Reading state information... Done
graphviz is already the newest version (2.40.1-2).
0 upgraded, 0 newly installed, 0 to remove and 8 not upgraded.


Saving sonar.csv to sonar.csv


#Now we can split the columns into 60 input variables (X) and 1 output variable (Y). We use pandas to load the data because it easily handles strings (the output variable), whereas attempting to load the data directly using NumPy would be more difficult.

In [5]:
print(dataframe)

         0       1       2       3       4       5       6       7       8   \
0    0.0200  0.0371  0.0428  0.0207  0.0954  0.0986  0.1539  0.1601  0.3109   
1    0.0453  0.0523  0.0843  0.0689  0.1183  0.2583  0.2156  0.3481  0.3337   
2    0.0262  0.0582  0.1099  0.1083  0.0974  0.2280  0.2431  0.3771  0.5598   
3    0.0100  0.0171  0.0623  0.0205  0.0205  0.0368  0.1098  0.1276  0.0598   
4    0.0762  0.0666  0.0481  0.0394  0.0590  0.0649  0.1209  0.2467  0.3564   
5    0.0286  0.0453  0.0277  0.0174  0.0384  0.0990  0.1201  0.1833  0.2105   
6    0.0317  0.0956  0.1321  0.1408  0.1674  0.1710  0.0731  0.1401  0.2083   
7    0.0519  0.0548  0.0842  0.0319  0.1158  0.0922  0.1027  0.0613  0.1465   
8    0.0223  0.0375  0.0484  0.0475  0.0647  0.0591  0.0753  0.0098  0.0684   
9    0.0164  0.0173  0.0347  0.0070  0.0187  0.0671  0.1056  0.0697  0.0962   
10   0.0039  0.0063  0.0152  0.0336  0.0310  0.0284  0.0396  0.0272  0.0323   
11   0.0123  0.0309  0.0169  0.0313  0.0358  0.0102 

In [6]:
dataset = dataframe.values
print(dataset)

[[0.02 0.0371 0.0428 ... 0.009 0.0032 'R']
 [0.0453 0.0523 0.0843 ... 0.0052 0.0044 'R']
 [0.0262 0.0582 0.1099 ... 0.0095 0.0078 'R']
 ...
 [0.0522 0.0437 0.018 ... 0.0077 0.0031 'M']
 [0.0303 0.0353 0.049 ... 0.0036 0.0048 'M']
 [0.026 0.0363 0.0136 ... 0.0061 0.0115 'M']]


In [7]:
x = dataset[:,0:60].astype(float)    #splitting
y = dataset[:,60]
print(x)
print(y)

[[0.02   0.0371 0.0428 ... 0.0084 0.009  0.0032]
 [0.0453 0.0523 0.0843 ... 0.0049 0.0052 0.0044]
 [0.0262 0.0582 0.1099 ... 0.0164 0.0095 0.0078]
 ...
 [0.0522 0.0437 0.018  ... 0.0138 0.0077 0.0031]
 [0.0303 0.0353 0.049  ... 0.0079 0.0036 0.0048]
 [0.026  0.0363 0.0136 ... 0.0036 0.0061 0.0115]]
['R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M'
 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M' 'M

#The output variable is string values. We must convert them into integer values 0 and 1.



In [8]:
le = LabelEncoder()
encoded_Y = le.fit_transform(y)
print(encoded_Y)


[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]




#* Our model will have a single fully connected hidden layer with the same number of neurons as input variables. This is a good default starting point when creating neural networks.

#* The weights are initialized using a small Gaussian random number. The Rectifier activation function is used. The output layer contains a single neuron in order to make predictions. It uses the sigmoid activation function in order to produce a probability output in the range of 0 to 1 that can easily and automatically be converted to crisp class values. 

#*  Finally, we are using the logarithmic loss function (binary_crossentropy) during training, the preferred loss function for binary classification problems. The model also uses the efficient Adam optimization algorithm for gradient descent and accuracy metrics will be collected when the model is trained.


In [0]:
from keras import initializers
Gaussian = initializers.random_normal()

# baseline model

def create_baseline():
  
  #creating model
  
  model = models.Sequential()
  model.add(layers.Dense(60, kernel_initializer = Gaussian , activation = 'relu',  input_dim=60))
  model.add(layers.Dense(1, activation = 'sigmoid' ))
	
  #Compiling the model
  
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
	
  return model

#* Now it is time to evaluate this model using stratified cross validation in the scikit-learn framework.

#* We pass the number of training epochs to the KerasClassifier, again using reasonable default values. Verbose output is also turned off given that the model will be created 10 times for the 10-fold cross validation being performed.


In [0]:
# evaluate model with standardized dataset

estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, x ,encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Results: 83.71% (5.75%)


#Re-Run The Baseline Model With Data Preparation

In [0]:
# evaluate baseline model with standardized dataset

np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))




Standardized: 85.06% (6.70%)


# #Tuning Layers and Number of Neurons in The Model

There are many things to tune on a neural network, such as the weight initialization, activation functions, optimization procedure and so on.

One aspect that may have an outsized effect is the structure of the network itself called the network topology. In this section, we take a look at two experiments on the structure of the network: making it smaller and making it larger.
[link text](https://)

#1) Evaluate a Smaller Network

In this experiment, we take our baseline model with 60 neurons in the hidden layer and reduce it by half to 30. This will put pressure on the network during training to pick out the most important structure in the input data to model.






In [0]:
# smaller model

def create_smaller():
  
  #Create model
  model = models.Sequential()
  model.add(layers.Dense(30,  activation = 'relu',  input_dim=60))
  model.add(layers.Dense(1, activation = 'sigmoid' ))
  
  # Compile model  
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  
  
  return model


In [0]:
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=6, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, encoded_Y, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))



Smaller: 83.09% (6.41%)


Running the above code we can see that we have a very slight boost in the mean estimated accuracy and an important reduction in the standard deviation (average spread) of the accuracy scores for the model.

#2) Evaluate a Larger Network

We can evaluate whether adding more layers to the network improves the performance easily by making another small tweak to the function used to create our model. Here, we add one new layer (one line) to the network that introduces another hidden layer with 30 neurons after the first hidden layer.

Our network now has the topology:

60 inputs -> [60 -> 30] -> 1 output





In [0]:
# larger model

def create_larger():
  
	# create model
  
  model = models.Sequential()
  model.add(layers.Dense(60 , activation = 'relu' , input_dim = 60 ))
  model.add(layers.Dense(30 , activation = 'relu'))
  model.add(layers.Dense(1 , activation = 'sigmoid'))
	
	# Compile model
  
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
	
  return model

In [0]:
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, encoded_Y, cv=kfold)
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Larger: 85.09% (7.25%)


Running the above code produces the results below. We can see that we do not get a lift in the model performance. This may be statistical noise or a sign that further training is needed.

# Rewriting the code using the Keras Functional API

In [0]:
import keras
from keras import layers

def func_API():
    
  # Build model

  input = keras.Input(shape =(60,))
  x = layers.Dense(60 , activation = 'relu')(input)
  output = layers.Dense(1 , activation = 'sigmoid')(x)
  model = keras.Model(input,output)

  # Compiled Model

  model.compile(loss='binary_crossentropy', optimizer='adam',   metrics=['accuracy'])
  
  return model

In [0]:
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=func_API, epochs=100, batch_size=6, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, encoded_Y, cv=kfold)
print("Functional API method : %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Functional API method : 84.11% (8.04%)


#Rewriting the code by doing Model Subclassing

In [0]:
import tensorflow as tf
import keras 
from keras import layers

def SubClass_API():

  class MyModel(tf.keras.Model):
  
    def __init__(self):
      super(MyModel, self).__init__()
    
      self.dense1 = layers.Dense(60 , activation = 'relu')
      self.dense2 = layers.Dense(30 , activation = 'relu')
      self.dense3 = layers.Dense(1 , activation = 'sigmoid')
  
    def call(self , inputs):
  
      x = self.dense1(inputs)
      x = self.dense2(x)
  
      return self.dense3(x) 

  model = MyModel()
  model.compile(loss='binary_crossentropy', optimizer='adam',   metrics=['accuracy'])
  return model

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=SubClass_API, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, x, encoded_Y, cv=kfold)
print("Model SubClass API method : %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Model SubClass API method : 55.35% (8.99%)


#Rewriting the code without using scikit-learn
Once you have written the model in all three API style you are required to do k-fold cross validation without using scikit-learn library.


In [11]:
import numpy as np
k=4
num_val_samples = len(x) // k
num_epochs = 100
all_scores = []
for i in range(k):
  print('processing fold #', i)
  
  #Prepares the validation data : data from partition #k
  
  val_data = x[i * num_val_samples: (i + 1) * num_val_samples]     
  val_targets = encoded_Y[i * num_val_samples: (i + 1) * num_val_samples]
  
 
  #Prepares the training data : data from all other partitions
  
  partial_train_data = np.concatenate([x[:i * num_val_samples],x[(i + 1) * num_val_samples:]],axis=0)
  partial_train_targets = np.concatenate([encoded_Y[:i * num_val_samples],encoded_Y[(i + 1) * num_val_samples:]],axis=0)
  
 #Builds the Keras model (already compiled)

  model = create_baseline()
  
  #Trains the model(in silent mode,verbose = 0)
  
  model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=1, verbose=0)
  
  #Evaluates the model on the validation data

  val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
  all_scores.append(val_mae)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3


In [12]:
mean = np.mean(all_scores)*100
SD = np.std(all_scores)*100
print("K-fold cross validation : %.2f%%  (%.2f%%)" %(mean , SD))


K-fold cross validation : 36.54%  (18.69%)
