
# Project # 1


## importing all the important liabraries

In [2]:
import numpy as np
import keras
from keras import optimizers
import tensorflow as tf
import pandas as pd
from keras import models,layers
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


Using TensorFlow backend.


### fix random seed for reproducibility

In [3]:
seed = 7
np.random.seed(seed)

## load dataset

In [4]:
dataframe = pd.read_csv("sonar.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:60].astype('float64')
Y = dataset[:,60]
X


array([[0.02  , 0.0371, 0.0428, ..., 0.0084, 0.009 , 0.0032],
       [0.0453, 0.0523, 0.0843, ..., 0.0049, 0.0052, 0.0044],
       [0.0262, 0.0582, 0.1099, ..., 0.0164, 0.0095, 0.0078],
       ...,
       [0.0522, 0.0437, 0.018 , ..., 0.0138, 0.0077, 0.0031],
       [0.0303, 0.0353, 0.049 , ..., 0.0079, 0.0036, 0.0048],
       [0.026 , 0.0363, 0.0136, ..., 0.0036, 0.0061, 0.0115]])

### Labelling the Mines and Rocks

In [34]:
le=LabelEncoder()
le.fit(Y)
encoded_Y=le.transform(Y)
le.classes_


array(['M', 'R'], dtype=object)

In [35]:
encoded_Y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [36]:
X.shape

(208, 60)

## Creating Baseline

In [37]:
def create_baseline():
    model=models.Sequential()
    model.add(layers.Dense(60,activation='relu',input_shape=(60,)))
    model.add(layers.Dense(1,activation='sigmoid'))
    model.compile(loss='binary_crossentropy',optimizer='adam'
                  ,metrics=['accuracy'])
    return model

### Evaluatating model with standardized dataset

In [38]:
estimator=KerasClassifier(build_fn=create_baseline,epochs=100,batch_size=5,verbose=0)
kfold=StratifiedKFold(n_splits=10,random_state=seed,shuffle=True)
results=cross_val_score(estimator,X,encoded_Y,cv=kfold)

In [11]:
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Results: 83.71% (6.13%)


### Ten different results using kfold strategy

In [12]:
results

array([0.81818183, 0.76190478, 0.80952382, 0.90476191, 0.80952382,
       0.85714286, 0.80952382, 0.95      , 0.75000001, 0.90000001])

### Re-Run The Baseline Model With Data Preparation

In [13]:
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))



Standardized: 86.09% (7.57%)


## Step # 4

## Smaller Model

In [14]:
def create_smaller():
    model=models.Sequential()
    model.add(layers.Dense(20,activation='relu',input_shape=(60,)))
    model.add(layers.Dense(1,activation='sigmoid'))
    model.compile(loss='binary_crossentropy',optimizer=keras.optimizers.Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False),
                  metrics=['accuracy'])
    return model
np.random.seed(seed)
estimators=[]
estimators.append(('standardize',StandardScaler()))
estimators.append(('classifier',KerasClassifier(build_fn=create_smaller,epochs=100,batch_size=5,verbose=0)))
kfold=StratifiedKFold(n_splits=10,shuffle=True,random_state=seed)
pipeline=Pipeline(estimators)
results=cross_val_score(pipeline,X,encoded_Y,cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 86.54% (7.69%)


## Larger Model

In [15]:
def create_larger():
    model=models.Sequential()
    model.add(layers.Dense(60,activation='relu',input_shape=(60,)))
    model.add(layers.Dense(30,activation='relu'))
    model.add(layers.Dense(1,activation='sigmoid'))
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

np.random.seed(seed)
estimators=[]
estimators.append(('standardize',StandardScaler()))
estimators.append(('classifier',KerasClassifier(build_fn=create_smaller,epochs=100,batch_size=5,verbose=0)))
kfold=StratifiedKFold(n_splits=10,shuffle=True,random_state=seed)
pipeline=Pipeline(estimators)
results=cross_val_score(pipeline,X,encoded_Y,cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 85.59% (8.56%)


In [16]:
results

array([0.81818183, 1.        , 0.71428573, 0.90476191, 0.80952382,
       0.80952382, 0.95238096, 0.90000001, 0.75000001, 0.90000001])

## Step 7: Rewriting the code using the Keras Functional API

In [18]:
def create_baseline_2():
    inputs=keras.Input(shape=(60,))
    x=layers.Dense(60,activation='relu')(inputs)
    outputs=layers.Dense(1,activation='sigmoid')(x)

    model=keras.Model(inputs,outputs)
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
    return model

np.random.seed(seed)
estimators=[]
estimators.append(('standardize',StandardScaler()))
estimators.append(('classifier',KerasClassifier(build_fn=create_baseline_2,epochs=100,batch_size=5,verbose=0)))
kfold=StratifiedKFold(n_splits=10,shuffle=True,random_state=seed)
pipeline=Pipeline(estimators)
results=cross_val_score(pipeline,X,encoded_Y,cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 85.59% (7.46%)


## Step 8: Rewriting the code by doing Model Subclassing

In [18]:
def create_baseline_3():
    class MyModel(tf. keras.Model):
        def __init__(self):
            super(MyModel,self).__init__()
            self.dense1 = Dense(60, activation="relu")
            self.dense2 = Dense(10, activation='relu')
            self.dense3 = Dense(1, activation='sigmoid')


        def call(self,inputs):
            x = self.dense1(inputs)
            x = self.dense2(x)
            return self.dense3(x)

    model=MyModel()
    model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model




np.random.seed(seed)
estimators=[]
estimators.append(('standardize',StandardScaler()))
estimators.append(('classifier',KerasClassifier(build_fn=create_baseline_3,epochs=100,batch_size=5,verbose=0)))
kfold=StratifiedKFold(n_splits=10,shuffle=True,random_state=seed)
pipeline=Pipeline(estimators)
results=cross_val_score(pipeline,X,encoded_Y,cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 51.94% (8.74%)


## Step 10: Without Skit-learn Library

In [103]:
k=10
num_val_samples = len(X) // k
num_epochs = 100
val_loss=[]
val_acc=[]
np.random.seed(seed)
for i in range(k):
    print('processing fold #', i)
    val_data = X[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = encoded_Y[i * num_val_samples: (i + 1) * num_val_samples]
    
    partial_train_data = np.concatenate([X[:i * num_val_samples],
                                         X[(i + 1) * num_val_samples:]],axis=0)
    partial_train_targets = np.concatenate([encoded_Y[:i * num_val_samples],
                                            encoded_Y[(i + 1) * num_val_samples:]],axis=0)
    
    model=create_baseline()
    history=model.fit(partial_train_data,partial_train_targets,shuffle=True,epochs=10,
                      batch_size=5,verbose=0)
    sys=model.evaluate(val_data,val_targets,verbose=0)
    val_loss.append(sys[0])
    val_acc.append(sys[1])

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9


In [106]:
len(val_acc)

10

### Validation accuracy is found to be:

In [110]:
np.mean(val_acc)

0.5650000013411045

### Validation loss is found to be:

In [111]:
np.mean(val_loss)

0.7046727418899537