<a href="https://colab.research.google.com/github/okanbuyuktepe/Deep-learning-Exercises/blob/master/sonar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from pandas import read_csv
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline

In [7]:
# Fixed seed, reused below for NumPy's global RNG and for the CV fold shuffling.
# NOTE(review): this seeds NumPy only; whether the Keras backend draws its
# random numbers from NumPy depends on the backend in use - confirm.
seed = 7
numpy.random.seed(seed=seed)

In [8]:
# Download the sonar data; the file is read with no header row (header=None).
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
dataframe = read_csv(url, header=None)
dataset = dataframe.values
# Inputs: columns 0-59 cast to float. Output: column 60, left as loaded.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]

In [9]:
# Turn the class labels in Y into integer codes; the fitted encoder is kept
# so the mapping stays available afterwards.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

In [10]:
# Baseline network: 60 inputs -> 60 ReLU units -> 1 sigmoid output.
def create_baseline():
  """Build and compile the baseline binary classifier.

  Returns a Sequential model with one hidden Dense layer (60 units, ReLU,
  60 input features) and a single sigmoid output unit, compiled with
  binary cross-entropy loss, the Adam optimizer and the accuracy metric.
  """
  hidden = Dense(60, input_dim=60, kernel_initializer='normal', activation='relu')
  output = Dense(1, kernel_initializer='normal', activation='sigmoid')
  model = Sequential([hidden, output])
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  return model

In [11]:
# Evaluate the baseline model on the raw inputs (no standardization is applied
# in this cell) using stratified 10-fold cross-validation with shuffling.
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
# Report mean and standard deviation of the per-fold scores as percentages.
# '%.2f' prints two decimals; the previous '%2.f' meant "width 2, zero decimals"
# and silently rounded the mean to a whole number.
print('Baseline: %.2f%% (%.2f%%)' % (results.mean()*100, results.std()*100))

Baseline: 79% (9.85%)


In [12]:
# Improve Performance with Data Preparation
'''
A scikit-learn Pipeline is a wrapper that chains one or more steps (here the scaler and the model) and runs them together within each pass of the cross-validation procedure.
An effective data preparation scheme for tabular data when building neural network models is standardization.
This is where the data is rescaled such that the mean value for each attribute is 0 and the standard
deviation is 1.
''' 
# Binary Classification with Sonar Dataset: Standardized
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# Re-seed NumPy's global RNG at the start of this experiment.
# NOTE(review): this seeds NumPy only; confirm whether the Keras backend uses it.
seed = 7
numpy.random.seed(seed=seed)
# Download the sonar data; the file is read with no header row (header=None).
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
dataframe = read_csv(url, header=None)
dataset = dataframe.values
# Inputs: columns 0-59 cast to float. Output: column 60, left as loaded.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]
# Turn the class labels into integer codes.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)
# Baseline network: 60 inputs -> 60 ReLU units -> 1 sigmoid output.
def create_baseline():
  """Build and compile the baseline binary classifier.

  Returns a Sequential model with one hidden Dense layer (60 units, ReLU,
  60 input features) and a single sigmoid output unit, compiled with
  binary cross-entropy loss, the Adam optimizer and the accuracy metric.
  """
  hidden = Dense(60, input_dim=60, kernel_initializer='normal', activation='relu')
  output = Dense(1, kernel_initializer='normal', activation='sigmoid')
  model = Sequential([hidden, output])
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  return model
# Evaluate the baseline model on standardized inputs. The scaler is a pipeline
# step, so cross_val_score fits it together with the model on each fold's
# training split.
estimators = [
  ('standardize', StandardScaler()),
  ('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)
# Stratified 10-fold cross-validation with shuffling, seeded for repeatable folds.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
# Mean and standard deviation of the per-fold scores, as percentages.
summary = (results.mean()*100, results.std()*100)
print('Standardized: %.2f%% (%.2f%%)' % summary)

Standardized: 83.14% (8.63%)


Tuning Layers and Neurons in The Model


In [13]:
# Evaluate a smaller network
# we take our baseline model with 60 neurons in the hidden layer and reduce it by half to 30.
# Binary Classification with Sonar Dataset: Standardized Smaller
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# Re-seed NumPy's global RNG at the start of this experiment.
# NOTE(review): this seeds NumPy only; confirm whether the Keras backend uses it.
seed = 7
numpy.random.seed(seed=seed)
# Download the sonar data; the file is read with no header row (header=None).
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
dataframe = read_csv(url, header=None)
dataset = dataframe.values
# Inputs: columns 0-59 cast to float. Output: column 60, left as loaded.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]
# Turn the class labels into integer codes.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)
# Smaller network: 60 inputs -> 30 ReLU units -> 1 sigmoid output.
def create_baseline():
  """Build and compile the smaller binary classifier.

  Returns a Sequential model with one hidden Dense layer (30 units, ReLU,
  60 input features) and a single sigmoid output unit, compiled with
  binary cross-entropy loss, the Adam optimizer and the accuracy metric.
  """
  hidden = Dense(30, input_dim=60, kernel_initializer='normal', activation='relu')
  output = Dense(1, kernel_initializer='normal', activation='sigmoid')
  model = Sequential([hidden, output])
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  return model
# Evaluate the smaller (30 hidden units) model on standardized inputs. The
# scaler is a pipeline step, so cross_val_score fits it together with the
# model on each fold's training split.
estimators = [
  ('standardize', StandardScaler()),
  ('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)
# Stratified 10-fold cross-validation with shuffling, seeded for repeatable folds.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
# Label the result after this experiment so it is not confused with the
# plain 'Standardized' run printed by the previous cell.
print('Standardized Smaller: %.2f%% (%.2f%%)' % (results.mean()*100, results.std()*100))

Standardized: 84.57% (5.86%)


In [14]:
# Evaluate a Larger Network
# Here, we add one new layer (one line) to the network that introduces another hidden layer with 30 neurons after the 
# first hidden layer.
'''
The idea here is that the network is given the opportunity to model all input variables
before being bottlenecked and forced to halve the representational capacity, much like we did in
the experiment above with the smaller network. Instead of squeezing the representation of the
inputs themselves, we have an additional hidden layer to aid in the process.
'''
# Binary Classification with Sonar Dataset: Standardized Larger
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# Re-seed NumPy's global RNG at the start of this experiment.
# NOTE(review): this seeds NumPy only; confirm whether the Keras backend uses it.
seed = 7
numpy.random.seed(seed=seed)
# Download the sonar data; the file is read with no header row (header=None).
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
dataframe = read_csv(url, header=None)
dataset = dataframe.values
# Inputs: columns 0-59 cast to float. Output: column 60, left as loaded.
X = dataset[:, :60].astype(float)
Y = dataset[:, 60]
# Turn the class labels into integer codes.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)
# Larger network: 60 inputs -> 60 ReLU units -> 30 ReLU units -> 1 sigmoid output.
def create_baseline():
  """Build and compile the larger binary classifier.

  Returns a Sequential model with two hidden Dense layers (60 units then
  30 units, both ReLU, 60 input features) and a single sigmoid output unit,
  compiled with binary cross-entropy loss, the Adam optimizer and the
  accuracy metric.
  """
  first_hidden = Dense(60, input_dim=60, kernel_initializer='normal', activation='relu')
  second_hidden = Dense(30, kernel_initializer='normal', activation='relu')
  output = Dense(1, kernel_initializer='normal', activation='sigmoid')
  model = Sequential([first_hidden, second_hidden, output])
  model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  return model
# Evaluate the larger (60 + 30 hidden units) model on standardized inputs. The
# scaler is a pipeline step, so cross_val_score fits it together with the
# model on each fold's training split.
estimators = [
  ('standardize', StandardScaler()),
  ('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)
# Stratified 10-fold cross-validation with shuffling, seeded for repeatable folds.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
# Label the result after this experiment so it is not confused with the
# other 'Standardized' runs printed by earlier cells.
print('Standardized Larger: %.2f%% (%.2f%%)' % (results.mean()*100, results.std()*100))

Standardized: 85.07% (5.57%)
