<a href="https://colab.research.google.com/github/nandiniparekh/Shallow-Neural-Networks/blob/main/Shallow_Neural_Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Shallow Neural Networks
I explore building a multi-layer perceptron (MLP) network with Keras, trained by backpropagation with a momentum-based optimizer (Adam), to solve the Wine dataset classification problem.

## Read the data
Wine is a 3-class problem with 13 predictor attributes.

In [None]:
import pandas as pd

# Column names for "wine.data": the class label comes first, followed by
# the 13 predictor attributes.
clm_names = [
    'Class',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]

# The column names are supplied explicitly instead of being read from the file.
df = pd.read_csv("wine.data", names=clm_names)
df.head()

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


## Split Data

In [None]:
# Target labels: the wine class recorded for every sample.
y = df.loc[:, 'Class']
y

0      1
1      1
2      1
3      1
4      1
      ..
173    3
174    3
175    3
176    3
177    3
Name: Class, Length: 178, dtype: int64

In [None]:
# Feature matrix: every column except the class label.
X = df.drop('Class', axis='columns')
X

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
2,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
3,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
4,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
174,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
175,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
176,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


## Preprocessing Pipeline
The pipeline includes the following tasks:
* Mean Data Imputation
* Min Max Scaling
* One Hot Encoder


In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MinMaxScaler

class scaler(BaseEstimator, TransformerMixin):
  """Min-max scale every column of a DataFrame.

  The per-column minimum and maximum are learned in ``fit`` and re-used in
  ``transform``, so data passed to ``transform`` later is scaled with the
  statistics of the data the transformer was fitted on (instead of being
  re-fitted on whatever frame happens to be passed in).
  """

  def fit(self, X, y=None):
    """Learn the column-wise minimum and maximum of ``X``.

    Args:
      X: pandas DataFrame with numeric columns.
      y: Ignored; accepted for scikit-learn pipeline compatibility.

    Returns:
      The fitted transformer (``self``).
    """
    # Trailing underscore marks state learned during fit (sklearn convention).
    self.scaler_ = MinMaxScaler().fit(X)
    return self

  def transform(self, X):
    """Return a scaled copy of ``X``; the input frame is not modified.

    Args:
      X: pandas DataFrame with the same columns that were seen in ``fit``.

    Returns:
      A new DataFrame with the same index and column labels as ``X``.

    Raises:
      sklearn.exceptions.NotFittedError: if ``fit`` has not been called.
    """
    from sklearn.utils.validation import check_is_fitted

    check_is_fitted(self, 'scaler_')
    scaled = self.scaler_.transform(X)
    # Rebuild a frame so downstream steps can keep addressing columns by name.
    return pd.DataFrame(scaled, columns=X.columns, index=X.index)

class dataImputer(BaseEstimator, TransformerMixin):
  """Replace missing values in every column with that column's mean.

  The column means are learned in ``fit`` and re-used in ``transform``, so
  later data is filled with the means of the data the transformer was
  fitted on.
  """

  def fit(self, X, y=None):
    """Learn the per-column mean of ``X``.

    Args:
      X: pandas DataFrame with numeric columns.
      y: Ignored; accepted for scikit-learn pipeline compatibility.

    Returns:
      The fitted transformer (``self``).
    """
    # Trailing underscore marks state learned during fit (sklearn convention).
    self.imputer_ = SimpleImputer(strategy="mean").fit(X)
    return self

  def transform(self, X):
    """Return an imputed copy of ``X``; the input frame is not modified.

    Args:
      X: pandas DataFrame with the same columns that were seen in ``fit``.

    Returns:
      A new DataFrame with the same index and column labels as ``X``.

    Raises:
      sklearn.exceptions.NotFittedError: if ``fit`` has not been called.
    """
    from sklearn.utils.validation import check_is_fitted

    check_is_fitted(self, 'imputer_')
    filled = self.imputer_.transform(X)
    # Rebuild a frame so downstream steps can keep addressing columns by name.
    return pd.DataFrame(filled, columns=X.columns, index=X.index)

class oneHotEncoder(BaseEstimator, TransformerMixin):
  """One-hot encode the ``Class`` column into ``Class 1`` ... ``Class N``.

  The set of class values is learned in ``fit``. Output columns are named by
  the rank of each class value in sorted order, so the smallest class value
  becomes ``Class 1``, the next ``Class 2``, and so on, for any number of
  classes.
  """

  def fit(self, X, y=None):
    """Learn the distinct values of ``X['Class']``.

    Args:
      X: pandas DataFrame containing a ``Class`` column.
      y: Ignored; accepted for scikit-learn pipeline compatibility.

    Returns:
      The fitted transformer (``self``).
    """
    self.encoder_ = OneHotEncoder(handle_unknown='ignore').fit(X[['Class']])
    n_classes = len(self.encoder_.categories_[0])
    # One output column per learned category, numbered from 1.
    self.class_columns_ = ['Class ' + str(k) for k in range(1, n_classes + 1)]
    return self

  def transform(self, X):
    """Return a copy of ``X`` with ``Class`` replaced by indicator columns.

    Args:
      X: pandas DataFrame containing a ``Class`` column.

    Returns:
      A new DataFrame: the columns of ``X`` without ``Class``, followed by
      one 0/1 indicator column per class learned in ``fit``. The input frame
      is not modified.

    Raises:
      sklearn.exceptions.NotFittedError: if ``fit`` has not been called.
    """
    from sklearn.utils.validation import check_is_fitted

    check_is_fitted(self, 'encoder_')
    indicators = self.encoder_.transform(X[['Class']]).toarray()
    # Pair every indicator column (a row of the transposed matrix) with its name.
    new_columns = dict(zip(self.class_columns_, indicators.T))
    return X.drop(['Class'], axis=1).assign(**new_columns)

In [None]:
# Preprocessing Pipeline
from sklearn.pipeline import Pipeline

# Chain the three custom transformers; they run in the order listed.
preprocessing_pipe = Pipeline(steps=[
    ('scaler', scaler()),
    ('imputation', dataImputer()),
    ('oneHotEncoder', oneHotEncoder()),
])

# Pass a copy so the raw frame `df` is guaranteed to keep its original values
# and columns, which keeps the earlier cells that read `df` re-runnable.
# NOTE(review): the pipeline is fitted on the full dataset before the
# train/test split made later in the notebook, so the preprocessing
# statistics also include the rows that end up in the test set.
preprocessed_df = preprocessing_pipe.fit_transform(df.copy())

## Data Split

Split the dataset into feature (X) and target sets (y).

In [None]:
# Feature matrix: everything except the one-hot target columns. Selecting by
# label (rather than by a fixed column count) keeps this correct if the number
# or order of feature columns changes.
X = preprocessed_df.drop(columns=['Class 1', 'Class 2', 'Class 3'])
X.head()

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,0.842105,0.1917,0.572193,0.257732,0.619565,0.627586,0.57384,0.283019,0.59306,0.372014,0.455285,0.970696,0.561341
1,0.571053,0.205534,0.417112,0.030928,0.326087,0.575862,0.510549,0.245283,0.274448,0.264505,0.463415,0.78022,0.550642
2,0.560526,0.320158,0.700535,0.412371,0.336957,0.627586,0.611814,0.320755,0.757098,0.375427,0.447154,0.695971,0.646933
3,0.878947,0.23913,0.609626,0.319588,0.467391,0.989655,0.664557,0.207547,0.55836,0.556314,0.308943,0.798535,0.857347
4,0.581579,0.365613,0.807487,0.536082,0.521739,0.627586,0.495781,0.490566,0.444795,0.259386,0.455285,0.608059,0.325963


In [None]:
# Target matrix: the three one-hot indicator columns of the wine class.
y = preprocessed_df.loc[:, ['Class 1', 'Class 2', 'Class 3']]
y.head()

Unnamed: 0,Class 1,Class 2,Class 3
0,1.0,0.0,0.0
1,1.0,0.0,0.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,1.0,0.0,0.0


### Split into Test and Train Datasets

The datasets X and y are further split into training and testing sets using stratified sampling.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for testing; stratifying on the one-hot target
# keeps the class proportions similar in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=70
)

# Report the shape of every part of the split.
for part_name, part in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(part_name + ':', part.shape)

X_train: (160, 13)
X_test: (18, 13)
y_train: (160, 3)
y_test: (18, 3)


Percentage of each target class in the training and testing sets after stratifying.

In [None]:
# Share (in percent) of each one-hot class pattern in the two target sets.
stratified_dataset = {
    name: target.value_counts(normalize=True) * 100
    for name, target in (('y_train', y_train), ('y_test', y_test))
}

pd.DataFrame(stratified_dataset)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,y_train,y_test
Class 1,Class 2,Class 3,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,1.0,0.0,40.0,38.888889
1.0,0.0,0.0,33.125,33.333333
0.0,0.0,1.0,26.875,27.777778


## Multi-Layer Perceptron

Import libraries

In [None]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.5-py3-none-any.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.5/129.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras-core (from keras-tuner)
  Downloading keras_core-0.1.7-py3-none-any.whl (950 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Collecting namex (from keras-core->keras-tuner)
  Downloading namex-0.0.7-py3-none-any.whl (5.8 kB)
Installing collected packages: namex, kt-legacy, keras-core, keras-tuner
Successfully installed keras-core-0.1.7 keras-tuner-1.4.5 kt-legacy-1.0.5 namex-0.0.7


In [None]:
from sklearn.model_selection import GridSearchCV
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, InputLayer
from keras_tuner.tuners import RandomSearch
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
import keras_tuner

Using TensorFlow backend


### Building a neural network

In [None]:
def build_model(hp):
  """Build and compile an MLP classifier from tuner hyperparameters.

  Search space:
    * ``layers``: number of hidden layers, 1 to 3.
    * ``units_<i>``: width of hidden layer ``i``, 32 to 128 in steps of 32.
    * ``learning_rate``: Adam learning rate, one of 1e-3, 1e-2, 1e-1.

  The input width is read from the notebook-level ``X_train``.

  Args:
    hp: KerasTuner ``HyperParameters`` object used to sample the values above.

  Returns:
    A compiled Keras ``Sequential`` model with a 3-unit softmax output,
    categorical cross-entropy loss and an accuracy metric.
  """
  model = Sequential()
  # The input shape must be a tuple with one entry per feature axis.
  model.add(InputLayer(input_shape=(X_train.shape[1],)))

  # Hidden ReLU layers; each layer's width is its own hyperparameter.
  for i in range(hp.Int('layers', 1, 3)):
    model.add(Dense(
        units=hp.Int('units_' + str(i), 32, 128, step=32),
        activation='relu'
    ))

  # One softmax unit per wine class.
  model.add(Dense(3, activation='softmax'))
  model.compile(
      optimizer=Adam(
          learning_rate=hp.Choice('learning_rate', values=[1e-3, 1e-2, 1e-1])
      ),
      loss='categorical_crossentropy',
      metrics=['accuracy'],
  )
  return model

In [None]:
# Ten shuffled, stratified folds; the fixed seed makes the fold assignment repeatable.
stratified_kfold = StratifiedKFold(
    n_splits=10,
    random_state=42,
    shuffle=True,
)

### Hyper-parameter exploration using RandomSearch

In [None]:
# Random search over the space defined in build_model, minimising the
# validation loss. Each of the 10 sampled configurations is trained 5 times
# and the scores are averaged to reduce the effect of random initialisation.
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=5,
    # Fixed seed so the same hyperparameter combinations are sampled on every run.
    seed=42,
    directory='dir_final_1',
    project_name='helloworld',
    # Start from scratch instead of silently reloading trials left on disk by
    # a previous run (which would make the search below a no-op).
    overwrite=True,
)

In [None]:
import numpy as np

# Convert the split frames to plain NumPy arrays for Keras and for positional
# fold indexing. np.asarray is a no-op on arrays, so this cell can safely be
# run more than once (``.values`` fails on an ndarray the second time).
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

### Stratified ten-fold cross-validation

In [None]:
class CrossValidatedRandomSearch(RandomSearch):
  """Random search whose trial score is averaged over cross-validation folds.

  Calling ``search`` once per fold on a single tuner does not cross-validate:
  the first call uses up ``max_trials`` and every later call returns
  immediately, so only the first fold is ever used. Here every trial is
  instead trained and validated on each fold.
  """

  def run_trial(self, trial, x, y, cv, **fit_kwargs):
    """Train the trial's model on every fold and return all fold results.

    Args:
      trial: The KerasTuner trial holding the sampled hyperparameters.
      x: Feature array, indexable by integer index arrays.
      y: One-hot target array aligned with ``x``.
      cv: Splitter with a ``split(X, y)`` method (e.g. ``StratifiedKFold``).
      **fit_kwargs: Forwarded to ``model.fit`` (e.g. ``epochs``).

    Returns:
      A flat list with one training result per fold and execution; the tuner
      averages the objective over this list.
    """
    fold_results = []
    # Stratify on the class index recovered from the one-hot rows.
    for train_index, val_index in cv.split(x, y.argmax(axis=1)):
      result = super().run_trial(
          trial,
          x[train_index],
          y[train_index],
          validation_data=(x[val_index], y[val_index]),
          **fit_kwargs,
      )
      # The parent normally returns one entry per execution; flatten them.
      fold_results.extend(result if isinstance(result, list) else [result])
    return fold_results


# Same search configuration as before, but scored with 10-fold cross-validation.
# NOTE(review): every trial now trains folds x executions_per_trial models, so
# this takes roughly ten times longer; lower executions_per_trial if needed.
tuner = CrossValidatedRandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=5,
    directory='dir_final_1',
    project_name='helloworld_cv',
    overwrite=True,
)

tuner.search(X_train, y_train, cv=stratified_kfold, epochs=20)

Trial 10 Complete [00h 00m 13s]
val_loss: 0.0990804448723793

Best val_loss So Far: 0.07078583557158709
Total elapsed time: 00h 02m 40s


In [None]:
# Print the hyperparameters and score of the ten best trials.
tuner.results_summary(num_trials=10)

Results summary
Results in dir_final_1/helloworld
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 06 summary
Hyperparameters:
layers: 2
units_0: 64
learning_rate: 0.1
units_1: 128
units_2: 64
Score: 0.07078583557158709

Trial 03 summary
Hyperparameters:
layers: 2
units_0: 64
learning_rate: 0.01
units_1: 96
units_2: 96
Score: 0.08547969534993172

Trial 07 summary
Hyperparameters:
layers: 1
units_0: 128
learning_rate: 0.1
units_1: 96
units_2: 96
Score: 0.0892096847295761

Trial 08 summary
Hyperparameters:
layers: 2
units_0: 128
learning_rate: 0.01
units_1: 96
units_2: 64
Score: 0.09048778265714645

Trial 09 summary
Hyperparameters:
layers: 2
units_0: 128
learning_rate: 0.01
units_1: 64
units_2: 64
Score: 0.0990804448723793

Trial 00 summary
Hyperparameters:
layers: 3
units_0: 96
learning_rate: 0.1
units_1: 32
units_2: 32
Score: 0.10875982735306025

Trial 04 summary
Hyperparameters:
layers: 1
units_0: 128
learning_rate: 0.01
units_1: 96
units_2: 64
Score: 0.11571

Result of parameter exploration

In [None]:
# Hyperparameter set of the single best-scoring trial.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_hp.values

{'layers': 2,
 'units_0': 64,
 'learning_rate': 0.1,
 'units_1': 128,
 'units_2': 64}

## Building the best model using the parameters found

In [None]:
# Re-create an untrained network configured with the winning hyperparameters.
best_model = tuner.hypermodel.build(best_hp)

# Fit it on the complete training split (no validation hold-out at this stage).
best_model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    verbose=1,
)

# Loss and accuracy on the held-out test split.
test_loss, test_accuracy = best_model.evaluate(X_test, y_test)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Evaluating the model

In [None]:
from sklearn.metrics import f1_score

# Class probabilities for every test sample (one softmax column per class).
predictions = best_model.predict(X_test)

# Each sample belongs to exactly one class, so pick the most probable one.
# Thresholding at 0.5 would leave a sample without any predicted class
# whenever no single probability exceeds 0.5.
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)

# Macro-averaged F1: the unweighted mean of the per-class F1 scores
# ('samples' averaging is meant for multilabel targets).
f1 = f1_score(true_classes, predicted_classes, average='macro')
f1

