<a href="https://colab.research.google.com/github/opethaiwoh/Deep-Machine-Learning/blob/main/Deep_Learning_Opeyemi_Adeniran.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install keras

In [None]:
pip install tensorflow

In [None]:
pip install openpyxl

In [None]:
pip install xlrd

In [None]:
# Dependencies

# Standard Dependencies
import os
import numpy as np
import pandas as pd
from math import sqrt

# Visualization
from pylab import *
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import seaborn as sns

# Statistics
from statistics import median
from scipy import signal
#from scipy.misc import factorial
import scipy.stats as stats
from scipy.stats import sem, binom, lognorm, poisson, bernoulli, spearmanr
from scipy.fftpack import fft, fftshift

# Deep Learning
from sklearn.preprocessing import QuantileTransformer
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.optimizers import SGD, Adam, Nadam, RMSprop, Adadelta, Adagrad, Adamax, Ftrl
from keras.constraints import maxnorm
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Set the TF_CPP_MIN_LOG_LEVEL environment variable to '2' (presumably to quieten
# TensorFlow's native log output).
# NOTE(review): Keras/TensorFlow are already imported in the earlier import cell,
# so this may come too late to affect import-time messages — confirm.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='2')
from keras.preprocessing.image import ImageDataGenerator

# About glass data
#URL: https://github.com/jbrownlee/Datasets/blob/master/glass.names

Explanation: The study of classification of types of glass was motivated by criminological investigation.  At the scene of the crime, the glass left can be used as evidence...if it is correctly identified!

In [None]:
# Read glass.csv, supplying the column labels explicitly via `names`:
# nine feature columns followed by the `Class` target.
glass_columns = [
    'Refractive Index', 'Sodium', 'Magnesium', 'Aluminium',
    'Silicon', 'Potassium', 'Calcium', 'Barium',
    'Iron', 'Class',
]
df = pd.read_csv('glass.csv', names=glass_columns)
df

# Data preprocessing

In [None]:
# Check whether any values need to be dropped by treating zeros as missing.
# Work on a deep copy so the original frame is left untouched.
df_copy = df.copy(deep=True)
zero_check_columns = ['Refractive Index', 'Sodium', 'Magnesium', 'Aluminium',
                      'Silicon', 'Potassium', 'Calcium', 'Barium',
                      'Iron', 'Class']
# DataFrame.replace returns a new frame; the result has to be assigned back,
# otherwise the null count below never reflects the replacement.
# np.nan is used because the np.NaN alias is gone in NumPy 2.0.
df_copy[zero_check_columns] = df_copy[zero_check_columns].replace(0, np.nan)
# NOTE(review): a zero may be a legitimate measurement for some columns —
# confirm before dropping anything based on these counts.

print(df_copy.isnull().sum())

In [None]:
# Standard scaling centres every feature on its mean with unit standard deviation.
# The scaler is only instantiated here; it is not fitted in this cell.
from sklearn import preprocessing

sc = preprocessing.StandardScaler()

### Checking if the variables are all numeric

In [None]:
# Per column: True when every entry survives numeric coercion (nothing turns into NaN).
df.apply(lambda column: pd.to_numeric(column, errors='coerce').notna().all())

### Checking if the variables use a consistent scale.

In [None]:
# Print the column dtypes and non-null counts of the loaded frame.
df.info()

# The overall statistics of the dataset

In [None]:
# Summary statistics for every column, printed under a short caption.
df_stat = df.describe()
print(" ".join(["Overall Statistics", "\n", str(df_stat), "\n"]))

# A visualization of one or more features using a bar chart (histogram)

In [None]:
# Histogram of every feature column (the Class target is left out).
df.loc[:, [
    'Refractive Index', 'Sodium', 'Magnesium', 'Aluminium',
    'Silicon', 'Potassium', 'Calcium', 'Barium',
    'Iron',
]].hist(figsize=(10, 10))

### Drop all the rows with missing values

In [None]:
# Turn empty / whitespace-only cells into NaN, then drop every row that has a
# missing value. dropna() returns a new frame, so its result must be assigned;
# calling it without assignment leaves the rows in place.
df = df.replace(r'^\s*$', np.nan, regex=True).dropna()
df

# Checking for outliers

In [None]:
# Box plots for the first five features, one box per column.
df1 = df.reindex(columns=['Refractive Index', 'Sodium', 'Magnesium',
                          'Aluminium', 'Silicon'])

# melt() reshapes to long form: a "variable" column (feature name) and a "value" column.
sns.boxplot(x="variable", y="value", data=df1.melt())

plt.show()

In [None]:
# Detecting Outliers - Part 2: box plots for the remaining four features.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy's global random generator.
np.random.seed(42)

df2 = df.reindex(columns=['Potassium', 'Calcium', 'Barium', 'Iron'])

# Long-form reshape so seaborn draws one box per feature.
sns.boxplot(x="variable", y="value", data=df2.melt())

plt.show()

In [None]:
# Outlier detection: absolute z-scores for every column, with the IQR fences
# printed for reference.
z = np.abs(stats.zscore(df))
print(z)
print(df.shape)
print("\n")
# Positions (row indices, column indices) where |z| exceeds 3.
print(np.where(z > 3))
print("\n")
Q1, Q3 = df.quantile(0.25), df.quantile(0.75)
IQR = Q3 - Q1
lowqe_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
print("IQR range: ", "\n", lowqe_bound, "\n", upper_bound)
# Keep only the rows in which every column lies within three standard
# deviations; the IQR fences above are printed but not used for filtering.
# NOTE(review): the z-scores cover all columns of df, including Class — confirm
# that filtering on the target column is intended.
Otl_df = df[(z < 3).all(axis=1)]
print(Otl_df.shape)

# Choosing a target variable

In [None]:
# Positional split of the outlier-filtered frame:
# columns 0-8 are the features, column 9 is the target.
x = Otl_df.iloc[:, :9]
y = Otl_df.iloc[:, 9]

In [None]:
# Preview the first five feature rows.
x.head(5)

In [None]:
# Preview the first five target values.
y.head(5)

In [None]:
# Print the column dtypes and non-null counts of the outlier-filtered frame.
Otl_df.info()

# The accuracy of your initial DL model.

In [None]:
# Initial network: scale the features, train a small dense classifier and
# report its accuracy.

# Scale every feature into [0, 1]. `x` is rebound to the scaled array so the
# cells further down keep receiving scaled inputs.
scalar = MinMaxScaler()
scalar.fit(x)
x = scalar.transform(x)

# The target is a multi-class glass type, so a single sigmoid unit with binary
# cross-entropy cannot represent it. Map the labels onto 0..n_classes-1 for a
# softmax head; `y` itself is left untouched for the later cells.
class_labels, y_encoded = np.unique(np.asarray(y), return_inverse=True)

model = Sequential()
model.add(Dense(12, input_dim=x.shape[1], activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(len(class_labels), activation='softmax'))
# Build it
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train it
model.fit(x, y_encoded, epochs=25, batch_size=10)
# Evaluate it
# NOTE(review): this scores the model on the data it was trained on; a held-out
# split would give a more honest estimate.
_, accuracy = model.evaluate(x, y_encoded)
print('Accuracy: %.2f' % (accuracy*100)) # We only show the accuracy
# Make a prediction. `x` is already scaled, so it must not be passed through
# the scaler a second time.
Xnew = x
# Most probable class per row, mapped back to the original label values.
predictions = class_labels[np.argmax(model.predict(Xnew), axis=1)]
# show the inputs and predicted outputs

for i in range(len(Xnew)):
    print("X=%s, Predicted=%s" % (Xnew[i].tolist(), predictions[i]))

# Tuning the hyperparameters of the DL model

In [None]:
# Model factory used by the hyperparameter search
def create_model(activation='sigmoid', learn_rate=0.2, momentum=0, neurons=1, optimizer_list='adam', init_mode='uniform'):
    """Build and compile the dense network used for hyperparameter tuning.

    Parameters
    ----------
    activation : str
        Activation of the first and of the output layer.
    learn_rate : float
        Learning rate handed to the optimizer when `optimizer_list` is callable.
    momentum : float
        Momentum handed to the optimizer when it accepts that argument.
    neurons : int
        Number of units in the first dense layer.
    optimizer_list : callable or str
        Optimizer class/factory to instantiate, or something `model.compile`
        accepts directly (such as the default name 'adam'). In the latter case
        `learn_rate` and `momentum` are not applied.
    init_mode : str
        Kernel initializer used by every dense layer.

    Returns
    -------
    The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dense(neurons, input_dim=9, kernel_initializer=init_mode, activation=activation, kernel_constraint=maxnorm(4)))
    model.add(Dropout(0.5))
    model.add(Dense(12, kernel_initializer=init_mode, activation='relu'))
    model.add(Dense(8, kernel_initializer=init_mode, activation='relu'))
    model.add(Dense(1, kernel_initializer=init_mode, activation=activation))
    # Compile model
    if callable(optimizer_list):
        try:
            optimizer = optimizer_list(learning_rate=learn_rate, momentum=momentum)
        except (TypeError, ValueError):
            # This optimizer takes no momentum argument (the error type for an
            # unknown keyword presumably varies by Keras version): retry
            # with the learning rate only.
            optimizer = optimizer_list(learning_rate=learn_rate)
    else:
        # A string such as the default 'adam' is not callable; calling it used
        # to raise an uncaught TypeError. Hand it to compile() unchanged.
        optimizer = optimizer_list
    # NOTE(review): one output unit with binary cross-entropy assumes a 0/1
    # target — confirm this matches the labels passed to fit().
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

In [None]:
# Grid of hyperparameters to try; the first six keys are forwarded to
# create_model, while 'epochs' and 'batch_size' are listed alongside them for
# the search to vary as well.
param_grid = {
    'activation': ['linear', 'sigmoid'],
    'learn_rate': [0.1, 0.2],
    'momentum': [0.1, 0.2],
    'neurons': [1, 5],
    'optimizer_list': [SGD, Adagrad],
    'init_mode': ['uniform', 'zero'],
    'epochs': [25, 35],
    'batch_size': [5, 2]
    }

# Wrap the Keras builder as a scikit-learn estimator. It is built once only:
# an earlier duplicate construction was immediately overwritten and is removed.
model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=10, verbose=0)
# Exhaustive search over param_grid with 3-fold cross-validation.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(x, y)

# Best configuration, followed by the mean/std test score of every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))