###Import Statements

In [0]:
%pip install tensorflow -qq

In [2]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

In [0]:
from google.colab import files

In [0]:
import pandas_profiling as pp
import pandas as pd

In [0]:
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [7]:
from __future__ import absolute_import, division, print_function, unicode_literals
from pathlib import Path
import os

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%pylab inline

#print(tf.__version__)

Populating the interactive namespace from numpy and matplotlib


##Basic Neural Networks

###Data

For this exercise we will use the Fashion MNIST dataset from the following repository. This data is already included in the TensorFlow library.

https://github.com/zalandoresearch/fashion-mnist/tree/master/data/fashion

In [0]:
# fashion_mnist = keras.datasets.fashion_mnist

In [0]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [0]:
# Human-readable names for the ten classes; the plotting helpers further down
# look names up as class_names[label].
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

Let us have a quick look at the basis of this dataset

Keep some backup images to test

In [0]:
# Keep references to the test arrays as loaded: x_test / y_test are rebound to
# reshaped, scaled and one-hot encoded versions in the preprocessing cell below.
test_x = x_test
test_y = y_test

In [0]:
x_train.shape

In [0]:
x_test.shape

In [0]:
# Show one test image with a colour bar for its pixel values.
fig, ax = plt.subplots()
im = ax.imshow(x_test[10])
fig.colorbar(im, ax=ax)
ax.grid(False)
plt.show()

###Modelling

The pixel values of each image range from 0 to 255; since we are working with probabilities, we scale them down to the range 0 to 1. We also convert the labels to categorical (one-hot) form rather than numerical.

In [0]:
y_train[0:10]

In [0]:
type(y_train)

In [0]:
# Flatten every image into a single row and scale pixel values by 1/255;
# one-hot encode the labels for categorical cross-entropy.
n_train, n_test = x_train.shape[0], x_test.shape[0]
x_train = x_train.reshape(n_train, -1) / 255.0
x_test = x_test.reshape(n_test, -1) / 255.0
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

In [0]:
#Use with CNN
#x_train = x_train[:,:,:,np.newaxis] / 255.0
#x_test = x_test[:,:,:,np.newaxis] / 255.0
# y_train = to_categorical(y_train)
# y_test = to_categorical(y_test)

In [0]:
y_train.shape

###Single Layer Perceptron

In [0]:
# Dense network on the 784 flattened pixels: a 10-unit ReLU layer followed by
# a 10-way softmax output.
model = Sequential([
    Dense(10, input_dim=784, activation='relu'),
    Dense(10, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Activation Layers:

ReLU - use in every layer

Softmax - for multiclass problems

Sigmoid/tanh - for binary classification

In [0]:
history = model.fit(x_train, y_train, epochs=10, validation_split=0.1)

In [0]:
# Keras stores the accuracy metric under 'acc' in older releases and under
# 'accuracy' in TF 2.x; use whichever key this training run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Left panel: loss curves. Right panel: accuracy curves.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(22, 10))
ax1.plot(history.history['val_loss'])
ax1.plot(history.history['loss'])
ax1.set_ylabel("Loss")
ax1.set_xlabel("Epochs")
ax1.legend(labels=["Val Loss", "Training Loss"])
ax2.plot(history.history['val_' + acc_key])
ax2.plot(history.history[acc_key])
ax2.set_ylabel("Accuracy")
ax2.set_xlabel("Epochs")
ax2.legend(labels=["Val Accuracy", "Training Accuracy"])
plt.show()

In [0]:
_, test_acc = model.evaluate(x_test, y_test)
print(test_acc)

The accuracy has not changed by much; it stays roughly the same. So let's try changing to a deeper NN.

###Multilayer Perceptron

In [0]:
# Same layout as the first model, but with a 50-unit hidden layer.
model2 = Sequential([
    Dense(50, input_dim=784, activation='relu'),
    Dense(10, activation='softmax'),
])
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model2.summary()
# Hold out 20% of the training data for validation.
history = model2.fit(x_train, y_train, epochs=10, validation_split=0.2)

In [0]:
# Keras stores the accuracy metric under 'acc' in older releases and under
# 'accuracy' in TF 2.x; use whichever key this training run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Left panel: loss curves. Right panel: accuracy curves.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(22, 10))
ax1.plot(history.history['val_loss'])
ax1.plot(history.history['loss'])
ax1.set_ylabel("Loss")
ax1.set_xlabel("Epochs")
ax1.legend(labels=["Val Loss", "Training Loss"])
ax2.plot(history.history['val_' + acc_key])
ax2.plot(history.history[acc_key])
ax2.set_ylabel("Accuracy")
ax2.set_xlabel("Epochs")
ax2.legend(labels=["Val Accuracy", "Training Accuracy"])
plt.show()

In [0]:
# NOTE(review): `break` outside a loop is a SyntaxError, so this cell always
# fails. Presumably it is a deliberate stop for "Run All" - confirm, and remove
# it if the cells below are meant to run.
break

###3 layer perceptron

In [0]:
# Two 50-unit ReLU hidden layers ahead of the 10-way softmax output.
model3 = Sequential([
    Dense(50, input_dim=784, activation='relu'),
    Dense(50, activation='relu'),
    Dense(10, activation='softmax'),
])
model3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model3.summary()


In [0]:
model3.fit(x_train, y_train, epochs=10, validation_split=0.1)

###CNN

In [0]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten

The data for a CNN has to be :total_data x width x height x channels (eg. RGB = 3 or Greyscale = 1).

In [0]:
# Small CNN: one convolution, one max-pool, then a softmax classifier.
model4 = Sequential()
# The Conv2D keyword is `strides` (plural); `stride` is rejected as an
# unexpected keyword argument.
model4.add(Conv2D(filters=64, kernel_size=2, strides=1, padding='same', activation='relu', input_shape=(28,28,1)))
model4.add(MaxPooling2D(pool_size=2))
model4.add(Flatten())
model4.add(Dense(10, activation='softmax'))
model4.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

conv2d changes your 28x28x1 image to 28x28x64. Just imagine this as 64 hidden layer cells.

MaxPooling2D reduces the width and height so that you will not need to compute all the cells. It reduces the size to 14x14x64.

Finally, flatten just flattens out the output of MaxPooling into a hidden layer of 12544 cells.

In [0]:
model4.summary()

In [0]:
# The Conv2D input layer expects (samples, 28, 28, 1), but x_train was
# flattened to one row per image for the dense models; restore the image shape.
model4.fit(x_train.reshape(-1, 28, 28, 1), y_train, epochs=10, validation_split=0.1)

In [0]:
# Evaluate on the test set, reshaped to the (samples, 28, 28, 1) layout the
# CNN was built for (x_test was flattened for the dense models).
_, test_acc = model4.evaluate(x_test.reshape(-1, 28, 28, 1), y_test)
print(test_acc)

###Predictions

Now that we have the model, lets have a look at the predictions we are making with it.

In [0]:
# test_images = files.upload()

In [0]:
test_images = x_test[245]

In [0]:
# NOTE(review): x_test was already divided by 255.0 in the preprocessing cell,
# so this rescales a second time; test_images is also a single flattened image
# here, so [0:10] selects its first ten pixel values - confirm intent.
test_images = test_images[0:10] / 255.0

In [0]:
test_images.shape

In [0]:
# x_test rows were flattened for the dense models; restore the 28x28 layout so
# imshow receives a 2-D image.
plt.figure()
plt.imshow(x_test[245].reshape(28, 28))
plt.colorbar()
plt.grid(False)
plt.show()

In [0]:
predictions = model.predict(x_test[245:246])

In [0]:
predictions

The prediction is an array of ten probabilities, one for each class. So let's look at the class name of the test label.

In [0]:
class_names[8]

We can graph this to look at the full set of 10 class predictions

In [0]:
def plot_image(i, predictions_array, true_label, img):
  """Draw sample ``i`` and caption it with the predicted and true class names.

  Args:
    i: Index used to select ``predictions_array[i]``, ``true_label[i]`` and
      ``img[i]``.
    predictions_array: Per-sample class scores.
    true_label: Per-sample labels, either integer class ids or one-hot rows.
    img: Per-sample images, either 2-D arrays or flattened square images.

  The caption reads "<predicted> <confidence>% (<true>)" and is blue when the
  predicted class equals the true class, red otherwise.
  """
  predictions_array, true_label, img = predictions_array[i], true_label[i], img[i]

  # Labels may arrive one-hot encoded (the notebook applies to_categorical to
  # y_test); reduce them to a class id so the comparison and the class_names
  # lookup below work.
  if np.ndim(true_label) > 0:
    true_label = int(np.argmax(true_label))

  # Images may arrive flattened (the notebook reshapes x_test to one row per
  # image); restore a square 2-D layout for imshow.
  img = np.asarray(img)
  if img.ndim == 1:
    side = int(round(np.sqrt(img.size)))
    img = img.reshape(side, side)

  plt.grid(False)
  plt.xticks([])
  plt.yticks([])

  plt.imshow(img, cmap=plt.cm.binary)

  predicted_label = np.argmax(predictions_array)
  if predicted_label == true_label:
    color = 'blue'
  else:
    color = 'red'

  plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                100*np.max(predictions_array),
                                class_names[true_label]),
                                color=color)

def plot_value_array(i, predictions_array, true_label):
  """Draw a bar chart of the ten class scores for sample ``i``.

  Args:
    i: Index used to select ``predictions_array[i]`` and ``true_label[i]``.
    predictions_array: Per-sample class scores.
    true_label: Per-sample labels, either integer class ids or one-hot rows.

  Bars are grey; the predicted class is coloured red and the true class blue
  (blue is applied last, so a correct prediction shows as blue).
  """
  predictions_array, true_label = predictions_array[i], true_label[i]

  # Labels may arrive one-hot encoded (the notebook applies to_categorical to
  # y_test); reduce them to a class id so it can index the bar container.
  if np.ndim(true_label) > 0:
    true_label = int(np.argmax(true_label))

  plt.grid(False)
  plt.xticks([])
  plt.yticks([])
  thisplot = plt.bar(range(10), predictions_array, color="#777777")
  plt.ylim([0, 1])
  predicted_label = np.argmax(predictions_array)

  thisplot[predicted_label].set_color('red')
  thisplot[true_label].set_color('blue')

Let us plot the comparison

In [0]:
# `predictions` holds a single row: the model output for test image 245.
# Pair row 0 with that image's own label and pixels. test_y / test_x are the
# references kept before preprocessing (integer labels, 2-D images), which is
# the form the plotting helpers compare and draw.
i = 0
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions, test_y[245:246], test_x[245:246])
plt.subplot(1,2,2)
plot_value_array(i, predictions, test_y[245:246])
plt.show()

Let us then plot a whole bunch of test values

In [0]:
# Plot the first X test images, their predicted label, and the true label
# Color correct predictions in blue, incorrect predictions in red
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
# Predict every image shown below; the earlier `predictions` array only has
# the single row for test image 245, so indexing it by i would fail.
batch_predictions = model.predict(x_test[:num_images])
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
  # test_y / test_x are the references kept before preprocessing (integer
  # labels, 2-D images), which is the form the plotting helpers expect.
  plt.subplot(num_rows, 2*num_cols, 2*i+1)
  plot_image(i, batch_predictions, test_y, test_x)
  plt.subplot(num_rows, 2*num_cols, 2*i+2)
  plot_value_array(i, batch_predictions, test_y)
plt.show()

In [0]:
# Grab an image from the test dataset
img = x_test[0]

print(img.shape)

###Churn Data Set

In [0]:
import pandas as pd

In [0]:
!wget -c "https://bml-data.s3.amazonaws.com/churn-bigml-80.csv"


In [0]:
# churn_data = files.upload()
# churn_data = 'churn-bigml-80.csv'
churn_data = "https://bml-data.s3.amazonaws.com/churn-bigml-80.csv"
# churn_data = "/content/churn-bigml-80.csv"
df = pd.read_csv(churn_data)

In [10]:
# Do some profiling EDA. pandas_profiling is already imported as `pp` at the
# top of the notebook; `pfr` holds the generated report, which the next cell
# writes to HTML.
pfr = pp.ProfileReport(df)

  variable_stats = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)


In [0]:
pfr.to_file("churn_data.html")

##Hyper Tuning

Hypertuning: Pima Indians onset of diabetes classification dataset.

In [12]:
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

Using TensorFlow backend.


 Tuning the batch size and number of epochs

In [0]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

In [0]:
# load dataset
# files.upload()
!wget -c https://archive.ics.uci.edu/ml/machine-learning-databases/diabetes/diabetes-data.tar.Z -O - | tar -xz

In [0]:
# diabetes_file = "pima-indians-diabetes.data.txt"
diabetes_file = "/content/Diabetes-Data/data-01"
# Load into its own name so the churn DataFrame `df`, which the profiling and
# X/Y split cells below still read, is not overwritten.
diabetes_df = pd.read_csv(diabetes_file, delimiter="\t", header = None)
diabetes_df.columns = ["Date", "Time", "Code", "Value"]
diabetes_df.head()

In [15]:
pp.ProfileReport(df)

  variable_stats = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)


0,1
Number of variables,20
Number of observations,2666
Total Missing (%),0.0%
Total size in memory,398.5 KiB
Average record size in memory,153.0 B

0,1
Numeric,12
Categorical,3
Boolean,1
Date,0
Text (Unique),0
Rejected,4
Unsupported,0

0,1
Distinct count,51
Unique (%),1.9%
Missing (%),0.0%
Missing (n),0

0,1
WV,88
MN,70
NY,68
Other values (48),2440

Value,Count,Frequency (%),Unnamed: 3
WV,88,3.3%,
MN,70,2.6%,
NY,68,2.6%,
VA,67,2.5%,
AL,66,2.5%,
WY,66,2.5%,
OH,66,2.5%,
OR,62,2.3%,
NV,61,2.3%,
WI,61,2.3%,

0,1
Distinct count,205
Unique (%),7.7%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,100.62
Minimum,1
Maximum,243
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,36
Q1,73
Median,100
Q3,127
95-th percentile,166
Maximum,243
Range,242
Interquartile range,54

0,1
Standard deviation,39.564
Coef of variation,0.3932
Kurtosis,-0.13831
Mean,100.62
MAD,31.663
Skewness,0.079023
Sum,268254
Variance,1565.3
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
93,35,1.3%,
87,33,1.2%,
105,33,1.2%,
101,32,1.2%,
99,32,1.2%,
100,31,1.2%,
116,29,1.1%,
106,29,1.1%,
98,29,1.1%,
90,29,1.1%,

Value,Count,Frequency (%),Unnamed: 3
1,6,0.2%,
2,1,0.0%,
3,4,0.2%,
4,1,0.0%,
5,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
217,1,0.0%,
221,1,0.0%,
224,2,0.1%,
225,2,0.1%,
243,1,0.0%,

0,1
Distinct count,3
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,437.44
Minimum,408
Maximum,510
Zeros (%),0.0%

0,1
Minimum,408
5-th percentile,408
Q1,408
Median,415
Q3,510
95-th percentile,510
Maximum,510
Range,102
Interquartile range,102

0,1
Standard deviation,42.521
Coef of variation,0.097204
Kurtosis,-0.74166
Mean,437.44
MAD,36.961
Skewness,1.111
Sum,1166212
Variance,1808
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
415,1318,49.4%,
510,679,25.5%,
408,669,25.1%,

Value,Count,Frequency (%),Unnamed: 3
408,669,25.1%,
415,1318,49.4%,
510,679,25.5%,

Value,Count,Frequency (%),Unnamed: 3
408,669,25.1%,
415,1318,49.4%,
510,679,25.5%,

0,1
Distinct count,2
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
No,2396
Yes,270

Value,Count,Frequency (%),Unnamed: 3
No,2396,89.9%,
Yes,270,10.1%,

0,1
Distinct count,2
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
No,1933
Yes,733

Value,Count,Frequency (%),Unnamed: 3
No,1933,72.5%,
Yes,733,27.5%,

0,1
Distinct count,42
Unique (%),1.6%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,8.0218
Minimum,0
Maximum,50
Zeros (%),72.5%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,19
95-th percentile,36
Maximum,50
Range,50
Interquartile range,19

0,1
Standard deviation,13.612
Coef of variation,1.6969
Kurtosis,-0.040158
Mean,8.0218
MAD,11.635
Skewness,1.2718
Sum,21386
Variance,185.29
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0,1933,72.5%,
31,50,1.9%,
28,42,1.6%,
29,39,1.5%,
24,37,1.4%,
33,37,1.4%,
30,35,1.3%,
27,34,1.3%,
25,33,1.2%,
32,33,1.2%,

Value,Count,Frequency (%),Unnamed: 3
0,1933,72.5%,
4,1,0.0%,
8,2,0.1%,
9,2,0.1%,
10,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
44,7,0.3%,
45,4,0.2%,
46,3,0.1%,
47,3,0.1%,
50,2,0.1%,

0,1
Distinct count,1489
Unique (%),55.9%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,179.48
Minimum,0
Maximum,350.8
Zeros (%),0.1%

0,1
Minimum,0.0
5-th percentile,90.425
Q1,143.4
Median,179.95
Q3,215.9
95-th percentile,269.78
Maximum,350.8
Range,350.8
Interquartile range,72.5

0,1
Standard deviation,54.21
Coef of variation,0.30204
Kurtosis,0.019364
Mean,179.48
MAD,43.29
Skewness,-0.053106
Sum,478500
Variance,2938.8
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
162.3,7,0.3%,
183.4,7,0.3%,
194.8,6,0.2%,
175.4,6,0.2%,
159.5,6,0.2%,
185.0,6,0.2%,
216.0,6,0.2%,
145.0,5,0.2%,
124.3,5,0.2%,
141.3,5,0.2%,

Value,Count,Frequency (%),Unnamed: 3
0.0,2,0.1%,
2.6,1,0.0%,
7.8,1,0.0%,
7.9,1,0.0%,
12.5,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
335.5,1,0.0%,
337.4,1,0.0%,
345.3,1,0.0%,
346.8,1,0.0%,
350.8,1,0.0%,

0,1
Distinct count,115
Unique (%),4.3%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,100.31
Minimum,0
Maximum,160
Zeros (%),0.1%

0,1
Minimum,0
5-th percentile,67
Q1,87
Median,101
Q3,114
95-th percentile,133
Maximum,160
Range,160
Interquartile range,27

0,1
Standard deviation,19.988
Coef of variation,0.19926
Kurtosis,0.28955
Mean,100.31
MAD,15.869
Skewness,-0.12827
Sum,267427
Variance,399.53
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
105,62,2.3%,
106,59,2.2%,
108,59,2.2%,
112,58,2.2%,
107,57,2.1%,
102,57,2.1%,
100,56,2.1%,
104,55,2.1%,
95,55,2.1%,
88,54,2.0%,

Value,Count,Frequency (%),Unnamed: 3
0,2,0.1%,
36,1,0.0%,
40,1,0.0%,
42,2,0.1%,
44,3,0.1%,

Value,Count,Frequency (%),Unnamed: 3
152,1,0.0%,
156,1,0.0%,
157,1,0.0%,
158,3,0.1%,
160,1,0.0%,

0,1
Correlation,1

0,1
Distinct count,1442
Unique (%),54.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,200.39
Minimum,0
Maximum,363.7
Zeros (%),0.0%

0,1
Minimum,0.0
5-th percentile,118.73
Q1,165.3
Median,200.9
Q3,235.1
95-th percentile,285.03
Maximum,363.7
Range,363.7
Interquartile range,69.8

0,1
Standard deviation,50.952
Coef of variation,0.25427
Kurtosis,-0.025493
Mean,200.39
MAD,40.845
Skewness,-0.012665
Sum,534230
Variance,2596.1
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
169.9,8,0.3%,
220.6,7,0.3%,
167.2,7,0.3%,
161.7,7,0.3%,
181.6,6,0.2%,
195.5,6,0.2%,
194.0,6,0.2%,
224.9,6,0.2%,
205.1,6,0.2%,
209.4,6,0.2%,

Value,Count,Frequency (%),Unnamed: 3
0.0,1,0.0%,
31.2,1,0.0%,
42.2,1,0.0%,
42.5,1,0.0%,
43.9,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
347.3,1,0.0%,
348.5,1,0.0%,
350.9,1,0.0%,
354.2,1,0.0%,
363.7,1,0.0%,

0,1
Distinct count,120
Unique (%),4.5%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,100.02
Minimum,0
Maximum,170
Zeros (%),0.0%

0,1
Minimum,0
5-th percentile,67
Q1,87
Median,100
Q3,114
95-th percentile,133
Maximum,170
Range,170
Interquartile range,27

0,1
Standard deviation,20.161
Coef of variation,0.20157
Kurtosis,0.1894
Mean,100.02
MAD,16.067
Skewness,-0.065209
Sum,266663
Variance,406.48
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
105,64,2.4%,
94,62,2.3%,
109,58,2.2%,
102,56,2.1%,
108,55,2.1%,
87,54,2.0%,
97,54,2.0%,
115,53,2.0%,
111,52,2.0%,
98,52,2.0%,

Value,Count,Frequency (%),Unnamed: 3
0,1,0.0%,
12,1,0.0%,
36,1,0.0%,
42,1,0.0%,
43,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
155,2,0.1%,
156,1,0.0%,
157,1,0.0%,
159,1,0.0%,
170,1,0.0%,

0,1
Correlation,1

0,1
Distinct count,1444
Unique (%),54.2%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,201.17
Minimum,43.7
Maximum,395
Zeros (%),0.0%

0,1
Minimum,43.7
5-th percentile,117.93
Q1,166.93
Median,201.15
Q3,236.47
95-th percentile,283.67
Maximum,395.0
Range,351.3
Interquartile range,69.55

0,1
Standard deviation,50.78
Coef of variation,0.25243
Kurtosis,0.050382
Mean,201.17
MAD,40.677
Skewness,0.023362
Sum,536320
Variance,2578.6
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
214.7,7,0.3%,
172.7,6,0.2%,
181.2,6,0.2%,
214.6,6,0.2%,
193.6,6,0.2%,
182.1,6,0.2%,
210.0,6,0.2%,
214.0,6,0.2%,
197.4,6,0.2%,
194.3,6,0.2%,

Value,Count,Frequency (%),Unnamed: 3
43.7,1,0.0%,
45.0,1,0.0%,
47.4,1,0.0%,
50.1,2,0.1%,
53.3,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
364.3,1,0.0%,
364.9,1,0.0%,
377.5,1,0.0%,
381.9,1,0.0%,
395.0,1,0.0%,

0,1
Distinct count,118
Unique (%),4.4%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,100.11
Minimum,33
Maximum,166
Zeros (%),0.0%

0,1
Minimum,33
5-th percentile,68
Q1,87
Median,100
Q3,113
95-th percentile,131
Maximum,166
Range,133
Interquartile range,26

0,1
Standard deviation,19.418
Coef of variation,0.19398
Kurtosis,-0.048009
Mean,100.11
MAD,15.552
Skewness,0.01041
Sum,266883
Variance,377.08
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
105,70,2.6%,
104,67,2.5%,
91,60,2.3%,
102,58,2.2%,
106,58,2.2%,
100,57,2.1%,
96,54,2.0%,
95,53,2.0%,
108,53,2.0%,
98,53,2.0%,

Value,Count,Frequency (%),Unnamed: 3
33,1,0.0%,
36,1,0.0%,
38,1,0.0%,
42,1,0.0%,
44,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
156,2,0.1%,
157,2,0.1%,
158,1,0.0%,
164,1,0.0%,
166,1,0.0%,

0,1
Correlation,1

0,1
Distinct count,158
Unique (%),5.9%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,10.237
Minimum,0
Maximum,20
Zeros (%),0.6%

0,1
Minimum,0.0
5-th percentile,5.8
Q1,8.5
Median,10.2
Q3,12.1
95-th percentile,14.7
Maximum,20.0
Range,20.0
Interquartile range,3.6

0,1
Standard deviation,2.7883
Coef of variation,0.27238
Kurtosis,0.61655
Mean,10.237
MAD,2.1874
Skewness,-0.22443
Sum,27292
Variance,7.7749
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
10.0,54,2.0%,
10.2,47,1.8%,
9.8,45,1.7%,
11.5,43,1.6%,
9.1,42,1.6%,
11.3,42,1.6%,
10.6,42,1.6%,
9.7,41,1.5%,
9.5,41,1.5%,
10.9,41,1.5%,

Value,Count,Frequency (%),Unnamed: 3
0.0,15,0.6%,
1.1,1,0.0%,
1.3,1,0.0%,
2.1,1,0.0%,
2.2,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
18.0,2,0.1%,
18.2,2,0.1%,
18.4,1,0.0%,
18.9,1,0.0%,
20.0,1,0.0%,

0,1
Distinct count,21
Unique (%),0.8%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4.4674
Minimum,0
Maximum,20
Zeros (%),0.6%

0,1
Minimum,0
5-th percentile,1
Q1,3
Median,4
Q3,6
95-th percentile,9
Maximum,20
Range,20
Interquartile range,3

0,1
Standard deviation,2.4562
Coef of variation,0.54981
Kurtosis,3.2666
Mean,4.4674
MAD,1.8688
Skewness,1.3588
Sum,11910
Variance,6.0329
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
3,544,20.4%,
4,503,18.9%,
2,388,14.6%,
5,376,14.1%,
6,267,10.0%,
7,172,6.5%,
1,125,4.7%,
8,90,3.4%,
9,83,3.1%,
10,37,1.4%,

Value,Count,Frequency (%),Unnamed: 3
0,15,0.6%,
1,125,4.7%,
2,388,14.6%,
3,544,20.4%,
4,503,18.9%,

Value,Count,Frequency (%),Unnamed: 3
16,2,0.1%,
17,1,0.0%,
18,2,0.1%,
19,1,0.0%,
20,1,0.0%,

0,1
Correlation,0.99999

0,1
Distinct count,10
Unique (%),0.4%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.5626
Minimum,0
Maximum,9
Zeros (%),20.8%

0,1
Minimum,0
5-th percentile,0
Q1,1
Median,1
Q3,2
95-th percentile,4
Maximum,9
Range,9
Interquartile range,1

0,1
Standard deviation,1.3112
Coef of variation,0.83912
Kurtosis,1.814
Mean,1.5626
MAD,1.0495
Skewness,1.0952
Sum,4166
Variance,1.7193
Memory size,21.0 KiB

Value,Count,Frequency (%),Unnamed: 3
1,945,35.4%,
2,608,22.8%,
0,555,20.8%,
3,348,13.1%,
4,133,5.0%,
5,49,1.8%,
6,17,0.6%,
7,8,0.3%,
9,2,0.1%,
8,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0,555,20.8%,
1,945,35.4%,
2,608,22.8%,
3,348,13.1%,
4,133,5.0%,

Value,Count,Frequency (%),Unnamed: 3
5,49,1.8%,
6,17,0.6%,
7,8,0.3%,
8,1,0.0%,
9,2,0.1%,

0,1
Distinct count,2
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
Mean,0.14554

0,1
True,388
(Missing),2278

Value,Count,Frequency (%),Unnamed: 3
True,388,14.6%,
(Missing),2278,85.4%,

Unnamed: 0,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Customer service calls,Churn
0,KS,128,415,No,Yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False
1,OH,107,415,No,Yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False
2,NJ,137,415,No,No,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False
3,OH,84,408,Yes,No,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,OK,75,415,Yes,No,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False


In [0]:
# split into input (X) and output (Y) variables
# X is modified in place by the encoding cells below, so take an explicit copy
# rather than a slice of df (avoids SettingWithCopyWarning and leaves df intact).
X = df.iloc[:,:-1].copy()
Y = df.iloc[:,-1]

In [35]:
Y = Y.map({True: 1, False: 0})

0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
9    0
Name: Churn, dtype: int64

In [27]:
from sklearn.preprocessing import Binarizer, LabelEncoder
le = LabelEncoder()
label_encoded_state = le.fit_transform(X.loc[:, "State"])
X.loc[:, "State"] = label_encoded_state
X.head()

Unnamed: 0,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Customer service calls
0,16,128,415,No,Yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1
1,35,107,415,No,Yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1
2,31,137,415,No,No,0,243.4,114,41.38,121.2,110,10.3,162.6,104,7.32,12.2,5,3.29,0
3,35,84,408,Yes,No,0,299.4,71,50.9,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78,2
4,36,75,415,Yes,No,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73,3


In [29]:
# Encode the two Yes/No plan columns as 1/0.
features = ["International plan", "Voice mail plan"]
yes_no_to_int = {"No": 0, "Yes": 1}
for feat in features:
    # Series.map performs the value lookup; Series.apply has no `map`,
    # `inplace` or `axis` arguments, so the previous call could not run.
    X[feat] = X[feat].map(yes_no_to_int)

X.head()

[0 0 0 ... 0 0 0]
[1 1 0 ... 0 0 1]


In [36]:
X = X.iloc[:, 5:-1]
X.shape

(2666, 13)

###Finetune for batch and epoch

In [0]:
# Function to create model, required for KerasClassifier
def create_model():
  """Build and compile the binary classifier handed to KerasClassifier.

  The network has a 12-unit ReLU layer whose input width is taken from the
  notebook-level ``X``, followed by a single sigmoid output unit. The layer
  summary is printed before compiling.

  Returns:
    The compiled Sequential model (binary cross-entropy, Adam, accuracy).
  """
  network = Sequential([
      Dense(12, input_dim=X.shape[1], activation='relu'),
      Dense(1, activation='sigmoid'),
  ])
  network.summary()
  network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  return network

In [0]:
# create model
model = KerasClassifier(build_fn=create_model, verbose=0)

In [39]:
# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)



Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 12)                168       
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 13        
Total params: 181
Trainable params: 181
Non-trainable params: 0
_________________________________________________________________










###Summarise Results

In [40]:
# Report the best configuration, then the mean and standard deviation of the
# test score for every configuration the grid search tried.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for row in zip(means, stds, params):
    print("%f (%f) with: %r" % row)

Best: 0.857083 using {'batch_size': 20, 'epochs': 50}
0.712620 (0.280783) with: {'batch_size': 10, 'epochs': 10}
0.715244 (0.282255) with: {'batch_size': 10, 'epochs': 50}
0.856707 (0.016939) with: {'batch_size': 10, 'epochs': 100}
0.854081 (0.018526) with: {'batch_size': 20, 'epochs': 10}
0.857083 (0.018096) with: {'batch_size': 20, 'epochs': 50}
0.717120 (0.282870) with: {'batch_size': 20, 'epochs': 100}
0.703893 (0.289724) with: {'batch_size': 40, 'epochs': 10}
0.851828 (0.022158) with: {'batch_size': 40, 'epochs': 50}
0.842447 (0.039432) with: {'batch_size': 40, 'epochs': 100}
0.424343 (0.344837) with: {'batch_size': 60, 'epochs': 10}
0.852955 (0.019390) with: {'batch_size': 60, 'epochs': 50}
0.566651 (0.350421) with: {'batch_size': 60, 'epochs': 100}
0.837580 (0.037060) with: {'batch_size': 80, 'epochs': 10}
0.853706 (0.018926) with: {'batch_size': 80, 'epochs': 50}
0.703893 (0.289627) with: {'batch_size': 80, 'epochs': 100}
0.851455 (0.019237) with: {'batch_size': 100, 'epochs': 

###Now that we have figured out the optimised batch and epoch values, lets fine tune optimiser

In [0]:
def create_model(optimizer='adam'):
  """Build and compile the binary classifier with a selectable optimizer.

  Args:
    optimizer: Optimizer passed straight to ``compile``; the grid search
      supplies Keras optimizer names here.

  Returns:
    The compiled Sequential model: a 12-unit ReLU layer sized to the
    notebook-level ``X`` and a single sigmoid output unit.
  """
  network = Sequential([
      Dense(12, input_dim=X.shape[1], activation='relu'),
      Dense(1, activation='sigmoid'),
  ])
  network.summary()
  network.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
  return network

In [0]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=20, verbose=0)

In [49]:
# define the grid search parameters
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)



Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 12)                168       
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 13        
Total params: 181
Trainable params: 181
Non-trainable params: 0
_________________________________________________________________


In [50]:
# Report the best optimizer, then the mean and standard deviation of the test
# score for every optimizer the grid search tried.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for row in zip(means, stds, params):
    print("%f (%f) with: %r" % row)

Best: 0.855582 using {'optimizer': 'Adam'}
0.854456 (0.018946) with: {'optimizer': 'SGD'}
0.710647 (0.292771) with: {'optimizer': 'RMSprop'}
0.701641 (0.288388) with: {'optimizer': 'Adagrad'}
0.836070 (0.030210) with: {'optimizer': 'Adadelta'}
0.855582 (0.018696) with: {'optimizer': 'Adam'}
0.699109 (0.276918) with: {'optimizer': 'Adamax'}
0.573781 (0.347210) with: {'optimizer': 'Nadam'}


###Now lets look at tuning the weights of the network. We can tune the weights on each layer based on the type of activation function used

In [0]:
# Function to create model, required for KerasClassifier
def create_model(init_mode='he_uniform'):
	"""Build and compile the binary classifier with a selectable initializer.

	Args:
		init_mode: Kernel initializer applied to both Dense layers.

	Returns:
		The compiled Sequential model: a 12-unit ReLU layer sized to the
		notebook-level ``X`` and a single sigmoid output unit.
	"""
	hidden = Dense(12, input_dim=X.shape[1], kernel_initializer=init_mode, activation='relu')
	output = Dense(1, kernel_initializer=init_mode, activation='sigmoid')
	network = Sequential([hidden, output])
	network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
	return network

In [0]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=20, verbose=0)
history = model.fit(X, Y)

In [54]:
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
param_grid = dict(init_mode=init_mode)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

ValueError: ignored

###Lets tune for the various activation functions

In [0]:
def create_model(activation='relu'):
	"""Build and compile the binary classifier with a selectable hidden activation.

	Args:
		activation: Activation function of the 12-unit hidden layer.

	Returns:
		The compiled Sequential model (binary cross-entropy, Adam, accuracy).
	"""
	# create model
	model = Sequential()
	# Size the input from X, as the earlier builders do; the previous
	# hard-coded 8 does not match the 13 feature columns left in X.
	model.add(Dense(12, input_dim=X.shape[1], kernel_initializer='uniform', activation=activation))
	model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

In [0]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)

In [0]:

# define the grid search parameters
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

###Dropout layer tuning

In [0]:
from keras.layers import Dropout
from keras.constraints import MaxNorm

In [0]:
def create_model(dropout_rate=0.0, weight_constraint=0):
	"""Build and compile the binary classifier with dropout and a max-norm constraint.

	Args:
		dropout_rate: Rate passed to the Dropout layer after the hidden layer.
		weight_constraint: Value passed to ``MaxNorm`` for the hidden layer's
			kernel. NOTE(review): the default of 0 presumably forces the
			kernel norm to zero; the grid search always supplies 1-5 - confirm.

	Returns:
		The compiled Sequential model (binary cross-entropy, Adam, accuracy).
	"""
	# create model
	model = Sequential()
	# Size the input from X, as the earlier builders do; the previous
	# hard-coded 8 does not match the 13 feature columns left in X.
	model.add(Dense(12, input_dim=X.shape[1], kernel_initializer='uniform', activation='linear', kernel_constraint=MaxNorm(weight_constraint)))
	model.add(Dropout(dropout_rate))
	model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

In [0]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)

In [0]:
# define the grid search parameters
weight_constraint = [1, 2, 3, 4, 5]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = dict(dropout_rate=dropout_rate, weight_constraint=weight_constraint)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

###Tune for the number of neurons

In [0]:
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm

In [0]:

# Function to create model, required for KerasClassifier
def create_model(neurons=1):
	"""Build and compile the binary classifier with a selectable hidden width.

	Args:
		neurons: Number of units in the hidden Dense layer.

	Returns:
		The compiled Sequential model (binary cross-entropy, Adam, accuracy).
	"""
	# create model
	model = Sequential()
	# Size the input from X, as the earlier builders do; the previous
	# hard-coded 8 does not match the 13 feature columns left in X.
	model.add(Dense(neurons, input_dim=X.shape[1], kernel_initializer='uniform', activation='linear', kernel_constraint=maxnorm(4)))
	model.add(Dropout(0.2))
	model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

In [0]:
# create model
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)

In [0]:

# define the grid search parameters
neurons = [1, 5, 10, 15, 20, 25, 30]
param_grid = dict(neurons=neurons)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

##Image Classification

In [0]:
!pip install fastai -qq

In [0]:
# Import necessary libraries
from fastai.vision import * 
import matplotlib.pyplot as plt

I have downloaded a set of images to my disk. Here I create an individual directory for each class and upload the images into the matching directory. NOTE: This is still not the directory structure that FastAI needs.

In [0]:
# One directory per image class. NOTE(review): this reuses the name
# `class_names`, which earlier held the Fashion-MNIST labels.
class_names = ["classes/car","classes/flower","classes/fruit"]
for base_dir in class_names:
  # makedirs also creates the parent "classes" directory, and exist_ok keeps
  # the cell re-runnable (os.mkdir fails in both situations).
  os.makedirs(base_dir, exist_ok=True)


###Trials

In order to get the URLs, we will navigate to Google Images, search for our category of choice, scroll down until enough images are loaded and then execute the following javascript code from the developer console.

urls = Array.from(document.querySelectorAll('.rg_di .rg_meta')).map(el=>JSON.parse(el.textContent).ou);
window.open('data:text/csv;charset=utf-8,' + escape(urls.join('\n')));

Put the data in the correct folder structure for fastai to work with. Eg:
data = (ImageList.from_csv(planet, 'labels.csv', folder='train', suffix='.jpg')
        #Where to find the data? -> in planet 'train' folder
        .split_by_rand_pct()
        #How to split in train/valid? -> randomly with the default 20% in valid
        .label_from_df(label_delim=' ')
        #How to label? -> use the second column of the csv file and split the tags by ' '
        .transform(planet_tfms, size=128)
        #Data augmentation? -> use tfms with a size of 128
        .databunch())                          
        #Finally -> use the defaults for conversion to databunch


In [0]:
# For each (URL list, class folder) pair: make sure the folder exists under
# /content, then hand the CSV and the folder to fastai's download_images.
path = Path('/content')
for file, folder in (('download.csv', 'aircraft'),):
    dest = path.joinpath(folder)
    dest.mkdir(exist_ok=True, parents=True)
    download_images(path.joinpath(file), dest)

In [0]:
# Upload files from the local machine via google.colab's `files` helper.
# NOTE(review): presumably this is how download.csv reaches /content; if so it
# has to run before the download cell above -- confirm the intended order.
files.upload()

In [0]:
# Fetch the MNIST_TINY sample; `mnist` is used as the data root in the next cell.
mnist = untar_data(URLs.MNIST_TINY)
# Transforms with flipping switched off (do_flip=False).
tfms = get_transforms(do_flip=False)

In [0]:
# Data-block pipeline for the MNIST_TINY sample: collect -> split -> label ->
# transform -> databunch -> normalise.
data = (ImageList.from_folder(mnist)  # images found under `mnist`
        .split_by_folder()            # train/valid split taken from the folder layout
        .label_from_folder()          # label each image from its folder
        .transform(tfms, size=32)     # apply `tfms` with size=32
        .databunch()
        .normalize(imagenet_stats))   # normalise using `imagenet_stats`

In [0]:
# Visual sanity check of the MNIST_TINY databunch built above.
data.show_batch()

In [0]:
# Fetch the PLANET_TINY sample and define the transforms that the markdown
# example above refers to as `planet` / `planet_tfms`.
planet = untar_data(URLs.PLANET_TINY)
# Vertical flips enabled; lighting and zoom limited; warping disabled (max_warp=0.).
planet_tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)

In [0]:
# NOTE(review): unlike the other pipelines in this notebook, no split or label
# step precedes .databunch() here. This looks like a trimmed-down trial and
# probably fails as written -- confirm, or drop the cell in favour of the next one.
data = (ImageList.from_folder(path) # Where to find the data? -> in path and its subfolders
        .databunch())          # Convert straight to a databunch (no split/label/transform step)
        # The earlier notes here about labelling, a test set and augmentation
        # described steps that are not part of this pipeline.
        

In [0]:
# Build the databunch from the class sub-folders of `path`.
# `split_by_rand_pct` is the current fastai v1 name for the random split (the
# same name used in the markdown example above); `random_split_by_pct` is the
# older name for it.
data = (ImageList.from_folder(path)  # Where to find the data? -> in path and its subfolders
        .split_by_rand_pct()         # How to split in train/valid? -> randomly, with the default validation share
        .label_from_folder()         # How to label? -> depending on the folder of the filenames
        .transform(size=224)         # Resize to 224; no augmentation transforms are passed
        .databunch())

####Organise into proper directories

In [0]:
import os
import shutil

# Gather the downloaded category folders under a single root directory.
# The root is created by absolute path so it is the same directory the moves
# target regardless of the working directory, and exist_ok keeps the cell
# re-runnable.
classes_root = '/content/classes'
os.makedirs(classes_root, exist_ok=True)

for category in ('aircraft', 'atvs', 'bicycle'):
    source = os.path.join('/content', category)
    already_moved = os.path.isdir(os.path.join(classes_root, category))
    if already_moved and not os.path.exists(source):
        continue  # moved by an earlier run of this cell
    # A source folder that is missing and was never moved still raises here.
    shutil.move(source, classes_root)

###Continue

In [0]:
# From here on `path` points at the root that holds one sub-folder per class.
path = Path('/content/classes')

In [0]:
# Run fastai's verify_images over each class folder (delete=True, max_size=300).
# NOTE(review): these folder names differ from the ones moved into
# /content/classes above -- confirm which set is current.
for folder in ('aircraft', 'car', 'fruit', 'flower'):
  verify_images(path.joinpath(folder), delete=True, max_size=300)

In [0]:
# Seed NumPy's RNG so the random 20% validation split is the same on every
# run; otherwise the validation images change between runs and the metrics
# (and the later data-cleaning step) are not comparable.
np.random.seed(42)
# train="." -> all class folders sit directly under `path`; the validation set
# is carved out of them with valid_pct.
data = ImageDataBunch.from_folder(path, train=".", valid_pct=0.2,
         size=224, num_workers=4)

In [0]:
# Display the databunch summary as the cell output.
data

In [0]:
# Interactive help lookup for ImageList (exploration aid; not needed for the pipeline).
doc(ImageList)

In [0]:
data.classes # will give the different classes found in the data

In [0]:
# Visual sanity check: show a small batch of labelled images (rows=2).
data.show_batch(rows=2, figsize=(9,7))

Now let's create a CNN using ResNet34, a 34-layer model pretrained on the ImageNet database. In other words, we are going to apply transfer learning to our dataset.

In [0]:
# Inline plotting was already enabled in the imports cell at the top of the
# notebook; repeating it here is harmless.
%matplotlib inline

In [0]:
from fastai.metrics import error_rate # 1 - accuracy
# Learner built from `data` on top of models.resnet34, reporting error_rate.
# NOTE(review): newer fastai v1 releases name this constructor `cnn_learner`;
# confirm `create_cnn` is still available in the installed version.
learn = create_cnn(data, models.resnet34, metrics=error_rate)

The `fit_one_cycle` method varies the learning rate over the course of training.

In [0]:
# Prefer the GPU, but fall back to the CPU so the cell still runs on a
# CPU-only runtime instead of selecting an unavailable CUDA device.
# NOTE(review): this is set after `data` and `learn` were created -- confirm it
# still takes effect for them, or move it above the data-loading cells.
defaults.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# First training pass with the one-cycle schedule (4 epochs).
learn.fit_one_cycle(4)

In order to optimise the learning rate, we unfreeze all the layers, run the learning-rate finder, and plot the loss recorded at each learning rate it tried.

In [0]:
# Run the learning-rate finder on the unfrozen model and plot what the
# recorder captured; the plot is used to pick max_lr for the next fit.
learn.unfreeze() # must be done before calling lr_find
learn.lr_find()
learn.recorder.plot()

Let's use the best learning-rate range: the region where the loss curve slopes down steeply, even though the loss value itself is still high.

In [0]:
# Train the unfrozen model for 4 more epochs with learning rates taken from
# the range 3e-5 .. 3e-4 (passed as a slice).
learn.fit_one_cycle(4, max_lr=slice(3e-5, 3e-4))

In [0]:
# Checkpoint the weights; the retraining cell later reloads this name.
learn.save('object-detection-stage-1')

We can also clean up mislabelled or misidentified images using the widgets that fastai provides.

In [0]:
from fastai.widgets import *

# Pick the samples to review from the learner's top losses, then open the
# interactive cleaner on them.
ds, idxs = DatasetFormatter().from_toplosses(learn)
# NOTE(review): presumably this widget writes its decisions to cleaned.csv
# under `path`, which the retraining cell reads -- confirm.
ImageCleaner(ds, idxs, path)

In [0]:
# Interactive help lookup for from_toplosses (exploration aid; not needed for the pipeline).
doc(DatasetFormatter().from_toplosses)

Now let's retrain our model with the cleaned dataset.

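NOTE: Please use the version of the split command that matches your installed fastai release (older releases call it `random_split_by_pct`, newer ones `split_by_rand_pct`) to split your data into training and validation sets.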



In [0]:
# Load the cleaned file list (presumably written by the ImageCleaner cell
# above -- confirm) and preview it.
df = pd.read_csv(path/'cleaned.csv', header='infer') ##hope this works in colab!!
print(df.head())

# Rebuild the databunch from the cleaned list. `ImageList` and
# `split_by_rand_pct` are the names used by the rest of this notebook;
# `ImageItemList` / `random_split_by_pct` are the older spellings.
db = (ImageList.from_df(df, path)
               .split_by_rand_pct(0.2)                  # random 20% validation split
               .label_from_df()                         # labels come from the dataframe
               .transform(get_transforms(), size=224)
               .databunch(bs=8)).normalize(imagenet_stats)

In [0]:
# Original databunch: classes, data.c, and train / validation set sizes.
data.classes, data.c, len(data.train_ds), len(data.valid_ds)

In [0]:
# Cleaned databunch: same summary, for comparison with the cell above.
db.classes, db.c, len(db.train_ds), len(db.valid_ds)

Let's repeat the process

In [0]:
# Retrain on the cleaned data, repeating the earlier schedule: start from the
# stage-1 checkpoint, fit frozen, unfreeze, run the LR finder, fit again, save.
learn.load('object-detection-stage-1') # loading the weights
learn.data = db # replacing the data

# Frozen pass first, as in the first round of training.
learn.freeze()
learn.fit_one_cycle(4)

learn.unfreeze()

# Re-plot the LR finder output for the cleaned data.
learn.lr_find()
learn.recorder.plot()

# NOTE(review): this reuses the learning-rate range chosen before cleaning;
# confirm it still suits the plot above.
learn.fit_one_cycle(4, max_lr=slice(3e-5, 3e-4))
learn.save('object-detection-stage-2')

Let us use the `ClassificationInterpretation` class to understand our results

In [0]:
# Build the interpretation object from the retrained learner; the two cells
# below plot from it.
interp = ClassificationInterpretation.from_learner(learn)

In [0]:
# Confusion matrix for the learner wrapped by `interp`.
interp.plot_confusion_matrix()

In [0]:
# Show the 9 samples with the highest loss.
interp.plot_top_losses(9, figsize=(15,15))