In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of each file found.
for folder, _, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
from sklearn.datasets import make_circles

In [None]:
# Build the toy dataset: 1000 noisy 2-D points with a binary label each.
samples = 1000
X, y = make_circles(n_samples=samples, noise=0.03, random_state=42)
(X.shape, y.shape)

In [None]:
# Tabular view of the data: the two feature columns followed by the label.
circles = pd.DataFrame(X, columns=["X0", "X1"])
circles["label"] = y
circles.head()

In [None]:
# Scatter feature 0 against feature 1, colouring each point by its label
plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap=plt.cm.RdYlBu)

In [None]:
# Baseline network: one Dense layer holding a single unit.

tf.random.set_seed(42)
model_1 = tf.keras.Sequential()
model_1.add(tf.keras.layers.Dense(1))

# NOTE(review): the Dense layer has no activation while BinaryCrossentropy is
# built with its default arguments - confirm the loss receives the kind of
# values (probabilities vs. raw scores) it expects.
model_1.compile(
    optimizer=tf.keras.optimizers.SGD(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

model_1.fit(x=X, y=y, epochs=5)

In [None]:
# Train the same model silently for 200 more epochs, then score it on the same data.
model_1.fit(x=X, y=y, epochs=200, verbose=0)
res = model_1.evaluate(x=X, y=y)

In [None]:
# Shape of a single sample (the first row of X)
X[0].shape

In [None]:
# Registry of evaluation results keyed by model name; the numbers are stored as strings.
model_results = {}
model_results["model_1"] = dict(loss=str(res[0]), accuracy=str(res[1]))
model_results

### Even after training for longer, the model is not improving, so we can make some changes to it, including:
+ Add more layers or increase the number of neurons.
+ Choose another optimization function.
+ Fit/train for longer.

In [None]:
tf.random.set_seed(42)

# Three stacked Dense layers (100 -> 10 -> 1 units), none with an activation.
model_2 = tf.keras.Sequential()
for units in (100, 10, 1):
    model_2.add(tf.keras.layers.Dense(units))

model_2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

model_2.fit(x=X, y=y, epochs=100, verbose=0)

In [None]:
# Score model_2 on the data it was fitted on and record the outcome.
res = model_2.evaluate(x=X, y=y)
model_results["model_2"] = dict(loss=str(res[0]), accuracy=str(res[1]))
model_results

### For better evaluation and visualization, we need to plot a decision boundary.

A decision boundary is the region of a problem space in which the output label of the classifier is ambiguous.

In [None]:
def plot_decision_boundary(model, X, y):
    """Draw a model's predicted class regions behind the data points.

    A 100 x 100 grid covering the range of the first two columns of ``X``
    (padded by 0.1 on every side) is passed to ``model.predict``. The
    predicted class of every grid point is drawn as a filled contour and the
    samples are scattered on top, coloured by ``y``. All drawing goes through
    ``plt``, i.e. onto the current matplotlib axes.

    Args:
        model: Object exposing ``predict(array)``. Predictions may be 1-D
            (one value per grid point), of shape ``(n, 1)``, or of shape
            ``(n, k)`` with ``k > 1`` scores per grid point.
        X: 2-D array; column 0 is used as the horizontal axis and column 1
            as the vertical axis.
        y: Per-sample values used to colour the scatter points.

    Returns:
        None.
    """
    # Pad the plotted area by 0.1 beyond the data range on each axis.
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1

    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))

    # One row per grid point: (horizontal coordinate, vertical coordinate).
    x_in = np.c_[xx.ravel(), yy.ravel()]

    y_pred = np.asarray(model.predict(x_in))

    # Decide from the array shape rather than len(y_pred[0]) so that 1-D
    # predictions (one scalar per grid point) do not raise a TypeError.
    if y_pred.ndim > 1 and y_pred.shape[1] > 1:
        print('Doing multi-class')
        y_pred = np.argmax(y_pred, axis=1).reshape(xx.shape)
    else:
        print('Doing binary classification')
        y_pred = np.round(y_pred).reshape(xx.shape)

    plt.contourf(xx, yy, y_pred, cmap=plt.cm.RdYlBu, alpha=0.7)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.RdYlBu)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
# Draw the regions predicted by model_2 over the full dataset.
plot_decision_boundary(model=model_2,X=X,y=y)
    

In [None]:
# Alternative: use the ready-made decision-region plotter from mlxtend
from mlxtend.plotting import plot_decision_regions

plot_decision_regions(X=X, y=y, clf=model_2, legend=1)

# Label the axes the plot was just drawn on.
axes = plt.gca()
axes.set_xlabel("x", size=5)
axes.set_ylabel("y", size=5)
axes.set_title('Plot Decision Region Boundary', size=10)
plt.show()

### Classification model fails because of the non-linearity

In [None]:
tf.random.set_seed(42)

# A single Dense unit again, this time followed by a ReLU activation.
model_3 = tf.keras.Sequential()
model_3.add(tf.keras.layers.Dense(1, activation="relu"))

model_3.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"],
)

history = model_3.fit(x=X, y=y, epochs=100, verbose=0)

# Score on the fitting data and record the outcome.
res = model_3.evaluate(x=X, y=y)
model_results["model_3"] = dict(loss=str(res[0]), accuracy=str(res[1]))
model_results

In [None]:
# Decision regions of model_3 over the full dataset.
plot_decision_regions(X=X, y=y, clf=model_3, legend=1)

# Label the axes the plot was just drawn on.
axes = plt.gca()
axes.set_xlabel("x", size=5)
axes.set_ylabel("y", size=5)
axes.set_title('Plot Decision Region Boundary', size=10)
plt.show()

## Add more layers and neurons

In [None]:
tf.random.set_seed(42)

# Two hidden ReLU layers of 4 units each, followed by the output layer.
model_4 =tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation="relu"),
    tf.keras.layers.Dense(4, activation="relu"),
    # One output unit: y holds a single binary label per sample, so the
    # network must emit one value per sample. A 4-unit output layer would make
    # the loss compare every label against four outputs and would send
    # plot_decision_boundary down its multi-class branch.
    tf.keras.layers.Dense(1),
])

model_4.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)

history = model_4.fit(X,y,epochs=250, verbose=0)

# Score on the fitting data and record the outcome.
res = model_4.evaluate(X,y)
model_results["model_4"] = {
    "loss":f"{res[0]}",
    "accuracy":f"{res[1]}"
}
model_results

In [None]:
# Draw the regions predicted by model_4 over the full dataset.
plot_decision_boundary(model=model_4,X=X,y=y)

### As observed:
+ Adding more layers and activation functions improves the model's performance.
+ Now let's create a model with an activation function in the output layer.

In [None]:
tf.random.set_seed(42)

# Two hidden ReLU layers of 4 units, then a single sigmoid output unit.
model_5 = tf.keras.Sequential()
model_5.add(tf.keras.layers.Dense(4, activation="relu"))
model_5.add(tf.keras.layers.Dense(4, activation="relu"))
model_5.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_5.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

history = model_5.fit(x=X, y=y, epochs=100, verbose=0)

# Score on the fitting data and record the outcome.
res = model_5.evaluate(x=X, y=y)
model_results["model_5"] = dict(loss=str(res[0]), accuracy=str(res[1]))
model_results

In [None]:
# Draw the regions predicted by model_5 over the full dataset.
plot_decision_boundary(model_5,X,y)

This model (model 5) seems promising, let's train and evaluate it on train/test data splits.

In [None]:
# The first 800 samples are used for training; everything after them is held out for testing.
x_train, x_test = X[:800], X[800:]
y_train, y_test = y[:800], y[800:]
(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

In [None]:
tf.random.set_seed(42)

# Same architecture as model_5, fitted on the training split only and with a
# larger learning rate (0.01) for fewer epochs (25).
model_6 = tf.keras.Sequential([
    tf.keras.layers.Dense(4,activation='relu'),
    tf.keras.layers.Dense(4,activation="relu"),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_6.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

history = model_6.fit(x_train,y_train,epochs=25, verbose=0)

# Score on the held-out split: x_test/y_test were not passed to fit(), so this
# measures performance on unseen samples instead of re-scoring the training data.
res = model_6.evaluate(x_test,y_test)
model_results['model_6'] = {
    "loss":f"{res[0]}",
    "accuracy":f"{res[1]}"
}
model_results

In [None]:
# model_6 decision boundary, side by side: training split (left), test split (right)
plt.figure(figsize=(12, 6))
panels = (
    ("Decision Boundary of Train Data", x_train, y_train),
    ("Decision Boundary of Test Data", x_test, y_test),
)
for position, (panel_title, features, labels) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)
    plot_decision_boundary(model_6, features, labels)

In [None]:
# Plot every series recorded by the most recent fit(), one line per series
history_frame = pd.DataFrame(history.history)
history_frame.plot()

### Find the ideal learning rate

The ideal learning rate is the one at which the loss decreases the most during training.

In [None]:
tf.random.set_seed(42)

# Same 4-4-1 architecture as before; this run is used to sweep the learning rate.
model_7 = tf.keras.Sequential()
model_7.add(tf.keras.layers.Dense(4, activation="relu"))
model_7.add(tf.keras.layers.Dense(4, activation="relu"))
model_7.add(tf.keras.layers.Dense(1, activation="sigmoid"))

model_7.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"],
)

# Learning-rate scheduler callback: the rate is 1e-4 at epoch 0 and grows by a
# factor of 10 every 20 epochs.
lr_sheduler = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-4 * 10 ** (epoch / 20)
)

history = model_7.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    verbose=0,
    callbacks=[lr_sheduler],
)

# Score on the training split and record the outcome.
res = model_7.evaluate(x=x_train, y=y_train)
model_results["model_7"] = dict(loss=str(res[0]), accuracy=str(res[1]))

model_results

In [None]:
# The frame holds every series recorded during fit() (loss, accuracy and, when
# the scheduler logs it, the learning rate), so the y-axis gets a generic
# label rather than "learning_rate", which described only one of the lines.
pd.DataFrame(history.history).plot(figsize=(10,7),xlabel="epochs",ylabel="value")

In [None]:
# Rebuild the learning rate for each of the 100 epochs with the scheduler's formula
epoch_index = tf.range(100)
lrs = 1e-4 * 10 ** (epoch_index / 20)
len(lrs)

In [None]:
# Loss against learning rate, with the learning-rate axis on a log scale
plt.figure(figsize=(10, 7))
plt.semilogx(lrs, history.history["loss"])
plt.ylabel("Loss")
plt.xlabel("Learning Rate")
plt.title("Learning rate v/s Loss")

We need to select the point on the graph where the loss is decreasing the fastest.
The model in the next section is trained with a learning rate of 0.15.

### Training a model with the ideal learning rate.

In [None]:
tf.random.set_seed(42)


# Same 4-4-1 architecture, compiled with the learning rate chosen for this section (0.15).
model_8 = tf.keras.Sequential()
model_8.add(tf.keras.layers.Dense(4, activation="relu"))
model_8.add(tf.keras.layers.Dense(4, activation="relu"))
model_8.add(tf.keras.layers.Dense(1, activation="sigmoid"))

model_8.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.15),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

history = model_8.fit(x=x_train, y=y_train, epochs=25, verbose=0)

# Score on the held-out test split and record the outcome.
res = model_8.evaluate(x=x_test, y=y_test)
model_results["model_8"] = dict(loss=str(res[0]), accuracy=str(res[1]))
model_results

In [None]:
# model_8 decision boundary, side by side: training split (left), test split (right)
plt.figure(figsize=(12, 6))
panels = (
    ("Decision Boundary of Train Data", x_train, y_train),
    ("Decision Boundary of Test Data", x_test, y_test),
)
for position, (panel_title, features, labels) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)
    plot_decision_boundary(model_8, features, labels)
