In [None]:
import numpy as np
import matplotlib.pyplot as plt
import keras
from sklearn import datasets
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [None]:
# Fix the seed of NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(seed=0)

In [None]:
# Generate the two-class circles dataset.
#   X - the data points
#   y - the label belonging to each point
# Keep the noise relatively low (e.g. 0.1): a high value (e.g. 0.8) makes the data
# points overly convoluted and the data a lot harder to classify.
n_pts = 500
X, y = datasets.make_circles(
    n_samples=n_pts,
    noise=0.1,
    factor=0.2,
    random_state=123,
)

In [None]:
# Draw one scatter per class (selected with a boolean mask on y) and label it,
# so the figure can be read on its own without looking at the code.
for label in (0, 1):
    plt.scatter(X[y == label, 0], X[y == label, 1], label='class {}'.format(label))
plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.title('circles dataset')
plt.legend()

In [None]:
# Small fully-connected binary classifier:
# 2 inputs -> 4 sigmoid hidden units -> 1 sigmoid output unit.
model = Sequential()
model.add(Dense(4, input_shape=(2,), activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))
# The learning rate (0.01) is passed positionally: its keyword was renamed from
# `lr` to `learning_rate` across Keras releases, while the positional form is
# accepted by both the old and the new signature.
model.compile(Adam(0.01), 'binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 100 epochs in mini-batches of 20 and keep the returned History object in `h`.
# `shuffle` is a boolean option, so pass True rather than a string that is merely truthy.
h = model.fit(x=X, y=y, verbose=1, batch_size=20, epochs=100, shuffle=True)

In [None]:
# Keras stores the accuracy metric under 'acc' in older releases and under
# 'accuracy' in newer ones; use whichever key this training run produced.
acc_key = 'accuracy' if 'accuracy' in h.history else 'acc'
plt.plot(h.history[acc_key])
plt.xlabel('epoch')
plt.legend(['accuracy'])
plt.title('accuracy')
# Notice that this time, at about 50-something epochs, our model pretty much reached its maximum accuracy.
# Training our network to fit our circular data also took much longer to reach maximum accuracy than our
# previous perceptron model, which only took 10 epochs. This can be attributed to the fact that our dataset
# was more complex in this specific case, requiring the network to run an increased number of epochs.

In [None]:
# Plot the training loss recorded in the fit history.
loss_history = h.history['loss']
plt.plot(loss_history)
plt.legend(['loss'])
plt.xlabel('epoch')
plt.title('loss')
# It seems evident that the full 100-epoch run was required for the loss function to fully minimize.
# This is indicative of the fact that our dataset was more difficult to classify than the
# linearly separable data that we dealt with in the last section.

In [None]:
def plot_decision_boundary(X, y, model, margin=0.25, resolution=50):
    """Draw the model's predictions as filled contours over the extent of the data.

    A regular ``resolution`` x ``resolution`` grid is laid over the bounding box
    of the first two columns of ``X`` (padded by ``margin`` on every side), the
    model is evaluated at every grid node, and the predictions are drawn with
    ``plt.contourf``.

    Parameters
    ----------
    X : array-like indexable as ``X[:, 0]`` and ``X[:, 1]``
        Data points; only used to determine the extent of the grid.
    y : any
        Unused. Kept so existing ``plot_decision_boundary(X, y, model)`` calls
        keep working.
    model : object with a ``predict(grid)`` method
        ``predict`` must return one value per grid row, i.e. something that can
        be reshaped to the grid's shape.
    margin : float, optional
        Padding added beyond the data's min/max on both axes (default 0.25).
    resolution : int, optional
        Number of grid points along each axis (default 50).

    Returns
    -------
    None
    """
    X = np.asarray(X)
    # Use NumPy reductions rather than the builtin min()/max(), which would
    # iterate over the array element by element in Python.
    x_span = np.linspace(X[:, 0].min() - margin, X[:, 0].max() + margin, resolution)
    y_span = np.linspace(X[:, 1].min() - margin, X[:, 1].max() + margin, resolution)
    xx, yy = np.meshgrid(x_span, y_span)
    # Flatten the mesh into a two-column batch with one row per grid node.
    grid = np.c_[xx.ravel(), yy.ravel()]
    # Put the flat predictions back onto the mesh layout expected by contourf.
    z = np.asarray(model.predict(grid)).reshape(xx.shape)
    plt.contourf(xx, yy, z)

In [None]:
plot_decision_boundary(X, y, model)
# Select each class with a boolean mask on y. X has exactly n_pts rows in total,
# so slicing it at n_pts would put every point into the first scatter and leave
# the second one empty.
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])

# Clearly, our decision boundary plot effectively separates our data points into the appropriate labels.
# It classifies them in the sense that everything inside has a label of one, and everything outside is labeled zero.
# The contourf function plots distinct contour zones; each of these zones represents an increased threshold
# of probability. The dark blue zone on the very outer edge represents the lowest probability of a point being
# labeled as one, which means that any point inside this blue zone most likely has a label of zero. Our model was
# trained so that the probability that a point is labeled as one increases as you go from the darkest blue contour
# zone to the brightest yellow contour zone; any point inside the yellow zone is most likely going to have a label of 1.

In [None]:
plot_decision_boundary(X, y, model)
# Select each class with a boolean mask on y. X has exactly n_pts rows in total,
# so slicing it at n_pts would not separate the two classes.
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
# Query the trained model at a single point and mark that point in red on the plot.
x1 = 0.1
y1 = 0.75
point = np.array([[x1, y1]])  # predict() is given a batch containing one (x, y) row
prediction = model.predict(point)
plt.plot([x1], [y1], marker='o', markersize=10, color='red')
print('prediction is :', prediction)

In [None]:
plot_decision_boundary(X, y, model)
# Select each class with a boolean mask on y. X has exactly n_pts rows in total,
# so slicing it at n_pts would not separate the two classes.
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
# Query the trained model at a single point and mark that point in red on the plot.
x1 = 0.1
y1 = 0
point = np.array([[x1, y1]])  # predict() is given a batch containing one (x, y) row
prediction = model.predict(point)
plt.plot([x1], [y1], marker='o', markersize=10, color='red')
print('prediction is :', prediction)