In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

<p style="color:blue;">
By Pramod Sharma : pramod.sharma@prasami.com
</p>

# Implementation of Perceptron including Sigmoid Activation and Gradient Descent

In [None]:
# Lets import some libraries
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import Perceptron

%matplotlib inline

In [None]:
# Some basic parameters
inpDir = '../input'
outDir = '../output'

RANDOM_STATE = 24

EPOCHS = 100

# parameters for Matplotlib: every text-size entry shares the same value,
# only the figure size is different
params = dict.fromkeys(
    ('legend.fontsize',
     'axes.labelsize',
     'axes.titlesize',
     'xtick.labelsize',
     'ytick.labelsize'),
    'x-large',
)
params['figure.figsize'] = (10, 8)

plt.rcParams.update(params)

## Generate Data Set
<p style="font-family: Arial; font-size:1.2em;color:black;">
Sklearn's dataset generators are a good source of data for learning. To keep the example simple, we will use the <a href="http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_moons.html">make_moons</a> dataset generator.
</p>

In [None]:
# Two interleaving half-moons: X holds the two features, y the 0/1 class label
X, y = datasets.make_moons(
    n_samples=1000,
    shuffle=True,
    noise=0.2,
    random_state=RANDOM_STATE,
)

<p style="font-family: Arial; font-size:1.2em;color:black;">
Dataframes are easier to visualize
</p>

In [None]:
# Features go into columns 'A' and 'B'; the label is attached as 'target'
data_df = pd.DataFrame(X, columns=['A', 'B']).assign(target=y)
data_df.head()

In [None]:
# Summary statistics for every column of the frame
data_df.describe()

In [None]:
# Let's look at which distinct values occur in the target column
data_df.target.unique()

In [None]:
# Scatter of feature A against feature B, coloured by class
data_df.plot.scatter(x='A', y='B', s=40, c='target', cmap=plt.cm.Spectral);

Out of 1000 records, we will keep 10%, i.e. 100 records, for testing; the remaining 900 records will be used for training.

In [None]:
# The generator already shuffled the rows, so a positional cut is enough:
# the first 900 rows are used for training, everything after for testing.
data_train, data_test = np.split(data_df.values, [900])

print (data_train.shape, data_test.shape, )

# Over to Perceptron

<p style="font-family: Arial; font-size:1.2em;color:black;">
Since our output is binary, it makes sense to use the $\mathrm{sigmoid}$ activation. To obtain a class label we output 1 when the activation is at least 0.5 (i.e. $z \ge 0$) and 0 otherwise.</p>
$$
\begin{aligned}
z & = x_1 . w_1 + x_2 . w_2 + b_1 \\
a & = \hat{y} = \mathrm{sigmoid}(z)\\
dz & = (a - y)\\
db & = dz\\
b & = b - \alpha . db\\
dw_1 & = x_1. dz\\
dw_2 & = x_2.dz\\
w_1 & = w_1 - \alpha . dw_1\\
w_2 & = w_2 - \alpha . dw_2\\
\end{aligned}
$$

In [None]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        The element-wise sigmoid of ``z``: a NumPy float scalar for scalar
        input, otherwise an ndarray with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)

    # exp(-|z|) always lies in [0, 1], so it cannot overflow the way
    # exp(-z) does for large negative z.
    e = np.exp(-np.abs(z))

    # z >= 0 : 1 / (1 + exp(-z))
    # z <  0 : exp(z) / (1 + exp(z))   (the same value, rearranged)
    a = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    # indexing with () unwraps a 0-d result so scalar input yields a scalar
    return a[()]

In [None]:
# First, a helper that turns one data row into an activation
def predict(row, weights):
    """Return the sigmoid activation for a single data row.

    Args:
        row: Sequence of feature values followed by one trailing element
            (the target in the training data). The trailing element is
            not used in the computation.
        weights: Sequence holding the bias at index 0, followed by one
            weight per feature.

    Returns:
        ``sigmoid(z)`` with ``z = bias + sum(weight_i * feature_i)``.
    """
    # start from the bias term ...
    z = weights[0]

    # ... then add weight * feature for every column except the last one
    for idx, feature in enumerate(row[:-1], start=1):
        z += weights[idx] * feature

    return sigmoid(z)

In [None]:
# Quick sanity check of predict() on the first record with hand-picked weights
weights = [0.1, 2.0, 1.0]  # [bias, weight for A, weight for B]
display(data_df.iloc[0])
predict(data_df.iloc[0].to_numpy(), weights)

In [None]:
# Estimate Perceptron weights
def train_weights(train, alpha, n_epoch, random_state=None):
    """Fit the bias and weights with per-sample gradient-descent updates.

    Args:
        train: 2-D NumPy array; each row holds the feature values followed
            by the 0/1 target in the last column.
        alpha: Learning rate applied to every update.
        n_epoch: Number of full passes over ``train``.
        random_state: Optional seed for the initial weights. When ``None``
            (the default) the weights are drawn from NumPy's global random
            generator, exactly as before.

    Returns:
        A tuple ``(errors, weights)``: ``errors`` is a list with the summed
        binary cross-entropy of each epoch, ``weights`` is the fitted array
        with the bias at index 0 followed by one weight per feature.
    """
    n_weights = train.shape[1]  # bias + one weight per feature column

    # Initialize weights. In theory b can be zero.
    if random_state is None:
        weights = np.random.rand(n_weights)
    else:
        weights = np.random.RandomState(random_state).rand(n_weights)

    eps = 1e-15  # keeps log() away from 0 when the activation saturates
    errors = []

    for _ in range(n_epoch):

        sum_error = 0.0

        for row in train:

            target = row[-1]
            a = predict(row, weights)

            # Full binary cross-entropy: both the y = 1 and the y = 0 term
            # contribute, so mistakes on either class show up in the loss.
            a_safe = np.clip(a, eps, 1 - eps)
            sum_error += -(target * np.log(a_safe)
                           + (1 - target) * np.log(1 - a_safe))

            # Gradient of the loss with respect to z
            dz = a - target

            weights[0] -= alpha * dz                # bias update
            weights[1:] -= alpha * dz * row[:-1]    # one update per feature

        errors.append(sum_error)

    return errors, weights

In [None]:
alpha = 0.05       # learning rate
n_epoch = EPOCHS   # number of passes, taken from the configuration cell

# train_weights draws its initial weights from NumPy's global random
# generator; seeding it here makes the run repeatable.
np.random.seed(RANDOM_STATE)

errors, weights = train_weights(data_train, alpha, n_epoch)
print(weights)

In [None]:
# Summed error of the first and of the last epoch
errors[0],errors[-1]

In [None]:
fig, ax = plt.subplots()
ax.plot(errors);

# Text for the summary box: error of the first and of the last epoch
txtstr = "Errors: \n  Start : {:7.2f}\n   End : {:7.2f}".format(errors[0],errors[-1])

# Box styling (matplotlib.patch.Patch properties)
props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5}

# Place the box near the top-right corner, in axes coordinates
ax.text(0.75, 0.95, txtstr,
        transform=ax.transAxes,
        fontsize=14,
        verticalalignment='top',
        bbox=props)

ax.set(xlabel="Epochs", ylabel="Error")
ax.grid();

In [None]:
# Threshold the activation at 0.5 to get a 0/1 label for every test row
predictions = [int(predict(row, weights) >= 0.5) for row in data_test]

In [None]:
# Share of test rows whose predicted label matches the target (last column)
accuracy_score(data_test[:,-1], predictions)

In [None]:
# Counts of true labels (rows) against predicted labels (columns)
confusion_matrix(data_test[:,-1], predictions)

In [None]:
def fn_plot_decision_boundary(X, y, model_weights=None):
    """Plot the predicted class regions together with the data points.

    Args:
        X: 2-D array of samples; column 0 is drawn on the x-axis and
            column 1 on the y-axis.
        y: Class label of every sample, used to colour the points.
        model_weights: Weights passed to ``predict`` (bias first). When
            ``None`` (the default) the notebook-level ``weights`` variable
            is used, which is what this function always did before.
    """
    if model_weights is None:
        model_weights = weights

    fig, ax = plt.subplots()

    dm = 0.05  # grid spacing

    # Grid covering the data with a margin of 1 on every side
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    xx, yy = np.meshgrid(np.arange(x_min, x_max, dm),
                         np.arange(y_min, y_max, dm))

    XX = np.c_[xx.ravel(), yy.ravel()]

    # predict() skips the last element of a row (the target column in the
    # training data), so append a dummy column to keep both features in use.
    XX = np.hstack((XX, np.ones((XX.shape[0], 1))))

    # Class label for every grid point: 1 where the activation is >= 0.5
    y_pred = [int(predict(row, model_weights) >= 0.5) for row in XX]

    Z = np.array(y_pred).reshape(xx.shape)

    ax.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.Paired)
    ax.scatter(X[:, 0], X[:, 1], c=y,
               s=20, edgecolor='k', cmap=plt.cm.Paired)
    ax.set_title('Decision Boundary')

    ax.set_xlabel('A')
    ax.set_ylabel('B')
    plt.show()

In [None]:
# Decision regions drawn over the full generated dataset
fn_plot_decision_boundary(X, y)