In [1]:
# Author: Young Park
# Student ID: 301200413

# load the data: training.json, test.json

# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
%matplotlib inline

# load the data
with open('training.json') as f:
    training = json.load(f)
with open('test.json') as f:
    test = json.load(f)

# convert the data into a dataframe
training_df = pd.DataFrame(training)
test_df = pd.DataFrame(test)

In [2]:
print(training_df.shape, test_df.shape)

(147, 5) (3, 5)


In [3]:
# encoding categorical data to numerical data
training_df['species'] = training_df['species'].astype('category')
training_df['species'] = training_df['species'].cat.codes
test_df['species'] = test_df['species'].astype('category')
test_df['species'] = test_df['species'].cat.codes

In [4]:
# scale the data
import sklearn.preprocessing as preprocessing
scaler = preprocessing.StandardScaler()
training_df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']] = scaler.fit_transform(training_df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']])
test_df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']] = scaler.fit_transform(test_df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']])

In [5]:
# Use backpropagation to train a neural network to classify the different species of the Iris flower.

sigmoid = lambda x: 1 / (1 + np.exp(-x));

# simple 4-5-3 ANN
def backprop(W1,W2,X,D):
    alpha = 0.01; # learning rate
    for n in range(0,len(X)):
        # X is pandas dataframe
        x = X.iloc[n].values
        # D is pandas series
        d = D.iloc[n]
        # forward pass
        v1 = np.dot(W1,x)
        y1 = sigmoid(v1)
        v2 = np.dot(W2,y1)
        y2 = sigmoid(v2)
        # backward pass
        e2 = d - y2
        delta2 = y2 * (1 - y2) * e2 # delta: derivative of activation function times error
        e1 = np.dot(W2.T,delta2)
        delta1 = y1 * (1 - y1) * e1 # 𝜎′(𝑥)=𝜎(𝑥)∙(1− 𝜎(𝑥)) = y * (1-y)
        # update weights
        W2 += alpha * np.outer(delta2,y1)
        W1 += alpha * np.outer(delta1,x)
    return W1,W2

In [6]:
# initialize the weights
W1 = np.random.rand(5,4)
W2 = np.random.rand(3,5)

# train the network
for i in range(0,10000):
    W1,W2 = backprop(W1,W2,training_df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']],training_df['species'])

In [9]:
W1

array([[ 2.60070479,  2.81282305,  1.65245452, -1.65850712],
       [ 2.67447143,  2.54591595,  1.74525047, -1.59365531],
       [ 2.83473327,  2.90580255,  1.2724203 , -1.53652267],
       [ 2.64912969,  2.91994318,  1.56140248, -1.75626899],
       [ 2.80705289,  3.04462224,  1.37114527, -1.92871199]])

In [10]:
W2

array([[1.95288534, 1.62529586, 1.70136365, 2.34182707, 2.51425664],
       [2.08689811, 1.56728098, 1.87208325, 1.94271611, 2.66234192],
       [1.78350861, 1.78419657, 2.09995916, 2.27753205, 2.20821587]])

In [13]:
training_df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']].head()

Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width
0,-1.337867,-1.308265,-0.89693,1.045627
1,-1.337867,-1.308265,-1.137091,-0.115303
2,-1.394543,-1.308265,-1.377252,0.349069
3,-1.281191,-1.308265,-1.497333,0.116883
4,-1.337867,-1.308265,-1.01701,1.277813


In [14]:
training_df['species'].head()

0    0
1    0
2    0
3    0
4    0
Name: species, dtype: int8

In [19]:
# classify the test data and display the flower species
def classify(W1,W2,X):
    for n in range(0,len(X)):
        x = X.iloc[n].values
        v1 = np.dot(W1,x)
        y1 = sigmoid(v1)
        v2 = np.dot(W2,y1)
        y2 = sigmoid(v2)
        print('The flower is classified as species',np.argmax(y2))

classify(W1,W2,test_df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']])


The flower is classified as species 2
The flower is classified as species 2
The flower is classified as species 2


In [21]:
# print the actual flower species names
test_df['species'].replace(0, 'setosa', inplace=True)
test_df['species'].replace(1, 'versicolor', inplace=True)
test_df['species'].replace(2, 'virginica', inplace=True)
test_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,-1.297771,1.408374,-1.367295,-1.323501,setosa
1,1.13555,-0.592999,0.996503,1.093327,virginica
2,0.162221,-0.815374,0.370792,0.230174,versicolor


In [22]:
# report the accuracy of the network
def accuracy(W1,W2,X,D):
    correct = 0
    for n in range(0,len(X)):
        x = X.iloc[n].values
        d = D.iloc[n]
        v1 = np.dot(W1,x)
        y1 = sigmoid(v1)
        v2 = np.dot(W2,y1)
        y2 = sigmoid(v2)
        if np.argmax(y2) == d:
            correct += 1
    print('The accuracy of the network is',correct/len(X))

accuracy(W1,W2,training_df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']],training_df['species'])


The accuracy of the network is 0.3673469387755102
