In [1]:
import pandas as pd
import numpy as np

In [2]:
all_data = pd.read_csv('iris\iris.csv')

In [3]:
all_data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
sepal_length    150 non-null float64
sepal_width     150 non-null float64
petal_length    150 non-null float64
petal_width     150 non-null float64
species         150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


In [5]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
all_data['species'] = label_encoder.fit_transform(all_data['species'])
all_data['species'].unique()

array([0, 1, 2], dtype=int64)

In [6]:
from sklearn.model_selection import train_test_split

all_labels = all_data['species'].copy()
all_features = all_data.drop(['species'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(all_features, all_labels, test_size=0.3, random_state=0)

In [7]:
X_train.shape, y_train.shape

((105, 4), (105,))

In [8]:
X_test.shape, y_test.shape

((45, 4), (45,))

In [9]:
# Reshape features for NN
X_train = X_train.T
X_test = X_test.T

In [10]:
# Reshape labels for NN
y_train = (pd.DataFrame(y_train)).T
y_test = (pd.DataFrame(y_test)).T

In [11]:
# Get the node sizes for NN
n_x = X_train.shape[0]
n_h = 6
n_y = y_train.shape[0]

### Initialize the parameters

In [12]:
np.random.seed(2)

W1 = np.random.randn(n_h, n_x) * 0.01
b1 = np.random.randn(n_h, 1) * 0.01
W2 = np.random.randn(n_y, n_h) * 0.01
b2 = np.random.randn(n_y, 1) * 0.01

In [17]:
def tanh(Z):
    return np.tanh(Z)

In [18]:
def softmax(Z):
    expA = np.exp(Z)
    return expA / expA.sum(axis=1, keepdims=True)

In [19]:
def derivative_tanh(Z, A1):
    return (1 - np.power(A1, 2))

### Forward-Propagate

In [20]:
def forward_propagate(X, W1, b1, W2, b2):
    Z1 = np.dot(W1, X) + b1
    A1 = tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = softmax(Z2)
    
    cache = {
        'Z1': Z1,
        'A1': A1,
        'Z2': Z2,
        'A2': A2
    }
    
    return cache

In [21]:
forward_try_result = forward_propagate(X_train, W1, b1, W2, b2)
forward_try_result['A2'].shape

(1, 105)

### Cost Function

In [49]:
def compute_cost(A2, Y, W1, b1, W2, b2):
    m = Y.shape[1]
    
    cost = - np.sum(np.dot(Y, np.log(A2).T), axis=1) / m
    cost = cost.reshape(-1, 1)
    
    return cost

In [50]:
cost_try_result = compute_cost(forward_try_result['A2'], y_train, W1, b1, W2, b2)
cost_try_result.shape

(1, 1)

### Backward-Propagate

In [51]:
def backward_propagagate(X, Y, forward_result, W1, b1, W2, b2):
    m = X.shape[1]
    
    A1 = forward_result['A1']
    A2 = forward_result['A2']
    Z1 = forward_result['Z1']
    
    dZ2 = A2 - Y
    dW2 = (1/m) * np.dot(dZ2, A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1).to_numpy().reshape(-1, 1)
    
    dZ1 = np.dot(W2.T, dZ2) * derivative_tanh(Z1, A1)
    dW1 = (1/m) * np.dot(dZ1, X.T)
    db1 = (1/m) * np.sum(dZ1, axis=1).reshape(-1, 1)
    
    grads = {
        'dZ2': dZ2,
        'dW2': dW2,
        'db2': db2,
        'dZ1': dZ1,
        'dW1': dW1,
        'db1': db1
    }
#     return None
    return grads

In [52]:
backward_try_result = backward_propagagate(X_train, y_train, forward_try_result, W1, b1, W2, b2)
backward_try_result['db1'].shape

(6, 1)

In [53]:
def update_params(W1, b1, W2, b2, backward_result, learning_rate=0.001):
    
    dW1 = backward_result['dW1']
    db1 = backward_result['db1']
    dW2 = backward_result['dW2']
    db2 = backward_result['db2']
    
    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*db2
    
    return W1, b1, W2, b2

In [54]:
# Train the Neural Network!
def nn_model(X, Y, W1, b1, W2, b2, num_iterations=1000):
    for i in range(0, num_iterations):
        forward_result = forward_propagate(X, W1, b1, W2, b2)
        cost = compute_cost(forward_result['A2'], Y, W1, b1, W2, b2)
        backward_result = backward_propagagate(X, Y, forward_result, W1, b1, W2, b2)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, backward_result, learning_rate=0.009)
        
        print('Cost after iteration i:', i, cost) 
        
    return W1, b1, W2, b2

In [55]:
W1, b1, W2, b2 = nn_model(X_train, y_train, W1, b1, W2, b2, num_iterations=1000)

Cost after iteration i: 0 [[4.83121748]]
Cost after iteration i: 1 [[4.83123341]]
Cost after iteration i: 2 [[4.83124932]]
Cost after iteration i: 3 [[4.8312652]]
Cost after iteration i: 4 [[4.83128107]]
Cost after iteration i: 5 [[4.83129691]]
Cost after iteration i: 6 [[4.83131274]]
Cost after iteration i: 7 [[4.83132854]]
Cost after iteration i: 8 [[4.83134433]]
Cost after iteration i: 9 [[4.83136009]]
Cost after iteration i: 10 [[4.83137583]]
Cost after iteration i: 11 [[4.83139156]]
Cost after iteration i: 12 [[4.83140726]]
Cost after iteration i: 13 [[4.83142294]]
Cost after iteration i: 14 [[4.83143861]]
Cost after iteration i: 15 [[4.83145425]]
Cost after iteration i: 16 [[4.83146987]]
Cost after iteration i: 17 [[4.83148548]]
Cost after iteration i: 18 [[4.83150106]]
Cost after iteration i: 19 [[4.83151662]]
Cost after iteration i: 20 [[4.83153217]]
Cost after iteration i: 21 [[4.83154769]]
Cost after iteration i: 22 [[4.8315632]]
Cost after iteration i: 23 [[4.83157868]]
Cost

Cost after iteration i: 219 [[4.83428274]]
Cost after iteration i: 220 [[4.83429506]]
Cost after iteration i: 221 [[4.83430736]]
Cost after iteration i: 222 [[4.83431965]]
Cost after iteration i: 223 [[4.83433193]]
Cost after iteration i: 224 [[4.8343442]]
Cost after iteration i: 225 [[4.83435645]]
Cost after iteration i: 226 [[4.83436869]]
Cost after iteration i: 227 [[4.83438091]]
Cost after iteration i: 228 [[4.83439312]]
Cost after iteration i: 229 [[4.83440532]]
Cost after iteration i: 230 [[4.83441751]]
Cost after iteration i: 231 [[4.83442968]]
Cost after iteration i: 232 [[4.83444184]]
Cost after iteration i: 233 [[4.83445399]]
Cost after iteration i: 234 [[4.83446613]]
Cost after iteration i: 235 [[4.83447825]]
Cost after iteration i: 236 [[4.83449036]]
Cost after iteration i: 237 [[4.83450246]]
Cost after iteration i: 238 [[4.83451454]]
Cost after iteration i: 239 [[4.83452661]]
Cost after iteration i: 240 [[4.83453867]]
Cost after iteration i: 241 [[4.83455071]]
Cost after i

Cost after iteration i: 460 [[4.83691882]]
Cost after iteration i: 461 [[4.83692855]]
Cost after iteration i: 462 [[4.83693826]]
Cost after iteration i: 463 [[4.83694797]]
Cost after iteration i: 464 [[4.83695767]]
Cost after iteration i: 465 [[4.83696736]]
Cost after iteration i: 466 [[4.83697704]]
Cost after iteration i: 467 [[4.83698671]]
Cost after iteration i: 468 [[4.83699637]]
Cost after iteration i: 469 [[4.83700603]]
Cost after iteration i: 470 [[4.83701567]]
Cost after iteration i: 471 [[4.83702531]]
Cost after iteration i: 472 [[4.83703494]]
Cost after iteration i: 473 [[4.83704456]]
Cost after iteration i: 474 [[4.83705417]]
Cost after iteration i: 475 [[4.83706378]]
Cost after iteration i: 476 [[4.83707337]]
Cost after iteration i: 477 [[4.83708296]]
Cost after iteration i: 478 [[4.83709253]]
Cost after iteration i: 479 [[4.8371021]]
Cost after iteration i: 480 [[4.83711166]]
Cost after iteration i: 481 [[4.83712121]]
Cost after iteration i: 482 [[4.83713076]]
Cost after i

Cost after iteration i: 654 [[4.83865611]]
Cost after iteration i: 655 [[4.83866436]]
Cost after iteration i: 656 [[4.8386726]]
Cost after iteration i: 657 [[4.83868083]]
Cost after iteration i: 658 [[4.83868906]]
Cost after iteration i: 659 [[4.83869728]]
Cost after iteration i: 660 [[4.8387055]]
Cost after iteration i: 661 [[4.83871371]]
Cost after iteration i: 662 [[4.83872191]]
Cost after iteration i: 663 [[4.8387301]]
Cost after iteration i: 664 [[4.83873829]]
Cost after iteration i: 665 [[4.83874647]]
Cost after iteration i: 666 [[4.83875465]]
Cost after iteration i: 667 [[4.83876282]]
Cost after iteration i: 668 [[4.83877098]]
Cost after iteration i: 669 [[4.83877914]]
Cost after iteration i: 670 [[4.83878729]]
Cost after iteration i: 671 [[4.83879543]]
Cost after iteration i: 672 [[4.83880357]]
Cost after iteration i: 673 [[4.8388117]]
Cost after iteration i: 674 [[4.83881982]]
Cost after iteration i: 675 [[4.83882794]]
Cost after iteration i: 676 [[4.83883605]]
Cost after iter

Cost after iteration i: 859 [[4.84022067]]
Cost after iteration i: 860 [[4.84022773]]
Cost after iteration i: 861 [[4.84023479]]
Cost after iteration i: 862 [[4.84024185]]
Cost after iteration i: 863 [[4.84024889]]
Cost after iteration i: 864 [[4.84025594]]
Cost after iteration i: 865 [[4.84026297]]
Cost after iteration i: 866 [[4.84027001]]
Cost after iteration i: 867 [[4.84027703]]
Cost after iteration i: 868 [[4.84028406]]
Cost after iteration i: 869 [[4.84029107]]
Cost after iteration i: 870 [[4.84029809]]
Cost after iteration i: 871 [[4.84030509]]
Cost after iteration i: 872 [[4.8403121]]
Cost after iteration i: 873 [[4.84031909]]
Cost after iteration i: 874 [[4.84032609]]
Cost after iteration i: 875 [[4.84033308]]
Cost after iteration i: 876 [[4.84034006]]
Cost after iteration i: 877 [[4.84034704]]
Cost after iteration i: 878 [[4.84035401]]
Cost after iteration i: 879 [[4.84036098]]
Cost after iteration i: 880 [[4.84036794]]
Cost after iteration i: 881 [[4.8403749]]
Cost after it

In [57]:
prediction_results = forward_propagate(X_train, W1, b1, W2, b2)

predictions = prediction_results['A2']

# for pred in range(predictions.shape[1]):
#     if predictions[:, pred] <= 0.5:
#         predictions[:, pred] = 0
#     else:
#         predictions[:, pred] = 1

train_pred = pd.DataFrame({'prediction': predictions.squeeze(),
              'actual': y_train.squeeze()
             })
print(train_pred.head(15))

     prediction  actual
60     0.009585       1
116    0.009863       2
144    0.009867       2
119    0.009843       2
108    0.009865       2
69     0.009779       1
135    0.009869       2
56     0.009855       1
80     0.009757       1
123    0.009854       2
133    0.009856       2
106    0.009772       2
146    0.009855       2
50     0.009862       1
147    0.009862       2


In [58]:
ave_err = np.sum(train_pred['prediction'] - train_pred['actual']) / len(train_pred)
ave_err

-1.0380952380952382

### Test

In [126]:
test_prediction_results = forward_propagate(X_test, W1, b1, W2, b2)

test_predictions = test_prediction_results['A2']

for pred in range(test_predictions.shape[1]):
    if test_predictions[:, pred] <= 0.5:
        test_predictions[:, pred] = 0
    else:
        test_predictions[:, pred] = 1

test_pred = pd.DataFrame({'prediction': test_predictions.squeeze(),
              'actual': y_test.squeeze()
             })

print(test_pred)

ave_err = np.sum(test_pred['prediction'] - test_pred['actual']) / len(test_pred)
print('Average error: ', ave_err)

    prediction  actual
26         0.0       0
86         1.0       1
2          0.0       0
55         1.0       1
75         1.0       1
93         1.0       1
16         0.0       0
73         1.0       1
54         1.0       1
95         1.0       1
53         1.0       1
92         1.0       1
78         1.0       1
13         0.0       0
7          0.0       0
30         0.0       0
22         0.0       0
24         0.0       0
33         0.0       0
8          0.0       0
43         0.0       0
62         1.0       1
3          0.0       0
71         1.0       1
45         0.0       0
48         0.0       0
6          0.0       0
99         1.0       1
82         1.0       1
76         1.0       1
Average error:  0.0
