# Importing libraries

In [1]:
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt 

from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
np.random.seed(42)

In [2]:
print(load_wine().DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

# Loading Data 

In [3]:
wine_data = load_wine()
X = wine_data.data
Y = wine_data.target.reshape((-1,1))

In [4]:
y = np.zeros((X.shape[0],3))
for i in range(len(Y)):
    y[i][Y[i]] = 1

# Normalising Input Data

In [5]:
X = X.T
for i in range(len(X)):
    X[i] = X[i]/np.max(X[i])
X = X.T
X.shape

(178, 13)

In [6]:
(X,y) = shuffle(X,y,random_state = 40)

In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = 0.20)

In [8]:
def relu(x):
    x[x<0]=0
    return x

def softmax(x):
    return np.exp(x)/(np.sum(np.exp(x),axis=0))

def diff_relu(x):
    y = np.zeros(x.shape)
    y[x<=0] = 0
    y[x>0] = 1
    return y

def weight_init(x,y):
    return np.sqrt(2.0/(x+y))*np.random.normal(0,1,(x,y))

# Architecture of MLP and Initializing weights

In [9]:
inp_dim = 13
hl1_units = 15
hl2_units = 15
out_dim = 3

W1 = weight_init(hl1_units,inp_dim)
b1 = weight_init(hl1_units,1)
W2 = weight_init(hl2_units,hl1_units)
b2 = weight_init(hl2_units,1)
W3 = weight_init(out_dim,hl2_units)
b3 = weight_init(out_dim,1)

# Fitting training data

In [10]:
epochs = 1000
l_r = 0.01

for i in range(epochs):
    (x_train,y_train) = shuffle(X_train,Y_train,random_state = 40)
    loss = 0
    for j in range(0,len(x_train)):
        a1 = relu(np.matmul(W1,x_train[j]).reshape((-1,1)) + b1)
        a2 = relu(np.matmul(W2,a1).reshape((-1,1)) + b2)
        x = np.matmul(W3,a2) + b3
        a3 = softmax(x)
        loss += -np.log(a3[np.argmax(y_train[j])]) 
        
        delta3 = a3 - y_train[j].reshape(-1,1)
        delta2 = np.matmul(W3.T,delta3)*diff_relu(a2)
        delta1 = np.matmul(W2.T,delta2)*diff_relu(a1)
        
        grd_b3 = delta3
        grd_W3 = np.matmul(delta3,a2.T)
        grd_b2 = delta2
        grd_W2 = np.matmul(delta2,a1.T)
        grd_b1 = delta1
        grd_W1 = np.matmul(delta1,x_train[j].reshape((-1,1)).T)
        
        W1 += (-1*l_r)*grd_W1
        b1 += (-1*l_r)*grd_b1
        W2 += (-1*l_r)*grd_W2
        b2 += (-1*l_r)*grd_b2
        W3 += (-1*l_r)*grd_W3
        b3 += (-1*l_r)*grd_b3
        
    if i%10 == 0:
        print (str(i) + ":" + " loss =" + str(loss/len(x_train)))

0: loss =[1.06592045]
10: loss =[0.27434424]
20: loss =[0.15594959]
30: loss =[0.09883962]
40: loss =[0.07104598]
50: loss =[0.05703894]
60: loss =[0.05086939]
70: loss =[0.0477188]
80: loss =[0.04386272]
90: loss =[0.0368947]
100: loss =[0.0240034]
110: loss =[0.01285333]
120: loss =[0.00791626]
130: loss =[0.0064596]
140: loss =[0.00565208]
150: loss =[0.00500935]
160: loss =[0.00448272]
170: loss =[0.00398281]
180: loss =[0.00356932]
190: loss =[0.00319582]
200: loss =[0.00288289]
210: loss =[0.00259662]
220: loss =[0.00235837]
230: loss =[0.00215256]
240: loss =[0.00197756]
250: loss =[0.00182731]
260: loss =[0.00170431]
270: loss =[0.00158979]
280: loss =[0.00148841]
290: loss =[0.00139925]
300: loss =[0.00131971]
310: loss =[0.00124845]
320: loss =[0.00118414]
330: loss =[0.00112587]
340: loss =[0.0010698]
350: loss =[0.00102162]
360: loss =[0.0009789]
370: loss =[0.00093787]
380: loss =[0.00090006]
390: loss =[0.00086594]
400: loss =[0.00083373]
410: loss =[0.00080168]
420: loss

# Model Accuracy

In [11]:
y_predicted = list()
y_actual = list()
for j in range(0,len(X_test)):
    a1 = relu(np.matmul(W1,X_test[j]).reshape((-1,1)) + b1)
    a2 = relu(np.matmul(W2,a1).reshape((-1,1)) + b2)
    x = np.matmul(W3,a2) + b3
    out = softmax(x)
    y_predicted.append(np.argmax(out))
    y_actual.append(np.argwhere(Y_test[j] == 1)[0][0])
print ("actual out:"+str(y_actual))
print ("predic out:"+str(y_predicted))

actual out:[0, 1, 1, 0, 0, 2, 1, 0, 1, 1, 2, 0, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 0, 1, 1, 1, 0, 0, 2]
predic out:[0, 1, 1, 0, 0, 2, 1, 0, 1, 1, 2, 0, 2, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 1, 0, 1, 0, 2, 0, 1, 1, 1, 0, 0, 2]
