In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.datasets import load_iris
data = load_iris()

In [3]:
data.keys()

dict_keys(['data', 'DESCR', 'target_names', 'feature_names', 'target'])

In [4]:
df = pd.DataFrame(data = data['data'],columns = data['feature_names'])

In [5]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [6]:
X = np.array(df)
y = np.array(data['target'])

In [7]:
from sklearn.utils import shuffle
X, y = shuffle(X, y)

In [8]:
print(X.shape)
print(y.shape)

(150, 4)
(150,)


In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
clf = RandomForestClassifier()
clf.fit(X,y)
print(cross_val_score(clf, X, y)) 

[ 0.96078431  0.92156863  0.9375    ]


In [10]:
from sklearn.preprocessing import OneHotEncoder
from scipy.sparse import csr_matrix
enc = OneHotEncoder()
y_train = enc.fit_transform(y.reshape(-1, 1)).toarray()
y_train.shape

(150, 3)

In [11]:
w1 = np.random.randn(4,10)*.001
b1 = np.random.randn(10)*.001
w2 = np.random.randn(10,10)*.001
b2 = np.random.randn(10)*.001
w_out = np.random.randn(10,3)*.001
b_out = np.random.randn(3)*.001

In [12]:
'''
Stochastic Gradient Descent
(1x4)(4X10)+(1x10) = (1x10) 

'''
'''



'''

'\n\n\n\n'

In [13]:
def forward(x,y,w1,b1,w2,b2,w_out,b_out):
    '''
    print('SHAPES GOING INTO FORWARD-PASS')
    print('w1: ',w1.shape)
    print('w2: ',w2.shape)
    print('b1: ',b1.shape)
    print('b2: ',b2.shape)
    print('w_out: ',w_out.shape)
    '''
    #forward pass for x (SGD)
    fc1 = (x.dot(w1)+b1).reshape(10,1)
    #(1x4)(4X10)+(1x10) = (1x10)
    activation_1 = sigmoid(fc1).T
    #(1x10)
    
    fc2 = (activation_1.dot(w2)+b2).reshape(10,1)
    #(1x10)(10x10)=(1x10)
    activation_2 = sigmoid(fc2).T
    #(1x10)
    
    output = (activation_2.dot(w_out)+b_out).reshape(3,1)
    #(1x10)(10x3)=(1x3)
    class_probs = softmax(output)
    #(1x4)
    cost = MSE(class_probs,y)
    '''
    print('SHAPES COMING OUT OF FORWARD-PASS')
    print('w1: ',w1.shape)
    print('w2: ',w2.shape)
    print('b1: ',b1.shape)
    print('b2: ',b2.shape)
    print('w_out: ',w_out.shape)
    print('fc1: ', fc1.shape)
    print('activation_1: ', activation_1.shape)
    print('fc2: ', fc2.shape)
    print('activation_2: ',activation_2.shape)
    print('output: ',output.shape)
    print('class_probs: ', class_probs.shape)
    '''
    return fc1, activation_1, fc2, activation_2, output, class_probs, cost
    
def sigmoid(x):
    return 1/(1+np.exp(-x))

# def softmax(z):
#     assert len(z.shape) == 2
#     s = np.max(z, axis=1)
#     s = s[:, np.newaxis] # necessary step to do broadcasting
#     e_x = np.exp(z - s)
#     div = np.sum(e_x, axis=0)
#     div = div[:, np.newaxis] # dito
#     return e_x / div

def softmax(w, t = 1.0):
    e = np.exp(np.array(w) / t)
    dist = e / np.sum(e)
    return dist

def MSE(class_probs,y):
    #using a simple SSE cost function for ease of differentiation.
    return np.sum((class_probs.T-y)**2)/3

In [14]:
z= np.array([[-4.45923876],
             [-3.15245467],
             [-6.69678721]])
# z= np.array([[0.45923876],
#              [1.15245467],
#              [0.69678721]])
softmax(z)

array([[ 0.2082902 ],
       [ 0.76948111],
       [ 0.02222869]])

In [15]:
x = X[0]
y = y_train[0]
fc1, activation_1, fc2, activation_2,output,class_probs, cost = forward(x,y,w1,b1,w2,b2,w_out,b_out)

In [135]:
'''
x > fc1(w1,b1) > activation_1(sigmoid) > 
fc2(w2,b2) > activation_2(sigmoid) >
output(w_out) > class_probs(softmax) > cost(MSE)

cost(class_probs) = np.sum((class_probs.T-y)**2)/3
                    2*(class_probs[0]+class_probs[1]+class_probs[2])
d_cost/d_class_probs = np.array([2*class_probs[0],2*class_probs[1],2*class_probs[2]]) 

d_class_probs_d_output = d_softmax/d_output
(1x3) > (1x3)
'''

def d_MSE(class_probs):
    #IN: cost(1)
    #OUT: dcost/dclass_probs(1x3)
    return np.array([class_probs[0],class_probs[1],class_probs[2]]).T*2
    

def d_sigmoid(x):
    return sigmoid(x)*(1-sigmoid(x))

def d_softmax(class_probs):
    return softmax(class_probs)*(1-softmax(class_probs))

def backward(w1,w2,b1,b2,w_out,b_out,fc1, activation_1, fc2, activation_2,output,class_probs,cost,learning_rate):
    '''
    print('SHAPES GOING INTO BACKPASS')
    print('w1: ',w1.shape)
    print('w2: ',w2.shape)
    print('b1: ',b1.shape)
    print('b2: ',b2.shape)
    print('w_out: ',w_out.shape)
    print('fc1: ', fc1.shape)
    print('activation_1: ', activation_1.shape)
    print('fc2: ', fc2.shape)
    print('activation_2: ',activation_2.shape)
    print('output: ',output.shape)
    print('class_probs: ', class_probs.shape)
    '''
    d_cost_d_class_probs = d_MSE(class_probs).T
    #print('d_cost_d_class_probs',d_cost_d_class_probs.shape)
    d_class_probs_d_output = d_softmax(output)
    #print('d_class_probs_d_output',d_class_probs_d_output.shape)
    d_output_d_activation_2 = w_out
    #print('d_output_d_activation_2',d_output_d_activation_2.shape)
    d_activation_2_d_fc2 = d_sigmoid(fc2)
    #print('d_activation_2_d_fc2',d_activation_2_d_fc2.shape)
    d_fc2_d_activation1 = w2
    #print('d_fc2_d_activation1',d_fc2_d_activation1.shape)
    d_activation_1_d_fc1 = d_sigmoid(fc1)
    #print('d_activation_1_d_fc1',d_activation_1_d_fc1.shape)
    d_fc1_d_x = w1
    #print('d_fc1_d_x',d_fc1_d_x.shape, '\n \n')
    
    d_cost_d_output = d_cost_d_class_probs * d_class_probs_d_output
    #print('d_cost_d_output',d_cost_d_output.shape, '\n \n')
    
    d_cost_d_w_out = (d_cost_d_output * activation_2).T
    #print('d_cost_d_w_out',d_cost_d_w_out.shape, '\n \n')
    d_cost_d_b_out = d_cost_d_output
    #print('d_cost_d_b_out',d_cost_d_b_out.shape, '\n \n')
    
    d_cost_d_activation_2 = d_cost_d_output.T.dot(d_output_d_activation_2.T)
    #print('d_cost_d_activation_2',d_cost_d_activation_2.shape)
    
    d_cost_d_fc2 = d_cost_d_activation_2 * d_activation_2_d_fc2
    #print('d_cost_d_fc2',d_cost_d_fc2.shape, '\n \n')
    
    d_cost_d_w2 = d_cost_d_activation_2.T.dot(fc2.reshape(1,10))
    #print('d_cost_d_w2',d_cost_d_w2.shape)
    d_cost_d_b2 = d_cost_d_activation_2
    #print('d_cost_d_b2',d_cost_d_b2.shape, '\n \n')
    
    d_cost_d_activation_1 = d_cost_d_fc2 * d_fc2_d_activation1
    #print('d_cost_d_activation_1',d_cost_d_activation_1.shape)
    
    d_cost_d_fc1 = d_cost_d_activation_1 * d_activation_1_d_fc1
    #print('d_cost_d_fc1',d_cost_d_fc1.shape, '\n \n')
    
    d_cost_d_w1 = d_cost_d_activation_1.T.dot(fc1.reshape(10,1))
    #print('d_cost_d_w1',d_cost_d_w1.shape)
    d_cost_d_b1 = d_cost_d_activation_2
    #print('d_cost_d_b1',d_cost_d_b1.shape, '\n \n')
    
    d_cost_d_x = d_cost_d_fc1.dot(d_fc1_d_x.T)
    #print('d_cost_d_x',d_cost_d_x.shape)
    
    w_out = w_out-learning_rate*d_cost_d_w_out
    b_out = b_out-learning_rate*d_cost_d_b_out
    
    w2 = w2-learning_rate*d_cost_d_w2.T
    b2 = b2-learning_rate*d_cost_d_b2

    w1 = w1-learning_rate*d_cost_d_w1.T
    b1 = b1-learning_rate*d_cost_d_b1
    '''
    print('SHAPES COMING OUT OF BACKPASS')
    print('w1: ',w1.shape)
    print('w2: ',w2.shape)
    print('b1: ',b1.shape)
    print('b2: ',b2.shape)
    print('w_out: ',w_out.shape)
    print('fc1: ', fc1.shape)
    print('activation_1: ', activation_1.shape)
    print('fc2: ', fc2.shape)
    print('activation_2: ',activation_2.shape)
    print('output: ',output.shape)
    print('class_probs: ', class_probs.shape)
    '''
    return w1,b1,w2,b2,w_out
    

w1,b1,w2,b2,w_out = backward(w1,w2,b1,b2,w_out, b_out,fc1, activation_1, fc2, activation_2,output,class_probs, cost,learning_rate=0.01)

In [136]:
activation_2.dot(w_out).shape

(1, 3)

In [132]:
w1 = (np.random.randn(4,10)-1)*.0001
b1 = (np.random.randn(10)-1)*.0001
w2 = (np.random.randn(10,10)-1)*.0001
b2 = (np.random.randn(10)-1)*.0001
w_out = (np.random.randn(10,3)-1)*.0001
b_out = (np.random.randn(3)-1)*.0001
y = y_train
learning_rate = 0.001
epochs = 200

In [133]:
print(w_out.max())
print(b1.min())

0.000118680126322
-0.000236250193984


In [134]:
for epoch in range(epochs):
    costs = []
    for i,x in enumerate(X):
        #print(i)
        x = x.reshape(1,4)
        fc1, activation_1, fc2, activation_2, output, class_probs, cost = forward(x,y[i],w1,b1,w2,b2,w_out,b_out)
        w1,b1,w2,b2,w_out = backward(w1,w2,b1,b2,w_out,b_out,fc1, activation_1, fc2, activation_2,output,class_probs, cost,learning_rate)
        costs.append(cost)
        #print('Cost: ',cost)
        #print(b1[0][1])
        #print('activation_1: ',activation_1)
        #print('activation_2: ',activation_2)
        #print('Output: ',output)
    #print(b1[0][4])
    print(w1[0][2])
    #print('Output: ',output)
    #print('Class Probs: ',class_probs)
    #print('Ave Cost: ',np.mean(costs))

-2.77840876758e-05
-2.77842900447e-05
-2.77851759904e-05
-2.77873758175e-05
-2.77909457778e-05
-2.77934902287e-05
-2.778617777e-05
-2.77463517456e-05
-2.76249988524e-05
-2.73267970774e-05
-2.66797793668e-05
-2.53907754596e-05
-2.29816710655e-05
-1.87000748075e-05
-1.1396114728e-05
6.45313890131e-07
1.99305797259e-05
5.0059165031e-05
9.61234682247e-05
0.00016523389597
0.000267201722702
0.000415415554543
0.000627944914881
0.000928888257383
0.00134993614293
0.00193201191814
0.00272662539237
0.00379613742319
0.00521136236607
0.00704375450588
0.00934809115804
0.0121314047618
0.0153084804738
0.0186590497761
0.0218267268431
0.0244101351335
0.0261426733914
0.0270420253679
0.0273754447956
0.0274543340106
0.0274644793061
0.0274650428677
0.0274650525683
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.0274650525999
0.02746

In [20]:
fc1, activation_1, fc2, activation_2, output, class_probs, cost = forward(X[1],y[1],w1,b1,w2,b2,w_out,b_out)

In [21]:
y[1]

array([ 1.,  0.,  0.])

In [22]:
class_probs

array([[ 0.00975022],
       [ 0.45229313],
       [ 0.53795665]])

In [23]:
#Come back to this
class MLP():
    
    def __init__(self, graph):
    self.graph = graph
        
class Graph():
    def __init__(self):
        pass
    
    def add_FCL(n):
        

IndentationError: expected an indented block (<ipython-input-23-ab000310a780>, line 5)