## Define functions

In [1]:
import numpy as np
import numpy.matlib
import pandas as pd

# This is referred above as f(u).
class nn_MSECriterion:
    def forward(self, predictions, labels):
        return np.sum(np.square(predictions - labels))
        
    def backward(self, predictions, labels):
        num_samples = labels.shape[0]
        return num_samples * 2 * (predictions - labels) ### why num_samples * ... ?

# This is referred above as g(v).
class nn_Sigmoid:
    def forward(self, x):
        return 1 / (1 + np.exp(-x))

    def inverse(self, x):
        return np.log(x/(1-x))
    
    def backward(self, x, gradOutput):
        # It is usually a good idea to use gv from the forward pass and not recompute it again here.
        gv = 1 / (1 + np.exp(-x))  
        return np.multiply(np.multiply(gv, (1 - gv)), gradOutput) ### what is gradOutput?

# This is referred above as h(W, b)
class nn_Linear:
    def __init__(self, input_dim, output_dim):
        # Initialized with random numbers from a gaussian N(0, 0.001)
        self.weight = np.matlib.randn(input_dim, output_dim) * 0.01
        self.bias = np.matlib.randn((1, output_dim)) * 0.01
        self.gradWeight = np.zeros_like(self.weight)
        self.gradBias = np.zeros_like(self.bias)
        
    def forward(self, x):
        return np.dot(x, self.weight) + self.bias
    
    def backward(self, x, gradOutput):
        # dL/dw = dh/dw * dL/dv
        self.gradWeight = np.dot(x.T, gradOutput)
        # dL/db = dh/db * dL/dv
        self.gradBias = np.copy(gradOutput)
        # return dL/dx = dh/dx * dL/dv
        return np.dot(gradOutput, self.weight.T)
    
    def getParameters(self):
        params = [self.weight, self.bias]
        gradParams = [self.gradWeight, self.gradBias]
        return params, gradParams
    

## load data

In [2]:
#import data with to numpy array
# to pandas first?
# signalOutput-coco_3_cv_3_netAng_30_twc_10_tfidfNoPro_pronoun_bin_1-OQD9U4.csv
rawSignals = pd.read_csv('signalOutput-coco_3_cv_3_netAng_30_twc_10_tfidfNoPro_pronoun_bin_1-OQD9U4.csv', index_col=0)

In [3]:
### GET X MATRIX
xBig = rawSignals.ix[:,4:].as_matrix()
print(len(xBig[0]))
xBig[0:5,0:5]

17


array([[ 0.02436323,  0.01904762,  1.        ,  0.        ,  0.20304569],
       [ 0.01027027,  0.01513514,  0.        ,  1.        ,  0.39130435],
       [ 0.03768116,  0.02318841,  1.        ,  0.        ,  0.88235294],
       [ 0.04921136,  0.02618297,  1.        ,  0.        ,  0.49418605],
       [ 0.04949153,  0.02067797,  1.        ,  0.        ,  0.21269841]])

### STANDARDIZE THE X-MATRIX

In [4]:
from sklearn.preprocessing import StandardScaler
x = StandardScaler().fit_transform(xBig)

In [5]:
###OR (if you don't want to standardize)
#x = xBig
###but you probably do...
### Accuracy on 400 epochs of 2-layer was 41%
### ... on 1000 epochs of 5-layer was 39% (the same? something must be wrong...)

### Make the y vector

In [6]:
splits = [group.split("_") for group in rawSignals['groupId']]
rawSignals['groupId'] = [doc[0] for doc in splits]
rawSignals = rawSignals.assign(tt = [doc[1] for doc in splits])

In [7]:
#rawSignals.head()

In [8]:
docRanks = pd.read_csv('docRanks.csv')
docRanks[['groupId']] = docRanks[['groupName']] 
docRanks.head()

Unnamed: 0,groupName,rank,groupId
0,ACLU01,3,ACLU01
1,ACLU02,3,ACLU02
2,ACLU03,3,ACLU03
3,ACLU04,3,ACLU04
4,ACLU05,3,ACLU05


In [9]:
yFull = pd.merge(rawSignals.ix[:,:1], docRanks.ix[:,1:] )
yFull.head()

Unnamed: 0,groupId,rank
0,Unitarian145,7
1,ACLU07,3
2,SeaShepherds397,1
3,JohnPiper413,2
4,Shepherd695,4


In [10]:
yBig = np.array(yFull['rank'])

In [11]:
yBig

array([7, 3, 1, 2, 4, 2, 2, 5, 5, 4, 6, 6, 1, 3, 8, 2, 8, 2, 5, 3, 1, 2, 3,
       9, 2, 2, 3, 2, 5, 1, 2, 6, 4, 3, 3, 5, 6, 2, 2, 6, 1, 1, 3, 5, 1, 3,
       3, 7, 1, 1, 5, 6, 3, 1, 1, 1, 5, 3, 3, 1, 4, 2, 2, 2, 2, 5, 1, 1, 5,
       1, 4, 2, 6, 1, 8, 5, 8, 3, 8, 3, 5, 5, 4, 3, 1, 5, 5, 1, 8, 2, 5, 5,
       6, 3, 8, 3, 8, 3, 1, 4, 1, 4, 5, 1, 4, 1, 3, 1, 6, 3, 3, 3, 3, 4, 1,
       4, 1, 2, 4, 3, 2, 3, 2, 2, 3, 4, 3, 3, 7, 3, 3, 5, 3, 8, 3, 5, 4, 2,
       6, 1, 2, 1, 2, 4, 7, 5, 3, 6, 1, 1, 3, 2, 5, 3, 2, 3, 1, 1, 1, 3, 4,
       1, 2, 4, 5, 8, 1, 3, 3, 1, 3, 2, 5, 7, 5, 4, 3, 6, 8, 2, 5, 1, 2, 5,
       4, 3, 6, 1, 3, 5, 1, 3, 2])

In [12]:
y = nn_Sigmoid().forward(yBig)
len(y)

193

In [13]:
x.shape

(193, 17)

### test the sigmoid on the y's

In [14]:
nn_Sigmoid().forward(7)

0.9990889488055994

In [15]:
nn_Sigmoid().inverse(0.9990889488055994)

7.0000000000000471

## the Models

### the 2-layer (same as VisLang homework)

In [16]:
learningRate = 0.1

model = {}  
#
#model['linear1'] = nn_Linear(4, 5)
#model['linear2'] = nn_Linear(5, 3)
model['linear1'] = nn_Linear(x.shape[1], 5)
model['linear2'] = nn_Linear(5, 1)
#
model['sigmoid'] = nn_Sigmoid()
model['loss'] = nn_MSECriterion()

preds100 = []
preds400 = []

epochsToRun = 401
for epoch in range(0, epochsToRun+1):
    loss = 0
    
    for i in range(0, x.shape[0]):
        xi = x[i:i+1, :]
        #yi = y[i:i+1, :] 
        yi = y[i:i+1]

        # Forward layer 1
        a0_L1 = model['linear1'].forward(xi)
        a1_L1 = model['sigmoid'].forward(a0_L1)
        # Forward layer 2
        a0_L2 = model['linear2'].forward(a1_L1)
        a1_L2 = model['sigmoid'].forward(a0_L2)
        #
        if epoch == 100: 
            preds100 = preds100 + [float(a1_L2)]
        if epoch == 400: 
            preds400 = preds400 + [float(a1_L2)]
            #print(a1_L2)
            #print(yi)
            #print(model['loss'].forward(a1_L2, yi))
        #
        loss += model['loss'].forward(a1_L2, yi)

        # Backward layer 2
        da1_L2 = model['loss'].backward(a1_L2, yi)
        da0_L2 = model['sigmoid'].backward(a0_L2, da1_L2)
        da1_L1 = model['linear2'].backward(a1_L1, da0_L2) # IS THIS RIGHT???

        # Backward layer 1
        #da1_L1 = model['loss'].backward(a1_L1, y2i) ### AND IS THIS WHAT GETS PASSED HERE?
        da0_L1 = model['sigmoid'].backward(a0_L1, da1_L1)
        model['linear1'].backward(xi, da0_L1)
        
        ##update layer 2
        model['linear2'].weight = model['linear2'].weight - learningRate * model['linear2'].gradWeight
        model['linear2'].bias = model['linear2'].bias - learningRate * model['linear2'].gradBias
        #
        ##update layer 1
        model['linear1'].weight = model['linear1'].weight - learningRate * model['linear1'].gradWeight
        model['linear1'].bias = model['linear1'].bias - learningRate * model['linear1'].gradBias
          
    if (epoch % 100 == 0) | (epoch == epochsToRun):
        print('epoch[%d] = %.8f' % (epoch, loss / x.shape[0]))
        #print('$$$ ' + 'weight = \n' + str(model['linear'].weight))
        #print('$$$ ' + 'bias = \n' + str(model['linear'].bias))
        print('************')
        

epoch[0] = 0.03106804
************
epoch[100] = 0.00658460
************
epoch[200] = 0.00638634
************
epoch[300] = 0.00607706
************
epoch[400] = 0.00565687
************
epoch[401] = 0.00565294
************


In [17]:
[nn_Sigmoid().inverse(pred) for pred in preds400]

[2.6010303414367244,
 1.3455039468729162,
 1.8232434596667109,
 2.3983093920701313,
 3.6060067434999339,
 2.0168084557116441,
 2.3038982676203852,
 1.9995111218376229,
 3.9836879856909815,
 2.9271971435578732,
 2.1571224982227211,
 3.8293889189737205,
 2.0251408893245335,
 4.0502298306859386,
 3.2776849379905335,
 2.8108477604535484,
 2.9283347016473349,
 1.893386146286685,
 4.2273498280527333,
 1.793682764756005,
 1.1998190623075218,
 1.3057503041376204,
 2.9277035513377969,
 2.3443095056968981,
 1.4845161679974597,
 2.1915905543348004,
 3.0413613844895231,
 2.3286423688511921,
 3.7816962619121672,
 2.6207094169982841,
 2.342078092787951,
 2.2493933565911273,
 2.3945358971029904,
 2.517149073709565,
 2.6979085983027722,
 3.617011688549626,
 4.6127102331624217,
 2.414019543475491,
 2.9587379655986155,
 2.2882194805987304,
 1.6095120425819112,
 1.3528908165291929,
 2.8334251884905024,
 3.8426155409623903,
 1.4620392678917367,
 3.1625949917377336,
 4.0040944096012279,
 2.8684300996132093

### the 5-layer


In [18]:
learningRate = 0.1

model = {}  
#
#model['linear1'] = nn_Linear(4, 5)
#model['linear2'] = nn_Linear(5, 3)
model['linear1'] = nn_Linear(x.shape[1], 12)
model['linear2'] = nn_Linear(12, 8)
model['linear3'] = nn_Linear(8, 5)
model['linear4'] = nn_Linear(5, 5)

model['linearF'] = nn_Linear(5, 1)
#
model['sigmoid'] = nn_Sigmoid()
model['loss'] = nn_MSECriterion()

preds100 = []
preds1000 = []

epochsToRun = 1001
for epoch in range(0, epochsToRun+1):
    loss = 0
    
    for i in range(0, x.shape[0]):
        xi = x[i:i+1, :]
        #yi = y[i:i+1, :] 
        yi = y[i:i+1]

        # Forward layer 1
        a0_L1 = model['linear1'].forward(xi)
        a1_L1 = model['sigmoid'].forward(a0_L1)
        # Forward layer 2
        a0_L2 = model['linear2'].forward(a1_L1)
        a1_L2 = model['sigmoid'].forward(a0_L2)
        # Forward layer 3
        a0_L3 = model['linear3'].forward(a1_L2)
        a1_L3 = model['sigmoid'].forward(a0_L3)
        # Forward layer 4
        a0_L4 = model['linear4'].forward(a1_L3)
        a1_L4 = model['sigmoid'].forward(a0_L4)
        # Forward layer F
        a0_LF = model['linearF'].forward(a1_L4)
        a1_LF = model['sigmoid'].forward(a0_LF)
        #
        if epoch == 100: 
            preds100 = preds100 + [float(a1_LF)]
        if epoch == 1000: 
            preds1000 = preds1000 + [float(a1_LF)]
            #print(a1_L2)
            #print(yi)
            #print(model['loss'].forward(a1_L2, yi))
        #
        loss += model['loss'].forward(a1_LF, yi)

        # Backward layer F
        da1_LF = model['loss'].backward(a1_LF, yi)
        da0_LF = model['sigmoid'].backward(a0_LF, da1_LF)
        da1_L4 = model['linearF'].backward(a1_L4, da0_LF) # IS THIS RIGHT???
        # Backward layer 4
        da0_L4 = model['sigmoid'].backward(a0_L4, da1_L4)
        da1_L3 = model['linear4'].backward(a1_L3, da0_L4)
        # Backward layer 3
        da0_L3 = model['sigmoid'].backward(a0_L3, da1_L3)
        da1_L2 = model['linear3'].backward(a1_L2, da0_L3)

        # Backward layer 2
        da0_L2 = model['sigmoid'].backward(a0_L2, da1_L2)
        da1_L1 = model['linear2'].backward(a1_L1, da0_L2)

        # Backward layer 1
        da0_L1 = model['sigmoid'].backward(a0_L1, da1_L1)
        model['linear1'].backward(xi, da0_L1)
        
        ####
        ##update layer F
        model['linearF'].weight = model['linearF'].weight - learningRate * model['linearF'].gradWeight
        model['linearF'].bias = model['linearF'].bias - learningRate * model['linearF'].gradBias
        ##update layer 4
        model['linear4'].weight = model['linear4'].weight - learningRate * model['linear4'].gradWeight
        model['linear4'].bias = model['linear4'].bias - learningRate * model['linear4'].gradBias
        ##update layer 3
        model['linear3'].weight = model['linear3'].weight - learningRate * model['linear3'].gradWeight
        model['linear3'].bias = model['linear3'].bias - learningRate * model['linear3'].gradBias

        ##update layer 2
        model['linear2'].weight = model['linear2'].weight - learningRate * model['linear2'].gradWeight
        model['linear2'].bias = model['linear2'].bias - learningRate * model['linear2'].gradBias
        #
        ##update layer 1
        model['linear1'].weight = model['linear1'].weight - learningRate * model['linear1'].gradWeight
        model['linear1'].bias = model['linear1'].bias - learningRate * model['linear1'].gradBias
          
    if (epoch % 100 == 0) | (epoch == epochsToRun):
        print('epoch[%d] = %.8f' % (epoch, loss / x.shape[0]))
        #print('$$$ ' + 'weight = \n' + str(model['linear'].weight))
        #print('$$$ ' + 'bias = \n' + str(model['linear'].bias))
        print('************')
        

epoch[0] = 0.03110181
************
epoch[100] = 0.00980162
************
epoch[200] = 0.00980153
************
epoch[300] = 0.00980143
************
epoch[400] = 0.00980134
************
epoch[500] = 0.00980125
************
epoch[600] = 0.00980115
************
epoch[700] = 0.00980106
************
epoch[800] = 0.00980097
************
epoch[900] = 0.00980087
************
epoch[1000] = 0.00980078
************
epoch[1001] = 0.00980078
************


In [19]:
[nn_Sigmoid().inverse(pred) for pred in preds1000]

[2.3401095262686087,
 2.3436322656354833,
 2.3452547971257469,
 2.3379300828404275,
 2.3366652620360218,
 2.33951425632941,
 2.3382457016809699,
 2.3369801356681061,
 2.3402861055011308,
 2.3435735718632618,
 2.3463862465343857,
 2.3498100850999966,
 2.3532146003071084,
 2.3459058593418445,
 2.3475184235400484,
 2.3510219777527763,
 2.3497268649737433,
 2.3532178704380109,
 2.3519177701154956,
 2.3551408856258949,
 2.3567134786956565,
 2.3494118309641134,
 2.3481203745271917,
 2.3497233063207812,
 2.3532228538520146,
 2.3519227293745719,
 2.3506255637039293,
 2.3522176156308876,
 2.3509197738040934,
 2.3541483674802723,
 2.3468415089194887,
 2.3455559419332204,
 2.3489844623295628,
 2.3517689698185151,
 2.3533560648471599,
 2.3549363253568645,
 2.3581429329207308,
 2.3615009091053589,
 2.360182352955031,
 2.358866689012848,
 2.3622206665005954,
 2.3549302963395347,
 2.3476250653691753,
 2.3492301469020584,
 2.3524679995539342,
 2.3451577915969808,
 2.3467736203254757,
 2.34838239763220

## accuracy

In [20]:
def Accuracy(yActual, yPred):
    return float(len([i for i in range(len(yPred)) if abs(yActual[i]-yPred[i])<=1])/float(len(yPred)))

In [21]:
## the 2-layer
Accuracy(yBig, [nn_Sigmoid().inverse(pred) for pred in preds400])

0.533678756476684

In [22]:
## the 5-layer
Accuracy(yBig, [nn_Sigmoid().inverse(pred) for pred in preds1000])

0.39896373056994816