In [502]:
import numpy as np
import copy
import csv
import matplotlib.pyplot as plt
from sklearn import linear_model, metrics
import math
def plot_decision_boundary(model, X, y, filename, h=0.01):
    """
    Plot a classifier's decision regions over a set of 2-D points and save the figure.

    The points in X are scattered, colour-coded according to y, on top of a filled
    contour of the model's predictions evaluated on a regular grid. The grid covers
    the range of the data padded by 1 on every side.

    Example usage:
    say we have a function predict(x, other params) which makes 0/1 predictions for
    an array of points x and we want to plot the train set, then call:
    plot_decision_boundary(lambda x: predict(x, other params), X_train, Y_train, "train.png")

    params(5):
        model : a function which takes an (n x 2) numpy array of points and returns a
                numpy array holding one label per point (n values in total)
        X : a (m x 2) numpy array with the points
        y : a numpy array with the m labels used to colour the points
        filename : path the figure is written to with plt.savefig
        h : distance between neighbouring grid points (default 0.01)
    outputs(None)
    raises:
        ValueError if h is not strictly positive
    """
    if h <= 0:
        raise ValueError("h must be strictly positive")
    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the label for every grid point in a single call, then fold the flat
    # predictions back into the grid's shape for contourf.
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
    plt.savefig(filename)
    # Close the figure so repeated calls do not draw on top of each other.
    plt.close()

In [527]:
def csv_reader(file_obj):
    """
    Read a CSV stream and convert every cell to a float.

    params(1):
        file_obj : an open text file (anything csv.reader accepts)
    outputs(1):
        a list with one inner list of floats per CSV row, in file order
    """
    return [[float(cell) for cell in record] for record in csv.reader(file_obj)]

def mnist_reader(file_obj):
    """
    Read a CSV stream whose last column is a label and binarise that label.

    params(1):
        file_obj : an open text file (anything csv.reader accepts)
    outputs(2):
        features : one list of floats per row, holding every column except the last
        labels : one float per row, 0.0 when the last column equals 6 and 1.0 otherwise
    """
    features = []
    labels = []
    for record in csv.reader(file_obj):
        values = [float(cell) for cell in record]
        labels.append(0.0 if values[-1] == 6 else 1.0)
        features.append(values[:-1])
    return features, labels

def csv_reader_y(file_obj):
    """
    Read a CSV stream and return all of its cells as one flat list of floats.

    params(1):
        file_obj : an open text file (anything csv.reader accepts)
    outputs(1):
        a flat list of floats, row after row, left to right within each row
    """
    return [float(cell) for record in csv.reader(file_obj) for cell in record]

In [556]:
def sigmoid(x):
    """
    Logistic function 1 / (1 + exp(-x)), applied element-wise without overflow.

    The naive form evaluates exp(-x), which overflows (and emits a RuntimeWarning)
    for large negative x. Here exp is only ever taken of a non-positive number, and
    the two algebraically equal forms 1/(1+e) and e/(1+e) are selected by sign.

    params(1):
        x : a scalar or numpy array (anything np.asarray accepts)
    outputs(1):
        the sigmoid of x, with the same shape as x; a scalar for scalar input
    """
    x = np.asarray(x)
    if x.dtype.kind in "biu":
        # Boolean/integer input: move to float first so negation cannot wrap around.
        x = x.astype(np.float64)
    e = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # np.where always yields an array; hand 0-d results back as scalars.
    return out[()] if out.ndim == 0 else out

def fprop(inp, lay_w, lay_b):
    """
    Forward-propagate a single example through the network.

    params(3):
        inp : the example's features; flattened to 1-D before use
        lay_w : list of weight matrices, first layer first
        lay_b : list of bias rows, paired with lay_w by position
    outputs(1):
        a list of 1-D arrays: the flattened input followed by the sigmoid
        activation of every layer, so the last entry is the network output
    """
    activations = [np.reshape(inp, [-1])]
    for weights, bias in zip(lay_w, lay_b):
        # Treat the previous activation as a (1 x n) row so the product with the
        # weight matrix lines up with the bias row.
        as_row = np.reshape(activations[-1], [1, -1])
        pre_activation = np.add(np.dot(as_row, weights), bias)
        activations.append(sigmoid(np.reshape(pre_activation, [-1])))
    return activations

def bprop(y_exp, lay_w, lay_b, outpts):
    """
    Back-propagate one training example and return its per-layer gradients.

    params(4):
        y_exp : expected value for the single output unit
        lay_w : list of weight matrices, first layer first
        lay_b : list of bias rows, paired with lay_w (only its length matters here)
        outpts : list of 1-D activations with outpts[0] the flattened input and
                 outpts[-1] the network output (the layout fprop returns)
    outputs(2):
        wei_up : list of arrays, wei_up[k] shaped like lay_w[k]
        bia_up : list of (1 x units) arrays, one per layer
        Both lists are ordered first layer first.
    """
    # Output delta is the raw prediction error; no sigmoid-derivative factor is
    # applied at the output unit.
    # NOTE(review): that matches a cross-entropy gradient rather than the squared
    # error neural_train reports — confirm this is intended.
    delta = np.reshape([-1 * (y_exp - outpts[-1][0])], [-1])
    wei_up = []
    bia_up = []
    for wei, _bia, out_prev in zip(reversed(lay_w), reversed(lay_b), reversed(outpts[:-1])):
        out_prev = np.asarray(out_prev)
        delta_row = np.reshape(delta, [1, -1])
        bia_up.append(delta_row)
        # Weight gradient: outer product of the layer's input and its delta.
        wei_up.append(np.dot(np.reshape(out_prev, [-1, 1]), delta_row))
        # Push the delta back through the weights, then scale element-wise by the
        # sigmoid derivative out*(1-out). This is the same product the diagonal
        # matrices produced, without building n x n arrays.
        back = np.reshape(np.dot(wei, np.reshape(delta, [-1, 1])), [-1])
        delta = out_prev * (1 - out_prev) * back
    return wei_up[::-1], bia_up[::-1]

def _apply_batch_update(lay_w, lay_b, wei_up, bia_up, step):
    """Take one gradient-descent step from the accumulated gradients, then zero them."""
    for j in range(len(lay_w)):
        lay_w[j] -= step * wei_up[j]
        lay_b[j] -= step * bia_up[j]
        wei_up[j].fill(0.0)
        bia_up[j].fill(0.0)


def neural_train(inp, out_exp, hidden_lay, l_rate, batch, max_iter):
    """
    Train a sigmoid network with a single output unit by mini-batch gradient descent.

    Weights and biases start uniformly in [-5, 5). Every epoch visits the examples
    in order, accumulates the gradients returned by bprop and applies them after
    every `batch` examples; a trailing partial batch is applied at the end of the
    epoch. The step size decays as l_rate / sqrt(epoch + 1). Training stops after
    max_iter epochs, or earlier once the summed squared error of an epoch drops by
    less than 0.001 compared to the previous epoch.

    Progress is printed: the epoch number every epoch, the squared error summed
    over the preceding epochs every 10th epoch, and the example index every
    1000 examples.

    params(6):
        inp : sequence of feature rows; len(inp[0]) sets the input width
        out_exp : sequence of expected outputs, paired with inp by position
        hidden_lay : list with the number of units of each hidden layer
        l_rate : base learning rate (decayed per epoch as described above)
        batch : number of examples accumulated per weight update
        max_iter : maximum number of epochs
    outputs(2):
        lay_w : list of weight matrices, first layer first
        lay_b : list of (1 x units) bias rows, paired with lay_w
    """
    # Layer widths: input features, every hidden layer, then the single output unit.
    sizes = [len(inp[0])] + list(hidden_lay) + [1]
    lay_w, lay_b, wei_up, bia_up = [], [], [], []
    for n_in, n_out in zip(sizes[:-1], sizes[1:]):
        lay_w.append((np.random.rand(n_in, n_out)-0.5)*10)
        lay_b.append((np.random.rand(1, n_out)-0.5)*10)
        # Gradient accumulators, shaped like the parameters they update.
        wei_up.append(np.zeros((n_in, n_out)))
        bia_up.append(np.zeros((1, n_out)))

    err_sum = 0.0
    prev_err_sum = 0.0
    total_err_sum = 0.0
    epoch = 0
    while epoch < max_iter:
        # Decay the caller's learning rate instead of a hard-coded constant.
        step = l_rate / math.sqrt(epoch + 1)
        print("Epoch", epoch)
        if epoch % 10 == 0:
            print(epoch, total_err_sum)
            total_err_sum = 0.0
        pending = False
        for i in range(len(inp)):
            if i % 1000 == 0:
                print(i)
            outputs = fprop(inp[i], lay_w, lay_b)
            err_sum += (out_exp[i] - outputs[-1][0])**2
            wup_tmp, bup_tmp = bprop(out_exp[i], lay_w, lay_b, outputs)
            for acc_w, acc_b, wup, bup in zip(wei_up, bia_up, wup_tmp, bup_tmp):
                acc_w += wup
                acc_b += bup
            pending = True
            if (i+1) % batch == 0:
                _apply_batch_update(lay_w, lay_b, wei_up, bia_up, step)
                pending = False
        if pending:
            # len(inp) is not a multiple of batch: apply the leftover gradients now
            # rather than letting them leak into the next epoch or be discarded.
            _apply_batch_update(lay_w, lay_b, wei_up, bia_up, step)

        epoch += 1
        diff = prev_err_sum - err_sum
        if 0 <= diff < 0.001:
            # The error still decreased, but by less than the tolerance: converged.
            break
        prev_err_sum = err_sum
        total_err_sum += err_sum
        err_sum = 0.0
    return lay_w, lay_b

def fprop_new(inp, lay_w, lay_b):
    """
    Forward-propagate the input through the network without reshaping it.

    Unlike fprop, the input is used as given, so a 2-D array with one example per
    row is propagated in a single pass.

    params(3):
        inp : input point(s), passed straight to np.dot with the first weight matrix
        lay_w : list of weight matrices, first layer first
        lay_b : list of bias rows, paired with lay_w by position
    outputs(1):
        a list whose first entry is inp itself, followed by the sigmoid activation
        of every layer
    """
    activations = [inp]
    for weights, bias in zip(lay_w, lay_b):
        pre_activation = np.add(np.dot(activations[-1], weights), bias)
        activations.append(sigmoid(pre_activation))
    return activations


def neural_predict(inp):
    """
    Predict 0/1 labels with the network stored in the module-level lw and lb.

    The last-layer activation from fprop_new is thresholded at 0.5 for all rows
    at once; only the first output column is used.

    params(1):
        inp : one point or a 2-D array with one point per row
    outputs(1):
        an (n x 1) integer numpy array of 0/1 labels, one row per input point
        (a single point yields a 1 x 1 array)
    """
    # lw and lb are expected to have been set at module level by a training cell.
    global lw, lb
    activation = np.asarray(fprop_new(inp, lw, lb)[-1])
    return (activation[:, :1] >= 0.5).astype(int)

In [450]:
# Load the toy training split: feature rows from toy_trainX.csv and the labels,
# flattened into one list, from toy_trainY.csv.
with open("toy_data/toy_trainX.csv", "r") as f_obj:
    train_inp = csv_reader(f_obj)
with open("toy_data/toy_trainY.csv", "r") as f_obj:
    train_out = csv_reader_y(f_obj)

In [486]:
# Two hidden layers of 5 units each.
hidden_lay = [5, 5]
# The batch size equals the training-set size, i.e. one weight update per epoch.
# NOTE(review): neural_train requires max_iter; the recorded output below runs past
# epoch 4000, so the cap is set to 5000 — confirm the intended value.
lw, lb = neural_train(train_inp, train_out, hidden_lay, 0.001, len(train_inp), 5000)


0 0.0
10 1074.5158382483216
20 953.060725938025
30 952.841803156986
40 952.6366657505607
50 952.4416793428418
60 952.2560282226215
70 952.0789616067855
80 951.9097871726783
90 951.7478649229877
100 951.5926017230138
110 951.4434464219922
120 951.2998854817176
130 951.1614390464965
140 951.0276573975583
150 950.8981177426882
160 950.7724212983219
170 950.6501906267921
180 950.5310671960247
190 950.4147091328983
200 950.3007891447975
210 950.1889925866717
220 950.0790156533302
230 949.9705636786827
240 949.8633495253752
250 949.7570920496796
260 949.6515146277336
270 949.546343730213
280 949.44130753337
290 949.3361345550478
300 949.230552304821
310 949.1242859378456
320 949.0170569023162
330 948.9085815706305
340 948.7985698445115
350 948.6867237243467
360 948.5727358329914
370 948.4562878841443
380 948.3370490852311
390 948.2146744644276
400 948.088803111154
410 947.9590563189001
420 947.8250356187365
430 947.6863206912514
440 947.5424671438955
450 947.3930041398797
460 947.23743186371

3590 357.68826939658857
3600 357.50738384690953
3610 357.3278042771678
3620 357.1495212111579
3630 356.9725251760324
3640 356.796806701813
3650 356.6223563209994
3660 356.44916456827605
3670 356.27722198032046
3680 356.1065190957054
3690 355.93704645489896
3700 355.76879460035923
3710 355.60175407672193
3720 355.43591543107846
3730 355.27126921334434
3740 355.1078059767145
3750 354.9455162782045
3760 354.7843906792715
3770 354.6244197465186
3780 354.4655940524742
3790 354.3079041764462
3800 354.1513407054469
3810 353.99589423518665
3820 353.8415553711319
3830 353.6883147296233
3840 353.5361629390529
3850 353.3850906410954
3860 353.2350884919882
3870 353.086147163859
3880 352.9382573460966
3890 352.791409746759
3900 352.64559509401784
3910 352.5008041376324
3920 352.3570276504519
3930 352.21425642993967
3940 352.0724812997166
3950 351.93169311112075
3960 351.7918827447773
3970 351.6530411121761
3980 351.5151591572532
3990 351.37822785797283
4000 351.2422382279053
4010 351.10718131779845

In [487]:
# Load the toy test split.
with open("toy_data/toy_testX.csv", "r") as f_obj:
    inp_test = csv_reader(f_obj)
with open("toy_data/toy_testY.csv", "r") as f_obj:
    out_exp_test = csv_reader_y(f_obj)


def accuracy(inputs, labels):
    """
    Fraction of points for which neural_predict returns the expected label.

    params(2):
        inputs : sequence of feature rows, each passed to neural_predict on its own
        labels : sequence of expected 0/1 labels, paired with inputs by position
    outputs(1):
        correct / total as a float, or nan when there is nothing to score
    """
    corr = 0
    total = 0
    for data_pt, corr_pred in zip(inputs, labels):
        # neural_predict returns a (1 x 1) array for a single point; unwrap it so the
        # comparison is scalar-to-scalar instead of relying on array truthiness.
        if corr_pred == neural_predict(data_pt)[0][0]:
            corr += 1
        total += 1
    return corr / total if total else float("nan")


# Training accuracy first, then test accuracy.
print(accuracy(train_inp, train_out))
print(accuracy(inp_test, out_exp_test))

# Disabled logistic-regression baseline, kept for reference:
# loglin = linear_model.LogisticRegression()
# loglin.fit(train_inp, train_out)
# ans = loglin.predict(inp_test)
# print(metrics.accuracy_score(ans, out_exp_test))
# ans = loglin.predict(train_inp)
# print(metrics.accuracy_score(ans, train_out))
# plot_decision_boundary(lambda x:loglin.predict(x), np.array(inp_test), np.array(out_exp_test), "lin_reg_test.png")
# plot_decision_boundary(lambda x:loglin.predict(x), np.array(train_inp), np.array(train_out), "lin_reg_train.png")

# Save the network's decision regions with the test points drawn on top.
plot_decision_boundary(neural_predict, np.array(inp_test), np.array(out_exp_test), "neural_test_5_5.png")
# plot_decision_boundary(lambda x:neural_predict(x), np.array(train_inp), np.array(train_out), "neural_train_5.png")


0.8921052631578947
0.8333333333333334


In [547]:
# Load the MNIST train and test CSVs. mnist_reader splits off the last column of
# every row and turns it into a binary label (0.0 when it equals 6, 1.0 otherwise).
# NOTE(review): this rebinds train_inp / train_out / inp_test / out_exp_test, so the
# toy data loaded by earlier cells is no longer reachable under those names.
with open("mnist_data/MNIST_train.csv", "r") as f_obj:
    train_inp, train_out = mnist_reader(f_obj)
with open("mnist_data/MNIST_test.csv", "r") as f_obj:
    inp_test, out_exp_test = mnist_reader(f_obj)
# print(train_out)

In [557]:
# A single hidden layer of 100 units.
hidden_lay = [100]
print(len(train_inp))
# Train on rows 1750..1849 only (100 examples) with l_rate=0.001, batch=100 and
# max_iter=200; with 100 examples and batch=100 there is one weight update per epoch.
# NOTE(review): the remaining training rows are unused here — confirm that this
# small subset is intentional and not a leftover from debugging.
lw, lb = neural_train(train_inp[1750:1850], train_out[1750:1850], hidden_lay, 0.001, 100, 200)

10000
Epoch 0
0 0.0
0


  after removing the cwd from sys.path.


Epoch 1
0
Epoch 2
0
Epoch 3
0
Epoch 4
0
Epoch 5
0
Epoch 6
0
Epoch 7
0
Epoch 8
0
Epoch 9
0
Epoch 10
10 602.4707143471488
0
Epoch 11
0
Epoch 12
0
Epoch 13
0
Epoch 14
0
Epoch 15
0
Epoch 16
0
Epoch 17
0
Epoch 18
0
Epoch 19
0
Epoch 20
20 385.64715063354265
0
Epoch 21
0
Epoch 22
0
Epoch 23
0
Epoch 24
0
Epoch 25
0
Epoch 26
0
Epoch 27
0
Epoch 28
0
Epoch 29
0
Epoch 30
30 309.3546161046396
0
Epoch 31
0
Epoch 32
0
Epoch 33
0
Epoch 34
0
Epoch 35
0
Epoch 36
0
Epoch 37
0
Epoch 38
0
Epoch 39
0
Epoch 40
40 272.23397227466313
0
Epoch 41
0
Epoch 42
0
Epoch 43
0
Epoch 44
0
Epoch 45
0
Epoch 46
0
Epoch 47
0
Epoch 48
0
Epoch 49
0
Epoch 50
50 245.9028848192191
0
Epoch 51
0
Epoch 52
0
Epoch 53
0
Epoch 54
0
Epoch 55
0
Epoch 56
0
Epoch 57
0
Epoch 58
0
Epoch 59
0
Epoch 60
60 216.24673310734693
0
Epoch 61
0
Epoch 62
0
Epoch 63
0
Epoch 64
0
Epoch 65
0
Epoch 66
0
Epoch 67
0
Epoch 68
0
Epoch 69
0
Epoch 70
70 188.8133836599242
0
Epoch 71
0
Epoch 72
0
Epoch 73
0
Epoch 74
0
Epoch 75
0
Epoch 76
0
Epoch 77
0
Epoch 78
0
E

In [560]:
# Score the trained network on the test split, one point at a time.
matches = [
    bool(corr_pred == neural_predict(data_pt))
    for data_pt, corr_pred in zip(inp_test, out_exp_test)
]
corr = sum(matches)
total = len(matches)

# Number of correct predictions, then the accuracy.
print(corr)
print(float(corr) / total)

  after removing the cwd from sys.path.


1936
0.5377777777777778
