### Installing Packages

In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

#### Create Fictional Data

In [2]:
Movie_Ratings = {'Rating_1' : [0,0,1,1,1],
                 'Rating_2' : [1,1,0,0,1],
                 'My_Ratings' : [0,0,1,1,1]}

movie_df = pd.DataFrame(Movie_Ratings)
initial_weights = [0.5,0.5]
initail_intercept = 3.0

In [136]:
x_test = pd.DataFrame({'Rating_1' : [0,0,1,1,1],
                 'Rating_2' : [1,1,0,0,1]})
y_test = pd.DataFrame({'My_Ratings' : [0,0,1,1,1]})
initial_weights = [0.5,0.5]
initail_intercept = 3.0

##### Creating a Function to calculate z-values

In [6]:
def get_z_vals(x, weights_vector, intercept):
    z = []
    for i in range(len(x)):
        line = []
        for j in range(x.shape[1]):
            line.append(x.iloc[i,:][j] * weights_vector[j])
        z.append(sum(line) + intercept)
    return z

In [115]:
def get_z_vals(x, weights_vector, intercept):
    return np.dot(x, weights_vector) + intercept

In [217]:
get_z_vals(x_test, initial_weights, initail_intercept)

array([3.48076611, 3.48076611, 3.50076611, 3.50076611, 3.98153221])

##### $y_{pred}$ function

In [120]:
def get_y_hat(z_vector):
    y_hat = []
    for z_score in z_vector:
        y_hat.append(1 / (1 + math.exp(-z_score)))
    
    return y_hat

In [121]:
def get_y_hat(z):
    return 1 / (1+ np.exp(-z))

In [122]:
get_y_hat(get_z_vals(x_test, initial_weights, initail_intercept))

array([0.97068777, 0.97068777, 0.97068777, 0.97068777, 0.98201379])

#### Cost function

In [125]:
def cost_fun(y, y_hat):
    print(y_hat[i])
    return -1/len(y_hat) * sum([y.iloc[i]*math.log(y_hat[i]) + 
                     (1-y.iloc[i])* math.log(1-(y_hat[i]))for i in range(len(y_hat))])[0]

In [275]:
def cost_fun(y, y_hat):
    print(y_hat)
    return (-1/len(y_hat)) * np.sum(np.array(y) * np.log(y_hat) + (1-np.array(y)) * (np.log(1 - y_hat)))

In [239]:
cost_fun(y_test, get_y_hat(get_z_vals(x_test, initial_weights, initail_intercept)))

7.337151601008292

##### Updating Parameters

In [12]:
def update_intercept(y, y_hat, alpha, intercept):
    
    difference = [y_hat[i] - y.iloc[i] for i in range(len(y_hat))]
    
    return (intercept - (alpha * (sum(difference)/len(difference))))[0]

In [13]:
update_intercept(y_test, get_y_hat(get_z_vals(x_test, initial_weights, initail_intercept)), 0.01, initail_intercept)

2.996270470265935

In [14]:
def update_weight_vector(y, y_hat, alpha, weight_vec, x):
    
    difference = [y_hat[i] - y.iloc[i] for i in range(len(y_hat))]
    
    del_w = []
    for i in range(len(weight_vec)):
        delta_w = sum([difference[j] * x.iloc[j][i] for j in range(len(difference))]) / len(difference)
        del_w.append(delta_w)
        
    return [(weight_vec[i] - (alpha) * del_w[i])[0] for i in range(len(weight_vec))]


In [214]:
def update_weight_vector(y, y_hat, alpha, weight_vec, x):
    
    diff = np.subtract(y_hat ,np.array(y).reshape(1,len(y)))

    return np.subtract(weight_vec, (alpha * np.dot(diff, x)))[0]

In [215]:
np_test(y_test, get_y_hat(get_z_vals(x_test, initial_weights, initail_intercept)),
                     0.01, initial_weights, x_test)

array([[0.50076611, 0.48076611]])

In [216]:
update_weight_vector(y_test, get_y_hat(get_z_vals(x_test, initial_weights, initail_intercept)),
                     0.01, initial_weights, x_test)

array([0.50076611, 0.48076611])

### Testing

In [16]:
def gradient_descent(x,y, init_w, init_b, alpha = 0.01):
    
    #Get z values using x, init_w and init b
    z = get_z_vals(x, init_w, init_b)
        
    #Get y prediction values
    y_hat = get_y_hat(z)
        
    # Calc Cost Function 
    Cost_Fun = cost_fun(y, y_hat)
    
    #Delta w and Delta b
    intercept_new = update_intercept(y, y_hat, alpha, init_b)
        
    new_weights = update_weight_vector(y, y_hat, alpha, init_w, x)

    
    return new_weights, Cost_Fun, intercept_new, [round(pred) for pred in y_hat]

In [203]:
gradient_descent(x_test,y_test, initial_weights, initail_intercept)

0.9706877692486436


(array([[0.50076611, 0.48076611]]),
 1.427430320201658,
 2.996270470265935,
 [1, 1, 1, 1, 1])

##### Creting Iterative Function to handle iterations/tolerance

In [272]:
def grad_its(x,y, init_w, init_b, alpha = 0.01, tol = 1e-8, max_its = 1000):
    i = 0
    new_weights, Cost_Fun, intercept_new, _ = gradient_descent(x,y, init_w, init_b, alpha)
    new_weights, Cost_Fun_new, intercept_new , _= gradient_descent(x,y,new_weights, intercept_new, alpha)
    
    while (abs(Cost_Fun - Cost_Fun_new) > tol) and i < max_its:
        Cost_Fun = Cost_Fun_new
        new_weights, Cost_Fun_new, intercept_new, y_hat = gradient_descent(x,y,new_weights, intercept_new, alpha)
        i += 1 
        if i / max_its == 0.25:
            print("Quarter of the way there!")
        elif i / max_its == 0.5:
            print("Halfway there!")
        elif i / max_its == 0.75:
            print("One more quarter to go!")
    print(y_hat)
    return new_weights, Cost_Fun_new, intercept_new, y_hat

In [218]:
grad_its(x_test,y_test, [1.6533184539468795, -1.9471455873302386], 1.0005869287065845,alpha = 0.1, max_its = 1000)

0.2795774298717765
0.2746036381863131
0.2697881124134525
1
0.26512505343876513
2
0.2606087369672776
3
0.2562335483252568
4
0.2519940087049124
5
0.2478847944254633
6
0.2439007505216837
7
0.24003689974948214
8
0.2362884479115444
9
0.23265078624949387
10
0.22911949151792377
11
0.2256903242461424
12
0.22235922560219257
13
0.21912231319777364
14
0.2159758761096584
15
0.21291636934095676
16
0.20994040790237764
17
0.2070447606579776
18
0.20422634405050552
19
0.20148221579730646
20
0.1988095686279425
21
0.1962057241184929
22
0.19366812666428546
23
0.1911943376220748
24
0.18878202964399154
25
0.1864289812185787
26
0.18413307142861607
27
0.18189227493095855
28
0.17970465716007983
29
0.17756836975424467
30
0.17548164620109666
31
0.17344279769781887
32
0.17145020921981693
33
0.16950233579099852
34
0.16759769894812143
35
0.16573488339129655
36
0.16391253381252138
37
0.16212935189404315
38
0.16038409346838417
39
0.15867556583197528
40
0.15700262520452327
41
0.15536417432646013
42
0.1537591601870839


390
0.032797989336023334
391
0.03272310711324846
392
0.032648563667132494
393
0.032574356714661995
394
0.032500483993221405
395
0.03242694326036624
396
0.032353732293599474
397
0.032280848890150655
398
0.032208290866758235
399
0.03213605605945448
400
0.03206414232335344
401
0.03199254753244164
402
0.031921269579371365
403
0.03185030637525689
404
0.03177965584947325
405
0.03170931594945752
406
0.03163928464051296
407
0.031569559905615406
408
0.03150013974522234
409
0.031431022177084426
410
0.03136220523605932
411
0.03129368697392813
412
0.031225465459213942
413
0.031157538777002884
414
0.031089905028767444
415
0.031022562332191906
416
0.030955508821000156
417
0.030888742644785683
418
0.030822261968843575
419
0.030756064974004765
420
0.03069014985647238
421
0.030624514827660113
422
0.030559158114032533
423
0.030494077956947595
424
0.030429272612501005
425
0.03036474035137248
426
0.030300479458674066
427
0.030236488233800193
428
0.03017276499027965
429
0.0301093080556295
430
0.03004611577

774
0.017425565345562288
775
0.017404268189266586
776
0.017383022844550164
777
0.017361829123050076
778
0.017340686837313553
779
0.017319595800792544
780
0.017298555827838246
781
0.017277566733695734
782
0.017256628334498536
783
0.017235740447263385
784
0.017214902889884875
785
0.01719411548113021
786
0.017173378040634065
787
0.017152690388893277
788
0.01713205234726186
789
0.01711146373794576
790
0.017090924383997917
791
0.01707043410931314
792
0.017049992738623156
793
0.017029600097491653
794
0.017009256012309334
795
0.016988960310289077
796
0.01696871281946101
797
0.01694851336866778
798
0.01692836178755964
799
0.016908257906589865
800
0.016888201557009902
801
0.01686819257086472
802
0.016848230780988203
803
0.016828316020998463
804
0.016808448125293265
805
0.016788626929045553
806
0.01676885226819883
807
0.01674912397946272
808
0.016729441900308454
809
0.016709805868964513
810
0.016690215724412216
811
0.01667067130638127
812
0.016651172455345538
813
0.016631719012518677
814
0.01661

(array([ 8.21713272, -4.82704806]),
 0.009433346357437195,
 0.5466358819242223,
 [0, 0, 1, 1, 1])

In [20]:
blobs = pd.read_csv("/Users/joshocurry/Downloads/blobs300.csv")
blobs

Unnamed: 0,X1,X2,X3,X4,Class
0,0.726739,0.035393,1.759229,1.633698,1
1,2.691834,2.715685,-0.085346,2.131453,0
2,2.446502,3.853792,0.487143,2.138328,0
3,0.899178,-0.596257,1.764085,1.863523,1
4,0.520531,0.521058,1.607952,1.757980,1
...,...,...,...,...,...
295,1.324187,0.125884,1.759156,2.089274,1
296,2.095409,3.344137,0.725646,2.181084,0
297,1.524761,3.281810,-0.855361,2.572439,0
298,2.597079,3.353805,1.101402,1.850293,0


In [21]:
x_blobs = blobs[['X1', 'X2', 'X3', 'X4']]
y_blobs = blobs[['Class']]

In [248]:
grad_its(x_blobs,y_blobs, [1,1,1,1] , 10)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


(array([-2.10486482, -5.02127072,  2.72114134,  1.32037664]),
 1625.5293953769647,
 10.003543415091794,
 [1,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  1,
  1,
  1,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  1,
  1,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  0,
  1,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
 

In [23]:
blob_opt = [-1.2974544127210041,
  -2.8621849481304396,
  0.9455682174016076,
  -0.6800520240761868]
b_opt = 8.901394350229799

z = []
for i in range(len(x_blobs)):
    line = []
    for j in range(x_blobs.shape[1]):
        line.append(x_blobs.iloc[i,:][j] * blob_opt[j])
    z.append(sum(line) + b_opt)

#Get y prediction values
y_hat = []
for z_score in z:
    y_hat.append(1/(1+math.exp(-z_score)))
       
errors = []
for pair in list(zip(y_blobs['Class'],y_hat)):
    errors.append(abs(pair[0] - pair[1]))
    
print(sum(errors)/len(errors))
print(max(errors))

0.015672735914413042
0.3633951503524396


In [25]:
get_z_vals(x_blobs, blob_opt, b_opt)

[8.409654994810595,
 -3.894129329265816,
 -6.296642332066293,
 9.842119672781015,
 7.059573962562666,
 -5.619838605008898,
 -4.0811260089403,
 -4.946853754893775,
 -3.1440558554347593,
 7.189378065863846,
 5.717266598611658,
 -4.333328342224709,
 7.576732792811905,
 -5.612699687757074,
 -4.580713210277285,
 6.15616065416236,
 8.699226724864975,
 5.579858364004253,
 -0.6725333871933437,
 7.585672156969465,
 -5.942487824514965,
 5.434581989608764,
 -8.232474445564831,
 -6.503819782485046,
 -5.764964246537911,
 -4.492422474325576,
 -9.048827358837377,
 -5.368550024128739,
 7.895318451530372,
 -4.033791681448767,
 -4.7253089445773195,
 -5.096346838139835,
 -4.855175455210729,
 7.1183952087127595,
 -1.8656301427227788,
 -5.545179157860668,
 -3.86954697218218,
 -3.7119897887704276,
 6.580507687823696,
 -5.675514910978253,
 7.227957719360623,
 -7.591176411404456,
 7.225303082973934,
 8.32921727560763,
 4.987947541292299,
 7.296171887846249,
 -4.715263165264492,
 -5.030895527022237,
 -3.167457

In [249]:
pairs = list(zip(y_blobs['Class'],[round(val) for val in y_hat]))

print(sum([1 for pair in pairs if pair[0] == pair[1]])/len(pairs))

1.0


In [27]:
circles = pd.read_csv("/Users/joshocurry/Downloads/circles600.csv")
circles

Unnamed: 0,X1,X2,Class
0,-0.371514,0.012406,1
1,-1.014230,0.223095,0
2,-1.123804,0.249026,0
3,0.213708,0.032912,1
4,-0.833531,-0.491312,0
...,...,...,...
595,-0.100827,-0.386171,1
596,0.533986,-0.144301,1
597,-0.445042,-0.042955,1
598,-0.052254,-0.324085,1


In [28]:
x_circles = circles[['X1', 'X2']]
y_circles = circles[['Class']]

In [250]:
grad_its(x_circles,y_circles, [0.1,0.1] , 0.05, max_its=500, alpha = 0.1, tol = 1e-8)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


(array([-1.78344888,  5.13153762]),
 760.3813592948692,
 -0.0009702125723146003,
 [0,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  0,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  0,
  0,
  0,
  1,
  0,
  1,
  0,

In [263]:
circles_pred = get_y_hat(get_z_vals(x_circles, [-1.78344888,  5.13153762], -0.0009702125723146003))

In [264]:
pairs = list(zip(y_circles['Class'],[round(val) for val in circles_pred]))

print(sum([1 for pair in pairs if pair[0] == pair[1]])/len(pairs))

0.5116666666666667


In [36]:
np.random.normal(loc = 0, scale = 0.1)

-0.036798101039065015

In [260]:
def one_hidden_layer(x,y,n_hidden_nodes = 4,alpha = 0.01, tol = 1e-8, max_its = 1000):
    
    hidden_dict = dict()
    
    for i in range(n_hidden_nodes):
        
        print(f'Starting Node {i} of {n_hidden_nodes} nodes')
        init_w = list(abs(np.random.normal(loc = 0, scale = 0.01, size = (1,x.shape[1])))[0]) 
        
        *_,predictions = grad_its(x,y,
                         init_w,
                         np.random.normal(loc = 0, scale = 0.01)
                         ,alpha, tol, max_its)
        hidden_dict[i] = predictions
        
    hidden_df = pd.DataFrame(hidden_dict)
    
    final_weights = list(abs(np.random.normal(loc = 0, scale = 0.01, size = (1,n_hidden_nodes)))[0]) 
    print('FINAL +++++++++++++++++++++++++++++++++')
    return  grad_its(hidden_df,y,
                         final_weights,
                         np.random.normal(loc = 0, scale = 0.01),
                        alpha, tol, max_its)[-1]
    

In [256]:
one_hidden_layer(x_blobs, y_blobs,alpha = 0.01, tol = 1e-5, max_its = 100)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
4

[1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,


In [258]:
circles_predictions = one_hidden_layer(x_circles, y_circles, 4, alpha = 0.01, max_its = 1000)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862


949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000


In [261]:

total = list(zip(circles_predictions, y_circles.iloc[:,0]))
print(sum([1 for pair in total if pair[0] == pair[1]])/len(total))

0.5216666666666666


In [43]:
# This function taken from the CIFAR website

def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

# Loaded in this way, each of the batch files contains a dictionary with the following elements:
#   data -- a 10000x3072 numpy array of uint8s. Each row of the array stores a 32x32 colour image. 
#           The first 1024 entries contain the red channel values, the next 1024 the green, and the final 1024 the blue. 
#           The image is stored in row-major order, so that the first 32 entries of the array are the red channel values 
#           of the first row of the image.
#   labels -- a list of 10000 numbers in the range 0-9. 
#             The number at index i indicates the label of the ith image in the array data.

In [44]:
def loadbatch(batchname):
    folder = 'cifar-10-batches-py'
    batch = unpickle(folder+"/"+batchname)
    return batch

In [45]:
def loadlabelnames():
    folder = 'cifar-10-batches-py'
    meta = unpickle(folder+"/"+'batches.meta')
    return meta[b'label_names']

In [46]:
batch1 = loadbatch('data_batch_1')
print("Number of items in the batch is", len(batch1))

# Display all keys, so we can see the ones we want
print('All keys in the batch:', batch1.keys())

Number of items in the batch is 4
All keys in the batch: dict_keys([b'batch_label', b'labels', b'data', b'filenames'])


In [47]:
data = batch1[b'data']
labels = batch1[b'labels']
print ("size of data in this batch:", len(data), ", size of labels:", len(labels))
print (type(data))
print(data.shape)

names = loadlabelnames()

size of data in this batch: 10000 , size of labels: 10000
<class 'numpy.ndarray'>
(10000, 3072)


In [48]:
cat_car = []
cc_labels = []
cc_name = []

for i in range(len(data)):
    if labels[i] == 1 or labels[i] == 3:
        cat_car.append(data[i])
        cc_labels.append(labels[i])
        cc_name.append(names[labels[i]])

In [49]:
def gray(pic):
    pic.shape = (3,32,32)
    pic = pic.transpose([1,2,0])

    r, g, b = pic[:,:,0], pic[:,:,1], pic[:,:,2]
    gray = 0.2990 * r + 0.2870 * g + 0.1140 * b
    
    return gray

In [50]:
cat_car_gray = [gray(x) for x in cat_car]

In [54]:
cat_ind = []

for i in cc_labels:
    if i == 1:
        cat_ind.append(0)
    else:
        cat_ind.append(1)
        
cat_ind = pd.DataFrame(cat_ind)
cat_ind

Unnamed: 0,0
0,0
1,0
2,1
3,1
4,1
...,...
1985,0
1986,0
1987,1
1988,0


In [53]:
cc_dict = {}

for i in range(len(cat_car_gray)):
    cat_car_gray[i].shape = (1024,)
    cc_dict[i] = cat_car_gray[i]
    
cat_car_df = pd.DataFrame(cc_dict).T

In [277]:
%%time
pic_pred = grad_its(X_scaled,cat_ind, list(abs(np.random.normal(loc = 0, scale = 0.1, size = (1,cat_car_df.shape[1])))[0]),
         0.07, alpha = 0.01, tol = 1e-8, max_its = 10)

[3.47537587e-11 4.29673376e-12 2.06526467e-27 ... 3.61757807e-21
 1.83058972e-09 4.91950563e-01]


  return (-1/len(y_hat)) * np.sum(np.array(y) * np.log(y_hat) + (1-np.array(y)) * (np.log(1 - y_hat)))
  return (-1/len(y_hat)) * np.sum(np.array(y) * np.log(y_hat) + (1-np.array(y)) * (np.log(1 - y_hat)))
  return 1 / (1+ np.exp(-z))


[1.00000000e+00 1.00000000e+00 1.00000000e+00 ... 1.00000000e+00
 9.99995827e-01 2.01301061e-75]


UnboundLocalError: local variable 'y_hat' referenced before assignment

In [273]:
%%time
gradient_descent(X_scaled,cat_ind, list(abs(np.random.normal(loc = 0, scale = 0.01, size = (1,cat_car_df.shape[1])))[0]),
         0.07, alpha = 0.001)

CPU times: user 1.22 s, sys: 330 ms, total: 1.55 s
Wall time: 411 ms


(array([-0.50729648, -0.51881345, -0.52307644, ..., -0.42886876,
        -0.44091997, -0.45140551]),
 3370.3390374134715,
 0.07002527245046408,
 [0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  1,
  1,
  0,
  1,
 

In [223]:
weights = list(abs(np.random.normal(loc = 0, scale = 0.01, size = (1,cat_car_df.shape[1])))[0])

In [224]:
zed = get_z_vals(X_scaled, weights, 3)

In [225]:
zed

array([ 0.90322867,  0.34226047, -2.76639824, ..., -1.39272992,
        1.30579004,  3.0583485 ])

In [226]:
preds = get_y_hat(zed)

In [104]:
preds

[0.939505370932505,
 0.9408105797963642,
 0.9161294815682697,
 0.9349288834121516,
 0.9519679521154726,
 0.9326293482067182,
 0.93929039461481,
 0.9572226792940084,
 0.9729063382446581,
 0.9581365959497694,
 0.9500025794294198,
 0.9427054335558975,
 0.9397773311675522,
 0.9501615437210967,
 0.93652008048531,
 0.9499869756711972,
 0.9629344808471426,
 0.950259752412003,
 0.962301942647059,
 0.9607737972404022,
 0.9531966472923454,
 0.9510787212796565,
 0.9773686824674496,
 0.9583369770679898,
 0.942613544690279,
 0.9590305196984139,
 0.9321373646694825,
 0.9365114308568678,
 0.9286590829651541,
 0.9453193305646458,
 0.9727681871026419,
 0.9451410682796336,
 0.9747069008490084,
 0.9629113012781662,
 0.8760437269718108,
 0.9357899905456775,
 0.969140439229617,
 0.9673571895209772,
 0.9622045887308265,
 0.9239487734624304,
 0.9507245918035021,
 0.9284790721195879,
 0.9804040676444608,
 0.9172007485044772,
 0.9629974025497159,
 0.9589285933815253,
 0.9497534112157706,
 0.9316274733781466,
 

In [108]:
cost_fun(cat_ind, preds)

0.9408105797963642


1.5136844942702332

In [95]:
from sklearn import preprocessing

In [96]:
scaler = preprocessing.StandardScaler().fit(cat_car_df)
X_scaled = scaler.transform(cat_car_df)

In [100]:
X_scaled = pd.DataFrame(X_scaled)