In [None]:
'''
PURPOSE: To build a classifier using Linear Regression to distinguish images of two actors.
'''

In [211]:
import numpy as np 
import matplotlib.pyplot as plt
from random import randint

In [212]:
#load data sets containing only 'Alec Baldwin' and 'Steve Carel'
x_train = np.load("x_train0.npy") 
y_train = np.load("y_train0.npy")

x_val = np.load("x_val0.npy")
y_val = np.load("y_val0.npy")

x_test = np.load("x_test0.npy")
y_test = np.load("y_test0.npy")

In [213]:
def replace_labels(y,labels):
    y_relabeled = np.copy(y)
    for label in labels:
        for index in np.where(y == label[0]):
            np.put(y_relabeled, index, label[1])
    return y_relabeled.astype(int)

#change output labels to 0 and 1


In [214]:
def flatten_set(x):
    #returned ndarray should have shape (N, M), where N = # pixels and M = # images
    for i in range(x.shape[-1]):
        flattened_image = x[...,i].flatten() 
        if i == 0:
            x_flattened = flattened_image
        else:
            x_flattened = np.vstack((x_flattened, flattened_image))
            
    return x_flattened.T

In [215]:
def cost(x,y,theta):
    #quadratic cost function
    #x = np.vstack( (np.ones((1, x.shape[1])), x))
    return np.sum( (y - np.dot(theta.T,x)) ** 2)
    

In [216]:
def dcost_dtheta(x,y,theta):
    #x = np.vstack( (np.ones((1, x.shape[1])), x))
    return -2*np.sum((y-np.dot(theta.T, x))*x, 1)



In [224]:
def grad_descent(cost, dcost_dtheta, x, y, init_theta, alpha,max_iter):
    EPS = 1e-5   #EPS = 10**(-5)
    prev_t = init_theta-10*EPS
    t = init_theta.copy()
    itr  = 1
 
    while np.linalg.norm(t - prev_t) >  EPS and itr < max_iter:
        prev_t = t.copy()
        t -= alpha*dcost_dtheta(x, y, t)
        if itr % 50 == 0:
            print "Iter", itr
            print "x = (%.2f, %.2f, %.2f), cost(x) = %.2f" % (t[0], t[1], t[2], cost(x, y, t)) 
            print "Gradient: ", dcost_dtheta(x, y, t), "\n"
            y_pred = pred_y(x,t)
            print("Performance: ",performance(y_pred,y_val))
        itr += 1

    
    return t

In [218]:
def pred_y(x,theta):

    #x = np.vstack((np.ones((1, x.shape[1])), x ))    
    h_all = np.dot(theta.T,x)
#    print("h(theta) for all images: ")
#    print(h_all)
    y_pred = np.ones(h_all.shape[0])
    
    for i in range(h_all.shape[0]):
        h=h_all[i]
        if h > 0.5:
            y_pred[i] = 1
        elif h < 0.5:
            y_pred[i] = 0
        else:
            y_pred[i]=randint(0,1)
    return y_pred

In [219]:
def performance(y_pred, y):
    sum = 0.0
    test_size = y.shape[0]
    for i in range(test_size):
        if y_pred[i] == y[i]:
            sum +=1
    return sum/test_size * 100

In [220]:
y_train = replace_labels(y_train, [("Alec Baldwin",1), ("Steve Carell",0)])
y_val = replace_labels(y_val, [("Alec Baldwin",1), ("Steve Carell",0)])
y_test = replace_labels(y_test, [("Alec Baldwin",1), ("Steve Carell",0)])

In [221]:
x_train = flatten_set(x_train) / 255.0
x_val = flatten_set(x_val) / 255.0
x_test = flatten_set(x_test) / 255.0

In [222]:
#initialize from normal distribution
pixel_inten_mean = np.mean(x_train)
pixel_inten_std  = np.std(x_train)
theta0 = np.random.normal( 0, 0.5, x_train.shape[0]+1) #of dimension (1025,)


In [223]:
x_train_w_bias = np.vstack( (np.ones((1, x_train.shape[1])), x_train))
x_val_w_bias = np.vstack( (np.ones((1, x_val.shape[1])), x_val))

In [226]:
theta = grad_descent(cost, dcost_dtheta, x_train_w_bias, y_train, theta0, 0.00001,30000)


Iter 50
x = (0.06, -0.38, 0.07), cost(x) = 895.75
Gradient:  [ -4.47793327 -30.97060222 -49.60451631 ...,  33.01297073  26.3042006
  25.76728286] 

('Performance: ', 45.0)
Iter 100
x = (0.06, -0.36, 0.10), cost(x) = 692.48
Gradient:  [ -4.69194122 -22.02578832 -35.94588707 ...,  18.74629839  12.50538679
  13.1249068 ] 

('Performance: ', 35.0)
Iter 150
x = (0.07, -0.35, 0.11), cost(x) = 575.73
Gradient:  [ -4.60022867 -16.16217224 -26.62791772 ...,  11.08466189   5.24596488
   6.18265252] 

('Performance: ', 45.0)
Iter 200
x = (0.07, -0.35, 0.12), cost(x) = 501.80
Gradient:  [ -4.37992568 -11.92558203 -19.89698395 ...,   6.56477218   1.02034814
   1.96927963] 

('Performance: ', 45.0)
Iter 250
x = (0.07, -0.34, 0.13), cost(x) = 451.14
Gradient:  [ -4.11026213  -8.70884084 -14.88602624 ...,   3.68105523  -1.65810265
  -0.80342188] 

('Performance: ', 45.0)
Iter 300
x = (0.07, -0.34, 0.14), cost(x) = 413.93
Gradient:  [ -3.8298339   -6.21534625 -11.09857519 ...,   1.73583103  -3.45844794

Iter 2450
x = (0.11, -0.35, 0.14), cost(x) = 108.14
Gradient:  [-1.20270811  0.17908607  0.44892922 ..., -0.33022337 -1.81828077
 -2.31363642] 

('Performance: ', 50.0)
Iter 2500
x = (0.11, -0.35, 0.14), cost(x) = 106.06
Gradient:  [-1.18441611  0.13721249  0.4215455  ..., -0.30261277 -1.73691974
 -2.22077328] 

('Performance: ', 50.0)
Iter 2550
x = (0.11, -0.35, 0.14), cost(x) = 104.05
Gradient:  [-1.16629715  0.09681318  0.39491908 ..., -0.27678041 -1.65887357
 -2.13107534] 

('Performance: ', 50.0)
Iter 2600
x = (0.11, -0.35, 0.14), cost(x) = 102.09
Gradient:  [-1.148355    0.05783698  0.36904176 ..., -0.2526304  -1.58400227
 -2.04443089] 

('Performance: ', 50.0)
Iter 2650
x = (0.11, -0.35, 0.14), cost(x) = 100.19
Gradient:  [-1.13059311  0.02023341  0.34390314 ..., -0.23007105 -1.51217116
 -1.96073083] 

('Performance: ', 50.0)
Iter 2700
x = (0.11, -0.35, 0.14), cost(x) = 98.34
Gradient:  [-1.11301461 -0.01604707  0.31949108 ..., -0.20901491 -1.44325079
 -1.87986897] 

('Performan

Iter 4950
x = (0.13, -0.34, 0.14), cost(x) = 48.70
Gradient:  [-0.51620597 -0.82281562 -0.26306968 ...,  0.00921621 -0.03641124
 -0.00929427] 

('Performance: ', 55.00000000000001)
Iter 5000
x = (0.13, -0.34, 0.14), cost(x) = 48.05
Gradient:  [-0.50684406 -0.82991304 -0.26875663 ...,  0.00776132 -0.02526418
  0.00954748] 

('Performance: ', 55.00000000000001)
Iter 5050
x = (0.13, -0.34, 0.14), cost(x) = 47.41
Gradient:  [-0.49762542 -0.83673563 -0.27425524 ...,  0.0062439  -0.01455214
  0.02784103] 

('Performance: ', 55.00000000000001)
Iter 5100
x = (0.13, -0.34, 0.14), cost(x) = 46.78
Gradient:  [-0.48854831 -0.84329086 -0.27957075 ...,  0.00466967 -0.00425954
  0.04560299] 

('Performance: ', 55.00000000000001)
Iter 5150
x = (0.13, -0.34, 0.14), cost(x) = 46.16
Gradient:  [-0.47961099 -0.84958595 -0.28470824 ...,  0.00304404  0.00562861
  0.06284937] 

('Performance: ', 55.00000000000001)
Iter 5200
x = (0.13, -0.34, 0.14), cost(x) = 45.56
Gradient:  [-0.47081172 -0.8556279  -0.28967

Gradient:  [-0.20049473 -0.948598   -0.38981348 ..., -0.0746812   0.19329713
  0.48044942] 

('Performance: ', 50.0)
Iter 7400
x = (0.14, -0.32, 0.15), cost(x) = 26.84
Gradient:  [-0.19624975 -0.94812566 -0.39035262 ..., -0.07609354  0.19425025
  0.48484997] 

('Performance: ', 50.0)
Iter 7450
x = (0.14, -0.32, 0.15), cost(x) = 26.55
Gradient:  [-0.1920786  -0.94757548 -0.39083808 ..., -0.0774818   0.19512334
  0.48910592] 

('Performance: ', 50.0)
Iter 7500
x = (0.14, -0.32, 0.15), cost(x) = 26.26
Gradient:  [-0.18798018 -0.94694952 -0.39127119 ..., -0.07884594  0.19591919
  0.49322092] 

('Performance: ', 50.0)
Iter 7550
x = (0.14, -0.32, 0.15), cost(x) = 25.97
Gradient:  [-0.18395341 -0.94624982 -0.39165323 ..., -0.08018592  0.19664052
  0.49719851] 

('Performance: ', 50.0)
Iter 7600
x = (0.14, -0.32, 0.15), cost(x) = 25.69
Gradient:  [-0.1799972  -0.94547834 -0.39198547 ..., -0.08150175  0.19728994
  0.50104213] 

('Performance: ', 50.0)
Iter 7650
x = (0.14, -0.32, 0.15), cost(x) 

Iter 9950
x = (0.14, -0.30, 0.16), cost(x) = 15.93
Gradient:  [-0.05671483 -0.85609855 -0.36905978 ..., -0.11865424  0.18010186
  0.57727585] 

('Performance: ', 45.0)
Iter 10000
x = (0.14, -0.30, 0.16), cost(x) = 15.78
Gradient:  [-0.05512019 -0.85351528 -0.36803589 ..., -0.11900117  0.17920213
  0.57740254] 

('Performance: ', 45.0)
Iter 10050
x = (0.14, -0.29, 0.16), cost(x) = 15.63
Gradient:  [-0.0535578  -0.85091755 -0.36699856 ..., -0.11933342  0.17829332
  0.57748887] 

('Performance: ', 45.0)
Iter 10100
x = (0.14, -0.29, 0.16), cost(x) = 15.48
Gradient:  [-0.05202712 -0.84830598 -0.3659482  ..., -0.11965122  0.17737593
  0.5775358 ] 

('Performance: ', 45.0)
Iter 10150
x = (0.14, -0.29, 0.16), cost(x) = 15.34
Gradient:  [-0.05052761 -0.84568118 -0.3648852  ..., -0.11995479  0.17645045
  0.57754428] 

('Performance: ', 45.0)
Iter 10200
x = (0.14, -0.29, 0.16), cost(x) = 15.19
Gradient:  [-0.04905875 -0.84304375 -0.36380997 ..., -0.12024436  0.17551735
  0.57751521] 

('Performan

Iter 12400
x = (0.14, -0.28, 0.17), cost(x) = 10.25
Gradient:  [-0.00813688 -0.72142617 -0.30907046 ..., -0.12218162  0.13191172
  0.54991796] 

('Performance: ', 50.0)
Iter 12450
x = (0.14, -0.28, 0.17), cost(x) = 10.16
Gradient:  [-0.00761956 -0.71865463 -0.30774127 ..., -0.12204096  0.13093876
  0.54888292] 

('Performance: ', 50.0)
Iter 12500
x = (0.14, -0.28, 0.17), cost(x) = 10.08
Gradient:  [-0.00711595 -0.71588665 -0.30641119 ..., -0.12189442  0.12996874
  0.54783623] 

('Performance: ', 50.0)
Iter 12550
x = (0.14, -0.28, 0.17), cost(x) = 9.99
Gradient:  [-0.00662579 -0.71312242 -0.30508036 ..., -0.12174211  0.12900176
  0.54677823] 

('Performance: ', 50.0)
Iter 12600
x = (0.14, -0.27, 0.17), cost(x) = 9.91
Gradient:  [-0.00614885 -0.71036211 -0.30374892 ..., -0.12158416  0.12803788
  0.54570922] 

('Performance: ', 50.0)
Iter 12650
x = (0.14, -0.27, 0.17), cost(x) = 9.83
Gradient:  [-0.00568488 -0.7076059  -0.302417   ..., -0.1214207   0.12707719
  0.54462951] 

('Performance

Iter 15000
x = (0.14, -0.26, 0.17), cost(x) = 6.72
Gradient:  [ 0.00512807 -0.58492686 -0.24123256 ..., -0.10940584  0.08631343
  0.48625238] 

('Performance: ', 50.0)
Iter 15050
x = (0.14, -0.26, 0.17), cost(x) = 6.67
Gradient:  [ 0.00518744 -0.58249499 -0.23999208 ..., -0.10909082  0.08554751
  0.48492122] 

('Performance: ', 50.0)
Iter 15100
x = (0.14, -0.26, 0.17), cost(x) = 6.61
Gradient:  [ 0.00524181 -0.58007139 -0.2387551  ..., -0.10877443  0.08478589
  0.48358868] 

('Performance: ', 50.0)
Iter 15150
x = (0.14, -0.26, 0.17), cost(x) = 6.56
Gradient:  [ 0.00529131 -0.57765607 -0.23752165 ..., -0.10845671  0.08402856
  0.48225489] 

('Performance: ', 50.0)
Iter 15200
x = (0.14, -0.26, 0.17), cost(x) = 6.51
Gradient:  [ 0.00533603 -0.57524908 -0.23629179 ..., -0.10813773  0.08327552
  0.48091994] 

('Performance: ', 50.0)
Iter 15250
x = (0.14, -0.26, 0.17), cost(x) = 6.46
Gradient:  [ 0.00537607 -0.57285041 -0.23506554 ..., -0.10781753  0.08252675
  0.47958395] 

('Performance: '

Iter 17550
x = (0.14, -0.25, 0.18), cost(x) = 4.59
Gradient:  [ 0.00358301 -0.47168169 -0.18301942 ..., -0.092481    0.05249872
  0.41841903] 

('Performance: ', 50.0)
Iter 17600
x = (0.14, -0.25, 0.18), cost(x) = 4.56
Gradient:  [ 0.00349006 -0.46968082 -0.18198895 ..., -0.09214657  0.0519363
  0.41711976] 

('Performance: ', 50.0)
Iter 17650
x = (0.14, -0.25, 0.18), cost(x) = 4.52
Gradient:  [ 0.00339567 -0.46768822 -0.18096291 ..., -0.0918125   0.05137747
  0.41582256] 

('Performance: ', 50.0)
Iter 17700
x = (0.14, -0.25, 0.18), cost(x) = 4.49
Gradient:  [ 0.00329987 -0.46570386 -0.1799413  ..., -0.0914788   0.05082219
  0.41452748] 

('Performance: ', 50.0)
Iter 17750
x = (0.14, -0.24, 0.18), cost(x) = 4.46
Gradient:  [ 0.00320271 -0.46372774 -0.17892413 ..., -0.09114549  0.05027046
  0.41323455] 

('Performance: ', 50.0)
Iter 17800
x = (0.14, -0.24, 0.18), cost(x) = 4.43
Gradient:  [ 0.00310424 -0.46175984 -0.17791139 ..., -0.0908126   0.04972225
  0.41194379] 

('Performance: ',

Iter 19950
x = (0.14, -0.24, 0.18), cost(x) = 3.28
Gradient:  [-0.00190825 -0.38459958 -0.13851582 ..., -0.07708238  0.02924167
  0.35888707] 

('Performance: ', 50.0)
Iter 20000
x = (0.14, -0.24, 0.18), cost(x) = 3.26
Gradient:  [-0.00203411 -0.38297066 -0.13769411 ..., -0.07678005  0.02883176
  0.35771666] 

('Performance: ', 50.0)
Iter 20050
x = (0.14, -0.24, 0.18), cost(x) = 3.23
Gradient:  [-0.00216008 -0.38134887 -0.13687657 ..., -0.0764786   0.02842462
  0.35654935] 

('Performance: ', 50.0)
Iter 20100
x = (0.14, -0.24, 0.18), cost(x) = 3.21
Gradient:  [-0.00228614 -0.37973419 -0.13606318 ..., -0.07617803  0.02802023
  0.35538515] 

('Performance: ', 50.0)
Iter 20150
x = (0.14, -0.23, 0.18), cost(x) = 3.19
Gradient:  [-0.00241227 -0.3781266  -0.13525392 ..., -0.07587835  0.02761859
  0.35422406] 

('Performance: ', 50.0)
Iter 20200
x = (0.14, -0.23, 0.18), cost(x) = 3.17
Gradient:  [-0.00253845 -0.37652606 -0.13444879 ..., -0.07557957  0.02721968
  0.3530661 ] 

('Performance: '

Iter 22400
x = (0.14, -0.23, 0.19), cost(x) = 2.37
Gradient:  [-0.00795807 -0.3126771  -0.10291689 ..., -0.06336062  0.01214812
  0.30528869] 

('Performance: ', 50.0)
Iter 22450
x = (0.14, -0.23, 0.19), cost(x) = 2.35
Gradient:  [-0.00807494 -0.31136618 -0.10228409 ..., -0.06310444  0.01185722
  0.30427572] 

('Performance: ', 50.0)
Iter 22500
x = (0.14, -0.23, 0.19), cost(x) = 2.34
Gradient:  [-0.00819143 -0.31006109 -0.10165481 ..., -0.06284922  0.01156843
  0.30326599] 

('Performance: ', 50.0)
Iter 22550
x = (0.14, -0.23, 0.19), cost(x) = 2.32
Gradient:  [-0.00830752 -0.30876181 -0.10102903 ..., -0.06259497  0.01128171
  0.3022595 ] 

('Performance: ', 50.0)
Iter 22600
x = (0.14, -0.23, 0.19), cost(x) = 2.31
Gradient:  [-0.00842321 -0.30746829 -0.10040672 ..., -0.06234166  0.01099707
  0.30125624] 

('Performance: ', 50.0)
Iter 22650
x = (0.14, -0.23, 0.19), cost(x) = 2.29
Gradient:  [-0.00853849 -0.30618053 -0.09978788 ..., -0.06208932  0.01071449
  0.3002562 ] 

('Performance: '

Iter 24900
x = (0.14, -0.22, 0.19), cost(x) = 1.72
Gradient:  [ -1.32548650e-02  -2.53784362e-01  -7.52923556e-02 ...,  -5.16981719e-02
  -5.23667758e-05   2.58531266e-01] 

('Performance: ', 50.0)
Iter 24950
x = (0.14, -0.22, 0.19), cost(x) = 1.71
Gradient:  [ -1.33484327e-02  -2.52734888e-01  -7.48173177e-02 ...,  -5.14879585e-02
  -2.52134553e-04   2.57674992e-01] 

('Performance: ', 50.0)
Iter 25000
x = (0.14, -0.22, 0.19), cost(x) = 1.70
Gradient:  [-0.01344149 -0.25169006 -0.07434507 ..., -0.05127861 -0.00045034
  0.25682171] 

('Performance: ', 50.0)
Iter 25050
x = (0.14, -0.22, 0.19), cost(x) = 1.69
Gradient:  [-0.01353405 -0.25064984 -0.0738756  ..., -0.05107012 -0.000647    0.2559714 ] 

('Performance: ', 50.0)
Iter 25100
x = (0.14, -0.22, 0.19), cost(x) = 1.68
Gradient:  [-0.0136261  -0.24961423 -0.0734089  ..., -0.05086248 -0.00084211
  0.25512407] 

('Performance: ', 50.0)
Iter 25150
x = (0.14, -0.22, 0.19), cost(x) = 1.67
Gradient:  [-0.01371764 -0.24858319 -0.07294494 ..

Iter 27350
x = (0.14, -0.21, 0.19), cost(x) = 1.28
Gradient:  [-0.01724859 -0.20743948 -0.05505161 ..., -0.04236096 -0.00816636
  0.21995294] 

('Performance: ', 50.0)
Iter 27400
x = (0.14, -0.21, 0.19), cost(x) = 1.27
Gradient:  [-0.01731781 -0.20659368 -0.05469781 ..., -0.04218971 -0.00829971
  0.21923432] 

('Performance: ', 50.0)
Iter 27450
x = (0.14, -0.21, 0.19), cost(x) = 1.26
Gradient:  [-0.01738655 -0.20575158 -0.05434617 ..., -0.04201919 -0.0084319   0.2185183 ] 

('Performance: ', 50.0)
Iter 27500
x = (0.14, -0.21, 0.19), cost(x) = 1.26
Gradient:  [-0.01745482 -0.20491314 -0.05399668 ..., -0.04184939 -0.00856294
  0.21780489] 

('Performance: ', 50.0)
Iter 27550
x = (0.14, -0.21, 0.19), cost(x) = 1.25
Gradient:  [-0.01752263 -0.20407836 -0.05364933 ..., -0.0416803  -0.00869283
  0.21709408] 

('Performance: ', 50.0)
Iter 27600
x = (0.14, -0.21, 0.19), cost(x) = 1.24
Gradient:  [-0.01758996 -0.20324722 -0.0533041  ..., -0.04151193 -0.00882159
  0.21638585] 

('Performance: ',

In [210]:
np.save("working_theta.npy", theta)

In [None]:
#undo delete cells anfd duplicate

In [None]:
for i in range(0,1000):
    theta = grad_descent(cost, dcost_dtheta, x_train_w_bias, y_train, theta0, 0.0001,i)
    if i%10 == 0:
        y_pred = pred_y(x_val_w_bias,theta)
        print(performance(y_pred,y_val)

In [167]:
x_train_w_bias = np.vstack( (np.ones((1, x_train.shape[1])), x_train))
theta0 = np.random.normal( 0, 0.5, x_train.shape[0]+1)
y_pred = pred_y(x_val_w_bias,theta0) 


#ytf are they all negative and extremely huge ugh #this definitely looks like the problem
#okay they were huge because i used the theta from gradient descent. But now I see they're all 
#negative and no where close to zero :(
#after this, I'm going to take a closer look at all the function definitions


# performance(y_pred,y_val)
# print(y_pred) #why are my predicted values always 0??????
# print(y_val)

h(theta) for all images: 
[-1.1445487   0.82411935  0.91911756  6.16138154  0.24956069 -4.89738809
  1.9450231   1.27694493  2.59521169  1.86729262  8.10894868  6.57775487
  6.10997506  3.10246342  7.2339009   0.76694609  1.85313979  3.69542052
  3.16526114  1.18021677]


In [168]:
performance(y_pred,y_test)

35.0

In [160]:
np.dot(x_train_w_bias[:,6],theta0) #I seem to always get negative values, so strange

-21.410217605241026

In [137]:
print(theta0)
print(x_train_w_bias[:,4])

# sum = 0
# for i in range(1025):
#     sum += theta0[i]*x_train_w_bias[i,0]
    
# print(sum)
#and why the fuck is my sum different?

[ 0.71480777  0.43189064 -0.88548037 ...,  0.28195435 -0.97410807
  0.2220193 ]
[ 1.          0.39215686  0.17254902 ...,  0.4627451   0.07843137
  0.08235294]
-12.8688288163


In [None]:
'''
gonna check the dot function, it makes no sense for them to be neagtive O.o
coudl be for the fact that x contains bias....., but the right constant would've taken care of
it. lemme check
'''