In [1]:
'''
PURPOSE: To build a classifier using Linear Regression to distinguish images of two actors.
'''

'\nPURPOSE: To build a classifier using Linear Regression to distinguish images of two actors.\n'

In [6]:
import numpy as np 
import matplotlib.pyplot as plt
from random import randint

In [8]:
#load data sets containing only 'Alec Baldwin' and 'Steve Carel'
x_train = np.load("x_train0.npy") 
y_train = np.load("y_train0.npy")

x_val = np.load("x_val0.npy")
y_val = np.load("y_val0.npy")

x_test = np.load("x_test0.npy")
y_test = np.load("y_test0.npy")

In [9]:
def replace_labels(y,labels):
    y_relabeled = np.copy(y)
    for label in labels:
        for index in np.where(y == label[0]):
            np.put(y_relabeled, index, label[1])
    return y_relabeled.astype(int)

#change output labels to 0 and 1


In [10]:
def flatten_set(x):
    #returned ndarray should have shape (N, M), where N = # pixels and M = # images
    for i in range(x.shape[-1]):
        flattened_image = x[...,i].flatten() 
        if i == 0:
            x_flattened = flattened_image
        else:
            x_flattened = np.vstack((x_flattened, flattened_image))
            
    return x_flattened.T

In [12]:
def cost(x,y,theta):
    #quadratic cost function
    #x = np.vstack( (np.ones((1, x.shape[1])), x))
    return np.sum( (y - np.dot(theta.T,x)) ** 2)
    

In [13]:
def dcost_dtheta(x,y,theta):
    #x = np.vstack( (np.ones((1, x.shape[1])), x))
    return -2*np.sum((y-np.dot(theta.T, x))*x, 1)



In [14]:
def grad_descent(cost, dcost_dtheta, x, y, init_theta, alpha,max_iter):
    EPS = 1e-5   #EPS = 10**(-5)
    prev_t = init_theta-10*EPS
    t = init_theta.copy()
    itr  = 1
 
    while np.linalg.norm(t - prev_t) >  EPS and itr < max_iter:
        prev_t = t.copy()
        t -= alpha*dcost_dtheta(x, y, t)
        if itr % 50 == 0:
            print "Iter", itr
            print "x = (%.2f, %.2f, %.2f), cost(x) = %.2f" % (t[0], t[1], t[2], cost(x, y, t)) 
            print "Gradient: ", dcost_dtheta(x, y, t), "\n"
            y_pred = pred_y(x,t)
            print("Performance: ",performance(y_pred,y_val))
        itr += 1

    
    return t

In [15]:
def pred_y(x,theta):

    #x = np.vstack((np.ones((1, x.shape[1])), x ))    
    h_all = np.dot(theta.T,x)
#    print("h(theta) for all images: ")
#    print(h_all)
    y_pred = np.ones(h_all.shape[0])
    
    for i in range(h_all.shape[0]):
        h=h_all[i]
        if h > 0.5:
            y_pred[i] = 1
        elif h < 0.5:
            y_pred[i] = 0
        else:
            y_pred[i]=randint(0,1)
    return y_pred

In [16]:
def performance(y_pred, y):
    sum = 0.0
    test_size = y.shape[0]
    for i in range(test_size):
        if y_pred[i] == y[i]:
            sum +=1
    return sum/test_size * 100

In [17]:
y_train = replace_labels(y_train, [("Alec Baldwin",1), ("Steve Carell",0)])
y_val = replace_labels(y_val, [("Alec Baldwin",1), ("Steve Carell",0)])
y_test = replace_labels(y_test, [("Alec Baldwin",1), ("Steve Carell",0)])

In [18]:
x_train = flatten_set(x_train) / 255.0
x_val = flatten_set(x_val) / 255.0
x_test = flatten_set(x_test) / 255.0

In [19]:
#initialize from normal distribution
pixel_inten_mean = np.mean(x_train)
pixel_inten_std  = np.std(x_train)
theta0 = np.random.normal( 0, 0.5, x_train.shape[0]+1) #of dimension (1025,)


In [20]:
x_train_w_bias = np.vstack( (np.ones((1, x_train.shape[1])), x_train))
x_val_w_bias = np.vstack( (np.ones((1, x_val.shape[1])), x_val))

In [21]:
theta = grad_descent(cost, dcost_dtheta, x_train_w_bias, y_train, theta0, 0.00001,30000)


Iter 50
x = (-0.16, -0.68, 0.54), cost(x) = 786.50
Gradient:  [ -7.8407628    6.75991858  13.60036604 ...,  44.28080217  52.90625526
  46.33720512] 

('Performance: ', 45.0)
Iter 100
x = (-0.16, -0.68, 0.53), cost(x) = 593.36
Gradient:  [ -6.80882358   4.43194157  12.1431976  ...,  14.49225828  23.07472804
  19.39161963] 

('Performance: ', 50.0)
Iter 150
x = (-0.15, -0.68, 0.53), cost(x) = 516.07
Gradient:  [ -6.05988456   2.37947441  10.69284971 ...,   1.43381183   9.96935514
   7.35229253] 

('Performance: ', 40.0)
Iter 200
x = (-0.15, -0.68, 0.52), cost(x) = 468.33
Gradient:  [-5.55593859  1.26719682  9.87914811 ..., -4.3939155   3.98817636
  1.80373226] 

('Performance: ', 40.0)
Iter 250
x = (-0.15, -0.68, 0.52), cost(x) = 431.67
Gradient:  [-5.21932218  0.87551668  9.55116851 ..., -6.99912025  1.14127844
 -0.82853746] 

('Performance: ', 45.0)
Iter 300
x = (-0.14, -0.68, 0.51), cost(x) = 401.07
Gradient:  [-4.98834147  0.89474568  9.47447876 ..., -8.11700963 -0.26995583
 -2.10209

Iter 2650
x = (-0.06, -0.73, 0.38), cost(x) = 84.29
Gradient:  [-3.03298182  1.19306745  2.3329002  ..., -0.50859104  1.04408263
 -0.15373312] 

('Performance: ', 50.0)
Iter 2700
x = (-0.05, -0.73, 0.38), cost(x) = 82.72
Gradient:  [-3.00284856  1.17383966  2.26011569 ..., -0.46139803  1.04849394
 -0.1492238 ] 

('Performance: ', 50.0)
Iter 2750
x = (-0.05, -0.73, 0.38), cost(x) = 81.19
Gradient:  [-2.97306527  1.15532177  2.18995391 ..., -0.41678563  1.0516241
 -0.14592749] 

('Performance: ', 50.0)
Iter 2800
x = (-0.05, -0.73, 0.38), cost(x) = 79.70
Gradient:  [-2.94363575  1.13748276  2.12230878 ..., -0.37461241  1.05356089
 -0.14376156] 

('Performance: ', 50.0)
Iter 2850
x = (-0.05, -0.73, 0.38), cost(x) = 78.27
Gradient:  [-2.91456283  1.12029271  2.05707885 ..., -0.33474467  1.05438722
 -0.14264806] 

('Performance: ', 50.0)
Iter 2900
x = (-0.05, -0.73, 0.37), cost(x) = 76.87
Gradient:  [-2.88584854  1.10372281  1.99416704 ..., -0.2970561   1.05418139
 -0.14251349] 

('Performan

Iter 5100
x = (0.00, -0.75, 0.35), cost(x) = 39.82
Gradient:  [-1.94185465  0.70820927  0.58233614 ...,  0.29327848  0.67669814
 -0.48652766] 

('Performance: ', 50.0)
Iter 5150
x = (0.01, -0.75, 0.35), cost(x) = 39.32
Gradient:  [-1.92634898  0.7029861   0.56780758 ...,  0.29579071  0.66713939
 -0.49507113] 

('Performance: ', 45.0)
Iter 5200
x = (0.01, -0.75, 0.35), cost(x) = 38.82
Gradient:  [-1.911048    0.69783641  0.55370256 ...,  0.29811866  0.65767463
 -0.50350346] 

('Performance: ', 45.0)
Iter 5250
x = (0.01, -0.75, 0.35), cost(x) = 38.33
Gradient:  [-1.89594826  0.69275717  0.54000742 ...,  0.30027135  0.64830616
 -0.51182175] 

('Performance: ', 45.0)
Iter 5300
x = (0.01, -0.75, 0.35), cost(x) = 37.84
Gradient:  [-1.8810464   0.6877455   0.52670902 ...,  0.30225732  0.63903605
 -0.52002342] 

('Performance: ', 45.0)
Iter 5350
x = (0.01, -0.75, 0.35), cost(x) = 37.37
Gradient:  [-1.86633911  0.68279865  0.51379471 ...,  0.30408468  0.62986609
 -0.52810614] 

('Performance: '

Iter 7750
x = (0.05, -0.76, 0.34), cost(x) = 21.65
Gradient:  [-1.33645533  0.49104404  0.18017316 ...,  0.29697861  0.30879784
 -0.76972436] 

('Performance: ', 40.0)
Iter 7800
x = (0.05, -0.76, 0.34), cost(x) = 21.43
Gradient:  [-1.32820872  0.48766144  0.17690868 ...,  0.29592605  0.30427667
 -0.77201421] 

('Performance: ', 40.0)
Iter 7850
x = (0.05, -0.76, 0.34), cost(x) = 21.21
Gradient:  [-1.32004805  0.48429709  0.17373379 ...,  0.29486021  0.29982603
 -0.77421277] 

('Performance: ', 40.0)
Iter 7900
x = (0.05, -0.76, 0.34), cost(x) = 20.99
Gradient:  [-1.31197201  0.48095083  0.17064613 ...,  0.29378183  0.29544483
 -0.77632142] 

('Performance: ', 40.0)
Iter 7950
x = (0.05, -0.76, 0.34), cost(x) = 20.78
Gradient:  [-1.30397934  0.47762249  0.1676434  ...,  0.29269161  0.29113198
 -0.77834154] 

('Performance: ', 40.0)
Iter 8000
x = (0.05, -0.76, 0.34), cost(x) = 20.56
Gradient:  [-1.29606876  0.47431193  0.16472338 ...,  0.29159021  0.28688641
 -0.7802745 ] 

('Performance: '

Iter 10250
x = (0.08, -0.77, 0.34), cost(x) = 13.36
Gradient:  [-1.00851862  0.34229076  0.09225345 ...,  0.23759674  0.15033013
 -0.79732406] 

('Performance: ', 45.0)
Iter 10300
x = (0.08, -0.77, 0.34), cost(x) = 13.24
Gradient:  [-1.00335129  0.33971346  0.09151814 ...,  0.23639967  0.14822329
 -0.79654514] 

('Performance: ', 45.0)
Iter 10350
x = (0.08, -0.77, 0.34), cost(x) = 13.12
Gradient:  [-0.99822667  0.337151    0.09080714 ...,  0.23520545  0.14614667
 -0.7957306 ] 

('Performance: ', 45.0)
Iter 10400
x = (0.08, -0.77, 0.34), cost(x) = 13.00
Gradient:  [-0.99314421  0.33460335  0.09011983 ...,  0.23401416  0.14409974
 -0.79488122] 

('Performance: ', 45.0)
Iter 10450
x = (0.08, -0.77, 0.34), cost(x) = 12.89
Gradient:  [-0.98810339  0.33207047  0.08945557 ...,  0.23282591  0.14208201
 -0.79399778] 

('Performance: ', 45.0)
Iter 10500
x = (0.08, -0.77, 0.34), cost(x) = 12.77
Gradient:  [-0.9831037   0.32955232  0.08881378 ...,  0.23164079  0.14009296
 -0.79308103] 

('Performa

Iter 12700
x = (0.10, -0.78, 0.34), cost(x) = 8.80
Gradient:  [-0.79724627  0.23269081  0.07581083 ...,  0.18348292  0.07478137
 -0.72930222] 

('Performance: ', 50.0)
Iter 12750
x = (0.10, -0.78, 0.34), cost(x) = 8.73
Gradient:  [-0.79367314  0.23078984  0.07574055 ...,  0.18249052  0.07368786
 -0.72749729] 

('Performance: ', 50.0)
Iter 12800
x = (0.10, -0.78, 0.34), cost(x) = 8.66
Gradient:  [-0.79012432  0.22890144  0.07567625 ...,  0.18150288  0.07260767
 -0.72568292] 

('Performance: ', 50.0)
Iter 12850
x = (0.10, -0.78, 0.34), cost(x) = 8.60
Gradient:  [-0.78659956  0.22702554  0.07561773 ...,  0.18052001  0.07154059
 -0.72385942] 

('Performance: ', 50.0)
Iter 12900
x = (0.10, -0.78, 0.34), cost(x) = 8.53
Gradient:  [-0.78309861  0.2251621   0.0755648  ...,  0.1795419   0.07048641
 -0.72202714] 

('Performance: ', 50.0)
Iter 12950
x = (0.10, -0.78, 0.34), cost(x) = 8.46
Gradient:  [-0.77962122  0.22331106  0.07551728 ...,  0.17856855  0.06944494
 -0.72018637] 

('Performance: '

Iter 15350
x = (0.12, -0.79, 0.33), cost(x) = 5.87
Gradient:  [-0.63637865  0.14792177  0.07687956 ...,  0.13729597  0.03121929
 -0.62674396] 

('Performance: ', 50.0)
Iter 15400
x = (0.12, -0.79, 0.33), cost(x) = 5.83
Gradient:  [-0.63381643  0.14660806  0.07694236 ...,  0.13654431  0.03061523
 -0.62477025] 

('Performance: ', 50.0)
Iter 15450
x = (0.12, -0.79, 0.33), cost(x) = 5.79
Gradient:  [-0.63126899  0.14530388  0.07700536 ...,  0.13579678  0.03001731
 -0.62279789] 

('Performance: ', 50.0)
Iter 15500
x = (0.12, -0.79, 0.33), cost(x) = 5.75
Gradient:  [-0.62873618  0.14400917  0.07706851 ...,  0.13505338  0.02942545
 -0.62082698] 

('Performance: ', 50.0)
Iter 15550
x = (0.12, -0.79, 0.33), cost(x) = 5.71
Gradient:  [-0.6262179   0.14272388  0.07713177 ...,  0.13431408  0.02883956
 -0.61885763] 

('Performance: ', 50.0)
Iter 15600
x = (0.12, -0.79, 0.33), cost(x) = 5.67
Gradient:  [-0.62371402  0.14144795  0.07719508 ...,  0.13357887  0.02825958
 -0.61688995] 

('Performance: '

Iter 17850
x = (0.13, -0.79, 0.33), cost(x) = 4.15
Gradient:  [-0.52421457  0.09284484  0.07956756 ...,  0.10440594  0.00721713
 -0.53134915] 

('Performance: ', 50.0)
Iter 17900
x = (0.13, -0.79, 0.33), cost(x) = 4.12
Gradient:  [-0.52226268  0.091942    0.07960168 ...,  0.10383757  0.00684263
 -0.5295362 ] 

('Performance: ', 50.0)
Iter 17950
x = (0.13, -0.79, 0.33), cost(x) = 4.09
Gradient:  [-0.52032078  0.09104609  0.07963478 ...,  0.10327238  0.00647148
 -0.52772772] 

('Performance: ', 50.0)
Iter 18000
x = (0.13, -0.79, 0.33), cost(x) = 4.07
Gradient:  [-0.5183888   0.09015707  0.07966685 ...,  0.10271034  0.00610365
 -0.52592376] 

('Performance: ', 50.0)
Iter 18050
x = (0.13, -0.79, 0.33), cost(x) = 4.04
Gradient:  [-0.51646667  0.08927489  0.07969788 ...,  0.10215145  0.00573911
 -0.52412433] 

('Performance: ', 50.0)
Iter 18100
x = (0.13, -0.79, 0.33), cost(x) = 4.01
Gradient:  [-0.51455432  0.08839951  0.07972788 ...,  0.10159567  0.00537781
 -0.52232946] 

('Performance: '

Iter 20500
x = (0.14, -0.79, 0.33), cost(x) = 2.96
Gradient:  [-0.43304171  0.05352793  0.07990584 ...,  0.07824405 -0.00872278
 -0.44186212] 

('Performance: ', 50.0)
Iter 20550
x = (0.14, -0.79, 0.33), cost(x) = 2.94
Gradient:  [-0.43153518  0.05293392  0.0798838  ...,  0.07782012 -0.00895916
 -0.4403077 ] 

('Performance: ', 50.0)
Iter 20600
x = (0.14, -0.79, 0.33), cost(x) = 2.92
Gradient:  [-0.43003565  0.0523447   0.07986077 ...,  0.07739849 -0.00919356
 -0.4387583 ] 

('Performance: ', 50.0)
Iter 20650
x = (0.14, -0.79, 0.33), cost(x) = 2.90
Gradient:  [-0.42854309  0.05176023  0.07983675 ...,  0.07697914 -0.00942597
 -0.43721391] 

('Performance: ', 50.0)
Iter 20700
x = (0.14, -0.79, 0.33), cost(x) = 2.88
Gradient:  [-0.42705745  0.05118048  0.07981176 ...,  0.07656206 -0.00965643
 -0.43567454] 

('Performance: ', 50.0)
Iter 20750
x = (0.14, -0.79, 0.33), cost(x) = 2.87
Gradient:  [-0.42557868  0.05060542  0.07978579 ...,  0.07614724 -0.00988495
 -0.43414017] 

('Performance: '

Iter 23100
x = (0.15, -0.79, 0.33), cost(x) = 2.17
Gradient:  [-0.36311937  0.02828089  0.07758072 ...,  0.05895272 -0.01870896
 -0.36755953] 

('Performance: ', 50.0)
Iter 23150
x = (0.15, -0.79, 0.33), cost(x) = 2.16
Gradient:  [-0.36192656  0.02789461  0.07751543 ...,  0.05863113 -0.01886098
 -0.36625732] 

('Performance: ', 50.0)
Iter 23200
x = (0.15, -0.79, 0.33), cost(x) = 2.14
Gradient:  [-0.3607389   0.0275116   0.07744949 ...,  0.05831121 -0.0190117
 -0.36495971] 

('Performance: ', 50.0)
Iter 23250
x = (0.15, -0.79, 0.33), cost(x) = 2.13
Gradient:  [-0.35955637  0.02713182  0.07738289 ...,  0.05799294 -0.01916112
 -0.36366668] 

('Performance: ', 50.0)
Iter 23300
x = (0.15, -0.79, 0.33), cost(x) = 2.12
Gradient:  [-0.35837893  0.02675526  0.07731565 ...,  0.05767631 -0.01930925
 -0.36237823] 

('Performance: ', 50.0)
Iter 23350
x = (0.15, -0.79, 0.33), cost(x) = 2.11
Gradient:  [-0.35720656  0.02638188  0.07724778 ...,  0.05736131 -0.01945611
 -0.36109434] 

('Performance: ',

Iter 25950
x = (0.16, -0.79, 0.33), cost(x) = 1.58
Gradient:  [-0.30258765  0.01081658  0.0729994  ...,  0.04300409 -0.0255402
 -0.30031066] 

('Performance: ', 50.0)
Iter 26000
x = (0.16, -0.79, 0.33), cost(x) = 1.57
Gradient:  [-0.30164813  0.0105818   0.07290671 ...,  0.04276292 -0.02563085
 -0.2992506 ] 

('Performance: ', 50.0)
Iter 26050
x = (0.16, -0.79, 0.33), cost(x) = 1.57
Gradient:  [-0.30071242  0.01034913  0.07281371 ...,  0.04252294 -0.02572062
 -0.29819441] 

('Performance: ', 50.0)
Iter 26100
x = (0.16, -0.79, 0.33), cost(x) = 1.56
Gradient:  [-0.29978049  0.01011856  0.0727204  ...,  0.04228413 -0.02580952
 -0.29714208] 

('Performance: ', 50.0)
Iter 26150
x = (0.16, -0.79, 0.33), cost(x) = 1.55
Gradient:  [-0.29885233  0.00989007  0.07262679 ...,  0.04204649 -0.02589755
 -0.2960936 ] 

('Performance: ', 50.0)
Iter 26200
x = (0.16, -0.79, 0.33), cost(x) = 1.54
Gradient:  [-0.2979279   0.00966364  0.07253287 ...,  0.04181002 -0.02598473
 -0.29504895] 

('Performance: ',

Iter 28550
x = (0.17, -0.79, 0.32), cost(x) = 1.21
Gradient:  [-0.25837594  0.00107961  0.06786353 ...,  0.03189313 -0.02920417
 -0.25004133] 

('Performance: ', 50.0)
Iter 28600
x = (0.17, -0.79, 0.32), cost(x) = 1.20
Gradient:  [-0.2576109   0.00093553  0.06776035 ...,  0.0317054  -0.0292558
 -0.24916588] 

('Performance: ', 50.0)
Iter 28650
x = (0.17, -0.79, 0.32), cost(x) = 1.20
Gradient:  [-0.25684879  0.00079285  0.06765708 ...,  0.03151854 -0.02930679
 -0.24829366] 

('Performance: ', 50.0)
Iter 28700
x = (0.17, -0.79, 0.32), cost(x) = 1.19
Gradient:  [-0.25608961  0.00065158  0.06755371 ...,  0.03133257 -0.02935716
 -0.24742464] 

('Performance: ', 50.0)
Iter 28750
x = (0.17, -0.79, 0.32), cost(x) = 1.19
Gradient:  [-0.25533334  0.00051168  0.06745025 ...,  0.03114747 -0.0294069
 -0.24655883] 

('Performance: ', 50.0)
Iter 28800
x = (0.17, -0.79, 0.32), cost(x) = 1.18
Gradient:  [-0.25457997  0.00037317  0.0673467  ...,  0.03096325 -0.02945602
 -0.24569621] 

('Performance: ', 