In [None]:
'''
PURPOSE: 
To build a classifier using Linear Regression to distinguish images of two actors.
'''

In [None]:
import numpy as np 
import matplotlib.pyplot as plt
from random import randint

In [None]:
def replace_labels(y, labels):
    """Return an integer copy of ``y`` with each listed label replaced by its code.

    Parameters
    ----------
    y : array-like
        Labels to convert (any number of dimensions). It is not modified.
    labels : iterable of (old_label, new_value) pairs
        Every entry of ``y`` equal to ``old_label`` becomes ``new_value``.
        Matching is always done against the original ``y``, so replacements
        never chain into each other.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``y`` and an integer dtype.

    Raises
    ------
    ValueError
        If an entry matches none of the pairs and cannot be converted with
        ``int()`` (for example an unmapped name).
    """
    y = np.asarray(y)
    # Work on an object array: writing ints into a fixed-width string array
    # stringifies them and can silently truncate them to the string width.
    y_relabeled = y.astype(object)
    for label in labels:
        # Boolean-mask assignment is correct for any number of dimensions,
        # unlike passing np.where's per-axis index arrays to np.put (which
        # interprets them as flat indices).
        y_relabeled[y == label[0]] = label[1]
    return y_relabeled.astype(int)

#change output labels to 0 and 1


In [None]:
def flatten_set(x):
    """Flatten a stack of images into one column per image.

    Parameters
    ----------
    x : array-like
        Image stack whose LAST axis indexes the images, e.g. shape (H, W, M).

    Returns
    -------
    numpy.ndarray
        New array of shape (N, M), where N is the number of pixels per image
        (product of all leading axes) and M is the number of images.
        Column ``i`` equals ``x[..., i].flatten()`` (row-major order).
    """
    x = np.asarray(x)
    n_images = x.shape[-1]
    n_pixels = int(np.prod(x.shape[:-1]))
    # One reshape replaces the per-image vstack loop, which was quadratic in
    # the number of images, returned a 1-D array for a single image, and
    # failed outright on an empty stack. The explicit sizes (instead of -1)
    # keep the reshape well-defined when n_images == 0.
    return x.reshape(n_pixels, n_images).copy()

In [None]:
def cost(x, y, theta):
    """Quadratic cost: the sum of squared residuals ``y - theta^T x``.

    ``x`` is used exactly as given; no bias row is prepended here.
    """
    predictions = np.dot(theta.T, x)
    residuals = y - predictions
    return np.sum(np.square(residuals))
    

In [None]:
def dcost_dtheta(x, y, theta):
    """Gradient of the quadratic cost with respect to ``theta``.

    Computes ``-2 * sum_j (y_j - theta^T x_j) * x_j``, summing over axis 1 of
    ``x``. ``x`` is used exactly as given; no bias row is prepended here.
    """
    residuals = y - np.dot(theta.T, x)
    weighted_inputs = residuals * x
    return -2 * np.sum(weighted_inputs, axis=1)



In [168]:
def grad_descent(cost, dcost_dtheta, x, y, init_theta, alpha, max_iter, log_every=50):
    """Minimise ``cost`` by fixed-step gradient descent and return the final theta.

    Parameters
    ----------
    cost : callable(x, y, theta) -> float
        Objective; only evaluated for progress logging.
    dcost_dtheta : callable(x, y, theta) -> ndarray
        Gradient of ``cost`` with respect to theta.
    x, y :
        Passed through unchanged to ``cost`` and ``dcost_dtheta``.
    init_theta : array-like
        Starting parameters; never modified.
    alpha : float
        Step size.
    max_iter : int
        Iteration cap. The counter starts at 1 and the loop runs while
        ``itr < max_iter``, so at most ``max_iter - 1`` updates are made.
    log_every : int, optional
        Print progress every ``log_every`` iterations (default 50).
        Pass 0 or None to silence logging.

    Returns
    -------
    numpy.ndarray
        Theta after the last update. Iteration stops once the update step has
        norm <= EPS (1e-5) or the iteration cap is reached.
    """
    EPS = 1e-5
    # np.array copies, so the caller's init_theta is left untouched.
    t = np.array(init_theta)
    if t.dtype.kind not in "fc":
        # The in-place float update below would fail on an integer array.
        t = t.astype(float)
    # Offset the "previous" theta so the first convergence check always passes.
    prev_t = t - 10 * EPS
    itr = 1

    while np.linalg.norm(t - prev_t) > EPS and itr < max_iter:
        prev_t = t.copy()
        t -= alpha * dcost_dtheta(x, y, t)
        if log_every and itr % log_every == 0:
            # Single-argument print(...) calls behave identically on Python 2
            # and Python 3. Up to the first three components are shown, so
            # logging also works when theta has fewer than three entries.
            leading = ", ".join("%.2f" % v for v in np.ravel(t)[:3])
            print("Iter %d" % itr)
            print("t = (%s), cost(x) = %.2f" % (leading, cost(x, y, t)))
            print("Gradient:  %s \n" % (dcost_dtheta(x, y, t),))
        itr += 1

    return t

In [None]:
def pred_y(x, theta):
    """Threshold the linear hypothesis ``theta^T x`` at 0.5 into 0/1 labels.

    Returns a float array with one entry per element along the first axis of
    ``theta^T x``: 1 where the hypothesis is above 0.5, 0 where it is below,
    and a coin flip from ``randint(0, 1)`` for anything else (an exact 0.5).
    ``x`` is used exactly as given; no bias row is prepended here.
    """
    scores = np.dot(theta.T, x)
    y_pred = np.ones(scores.shape[0])

    for idx, score in enumerate(scores):
        if score > 0.5:
            label = 1
        elif score < 0.5:
            label = 0
        else:
            # Neither above nor below the threshold: break the tie at random.
            label = randint(0, 1)
        y_pred[idx] = label

    return y_pred

In [None]:
def performance(y_pred, y):
    """Return the percentage (0-100) of predictions that equal the labels.

    Compares ``y_pred[i]`` with ``y[i]`` for every index along the first axis
    of ``y``.
    """
    test_size = y.shape[0]
    n_correct = float(sum(1 for i in range(test_size) if y_pred[i] == y[i]))
    return n_correct / test_size * 100

In [None]:
# Seed NumPy's global RNG so the np.random draws further down are repeatable.
RANDOM_SEED = 2837499
np.random.seed(RANDOM_SEED)

In [None]:
# Load the train / validation / test splits (data sets containing only
# 'Alec Baldwin' and 'Steve Carell').
x_train, y_train = np.load("x_train0.npy"), np.load("y_train0.npy")
x_val, y_val = np.load("x_val0.npy"), np.load("y_val0.npy")
x_test, y_test = np.load("x_test0.npy"), np.load("y_test0.npy")

In [None]:
# Encode the actor names as binary targets: Alec Baldwin -> 1, Steve Carell -> 0.
ACTOR_CODES = [("Alec Baldwin", 1), ("Steve Carell", 0)]
y_train = replace_labels(y_train, ACTOR_CODES)
y_val = replace_labels(y_val, ACTOR_CODES)
y_test = replace_labels(y_test, ACTOR_CODES)

In [None]:
# Flatten each image into a column, then divide by 255.0
# (presumably 8-bit intensities rescaled to [0, 1] -- TODO confirm).
# NOTE: each name is overwritten with its own transform, so this cell is
# not safe to run twice without reloading the data.
PIXEL_SCALE = 255.0
x_train = flatten_set(x_train) / PIXEL_SCALE
x_val = flatten_set(x_val) / PIXEL_SCALE
x_test = flatten_set(x_test) / PIXEL_SCALE

In [None]:
# Pixel statistics of the training set (not referenced by any later cell
# visible in this notebook).
pixel_inten_mean = x_train.mean()
pixel_inten_std = x_train.std()
# Author's note: change center to 0.5 and std to 0.2 or something.
# Initialise theta from a normal distribution (mean 0, std 0.2) with one
# weight per row of x_train plus one extra entry for the bias row added later.
# Per the original note the result has shape (1025,) -- TODO confirm image size.
n_params = x_train.shape[0] + 1
theta0 = np.random.normal(loc=0, scale=0.2, size=n_params)


In [None]:
# Prepend a row of ones to each design matrix so theta[0] multiplies a
# constant 1.
train_bias_row = np.ones((1, x_train.shape[1]))
val_bias_row = np.ones((1, x_val.shape[1]))
x_train_w_bias = np.concatenate((train_bias_row, x_train), axis=0)
x_val_w_bias = np.concatenate((val_bias_row, x_val), axis=0)

In [None]:
# Fit theta on the complete training set with gradient descent.
LEARNING_RATE = 1e-5
MAX_ITERATIONS = 30000
theta_complete = grad_descent(
    cost,
    dcost_dtheta,
    x_train_w_bias,
    y_train,
    theta0,
    LEARNING_RATE,
    MAX_ITERATIONS,
)


In [None]:
# Persist the theta obtained from the complete data set.
THETA_COMPLETE_FILE = "theta_part3_complete.npy"
np.save(THETA_COMPLETE_FILE, theta_complete)

In [None]:
'''
Part 4: Obtaining thetas from training on a data set of only 2 images per actor
'''

In [164]:
# Load the reduced training set, then flatten/rescale the images and encode
# the actor names (Alec Baldwin -> 1, Steve Carell -> 0).
x_small_raw = np.load("x_train1.npy")
y_small_raw = np.load("y_train1.npy")

x_train_small = flatten_set(x_small_raw) / 255.0
y_train_small = replace_labels(
    y_small_raw,
    [("Alec Baldwin", 1), ("Steve Carell", 0)],
)

In [165]:
# Pixel statistics of the reduced training set (not referenced by any later
# cell visible in this notebook).
pixel_inten_mean = x_train_small.mean()
pixel_inten_std = x_train_small.std()
# With scale=0 every draw equals the mean, so theta0 starts as all zeros:
# one entry per row of x_train_small plus one for the bias row added later.
# Per the original note the result has shape (1025,) -- TODO confirm image size.
n_params_small = x_train_small.shape[0] + 1
theta0 = np.random.normal(loc=0, scale=0, size=n_params_small)

In [166]:
# Prepend a row of ones so theta[0] multiplies a constant 1.
small_bias_row = np.ones((1, x_train_small.shape[1]))
x_train_small_w_bias = np.concatenate((small_bias_row, x_train_small), axis=0)

In [167]:
# Fit theta on the reduced training set with the same step size and
# iteration cap as the full-data run.
LEARNING_RATE = 1e-5
MAX_ITERATIONS = 30000
theta_small = grad_descent(
    cost,
    dcost_dtheta,
    x_train_small_w_bias,
    y_train_small,
    theta0,
    LEARNING_RATE,
    MAX_ITERATIONS,
)


Iter 50
x = (0.00, 0.00, 0.00), cost(x) = 0.98
Gradient:  [-1.13695569 -0.13883805 -0.20568604 ..., -0.07405688 -0.18759982
 -1.10536329] 

Iter 100
x = (0.00, 0.00, 0.00), cost(x) = 0.84
Gradient:  [-0.25382162  0.05277619 -0.03106503 ...,  0.18371558  0.14943483
 -0.80037351] 

Iter 150
x = (0.00, 0.00, 0.00), cost(x) = 0.79
Gradient:  [ 0.01804717  0.11580846  0.02791909 ...,  0.24969464  0.23817691
 -0.69474113] 

Iter 200
x = (0.00, 0.00, 0.00), cost(x) = 0.75
Gradient:  [ 0.10114145  0.13861408  0.05064967 ...,  0.25752311  0.2515068
 -0.65102224] 

Iter 250
x = (0.00, 0.00, 0.00), cost(x) = 0.72
Gradient:  [ 0.12588767  0.14851452  0.06167908 ...,  0.2483701   0.24269027
 -0.62677135] 

Iter 300
x = (0.00, -0.00, 0.00), cost(x) = 0.68
Gradient:  [ 0.13255553  0.15398707  0.06863028 ...,  0.23486631  0.22807794
 -0.60886879] 

Iter 350
x = (0.00, -0.00, 0.00), cost(x) = 0.65
Gradient:  [ 0.13358399  0.15768854  0.07389676 ...,  0.22083931  0.21263271
 -0.59324849] 

Iter 400
x = 

Iter 4050
x = (-0.00, -0.00, -0.00), cost(x) = 0.03
Gradient:  [ 0.03299122  0.06020566  0.03957884 ...,  0.02197269  0.01723196
 -0.10904364] 

Iter 4100
x = (-0.00, -0.00, -0.00), cost(x) = 0.03
Gradient:  [ 0.03230395  0.05908336  0.03883957 ...,  0.02163766  0.01704813
 -0.10656463] 

Iter 4150
x = (-0.00, -0.00, -0.00), cost(x) = 0.03
Gradient:  [ 0.03163063  0.05798157  0.03811358 ...,  0.02130909  0.01686718
 -0.10413956] 

Iter 4200
x = (-0.00, -0.00, -0.00), cost(x) = 0.03
Gradient:  [ 0.03097099  0.05689999  0.03740071 ...,  0.02098677  0.01668896
 -0.10176729] 

Iter 4250
x = (-0.00, -0.00, -0.00), cost(x) = 0.03
Gradient:  [ 0.03032477  0.05583828  0.03670075 ...,  0.02067048  0.01651331
 -0.09944669] 

Iter 4300
x = (-0.00, -0.00, -0.00), cost(x) = 0.03
Gradient:  [ 0.02969171  0.05479612  0.03601353 ...,  0.02036002  0.01634009
 -0.09717665] 

Iter 4350
x = (-0.00, -0.00, -0.00), cost(x) = 0.03
Gradient:  [ 0.02907156  0.0537732   0.03533884 ...,  0.0200552   0.01616917
 

In [169]:
# Persist the theta obtained from the reduced training set.
THETA_SMALL_FILE = "theta_part3_small.npy"
np.save(THETA_SMALL_FILE, theta_small)