In [1]:
'''
PURPOSE: build classifier to predict actor using one-hot-encoding
         obtain performance on train and test sets

Remember: X = np.random.rand(n, m)
Y = np.random.rand(k, m)
theta = np.random.rand(n, k)

'''

'\nPURPOSE: build classifier to predict actor using one-hot-encoding\n         obtain performance on train and test sets\n\nRemember: X = np.random.rand(n, m)\nY = np.random.rand(k, m)\ntheta = np.random.rand(n, k)\n\n'

In [2]:
import numpy as np
import random

In [3]:
# Seed both generators. The weights are later drawn with np.random.normal,
# which is NOT affected by seeding Python's built-in `random` module, so
# NumPy's global RNG has to be seeded as well for reproducible runs.
random.seed(5)
np.random.seed(5)

In [4]:
# Load each split from disk as an (inputs, labels) pair of .npy arrays.
x_train, y_train = np.load("x_train.npy"), np.load("y_train.npy")
x_val, y_val = np.load("x_val.npy"), np.load("y_val.npy")
x_test, y_test = np.load("x_test.npy"), np.load("y_test.npy")

In [126]:
def name_to_onehot(y, k, names=None):
    """One-hot encode an array of labels into a (k, m) matrix.

    Parameters
    ----------
    y : ndarray
        Labels, one per sample; m = y.shape[0].
    k : int
        Number of rows (classes) in the returned matrix.
    names : sequence, optional
        Class labels in the row order to use. When omitted, the sorted
        distinct values of ``y`` (``np.unique(y)``) are used, which is the
        original behaviour. Pass the same sequence for every data split so
        that row i means the same class everywhere, even if a split happens
        to be missing one of the classes.

    Returns
    -------
    ndarray of float, shape (k, m)
        Entry [i, j] is 1 when sample j carries label ``names[i]`` and 0
        otherwise. Samples whose label is not in ``names`` stay all-zero.

    Raises
    ------
    IndexError
        If there are more class labels than rows ``k``.
    """
    if names is None:
        names = np.unique(y)
    if len(names) > k:
        raise IndexError(
            "found %d class labels but only k=%d rows are available"
            % (len(names), k)
        )

    y_relabeled = np.zeros((k, y.shape[0]))
    for i, name in enumerate(names):
        # Set every matching sample of this class in one indexed assignment.
        y_relabeled[i, np.where(y == name)[0]] = 1
    return y_relabeled

def flatten_set(x):
    """Flatten a stack of images into one column per image.

    Parameters
    ----------
    x : ndarray
        Array whose LAST axis indexes the images, e.g. (height, width, M).

    Returns
    -------
    ndarray, shape (N, M)
        N = number of values per image, M = number of images. Column i holds
        ``x[..., i]`` flattened in row-major (C) order. The result is always
        2-D, including for a single image (N, 1) or no images (N, 0), and it
        never shares memory with ``x``.
    """
    x = np.asarray(x)
    num_images = x.shape[-1]
    num_pixels = int(np.prod(x.shape[:-1]))
    # A C-order reshape that keeps the last axis intact places x[..., i].flatten()
    # in column i, which is what stacking the flattened images and transposing
    # produced, but without the repeated vstack copies.
    return x.reshape(num_pixels, num_images).copy()

def cost_vec(x,y,theta):
    """Return the sum of squared differences between y and theta^T x."""
    predictions = theta.T.dot(x)
    residuals = y - predictions
    return np.square(residuals).sum()

def dcost_dtheta_vec(x, y, theta):
    """Return the gradient of cost_vec's sum-of-squares cost with respect to theta."""
    residuals = theta.T.dot(x) - y
    return 2 * np.dot(x, residuals.T)

def grad_desc_vec(cost, dcost_dtheta, x, y, init_theta, alpha, max_iter,
                  eps=1e-4, log_every=2000):
    """Minimise ``cost`` over theta with fixed-step gradient descent.

    Parameters
    ----------
    cost : callable
        ``cost(x, y, theta)`` -> scalar; only used for progress logging.
    dcost_dtheta : callable
        ``dcost_dtheta(x, y, theta)`` -> gradient with theta's shape.
    x, y : ndarray
        Passed through unchanged to ``cost`` and ``dcost_dtheta``.
    init_theta : ndarray
        Starting point. It is copied, never modified.
    alpha : float
        Step size (learning rate).
    max_iter : int
        Iteration bound. The counter starts at 1 and the loop runs while
        ``itr < max_iter``, so at most ``max_iter - 1`` updates are applied.
    eps : float, optional
        Stop once the norm of the last update is <= eps (default 1e-4, the
        value that used to be hard-coded). Must be positive.
    log_every : int, optional
        Print the iteration number and current cost every ``log_every``
        iterations (default 2000). Use 0 or None to silence logging.

    Returns
    -------
    ndarray
        The final theta.
    """
    # Offset every entry by 10*eps so the first convergence test always passes.
    prev_t = init_theta - 10 * eps
    t = init_theta.copy()

    itr = 1
    while np.linalg.norm(t - prev_t) > eps and itr < max_iter:
        prev_t = t.copy()
        t -= alpha * dcost_dtheta(x, y, t)

        if log_every and itr % log_every == 0:
            # %-formatting inside print(...) gives identical output on
            # Python 2 and Python 3 (the old `print "Iter", itr` statement
            # is a syntax error on Python 3).
            print("Iter %d" % itr)
            print("cost(x) = %.2f" % cost(x, y, t))

        itr += 1
    return t

def pred_onehot(x,theta):
    """Turn the linear scores theta^T x into 0/1 predictions per column.

    Parameters
    ----------
    x : ndarray, shape (n, m)
        One column per sample.
    theta : ndarray, shape (n, k)
        One column of weights per class.

    Returns
    -------
    ndarray of int, shape (k, m)
        For every column, 1 in the row(s) holding that column's maximum
        score and 0 elsewhere. Exact ties mark every tied row.
    """
    scores = np.dot(theta.T, x)
    # Take each column's maximum once, from the untouched scores. Recomputing
    # it after zeroing the non-max entries (as was done before) picks 0 as
    # the "maximum" whenever the true maximum is negative or zero, which
    # marked the wrong rows.
    column_max = scores.max(axis=0)
    return (scores == column_max).astype(int)

def performance_onehot(y_pred,y_actual):
    """Percentage of columns where the prediction equals the label exactly.

    Both arguments are indexed as (k, m) matrices; m is taken from the last
    axis of y_actual.
    """
    num_samples = y_actual.shape[-1]
    num_correct = sum(
        1
        for col in range(num_samples)
        if np.array_equal(y_pred[:, col], y_actual[:, col])
    )
    return float(num_correct) / num_samples * 100

In [6]:
#get data ready
num_actors = 6
# One-hot encode the labels of every split into a (num_actors, m) matrix.
y_train, y_val, y_test = (
    name_to_onehot(labels, num_actors) for labels in (y_train, y_val, y_test)
)

#prepare for gradient descent
# Flatten each image into a column and divide by 255.0
# (presumably to scale 8-bit pixel values into [0, 1] - confirm against the data).
x_train, x_val, x_test = (
    flatten_set(images) / 255.0 for images in (x_train, x_val, x_test)
)

# Prepend a row of ones so the first row of theta acts as the bias term.
x_train_w_bias = np.concatenate((np.ones((1, x_train.shape[1])), x_train), axis=0)
x_val_w_bias = np.concatenate((np.ones((1, x_val.shape[1])), x_val), axis=0)


In [130]:
# theta0 = np.random.normal(0,0.01, (x_train.shape[0]+1, y_train.shape[0]))
# theta_complete = grad_desc_vec(cost_vec, dcost_dtheta_vec, x_train_w_bias, y_train, theta0, 0.000001,200000)
# Took a long time to run. Got cost down to 15.63?
#performance: (train, val) (100.0, 65.0)

# theta0 = np.random.normal(0,0.005, (x_train.shape[0]+1, y_train.shape[0]))
# theta_complete = grad_desc_vec(cost_vec, dcost_dtheta_vec, x_train_w_bias, y_train, theta0, 0.0000001,200000)
# Iter 52000
# cost(x) = 148.69

# Draw small Gaussian initial weights: one row per input row plus one for
# the bias, one column per class. Then run gradient descent on the training set.
theta0 = np.random.normal(
    loc=0,
    scale=0.01,
    size=(x_train.shape[0] + 1, y_train.shape[0]),
)
theta_complete = grad_desc_vec(
    cost=cost_vec,
    dcost_dtheta=dcost_dtheta_vec,
    x=x_train_w_bias,
    y=y_train,
    init_theta=theta0,
    alpha=0.000001,
    max_iter=20000,
)
#cost: 88.24, performance: (97.61904761904762, 70.0)

#theta0 = np.random.normal(0,0.01, (x_train.shape[0]+1, y_train.shape[0]))
#theta_complete = grad_desc_vec(cost_vec, dcost_dtheta_vec, x_train_w_bias, y_train, theta0, 0.000001,10000)
#cost: 128, performance: (94.52380952380952, 68.33333333333333)

# theta0 = np.random.normal(0,0.01, (x_train.shape[0]+1, y_train.shape[0]))
# theta_complete = grad_desc_vec(cost_vec, dcost_dtheta_vec, x_train_w_bias, y_train, theta0, 0.000001,1000)
# cost: ?(76.19047619047619, 63.33333333333333)


# From this point on, epsilon is changed from 1e-5 to 1e-4.
# theta0 = np.random.normal(0,0.01, (x_train.shape[0]+1, y_train.shape[0]))
# theta_complete = grad_desc_vec(cost_vec, dcost_dtheta_vec, x_train_w_bias, y_train, theta0, 0.000001,30000)
# cost(x) = 164.99,(89.52380952380953, 70.0)

# theta0 = np.random.normal(0,0.01, (x_train.shape[0]+1, y_train.shape[0]))
# theta_complete = grad_desc_vec(cost_vec, dcost_dtheta_vec, x_train_w_bias, y_train, theta0, 0.00001,30000)
#overflow




Iter 2000
cost(x) = 200.41
Iter 4000
cost(x) = 163.89


In [131]:
# Training split: predict with the learned weights, then score.
y_pred_train = pred_onehot(x_train_w_bias, theta_complete)
performance_train = performance_onehot(y_pred_train, y_train)

# Validation split: same procedure.
y_pred_val = pred_onehot(x_val_w_bias, theta_complete)
performance_val = performance_onehot(y_pred_val, y_val)

# Accuracy in percent, as (train, validation).
print(performance_train,performance_val)

(89.52380952380953, 71.66666666666667)


In [132]:
 # Write the trained weight matrix to disk.
 np.save(file="thetas_part7.npy", arr=theta_complete)