In [1]:
'''
PURPOSE: build classifier to predict actor using one-hot-encoding
         obtain performance on train and test sets

Remember: X = np.random.rand(n, m)
Y = np.random.rand(k, m)
theta = np.random.rand(n, k)

'''

'\nPURPOSE: build classifier to predict actor using one-hot-encoding\n         obtain performance on train and test sets\n\nRemember: X = np.random.rand(n, m)\nY = np.random.rand(k, m)\ntheta = np.random.rand(n, k)\n\n'

In [2]:
import numpy as np
import random

In [3]:
# Seed BOTH generators. The initial weights further down are drawn with
# np.random.normal, which is not affected by the stdlib `random` seed, so
# seeding only `random` leaves the training run non-reproducible.
random.seed(5)
np.random.seed(5)

In [4]:
x_train = np.load("x_train.npy")
y_train = np.load("y_train.npy")
x_val = np.load("x_val.npy")
y_val = np.load("y_val.npy")
x_test = np.load("x_test.npy")
y_test = np.load("y_test.npy")

In [92]:
def name_to_onehot(y, k, names=None):
    """Convert a vector of class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like
        One label per sample (m samples, indexed along axis 0).
    k : int
        Number of rows (classes) in the returned matrix.
    names : sequence, optional
        The label that each output row stands for, in row order. Defaults to
        ``np.unique(y)``. Pass the same sequence for every data split so that
        row i always means the same class, even if a split happens to be
        missing one of the classes.

    Returns
    -------
    numpy.ndarray
        Float array of shape (k, m); entry [i, j] is 1 when ``y[j] == names[i]``
        and 0 otherwise. Samples whose label is not in ``names`` get an
        all-zero column.

    Raises
    ------
    IndexError
        If there are more class names than rows (``len(names) > k``).
    """
    y = np.asarray(y)
    if names is None:
        names = np.unique(y)
    if len(names) > k:
        raise IndexError(
            "got %d class names but only k=%d one-hot rows" % (len(names), k))

    y_relabeled = np.zeros((k, y.shape[0]))
    for i, name in enumerate(names):
        # Mark every sample carrying this label in one indexed assignment.
        y_relabeled[i, np.where(y == name)[0]] = 1
    return y_relabeled

def flatten_set(x):
    """Flatten a stack of images into a (pixels, images) matrix.

    Parameters
    ----------
    x : array-like
        Image stack whose LAST axis indexes the images, e.g. (H, W, M).

    Returns
    -------
    numpy.ndarray
        Array of shape (N, M), where N is the number of pixels per image and
        M the number of images. Column i equals ``x[..., i].flatten()``.
        The result is a fresh array and never aliases ``x``.
    """
    x = np.asarray(x)
    num_images = x.shape[-1]
    num_pixels = int(np.prod(x.shape[:-1]))
    # One C-order reshape yields the same layout as flattening each image and
    # stacking them column-wise, without the quadratic cost of repeated
    # vstack calls. It also keeps the result 2-D for a single image and
    # works for an empty stack.
    return x.reshape(num_pixels, num_images).copy()

def cost_vec(x,y,theta):
    """Sum of squared errors of the linear model ``theta.T . x`` against ``y``.

    With the notebook's conventions x is (n, m), y is (k, m) and theta is
    (n, k); the result is a single scalar summed over all outputs and samples.
    """
    residual = y - theta.T.dot(x)
    return np.square(residual).sum()

def dcost_dtheta_vec(x, y, theta):
    """Gradient of the sum-of-squares cost with respect to ``theta``.

    Returns an array shaped like ``theta`` ((n, k) for x of shape (n, m) and
    y of shape (k, m)).
    """
    prediction_error = theta.T.dot(x) - y
    gradient = x.dot(prediction_error.T)
    return 2 * gradient

def grad_desc_vec(cost, dcost_dtheta, x, y, init_theta, alpha, max_iter, log_every=2000):
    """Minimise ``cost`` by fixed-step gradient descent.

    Parameters
    ----------
    cost : callable
        ``cost(x, y, theta) -> scalar``; only evaluated for progress logging.
    dcost_dtheta : callable
        ``dcost_dtheta(x, y, theta) -> array`` shaped like ``theta``.
    x, y : arrays passed through unchanged to ``cost`` and ``dcost_dtheta``.
    init_theta : array-like
        Starting point. It is copied and never modified.
    alpha : float
        Step size (learning rate).
    max_iter : int
        Iteration cap. The counter starts at 1 and the loop runs while
        ``itr < max_iter``, so at most ``max_iter - 1`` updates are applied.
    log_every : int, optional
        Print the iteration number and current cost every ``log_every``
        iterations (default 2000). Pass 0 or None to disable logging.

    Returns
    -------
    numpy.ndarray
        The final theta. Descent stops early once the norm of one update
        falls to EPS (1e-5) or below.
    """
    EPS = 1e-5   #EPS = 10**(-5)

    # Work on a floating-point copy: the in-place update below cannot be cast
    # back into an integer array, and the caller's init_theta must stay intact.
    t = np.array(init_theta)
    if not np.issubdtype(t.dtype, np.inexact):
        t = t.astype(float)
    # Offset the "previous" point so the first convergence check always passes.
    prev_t = t - 10*EPS

    itr = 1
    while np.linalg.norm(t - prev_t) > EPS and itr < max_iter:
        prev_t = t.copy()
        t -= alpha*dcost_dtheta(x, y, t)

        if log_every and itr % log_every == 0:
            # Function-call form with a single string prints identically on
            # Python 2 and Python 3.
            print("Iter %d" % itr)
            print("cost(x) = %.2f" % cost(x, y, t))

        itr += 1
    return t

def pred_onehot(x,theta):
    """Predict a one-hot class vector for every sample (column) of ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        Inputs of shape (n, m), one sample per column.
    theta : numpy.ndarray
        Weights of shape (n, k).

    Returns
    -------
    numpy.ndarray
        Integer array of shape (k, m) with exactly one 1 per column, placed
        at the row with the highest score ``theta.T . x``. Ties go to the
        lowest row index.
    """
    scores = np.dot(theta.T, x)
    # Use argmax rather than "zero the non-max entries, then set the max to
    # 1": when every score in a column is negative, the zeroed entries become
    # the new maximum and the wrong rows end up marked.
    winners = np.argmax(scores, axis=0)
    onehot = np.zeros(scores.shape, dtype=int)
    onehot[winners, np.arange(scores.shape[1])] = 1
    return onehot

def performance(y_pred,y_actual):
    """Percentage of samples whose prediction matches the true label.

    Parameters
    ----------
    y_pred, y_actual : array-like of identical shape
        Either 1-D label vectors (one entry per sample) or one-hot matrices
        of shape (k, m) with one sample per column, the layout this notebook
        uses for Y. For matrices a sample counts as correct only when its
        whole column matches.

    Returns
    -------
    float
        Accuracy in percent (0-100).

    Raises
    ------
    ValueError
        If the two inputs have different shapes.
    ZeroDivisionError
        If there are no samples.
    """
    y_pred = np.asarray(y_pred)
    y_actual = np.asarray(y_actual)
    if y_pred.shape != y_actual.shape:
        raise ValueError("shape mismatch: y_pred %s vs y_actual %s"
                         % (y_pred.shape, y_actual.shape))

    matches = y_pred == y_actual
    if matches.ndim > 1:
        # Samples live on the last axis; collapse the class axis so each
        # sample contributes a single True/False.
        matches = matches.reshape(-1, matches.shape[-1]).all(axis=0)

    test_size = matches.shape[0]
    return float(np.count_nonzero(matches)) / test_size * 100

In [6]:
#get data ready
# NOTE: this cell overwrites y_* and x_* in place, so it is not idempotent.
# Re-run the loading cell before executing it a second time.
# NOTE(review): name_to_onehot orders rows by np.unique of the labels it is
# given, so the three encodings only agree if every split contains all
# actors -- confirm.
num_actors = 6
y_train = name_to_onehot(y_train, num_actors)
y_val = name_to_onehot(y_val, num_actors)
y_test = name_to_onehot(y_test, num_actors)

#prepare for gradient descent
# Scale pixel values by 1/255 after flattening each set to (pixels, images).
x_train = flatten_set(x_train) / 255.0
x_val = flatten_set(x_val) / 255.0
x_test = flatten_set(x_test) / 255.0

# Prepend a row of ones so the first row of theta acts as the bias term.
x_train_w_bias = np.vstack( (np.ones((1, x_train.shape[1])), x_train))
x_val_w_bias = np.vstack( (np.ones((1, x_val.shape[1])), x_val))
# The test set needs the same bias row before it can be multiplied by theta.
x_test_w_bias = np.vstack( (np.ones((1, x_test.shape[1])), x_test))


In [8]:
# Notes from earlier runs (same call as below unless stated otherwise):
#   scale=0.01,  alpha=1e-6, max_iter=200000
#       -> took a long time to run. Got cost down to 15.63?
#   scale=0.005, alpha=1e-7, max_iter=200000
#       -> Iter 52000, cost(x) = 148.69

# Small random initial weights: one row per pixel plus one for the bias row,
# one column per one-hot output.
theta0 = np.random.normal(
    loc=0,
    scale=0.01,
    size=(x_train.shape[0] + 1, y_train.shape[0]),
)
theta_complete = grad_desc_vec(
    cost=cost_vec,
    dcost_dtheta=dcost_dtheta_vec,
    x=x_train_w_bias,
    y=y_train,
    init_theta=theta0,
    alpha=1e-6,
    max_iter=200000,
)



Iter 2000
cost(x) = 201.62
Iter 4000
cost(x) = 164.26
Iter 6000
cost(x) = 142.46
Iter 8000
cost(x) = 127.40
Iter 10000
cost(x) = 116.11
Iter 12000
cost(x) = 107.18
Iter 14000
cost(x) = 99.86
Iter 16000
cost(x) = 93.68
Iter 18000
cost(x) = 88.37
Iter 20000
cost(x) = 83.73
Iter 22000
cost(x) = 79.62
Iter 24000
cost(x) = 75.94
Iter 26000
cost(x) = 72.62
Iter 28000
cost(x) = 69.60
Iter 30000
cost(x) = 66.84
Iter 32000
cost(x) = 64.29
Iter 34000
cost(x) = 61.94
Iter 36000
cost(x) = 59.76
Iter 38000
cost(x) = 57.73
Iter 40000
cost(x) = 55.83
Iter 42000
cost(x) = 54.05
Iter 44000
cost(x) = 52.38
Iter 46000
cost(x) = 50.81
Iter 48000
cost(x) = 49.32
Iter 50000
cost(x) = 47.92
Iter 52000
cost(x) = 46.59
Iter 54000
cost(x) = 45.32
Iter 56000
cost(x) = 44.12
Iter 58000
cost(x) = 42.98
Iter 60000
cost(x) = 41.89
Iter 62000
cost(x) = 40.85
Iter 64000
cost(x) = 39.85
Iter 66000
cost(x) = 38.90
Iter 68000
cost(x) = 37.99
Iter 70000
cost(x) = 37.11
Iter 72000
cost(x) = 36.28
Iter 74000
cost(x) = 35.47

In [200]:
#theta_complete = grad_desc_vec(cost_vec, dcost_dtheta_vec, x_train_w_bias, y_train, theta0, 0.000001,200000)

In [None]:
# Evaluate the trained weights on the training and validation sets.
y_pred_train = pred_onehot(x=x_train_w_bias, theta=theta_complete)

y_pred_val = pred_onehot(x=x_val_w_bias, theta=theta_complete)

performance_train = performance(y_pred_train,y_train)
performance_val = performance(y_pred_val,y_val)

# `performance_val_others` is not defined anywhere in this notebook, so the
# original print raised NameError; report only the two values computed here.
print("train: %.2f%%, val: %.2f%%" % (performance_train, performance_val))

In [96]:
# Sanity check: compare pred_onehot against a by-hand thresholding of the raw
# scores for one training sample (column 250).

pred_test = pred_onehot(x=x_train_w_bias, theta=theta_complete)
print(pred_test[:, 250])
# -> the prediction output looks correct

out = theta_complete.T.dot(x_train_w_bias)

# Raw scores for the same sample, before any thresholding.
out_example = theta_complete.T.dot(x_train_w_bias)[:, 250]
print(out_example)

# Same two steps pred_onehot applies: zero everything that is not the
# maximum, then set the maximum to 1.
out_example[out_example != max(out_example)] = 0
out_example[out_example == max(out_example)] = 1
print(out_example)

[0 0 0 1 0 0]
[-0.096004   -0.03176723  0.05710696  1.01810195 -0.00665044  0.05564966]
[ 0.  0.  0.  1.  0.  0.]


In [63]:
 # Write the trained weight matrix to disk as "thetas_part7.npy".
 np.save("thetas_part7.npy",theta_complete)

In [72]:
#testing .astype
# Scratch data for the cast check in the next cell: a 2x3 array of random floats.
y = np.random.random((2,3))
# Last expression in the cell, so the array is displayed below.
y.view()

array([[ 0.56501352,  0.53894336,  0.42900461],
       [ 0.37450614,  0.48520647,  0.68545245]])

In [76]:
# Casting to int drops the fractional part: every value shown above is below 1,
# so the whole array becomes 0.
y.astype(int)

array([[0, 0, 0],
       [0, 0, 0]])