In [187]:
import json
import numpy as np


###### Q1.1 ######
def objective_function(X, y, w, lamb):
    """
    Evaluate the primal soft-margin SVM objective used by Pegasos:

        0.5 * lamb * ||w||^2 + (1 / N) * sum_i max(0, 1 - y_i * <w, x_i>)

    Inputs:
    - X: A 2 dimensional array-like of data (number of samples x number of features)
    - y: An array-like of N labels; shapes (N,), (N, 1) and (1, N) are all accepted
    - w: A D-dimension weight vector, given either as shape (D,) or (D, 1)
    - lamb: lambda used in pegasos algorithm

    Return:
    - obj_value: the value of objective function in SVM primal formulation (a float)

    Raises:
    - ValueError: if X contains no samples (the average hinge loss is undefined).
    """
    X = np.asarray(X, dtype=float)
    N = X.shape[0]
    if N == 0:
        raise ValueError("objective_function requires at least one sample")

    # Flatten labels and weights to 1-D so the element-wise product below can
    # never broadcast into an (N, N) matrix (which happens when a column-vector
    # y meets a row of scores) and silently inflate the loss.
    y = np.asarray(y, dtype=float).ravel()
    w = np.asarray(w, dtype=float).ravel()

    margins = y * X.dot(w)                    # y_i * <w, x_i>, shape (N,)
    hinge = np.maximum(0.0, 1.0 - margins)    # per-sample hinge loss
    obj_value = 0.5 * lamb * w.dot(w) + hinge.sum() / N

    return float(obj_value)


###### Q1.2 ######
def pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations):
    """
    Train a linear SVM with the mini-batch Pegasos algorithm.

    Inputs:
    - Xtrain: A list of num_train elements, where each element is a list of D-dimensional features.
    - ytrain: A list of num_train labels
    - w: a numpy array of D elements as a D-dimension vector, which is the weight vector and initialized to be all 0s.
         Both shape (D,) and shape (D, 1) are accepted.
    - lamb: lambda used in pegasos algorithm
    - k: mini-batch size
    - max_iterations: the total number of iterations to update parameters

    Returns:
    - learnt w, in the same shape as the w that was passed in
    - train_obj: a list of the objective function value on the training set
      after each iteration, length of max_iterations.
    """
    # Reseeds NumPy's global RNG so every call draws the same mini-batches.
    np.random.seed(0)
    Xtrain = np.asarray(Xtrain, dtype=float)
    ytrain = np.asarray(ytrain, dtype=float).ravel()
    N, D = Xtrain.shape

    # Work on a flat (D,) vector internally. Mixing a (D,) weight vector with a
    # (D, 1) gradient term would broadcast the update into a (D, D) matrix.
    w = np.asarray(w, dtype=float)
    w_shape = w.shape
    w = w.reshape(D)

    train_obj = []

    for t in range(1, max_iterations + 1):
        A_t = np.floor(np.random.rand(k) * N).astype(int)  # index of the current mini-batch
        X_t = Xtrain[A_t]
        y_t = ytrain[A_t]

        # Step 4: A_t+ -- mini-batch samples that violate the margin (y * <w, x> < 1).
        violators = y_t * X_t.dot(w) < 1
        X_plus = X_t[violators]
        y_plus = y_t[violators]

        # Step 5: learning rate.
        eta_t = 1.0 / (lamb * t)

        # Step 6: sub-gradient step on the mini-batch objective.
        grad_sum = (y_plus[:, None] * X_plus).sum(axis=0)
        w_half = (1 - eta_t * lamb) * w + (eta_t / k) * grad_sum

        # Step 7: project onto the ball of radius 1 / sqrt(lamb).
        # A zero vector is already inside the ball, so skip the division.
        half_norm = np.linalg.norm(w_half)
        if half_norm > 0:
            w = w_half * min(1, 1 / (np.sqrt(lamb) * half_norm))
        else:
            w = w_half

        # Track the objective on the full training set: the primal objective
        # averages over all N samples, and the value on the mini-batch that was
        # just used for the update is a noisy estimate (a single sample's loss
        # when k = 1).
        train_obj.append(objective_function(Xtrain, ytrain, w, lamb))

    return w.reshape(w_shape), train_obj


###### Q1.3 ######
def pegasos_test(Xtest, ytest, w_l):
    """
    Compute the classification accuracy of a linear SVM on a test set.

    A sample is predicted positive when <w_l, x> > 0 and negative otherwise;
    a label counts as positive when it is > 0.

    Inputs:
    - Xtest: A list of num_test elements, where each element is a list of D-dimensional features.
    - ytest: A list of num_test labels
    - w_l: a numpy array of D elements as a D-dimension vector, which is the weight vector of SVM classifier and learned by pegasos_train().
           Both shape (D,) and shape (D, 1) are accepted.

    Returns:
    - test_acc: testing accuracy, the fraction of samples whose predicted sign matches the label sign.

    Raises:
    - ValueError: if Xtest contains no samples.
    """
    Xtest = np.asarray(Xtest, dtype=float)
    if Xtest.shape[0] == 0:
        raise ValueError("pegasos_test requires at least one sample")
    ytest = np.asarray(ytest, dtype=float).ravel()
    w_l = np.asarray(w_l, dtype=float).ravel()

    scores = Xtest.dot(w_l)
    predicted_positive = scores > 0
    actual_positive = ytest > 0

    # Compare the predicted sign with the label sign directly. Multiplying the
    # score by the label first already yields a "correctly classified" mask, and
    # comparing that mask with (ytest > 0) would count misclassified negatives
    # as hits.
    test_acc = float(np.mean(predicted_positive == actual_positive))

    return test_acc


"""
NO MODIFICATIONS below this line.
You should only write your code in the above functions.
"""

def data_loader_mnist(dataset):
    """
    Load the train/valid/test splits from a JSON file and prepare them for Pegasos.

    Inputs:
    - dataset: path of a JSON file holding the keys 'train', 'valid' and 'test';
      element 0 of each entry is used as the features and element 1 as the labels.

    Returns:
    - Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest. Every X is a list of rows
      with a constant 1 prepended as the first feature. ytrain and ytest are
      rewritten in place to -1. (original label < 5) or 1. (otherwise).
    """

    with open(dataset, 'r') as f:
            data_set = json.load(f)
    train_set, valid_set, test_set = data_set['train'], data_set['valid'], data_set['test']

    Xtrain = train_set[0]
    ytrain = train_set[1]
    Xvalid = valid_set[0]
    yvalid = valid_set[1]
    Xtest = test_set[0]
    ytest = test_set[1]

    ## below we add 'one' to the feature of each sample, such that we include the bias term into parameter w
    Xtrain = np.hstack((np.ones((len(Xtrain), 1)), np.array(Xtrain))).tolist()
    Xvalid = np.hstack((np.ones((len(Xvalid), 1)), np.array(Xvalid))).tolist()
    Xtest = np.hstack((np.ones((len(Xtest), 1)), np.array(Xtest))).tolist()

    # Turn the labels into a binary problem: values below 5 become -1., the rest +1.
    # NOTE(review): yvalid is returned as loaded, without this relabelling --
    # confirm that is intended before using the validation split.
    for i, v in enumerate(ytrain):
        if v < 5:
            ytrain[i] = -1.
        else:
            ytrain[i] = 1.
    for i, v in enumerate(ytest):
        if v < 5:
            ytest[i] = -1.
        else:
            ytest[i] = 1.

    return Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest


def pegasos_mnist():
    """
    Run the Pegasos experiments on 'mnist_subset.json'.

    Trains for 500 iterations from an all-zero (D, 1) weight vector, first with
    k = 100 for lambda in (0.01, 0.1, 1), then with lambda = 0.1 for
    k in (1, 10, 1000).

    Returns:
    - test_acc: dict mapping 'k=<k>_lambda=<lambda>' to the value returned by pegasos_test()
    - train_obj: dict mapping the same keys to the objective list returned by pegasos_train()
    """

    test_acc = {}
    train_obj = {}

    Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest = data_loader_mnist(dataset = 'mnist_subset.json')

    max_iterations = 500
    k = 100
    # Sweep the regularisation strength at a fixed mini-batch size.
    for lamb in (0.01, 0.1, 1):
        w = np.zeros((len(Xtrain[0]), 1))
        w_l, train_obj['k=' + str(k) + '_lambda=' + str(lamb)] = pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations)
        test_acc['k=' + str(k) + '_lambda=' + str(lamb)] = pegasos_test(Xtest, ytest, w_l)

    lamb = 0.1
    # Sweep the mini-batch size at a fixed regularisation strength.
    for k in (1, 10, 1000):
        w = np.zeros((len(Xtrain[0]), 1))
        w_l, train_obj['k=' + str(k) + '_lambda=' + str(lamb)] = pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations)
        test_acc['k=' + str(k) + '_lambda=' + str(lamb)] = pegasos_test(Xtest, ytest, w_l)

    return test_acc, train_obj


def main():
    """Run the MNIST experiments, print each test accuracy and dump the results to 'pegasos.json'."""
    test_acc, train_obj = pegasos_mnist() # results on mnist
    print('mnist test acc \n')
    for key, value in test_acc.items():
        print('%s: test acc = %.4f \n' % (key, value))

    # The JSON file holds a two-element list: [test_acc, train_obj].
    with open('pegasos.json', 'w') as f_json:
        json.dump([test_acc, train_obj], f_json)


#if __name__ == "__main__":
#    main()


## 1.1

In [102]:
Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest = data_loader_mnist(dataset = 'mnist_subset.json')

In [103]:
type(Xtrain)

list

In [128]:
X = np.array(Xtrain)
y = np.array(ytrain)
X.shape[1]
D = X.shape[1]
w = np.ones((D,1))
lamb = 0.5

In [138]:
    # Scratch evaluation of the SVM primal objective on the X, y, w, lamb set up in an earlier cell.
    # 0.5 * lamb * ||w||^2 + 1/N sum(max(0,1-ywx)) 
    
    X = np.array(X)
    y = np.array(y)

    # NOTE(review): D is assigned here but not used in this cell, while N (used
    # below) is never assigned in this cell -- it only runs when N is already
    # present in the kernel namespace from some other cell.
    D = X.shape[1]
    z = 1- np.multiply(y, np.transpose(np.dot(X,w)))
    z = z[z>0]  # keep only the positive hinge terms
    obj_value = 0.5 * lamb * (np.linalg.norm(w) **2 ) + sum(z) / N
    print(obj_value)

248.8182765625


In [106]:
np.linalg.norm(np.array([3,4]))**2

25.0

In [130]:
np.dot(X,w)

array([[ 41.63671875],
       [101.91015625],
       [130.578125  ],
       ...,
       [190.23828125],
       [ 97.51953125],
       [132.16796875]])

In [131]:
(np.dot(X,w)).shape

(5000, 1)

In [108]:
np.multiply(y, np.dot(X,w))

array([  41.63671875,  101.91015625, -130.578125  , ..., -190.23828125,
         97.51953125,  132.16796875])

In [109]:
(np.multiply(y, np.dot(X,w))).shape

(5000,)

In [110]:
w.shape

(785,)

In [32]:
1- np.multiply(y, np.dot(X,w))

array([ -40.63671875, -100.91015625,  131.578125  , ...,  191.23828125,
        -96.51953125, -131.16796875])

In [137]:
(1- np.multiply(y, np.transpose(np.dot(X,w)))).shape

(1, 5000)

In [35]:
z = 1 - np.multiply(y, np.dot(X,w))
z = z[z>0]

In [40]:
A = np.array([[1,2],[0,1]])
b = np.array([0,1])
np.dot(A,b)

array([2, 1])

## 1.2

In [193]:
k = 1000
max_iterations = 500
w = np.zeros((D,1))
lamb = 0.5

In [194]:
    """
    Inputs:
    - Xtrain: A list of num_train elements, where each element is a list of D-dimensional features.
    - ytrain: A list of num_train labels
    - w: a numpy array of D elements as a D-dimension vector, which is the weight vector and initialized to be all 0s
    - lamb: lambda used in pegasos algorithm
    - k: mini-batch size
    - max_iterations: the total number of iterations to update parameters

    Returns:
    - learnt w
    - train_obj: a list of the objective function value at each iteration during the training process, length of 500.
    """
    # Scratch copy of the pegasos_train body, run at cell scope.
    # NOTE(review): it reads Xtrain, ytrain, w, lamb, k and max_iterations from
    # the kernel namespace and rebinds Xtrain, ytrain, w and train_obj there, so
    # later cells see the values left behind by this run.
    np.random.seed(0)
    Xtrain = np.array(Xtrain)
    ytrain = np.array(ytrain)
    N = Xtrain.shape[0]
    D = Xtrain.shape[1]

    train_obj = []

    for iter in range(1, max_iterations + 1):
#    for iter in range(1,2):
        A_t = np.floor(np.random.rand(k) * N).astype(int)  # index of the current mini-batch

        # you need to fill in your solution here
        X_t = Xtrain[A_t]
        y_t = ytrain[A_t]
        
        # 4
        # Select the mini-batch indices whose margin y * <w, x> is below 1.
        #A_tpls = A_t[np.multiply(y_t, np.dot(X_t,w)) < 1]
        A_tpls = A_t[(np.multiply(y_t, np.transpose(np.dot(X_t,w)) )<1).ravel()]
        X_tpls = Xtrain[A_tpls]
        y_tpls = ytrain[A_tpls]
        
        # 5
        ita_t = 1/(lamb * iter)
        
        # 6
        # The gradient term is reshaped to (D, 1), so w must already be (D, 1);
        # a (D,) w would broadcast this sum into a (D, D) matrix.
        w_thalf = (1-ita_t * lamb) * w + ita_t/k * np.sum(
            np.multiply(y_tpls.reshape(y_tpls.shape[0],1),X_tpls),axis=0).reshape(D,1)
        
        # 7
        # Rescale so that the norm of w is at most 1 / sqrt(lamb).
        w = w_thalf * min(1,1/(np.sqrt(lamb) * np.linalg.norm(np.array(w_thalf))))
        
        # The objective is evaluated on the current mini-batch (X_t, y_t) only.
        train_obj.append(objective_function(X_t, y_t, w, lamb))
        #print(train_obj[iter-1])
        #print(w[0])
    #print(train_obj)


    #return w, train_obj

In [51]:
A_t

array([4054, 1740, 1057,  296, 4380, 4592,  600, 1672,  876,  579, 4499,
        284, 4902,  482, 4317, 2832, 1839, 1711, 3786, 1572, 3286, 2586,
       2424, 4505, 2773, 4134, 3627,  192, 3865, 1084, 4515,  214, 1665,
        498, 2377, 4100, 1490,  754, 1651, 4069,  701, 1136,  344, 3528,
       1976, 1554, 3593, 1679, 3638, 4075, 1088, 4869,  811, 1454,  898,
       1727, 2400, 2610, 4268, 4447, 1100, 3114,  557, 2294, 1611, 1582,
       2412, 3649,  345, 4395, 3674,  882, 4695, 2531, 4999,  986, 2674,
       1451, 1520, 2955, 4608, 4026, 3619, 2795, 4611, 2461, 4369, 4169,
       1069, 3856,   60, 1614, 1147, 2534, 3684,  488, 2574, 4692, 1143,
       3385])

In [197]:
print(type(train_obj))
print(len(train_obj))

<class 'list'>
500


In [175]:
y_tpls.shape

(1000,)

In [198]:
w.shape

(785, 1)

In [183]:
w1 = np.sum(np.multiply(y_tpls.reshape(y_tpls.shape[0],1),X_tpls),axis=0).reshape(D,1)
w1.shape

(785, 1)

In [170]:
print(A_t.shape)
print(A_tpls.shape)
print((np.multiply(y_t, np.dot(X_t,w)) < 1).shape)
print((np.multiply(y_t, np.transpose(np.dot(X_t,w)) )<1).shape)
(np.multiply(y_t, np.transpose(np.dot(X_t,w)) )<1).ravel()

(1000,)
(1000,)
(1000, 1000)
(1, 1000)


array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [112]:
(np.multiply(y_t, np.dot(X_t,w))).shape

(1000,)

In [160]:
((np.multiply(y_t, np.dot(X_t,w)))<1).shape

(1000, 1000)

In [101]:
np.multiply(y_t, np.dot(X_t,w))

array([[ 0.,  0.,  0., ...,  0., -0., -0.],
       [ 0.,  0.,  0., ...,  0., -0., -0.],
       [ 0.,  0.,  0., ...,  0., -0., -0.],
       ...,
       [ 0.,  0.,  0., ...,  0., -0., -0.],
       [ 0.,  0.,  0., ...,  0., -0., -0.],
       [ 0.,  0.,  0., ...,  0., -0., -0.]])

In [99]:
print(A_t)
print(A_tpls)

[ 628 3819 4309 2998 1208 4396 1839 4390 3595 1413  874 2902 1686 3614
 2484 2570 4346 4159  309 3401  805 2301 2893 1844 1795 2240   74 4091
  566 1503 2202 4904  418 3682 3906  184 3969 1407  182 1263 4495  832
 1014 2979 1471 4556 1983 1422  986   28 1552 1767 3743 3366 2284 4079
 2027 2753 4921 1194  573  825 2931 2416   97 2975  727 4012 1248  364
 3643 2508 2233 1670  921 3738 2488  201 2850 2757 1033 4348 4179 1672
 3636 3298 2073 1867  866  686 1416 2698 2021 4434 3069 2322 2150  547
  687 2053 4231 4919 3705  857  132 2556  631 4576 4747 1571 1567 2385
 1241  586 2185  726 4522 2427 1330 2642 4604 3390  313 4742 4584 2860
 3863  889 3970 2391  805 1475  806 2512  663 4742 2422 4875 2198 2890
 1436 1187 1846  640 2070 1461 4801 4582 2767 4290  566 3310 4047 1015
 1053 1781 1787 4071 4064 3252 1590 2555 3818   36 4126 4103 4239 1496
 1682  973  366  918 2858  567 1392 4593 2235  878 4065  139 3834 4244
 2142 2213 3007 4135 1294 2589 4446 4888   58 4104 2419 2946  202 3668
 4054 

In [53]:
Xtrain[A_t].shape[0]

100

In [57]:
A_tpls

array([4054, 1740,  296, 4592, 1672, 4499,  284,  482, 1839, 3786, 1572,
       2424,  192, 3865, 4515, 1665, 2377, 1490, 4069, 1136,  344, 1976,
       1554, 3593, 3638, 1454, 4447, 1100, 1611, 2412, 3649, 3674, 4695,
       2531,  986, 2674, 1451, 4026, 3619, 4169, 1069, 3856,  488, 2574,
       4692])

In [59]:
ita_t

0.2

In [65]:
A = np.array([[1,2],[1,1],[0,1]])
b = np.array([1,-1,1])
np.multiply(b.reshape(b.shape[0],1),A)

array([[ 1,  2],
       [-1, -1],
       [ 0,  1]])

In [61]:
min(1,2)

1

In [68]:
w_thalf.shape

(785,)

In [69]:
c=[]

In [70]:
c.append(1)

In [71]:
c

[1]

In [73]:
iter

1

In [90]:
objective_function(X_t, y_t, w, lamb)

0.6404338979628443

In [91]:
objective_function(X_t, y_t, np.zeros(X_t.shape[1]), lamb)

1.0

In [88]:
np.dot(w,w.reshape(w.shape[0],1))

array([0.43231824])

In [87]:
np.linalg.norm(w) **2

0.4323182393174562

## 1.3

In [212]:
    max_iterations = 500
    k = 100
    lamb = 0.1
    w = np.zeros((len(Xtrain[0]), 1))
    w_l, train_obj= pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations)

In [240]:
    """
    Inputs:
    - Xtest: A list of num_test elements, where each element is a list of D-dimensional features.
    - ytest: A list of num_test labels
    - w_l: a numpy array of D elements as a D-dimension vector, which is the weight vector of SVM classifier and learned by pegasos_train()
 
    Returns:
    - test_acc: testing accuracy.
    """
    # Scratch copy of the pegasos_test body, run at cell scope on Xtest, ytest and w_l from earlier cells.
    # you need to fill in your solution here
    Xtest = np.array(Xtest)
    ytest = np.array(ytest)
    N = Xtest.shape[0]
    # NOTE(review): ywx is the sign of y * <w, x>, i.e. it is already True exactly
    # for the correctly classified samples. Comparing it with (ytest > 0) below
    # therefore counts correct positives plus *misclassified* negatives, so
    # test_acc is not the classification accuracy.
    ywx = (np.multiply(ytest, np.transpose(np.dot(Xtest,w_l))) > 0).ravel()
    ytru = ytest > 0
    test_acc = sum(ytru == ywx)/N


    #return test_acc

In [223]:
w.shape

(785, 1)

In [241]:
test_acc

0.568

In [224]:
len(train_obj)

500

In [229]:
np.multiply(ytest, np.transpose(np.dot(Xtest,w_l))).shape

(1, 1000)

In [236]:
ytest.shape

(1000,)

In [239]:
ywx = (np.multiply(ytest, np.transpose(np.dot(Xtest,w_l))) > 0).ravel()
ytru = ytest > 0
sum(ytru == ywx)

568

In [202]:
type([1,2,3])

list

In [235]:
a= np.array([1,2]) > 1
b = np.array([[-1],[2]])>0
a==b

array([[ True, False],
       [False,  True]])

In [234]:
a==np.transpose(b)

array([[ True,  True]])

In [203]:
li = [1,2,3]

In [204]:
# NOTE(review): train_obj is a list at this point (see the recorded TypeError),
# so it cannot take a string key; pegasos_mnist() uses a dict for this.
train_obj['k=' + str(k) + '_lambda=' + str(lamb)]  = li

TypeError: list indices must be integers or slices, not str