In [1]:
import json
import numpy as np


###### Q1.1 ######
def objective_function(X, y, w, lamb):
    """
    Evaluate the primal soft-margin SVM objective minimised by Pegasos:

        0.5 * lamb * ||w||^2 + (1/N) * sum_i max(0, 1 - y_i * <w, x_i>)

    Inputs:
    - X: A 2 dimensional array-like of data (number of samples x number of features)
    - y: An array-like of labels (length = number of samples); a flat vector
      or an (N, 1) column are both accepted
    - w: a numpy array of D elements as a D-dimension weight vector, either
      shape (D,) or shape (D, 1)
    - lamb: lambda used in pegasos algorithm

    Return:
    - obj_value: the value of objective function in SVM primal formulation
    """
    X = np.asarray(X, dtype=float)
    # Flatten labels and weights so the element-wise product below always has
    # shape (N,).  Without this, a column-shaped y multiplied with a row of
    # scores silently broadcasts to an (N, N) matrix and inflates the loss.
    y = np.asarray(y, dtype=float).ravel()
    w_flat = np.asarray(w, dtype=float).ravel()

    N = X.shape[0]
    # Hinge loss per sample: max(0, 1 - y_i * <w, x_i>).
    hinge = np.maximum(0.0, 1.0 - y * X.dot(w_flat))
    obj_value = 0.5 * lamb * np.dot(w_flat, w_flat) + np.sum(hinge) / N

    return obj_value


###### Q1.2 ######
def pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations):
    """
    Train a linear SVM with the mini-batch Pegasos algorithm.

    Inputs:
    - Xtrain: A list of num_train elements, where each element is a list of D-dimensional features.
    - ytrain: A list of num_train labels
    - w: a numpy array of D elements as a D-dimension vector, which is the weight vector and initialized to be all 0s;
      shape (D,) or (D, 1) are both accepted
    - lamb: lambda used in pegasos algorithm
    - k: mini-batch size
    - max_iterations: the total number of iterations to update parameters

    Returns:
    - learnt w, in the same shape as the w that was passed in
    - train_obj: a list of the objective function value (on the full training set) after each
      iteration during the training process, length of max_iterations.
    """
    np.random.seed(0)
    Xtrain = np.array(Xtrain)
    ytrain = np.array(ytrain).ravel()
    N = Xtrain.shape[0]
    D = Xtrain.shape[1]

    # Work on a (D, 1) column internally.  A flat (D,) vector added to the
    # (D, 1) gradient term would otherwise broadcast into a (D, D) matrix.
    w_shape = np.shape(w)
    w = np.asarray(w, dtype=float).reshape(D, 1)

    train_obj = []

    for t in range(1, max_iterations + 1):
        A_t = np.floor(np.random.rand(k) * N).astype(int)  # index of the current mini-batch

        X_t = Xtrain[A_t, :]
        y_t = ytrain[A_t]

        # Step 4: keep the mini-batch samples that violate the margin (y * <w, x> < 1).
        violating = y_t * np.dot(X_t, w).ravel() < 1
        X_tpls = X_t[violating]
        y_tpls = y_t[violating]

        # Step 5: learning rate for this iteration.
        eta_t = 1 / (lamb * t)

        # Step 6: sub-gradient step; the sum is normalised by the full batch size k.
        grad_sum = np.sum(y_tpls.reshape(-1, 1) * X_tpls, axis=0).reshape(D, 1)
        w_thalf = (1 - eta_t * lamb) * w + eta_t * grad_sum / k

        # Step 7: project onto the ball of radius 1 / sqrt(lamb).  An all-zero
        # vector already lies inside the ball, so skip the division by its norm.
        norm_thalf = np.linalg.norm(w_thalf)
        if norm_thalf > 0:
            w = w_thalf * min(1, 1 / np.sqrt(lamb) / norm_thalf)
        else:
            w = w_thalf

        train_obj.append(objective_function(Xtrain, ytrain, w, lamb))

    return w.reshape(w_shape), train_obj


###### Q1.3 ######
def pegasos_test(Xtest, ytest, w_l):
    """
    Compute the classification accuracy of a linear SVM on a test set.

    Inputs:
    - Xtest: A list of num_test elements, where each element is a list of D-dimensional features.
    - ytest: A list of num_test labels
    - w_l: a numpy array of D elements as a D-dimension vector, which is the weight vector of SVM classifier and learned by pegasos_train();
      shape (D,) or (D, 1) are both accepted

    Returns:
    - test_acc: testing accuracy, i.e. the fraction of samples with y * <w_l, x> > 0.
      A sample whose score is exactly 0 is counted as misclassified.

    Raises:
    - ValueError: if ytest is empty.
    """
    Xtest = np.asarray(Xtest, dtype=float)
    ytest = np.asarray(ytest, dtype=float).ravel()
    if ytest.size == 0:
        raise ValueError("pegasos_test requires at least one test sample")

    scores = Xtest.dot(np.asarray(w_l, dtype=float).ravel())
    # A prediction is correct when the label and the score share a sign.
    # (Comparing this mask with `ytest > 0` would count every correctly
    # classified negative sample as an error.)
    correct = ytest * scores > 0
    test_acc = float(np.mean(correct))

    return test_acc


"""
NO MODIFICATIONS below this line.
You should only write your code in the above functions.
"""

def data_loader_mnist(dataset):
    """
    Load a JSON data set and prepare it for binary classification.

    Inputs:
    - dataset: path of a JSON file with the keys 'train', 'valid' and 'test';
      element 0 of each entry is used as the features and element 1 as the labels.

    Returns:
    - Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest: the features are returned as
      nested lists with a leading constant 1 prepended to every sample; ytrain
      and ytest are mapped to -1. / 1.

    NOTE(review): yvalid is returned exactly as loaded - it is not mapped to
    -1. / 1. like ytrain and ytest.
    """

    with open(dataset, 'r') as f:
            data_set = json.load(f)
    train_set, valid_set, test_set = data_set['train'], data_set['valid'], data_set['test']

    # Index 0 holds the features, index 1 the labels.
    Xtrain = train_set[0]
    ytrain = train_set[1]
    Xvalid = valid_set[0]
    yvalid = valid_set[1]
    Xtest = test_set[0]
    ytest = test_set[1]

    ## below we add 'one' to the feature of each sample, such that we include the bias term into parameter w
    # The column of ones is stacked in front of the features, so the bias ends up in w[0].
    Xtrain = np.hstack((np.ones((len(Xtrain), 1)), np.array(Xtrain))).tolist()
    Xvalid = np.hstack((np.ones((len(Xvalid), 1)), np.array(Xvalid))).tolist()
    Xtest = np.hstack((np.ones((len(Xtest), 1)), np.array(Xtest))).tolist()

    # Binarise the labels in place: values below 5 become -1., everything else 1.
    # (presumably the original labels are the MNIST digit classes - confirm against the data file)
    for i, v in enumerate(ytrain):
        if v < 5:
            ytrain[i] = -1.
        else:
            ytrain[i] = 1.
    for i, v in enumerate(ytest):
        if v < 5:
            ytest[i] = -1.
        else:
            ytest[i] = 1.

    return Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest


def pegasos_mnist():
    """
    Run the Pegasos experiments on 'mnist_subset.json'.

    Two sweeps are performed, each run starting from an all-zero (D, 1) weight
    vector and training for 500 iterations:
    - k = 100 with lamb in (0.01, 0.1, 1)
    - lamb = 0.1 with k in (1, 10, 1000)

    Returns:
    - test_acc: dict mapping 'k=<k>_lambda=<lamb>' to the value returned by pegasos_test()
    - train_obj: dict mapping the same keys to the objective list returned by pegasos_train()
    """

    test_acc = {}
    train_obj = {}

    Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest = data_loader_mnist(dataset = 'mnist_subset.json')

    max_iterations = 500
    # Sweep over the regularisation strength with a fixed mini-batch size.
    k = 100
    for lamb in (0.01, 0.1, 1):
        w = np.zeros((len(Xtrain[0]), 1))
        w_l, train_obj['k=' + str(k) + '_lambda=' + str(lamb)] = pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations)
        test_acc['k=' + str(k) + '_lambda=' + str(lamb)] = pegasos_test(Xtest, ytest, w_l)

    # Sweep over the mini-batch size with a fixed regularisation strength.
    lamb = 0.1
    for k in (1, 10, 1000):
        w = np.zeros((len(Xtrain[0]), 1))
        w_l, train_obj['k=' + str(k) + '_lambda=' + str(lamb)] = pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations)
        test_acc['k=' + str(k) + '_lambda=' + str(lamb)] = pegasos_test(Xtest, ytest, w_l)

    return test_acc, train_obj


def main():
    """Run pegasos_mnist() and print the test accuracy of every configuration."""
    test_acc, train_obj = pegasos_mnist() # results on mnist
    print('mnist test acc \n')
    for key, value in test_acc.items():
        print('%s: test acc = %.4f \n' % (key, value))

    # Writing both result dictionaries to 'pegasos.json' is currently disabled.
    #with open('pegasos.json', 'w') as f_json:
    #    json.dump([test_acc, train_obj], f_json)


#if __name__ == "__main__":
#    main()


## 1.1 Objective function: scratch check

In [2]:
# Load the data once; the scratch cells below reuse these names.
Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest = data_loader_mnist(dataset='mnist_subset.json')

# Array copies of the training split plus its dimensions.
X, y = np.array(Xtrain), np.array(ytrain)
N, D = X.shape

# Arbitrary test point for the objective: all-ones weights, lambda = 0.5.
lamb = 0.5
w = np.ones((D, 1))

In [4]:
    # Sanity check of Q1.1: 0.5 * lamb * ||w||^2 + 1/N sum(max(0,1-ywx))
    # Call objective_function instead of keeping a pasted copy of its body, so
    # the check cannot drift from the implementation and does not depend on an
    # N left over from another cell.
    obj_value = objective_function(X, y, w, lamb)
    obj_value

## 1.2 Pegasos training: scratch run and shape checks

In [5]:
# Hyper-parameters for the scratch Pegasos run below.
lamb = 0.5
k = 1000               # mini-batch size
max_iterations = 500
w = np.zeros((D, 1))   # start from the all-zero weight vector

In [14]:
    # Scratch copy of the pegasos_train loop, limited to four iterations so the
    # intermediate values (A_t, A_tpls, w_thalf, w_, ...) can be inspected in
    # the following cells.  Uses Xtrain, ytrain, w, lamb and k from earlier cells.
    np.random.seed(0)
    Xtrain = np.array(Xtrain)
    ytrain = np.array(ytrain)
    N = Xtrain.shape[0]
    D = Xtrain.shape[1]

    train_obj = []

    # The full training run iterates over range(1, max_iterations + 1).
    # The loop variable keeps the name `iter` because a later cell inspects it.
    for iter in range(1, 5):
        A_t = np.floor(np.random.rand(k) * N).astype(int)  # index of the current mini-batch

        X_t = Xtrain[A_t, :]
        y_t = ytrain[A_t].astype(int)

        # Step 4: indices of the mini-batch samples with y * <w, x> < 1.
        A_tpls = A_t[(np.multiply(y_t, np.transpose(np.dot(X_t, w))) < 1).ravel()]
        X_tpls = Xtrain[A_tpls, :]
        y_tpls = ytrain[A_tpls].astype(int)
        # min() raises on an empty sequence, so only report when something was selected.
        if y_tpls.size:
            print('min', min(y_tpls))

        # Step 5: learning rate.
        ita_t = 1/(lamb * iter)

        # Step 6: sub-gradient step.
        w_thalf = (1-ita_t * lamb) * w + (ita_t / k) * np.sum(
            np.multiply(y_tpls.reshape(y_tpls.shape[0], 1), X_tpls),
            axis=0).reshape(D, 1)

        # Step 7: projection; w_ keeps the weights from before this update for inspection.
        w_ = w
        w = w_thalf * min(1, 1/(np.sqrt(lamb) * np.linalg.norm(w_thalf)))

        # Objective on the current mini-batch only (pegasos_train uses the full training set).
        train_obj.append(objective_function(X_t, y_t, w, lamb))

In [20]:
A_t[0:10]

array([2069, 3148, 3892, 4257, 4082,  830, 4141,  293, 1000, 3114])

In [21]:
A_tpls[0:10]

array([2069, 3892, 4082, 3114,  573, 1539, 1575, 2503, 1500, 4780])

In [17]:
print(Xtrain[2069,:])
print(ytrain[2069])

[1.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [18]:
np.multiply(ytrain[2069], np.transpose(np.dot(Xtrain[2069,:], w_)))

array([-4.67209914])

In [19]:
np.multiply(ytrain[3148], np.transpose(np.dot(Xtrain[3148,:], w_)))

array([7.49211635])

In [28]:
np.linalg.norm(w_)

1.254998471914033

In [30]:
np.dot(X_tpls, w).shape

(497, 1)

In [31]:
y_tpls.reshape(y_tpls.shape[0], 1).shape

(497, 1)

In [36]:
a = np.array(range(497*2)).reshape((497,2))
print(np.multiply(y_tpls.reshape(y_tpls.shape[0], 1) ,a))

[[  0   1]
 [  2   3]
 [  4   5]
 [  6   7]
 [  8   9]
 [ 10  11]
 [ 12  13]
 [ 14  15]
 [ 16  17]
 [ 18  19]
 [ 20  21]
 [ 22  23]
 [ 24  25]
 [ 26  27]
 [ 28  29]
 [ 30  31]
 [ 32  33]
 [ 34  35]
 [ 36  37]
 [ 38  39]
 [ 40  41]
 [ 42  43]
 [ 44  45]
 [ 46  47]
 [ 48  49]
 [ 50  51]
 [ 52  53]
 [ 54  55]
 [ 56  57]
 [ 58  59]
 [ 60  61]
 [ 62  63]
 [ 64  65]
 [ 66  67]
 [ 68  69]
 [ 70  71]
 [ 72  73]
 [ 74  75]
 [ 76  77]
 [ 78  79]
 [ 80  81]
 [ 82  83]
 [ 84  85]
 [ 86  87]
 [ 88  89]
 [ 90  91]
 [ 92  93]
 [ 94  95]
 [ 96  97]
 [ 98  99]
 [100 101]
 [102 103]
 [104 105]
 [106 107]
 [108 109]
 [110 111]
 [112 113]
 [114 115]
 [116 117]
 [118 119]
 [120 121]
 [122 123]
 [124 125]
 [126 127]
 [128 129]
 [130 131]
 [132 133]
 [134 135]
 [136 137]
 [138 139]
 [140 141]
 [142 143]
 [144 145]
 [146 147]
 [148 149]
 [150 151]
 [152 153]
 [154 155]
 [156 157]
 [158 159]
 [160 161]
 [162 163]
 [164 165]
 [166 167]
 [168 169]
 [170 171]
 [172 173]
 [174 175]
 [176 177]
 [178 179]
 [180 181]

In [37]:
y_tpls

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [24]:
print(A_t.shape)
print(A_tpls.shape)
print((np.multiply(y_t, np.dot(X_t,w)) < 1).shape)
print((np.multiply(y_t, np.transpose(np.dot(X_t,w)) )<1).shape)
print((np.multiply(y_t, np.transpose(np.dot(X_t,w)) )<1).ravel())

(1000,)
(735,)
(1000, 1000)
(1, 1000)
[ True  True  True False  True  True  True  True  True  True  True  True
 False  True  True  True  True  True  True  True  True False  True  True
  True  True False  True  True False  True  True  True  True  True  True
 False False  True False  True False False False  True  True  True  True
  True  True  True  True  True  True  True False False  True False  True
  True False  True False  True  True  True  True  True False False False
  True False  True  True  True  True  True False  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True False  True  True  True  True  True  True  True  True  True  True
  True False False  True  True  True  True  True False  True  True  True
  True  True  True  True  True  True  True  True False  True  True  True
  True  True  True  True  True  True  True False  True  True  True  True
  True  True  True False  True  True  True  True False  True  True  True
  True False 

In [25]:
(np.multiply(y_t, np.dot(X_t,w))).shape

(1000, 1000)

In [26]:
((np.multiply(y_t, np.dot(X_t,w)))<1).shape

(1000, 1000)

In [27]:
np.multiply(y_t, np.dot(X_t,w))

array([[-0.73040117, -0.73040117,  0.73040117, ..., -0.73040117,
        -0.73040117,  0.73040117],
       [-0.67572895, -0.67572895,  0.67572895, ..., -0.67572895,
        -0.67572895,  0.67572895],
       [-0.02825347, -0.02825347,  0.02825347, ..., -0.02825347,
        -0.02825347,  0.02825347],
       ...,
       [ 0.7524917 ,  0.7524917 , -0.7524917 , ...,  0.7524917 ,
         0.7524917 , -0.7524917 ],
       [ 0.3820317 ,  0.3820317 , -0.3820317 , ...,  0.3820317 ,
         0.3820317 , -0.3820317 ],
       [-0.65464567, -0.65464567,  0.65464567, ..., -0.65464567,
        -0.65464567,  0.65464567]])

In [28]:
np.multiply(y_tpls.reshape(y_tpls.shape[0],1), X_tpls)

array([[-1., -0., -0., ..., -0., -0., -0.],
       [-1., -0., -0., ..., -0., -0., -0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       ...,
       [-1., -0., -0., ..., -0., -0., -0.],
       [-1., -0., -0., ..., -0., -0., -0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.]])

In [29]:
X_t

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [30]:
print(A_t)
print(A_tpls)

[2716 1980  929 3041  557 4739 2351 4039   99 2166 4460 1262 1741 4575
 3074  517 1185 1260 1724  128 2738  989 1059 2283 3626 1569 4644  996
 4424 2149 2707  464  521 4475 2221  555 4033 1752 3710 4295  813 3265
 3948 3239 2290 2161 2890 2070 4099 2188 2098 3445 3040 4042 3542 3950
 4498 1302 3308 4533 4827 1716 1306 2787 1532 2811 3928 3494  828 4260
  625 3560 1750 3570 1266 4139 1085 3161 2202 4076 3314 1527 4193 3263
 3721 3742 2841 4593 4445 4079 2240 3533 1600 1105 3225 1596 3497 2837
  395 1936  924  487 1735 4037   51 1965  866 3855 3172  842 4350 4637
 2286 2810 3982 2627 2822 2783  953 3194 3014 2182 3328 1410 3251 3433
 3867  270 2313 4228 4822  513 4000 1940 1812 3813 4007 3133 3237 4812
 1608 3328 4614 1542  894  945 3658 2718 3057 4633 2246 1123 2204 3691
 3456 4976  199 4362 2347 2400 1349 4180 4025 1798  923 3461 3845   12
 2553 4392  969 2796 2138 4681  278 4098 3810 3190  421  813 2784 1982
 1363 1287  349 3710 3310 2595 3855 2869 2480 1689 1982 2981 2542 1710
  919 

In [31]:
Xtrain[A_t].shape[0]

1000

In [32]:
A_tpls

array([2716, 1980,  929,  557, 4739, 2351, 4039,   99, 2166, 4460, 1262,
       4575, 3074,  517, 1185, 1260, 1724,  128, 2738, 1059, 2283, 3626,
       1569,  996, 4424, 2707,  464,  521, 4475, 2221,  555, 3710,  813,
       2290, 2161, 2890, 2070, 4099, 2188, 2098, 3445, 3040, 4042, 3542,
       1302, 4533, 4827, 1306, 1532, 2811, 3928, 3494,  828, 1750, 1266,
       4139, 1085, 3161, 2202, 3314, 1527, 4193, 3263, 3721, 3742, 2841,
       4593, 4445, 4079, 2240, 3533, 1600, 1105, 3225, 1596, 3497,  395,
       1936,  924,  487, 1735, 4037,   51, 1965,  866, 3855, 3172, 4637,
       2286, 2810, 3982, 2627, 2783,  953, 3194, 3014, 2182, 1410, 3251,
       3433, 3867,  270, 4228, 4822,  513, 4000, 1940, 1812, 3813, 4007,
       3133, 3237, 1608, 4614, 1542,  894,  945, 3658, 3057, 4633, 2246,
       1123, 3691, 3456, 4976,  199, 2347, 1349, 4180, 4025, 1798,  923,
       3461, 3845,   12, 4392,  969, 2796, 2138,  278, 4098, 3810,  421,
        813, 2784, 1982, 1363, 1287, 3710, 3310, 25

In [33]:
ita_t

0.004

In [34]:
A = np.array([[1,2],[1,1],[0,1]])
b = np.array([1,-1,1])
np.multiply(b.reshape(b.shape[0],1),A)

array([[ 1,  2],
       [-1, -1],
       [ 0,  1]])

In [35]:
min(1,2)

1

In [36]:
w_thalf.shape

(785, 1)

In [37]:
c=[]

In [38]:
c.append(1)

In [39]:
c

[1]

In [40]:
iter

500

In [41]:
objective_function(X_t, y_t, w, lamb)

0.6257762095295042

In [42]:
objective_function(X_t, y_t, np.zeros(X_t.shape[1]), lamb)

1.0

In [43]:
np.dot(w,w.reshape(w.shape[0],1))

ValueError: shapes (785,1) and (785,1) not aligned: 1 (dim 1) != 785 (dim 0)

In [None]:
np.linalg.norm(w) **2

In [None]:
y_tpls[0:4]

In [None]:
X_tpls[0:4, :]

In [None]:
np.multiply(y_tpls.reshape(y_tpls.shape[0],1),X_tpls)[0:4, :]

In [None]:
 np.sum(np.multiply(y_tpls.reshape(y_tpls.shape[0],1),X_tpls),axis=0).shape

In [None]:
type(np.sum(np.multiply(y_tpls.reshape(y_tpls.shape[0],1),X_tpls),axis=0))

## 1.3 Test accuracy: scratch checks

In [None]:
    # Train one model (k = 100, lambda = 0.1, 500 iterations) for the accuracy checks below.
    k, lamb, max_iterations = 100, 0.1, 500
    w = np.zeros((len(Xtrain[0]), 1))
    w_l, train_obj = pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations)

In [None]:
    # Scratch version of pegasos_test: accuracy of w_l on the test split.
    # Uses Xtest, ytest and w_l from earlier cells.
    Xtest = np.array(Xtest)
    ytest = np.array(ytest)
    N = Xtest.shape[0]
    # ywx is the sign of y * <w, x>: +1 for a correctly classified sample,
    # -1 (or 0) otherwise.  It must therefore be tested against 0, not compared
    # with ytest - that comparison marks every correct negative sample as wrong.
    ywx = np.sign((np.multiply(ytest, np.transpose(np.dot(Xtest,w_l)))).ravel())
    test_acc = np.sum(ywx > 0) / N

In [None]:
w.shape

In [None]:
test_acc

In [None]:
len(train_obj)

In [None]:
np.multiply(ytest, np.transpose(np.dot(Xtest,w_l))).shape

In [None]:
ytest.shape

In [None]:
# ywx[i] is True exactly when test sample i is classified correctly (y_i * <w, x_i> > 0).
ywx = (np.multiply(ytest, np.transpose(np.dot(Xtest,w_l))) > 0).ravel()
# Mask of positive labels, kept for the inspection cells that display it.  It is
# not a prediction, so comparing it with ywx does not count correct samples.
ytru = ytest > 0
# Number of correctly classified test samples.
np.sum(ywx)

In [None]:
ytru*1

In [None]:
yytr=[True, True, False, False]
yyte=[True, False, False, False]
type(yyte)
#sum(yytr == yyte)

In [None]:
a= np.array([1,2]) > 1
b = np.array([[-1],[2]])>0
a==b

In [None]:
a==np.transpose(b)

In [None]:
li = [1,2,3]

In [None]:
ywx

In [None]:
ytru

In [None]:
w_l.shape

In [None]:
Xtrain.shape

In [None]:
g = np.array([[1,1],[1,0],[0,1],[-1,-1]])
wg = np.array([[2],[4]])
np.dot(g,wg)