In [47]:
import pandas as pd
import numpy as np
from numpy.linalg import norm

def read_pandas(fname):
    """Read the CSV file ``fname`` and return it as a DataFrame indexed by its ``id`` column."""
    return pd.read_csv(fname).set_index("id")

In [48]:
def train_model(train_tot, alpha, lambd, epsilon, alpha_decay, max_epochs=1000, verbose=True):
    """Fit an L2-regularised logistic regression with stochastic gradient ascent.

    Parameters
    ----------
    train_tot : 2-D array-like
        One example per row; every column except the last is a feature and
        the last column is the label ``y``.
    alpha : float
        Initial learning rate.
    lambd : float
        L2 regularisation strength (the objective is ``LCL - lambd * ||w||^2``).
    epsilon : float
        Training stops once the objective changes by less than this amount
        between two consecutive epochs.
    alpha_decay : float
        Factor the learning rate is multiplied by after every epoch.
    max_epochs : int, optional
        Upper bound on the number of passes over the data (default 1000).
    verbose : bool, optional
        When true (default), print the epoch number, the weights and the
        objective value every epoch.

    Returns
    -------
    numpy.ndarray
        The learned weight vector, one entry per feature column.
    """
    # Train on a private float copy: the per-epoch shuffle must not reorder
    # (or fail on a read-only view of) the array owned by the caller.
    data = np.array(train_tot, dtype=float)
    w = np.zeros(data.shape[1] - 1)
    LCL = 1.0
    for epoch in range(max_epochs):
        if verbose:
            print(epoch)
        # The update below uses the gradient 2 * lambd * w, which belongs to
        # the penalty lambd * ||w||^2 -- so that is what must be monitored.
        oldval = LCL - lambd * np.dot(w, w)
        LCL = 0.0
        np.random.shuffle(data)
        for example in data:
            x = example[:-1]
            y = example[-1]
            z = np.dot(w, x)
            # log(sigmoid(z)) and log(1 - sigmoid(z)) computed without ever
            # forming exp(+large): avoids overflow and the 0 * -inf = nan that
            # would otherwise make the convergence test below never succeed.
            log_p = -np.logaddexp(0.0, -z)
            log_not_p = -np.logaddexp(0.0, z)
            p = np.exp(log_p)
            w = w + alpha * ((y - p) * x - 2 * lambd * w)
            # The log-likelihood is accumulated online, using the prediction
            # made before this example's weight update.
            LCL += y * log_p + (1 - y) * log_not_p
        alpha *= alpha_decay
        newval = LCL - lambd * np.dot(w, w)
        if abs(newval - oldval) < epsilon:
            break
        if verbose:
            print(w, newval)
            print()

    return w

In [49]:
def test_model(w, test_features, test_labels = [None]):
    test_id_len = np.shape(test_features)[0]
    predicted_test_labels = np.zeros(test_id_len, dtype='int')

    for i in range(test_id_len):
        x = test_features[i]
        p1 = 1.0 / (1.0 + np.exp(-np.dot(w, x)))
        p0 = 1.0 - p1
        if p0 > p1: predicted_test_labels[i] = 0
        else: predicted_test_labels[i] = 1
            
            
    acc = 0.0
    if test_labels[0] != None:
        diff = predicted_test_labels - test_labels
        acc = 1.0 - np.dot(diff, diff) / test_id_len
        
        print("predicted labels:", predicted_test_labels)
        print("true labels:", test_labels)
        print("accuracy:", acc)
        
    return predicted_test_labels, acc

In [54]:
def fold10cv(train_tot, alpha, lambd, epsilon, alpha_decay, n_folds=10):
    """Estimate accuracy with k-fold cross-validation (10 folds by default).

    The rows of ``train_tot`` are cut, in their current order, into
    ``n_folds`` contiguous and pairwise disjoint segments. Each segment is
    held out once: a model is trained with ``train_model`` on the remaining
    rows and scored with ``test_model`` on the held-out rows.

    Parameters
    ----------
    train_tot : 2-D numpy array
        One example per row, label in the last column.
    alpha, lambd, epsilon, alpha_decay
        Forwarded unchanged to ``train_model``.
    n_folds : int, optional
        Number of folds (default 10).

    Returns
    -------
    list
        The held-out accuracy of each fold, in fold order.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 2 or larger than the number of rows.
    """
    data = np.asarray(train_tot)
    train_id_len = data.shape[0]
    if n_folds < 2:
        raise ValueError("n_folds must be at least 2")
    if train_id_len < n_folds:
        raise ValueError(
            "cannot split %d rows into %d non-empty folds" % (train_id_len, n_folds)
        )

    row_ids = np.arange(train_id_len)
    accuracies = list()
    for i in range(n_folds):
        # Integer boundaries: float bounds passed to np.arange(..., dtype=int)
        # round inconsistently and put a boundary row in both the training
        # and the test segment when the row count is not a multiple of the
        # fold count.
        lo = train_id_len * i // n_folds
        hi = train_id_len * (i + 1) // n_folds
        test_segment = row_ids[lo:hi]
        train_segment = np.concatenate([row_ids[:lo], row_ids[hi:]])

        train_total = np.copy(data[train_segment])
        w = train_model(train_total, alpha, lambd, epsilon, alpha_decay)
        test_total = np.copy(data[test_segment])
        predicted_test_labels, a = test_model(w, test_features = test_total[:, :-1], test_labels = test_total[:, -1])
        print(a)
        accuracies.append(a)

    return accuracies

In [51]:
# Read the two input CSVs; read_pandas indexes each frame by its "id" column.
train, test_noans = (
    read_pandas(csv_name) for csv_name in ("train.csv", "test_noans.csv")
)

In [52]:
# NumPy versions of both frames. train_tot keeps every column of `train`
# (train_model reads the last one as the label); test_features is the whole
# of `test_noans`.
test_features = test_noans.to_numpy()
train_tot = train[train.columns].to_numpy()

In [83]:
# Final fit on the full training matrix.
w = train_model(
    train_tot,
    alpha=0.03,
    lambd=1e-6,
    epsilon=1e-5,
    alpha_decay=0.975,
)
# Scored on the very rows the model was fitted to, so `acc` here is a
# training accuracy, not an estimate of held-out performance.
predicted_test_labels, acc = test_model(w, train_tot[:, :-1], train_tot[:, -1])

0
[-0.13363646  0.0953695   0.01102279 ...  0.00454963  0.02343816
 -0.02915436] -2001.2542702770897

1
[-0.07242396  0.15936231 -0.25245084 ...  0.00394577  0.03765153
 -0.05554641] -1703.722482429416

2
[-0.06413957 -0.01155799  0.17002292 ...  0.00189102  0.05307475
 -0.08171967] -1564.788173265755

3
[ 0.04316673  0.16984764 -0.1958805  ... -0.00783382  0.0655708
 -0.10136181] -1459.2889665919147

4
[ 0.00980225 -0.19307335  0.01186665 ... -0.00566723  0.07323451
 -0.12362635] -1363.750669449013

5
[-0.00404992  0.06620078 -0.02514075 ... -0.00854427  0.08282546
 -0.14331156] -1302.244009375479

6
[ 0.03388343  0.10913992  0.00851009 ... -0.01691995  0.09176805
 -0.16620361] -1238.8131430665912

7
[-0.06846275  0.04582363 -0.01502636 ... -0.0208513   0.09899697
 -0.18605493] -1193.1593388081048

8
[-0.06156087  0.00342537 -0.24866034 ... -0.02135565  0.10616907
 -0.20298713] -1147.7736616656875

9
[-0.17658117  0.03617527 -0.04075744 ... -0.02755181  0.11071645
 -0.22042645] -1105.

[-0.09941563  0.03112766 -0.08733104 ... -0.16070462  0.21479021
 -0.66517736] -602.7800674299019

82
[-0.10610062  0.01761196 -0.09065288 ... -0.16141948  0.21512434
 -0.66683572] -602.1945237297012

83
[-0.12848331  0.02346922 -0.11684412 ... -0.16200056  0.21543006
 -0.66846505] -600.5316706441155

84
[-0.13469926  0.02793522 -0.11486376 ... -0.16264447  0.21573477
 -0.67002307] -599.5774521476453

85
[-0.12530609  0.01945745 -0.11281976 ... -0.16310406  0.21603339
 -0.67153176] -598.8922427941886

86
[-0.11451073  0.02522048 -0.09038367 ... -0.16360462  0.21632977
 -0.67294675] -597.9459128631323

87
[-0.11453648  0.03890893 -0.10457578 ... -0.16413936  0.21661831
 -0.6743829 ] -596.7153937769265

88
[-0.11684677  0.02721916 -0.09222134 ... -0.16463435  0.21689094
 -0.67577601] -595.6980751666397

89
[-0.13603583  0.00151477 -0.11666348 ... -0.16509905  0.21715838
 -0.67714683] -594.3320410506855

90
[-0.12424486  0.02484954 -0.10817878 ... -0.1656602   0.21743626
 -0.67843292] -59

[-0.1230464   0.02567474 -0.10348983 ... -0.18120825  0.22582348
 -0.71913752] -565.405884289883

162
[-0.12157323  0.02645764 -0.10210969 ... -0.18128185  0.22586408
 -0.71933304] -565.2492672962062

163
[-0.12356733  0.02511264 -0.10343328 ... -0.18135851  0.22590329
 -0.71952436] -565.1256180730539

164
[-0.12459355  0.02490473 -0.10396152 ... -0.18143128  0.22594162
 -0.71971282] -565.0071838808337

165
[-0.12540867  0.02426588 -0.10486601 ... -0.18150451  0.22597917
 -0.71989547] -564.8560319805082

166
[-0.12438386  0.02472385 -0.10391711 ... -0.18157247  0.22601578
 -0.72007117] -564.7715352397187

167
[-0.12401444  0.02466513 -0.10342439 ... -0.18163963  0.22605131
 -0.72024329] -564.6504947848676

168
[-0.12440308  0.02473463 -0.10373233 ... -0.18170504  0.22608597
 -0.72041209] -564.5348393122267

169
[-0.12253036  0.02637845 -0.10223578 ... -0.18176905  0.22612001
 -0.72057558] -564.4038658186606

170
[-0.12207311  0.0267951  -0.10175663 ... -0.18183211  0.22615319
 -0.72073

[-0.12332381  0.02569755 -0.10383516 ... -0.18386352  0.22722199
 -0.72592876] -560.7716690039889

242
[-0.12335356  0.0256767  -0.10385622 ... -0.18387356  0.22722727
 -0.72595445] -560.7556363872401

243
[-0.12337909  0.02565443 -0.10388017 ... -0.18388339  0.22723241
 -0.7259795 ] -560.7385428016178

244
[-0.12340506  0.02563361 -0.10390012 ... -0.18389298  0.22723743
 -0.72600392] -560.7220363740333

245
[-0.12341739  0.02561897 -0.10391775 ... -0.18390232  0.22724233
 -0.72602775] -560.7058700032658

246
[-0.12343138  0.02560259 -0.1039286  ... -0.18391139  0.2272471
 -0.72605096] -560.6900756827201

247
[-0.1234196   0.02560816 -0.10392108 ... -0.18392023  0.22725176
 -0.72607358] -560.6746791035748

248
[-0.12343646  0.02558882 -0.10393594 ... -0.18392886  0.22725629
 -0.72609563] -560.6595569252579

249
[-0.12342978  0.02558943 -0.10393098 ... -0.18393727  0.22726072
 -0.72611714] -560.6451022862984

250
[-0.12339893  0.02561558 -0.10390578 ... -0.18394542  0.22726503
 -0.72613

[-0.12349145  0.02554012 -0.10403067 ... -0.18421228  0.22740532
 -0.72682018] -560.1675172411541

322
[-0.12349059  0.02554079 -0.10403036 ... -0.1842136   0.22740602
 -0.72682357] -560.1652121538046

323
[-0.12349025  0.02554113 -0.10403035 ... -0.18421489  0.2274067
 -0.72682687] -560.1629731520212

324
[-0.12349021  0.02554122 -0.10403043 ... -0.18421615  0.22740736
 -0.72683009] -560.1607903040093

325
[-0.12349079  0.02554078 -0.10403117 ... -0.18421738  0.227408
 -0.72683322] -560.158651775088

326
[-0.12348977  0.02554157 -0.10403062 ... -0.18421858  0.22740863
 -0.72683628] -560.1565681810548

327
[-0.12348962  0.02554169 -0.10403081 ... -0.18421974  0.22740925
 -0.72683926] -560.1545585078347

328
[-0.12348916  0.02554212 -0.10403062 ... -0.18422088  0.22740984
 -0.72684217] -560.1525796000117

329
[-0.1234893   0.02554206 -0.10403097 ... -0.18422199  0.22741043
 -0.72684501] -560.1506576308293

330
[-0.12348986  0.02554168 -0.10403159 ... -0.18422307  0.227411
 -0.72684777] 

[-0.12348854  0.02554341 -0.10403776 ... -0.18425827  0.2274295
 -0.7269377 ] -560.0877054923868

402
[-0.12348854  0.02554341 -0.10403779 ... -0.18425844  0.22742959
 -0.72693815] -560.0874023682946

403
[-0.12348856  0.0255434  -0.10403784 ... -0.18425861  0.22742968
 -0.72693859] -560.0871066188415

404
[-0.12348857  0.02554339 -0.10403789 ... -0.18425878  0.22742977
 -0.72693901] -560.0868182874234

405
[-0.12348858  0.02554339 -0.10403792 ... -0.18425894  0.22742985
 -0.72693942] -560.08653744612

406
[-0.12348858  0.0255434  -0.10403796 ... -0.1842591   0.22742993
 -0.72693983] -560.0862634509166

407
[-0.12348859  0.0255434  -0.104038   ... -0.18425925  0.22743002
 -0.72694022] -560.0859962139689

408
[-0.12348859  0.0255434  -0.10403803 ... -0.1842594   0.22743009
 -0.7269406 ] -560.0857357686996

409
[-0.12348859  0.02554341 -0.10403805 ... -0.18425955  0.22743017
 -0.72694098] -560.0854816862163

410
[-0.1234886   0.0255434  -0.10403809 ... -0.18425969  0.22743025
 -0.7269413

[-0.12348866  0.02554348 -0.10403909 ... -0.18426434  0.22743269
 -0.72695321] -560.0771771935799

482
[-0.12348866  0.02554348 -0.10403909 ... -0.18426436  0.2274327
 -0.72695327] -560.0771371898219

483
[-0.12348866  0.02554348 -0.1040391  ... -0.18426438  0.22743271
 -0.72695332] -560.0770981880745

484
[-0.12348866  0.02554348 -0.1040391  ... -0.1842644   0.22743272
 -0.72695338] -560.0770601609031

485
[-0.12348866  0.02554348 -0.10403911 ... -0.18426443  0.22743273
 -0.72695344] -560.0770230790363

486
[-0.12348866  0.02554348 -0.10403911 ... -0.18426445  0.22743274
 -0.72695349] -560.076986926163

487
[-0.12348866  0.02554348 -0.10403912 ... -0.18426447  0.22743276
 -0.72695354] -560.0769516761695

488
[-0.12348867  0.02554348 -0.10403912 ... -0.18426449  0.22743277
 -0.72695359] -560.0769173075593

489
[-0.12348867  0.02554348 -0.10403912 ... -0.18426451  0.22743278
 -0.72695364] -560.076883802341

490
[-0.12348867  0.02554348 -0.10403913 ... -0.18426453  0.22743279
 -0.7269536

In [84]:
# Predict the test rows; no labels are passed, so `acc` is just the 0.0 placeholder.
predicted_test_labels, acc = test_model(w, test_features)

In [87]:
# Build the submission as a standalone Series (index = test ids, name = "label")
# instead of adding a "label" column to test_noans: mutating that frame would
# make a re-run of the cell that derives test_features from it pick up the
# predictions as an extra feature column.
test_ans = pd.Series(predicted_test_labels, index=test_noans.index, name="label")
test_ans.to_csv("test_ans.csv")