In [1]:
import pandas as pd
import numpy as np
import os, sys, json, pickle, time
from collections import Counter

In [3]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Same load order as before: train, test, validation.
train_data = _load_pickle('../data/512_train.pickle')
test_data = _load_pickle('../data/512_test.pickle')
validation_data = _load_pickle('../data/512_validation.pickle')

In [21]:
# Column 0 is skipped for the features (it is used as the id for the test set
# elsewhere in this notebook); the last column of the labelled sets is the target.
Xtrain = train_data[:, 1:-1]
ytrain = train_data[:, -1]
Xvalidation = validation_data[:, 1:-1]
yvalidation = validation_data[:, -1]
Xtest = test_data[:, 1:]

# Train and validation rows stacked on top of each other.
Xtrn = np.concatenate([Xtrain, Xvalidation], axis=0)
ytrn = np.concatenate([ytrain, yvalidation], axis=0)

In [22]:
# Sanity check: shapes of the stacked train+validation labels and features.
ytrn.shape, Xtrn.shape

((198480,), (198480, 512))

In [40]:
# First column of the test data, converted element-wise to Python ints.
test_id = list(map(int, test_data[:, 0]))

# Forward feature selection
---

Let's first try what was done in the exercises: forward feature selection.
As in the exercise, let's try to find 5 "good" features using a random sample of 10000 rows.

---

In [None]:
# Draw distinct row indices from a seeded, local generator:
#  * seeded      -> the subsample (and therefore the selected features) is reproducible;
#  * local       -> the global numpy random state used elsewhere is left untouched;
#  * no replace  -> a row can never appear in both the fit and the validation part
#                   (sampling with replacement allowed exactly that overlap).
rng = np.random.RandomState(1)
n_sample = min(10000, Xtrain.shape[0])  # never ask for more rows than exist
n_fit = int(0.8 * n_sample)             # 8000 fit / 2000 validation for 10000 rows
rnd_list = rng.choice(Xtrain.shape[0], n_sample, replace=False)
rnd_trn, rnd_val = rnd_list[:n_fit], rnd_list[n_fit:]

# NOTE: this rebinds Xtrn/ytrn (previously train+validation stacked) to the
# subsample that the forward-selection loop below fits on.
Xtrn, ytrn = Xtrain[rnd_trn], ytrain[rnd_trn]
Xval, yval = Xtrain[rnd_val], ytrain[rnd_val]

In [None]:
# Imported in this cell because the only other import of KNeighborsClassifier
# sits further down the notebook; without it a top-to-bottom run fails here
# with a NameError.
from sklearn.neighbors import KNeighborsClassifier

start = time.time()

good_features = []  # selected column indices, in the order they were picked
val_scores = []     # validation error (1 - accuracy) after each pick

feature_list = list(range(Xtrain.shape[1]))  # candidate columns not yet selected

# Greedy forward selection of 5 features: in every round, try each remaining
# column together with the ones already chosen and keep the column that gives
# the lowest validation error for a 10-nearest-neighbour classifier.
for _ in range(5):
    scores = []
    for i in feature_list:
        feature_slice = good_features + [i]
        model = KNeighborsClassifier(n_neighbors=10)
        model.fit(Xtrn[:, feature_slice], ytrn)
        scores.append(1 - model.score(Xval[:, feature_slice], yval))

    best_ind = int(np.argmin(scores))  # position in feature_list, not a column index

    val_scores.append(scores[best_ind])
    # pop() removes the winner from the candidates and returns its column index.
    good_features.append(feature_list.pop(best_ind))
    print(good_features)

print('Seconds to run: {}'.format(time.time() - start))

In [None]:
# Validation error (1 - accuracy) recorded after each feature was added.
val_scores

### Conclusion

It seems that 5 features are not enough to get a proper score. Maybe 10000 samples are also not enough for this. 
Also, the exercise data had 54 features while we have 512, so maybe crank up the number of desired features.

---
# PCA

Also, to further reduce the number of features, let's look at PCA.

----

In [None]:
from sklearn.decomposition import PCA

# PCA with default arguments, fitted on the training features only.
pca = PCA()
pca.fit(Xtrain)

In [None]:
# Cumulative fraction of variance explained; entry k covers the first k + 1
# components. Computed once and reused for both outputs.
cum_var = pca.explained_variance_ratio_.cumsum()

# The transform cell keeps the first 65 components ([:, :65]), so the matching
# cumulative value lives at index 64. Index 65 would report 66 components.
print(cum_var[65 - 1])

# Indices at which more than 90% of the variance is already covered.
np.where(cum_var > 0.90)

About 85% of the variance in the data can be expressed using only 44 principal components. This seems like a nice number.

So let's transform the data and run a couple of classifiers.

In [None]:
# Project both sets with the fitted PCA and keep only the leading components.
n_kept = 65
TXtrain = pca.transform(Xtrain)[:, :n_kept]
TXvalidation = pca.transform(Xvalidation)[:, :n_kept]

In [12]:
# k-nearest-neighbours baseline with k = 15.
# `score` returns the mean accuracy on the given data.

from sklearn.neighbors import KNeighborsClassifier

# fit() returns the estimator itself, so construction and fitting can be chained.
knn = KNeighborsClassifier(n_neighbors=15).fit(Xtrain, ytrain)
knn.score(Xvalidation, yvalidation)

0.329370740212395

In [13]:
# Random forest baseline with 10 trees.
# `score` returns the mean accuracy on the given data.

from sklearn.ensemble import RandomForestClassifier

# fit() returns the estimator itself, so construction and fitting can be chained.
rfc = RandomForestClassifier(n_estimators=10).fit(Xtrain, ytrain)
rfc.score(Xvalidation, yvalidation)

0.20399429386590584

# Multilayer Perceptron Classifier



In [26]:
from sklearn.neural_network import MLPClassifier

# Two hidden layers (256 and 128 units), mini-batches of 500 rows, at most 200
# iterations; early stopping and warm start are enabled and progress is printed.
mlp = MLPClassifier(
    hidden_layer_sizes=(256, 128),
    batch_size=500,
    max_iter=200,
    early_stopping=True,
    warm_start=True,
    verbose=True,
)

In [30]:
# A multilayer perceptron
# Score is the mean accuracy again

# Fit on every labelled row (train + validation). The inputs are built right
# here, not read from Xtrn/ytrn, because the forward-selection cells rebind
# those two names to an 8000-row subsample; on a top-to-bottom run the network
# would otherwise silently be trained on that subsample only.
mlp.fit(np.r_[Xtrain, Xvalidation], np.r_[ytrain, yvalidation])


Iteration 1, loss = 3.34106188
Validation score: 0.307638
Iteration 2, loss = 2.68667574
Validation score: 0.349456
Iteration 3, loss = 2.52023290
Validation score: 0.375957
Iteration 4, loss = 2.41738103
Validation score: 0.383615
Iteration 5, loss = 2.34665949
Validation score: 0.393138
Iteration 6, loss = 2.29064159
Validation score: 0.406137
Iteration 7, loss = 2.24013124
Validation score: 0.406540
Iteration 8, loss = 2.19983405
Validation score: 0.406086
Iteration 9, loss = 2.16392519
Validation score: 0.410067
Iteration 10, loss = 2.12900823
Validation score: 0.422914
Iteration 11, loss = 2.09965546
Validation score: 0.422511
Iteration 12, loss = 2.07184965
Validation score: 0.425635
Iteration 13, loss = 2.04859522
Validation score: 0.427247
Iteration 14, loss = 2.01954122
Validation score: 0.429061
Iteration 15, loss = 1.99990557
Validation score: 0.429162
Iteration 16, loss = 1.97679849
Validation score: 0.428960
Iteration 17, loss = 1.95694834
Validation score: 0.430723
Iterat

MLPClassifier(activation='relu', alpha=0.0001, batch_size=500, beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(256, 128), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=True, warm_start=True)

In [38]:
# Persist the fitted network to disk, then label the test features with it.
model_path = 'mlp_b500_l256-128_early_stopping.pickle'
with open(model_path, 'wb') as out_file:
    pickle.dump(mlp, out_file)

prediction = mlp.predict(Xtest)

In [41]:
# Rebuild the id list from the raw test data first. The list is extended in
# place at the end of this cell, so without this reset a second run of the cell
# would find no missing ids and leave `pred` shorter than `test_id`.
test_id = [int(i) for i in test_data[:, 0]]

# Every id in 1..12800 gets a row in the output; collect the ones that have no
# test sample. (The set is no longer called `all`, which shadowed the builtin.)
expected_ids = set(range(1, 12801))
missing = sorted(expected_ids - set(test_id))  # sorted -> deterministic order

# Ids without a sample receive a random label.
# NOTE(review): randint's upper bound is exclusive, so labels are drawn from
# 1..124 — confirm that this matches the actual label set.
rand_label = np.random.randint(1, 125, len(missing))

pred = [int(i) for i in prediction]
pred.extend(rand_label)
test_id.extend(missing)

In [42]:
# Show both lengths before building the frame; they have to agree.
print(len(pred), len(test_id))

# Two-column submission table, 'id' first and 'predicted' second.
answers = pd.DataFrame(
    {'id': test_id, 'predicted': pred},
    columns=['id', 'predicted'],
)

12800 12800


In [43]:
# Write the answers table to CSV without the DataFrame index column.
answers.to_csv('512_prediction_b500_l256-128_early.csv', index = False)

In [8]:
from sklearn.linear_model import Perceptron

In [9]:
# Linear perceptron baseline on the training split; verbose=1 prints the
# per-epoch training log shown below.
per = Perceptron(verbose=1)
per.fit(Xtrain, ytrain)



-- Epoch 1
Norm: 310.47, NNZs: 512, Bias: -11.000000, T: 192171, Avg. loss: 2.176636
Total training time: 0.14 seconds.
-- Epoch 2
Norm: 389.07, NNZs: 512, Bias: -14.000000, T: 384342, Avg. loss: 2.122328
Total training time: 0.28 seconds.
-- Epoch 3
Norm: 445.01, NNZs: 512, Bias: -17.000000, T: 576513, Avg. loss: 2.123216
Total training time: 0.43 seconds.
-- Epoch 4
Norm: 494.23, NNZs: 512, Bias: -19.000000, T: 768684, Avg. loss: 2.099829
Total training time: 0.57 seconds.
-- Epoch 5
Norm: 520.62, NNZs: 512, Bias: -20.000000, T: 960855, Avg. loss: 2.075820
Total training time: 0.72 seconds.
-- Epoch 1
Norm: 333.47, NNZs: 512, Bias: -1.000000, T: 192171, Avg. loss: 2.695283
Total training time: 0.15 seconds.
-- Epoch 2
Norm: 419.35, NNZs: 512, Bias: -7.000000, T: 384342, Avg. loss: 2.629256
Total training time: 0.28 seconds.
-- Epoch 3
Norm: 487.99, NNZs: 512, Bias: -9.000000, T: 576513, Avg. loss: 2.607630
Total training time: 0.43 seconds.
-- Epoch 4
Norm: 528.48, NNZs: 512, Bias: -

Norm: 225.27, NNZs: 512, Bias: 0.000000, T: 192171, Avg. loss: 2.316107
Total training time: 0.15 seconds.
-- Epoch 2
Norm: 287.04, NNZs: 512, Bias: 1.000000, T: 384342, Avg. loss: 2.294028
Total training time: 0.29 seconds.
-- Epoch 3
Norm: 341.57, NNZs: 512, Bias: 1.000000, T: 576513, Avg. loss: 2.251553
Total training time: 0.44 seconds.
-- Epoch 4
Norm: 382.28, NNZs: 512, Bias: 4.000000, T: 768684, Avg. loss: 2.271977
Total training time: 0.58 seconds.
-- Epoch 5
Norm: 423.37, NNZs: 512, Bias: 5.000000, T: 960855, Avg. loss: 2.299969
Total training time: 0.73 seconds.
-- Epoch 1
Norm: 415.99, NNZs: 512, Bias: -11.000000, T: 192171, Avg. loss: 2.064588
Total training time: 0.14 seconds.
-- Epoch 2
Norm: 516.99, NNZs: 512, Bias: -12.000000, T: 384342, Avg. loss: 1.879764
Total training time: 0.29 seconds.
-- Epoch 3
Norm: 589.46, NNZs: 512, Bias: -15.000000, T: 576513, Avg. loss: 1.830665
Total training time: 0.43 seconds.
-- Epoch 4
Norm: 639.39, NNZs: 512, Bias: -17.000000, T: 7686

Norm: 649.94, NNZs: 512, Bias: -70.000000, T: 960855, Avg. loss: 2.783213
Total training time: 0.73 seconds.
-- Epoch 1
Norm: 311.84, NNZs: 512, Bias: -5.000000, T: 192171, Avg. loss: 2.907117
Total training time: 0.15 seconds.
-- Epoch 2
Norm: 400.91, NNZs: 512, Bias: -4.000000, T: 384342, Avg. loss: 2.850815
Total training time: 0.29 seconds.
-- Epoch 3
Norm: 452.48, NNZs: 512, Bias: -5.000000, T: 576513, Avg. loss: 2.861677
Total training time: 0.44 seconds.
-- Epoch 4
Norm: 490.74, NNZs: 512, Bias: -10.000000, T: 768684, Avg. loss: 2.866746
Total training time: 0.59 seconds.
-- Epoch 5
Norm: 528.13, NNZs: 512, Bias: -11.000000, T: 960855, Avg. loss: 2.851942
Total training time: 0.74 seconds.
-- Epoch 1
Norm: 248.79, NNZs: 512, Bias: -9.000000, T: 192171, Avg. loss: 1.928931
Total training time: 0.15 seconds.
-- Epoch 2
Norm: 321.79, NNZs: 512, Bias: -13.000000, T: 384342, Avg. loss: 1.880927
Total training time: 0.28 seconds.
-- Epoch 3
Norm: 365.63, NNZs: 512, Bias: -20.000000, T

Norm: 687.14, NNZs: 512, Bias: -8.000000, T: 960855, Avg. loss: 6.834641
Total training time: 1.33 seconds.
-- Epoch 1
Norm: 405.19, NNZs: 512, Bias: 2.000000, T: 192171, Avg. loss: 2.619213
Total training time: 0.14 seconds.
-- Epoch 2
Norm: 510.95, NNZs: 512, Bias: 5.000000, T: 384342, Avg. loss: 2.510574
Total training time: 0.37 seconds.
-- Epoch 3
Norm: 585.71, NNZs: 512, Bias: 12.000000, T: 576513, Avg. loss: 2.460363
Total training time: 0.60 seconds.
-- Epoch 4
Norm: 645.57, NNZs: 512, Bias: 15.000000, T: 768684, Avg. loss: 2.462721
Total training time: 0.95 seconds.
-- Epoch 5
Norm: 688.87, NNZs: 512, Bias: 20.000000, T: 960855, Avg. loss: 2.431155
Total training time: 1.29 seconds.
-- Epoch 1
Norm: 380.86, NNZs: 512, Bias: -11.000000, T: 192171, Avg. loss: 2.100349
Total training time: 0.15 seconds.
-- Epoch 2
Norm: 470.62, NNZs: 512, Bias: -19.000000, T: 384342, Avg. loss: 2.009792
Total training time: 0.31 seconds.
-- Epoch 3
Norm: 540.19, NNZs: 512, Bias: -21.000000, T: 57

Norm: 672.56, NNZs: 512, Bias: -38.000000, T: 960855, Avg. loss: 2.664341
Total training time: 0.74 seconds.
-- Epoch 1
Norm: 243.26, NNZs: 512, Bias: -4.000000, T: 192171, Avg. loss: 1.435465
Total training time: 0.14 seconds.
-- Epoch 2
Norm: 313.81, NNZs: 512, Bias: -5.000000, T: 384342, Avg. loss: 1.434406
Total training time: 0.29 seconds.
-- Epoch 3
Norm: 369.67, NNZs: 512, Bias: -7.000000, T: 576513, Avg. loss: 1.415682
Total training time: 0.44 seconds.
-- Epoch 4
Norm: 408.33, NNZs: 512, Bias: -8.000000, T: 768684, Avg. loss: 1.409633
Total training time: 0.58 seconds.
-- Epoch 5
Norm: 432.63, NNZs: 512, Bias: -10.000000, T: 960855, Avg. loss: 1.367378
Total training time: 0.72 seconds.
-- Epoch 1
Norm: 311.79, NNZs: 512, Bias: -10.000000, T: 192171, Avg. loss: 1.323877
Total training time: 0.15 seconds.
-- Epoch 2
Norm: 412.41, NNZs: 512, Bias: -14.000000, T: 384342, Avg. loss: 1.203282
Total training time: 0.30 seconds.
-- Epoch 3
Norm: 481.43, NNZs: 512, Bias: -16.000000, T

Norm: 480.92, NNZs: 512, Bias: -37.000000, T: 960855, Avg. loss: 2.773127
Total training time: 0.72 seconds.
-- Epoch 1
Norm: 338.65, NNZs: 512, Bias: -18.000000, T: 192171, Avg. loss: 1.961192
Total training time: 0.14 seconds.
-- Epoch 2
Norm: 427.25, NNZs: 512, Bias: -28.000000, T: 384342, Avg. loss: 1.869682
Total training time: 0.28 seconds.
-- Epoch 3
Norm: 496.30, NNZs: 512, Bias: -38.000000, T: 576513, Avg. loss: 1.824703
Total training time: 0.42 seconds.
-- Epoch 4
Norm: 552.80, NNZs: 512, Bias: -47.000000, T: 768684, Avg. loss: 1.806617
Total training time: 0.56 seconds.
-- Epoch 5
Norm: 599.72, NNZs: 512, Bias: -57.000000, T: 960855, Avg. loss: 1.808473
Total training time: 0.71 seconds.
-- Epoch 1
Norm: 312.14, NNZs: 512, Bias: -12.000000, T: 192171, Avg. loss: 2.680724
Total training time: 0.14 seconds.
-- Epoch 2
Norm: 387.02, NNZs: 512, Bias: -13.000000, T: 384342, Avg. loss: 2.720350
Total training time: 0.29 seconds.
-- Epoch 3
Norm: 445.06, NNZs: 512, Bias: -19.00000

Norm: 546.63, NNZs: 512, Bias: -27.000000, T: 768684, Avg. loss: 1.760170
Total training time: 0.59 seconds.
-- Epoch 5
Norm: 586.21, NNZs: 512, Bias: -31.000000, T: 960855, Avg. loss: 1.785232
Total training time: 0.75 seconds.
-- Epoch 1
Norm: 260.92, NNZs: 512, Bias: -13.000000, T: 192171, Avg. loss: 1.029784
Total training time: 0.16 seconds.
-- Epoch 2
Norm: 344.33, NNZs: 512, Bias: -20.000000, T: 384342, Avg. loss: 0.957709
Total training time: 0.32 seconds.
-- Epoch 3
Norm: 404.04, NNZs: 512, Bias: -24.000000, T: 576513, Avg. loss: 0.949872
Total training time: 0.47 seconds.
-- Epoch 4
Norm: 452.95, NNZs: 512, Bias: -28.000000, T: 768684, Avg. loss: 0.964820
Total training time: 0.62 seconds.
-- Epoch 5
Norm: 494.35, NNZs: 512, Bias: -36.000000, T: 960855, Avg. loss: 0.940182
Total training time: 0.78 seconds.
-- Epoch 1
Norm: 205.17, NNZs: 512, Bias: -9.000000, T: 192171, Avg. loss: 1.372007
Total training time: 0.14 seconds.
-- Epoch 2
Norm: 270.88, NNZs: 512, Bias: -16.000000

Norm: 532.80, NNZs: 512, Bias: -20.000000, T: 576513, Avg. loss: 3.695277
Total training time: 0.96 seconds.
-- Epoch 4
Norm: 570.13, NNZs: 512, Bias: -28.000000, T: 768684, Avg. loss: 3.644456
Total training time: 1.30 seconds.
-- Epoch 5
Norm: 600.27, NNZs: 512, Bias: -32.000000, T: 960855, Avg. loss: 3.580984
Total training time: 1.64 seconds.
-- Epoch 1
Norm: 346.10, NNZs: 512, Bias: 2.000000, T: 192171, Avg. loss: 3.015559
Total training time: 0.32 seconds.
-- Epoch 2
Norm: 437.73, NNZs: 512, Bias: 0.000000, T: 384342, Avg. loss: 2.907699
Total training time: 0.58 seconds.
-- Epoch 3
Norm: 490.54, NNZs: 512, Bias: 4.000000, T: 576513, Avg. loss: 2.950493
Total training time: 0.99 seconds.
-- Epoch 4
Norm: 532.93, NNZs: 512, Bias: 3.000000, T: 768684, Avg. loss: 2.924854
Total training time: 1.25 seconds.
-- Epoch 5
Norm: 558.70, NNZs: 512, Bias: 5.000000, T: 960855, Avg. loss: 2.956744
Total training time: 1.51 seconds.
-- Epoch 1
Norm: 277.54, NNZs: 512, Bias: -1.000000, T: 19217

Norm: 416.67, NNZs: 512, Bias: -12.000000, T: 576513, Avg. loss: 3.592956
Total training time: 0.68 seconds.
-- Epoch 4
Norm: 453.99, NNZs: 512, Bias: -17.000000, T: 768684, Avg. loss: 3.530186
Total training time: 0.82 seconds.
-- Epoch 5
Norm: 478.94, NNZs: 512, Bias: -17.000000, T: 960855, Avg. loss: 3.515979
Total training time: 0.99 seconds.
-- Epoch 1
Norm: 329.03, NNZs: 512, Bias: -13.000000, T: 192171, Avg. loss: 2.615748
Total training time: 0.19 seconds.
-- Epoch 2
Norm: 407.01, NNZs: 512, Bias: -20.000000, T: 384342, Avg. loss: 2.628801
Total training time: 0.34 seconds.
-- Epoch 3
Norm: 456.54, NNZs: 512, Bias: -28.000000, T: 576513, Avg. loss: 2.619332
Total training time: 0.69 seconds.
-- Epoch 4
Norm: 495.98, NNZs: 512, Bias: -35.000000, T: 768684, Avg. loss: 2.587360
Total training time: 1.01 seconds.
-- Epoch 5
Norm: 532.24, NNZs: 512, Bias: -40.000000, T: 960855, Avg. loss: 2.581514
Total training time: 1.29 seconds.
-- Epoch 1
Norm: 249.84, NNZs: 512, Bias: -8.000000

Norm: 397.61, NNZs: 512, Bias: -8.000000, T: 384342, Avg. loss: 3.699386
Total training time: 0.28 seconds.
-- Epoch 3
Norm: 446.76, NNZs: 512, Bias: -14.000000, T: 576513, Avg. loss: 3.665938
Total training time: 0.54 seconds.
-- Epoch 4
Norm: 484.79, NNZs: 512, Bias: -18.000000, T: 768684, Avg. loss: 3.719846
Total training time: 0.78 seconds.
-- Epoch 5
Norm: 526.91, NNZs: 512, Bias: -21.000000, T: 960855, Avg. loss: 3.699825
Total training time: 1.03 seconds.
-- Epoch 1
Norm: 244.21, NNZs: 512, Bias: -7.000000, T: 192171, Avg. loss: 1.602001
Total training time: 0.25 seconds.
-- Epoch 2
Norm: 316.50, NNZs: 512, Bias: -11.000000, T: 384342, Avg. loss: 1.589291
Total training time: 0.47 seconds.
-- Epoch 3
Norm: 358.57, NNZs: 512, Bias: -15.000000, T: 576513, Avg. loss: 1.577950
Total training time: 0.61 seconds.
-- Epoch 4
Norm: 416.78, NNZs: 512, Bias: -19.000000, T: 768684, Avg. loss: 1.539737
Total training time: 0.75 seconds.
-- Epoch 5
Norm: 448.16, NNZs: 512, Bias: -24.000000,

[Parallel(n_jobs=1)]: Done 128 out of 128 | elapsed:  1.9min finished


Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      max_iter=5, n_iter=None, n_jobs=1, penalty=None, random_state=0,
      shuffle=True, tol=None, verbose=1, warm_start=False)

In [10]:
# Mean accuracy of the perceptron on the validation split.
per.score(Xvalidation,yvalidation)

0.2414011729275638