## 1. Import All Packages

In [1]:
%%time
import numpy as np
import math
import pandas as pd
import pickle, os, math
from keras.models import Sequential
from keras.callbacks import Callback
from keras.layers import Dense
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score

Using TensorFlow backend.


Wall time: 5.68 s


## 2. Read data

In [47]:
%%time
# Load the pickled score dataset. The payload is indexed by two keys:
# 'data' (used below as the feature table) and 'target' (used as the labels).
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
data_dir = os.path.join(os.curdir, 'Data', 'Score', 'data_pics.pkl')
with open(data_dir, mode='rb') as in_file:
    ot = pickle.load(in_file)
data_pics, target_pics = ot['data'], ot['target']
print(data_pics.shape)

(40001, 162)
Wall time: 78 ms


As with survival training, the score-training data has unbalanced classes: the ratio of class 0 to class 1 is almost 14:1.

In [49]:
# Hold out 10% of the rows as the test set, then split the remaining
# training portion 70/30 into train and validation parts. Both splits use
# a fixed random_state so they are repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data_pics, target_pics,
    test_size=0.1, random_state=152)
print(X_train.shape)
X_train_train, X_vali, y_train_train, y_vali = train_test_split(
    X_train, y_train,
    test_size=0.3, random_state=15545)

# Report how many training rows carry each label (labels are read from column 0).
for class_label in (0, 1):
    n_points = int((y_train[0] == class_label).sum())
    print('class {} has {} points'.format(class_label, n_points))

(36000, 162)
class 0 has 33580 points
class 1 has 2420 points


To create a balanced dataset, use all class 1 samples and randomly select the same number of class 0 samples.

In [100]:
# Build a balanced set of rows from the training split: keep every row whose
# label is non-zero and draw an equally sized random subset of the label-0 rows.
# Both lists hold index *labels* of y_train.
index_0 = y_train.index[y_train[0] == 0].tolist()
index_1 = y_train.index[y_train[0] != 0].tolist()

# The trailing `* 1` is the class-0 : class-1 ratio (1 -> equal class sizes).
# min() keeps the request valid when drawing without replacement.
n_class_0 = min(len(index_0), math.floor(len(index_1) * 1))

# Seeded generator so the same subset is drawn on every run, and
# replace=False so no class-0 row is picked twice (np.random.choice samples
# WITH replacement by default, which would put duplicate rows in the subset).
undersample_rng = np.random.RandomState(100)
index_0_comparable_to_1 = undersample_rng.choice(index_0, n_class_0, replace=False)

samples = np.concatenate([index_1, index_0_comparable_to_1])
print(str(len(index_1)) + ' + ' + str(len(index_0_comparable_to_1)) + ' = ' + str(len(samples)))

2420 + 2420 = 4840


In [110]:
# `samples` holds index labels taken from y_train.index, so rows are selected
# by label with .loc. Positional .iloc only returns the same rows when the
# frames carry a default 0..n-1 index.
small_data = data_pics.loc[samples, :]
small_target = target_pics.loc[samples, :]

# 70/30 train/test split of the balanced subset, followed by a 70/30
# train/validation split of its training part (fixed seeds for repeatability).
X_train_small, X_test_small, y_train_small, y_test_small = train_test_split(
    small_data, small_target, test_size=0.3, random_state=1152)
X_train_train_small, X_vali_small, y_train_train_small, y_vali_small = train_test_split(
    X_train_small, y_train_small, test_size=0.3, random_state=8155)

Ravel (flatten) the target data into 1-D arrays.

In [102]:
# Flatten every target split with np.ravel; the `_m` suffix marks the
# flattened version of the split with the same base name.

# Balanced ("small") splits.
(y_train_small_m, y_test_small_m,
 y_train_train_small_m, y_vali_small_m) = map(
    np.ravel,
    (y_train_small, y_test_small, y_train_train_small, y_vali_small))

# Full-size splits.
(y_train_m, y_test_m,
 y_train_train_m, y_vali_m) = map(
    np.ravel,
    (y_train, y_test, y_train_train, y_vali))

## 3. Training

### Support Vector Classification

In [108]:
# RBF-kernel support vector classifier fitted on the balanced training split.
# fit() returns the estimator itself, so construction and fitting are chained.
clf_sc = SVC(kernel='rbf', C=10.0, gamma='auto', verbose=True).fit(
    X_train_small, y_train_small_m)
# Last expression of the cell: score on the balanced test split.
clf_sc.score(X_test_small, y_test_small_m)

[LibSVM]

0.5117079889807162

In [109]:
# Predict the balanced test split and print the mean predicted label
# (for 0/1 labels this is the fraction of rows predicted as class 1).
ypred = clf_sc.predict(X_test_small)
print(ypred.mean())
# Positions within the prediction array of the rows predicted as class 1.
test = np.flatnonzero(ypred == 1).tolist()

0.04132231404958678


### Multi-Layer Perceptron Classifier

The MLP classifier apparently failed to learn the underlying trend: its predictions turn out to be either all 0 or all 1.

In [62]:
# Multi-layer perceptron with three hidden layers (50, 20 and 20 units),
# fitted on the balanced training split.
# random_state fixes the estimator's internal randomness so that re-running
# the cell reproduces the same fitted model and score, in line with the
# seeded train/test splits used elsewhere in this notebook.
# NOTE(review): per the scikit-learn docs, learning_rate='adaptive' is only
# used when solver='sgd'; with the default solver it has no effect --
# confirm whether solver='sgd' was intended.
mlpc_sc = MLPClassifier(hidden_layer_sizes=(50, 20, 20),
                        alpha=0.15, max_iter=1000, batch_size=50,
                        verbose=False, learning_rate_init=0.01, tol=1e-5,
                        learning_rate='adaptive', random_state=152)

mlpc_sc.fit(X_train_small, y_train_small_m)
# Last expression of the cell: score on the balanced test split.
mlpc_sc.score(X_test_small, y_test_small_m)

0.4951790633608815

In [63]:
# Mean predicted label on the balanced test split (for 0/1 labels, the
# fraction of rows predicted as class 1); shown as the cell's output.
ypred = mlpc_sc.predict(X_test_small)
ypred.mean()

0.0

## 4. Training Results