In [1]:
import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.optimizers import SGD
from keras.utils import to_categorical
from collections import Counter

from sklearn.metrics import r2_score, balanced_accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


Using TensorFlow backend.


In [2]:
"""
1. import data, normalise
2. train different models
    - 3 different NN structures, 5 reps each
    - ?? GradBoostClf ?? on ROS data??
=> ensemble 
?? 3. semi-supervised learning
"""


######################################################################################################################
### IMPORT DATA
# The [1:, 1:] slice drops the first row and the first column of each file
# (presumably the CSV header line and an id column -- confirm against the data).
X_in = np.genfromtxt('X_train.csv', delimiter=",")[1:, 1:]
y_in = np.genfromtxt('y_train.csv', delimiter=",")[1:, 1:]
X_out = np.genfromtxt('X_test.csv', delimiter=",")[1:, 1:]

# One-hot targets, as required by the categorical_crossentropy loss used below.
y_in_hot = to_categorical(y_in)
# Submission template. It is deliberately loaded unsliced: later cells index it
# as y_out[1:, 1], i.e. they expect row 0 (the header line) to still be present.
y_out = np.genfromtxt('sample.csv', delimiter=",")

### NORMALISE
# Fit the scaler on the training features only, then apply those same
# statistics to the prediction set. Re-fitting on X_out (fit_transform) would
# scale the two sets differently, so the models would see shifted inputs at
# prediction time.
sts = StandardScaler()
X_in = sts.fit_transform(X_in)
X_out = sts.transform(X_out)

In [3]:
# Keep handles on the complete labelled set (scaled features, one-hot targets)
# so it remains reachable once X_in / y_in_hot are rebound by the split.
X_full, y_full = X_in, y_in_hot

In [4]:
# Hold out part of the labelled data (train_test_split's default test size)
# for local evaluation.
# - random_state pins the shuffle so the reported scores are reproducible.
# - stratify keeps the class proportions equal in both parts; the class
#   weights used during training indicate the classes are imbalanced, so an
#   unstratified split can leave the minority classes under-represented.
X_in, X_test, y_in_hot, y_test = train_test_split(
    X_full,
    y_full,
    stratify=np.argmax(y_full, axis=1),
    random_state=0,
)

In [5]:
# Sanity check: shapes of the train / hold-out split, the unlabelled
# prediction set and the submission template, one per line.
for arr in (X_in, X_test, y_in_hot, y_test, X_out, y_out):
    print(arr.shape)

(3600, 1000)
(1200, 1000)
(3600, 3)
(1200, 3)
(4100, 1000)
(4101, 2)


In [9]:
# Early-stopping callback passed to every model.fit call below: stop once
# val_loss has not improved by at least 0.001 for 5 consecutive epochs and
# roll the model back to the weights of its best epoch.
cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

### MODELS
# Running sums of predicted class probabilities (one column per class, 3
# classes). Each trained network adds its predictions to these arrays.
model_probs = np.zeros((X_out.shape[0], 3), dtype=np.float64)
test_probs = np.zeros((y_test.shape[0], 3), dtype=np.float64)
train_probs = np.zeros((y_in_hot.shape[0], 3), dtype=np.float64)

In [11]:
######################################################################################################################
### FFNNs

# NN FLAT: a single hidden layer of 100 ReLU units with dropout.
# Trained 5 times from fresh random initialisations; every repetition adds its
# predicted probabilities to the shared ensemble accumulators.
for rep in range(5):
    model0 = Sequential()
    model0.add(Dense(100, activation='relu', input_dim=1000))
    model0.add(Dropout(0.5))
    model0.add(Dense(3, activation='softmax'))

    model0.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    # class_weight is the inverse of the assumed class frequencies
    # (12.5% / 75% / 12.5%), so the rare classes weigh more in the loss.
    model0.fit(X_in, y_in_hot, epochs=100, batch_size=128, callbacks=[cb], validation_split=0.3,
               verbose=0, class_weight={0: 1/.125, 1: 1/.75, 2: 1/.125})

    # predict() on a softmax output already yields class probabilities.
    # predict_proba() was only a wrapper around it and no longer exists in
    # newer Keras releases, so call predict() directly.
    model_probs = model_probs + model0.predict(X_out)
    test_probs = test_probs + model0.predict(X_test)
    train_probs = train_probs + model0.predict(X_in)

Restoring model weights from the end of the best epoch
Epoch 00007: early stopping
Restoring model weights from the end of the best epoch
Epoch 00008: early stopping
Restoring model weights from the end of the best epoch
Epoch 00008: early stopping
Restoring model weights from the end of the best epoch
Epoch 00009: early stopping
Restoring model weights from the end of the best epoch
Epoch 00006: early stopping


In [12]:
# NN MEDIUM: two hidden layers of 40 ReLU units, dropout after each, and batch
# normalisation between them.
# Trained 5 times from fresh random initialisations; every repetition adds its
# predicted probabilities to the shared ensemble accumulators.
for rep in range(5):
    model1 = Sequential()
    model1.add(Dense(40, activation='relu', input_dim=1000))
    model1.add(Dropout(0.5))
    model1.add(BatchNormalization())
    model1.add(Dense(40, activation='relu'))
    model1.add(Dropout(0.5))
    model1.add(Dense(3, activation='softmax'))

    model1.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    # class_weight is the inverse of the assumed class frequencies
    # (12.5% / 75% / 12.5%), so the rare classes weigh more in the loss.
    model1.fit(X_in, y_in_hot, epochs=100, batch_size=128, callbacks=[cb], validation_split=0.3, verbose=0,
               class_weight={0: 1/.125, 1: 1/.75, 2: 1/.125})

    # predict() on a softmax output already yields class probabilities.
    # predict_proba() was only a wrapper around it and no longer exists in
    # newer Keras releases, so call predict() directly.
    model_probs = model_probs + model1.predict(X_out)
    test_probs = test_probs + model1.predict(X_test)
    train_probs = train_probs + model1.predict(X_in)

Restoring model weights from the end of the best epoch
Epoch 00018: early stopping
Restoring model weights from the end of the best epoch
Epoch 00020: early stopping
Restoring model weights from the end of the best epoch
Epoch 00018: early stopping
Restoring model weights from the end of the best epoch
Epoch 00025: early stopping
Restoring model weights from the end of the best epoch
Epoch 00019: early stopping


In [13]:
# NN LARGE: three hidden layers of 200 ReLU units, dropout after each, and
# batch normalisation after the first one.
# Trained 5 times from fresh random initialisations; every repetition adds its
# predicted probabilities to the shared ensemble accumulators.
for rep in range(5):
    model2 = Sequential()
    model2.add(Dense(200, activation='relu', input_dim=1000))
    model2.add(Dropout(0.5))
    model2.add(BatchNormalization())
    model2.add(Dense(200, activation='relu'))
    model2.add(Dropout(0.5))
    model2.add(Dense(200, activation='relu'))
    model2.add(Dropout(0.5))
    model2.add(Dense(3, activation='softmax'))

    model2.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    # class_weight is the inverse of the assumed class frequencies
    # (12.5% / 75% / 12.5%), so the rare classes weigh more in the loss.
    model2.fit(X_in, y_in_hot, epochs=100, batch_size=128, callbacks=[cb], validation_split=0.3, verbose=0,
               class_weight={0: 1/.125, 1: 1/.75, 2: 1/.125})

    # predict() on a softmax output already yields class probabilities.
    # predict_proba() was only a wrapper around it and no longer exists in
    # newer Keras releases, so call predict() directly.
    model_probs = model_probs + model2.predict(X_out)
    test_probs = test_probs + model2.predict(X_test)
    train_probs = train_probs + model2.predict(X_in)

Restoring model weights from the end of the best epoch
Epoch 00015: early stopping
Restoring model weights from the end of the best epoch
Epoch 00016: early stopping
Restoring model weights from the end of the best epoch
Epoch 00013: early stopping
Restoring model weights from the end of the best epoch
Epoch 00015: early stopping
Restoring model weights from the end of the best epoch
Epoch 00013: early stopping


In [15]:
# Ensemble decision: for each sample pick the class with the largest summed
# probability over all trained networks.
pred_train = train_probs.argmax(axis=1)
pred_test = test_probs.argmax(axis=1)
# Row 0 of the submission template is skipped; predictions go into column 1
# of the remaining rows.
y_out[1:, 1] = model_probs.argmax(axis=1)

# Balanced accuracy on both splits; the integer labels are recovered from the
# one-hot targets via argmax.
trainBMAC = balanced_accuracy_score(y_in_hot.argmax(axis=1), pred_train)
testBMAC = balanced_accuracy_score(y_test.argmax(axis=1), pred_test)
for label, score in (('test BMAC  =', testBMAC), ('train BMAC =', trainBMAC)):
    print(label, score)

test BMAC  = 0.6783780468394379
train BMAC = 0.7653879014468457


In [None]:
from datetime import datetime

# Timestamped output name (month.day.hour.minute) so successive submissions
# do not overwrite each other.
filename = datetime.now().strftime("%m.%d.%H.%M") + "_lucien.csv"
print(filename)
# y_out was read from sample.csv without skipping its header line, so row 0
# holds NaNs rather than data; writing it would put a "nan,nan" line right
# under the header. Only rows 1: are real (id, prediction) pairs.
# fmt="%d" writes ids and class labels as plain integers instead of the
# default scientific float notation (assumes the ids in sample.csv are
# integers -- confirm).
np.savetxt(filename, y_out[1:], delimiter=",", header="id,y", comments='', fmt="%d")