# SETUP 

### To run on GPU, set "CUDA_VISIBLE_DEVICES" to the index of the GPU (0, 1, 2, etc.) you wish to use. To run on CPU, leave it set to the empty string "".

In [3]:
from numpy.random import seed
from pandas import read_csv, DataFrame
from sklearn.preprocessing import minmax_scale
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
import os
from IPython.core.display import display, HTML
# Stretch the notebook container to the full browser width so wide outputs are not clipped.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

# An empty device list selects CPU execution (see the note at the top of this notebook).
# Comment this line out to make all GPUs visible, or set it to a GPU index such as "0".
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Functions to create and train the model

In [95]:
def create_model(input_length=3664, n_classes=3):
    """Build the (uncompiled) 1-D convolutional classifier.

    The network is four Conv1D(kernel size 8, ReLU) + MaxPooling1D(2) stages
    with 32, 64, 128 and 256 filters, followed by Flatten, two ReLU Dense
    layers (256 and 128 units) and a softmax output layer.

    Parameters
    ----------
    input_length : int, optional
        Number of time steps per sample; the model input shape is
        ``(input_length, 1)``. Defaults to 3664, the value originally
        hard-coded here.
    n_classes : int, optional
        Number of units in the softmax output layer. Defaults to 3.

    Returns
    -------
    Sequential
        The assembled model. It is not compiled; ``train_model`` does that.
    """
    model = Sequential()

    # Only the first layer declares the input shape (single-channel sequence).
    model.add(Conv1D(32, 8, input_shape=(input_length, 1), activation='relu'))
    model.add(MaxPooling1D(2))

    # Remaining conv/pool stages: the filter count doubles at every stage.
    for n_filters in (64, 128, 256):
        model.add(Conv1D(n_filters, 8, activation='relu'))
        model.add(MaxPooling1D(2))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    return model

def train_model(model, X_train, Y_train, X_val, Y_val):
    """Train `model` in two stages and score it on the validation split.

    Stage one compiles with Adam at lr=1e-3 and fits for 4 epochs; stage two
    recompiles with a fresh Adam instance at lr=1e-4 and fits for 2 more
    epochs. Both stages use categorical cross-entropy, batch size 8, and pass
    ``(X_val, Y_val)`` as validation data.

    Parameters
    ----------
    model : object exposing ``compile``, ``fit`` and ``predict``
        Typically the network returned by ``create_model``.
    X_train, Y_train : training inputs and one-hot encoded targets.
    X_val, Y_val : validation inputs and one-hot encoded targets. ``Y_val``
        must be a 2-D array because it is indexed column by column.

    Returns
    -------
    tuple of float
        The AUROC of each class column of ``Y_val`` against the matching
        column of the predictions, in column order. The same values are
        printed, space separated, before returning.
    """
    # (learning rate, number of epochs) for each training stage, in order.
    schedule = ((1e-3, 4), (1e-4, 2))
    for learning_rate, n_epochs in schedule:
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=learning_rate),
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, batch_size=8,
                  validation_data=(X_val, Y_val), epochs=n_epochs)

    pred = model.predict(X_val)

    # Score every class column exactly once and reuse the values for both
    # the printout and the return value.
    aucs = tuple(roc_auc_score(Y_val[:, k], pred[:, k])
                 for k in range(Y_val.shape[1]))
    print(*aucs)
    return aucs

# Prep our training data

In [None]:
import pandas as pd
import numpy as np
# Load the simulations table. Each column is treated as one sample, and the
# class is identified by a keyword contained in the column name.
df = pd.read_csv('../data/simulations.csv')

prec, eci, spin = [], [], []

# Keywords are tested in this order; the first one found in the name wins.
keyword_buckets = (('prec', prec), ('spin', spin), ('eci', eci))
for column in df:
    for keyword, bucket in keyword_buckets:
        if keyword in column:
            bucket.append(df[column])
            break
    else:
        # Column name matched none of the class keywords.
        print("Unknown")
print(len(prec), len(spin), len(eci))

# Validation containers are initialised here but not filled in this cell.
X_val = []
Y_val = []

# Stack the samples class by class and build the matching integer labels:
# 0 = prec, 1 = eci, 2 = spin.
X_train = prec + eci + spin
Y_train = [0] * len(prec) + [1] * len(eci) + [2] * len(spin)

X_train = np.array(X_train)
Y_train = np.array(Y_train)
# Append a trailing axis of size 1 to the sample array.
X_train = np.expand_dims(X_train, axis=2)

# Stratified 10-fold (results are mean AUROC and std AUROC for each class)

In [None]:
# Number of stratified folds; also used in the progress message below.
N_SPLITS = 10

# Per-fold validation AUROC for each class: P = prec, E = eci, S = spin.
P = []
E = []
S = []

# NOTE(review): shuffle=True is used without a random_state, so the fold
# membership changes from run to run.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True)
for index, (train_indices, val_indices) in enumerate(skf.split(X_train, Y_train)):
    print("Training on fold " + str(index+1) + "/" + str(N_SPLITS) + "...")
    # Slice out this fold's samples and one-hot encode the integer labels.
    xtrain, xval = X_train[train_indices], X_train[val_indices]
    ytrain, yval = to_categorical(Y_train[train_indices], 3), to_categorical(Y_train[val_indices], 3)
    # Drop the reference to the previous fold's model, then build a fresh one
    # so no weights carry over between folds.
    model = None
    model = create_model()

    # train_model returns the AUROCs in label order. The data-prep cell
    # assigns 0 = prec, 1 = eci, 2 = spin, so the unpacking order must be
    # (p, e, s); the previous (p, s, e) swapped the ECI and Spin results.
    p, e, s = train_model(model, xtrain, ytrain, xval, yval)
    P.append(p)
    E.append(e)
    S.append(s)

# Mean and standard deviation of the per-fold AUROC for each class.
print("Prec = ", np.mean(P), " +/-", np.std(P))
print("ECI = ", np.mean(E), " +/-", np.std(E))
print("Spin = ", np.mean(S), " +/-", np.std(S))

Unknown
3404 3423 3167
Training on fold 1/10...
Train on 8993 samples, validate on 1001 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Train on 8993 samples, validate on 1001 samples
Epoch 1/2
Epoch 2/2
1520/8993 [====>.........................] - ETA: 2:31 - loss: 0.0606 - acc: 0.9855