In [1]:
import numpy as np
import pandas as pd
from sklearn.externals import joblib
import gc

In [68]:
with np.load('data/interim/kmers/kmer_matrix.npz') as data:
    kmers = data['kmers']
    kmer_order = data['kmer_order']
    genome_order = data['genome_order']

In [69]:
micsdf = joblib.load('data/interim/mic_class_dataframe2.pkl')
class_orders = joblib.load('data/interim/mic_class_order_dict2.pkl')

In [70]:
# TIO
tio_labels = class_orders['TIO']
tio_label_index = { k: v for v, k in enumerate(tio_labels) }
y_tio = np.array([ tio_label_index[m] if not pd.isna(m) else m for m in micsdf.loc[genome_order, 'TIO'] ])
labels, counts = np.unique(y_tio, return_counts=True)
ok = labels[counts >= 5]

mask = np.in1d(y_tio, ok) # Since Nan is not a label, this also filters invalid MICs
y_tio = y_tio[mask]
X_tio = kmers[mask,:]
tio_samples = genome_order[mask]
del kmers
del genome_order
del micsdf
del class_orders
gc.collect()
# %xdel kmers
# %xdel genome_order
# %xdel micsdf
# %xdel class_orders

432

In [71]:
# Test/train split
from sklearn.model_selection import train_test_split
indices = np.arange(len(tio_samples))
X_train, X_test, y_train, y_test, idx1, idx2 = train_test_split(X_tio, y_tio, indices, test_size=0.2, random_state=36, stratify=y_tio)
# del X_tio
# del y_tio
gc.collect()

0

In [6]:
print(np.unique(y_train,return_counts=True))
print(np.unique(y_test,return_counts=True))

(array([1, 2, 3, 4, 5, 6]), array([ 404, 1047,   66,    6,   78,  205]))
(array([1, 2, 3, 4, 5, 6]), array([101, 262,  16,   1,  20,  52]))


In [7]:
# Feature selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif

fsel = SelectKBest(f_classif, k=10000)
X_train_fs = fsel.fit_transform(X_train, y_train)
X_test_fs = fsel.transform(X_test)



  f = msb / msw
  f = msb / msw


In [10]:
import tensorflow
from tensorflow import set_random_seed
set_random_seed(36)

from keras.layers.core import Dense, Dropout, Activation

from keras.models import Sequential#, load_model
from keras.utils import np_utils, to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


In [11]:
patience = 16
early_stop = EarlyStopping(monitor='loss', patience=patience, verbose=0, min_delta=0.005, mode='auto')
model_save = ModelCheckpoint("best_model.hdf5",monitor='loss', verbose = 0, save_best_only =True, save_weights_only = False, mode ='auto', period =1)
reduce_LR = ReduceLROnPlateau(monitor='loss', factor= 0.1, patience=(patience/2), verbose = 0, min_delta=0.005,mode = 'auto', cooldown=0, min_lr=0)

num_classes=6
n1 = X_test_fs.shape[1]
n2 = int((n1+num_classes)/2)
model = Sequential()
model.add(Dense(n1,activation='relu',input_dim=(n1)))
model.add(Dropout(0.5))
model.add(Dense(n2, activation='relu', kernel_initializer='uniform'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='relu', kernel_initializer='uniform', ))
model.add(Dense(1, kernel_initializer='normal'))

model.compile(loss='mae', metrics=['accuracy'], optimizer='adam')

In [12]:
history = model.fit(X_train_fs, y_train, epochs=100, batch_size=200, verbose=1, shuffle=True, validation_split=0.33, callbacks=[early_stop, reduce_LR])

Train on 1210 samples, validate on 596 samples
Epoch 1/100


 200/1210 [===>..........................] - ETA: 32s - loss: 2.5660 - acc: 0.0000e+00













Epoch 2/100


 200/1210 [===>..........................] - ETA: 11s - loss: 1.1664 - acc: 0.2050













Epoch 3/100


 200/1210 [===>..........................] - ETA: 11s - loss: 1.6031 - acc: 0.1950













Epoch 4/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.9419 - acc: 0.4850













Epoch 5/100


 200/1210 [===>..........................] - ETA: 11s - loss: 1.1540 - acc: 0.2550













Epoch 6/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.9635 - acc: 0.4950













Epoch 7/100


 200/1210 [===>..........................] - ETA: 12s - loss: 0.8986 - acc: 0.4350













Epoch 8/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.5105 - acc: 0.6900













Epoch 9/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.6501 - acc: 0.6050













Epoch 10/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.6908 - acc: 0.5550













Epoch 11/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.5960 - acc: 0.5750













Epoch 12/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.4713 - acc: 0.6800













Epoch 13/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.4676 - acc: 0.6550













Epoch 14/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.4709 - acc: 0.6700













Epoch 15/100


 200/1210 [===>..........................] - ETA: 12s - loss: 0.4000 - acc: 0.7150













Epoch 16/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.4252 - acc: 0.6700













Epoch 17/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3952 - acc: 0.7050













Epoch 18/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.5819 - acc: 0.6050













Epoch 19/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.5313 - acc: 0.6150













Epoch 20/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.5270 - acc: 0.6150













Epoch 21/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.5126 - acc: 0.6650













Epoch 22/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3803 - acc: 0.7250













Epoch 23/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3638 - acc: 0.7200













Epoch 24/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.4231 - acc: 0.6750













Epoch 25/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.4298 - acc: 0.6900













Epoch 26/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.4706 - acc: 0.6400













Epoch 27/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.3221 - acc: 0.7600













Epoch 28/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.3844 - acc: 0.7500













Epoch 29/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.4187 - acc: 0.7400













Epoch 30/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.5238 - acc: 0.6550













Epoch 31/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3435 - acc: 0.7350













Epoch 32/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.4151 - acc: 0.7150













Epoch 33/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.3708 - acc: 0.7150













Epoch 34/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.5367 - acc: 0.5950













Epoch 35/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.3701 - acc: 0.7250













Epoch 36/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.4054 - acc: 0.7050













Epoch 37/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.4166 - acc: 0.7350













Epoch 38/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.5842 - acc: 0.5700













Epoch 39/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.5411 - acc: 0.6300













Epoch 40/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.4811 - acc: 0.7050













Epoch 41/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.3670 - acc: 0.7150













Epoch 42/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3232 - acc: 0.7600













Epoch 43/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3763 - acc: 0.7250













Epoch 44/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3262 - acc: 0.7500













Epoch 45/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3127 - acc: 0.7700













Epoch 46/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.2872 - acc: 0.8250













Epoch 47/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2992 - acc: 0.8000













Epoch 48/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2941 - acc: 0.8000













Epoch 49/100


 200/1210 [===>..........................] - ETA: 12s - loss: 0.2699 - acc: 0.8200













Epoch 50/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.2991 - acc: 0.7950













Epoch 51/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3015 - acc: 0.7900













Epoch 52/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.2893 - acc: 0.7750













Epoch 53/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2769 - acc: 0.7900













Epoch 54/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3026 - acc: 0.7650













Epoch 55/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3230 - acc: 0.7750













Epoch 56/100


 200/1210 [===>..........................] - ETA: 12s - loss: 0.3060 - acc: 0.7950













Epoch 57/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2739 - acc: 0.8300













Epoch 58/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2516 - acc: 0.8150













Epoch 59/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2823 - acc: 0.7900













Epoch 60/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2870 - acc: 0.7850













Epoch 61/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.2771 - acc: 0.8000













Epoch 62/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2697 - acc: 0.8050













Epoch 63/100


 200/1210 [===>..........................] - ETA: 12s - loss: 0.2600 - acc: 0.8250













Epoch 64/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.3249 - acc: 0.7600













Epoch 65/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2795 - acc: 0.8100













Epoch 66/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3086 - acc: 0.7750













Epoch 67/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2684 - acc: 0.8250













Epoch 68/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3019 - acc: 0.8000













Epoch 69/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2727 - acc: 0.8100













Epoch 70/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2631 - acc: 0.8200













Epoch 71/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3030 - acc: 0.7550













Epoch 72/100


 200/1210 [===>..........................] - ETA: 12s - loss: 0.2499 - acc: 0.8450













Epoch 73/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3038 - acc: 0.7900













Epoch 74/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2934 - acc: 0.7700













Epoch 75/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2657 - acc: 0.8050













Epoch 76/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.3002 - acc: 0.8150













Epoch 77/100


 200/1210 [===>..........................] - ETA: 11s - loss: 0.2501 - acc: 0.8300













Epoch 78/100


 200/1210 [===>..........................] - ETA: 10s - loss: 0.2610 - acc: 0.8100













In [13]:
score = model.evaluate(X_test_fs, y_test, verbose=0)
print(score)
y_pred = model.predict(X_test_fs)

[0.3442011955565056, 0.7345132743362832]


In [15]:
clonaldf = pd.read_csv('data/interim/mash_population_groups.csv', index_col=0)
test_samples = tio_samples[idx2]
test_cgs = clonaldf.loc[test_samples]

resdf = pd.DataFrame(data=np.vstack((y_test, y_pred[:,0], test_cgs.values[:,0])).T,
             index=test_samples,
             columns=['true','pred','group'])
resdf['err'] = np.abs(resdf['true'] - resdf['pred'])

In [22]:
errres = resdf.groupby('group').agg(['min','max','mean','median','count'])['err']

In [24]:
errres.to_csv('tmp')

In [74]:
# Run just the worst offender

grp1_samples = clonaldf.index[clonaldf['cluster'] == 1].values
grp1_idx = np.argwhere(np.in1d(tio_samples, grp1_samples))
grp1_idx1 = idx1[np.in1d(idx1, grp1_idx)]
grp1_idx2 = idx2[np.in1d(idx2, grp1_idx)]

X_train_g1 = X_tio[grp1_idx1,:]
X_test_g1 = X_tio[grp1_idx2,:]
y_train_g1 = y_tio[grp1_idx1]
y_test_g1 = y_tio[grp1_idx2]

In [75]:
fsel = SelectKBest(f_classif, k=10000)
X_train_g1_fs = fsel.fit_transform(X_train_g1, y_train_g1)
X_test_g1_fs = fsel.transform(X_test_g1)

  f = msb / msw
  f = msb / msw


In [128]:
patience = 16
early_stop = EarlyStopping(monitor='loss', patience=patience, verbose=0, min_delta=0.005, mode='auto')
model_save = ModelCheckpoint("best_model.hdf5",monitor='loss', verbose = 0, save_best_only =True, save_weights_only = False, mode ='auto', period =1)
reduce_LR = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=(patience/2), verbose = 0, min_delta=0.005,mode = 'auto', cooldown=0, min_lr=0)

num_classes=6
n1 = X_test_g1_fs.shape[1]
n2 = int((n1+num_classes-10)/2)
model = Sequential()
model.add(Dense(n1,activation='relu',input_dim=(n1)))
model.add(Dropout(0.5))
model.add(Dense(n2, activation='relu', kernel_initializer='uniform'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='relu', kernel_initializer='uniform', ))
model.add(Dense(1, kernel_initializer='normal'))

model.compile(loss='mae', metrics=['accuracy'], optimizer='adam')

In [130]:
#history = model.fit(X_train_g1_fs, y_train_g1, epochs=100, batch_size=75, verbose=1, shuffle=True, validation_split=0.33, callbacks=[early_stop, reduce_LR])
history = model.fit(X_train_g1_fs, y_train_g1, epochs=100, batch_size=50, verbose=1, shuffle=True, validation_split=0.33)

Train on 269 samples, validate on 133 samples
Epoch 1/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4802 - acc: 0.6200











Epoch 2/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4272 - acc: 0.6400











Epoch 3/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4340 - acc: 0.7000











Epoch 4/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5332 - acc: 0.5200











Epoch 5/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4485 - acc: 0.6200











Epoch 6/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3868 - acc: 0.7400











Epoch 7/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4094 - acc: 0.6400











Epoch 8/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3393 - acc: 0.8200











Epoch 9/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4647 - acc: 0.6200











Epoch 10/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4569 - acc: 0.6400











Epoch 11/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4672 - acc: 0.6400











Epoch 12/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.5010 - acc: 0.5400











Epoch 13/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.5446 - acc: 0.4800











Epoch 14/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5067 - acc: 0.5200











Epoch 15/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.5373 - acc: 0.5800











Epoch 16/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4901 - acc: 0.6000











Epoch 17/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.5497 - acc: 0.5800











Epoch 18/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.6749 - acc: 0.4200











Epoch 19/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.7438 - acc: 0.3400











Epoch 20/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.6393 - acc: 0.5200











Epoch 21/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4270 - acc: 0.6800











Epoch 22/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4020 - acc: 0.6600











Epoch 23/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5661 - acc: 0.4800











Epoch 24/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3094 - acc: 0.8400











Epoch 25/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3824 - acc: 0.6600











Epoch 26/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.5329 - acc: 0.6000











Epoch 27/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.6308 - acc: 0.5000











Epoch 28/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3943 - acc: 0.7400











Epoch 29/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4182 - acc: 0.7400











Epoch 30/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4135 - acc: 0.6400











Epoch 31/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4062 - acc: 0.8200











Epoch 32/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3950 - acc: 0.6800











Epoch 33/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4250 - acc: 0.6600











Epoch 34/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4882 - acc: 0.5800











Epoch 35/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5435 - acc: 0.5800











Epoch 36/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.6276 - acc: 0.4800











Epoch 37/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5305 - acc: 0.5600











Epoch 38/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.5409 - acc: 0.5200











Epoch 39/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.7653 - acc: 0.4200











Epoch 40/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4263 - acc: 0.6600











Epoch 41/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3587 - acc: 0.7400











Epoch 42/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4066 - acc: 0.6200











Epoch 43/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4377 - acc: 0.6200











Epoch 44/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4263 - acc: 0.6000











Epoch 45/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4479 - acc: 0.6600











Epoch 46/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4223 - acc: 0.6800











Epoch 47/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3542 - acc: 0.7600











Epoch 48/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.6968 - acc: 0.4000











Epoch 49/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4776 - acc: 0.5400











Epoch 50/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4035 - acc: 0.6800











Epoch 51/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5623 - acc: 0.5000











Epoch 52/100


 50/269 [====>.........................] - ETA: 8s - loss: 0.4878 - acc: 0.6000











Epoch 53/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4212 - acc: 0.7000











Epoch 54/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.6472 - acc: 0.4800











Epoch 55/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5659 - acc: 0.5400











Epoch 56/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4290 - acc: 0.7000











Epoch 57/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.9107 - acc: 0.4000











Epoch 58/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3623 - acc: 0.7200











Epoch 59/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4189 - acc: 0.6400











Epoch 60/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4737 - acc: 0.5600











Epoch 61/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3016 - acc: 0.7400











Epoch 62/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3420 - acc: 0.7200











Epoch 63/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3709 - acc: 0.7400











Epoch 64/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3431 - acc: 0.7400











Epoch 65/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3877 - acc: 0.7000











Epoch 66/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3254 - acc: 0.7600











Epoch 67/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3182 - acc: 0.7800











Epoch 68/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3598 - acc: 0.7400











Epoch 69/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3473 - acc: 0.7200











Epoch 70/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4557 - acc: 0.6000











Epoch 71/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3671 - acc: 0.6200











Epoch 72/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4284 - acc: 0.6200











Epoch 73/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3649 - acc: 0.7000











Epoch 74/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3670 - acc: 0.7600











Epoch 75/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4636 - acc: 0.6400











Epoch 76/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3444 - acc: 0.8000











Epoch 77/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3273 - acc: 0.8000











Epoch 78/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4502 - acc: 0.6400











Epoch 79/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3487 - acc: 0.7800











Epoch 80/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3672 - acc: 0.7600











Epoch 81/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3938 - acc: 0.7400











Epoch 82/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3647 - acc: 0.7000











Epoch 83/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4057 - acc: 0.7200











Epoch 84/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3272 - acc: 0.7600











Epoch 85/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5967 - acc: 0.5000











Epoch 86/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5772 - acc: 0.5000











Epoch 87/100


 50/269 [====>.........................] - ETA: 8s - loss: 0.5196 - acc: 0.5800











Epoch 88/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3492 - acc: 0.7200











Epoch 89/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.5331 - acc: 0.5600











Epoch 90/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.6328 - acc: 0.5600











Epoch 91/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3758 - acc: 0.7200











Epoch 92/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.2974 - acc: 0.8200











Epoch 93/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3457 - acc: 0.7600











Epoch 94/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.4521 - acc: 0.7000











Epoch 95/100


 50/269 [====>.........................] - ETA: 7s - loss: 0.3752 - acc: 0.7200











Epoch 96/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3831 - acc: 0.7200











Epoch 97/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.4943 - acc: 0.5600











Epoch 98/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.5565 - acc: 0.5000











Epoch 99/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3444 - acc: 0.8200











Epoch 100/100


 50/269 [====>.........................] - ETA: 6s - loss: 0.3088 - acc: 0.8400











In [132]:
y_pred_g1 = model.predict(X_test_g1_fs)
g1resdf = pd.DataFrame(data=np.vstack((y_test_g1, y_pred_g1[:,0])).T,
             columns=['true','pred'])
g1resdf['err'] = np.abs(g1resdf['true'] - g1resdf['pred'])

In [133]:
g1resdf.describe()

Unnamed: 0,true,pred,err
count,111.0,111.0,111.0
mean,2.918919,2.891734,0.46876
std,1.873912,1.68455,0.317698
min,1.0,1.432609,0.000237
25%,2.0,1.667603,0.234079
50%,2.0,1.825915,0.39589
75%,5.0,5.011823,0.698466
max,6.0,5.934915,1.35295


In [135]:
grp6_samples = clonaldf.index[clonaldf['cluster'] == 6].values
grp6_idx = np.argwhere(np.in1d(tio_samples, grp6_samples))
grp6_idx1 = idx1[np.in1d(idx1, grp6_idx)]
grp6_idx2 = idx2[np.in1d(idx2, grp6_idx)]

X_train_g6 = X_tio[grp6_idx1,:]
X_test_g6 = X_tio[grp6_idx2,:]
y_train_g6 = y_tio[grp6_idx1]
y_test_g6 = y_tio[grp6_idx2]

In [136]:
fsel = SelectKBest(f_classif, k=10000)
X_train_g6_fs = fsel.fit_transform(X_train_g6, y_train_g6)
X_test_g6_fs = fsel.transform(X_test_g6)

  f = msb / msw
  f = msb / msw


In [139]:
patience = 16
early_stop = EarlyStopping(monitor='loss', patience=patience, verbose=0, min_delta=0.005, mode='auto')
model_save = ModelCheckpoint("best_model.hdf5",monitor='loss', verbose = 0, save_best_only =True, save_weights_only = False, mode ='auto', period =1)
reduce_LR = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=(patience/2), verbose = 0, min_delta=0.005,mode = 'auto', cooldown=0, min_lr=0)

num_classes=6
n1 = X_test_g6_fs.shape[1]
n2 = int((n1+num_classes-10)/2)
model = Sequential()
model.add(Dense(n1,activation='relu',input_dim=(n1)))
model.add(Dropout(0.5))
model.add(Dense(n2, activation='relu', kernel_initializer='uniform'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='relu', kernel_initializer='uniform', ))
model.add(Dense(1, kernel_initializer='normal'))

model.compile(loss='mae', metrics=['accuracy'], optimizer='adam')

In [140]:
history = model.fit(X_train_g6_fs, y_train_g6, epochs=100, batch_size=50, verbose=1, shuffle=True)

Epoch 1/100


 50/227 [=====>........................] - ETA: 1:43 - loss: 1.6907 - acc: 0.0600









Epoch 2/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.8978 - acc: 0.2800









Epoch 3/100


 50/227 [=====>........................] - ETA: 5s - loss: 1.8839 - acc: 0.1000









Epoch 4/100


 50/227 [=====>........................] - ETA: 5s - loss: 1.4249 - acc: 0.3400









Epoch 5/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.8877 - acc: 0.4200









Epoch 6/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.7374 - acc: 0.5000









Epoch 7/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.8308 - acc: 0.5000









Epoch 8/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5877 - acc: 0.6800









Epoch 9/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.7730 - acc: 0.6200









Epoch 10/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.6813 - acc: 0.3600









Epoch 11/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.6785 - acc: 0.5600









Epoch 12/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4309 - acc: 0.6400









Epoch 13/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5359 - acc: 0.7000









Epoch 14/100


 50/227 [=====>........................] - ETA: 6s - loss: 0.5144 - acc: 0.5600









Epoch 15/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5535 - acc: 0.5800









Epoch 16/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5831 - acc: 0.5400









Epoch 17/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4244 - acc: 0.7000









Epoch 18/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3768 - acc: 0.7400









Epoch 19/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3516 - acc: 0.7400









Epoch 20/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3899 - acc: 0.6800









Epoch 21/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3745 - acc: 0.7000









Epoch 22/100


 50/227 [=====>........................] - ETA: 6s - loss: 0.4411 - acc: 0.6200









Epoch 23/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4348 - acc: 0.7000









Epoch 24/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4369 - acc: 0.6400









Epoch 25/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3787 - acc: 0.6800









Epoch 26/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3497 - acc: 0.7400









Epoch 27/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4841 - acc: 0.6000









Epoch 28/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5061 - acc: 0.5800









Epoch 29/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3174 - acc: 0.7600









Epoch 30/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3130 - acc: 0.7800









Epoch 31/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4660 - acc: 0.6400









Epoch 32/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5344 - acc: 0.6400









Epoch 33/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5829 - acc: 0.5000









Epoch 34/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3187 - acc: 0.7600









Epoch 35/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3987 - acc: 0.6200









Epoch 36/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3145 - acc: 0.8000









Epoch 37/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3412 - acc: 0.7200









Epoch 38/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3382 - acc: 0.7200









Epoch 39/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3540 - acc: 0.7000









Epoch 40/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3041 - acc: 0.7600









Epoch 41/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3491 - acc: 0.7000









Epoch 42/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3982 - acc: 0.6400









Epoch 43/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2609 - acc: 0.7800









Epoch 44/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3946 - acc: 0.6600









Epoch 45/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4321 - acc: 0.6400









Epoch 46/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2905 - acc: 0.7400









Epoch 47/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2741 - acc: 0.8200









Epoch 48/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3545 - acc: 0.8200









Epoch 49/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4201 - acc: 0.6800









Epoch 50/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.6089 - acc: 0.5200









Epoch 51/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3297 - acc: 0.7000









Epoch 52/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4225 - acc: 0.6800









Epoch 53/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5614 - acc: 0.6800









Epoch 54/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.5188 - acc: 0.5000









Epoch 55/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2970 - acc: 0.7200









Epoch 56/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3553 - acc: 0.7000









Epoch 57/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3902 - acc: 0.6600









Epoch 58/100


 50/227 [=====>........................] - ETA: 6s - loss: 0.3036 - acc: 0.7600









Epoch 59/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4304 - acc: 0.6600









Epoch 60/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3875 - acc: 0.7000









Epoch 61/100


 50/227 [=====>........................] - ETA: 6s - loss: 0.3831 - acc: 0.6800









Epoch 62/100


 50/227 [=====>........................] - ETA: 6s - loss: 0.3123 - acc: 0.7400









Epoch 63/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3879 - acc: 0.7000









Epoch 64/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3133 - acc: 0.7800









Epoch 65/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3442 - acc: 0.7000









Epoch 66/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4146 - acc: 0.7000









Epoch 67/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3750 - acc: 0.7200









Epoch 68/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.4013 - acc: 0.6200









Epoch 69/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2965 - acc: 0.7600









Epoch 70/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3179 - acc: 0.7400









Epoch 71/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3526 - acc: 0.6400









Epoch 72/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3508 - acc: 0.6800









Epoch 73/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3189 - acc: 0.7200









Epoch 74/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2851 - acc: 0.7400









Epoch 75/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2293 - acc: 0.8000









Epoch 76/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2865 - acc: 0.8200









Epoch 77/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3561 - acc: 0.7000









Epoch 78/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2692 - acc: 0.7400









Epoch 79/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2464 - acc: 0.8400









Epoch 80/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2404 - acc: 0.8800









Epoch 81/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3559 - acc: 0.7000









Epoch 82/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2580 - acc: 0.8600









Epoch 83/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3233 - acc: 0.7000









Epoch 84/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2821 - acc: 0.7200









Epoch 85/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2800 - acc: 0.8200









Epoch 86/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2845 - acc: 0.7600









Epoch 87/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3168 - acc: 0.7400









Epoch 88/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2532 - acc: 0.8000









Epoch 89/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3961 - acc: 0.7000









Epoch 90/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2977 - acc: 0.8000









Epoch 91/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2782 - acc: 0.8000









Epoch 92/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3759 - acc: 0.7600









Epoch 93/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3665 - acc: 0.6600









Epoch 94/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2948 - acc: 0.8000









Epoch 95/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2581 - acc: 0.9400









Epoch 96/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2608 - acc: 0.7600









Epoch 97/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2485 - acc: 0.7800









Epoch 98/100


 50/227 [=====>........................] - ETA: 6s - loss: 0.2795 - acc: 0.7800









Epoch 99/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.3012 - acc: 0.7600









Epoch 100/100


 50/227 [=====>........................] - ETA: 5s - loss: 0.2805 - acc: 0.8000









In [141]:
y_pred_g6 = model.predict(X_test_g6_fs)
g6resdf = pd.DataFrame(data=np.vstack((y_test_g6, y_pred_g6[:,0])).T,
             columns=['true','pred'])
g6resdf['err'] = np.abs(g6resdf['true'] - g6resdf['pred'])

In [142]:
g6resdf.describe()

Unnamed: 0,true,pred,err
count,63.0,63.0,63.0
mean,2.253968,1.787069,0.540105
std,1.822514,1.602802,0.519916
min,1.0,0.834897,0.002675
25%,1.0,0.976401,0.06894
50%,2.0,1.040271,0.362689
75%,2.0,1.297256,0.984276
max,6.0,6.010312,1.872184
