In [72]:
from procan_connectome.model_training.loocv_wrapper import LOOCV_Wrapper
# from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# from sklearn.preprocessing import StandardScaler, PowerTransformer
# from procan_connectome.data_processing.linear_svc_importance_filter import LinearSVCImportanceFilter
# from procan_connectome.data_processing.correlation_filter import CorrelationFilter
# from procan_connectome.data_processing.powertransformer_wrapper import PowerTransformerWrapper
# from procan_connectome.data_processing.select_k_best_filter import SelectKBestFilter
# from sklearn.svm import SVC, LinearSVC
# from sklearn.decomposition import FastICA
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.feature_selection import SelectKBest, SelectFromModel
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PowerTransformer, OneHotEncoder
from sklearn.model_selection import train_test_split
import numpy as np 
import os
import pandas as pd
import logging
import datetime
from procan_connectome.config import DATA_PATH, RANDOM_STATE, LOGGER_LEVEL
import tensorflow as tf 

In [73]:
df = pd.read_csv(os.path.join(DATA_PATH, 'combined_datasets.csv'))
df = df.set_index('ID')
X, y = df.drop(columns=['label']), df['label']
log_dir = os.path.join(DATA_PATH, 'logs')
pipeline =  Pipeline([
    ('ss', StandardScaler()),
])

In [93]:
ohe = OneHotEncoder(sparse=False)
X_dev, X_test, y_dev, y_test = train_test_split(X,y, test_size=0.2)
y_dev = ohe.fit_transform(np.array(y_dev).reshape(-1, 1))
y_test = ohe.transform(np.array(y_test).reshape(-1, 1))
X_train, X_val, y_train, y_val = train_test_split(X_dev, y_dev, test_size=0.5)
ss = StandardScaler() 
X_train = ss.fit_transform(X_train)
X_val = ss.transform(X_val)
X_test = ss.transform(X_test)

In [94]:
def get_fcnn_model(input_shape=(1359, ), k=5, lr=1e-5): 
    model_input = tf.keras.layers.Input(shape=input_shape)
    x = tf.keras.layers.Dense(1359, activation='relu')(model_input)
    x = tf.keras.layers.Dense(680, activation='relu')(x)
    x = tf.keras.layers.Dense(340, activation='relu')(x)
    x = tf.keras.layers.Dense(170, activation='relu')(x)
    x = tf.keras.layers.Dense(85, activation='relu')(x)
    x = tf.keras.layers.Dense(42, activation='relu')(x)
    x = tf.keras.layers.Dense(21, activation='relu')(x)
    x = tf.keras.layers.Dense(10, activation='relu')(x)
    model_output = tf.keras.layers.Dense(5, activation='softmax')(x)
    model = tf.keras.models.Model(inputs= model_input, outputs=model_output)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    return model 
    
model = get_fcnn_model()

In [76]:
model.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 1359)]            0         
_________________________________________________________________
dense_45 (Dense)             (None, 1359)              1848240   
_________________________________________________________________
dense_46 (Dense)             (None, 680)               924800    
_________________________________________________________________
dense_47 (Dense)             (None, 340)               231540    
_________________________________________________________________
dense_48 (Dense)             (None, 170)               57970     
_________________________________________________________________
dense_49 (Dense)             (None, 85)                14535     
_________________________________________________________________
dense_50 (Dense)             (None, 42)                3612

In [95]:
model_name_fcn =  os.path.join(DATA_PATH, 'tf', datetime.datetime.now().strftime('%Y_%m_%d_%H_%M') + "_tf.h5")
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50)
monitor = tf.keras.callbacks.ModelCheckpoint(
    model_name_fcn,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    mode='min')

# Learning rate schedule
def scheduler(epoch, lr):
    if epoch%10 == 0 and epoch != 0 :
        lr = lr/2
    return lr

lr_schedule = tf.keras.callbacks.LearningRateScheduler(scheduler,verbose = 1)

In [96]:
model.fit(X_train,
          y_train,
          steps_per_epoch=len(X_train) / 32,
          epochs=100,
          verbose=3,
          callbacks = [early_stop, monitor, lr_schedule],
          validation_data = (X_val, y_val))

Epoch 1/100

Epoch 00001: LearningRateScheduler reducing learning rate to 9.999999747378752e-06.

Epoch 00001: val_loss improved from inf to 1.63692, saving model to /home/mklasby/Sandbox/Research/procan_connectome/procan_connectome/data/tf/2021_07_20_15_06_tf.h5
Epoch 2/100

Epoch 00002: LearningRateScheduler reducing learning rate to 9.999999747378752e-06.

Epoch 00002: val_loss improved from 1.63692 to 1.63456, saving model to /home/mklasby/Sandbox/Research/procan_connectome/procan_connectome/data/tf/2021_07_20_15_06_tf.h5
Epoch 3/100

Epoch 00003: LearningRateScheduler reducing learning rate to 9.999999747378752e-06.

Epoch 00003: val_loss improved from 1.63456 to 1.63268, saving model to /home/mklasby/Sandbox/Research/procan_connectome/procan_connectome/data/tf/2021_07_20_15_06_tf.h5
Epoch 4/100

Epoch 00004: LearningRateScheduler reducing learning rate to 9.999999747378752e-06.

Epoch 00004: val_loss improved from 1.63268 to 1.63069, saving model to /home/mklasby/Sandbox/Research

<tensorflow.python.keras.callbacks.History at 0x7fd7c80b9910>

In [97]:
metrics = model.evaluate(X_test,y_test)



In [78]:


log_file_name = f"{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M')}" + "_" + 'TF'

logging.basicConfig(
    filename=os.path.join(DATA_PATH, 'logs', log_file_name + "_LOGS"),
    filemode='a',
    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
    datefmt='%H:%M:%S',
    level=LOGGER_LEVEL
)

loocv = LOOCV_Wrapper(
            X, 
            y, 
            estimator, 
            pipeline=pipeline, 
            param_grid=grid,
            perform_grid_search=False,
            label_col='label',
            log_file_name = log_file_name,
            log_dir=log_dir,
            balance_classes=True, 
            scoring='f1_weighted',
            verbose=2,
            n_samples=None,
            single_label_upsample=None,
            cv=None,
            select_features=False,
            feature_threshold=0.001,
            grid_search_feature_selection=False
        )
        loocv.fit(X,y) 

IndentationError: unexpected indent (<ipython-input-78-416f82b5dd6c>, line 31)