# Read The Data

In [1]:
from supporting_files.dfs2 import DeepFeatureSelectionNew
from sklearn.cross_validation import train_test_split
from sklearn import datasets
from scipy import io as sio
from tensorflow.python.framework import ops
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

# Load the Asthma-vs-COPD .mat file and pull out the arrays used below.
asthma_copd_path = "./data/B_AsthmaCOPD_mean_scaled_7159.mat"
ourdata = sio.loadmat(asthma_copd_path)

# 'X' is used as the model input, the first row of 'Y' as the label vector,
# and 'columnNames' is kept alongside them for later lookup.
inputX, inputY, columnNames = ourdata['X'], ourdata['Y'][0, :], ourdata['columnNames']

# Run the Deep Feature Selection
## Changing lambda1 slightly

In [None]:
# Sweep lambda1 over 0, 1e-4, ..., 9e-4 and record selected_ws[0] of each trained model.

# Reset the graph
ops.reset_default_graph()

# Split here so this cell runs on a fresh kernel (the original relied on
# X_train/X_test/y_train/y_test leaking in from a cell further down).
# random_state=2 mirrors the single-model testing cell of this notebook.
X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=2)

weights_tuning_lamda1 = []
for lambda1_step in xrange(0, 10, 1):
    # Should be modified for different datasets, similar things should be done for alpha1
    lambda1 = lambda1_step / 10000.
    # Pass the swept value: the original hard-coded lambda1=0.0001 here, so every
    # iteration trained with the same setting and the sweep had no effect.
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[50],
                                     learning_rate=0.01, lambda1=lambda1, lambda2=1, alpha1=0.00001,
                                     alpha2=0, activation='tanh', weight_init='uniform', epochs=20,
                                     optimizer='Adam', print_step=1)
    dfsMLP.train(batch_size=2000)
    print("Train finised for lambda1:" + str(lambda1))
    weights_tuning_lamda1.append(dfsMLP.selected_ws[0])

# Run different random states in order to select features, given the set of parameters chosen above

In [None]:
# Train one model per split seed (0..19) and keep selected_ws[0] from each run.
weights_randomstates = []

for random_state in xrange(20):
    # Each seed yields a different 80/20 partition of the same inputX/inputY
    X_train, X_test, y_train, y_test = train_test_split(
        inputX, inputY, test_size=0.2, random_state=random_state)

    # Change number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(
        X_train, X_test, y_train, y_test,
        n_input=1, hidden_dims=[30], learning_rate=0.01,
        lambda1=0.0001, lambda2=1, alpha1=0.0001, alpha2=0,
        activation='tanh', weight_init='uniform',
        epochs=50, optimizer='Adam', print_step=10)
    dfsMLP.train(batch_size=2000)
    print("Train finised for random state:" + str(random_state))
    weights_randomstates.append(dfsMLP.selected_ws[0])

# Persist the collected weights in the weights folder
np.save("./weights/weights_randomstates", weights_randomstates)

# The code below is for single-model testing / parameter discovery

In [None]:
# Single model on one fixed 80/20 split (seed 2), used for trying out hyperparameters.
X_train, X_test, y_train, y_test = train_test_split(
    inputX, inputY, test_size=0.2, random_state=2)

dfsMLP = DeepFeatureSelectionNew(
    X_train, X_test, y_train, y_test,
    n_input=1, hidden_dims=[5], learning_rate=0.012,
    lambda1=0.002, lambda2=1, alpha1=0.001, alpha2=0,
    activation='tanh', weight_init='uniform',
    epochs=200, optimizer='Adam', print_step=1)
dfsMLP.train(batch_size=2000)

# More layers might cause overfitting problems, but changing alpha1 and lambda1 accordingly
# should solve the problem

In [None]:
# Load the COPD-vs-Acos .mat file and split sample positions by label.
# NOTE(review): the XGBoost cell of this notebook loads
# "B_COPDAcos_mean_scaled_7159.mat" (7159, not 7169) - confirm which file name is correct.
copd_acos_path = "./data/B_COPDAcos_mean_scaled_7169.mat"
ourdata = sio.loadmat(copd_acos_path)
inputX, inputY, columnNames = ourdata['X'], ourdata['Y'][0, :], ourdata['columnNames']

# Positions with label 0 go to index_Acos, positions with label 1 to index_COPD
index_Acos = (inputY == 0).nonzero()[0]
index_COPD = (inputY == 1).nonzero()[0]

In [None]:
# Reload the COPD-vs-Acos data (this cell repeats the load done in the cell just before it).
# NOTE(review): other cells in this notebook use the "_7159" suffix for their
# data files; confirm that "_7169" is the intended file name here.
ourdata = sio.loadmat("./data/B_COPDAcos_mean_scaled_7169.mat")

inputX = ourdata['X']
label_rows = ourdata['Y']
inputY = label_rows[0, :]
columnNames = ourdata['columnNames']

# Sample positions grouped by label value: 0 -> index_Acos, 1 -> index_COPD
is_label_zero = inputY == 0
is_label_one = inputY == 1
index_Acos = np.where(is_label_zero)[0]
index_COPD = np.where(is_label_one)[0]

In [None]:
# Train on a class-balanced subsample: all Acos samples plus an equally sized
# random draw of COPD samples, and collect selected_ws[0] for each round.
weights = []
for i in xrange(1):
    # Randomly choose as many COPD patients as there are Acos patients.
    # np.random.choice draws with replacement by default, so a COPD sample may repeat.
    choice = np.random.choice(a=len(index_COPD), size=len(index_Acos))
    # Fixed: the original indexed an undefined `index_Asthma` here (NameError);
    # `choice` is drawn over len(index_COPD), so it must index index_COPD.
    index_COPD_chosen = index_COPD[choice]

    # Concatenate the indexes for Acos and the chosen COPD patients
    indexes = np.concatenate([index_Acos, index_COPD_chosen])
    # Shuffle the indexes
    np.random.shuffle(indexes)

    # inputX and inputY for this round
    inputX_ = inputX[indexes, :]
    inputY_ = inputY[indexes]

    X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)

    # Change number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[10],
                                     learning_rate=0.01, lambda1=0.01, lambda2=1, alpha1=0.001,
                                     alpha2=0, activation='tanh', weight_init='uniform', epochs=30,
                                     optimizer='Adam', print_step=1)
    dfsMLP.train(batch_size=500)
    # Fixed: report this loop's own counter; the original printed `random_state`,
    # a variable left over from a different cell.
    print("Train finised for random state:" + str(i))
    weights.append(dfsMLP.selected_ws[0])

# Run XGBoost Model

In [None]:
import scipy.io as sio
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import xgboost as xgb
import numpy as np

# COPD Acos: rank features by XGBoost importance and save the ranking.
ourdata = sio.loadmat("./data/B_COPDAcos_mean_scaled_7159.mat")
inputX, inputY = ourdata['X'], ourdata['Y'][0, :]

# Fit the classifier on the full dataset (no train/test split in this cell)
gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05)
gbm.fit(inputX, inputY)

# Feature indices ordered from highest to lowest importance
importance_order_ascending = np.argsort(gbm.feature_importances_)
indexes_xgboost = importance_order_ascending[::-1]

np.save("./weights/indexes_xgboost_rerun_All_CAc", indexes_xgboost)

In [None]:
# Asthma COPD: rank features by XGBoost importance and save the ranking.
ourdata = sio.loadmat("./data/B_AsthmaCOPD_mean_scaled_7159.mat")
inputX, inputY = ourdata['X'], ourdata['Y'][0, :]

# The classifier is fitted on the full dataset, so no held-out accuracy is computed here.
gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05)
gbm.fit(inputX, inputY)

# Feature indices ordered from highest to lowest importance
importance_order_ascending = np.argsort(gbm.feature_importances_)
indexes_xgboost = importance_order_ascending[::-1]
np.save("./weights/indexes_xgboost_rerun_All_AsC", indexes_xgboost)

In [None]:
# Asthma Acos: rank features by XGBoost importance and save the ranking.
ourdata = sio.loadmat("./data/B_AsthmaAcos_mean_scaled_7159.mat")
inputX, inputY = ourdata['X'], ourdata['Y'][0, :]

# Fit the classifier on the full dataset (no train/test split in this cell)
gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05)
gbm.fit(inputX, inputY)

# Feature indices ordered from highest to lowest importance
importance_order_ascending = np.argsort(gbm.feature_importances_)
indexes_xgboost = importance_order_ascending[::-1]

np.save("./weights/indexes_xgboost_rerun_All_AsAc", indexes_xgboost)