# Code for importing IPython (Jupyter) notebooks

In [None]:
%reset -f
import io, os, sys, types
from IPython import get_ipython
from nbformat import current
from IPython.core.interactiveshell import InteractiveShell

def find_notebook(fullname, path=None):
    """Find the notebook file backing a fully qualified module name.

    Only the last dotted component of ``fullname`` is used as the file name,
    so "foo.bar" is looked up as "bar.ipynb" inside each directory of
    ``path``.  If "Foo_Bar.ipynb" does not exist in a directory,
    "Foo Bar.ipynb" is tried in that same directory.

    Args:
        fullname: fully qualified name of the module being imported.
        path: optional iterable of directories to search, in order.  When
            empty or None, the current working directory is searched.

    Returns:
        The path of the first matching notebook, or None when no directory
        contains one.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    # let import Notebook_Name find "Notebook Name.ipynb".  The underscore
    # replacement is applied to the file name only: rewriting the joined path
    # would also mangle directories such as "my_project/".
    candidates = (name + ".ipynb", name.replace("_", " ") + ".ipynb")
    for d in path:
        for filename in candidates:
            nb_path = os.path.join(d, filename)
            if os.path.isfile(nb_path):
                return nb_path
    return None


class NotebookLoader(object):
    """Module loader that imports a Jupyter notebook by running its code cells.

    Implements the legacy ``load_module`` loader protocol; instances are
    handed out by ``NotebookFinder.find_module``.
    """
    def __init__(self, path=None):
        """Store the search path and obtain the shared IPython shell.

        Args:
            path: optional list of directories passed on to
                ``find_notebook`` when a module is loaded.
        """
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """Import a notebook as a module.

        The notebook is read, a fresh module object is registered in
        ``sys.modules`` and every Python code cell of the first worksheet is
        executed in the module's namespace.

        Args:
            fullname: fully qualified name of the module to load.

        Returns:
            The populated module object.

        Raises:
            ImportError: if no notebook file can be found for ``fullname``.
            Any exception raised while executing a cell is propagated after
            the partially initialised module has been removed from
            ``sys.modules``.
        """
        path = find_notebook(fullname, self.path)
        if not path:
            # Report a missing notebook the way the import system expects
            # instead of failing later inside io.open(None).
            raise ImportError("No notebook found for module %r" % (fullname,))

        print ("importing Jupyter notebook from %s" % path)

        # load the notebook object
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = current.read(f, 'json')

        # create the module and add it to sys.modules
        # if name in sys.modules:
        #    return sys.modules[name]
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.worksheets[0].cells:
                if cell.cell_type == 'code' and cell.language == 'python':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.input)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # A failed import must not leave a half-executed module behind,
            # otherwise the next `import` would silently return it.
            sys.modules.pop(fullname, None)
            raise
        finally:
            # always hand the shell its original namespace back
            self.shell.user_ns = save_user_ns
        return mod


class NotebookFinder(object):
    """Meta-path finder that maps importable names to notebook loaders.

    Loaders are cached in ``self.loaders`` so that each distinct search path
    gets exactly one ``NotebookLoader``.
    """
    def __init__(self):
        # cache: search-path key -> NotebookLoader
        self.loaders = {}

    def find_module(self, fullname, path=None):
        """Return a loader for ``fullname``, or None if no notebook matches.

        Args:
            fullname: fully qualified name of the module being imported.
            path: optional list of directories to search.
        """
        if not find_notebook(fullname, path):
            return None

        # lists aren't hashable, so a non-empty path is flattened to a string
        cache_key = os.path.sep.join(path) if path else path

        try:
            return self.loaders[cache_key]
        except KeyError:
            loader = NotebookLoader(path)
            self.loaders[cache_key] = loader
            return loader

# Register the notebook finder at the end of sys.meta_path so that a plain
# `import <name>` can resolve to a .ipynb file.
sys.meta_path.append(NotebookFinder())


# Import and Run Pipeline

In [None]:
import scipy
import numpy
import pandas
import csv
import MergeDiagnosis_AdjustedLabels
import Categorical_Updated
import LongitudinalDataAnalysis
import Imputation
import FeatureReduction
import SupervisedLearning
import ModelPerformance
import TrainTestSplit

In [None]:
# -------------------------------
# Merge Data
# -------------------------------
# Run the preprocessing step of the MergeDiagnosis_AdjustedLabels notebook
# and keep whatever it returns in `merged_data`.
merged_data = MergeDiagnosis_AdjustedLabels.data_preprocess(
    study="all",
    imaging_to_drop='all',
    reversions='label0'
)

In [None]:
# -------------------------------
# Categorical to Numerical
# -------------------------------
# Column names handed to the conversion step: the first list is passed as
# the date columns, the second as the columns to ignore.
cols_to_ignore = ['PTID']
date_cols = ['update_stamp', 'EXAMDATE', 'EXAMDATE_bl']

Categorical_Updated.categorical_conversion(date_cols, cols_to_ignore)


In [None]:
# -------------------------------
# Longitudinal Data Analysis
# -------------------------------

def _features_of_type(all_labels, type_names, split_on_last=False):
    """Select the labels that belong to one of ``type_names``.

    A label is selected when it equals a type name, or when the part before
    its first underscore (last underscore if ``split_on_last``) equals a type
    name.  The order of ``all_labels`` is preserved.
    """
    selected = []
    for label in all_labels:
        cut = label.rfind("_") if split_on_last else label.find("_")
        if label in type_names or (cut != -1 and label[:cut] in type_names):
            selected.append(label)
    return selected


# Input file name for Longitudinal Data Analysis
InputToLongitudinal = 'CategoricalToNumerical.csv'

# Only the header row is needed: it holds the column labels.
with open(InputToLongitudinal) as csvfile:
    labels = next(csv.reader(csvfile, delimiter=','))

# Output file name from this script
OutputFromLongitudinal = 'LongitudinalDataAnalysis.csv'

# Patient RID Features
Patient_FEATURES = ['RID']

# Demographic Features
Demo_FEATURES_type = ['AGE', 'PTEDUCAT', 'PTGENDER', 'PTETHCAT', 'PTRACCAT', 'PTMARRY']
Demo_FEATURES = _features_of_type(labels, Demo_FEATURES_type)

# Baseline OneTime Features (prefix taken up to the LAST underscore)
BaselineOneTime_FEATURES_type = ['APOE4', 'Years_bl', 'ORIGPROT']
BaselineOneTime_FEATURES = _features_of_type(
    labels, BaselineOneTime_FEATURES_type, split_on_last=True)

# Time Headers
Time_FEATURES_type = [
    'SITE', 'Month', 'update_stamp_minus_EXAMDATE_bl',
    'update_stamp_minus_EXAMDATE', 'EXAMDATE_minus_EXAMDATE_bl',
    'COLPROT', 'M', 'Month_bl',
]
Time_FEATURES = _features_of_type(labels, Time_FEATURES_type)
# Month_bl must be the last time feature
Time_FEATURES.append(Time_FEATURES.pop(Time_FEATURES.index('Month_bl')))

# Baseline Evaluation Features
BaselineEvaluation_FEATURES = [
    'CDRSB_bl', 'ADAS11_bl', 'ADAS13_bl', 'MMSE_bl',
    'RAVLT_learning_bl', 'RAVLT_forgetting_bl',
    'RAVLT_perc_forgetting_bl', 'RAVLT_immediate_bl',
    'FAQ_bl', 'MOCA_bl',
    'EcogPtLang_bl', 'EcogPtVisspat_bl', 'EcogPtPlan_bl', 'EcogPtOrgan_bl',
    'EcogPtDivatt_bl', 'EcogPtMem_bl', 'EcogPtTotal_bl',
    'EcogSPLang_bl', 'EcogSPVisspat_bl', 'EcogSPPlan_bl', 'EcogSPOrgan_bl',
    'EcogSPDivatt_bl', 'EcogSPMem_bl', 'EcogSPTotal_bl',
]

# Current Medical Evaluation
CurrentEvaluation_FEATURES = [
    'CDRSB', 'ADAS11', 'ADAS13', 'MMSE',
    'RAVLT_learning', 'RAVLT_forgetting',
    'RAVLT_perc_forgetting', 'RAVLT_immediate',
    'FAQ', 'MOCA',
    'EcogPtLang', 'EcogPtVisspat', 'EcogPtPlan', 'EcogPtOrgan',
    'EcogPtDivatt', 'EcogPtMem', 'EcogPtTotal',
    'EcogSPLang', 'EcogSPVisspat', 'EcogSPPlan', 'EcogSPOrgan',
    'EcogSPDivatt', 'EcogSPMem', 'EcogSPTotal',
]

# Current Diagnosis
CurrentDiagnosis_FEATURES = ['AD']



In [None]:
# Third-party imports are done once, before the loop, instead of on every
# one of the 100 iterations.
import numpy as np
import pandas as pd
# MLPClassifier is not used in this cell; the import is kept so the name
# stays available to later cells.
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier

# One ModelPerformance.model_performance('all') result per repetition.
res = []

# Columns retained from the longitudinal output files before imputation.
keep_col = ['CDRSB_MaxTime', 'CDRSB_Delta', 'CDRSB_Mean', 'CDRSB_Std',
            'ADAS13_MaxTime', 'ADAS13_Delta', 'ADAS13_Mean', 'ADAS13_Std',
            'MOCA_MaxTime', 'MOCA_Delta', 'MOCA_Mean', 'MOCA_Std',
            'Diagnostics']

# Repeat the whole pipeline 100 times and collect the performance of each run.
for i in range(100):

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("###########################################")
    print("###########################################")
    print(i)
    print("###########################################")
    print("###########################################")

    # -------------------------------
    # Longitudinal Data Analysis
    # -------------------------------

    # Longitudinal Method
    LongitudinalMethod = 2
    # Rebuilt on every iteration because the list is handed to project code.
    MetricList = ['MaxTime', 'Delta', 'Mean', 'Std']

    # Run Longitudinal Data Analysis
    LongitudinalDataAnalysis.runLongitudinal(
        InputToLongitudinal, OutputFromLongitudinal, Patient_FEATURES, Demo_FEATURES,
        BaselineOneTime_FEATURES, Time_FEATURES, BaselineEvaluation_FEATURES,
        CurrentEvaluation_FEATURES, CurrentDiagnosis_FEATURES, LongitudinalMethod, MetricList)

    # --------------------------------------------------------------
    # NOTE: Keep only a few columns (the file is overwritten in place)
    # --------------------------------------------------------------
    f = pd.read_csv("LongitudinalDataAnalysis.csv")
    new_f = f[keep_col]
    new_f.to_csv("LongitudinalDataAnalysis.csv", index=False)

    # ------------------
    # Train Test Split
    # ------------------
    # presumably 0.33 is the test fraction -- TODO confirm in TrainTestSplit
    TrainTestSplit.traintest_split(0.33)

    # ----------------------------------------------------
    # Limit number of months available for test patients
    # ----------------------------------------------------
    # Longitudinal Method
    LongitudinalMethod = 3

    # Run Longitudinal Data Analysis
    LongitudinalDataAnalysis.runLongitudinal(
        InputToLongitudinal, OutputFromLongitudinal, Patient_FEATURES, Demo_FEATURES,
        BaselineOneTime_FEATURES, Time_FEATURES, BaselineEvaluation_FEATURES,
        CurrentEvaluation_FEATURES, CurrentDiagnosis_FEATURES, LongitudinalMethod, MetricList)

    # --------------------------------------------------------------
    # NOTE: Keep only a few columns, this time in both output files
    # --------------------------------------------------------------
    f = pd.read_csv("LongitudinalDataAnalysis.csv")
    new_f = f[keep_col]
    new_f.to_csv("LongitudinalDataAnalysis.csv", index=False)

    f2 = pd.read_csv("LongitudinalDataAnalysis_test.csv")
    new_f2 = f2[keep_col]
    new_f2.to_csv("LongitudinalDataAnalysis_test.csv", index=False)

    # ------------------
    # Imputation
    # ------------------
    #Imputation.imputation('knn')
    Imputation.imputation('meanmode')
    #Imputation.imputation('nuclearnorm')
    #Imputation.imputation('softimpute')

    # ----------------------------------
    # Feature Reduction
    # ----------------------------------

    # Input and Output files from Feature Reduction for train and test sets
    InputToFeatureReduction_train = 'ImputedMatrix_train.csv'
    OutputFromFeatureReduction_train = 'Features_train.csv'
    InputToFeatureReduction_test = 'ImputedMatrix_test.csv'
    OutputFromFeatureReduction_test = 'Features_test.csv'

    # Normalization method
    NormalizationMethod = 'MinMax'
    #NormalizationMethod='MeanStd'

    # Feature Reduction Method and Settings
    #FeatureReductionMethod='SVD';
    ExplainedVariance = 0.99  # For method 'SVD'

    FeatureReductionMethod = 'none'  # 'AffinityPropagation', 'SVD' or 'none'
    APpreference = -50  # Hyperparameter for method 'AffinityPropagation'

    # Run Feature Reduction
    FeatureReduction.RunFeatureReduction(
        InputToFeatureReduction_train, OutputFromFeatureReduction_train,
        InputToFeatureReduction_test, OutputFromFeatureReduction_test,
        NormalizationMethod, FeatureReductionMethod, ExplainedVariance, APpreference)

    # ------------------
    # Supervised Learning- Model 3
    # ------------------
    # The header row is skipped; the last column is used as the target and
    # all other columns as features.
    data_train = np.loadtxt('Features_train.csv', delimiter=",", skiprows=1)
    data_test = np.loadtxt('Features_test.csv', delimiter=",", skiprows=1)
    training_set_X, training_set_Y = data_train[:, :-1], data_train[:, -1]
    test_set_X, test_set_Y = data_test[:, :-1], data_test[:, -1]
    best_params = {'n_estimators': 1000}
    model = RandomForestClassifier()
    SupervisedLearning.test_model(training_set_X, test_set_X, training_set_Y, test_set_Y, model, best_params)
    res.append(ModelPerformance.model_performance('all'))

    #------------------
    # Model Evaluation
    # ------------------
    #print ModelPerformance.model_performance('confusion_matrix')
    # NOTE(review): model_performance('all') is evaluated a second time here;
    # kept as two calls in case the function has side effects -- confirm.
    print(ModelPerformance.model_performance('all'))

In [None]:
#print res
# Turn the list of per-repetition results into one array and show it.
# print(...) with a single argument works on both Python 2 and Python 3.
res2 = np.asarray(res)
print(res2)

In [None]:
# Average over the first axis, i.e. across the collected repetitions.
res2.mean(axis=0)

In [None]:

# # ------------------
# # Supervised Learning- options currently are LogisticRegression, RandomForest, MLP, knn
# # ------------------
# from sklearn.neural_network import MLPClassifier
# from sklearn.ensemble import RandomForestClassifier
# import numpy as np
# data_train = np.loadtxt('Features_train.csv', delimiter=",", skiprows = 1)
# data_test = np.loadtxt('Features_test.csv', delimiter=",", skiprows = 1)

# training_set_X, test_set_X, training_set_Y, test_set_Y = data_train[:,:-1], data_test[:,:-1], data_train[:,-1], data_test[:,-1]

# best_params = {'alpha': 0.05, 'hidden_layer_sizes': (100,100,100)}
# model = MLPClassifier()
# SupervisedLearning.test_model(training_set_X, test_set_X, training_set_Y, test_set_Y, model, best_params)

# #------------------
# # Model Evaluation
# # ------------------
# #print ModelPerformance.model_performance('confusion_matrix')
# print ModelPerformance.model_performance('all')