In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file beneath it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = [os.path.join(dirname, filename) for filename in filenames]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<h1 style="background-color:#DC143C; font-family:'Brush Script MT',cursive;color:white;font-size:200%; text-align:center;border-radius: 50% 20% / 10% 40%">Defining Survival as an Outcome Measure in Amyotrophic Lateral Sclerosis</h1>

Citation: Gordon PH, Corcia P, Lacomblez L, et al. Defining Survival as an Outcome Measure in Amyotrophic Lateral Sclerosis. Arch Neurol. 2009;66(6):758–761. doi:10.1001/archneurol.2009.1

"Tracheostomy and permanent assisted ventilation are not equivalent to death in amyotrophic lateral sclerosis. The use of respiratory interventions differs between centers, leading to variability in combined outcome assessments. The time to the end point can differ significantly depending on its definition, and combining outcomes does not reduce the estimated sample size of a trial. The death rate alone is the least variable and most easily identifiable measure of survival rate in amyotrophic lateral sclerosis."

" Many patients who undergo PAV (permanent assisted ventilation) die when they decide to stop the ventilation, but patients with ALS also die of cardiac infarction, pulmonary embolism, or other events. Overall, approximately 18% of the participants died of causes other than a clear respiratory insufficiency, usually termed “sudden death.” These data indicate that death is not necessarily related to the function of the respiratory muscles and that the time to administration of respiratory life support cannot be assumed to be equivalent to the time to death."

https://jamanetwork.com/journals/jamaneurology/fullarticle/797270

In [None]:
# Load the Permanent Assisted Ventilation table from the End ALS clinical metadata.
df = pd.read_csv(
    '../input/end-als/end-als/clinical-data/filtered-metadata/metadata/clinical/Permanent_Assisted_Ventilation.csv',
    encoding='ISO-8859-2',
)

# Do not truncate columns when frames are displayed.
pd.set_option('display.max_columns', None)

# Preview the first rows with a bisque background and purple text.
preview_css = {'background-color': 'bisque', 'color': 'purple'}
df.head().style.set_properties(**preview_css)

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
# Let's first handle numerical features with NaN values.
# Collect every non-object column that has at least one missing value. The
# check is "any NaN" rather than "more than one NaN", so a column with exactly
# one missing entry is not silently skipped and left un-imputed.
numerical_nan = [
    feature
    for feature in df.columns
    if df[feature].dtypes != 'O' and df[feature].isna().any()
]
numerical_nan

In [None]:
# Missing-value count for each of the selected numeric columns.
df.loc[:, numerical_nan].isna().sum()

In [None]:
## Replacing the numerical Missing Values

# NOTE(review): the median is computed on the full frame, before the
# train/test split, and the target column `pavyn` is imputed too if it appears
# in `numerical_nan` -- confirm both are intended.
for feature in numerical_nan:
    ## We will replace by using median since there are outliers
    median_value = df[feature].median()

    # Assign the filled column back instead of calling
    # `df[feature].fillna(..., inplace=True)`. The inplace form operates on an
    # intermediate object (chained assignment): it emits a warning on
    # pandas 2.x and does not update `df` when Copy-on-Write is enabled.
    df[feature] = df[feature].fillna(median_value)

# Verify that no missing values remain in the imputed columns.
df[numerical_nan].isnull().sum()

In [None]:
# Expand the categorical (object-dtype) columns into indicator columns.
df = pd.get_dummies(data=df)

EvalML is an AutoML library that builds, optimizes, and evaluates machine learning pipelines using domain-specific objective functions.

Combined with Featuretools and Compose, EvalML can be used to create end-to-end machine learning solutions for classification and regression problems.

https://evalml.featurelabs.com/en/v0.9.0/

In [None]:
!pip install evalml

In [None]:
from evalml.automl import AutoMLSearch
from sklearn.model_selection import train_test_split

In [None]:
# Separate the target column from the predictor columns.
y = df['pavyn']
X = df.drop('pavyn', axis=1)

In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split identical on every run, so results computed from it are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Configure an AutoML search for a binary classification problem on the
# training split.
automl = AutoMLSearch(
    problem_type='binary',
    X_train=X_train,
    y_train=y_train,
)

In [None]:
# Run the AutoML search on the training data that was passed to AutoMLSearch.
automl.search()

#The Gray marker indicates the score: Zero! At least the program didn't return any error. 

In [None]:
# Display the rankings of the pipelines evaluated by the search.
automl.rankings

In [None]:
# Take the best pipeline reported by the search, fit it on the training split,
# and predict on the held-out test split.
pipeline = automl.best_pipeline
pipeline.fit(X_train, y_train)
# NOTE(review): `pred` is not read by any later cell visible in this notebook.
pred = pipeline.predict(X_test)

In [None]:
#https://evalml.featurelabs.com/en/v0.9.0/
# Describe the top-ranked pipeline instead of a hard-coded id. Pipeline ids
# depend on what the search happened to evaluate, so a literal `3` may refer
# to an arbitrary pipeline, or to none at all.
best_pipeline_id = automl.rankings.iloc[0]["id"]
automl.describe_pipeline(best_pipeline_id)

In [None]:
#https://evalml.featurelabs.com/en/v0.9.0/
# Score the best pipeline on the held-out split using the F1 objective.
f1_objectives = ["f1"]
pipeline = automl.best_pipeline
pipeline.score(X_test, y_test, f1_objectives)

In [None]:
# Presumably renders a diagram of the pipeline's components -- verify against
# the EvalML docs.
pipeline.graph()

#Though my scores are super low, thanks to FeatureLabs for the EvalML library. 