In [None]:
from project import Project
import tables
from get_dataframe import get_dataframe

In [None]:
import pandas
import sklearn
import shap
import numpy
import imblearn
import scipy
import matplotlib
import optuna

# Report the installed version of each third-party package imported above,
# one "<package> version: <version>" line per package, in a single print call.
print(
    f'Pandas version: {pandas.__version__}',
    f'scikit-learn version: {sklearn.__version__}',
    f'SHAP version: {shap.__version__}',
    f'NumPy version: {numpy.__version__}',
    f'Matplotlib version: {matplotlib.__version__}',
    f'Imbalanced-learn version: {imblearn.__version__}',
    f'Scipy version: {scipy.__version__}',
    f'Optuna version: {optuna.__version__}',
    sep='\n',
)


In [None]:
# Report the Python interpreter version (sys.version) alongside the package
# versions printed in the previous cell.
import sys
print(f'Python version: {sys.version}')

In [None]:
# Column names offered to every Project as candidate features. The order is
# kept as-is because the list is also passed to the table helpers further down.
candidate_features = [
    # Patient-level, timing and dose fields
    "Age",
    "Sex",
    "Disease duration",
    "Impulse control disorder",
    "Days between screening and follow up",
    "Total Levodopa Equivalent Dose",
    # UPDRS-I and UPDRS-II totals
    "Total UPDRS-I score",
    "Total UPDRS-II ON score",
    # UPDRS-III OFF scores (total and subscores)
    "Total UPDRS-III OFF score",
    "Total bradykinesia + rigidity OFF score (UPDRS-III subscore)",
    "Total tremor OFF score (UPDRS-III subscore)",
    "Axial OFF score (UPDRS-III subscore)",
    # UPDRS-III ON scores (total and subscores)
    "Total bradykinesia + rigidity ON score (UPDRS-III subscore)",
    "Total UPDRS-III ON score",
    "Total tremor ON score (UPDRS-III subscore)",
    "Axial ON score (UPDRS-III subscore)",
    # Percentage improvement after dopamine (UPDRS-III total and subscores)
    "% UPDRS-III improvement after dopamine",
    "% Total bradykinesia + rigidity improvement after dopamine (UPDRS-III subscore)",
    "% Total tremor improvement after dopamine (UPDRS-III subscore)",
    "% Axial improvement after dopamine (UPDRS-III subscore)",
    # Preoperative UPDRS-IV total and waking-day / OFF-time percentages
    "Total preoperative UPDRS-IV score",
    "% of waking day dyskinesias present",
    "% of waking day OFF",
    "% of OFF time with dystonia",
    # Other scores
    "AS score",
    "PDQ-39 score",
    # Excluded candidates, kept for reference:
    # "Most invalidating symptom",
    # "Hoehn and Yahr OFF",  # Too few records
    # "Hoehn and Yahr ON",  # Too few records
    # "BDI score",  # Too few records
]

In [None]:
# Accumulators filled by run_project(); each call adds one entry to both lists.
# Every item of `results` is an 11-tuple laid out as:
#   (list_included_features, num_training_records, num_testing_records,
#    best_model_type, model_name, performance, lower_bound, upper_bound,
#    rsquaredperformance, rsquaredlower_bound, rsquaredupper_bound)
# Every item of `all_included_indices` is that run's `project.included_indices`.
results = []
all_included_indices = []


def run_project(project_outcome):
    """Run the Project pipeline for one outcome and record its summary.

    Builds a ``Project`` from the shared ``candidate_features`` and
    ``project_outcome`` and then calls, in this order,
    ``train_and_tune_model(n_trials=100)``, ``evaluate()``,
    ``calculate_confidence_interval(n_bootstraps=1000)`` and
    ``perform_shap_analysis()``.

    Nothing is returned. As side effects, the project's ``included_indices``
    is appended to ``all_included_indices`` and an 11-tuple of the project's
    summary attributes is appended to ``results``. Calling this again for the
    same outcome appends another entry; it does not replace the earlier one.

    Args:
        project_outcome: Outcome identifier forwarded unchanged to ``Project``.
    """
    project = Project(candidate_features, project_outcome)
    project.train_and_tune_model(n_trials=100)
    project.evaluate()
    project.calculate_confidence_interval(n_bootstraps=1000)
    project.perform_shap_analysis()

    all_included_indices.append(project.included_indices)

    # Field order matters: the table cells below unpack these tuples by position.
    results.append(
        (
            project.list_included_features,
            project.num_training_records,
            project.num_testing_records,
            project.best_model_type,
            project.model_name,
            project.performance,
            project.lower_bound,
            project.upper_bound,
            project.rsquaredperformance,
            project.rsquaredlower_bound,
            project.rsquaredupper_bound,
        )
    )


In [None]:
# Run the pipeline in run_project for the 'Total UPDRS-III' outcome; this adds
# one entry to `results` and one to `all_included_indices`.
run_project('Total UPDRS-III')

In [None]:
# Run the pipeline in run_project for the 'Tremor' outcome; this adds one
# entry to `results` and one to `all_included_indices`.
run_project('Tremor')

In [None]:
# Run the pipeline in run_project for the 'Axial' outcome; this adds one
# entry to `results` and one to `all_included_indices`.
run_project('Axial')

In [None]:
# Run the pipeline in run_project for the 'Bradykinesia + rigidity' outcome;
# this adds one entry to `results` and one to `all_included_indices`.
run_project('Bradykinesia + rigidity')

In [None]:
# One {'name', 'features'} entry per recorded run: the first field of each
# results tuple is the included-feature list, the fifth is the model name.
latex_feature_entries = [
    {'name': model_name, 'features': included_features}
    for included_features, _, _, _, model_name, *_ in results
]

# Build the LaTeX feature table from those entries and the full candidate list.
latex_table = tables.generate_feature_table(latex_feature_entries, candidate_features)
print(latex_table)


In [None]:
# Re-execute the already-imported `tables` module so that edits made to it
# since it was first imported are picked up by the cells that follow.
from importlib import reload
reload(tables)

In [None]:
# One {'name', 'features'} entry per recorded run: the first field of each
# results tuple is the included-feature list, the fifth is the model name.
dataframe_feature_entries = [
    {'name': model_name, 'features': included_features}
    for included_features, _, _, _, model_name, *_ in results
]
feature_dataframe = tables.generate_feature_dataframe(dataframe_feature_entries, candidate_features)

# For the export, turn the index into a column and rename "index" to "Feature".
feature_export = feature_dataframe.reset_index().rename(columns={"index": "Feature"})
tables.export_dataframe_to_word(feature_export, "feature_selection")

# Display the (un-reset) dataframe as the cell output.
feature_dataframe

In [None]:
# NOTE: this LaTeX table carries only the 'rmse' value and its bounds; the
# three R-squared fields at the end of each results tuple are not passed on.
latex_result_rows = [
    {
        'name': model_name,
        'n_records_training_data': n_train,
        'n_records_testing_data': n_test,
        'model_type': model_type,
        'rmse': rmse,
        'lower bound': rmse_lower,
        'upper bound': rmse_upper,
    }
    for _, n_train, n_test, model_type, model_name, rmse, rmse_lower, rmse_upper, *_ in results
]
latex_table = tables.generate_results_table(latex_result_rows)

print(latex_table)


In [None]:
# One row per recorded run, including the R-squared value and its bounds.
# The first field of each results tuple (the included-feature list) is unused here.
result_rows = [
    {
        'name': model_name,
        'n_records_training_data': n_train,
        'n_records_testing_data': n_test,
        'model_type': model_type,
        'rmse': rmse,
        'lower bound': rmse_lower,
        'upper bound': rmse_upper,
        'r squared': r_squared,
        'r squared lower bound': r_squared_lower,
        'r squared upper bound': r_squared_upper,
    }
    for (
        _,
        n_train,
        n_test,
        model_type,
        model_name,
        rmse,
        rmse_lower,
        rmse_upper,
        r_squared,
        r_squared_lower,
        r_squared_upper,
    ) in results
]
results_dataframe = tables.generate_results_dataframe(result_rows)

tables.export_dataframe_to_word(results_dataframe, "results")

# Display the dataframe as the cell output.
results_dataframe

In [None]:
# Union of the row labels recorded by every run: each run_project() call
# contributed its own `included_indices` to `all_included_indices`.
unique_indices = {
    row_label
    for run_indices in all_included_indices
    for row_label in run_indices
}

# Load the dataframe again, keep the rows whose
# "NO Permission data use for research" value is not 1.0, then select by label
# only the rows that at least one run included.
data = get_dataframe()
data = data[data["NO Permission data use for research"] != 1.0].loc[list(unique_indices)]



In [None]:
# Summarise `data` (the rows selected via unique_indices) with
# tables.categorical_statistics_table. The result is unpacked into two values,
# presumably a LaTeX string and a pandas object going by the names — confirm in
# `tables`. Only the first is printed here.
category_table_latex, category_table_pandas = tables.categorical_statistics_table(data)
print(category_table_latex)


In [None]:
# Hand the categorical table to tables.export_dataframe_to_word under the name
# "categorical_statistics", then display it as the cell output.
tables.export_dataframe_to_word(category_table_pandas, "categorical_statistics")
category_table_pandas


In [None]:
# Summarise `data` (the rows selected via unique_indices) with
# tables.numerical_statistics_table, passing the candidate feature names. The
# result is unpacked into two values, presumably a LaTeX string and a pandas
# object going by the names — confirm in `tables`. Only the first is printed here.
numerical_table_latex, numerical_table_pandas = tables.numerical_statistics_table(data, candidate_features)
print(numerical_table_latex)


In [None]:
# Hand the numerical table to tables.export_dataframe_to_word under the name
# "numerical_statistics", then display it as the cell output.
tables.export_dataframe_to_word(numerical_table_pandas, "numerical_statistics")
numerical_table_pandas