# SAMueL-2 full production analysis

## Import packages

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from utils.data_process import DataProcess
from utils.descriptive_stats import DescriptiveStatistics
from utils.pathway import Pathway
from utils.reporting import GlobalReport
from utils.thrombolysis_choice_model import ThrombolysisChoiceModel
from utils.thrombolysis_outcome_model import OutcomeModel

## Create data for models

See the 'data' folder for the processing of the raw data.

In [2]:
# Set up the data processor with year bounds 2016 and 2021.
# NOTE(review): limit_to_ambo=False presumably keeps non-ambulance arrivals
# in the data -- confirm against DataProcess.
data_processor = DataProcess(year_min=2016, year_max=2021, limit_to_ambo=False)

# Run the processing step. `data_processor` is read again further down, where
# the pathway simulation takes `data_processor.pathway_simulation_parameters`.
data_processor.run()

All rows: 302719, ML rows:110981, Fraction: 0.37


## Model Run

In [3]:
# Switch for the whole modelling stage: when False, nothing below is executed.
run_model = True

if run_model:
    # Descriptive statistics: the object is created, but its run() call is
    # currently disabled.
    ds = DescriptiveStatistics()
    # ds.run()

    # Thrombolysis decision model.
    thrombolysis_choice_model = ThrombolysisChoiceModel()
    thrombolysis_choice_model.run()

    # Outcome model.
    outcome_model = OutcomeModel()
    outcome_model.run()

    # Pathway simulation: built from the data processor's
    # `pathway_simulation_parameters` and the choice model's
    # `benchmark_thrombolysis` attribute, with trials=100.
    pathway = Pathway(
        data_processor.pathway_simulation_parameters,
        thrombolysis_choice_model.benchmark_thrombolysis,
        trials=100,
    )
    pathway.run()

Accuracy: 0.849
Balanced accuracy: 0.820
ROC AUC: 0.916
Actual thrombolysis: 0.300
Predicted thrombolysis: 0.298
Outcome multiclass ROC AUC 0.811


ValueError: p < 0, p > 1 or p is NaN

## Reporting

In [None]:
# Instantiate the global report and generate it.
# NOTE(review): presumably this consumes results produced by the model-run
# stage, so it may not work if that stage was skipped or failed -- confirm
# against GlobalReport.
report = GlobalReport()
report.create_report()

## To Do

* Restrict the stroke units included to those meeting a minimum number of admissions and a minimum level of thrombolysis use (the analysis can currently crash when the data years are restricted)