# Thrombolysis decision model with LightGBM

In [1]:
import warnings
warnings.filterwarnings("ignore")


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import shap

from dataclasses import dataclass
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score

In [2]:
@dataclass(frozen=True)
class Paths:
    '''Immutable container for the path and name settings used in this notebook.

    The dataclass is frozen, so fields cannot be reassigned after
    construction. Nothing here enforces a single instance; the notebook simply
    creates one module-level instance, `paths`, and reads settings from it.
    '''

    # Output directory. Presumably where results are written -- it is not used
    # in the data-loading cell; confirm against later cells.
    output_save_path: str = './output/'
    # Directory holding the k-fold CSV files. The loading loop appends
    # '/03_train_{i}.csv' and '/03_test_{i}.csv' to it, which is why it has no
    # trailing slash.
    data_read_path: str = '../data/kfold_5fold'
    # Short name fragments; presumably combined into output file names --
    # TODO confirm against the cells that use them.
    notebook: str = 'expt_'
    model_text: str = 'lgbm'

# Module-level instance used by the rest of the notebook.
paths = Paths()

In [4]:
# Columns kept from each fold's CSV once it has been loaded. 'thrombolysis'
# is derived from 'scan_to_thrombolysis_time' in the loading loop before the
# frame is restricted to this list.
selected_features = [
    'stroke_team_id',  # cast to pandas 'category' dtype in the loading loop
    'age',
    'infarction',
    'onset_to_arrival_time',
    'precise_onset_known',
    'onset_during_sleep',
    'arrival_to_scan_time',
    'afib_anticoagulant',
    'prior_disability',
    'stroke_severity',
    'thrombolysis'  # 0/1 flag; presumably the prediction target -- confirm
]

# Accumulators for the per-fold DataFrames: the loading loop appends one
# training frame and one test frame per fold.
train_data, test_data = [], []

def show_thrombolysis_use(row):
    """Return a 0/1 flag for whether the row records thrombolysis use.

    `row` is any mapping-like object (for example a DataFrame row) that can be
    indexed with 'scan_to_thrombolysis_time'. A value of -100 in that field
    yields 0; every other value yields 1.
    """
    # -100 is the value this notebook treats as "no thrombolysis given".
    if row['scan_to_thrombolysis_time'] == -100:
        return 0
    return 1

# Load the training and test CSV for each of the five k-fold splits. Both
# splits receive identical preprocessing, so they share one inner loop body
# rather than two copy-pasted sections.
for i in range(5):
    for split, store in (('train', train_data), ('test', test_data)):
        # Reads '<data_read_path>/03_train_<i>.csv' or '.../03_test_<i>.csv'.
        loaded_data = pd.read_csv(
            paths.data_read_path + f'/03_{split}_{i}.csv')
        # Convert time to thrombolysis into use of thrombolysis (0/1): a time
        # of -100 means no thrombolysis. This is the vectorised equivalent of
        # applying show_thrombolysis_use to every row, without the per-row
        # Python call and with well-defined behaviour on an empty frame.
        loaded_data['thrombolysis'] = (
            loaded_data['scan_to_thrombolysis_time'] != -100).astype('int64')
        # Restrict to the selected columns and convert stroke_team_id to
        # category in one step. astype returns a new frame, so no column is
        # assigned on a slice of the original (the SettingWithCopyWarning
        # pattern).
        loaded_data = loaded_data[selected_features].astype(
            {'stroke_team_id': 'category'})
        store.append(loaded_data)