In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import math
df = pd.read_csv('SecondProvider1patient.csv')
df = df.iloc[:, 2:]

# functions
def change_downgrade(als):
    if als == 1:
        return 0
    else:
        return 1
def create_coef_df(vars, lr):
    coef = lr.coef_.tolist()[0]
    df = pd.DataFrame({'variable': vars, 'coefficient': coef})
    df['odd'] = df['coefficient'].apply(math.exp)
    df = df.reindex(df.coefficient.abs().sort_values(ascending = False).index)
    df = df.reset_index(drop=True)
    return (df)

def print_performance(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    print(f'accuracy: {round(acc, 2)}')
    print(f'f1 score: {round(f1, 2)}')

    

In [38]:
# initial second provider
# set feature
ini_vars = list(set(['impression_Injury - Lung Hemothorax - Traumatic (S27.1)','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_CV - Cardiac Arrest (I46.9)','impression_CV - Cardiac Arrest (I46.9)','unit_type_Medic',
                     'unit_type_EMS Supervisor','FinalIncidentType_CPRF','ArrivedIncidentType_CPRF','impression_CV - Cardiac Arrest (I46.9)',
                     'InitialIncidentType_BLS','InitialIncidentType_ALS','InitialIncidentType_ODF','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_Intracranial - Stroke (CVA) Hemorrhagic (I62.9)','patient_age_10 - 19','unit_station_40','unit_station_32',
                     'unit_station_37','CallDTMonth','unit_station_40']))
# split
x = df[ini_vars]
y = df['is_SecondProvider2']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 112)
# filt
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train, y_train)
# show coef
ini_sec_df = create_coef_df(ini_vars, lr)
print(ini_sec_df)
# performance
print_performance(y_test, lr.predict(x_test))

                                             variable  coefficient        odd
0              impression_CV - Cardiac Arrest (I46.9)     2.769685  15.953611
1   impression_Injury - Lung Hemothorax - Traumati...     2.042742   7.711728
2   impression_Intracranial - Stroke (CVA) Hemorrh...     1.852841   6.377911
3      impression_Respiratory - Arrest/Apnea (R06.81)     1.201582   3.325375
4                            unit_type_EMS Supervisor     0.795536   2.215628
5                              FinalIncidentType_CPRF     0.746030   2.108612
6                             InitialIncidentType_BLS    -0.733665   0.480146
7                                     unit_station_40     0.138903   1.149012
8                             InitialIncidentType_ALS     0.107763   1.113784
9                                     unit_type_Medic     0.107400   1.113380
10                           ArrivedIncidentType_CPRF    -0.086928   0.916743
11                            InitialIncidentType_ODF     0.0649

In [12]:
# final state second provider
final_vars = list(set(['procedure_IO Start - Intraosseous Access','procedure_IO Start - Intraosseous Access','medication_Epinephrine 0.1 MG/ML (1:10,000)',
                       'procedure_Resp - Assist Ventilation - BVM Via Mask','FinalIncidentType_CPRF','procedure_CPR - Manual','ArrivedIncidentType_CPRF',
                       'reason_Specialty Center (Trauma STEMI Stroke)','procedure_IV Start - Extremity Vein (arm or leg)','isALS_LOC',
                       'unit_action_Transport person','medication_Midazolam (Versed)','procedure_IV Start - Extremity Vein (arm or leg)',
                       'reason_Specialty Center (Trauma STEMI Stroke)','procedure_Resp - Assist Ventilation - BVM Via Mask',
                       'reason_Specialty Center (Trauma STEMI Stroke)','impression_CV - Cardiac Arrest (I46.9)','medication_Albuterol (Ventolin) ',
                       'unit_station_29','IncidentFirstDue_423.0','medication_Epinephrine 0.1 MG/ML (1:10,000)','IncidentFirstDue_423.0']))
x = df[final_vars]
y = df['is_SecondProvider2']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 112)
lr = LogisticRegression()
lr.fit(x_train, y_train)
# show coef
final_sec_df = create_coef_df(final_vars, lr)
print(final_sec_df)
# performance
print_performance(y_test, lr.predict(x_test))

                                             variable  coefficient       odd
0            procedure_IO Start - Intraosseous Access     1.583887  4.873865
1   procedure_Resp - Assist Ventilation - BVM Via ...     1.567226  4.793333
2       reason_Specialty Center (Trauma STEMI Stroke)     1.360511  3.898184
3              impression_CV - Cardiac Arrest (I46.9)     1.139063  3.123839
4                       medication_Midazolam (Versed)     1.096177  2.992703
5                              FinalIncidentType_CPRF     0.554516  1.741099
6                    medication_Albuterol (Ventolin)      0.532711  1.703544
7                                           isALS_LOC     0.367668  1.444362
8                            ArrivedIncidentType_CPRF     0.364617  1.439962
9    procedure_IV Start - Extremity Vein (arm or leg)     0.356038  1.427662
10                       unit_action_Transport person     0.334607  1.397391
11        medication_Epinephrine 0.1 MG/ML (1:10,000)     0.110616  1.116966

In [9]:
# downgrading

dg_vars = list(set(['InitialIncidentType_CPRF', 'InitialIncidentType_ALS', 'patient_age_70 - 79', 'impression_Neuro - Paraplegia (G82.20)',
                       'impression_OB - Childbirth Uncomplicated (O80)','impression_Neuro - TIA (transient ischemic attack) (G45.9)',
                       'impression_Environment - Poisonous Snake Bite (T63.0)', 'impression_EENT - Epistaxis (Non-traumatic) (R04.0)',
                       'impression_Injury - Nose (S09.92)']))
df = df[df['transloc_Level 4: 1 Provider (BLS Care)'] != 1]
x = df[dg_vars]
y = df['isALS_LOC'].apply(change_downgrade)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 112)
lr = LogisticRegression()
lr.fit(x_train, y_train)
# show coef
dg_df = create_coef_df(dg_vars, lr)
print(dg_df)
# performance
print_performance(y_test, lr.predict(x_test))


                                            variable  coefficient       odd
0  impression_Neuro - TIA (transient ischemic att...     1.762108  5.824705
1  impression_EENT - Epistaxis (Non-traumatic) (R...     1.483756  4.409479
2                  impression_Injury - Nose (S09.92)     1.476562  4.377868
3     impression_OB - Childbirth Uncomplicated (O80)     1.341012  3.822912
4  impression_Environment - Poisonous Snake Bite ...     1.182050  3.261052
5                           InitialIncidentType_CPRF    -1.036373  0.354739
6             impression_Neuro - Paraplegia (G82.20)     0.898638  2.456256
7                            InitialIncidentType_ALS    -0.284431  0.752443
8                                patient_age_70 - 79    -0.043062  0.957852
accuracy: 0.71
f1 score: 0.83


In [18]:
# threshold
ini_vars = list(set(['impression_Injury - Lung Hemothorax - Traumatic (S27.1)','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_CV - Cardiac Arrest (I46.9)','impression_CV - Cardiac Arrest (I46.9)','unit_type_Medic',
                     'unit_type_EMS Supervisor','FinalIncidentType_CPRF','ArrivedIncidentType_CPRF','impression_CV - Cardiac Arrest (I46.9)',
                     'InitialIncidentType_BLS','InitialIncidentType_ALS','InitialIncidentType_ODF','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_Intracranial - Stroke (CVA) Hemorrhagic (I62.9)','patient_age_10 - 19','unit_station_40','unit_station_32',
                     'unit_station_37','CallDTMonth','unit_station_40']))
x = df[ini_vars]
y = df['is_SecondProvider2']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 112)
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train, y_train)
ini_sec_df = create_coef_df(ini_vars, lr)
print('Threshold')
y_pred = (lr.predict_proba(x_test)[:,1]>=0.4).astype(int)
print_performance(y_test, y_pred)

# regularization
ini_vars = list(set(['impression_Injury - Lung Hemothorax - Traumatic (S27.1)','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_CV - Cardiac Arrest (I46.9)','impression_CV - Cardiac Arrest (I46.9)','unit_type_Medic',
                     'unit_type_EMS Supervisor','FinalIncidentType_CPRF','ArrivedIncidentType_CPRF','impression_CV - Cardiac Arrest (I46.9)',
                     'InitialIncidentType_BLS','InitialIncidentType_ALS','InitialIncidentType_ODF','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_Intracranial - Stroke (CVA) Hemorrhagic (I62.9)','patient_age_10 - 19','unit_station_40','unit_station_32',
                     'unit_station_37','CallDTMonth','unit_station_40']))
x = df[ini_vars]
y = df['is_SecondProvider2']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 112)
lr = LogisticRegression(max_iter=1000, C = 0.01)
lr.fit(x_train, y_train)
ini_sec_df = create_coef_df(ini_vars, lr)
print('Regularization')
print_performance(y_test, lr.predict(x_test))

# oversampling
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
sampler = RandomOverSampler(random_state=112)
ini_vars = list(set(['impression_Injury - Lung Hemothorax - Traumatic (S27.1)','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_CV - Cardiac Arrest (I46.9)','impression_CV - Cardiac Arrest (I46.9)','unit_type_Medic',
                     'unit_type_EMS Supervisor','FinalIncidentType_CPRF','ArrivedIncidentType_CPRF','impression_CV - Cardiac Arrest (I46.9)',
                     'InitialIncidentType_BLS','InitialIncidentType_ALS','InitialIncidentType_ODF','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_Intracranial - Stroke (CVA) Hemorrhagic (I62.9)','patient_age_10 - 19','unit_station_40','unit_station_32',
                     'unit_station_37','CallDTMonth','unit_station_40']))
x = df[ini_vars]
y = df['is_SecondProvider2']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 112)
x_train, y_train = sampler.fit_resample(x_train, y_train)
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train, y_train)
ini_sec_df = create_coef_df(ini_vars, lr)
print('Oversampling')
print_performance(y_test, lr.predict(x_test))

# undersampling
sampler = RandomUnderSampler(random_state=112)
ini_vars = list(set(['impression_Injury - Lung Hemothorax - Traumatic (S27.1)','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_CV - Cardiac Arrest (I46.9)','impression_CV - Cardiac Arrest (I46.9)','unit_type_Medic',
                     'unit_type_EMS Supervisor','FinalIncidentType_CPRF','ArrivedIncidentType_CPRF','impression_CV - Cardiac Arrest (I46.9)',
                     'InitialIncidentType_BLS','InitialIncidentType_ALS','InitialIncidentType_ODF','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_Intracranial - Stroke (CVA) Hemorrhagic (I62.9)','patient_age_10 - 19','unit_station_40','unit_station_32',
                     'unit_station_37','CallDTMonth','unit_station_40']))
x = df[ini_vars]
y = df['is_SecondProvider2']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 112)
x_train, y_train = sampler.fit_resample(x_train, y_train)
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train, y_train)
ini_sec_df = create_coef_df(ini_vars, lr)
print('Oversampling')
print_performance(y_test, lr.predict(x_test))

# interaction
ini_vars = list(set(['impression_Injury - Lung Hemothorax - Traumatic (S27.1)','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_CV - Cardiac Arrest (I46.9)','impression_CV - Cardiac Arrest (I46.9)','unit_type_Medic',
                     'unit_type_EMS Supervisor','FinalIncidentType_CPRF','ArrivedIncidentType_CPRF','impression_CV - Cardiac Arrest (I46.9)',
                     'InitialIncidentType_BLS','InitialIncidentType_ALS','InitialIncidentType_ODF','impression_Respiratory - Arrest/Apnea (R06.81)',
                     'impression_Intracranial - Stroke (CVA) Hemorrhagic (I62.9)','patient_age_10 - 19','unit_station_40','unit_station_32',
                     'unit_station_37','CallDTMonth','unit_station_40']))
x = df[ini_vars]
ini_vars.append('station_40 * Cardiac Arrest')
ini_vars.append('station_32 * Cardiac Arrest')
ini_vars.append('station_37 * Cardiac Arrest')
x['station_40 * Cardiac Arrest'] = x['impression_CV - Cardiac Arrest (I46.9)'] * x['unit_station_40']
x['station_32 * Cardiac Arrest'] = x['impression_CV - Cardiac Arrest (I46.9)'] * x['unit_station_32']
x['station_37 * Cardiac Arrest'] = x['impression_CV - Cardiac Arrest (I46.9)'] * x['unit_station_37']
y = df['is_SecondProvider2']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 112)
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train, y_train)
ini_sec_df = create_coef_df(ini_vars, lr)
print('Adding interaction terms')
print_performance(y_test, lr.predict(x_test))


Threshold
accuracy: 0.96
f1 score: 0.26
Regularization
accuracy: 0.96
f1 score: 0.18
Oversampling
accuracy: 0.88
f1 score: 0.23
Oversampling
accuracy: 0.87
f1 score: 0.21


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Adding interaction terms
accuracy: 0.96
f1 score: 0.26
