# Models: GOC Documentation, Adjusted

In [1]:
import numpy as np
import pandas as pd
import patsy
import plotly.express as px
import statsmodels.api as sm

from cleaning.caregivers.models import load_data

# Load the analysis data once; logistic_fit() below reads this module-level `df`.
df = load_data()

In [2]:
def logistic_fit(formula: str, data=None):
    """Fit a logistic regression described by a patsy formula.

    Parameters
    ----------
    formula : str
        Patsy formula such as ``"y ~ x1 + x2"``; the left-hand side is the
        outcome and the right-hand side lists the predictors.
    data : optional
        Data the formula's variable names are resolved against. When omitted
        (the default), the notebook-level ``df`` is used, so existing
        single-argument calls behave exactly as before.

    Returns
    -------
    The fitted results object produced by ``sm.Logit(y, X).fit()``.
    """
    if data is None:
        # Fall back to the data loaded at the top of the notebook.
        data = df

    # return_type="dataframe" keeps the design-matrix column labels, so the
    # fitted parameters are indexed by term name rather than by position.
    y, X = patsy.dmatrices(formula, data, return_type="dataframe")

    return sm.Logit(y, X).fit()

## Has GOC Documentation ~ **Has Child Documentation (True/False)** + Demographic + Elixhauser + SOFA

In [3]:
# GOC documentation ~ child documentation flag + demographics + Elixhauser + SOFA.
formula = (
    "IDENTIFIED_CONV_GOC ~ SEX + MARITAL_STATUS + ETHNICITY + ADMISSION_AGE"
    " + ELIX_SCORE + SOFA + ANNOTATION_CHILD"
)
model_fit = logistic_fit(formula)
print(model_fit.summary())

Optimization terminated successfully.
         Current function value: 0.602427
         Iterations 5
                            Logit Regression Results                           
Dep. Variable:     IDENTIFIED_CONV_GOC   No. Observations:                 1389
Model:                           Logit   Df Residuals:                     1379
Method:                            MLE   Df Model:                            9
Date:                 Wed, 20 Jan 2021   Pseudo R-squ.:                 0.03775
Time:                         16:38:03   Log-Likelihood:                -836.77
converged:                        True   LL-Null:                       -869.60
Covariance Type:             nonrobust   LLR p-value:                 1.072e-10
                                              coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------
Intercept                                 

In [4]:
# Exponentiate the fitted Logit coefficients (log-odds) to read them as odds ratios.
np.exp(model_fit.params)

Intercept                                  0.850033
MARITAL_STATUS[T.NOT MARRIED]              0.959706
MARITAL_STATUS[T.UNKNOWN/NOT SPECIFIED]    0.960837
ETHNICITY[T.OTHER]                         0.720439
ETHNICITY[T.UNKNOWN/NOT SPECIFIED]         0.948469
SEX                                        1.038838
ADMISSION_AGE                              1.005996
ELIX_SCORE                                 0.996098
SOFA                                       1.069689
ANNOTATION_CHILD                           1.879298
dtype: float64

## Has GOC Documentation ~ **Has Spouse Documentation (True/False)** + Demographic + Elixhauser + SOFA

In [12]:
# GOC documentation ~ spouse documentation flag + demographics + Elixhauser + SOFA.
formula = (
    "IDENTIFIED_CONV_GOC ~ SEX + MARITAL_STATUS + ETHNICITY + ADMISSION_AGE"
    " + ELIX_SCORE + SOFA + ANNOTATION_SPOUSE"
)
model_fit = logistic_fit(formula)
print(model_fit.summary())

Optimization terminated successfully.
         Current function value: 0.597438
         Iterations 5
                            Logit Regression Results                           
Dep. Variable:     IDENTIFIED_CONV_GOC   No. Observations:                 1389
Model:                           Logit   Df Residuals:                     1379
Method:                            MLE   Df Model:                            9
Date:                 Wed, 20 Jan 2021   Pseudo R-squ.:                 0.04572
Time:                         16:40:37   Log-Likelihood:                -829.84
converged:                        True   LL-Null:                       -869.60
Covariance Type:             nonrobust   LLR p-value:                 2.010e-13
                                              coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------
Intercept                                 

In [6]:
# Exponentiate the fitted Logit coefficients (log-odds) to read them as odds ratios.
np.exp(model_fit.params)

Intercept                                  0.265588
MARITAL_STATUS[T.NOT MARRIED]              1.592868
MARITAL_STATUS[T.UNKNOWN/NOT SPECIFIED]    1.567619
ETHNICITY[T.OTHER]                         0.827621
ETHNICITY[T.UNKNOWN/NOT SPECIFIED]         0.890094
SEX                                        1.185786
ADMISSION_AGE                              1.017792
ELIX_SCORE                                 0.974659
SOFA                                       1.079761
ANNOTATION_SPOUSE                          2.508623
dtype: float64

## Has GOC Documentation ~ **Has Child Documentation (True/False)** + **Has Spouse Documentation (True/False)** + Demographic + Elixhauser + SOFA

In [7]:
# GOC documentation ~ child flag + spouse flag (entered as separate terms)
# + demographics + Elixhauser + SOFA.
formula = (
    "IDENTIFIED_CONV_GOC ~ SEX + MARITAL_STATUS + ETHNICITY + ADMISSION_AGE"
    " + ELIX_SCORE + SOFA + ANNOTATION_CHILD + ANNOTATION_SPOUSE"
)
model_fit = logistic_fit(formula)
print(model_fit.summary())

Optimization terminated successfully.
         Current function value: 0.591196
         Iterations 5
                            Logit Regression Results                           
Dep. Variable:     IDENTIFIED_CONV_GOC   No. Observations:                 1389
Model:                           Logit   Df Residuals:                     1378
Method:                            MLE   Df Model:                           10
Date:                 Wed, 20 Jan 2021   Pseudo R-squ.:                 0.05569
Time:                         16:38:03   Log-Likelihood:                -821.17
converged:                        True   LL-Null:                       -869.60
Covariance Type:             nonrobust   LLR p-value:                 2.310e-16
                                              coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------
Intercept                                 

In [8]:
# Exponentiate the fitted Logit coefficients (log-odds) to read them as odds ratios.
np.exp(model_fit.params)

Intercept                                  0.372001
MARITAL_STATUS[T.NOT MARRIED]              1.547570
MARITAL_STATUS[T.UNKNOWN/NOT SPECIFIED]    1.481872
ETHNICITY[T.OTHER]                         0.776635
ETHNICITY[T.UNKNOWN/NOT SPECIFIED]         0.909809
SEX                                        1.104176
ADMISSION_AGE                              1.010721
ELIX_SCORE                                 0.979664
SOFA                                       1.071063
ANNOTATION_CHILD                           1.788235
ANNOTATION_SPOUSE                          2.400825
dtype: float64

## Has GOC Documentation ~ **Type of Caregiver Documentation (Child/Spouse/Both/Neither)** + Demographic + Elixhauser + SOFA

In [9]:
# GOC documentation ~ single caregiver-documentation type term (ANNOTATION)
# + demographics + Elixhauser + SOFA.
formula = (
    "IDENTIFIED_CONV_GOC ~ SEX + MARITAL_STATUS + ETHNICITY + ADMISSION_AGE"
    " + ELIX_SCORE + SOFA + ANNOTATION"
)
model_fit = logistic_fit(formula)
print(model_fit.summary())

Optimization terminated successfully.
         Current function value: 0.590429
         Iterations 5
                            Logit Regression Results                           
Dep. Variable:     IDENTIFIED_CONV_GOC   No. Observations:                 1389
Model:                           Logit   Df Residuals:                     1377
Method:                            MLE   Df Model:                           11
Date:                 Wed, 20 Jan 2021   Pseudo R-squ.:                 0.05692
Time:                         16:38:03   Log-Likelihood:                -820.11
converged:                        True   LL-Null:                       -869.60
Covariance Type:             nonrobust   LLR p-value:                 2.824e-16
                                              coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------
Intercept                                 

In [10]:
# Exponentiate the fitted Logit coefficients (log-odds) to read them as odds ratios.
np.exp(model_fit.params)

Intercept                                  1.366477
MARITAL_STATUS[T.NOT MARRIED]              1.533875
MARITAL_STATUS[T.UNKNOWN/NOT SPECIFIED]    1.480457
ETHNICITY[T.OTHER]                         0.770339
ETHNICITY[T.UNKNOWN/NOT SPECIFIED]         0.903039
ANNOTATION[T.CHILD]                        0.550555
ANNOTATION[T.NEITHER]                      0.269751
ANNOTATION[T.SPOUSE]                       0.731371
SEX                                        1.090845
ADMISSION_AGE                              1.010302
ELIX_SCORE                                 0.980076
SOFA                                       1.071912
dtype: float64