# Models: GOC Documentation, Unadjusted

In [1]:
import numpy as np
import pandas as pd
import patsy
import plotly.express as px
import statsmodels.api as sm

from cleaning.caregivers.models import load_data

# Load the analysis frame once; `logistic_fit` below reads it as the
# notebook-level `df`, so this cell must run before any model cell.
# NOTE(review): the schema returned by `load_data` is not visible here —
# the formulas below assume it has IDENTIFIED_CONV_GOC and ANNOTATION* columns.
df = load_data()

In [2]:
def logistic_fit(formula, data=None):
    """Fit a logistic regression described by a patsy formula.

    Parameters
    ----------
    formula : str
        Patsy formula of the form ``"outcome ~ predictors"``.
    data : pandas.DataFrame, optional
        Frame holding the columns referenced by ``formula``. When omitted,
        the notebook-level ``df`` is used, so existing one-argument calls
        behave exactly as before.

    Returns
    -------
    The fitted results object returned by ``sm.Logit(y, X).fit()``.

    Notes
    -----
    ``fit()`` is called with its defaults, so the optimizer's convergence
    message is printed to stdout on every call.
    """
    if data is None:
        # Resolve the global lazily (at call time) rather than as a default
        # argument, so a re-run of the data-loading cell is picked up.
        data = df

    # Build the outcome vector and design matrix from the formula.
    y, X = patsy.dmatrices(formula, data, return_type="dataframe")
    model = sm.Logit(y, X).fit()

    return model

## Has GOC Documentation ~ **Has Child Documentation (True/False)**

In [3]:
# Unadjusted model: GOC documentation regressed on the child-documentation flag only.
formula = "IDENTIFIED_CONV_GOC ~ ANNOTATION_CHILD"

# `model_fit` is reused by the next cell to derive odds ratios.
model_fit = logistic_fit(formula)

# Coefficients in the summary are on the log-odds scale.
print(model_fit.summary())

Optimization terminated successfully.
         Current function value: 0.611868
         Iterations 5
                            Logit Regression Results                           
Dep. Variable:     IDENTIFIED_CONV_GOC   No. Observations:                 1389
Model:                           Logit   Df Residuals:                     1387
Method:                            MLE   Df Model:                            1
Date:                 Wed, 20 Jan 2021   Pseudo R-squ.:                 0.02267
Time:                         16:38:32   Log-Likelihood:                -849.89
converged:                        True   LL-Null:                       -869.60
Covariance Type:             nonrobust   LLR p-value:                 3.392e-10
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept            0.4809      0.071      6.752      0.000       0.341       0.621
ANN

In [4]:
# Exponentiate the log-odds coefficients: predictor entries become odds ratios,
# while the Intercept entry is the baseline odds (not a ratio).
np.exp(model_fit.params)

Intercept           1.617555
ANNOTATION_CHILD    2.143817
dtype: float64

## Has GOC Documentation ~ **Has Spouse Documentation (True/False)**

In [5]:
# Unadjusted model: GOC documentation regressed on the spouse-documentation flag only.
formula = "IDENTIFIED_CONV_GOC ~ ANNOTATION_SPOUSE"

# Overwrites the previous `model_fit`; the next cell reads this fit.
model_fit = logistic_fit(formula)

# Coefficients in the summary are on the log-odds scale.
print(model_fit.summary())

Optimization terminated successfully.
         Current function value: 0.617448
         Iterations 5
                            Logit Regression Results                           
Dep. Variable:     IDENTIFIED_CONV_GOC   No. Observations:                 1389
Model:                           Logit   Df Residuals:                     1387
Method:                            MLE   Df Model:                            1
Date:                 Wed, 20 Jan 2021   Pseudo R-squ.:                 0.01376
Time:                         16:38:32   Log-Likelihood:                -857.64
converged:                        True   LL-Null:                       -869.60
Covariance Type:             nonrobust   LLR p-value:                 9.968e-07
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             0.5552      0.070      7.941      0.000       0.418       0.692


In [6]:
# Exponentiate the log-odds coefficients: predictor entries become odds ratios,
# while the Intercept entry is the baseline odds (not a ratio).
np.exp(model_fit.params)

Intercept            1.742236
ANNOTATION_SPOUSE    1.826284
dtype: float64

## Has GOC Documentation ~ **Has Child Documentation (True/False)** + **Has Spouse Documentation (True/False)**

In [7]:
# Both flags entered as additive main effects (no interaction term), so each
# coefficient is conditional on the other flag.
formula = "IDENTIFIED_CONV_GOC ~ ANNOTATION_CHILD + ANNOTATION_SPOUSE"

# Overwrites the previous `model_fit`; the next cell reads this fit.
model_fit = logistic_fit(formula)

# Coefficients in the summary are on the log-odds scale.
print(model_fit.summary())

Optimization terminated successfully.
         Current function value: 0.603024
         Iterations 5
                            Logit Regression Results                           
Dep. Variable:     IDENTIFIED_CONV_GOC   No. Observations:                 1389
Model:                           Logit   Df Residuals:                     1386
Method:                            MLE   Df Model:                            2
Date:                 Wed, 20 Jan 2021   Pseudo R-squ.:                 0.03680
Time:                         16:38:32   Log-Likelihood:                -837.60
converged:                        True   LL-Null:                       -869.60
Covariance Type:             nonrobust   LLR p-value:                 1.264e-14
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             0.2672      0.083      3.220      0.001       0.105       0.430


In [8]:
# Exponentiate the log-odds coefficients: each predictor entry is an odds ratio
# holding the other flag fixed; the Intercept entry is the baseline odds.
np.exp(model_fit.params)

Intercept            1.306273
ANNOTATION_CHILD     2.171187
ANNOTATION_SPOUSE    1.856449
dtype: float64

## Has GOC Documentation ~ **Type of Caregiver Documentation (Child/Spouse/Both/Neither)**

In [9]:
# Single categorical predictor for caregiver documentation type. The fitted
# terms are ANNOTATION[T.CHILD], [T.NEITHER] and [T.SPOUSE], so the remaining
# level is the reference — presumably "Both", per the section heading; confirm
# against the levels present in `df`.
formula = "IDENTIFIED_CONV_GOC ~ ANNOTATION"

# Overwrites the previous `model_fit`; the next cell reads this fit.
model_fit = logistic_fit(formula)

# Coefficients in the summary are on the log-odds scale.
print(model_fit.summary())

Optimization terminated successfully.
         Current function value: 0.602141
         Iterations 5
                            Logit Regression Results                           
Dep. Variable:     IDENTIFIED_CONV_GOC   No. Observations:                 1389
Model:                           Logit   Df Residuals:                     1385
Method:                            MLE   Df Model:                            3
Date:                 Wed, 20 Jan 2021   Pseudo R-squ.:                 0.03821
Time:                         16:38:32   Log-Likelihood:                -836.37
converged:                        True   LL-Null:                       -869.60
Covariance Type:             nonrobust   LLR p-value:                 2.445e-14
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
Intercept                 1.4623      0.180      8.122      0.000       1.109 

In [10]:
# Exponentiate the log-odds coefficients: each ANNOTATION[T.*] entry is an odds
# ratio versus the reference level; the Intercept entry is the odds of GOC
# documentation in that reference group.
np.exp(model_fit.params)

Intercept                4.315789
ANNOTATION[T.CHILD]      0.716676
ANNOTATION[T.NEITHER]    0.289634
ANNOTATION[T.SPOUSE]     0.616956
dtype: float64