In [1]:
import statsmodels.formula.api as smf
import pandas as pd

# Data Preparation

## Import Data

In [2]:
# Location of the assessments export, relative to this notebook.
data_folder = '../data/'
fn = 'assessments.csv'

# Read the full CSV once; the cohort frames below are derived from `df`.
df = pd.read_csv(data_folder + fn)

def _select_cohort(assessments, name_fragment):
    """Return the assessments whose ``AssessmentName`` contains ``name_fragment``.

    Parameters
    ----------
    assessments : pandas.DataFrame
        Frame with an ``AssessmentName`` column.
    name_fragment : str
        Literal text to look for inside ``AssessmentName``.

    Returns
    -------
    pandas.DataFrame
        A new frame (``assessments`` is not modified) holding the matching
        rows, without the columns that are entirely missing for those rows,
        and with every ``CALC_``-prefixed column cast to ``int64``.

    Raises
    ------
    ValueError
        If a retained ``CALC_`` column holds a value that cannot be converted
        to an integer (for example a missing value).
    """
    # na=False: a row without an assessment name belongs to no cohort. Without
    # it a single missing name makes the boolean mask invalid and indexing fails.
    is_match = assessments['AssessmentName'].str.contains(
        name_fragment, regex=False, na=False
    )

    # Drop the columns that are empty for this cohort.
    cohort = assessments[is_match].dropna(axis=1, how='all')

    # Cast all CALC_ columns in one pass; astype returns a new frame.
    calc_cols = [c for c in cohort.columns if c.startswith('CALC_')]
    return cohort.astype({c: 'int64' for c in calc_cols})


individuals = _select_cohort(df, 'Individuals')
youth = _select_cohort(df, 'Youth')

## Feature Engineering

### High Acuity

In [3]:
# 1 when the Acuity label contains "High", otherwise 0 (same rule for both cohorts).
individuals['is_high_acuity'] = individuals['Acuity'].str.contains('High').astype('int64')
youth['is_high_acuity'] = youth['Acuity'].str.contains('High').astype('int64')

### PSH Assessment Recommendation

In [4]:
# 1 when TOTAL_SCORE is 8 or higher, otherwise 0.
individuals['is_psh_recommendation'] = (individuals['TOTAL_SCORE'] >= 8).astype('int64')

### Veteran Status

In [5]:
# 1 only when 'Veteran status' is exactly 'Yes'; any other value (or missing) is 0.
individuals['is_veteran'] = (individuals['Veteran status'] == 'Yes').astype('int64')
youth['is_veteran'] = (youth['Veteran status'] == 'Yes').astype('int64')

### Age Flags

In [6]:
# Age flags from the text of Age_group_at_assessment. Values are stringified
# first so missing entries simply do not match; regex=False keeps the '+' in
# '60+' a literal character.
individuals['is_senior'] = (
    individuals['Age_group_at_assessment']
    .map(str)
    .str.contains('60+', regex=False)
    .astype('int64')
)
youth['is_minor'] = (
    youth['Age_group_at_assessment']
    .map(str)
    .str.contains('Under 18', regex=False)
    .astype('int64')
)

### Gender Flags

In [7]:
# 1 only when Gender is exactly 'Male'; every other value (or missing) is 0.
individuals['is_male'] = (individuals['Gender'] == 'Male').astype('int64')
youth['is_male'] = (youth['Gender'] == 'Male').astype('int64')

### Race 

In [8]:
# 're' is a formula-friendly alias for the 'Race/Ethnicity' column (the
# original name contains a slash and cannot be used bare in a formula).
individuals['re'] = individuals['Race/Ethnicity']

# Indicator columns for three of the race/ethnicity labels (adults only).
individuals['is_white'] = (individuals['re'] == 'White').astype('int64')
individuals['is_black'] = (individuals['re'] == 'Black').astype('int64')
individuals['is_latino'] = (individuals['re'] == 'Latino').astype('int64')

youth['re'] = youth['Race/Ethnicity']

# Regressions

## Adult Linear Regression

In [9]:
# Empty frame that collects the fitted coefficients of the adult linear
# models, one column per model specification.
al_adjustments = pd.DataFrame()

In [10]:
# Unadjusted adult OLS: TOTAL_SCORE on race/ethnicity only, with 'White' as
# the reference category.
model = smf.ols(
    formula="""
    TOTAL_SCORE ~ C(re, Treatment('White'))
""",
    data=individuals,
)
results = model.fit()

# Keep the coefficients for the side-by-side comparison table.
al_adjustments['no_adjustments'] = results.params

results.summary()

0,1,2,3
Dep. Variable:,TOTAL_SCORE,R-squared:,0.008
Model:,OLS,Adj. R-squared:,0.008
Method:,Least Squares,F-statistic:,194.2
Date:,"Thu, 09 Feb 2023",Prob (F-statistic):,7.08e-207
Time:,15:44:01,Log-Likelihood:,-351160.0
No. Observations:,127442,AIC:,702300.0
Df Residuals:,127436,BIC:,702400.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.4341,0.022,428.471,0.000,9.391,9.477
"C(re, Treatment('White'))[T.Asian]",-0.8711,0.090,-9.683,0.000,-1.047,-0.695
"C(re, Treatment('White'))[T.Black]",-0.8299,0.028,-29.893,0.000,-0.884,-0.775
"C(re, Treatment('White'))[T.Latino]",-0.4467,0.030,-15.131,0.000,-0.505,-0.389
"C(re, Treatment('White'))[T.Other]",-0.5076,0.067,-7.541,0.000,-0.639,-0.376
"C(re, Treatment('White'))[T.Unknown]",-0.9349,0.064,-14.561,0.000,-1.061,-0.809

0,1,2,3
Omnibus:,12744.657,Durbin-Watson:,1.948
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3694.847
Skew:,-0.007,Prob(JB):,0.0
Kurtosis:,2.166,Cond. No.,9.69


**NOTE:** For the regression below (and subsequent regressions that use demographic/situational variables in addition to race), we used [Service Planning Area (SPA) 4](http://publichealth.lacounty.gov/chs/SPA4/), which represents Metro LA, as the treatment (reference) category for the `SPA` variable.

In [11]:
# Adjusted adult OLS: adds veteran, senior and male flags, SPA (SPA 4 as the
# reference category) and assessment year to the race/ethnicity term.
model = smf.ols(
    formula="""
    TOTAL_SCORE ~ is_veteran 
                + is_senior 
                + is_male 
                + C(SPA, Treatment(4)) 
                + AssessmentYear 
                + C(re, Treatment('White'))
""",
    data=individuals,
)
results = model.fit()

# Assignment aligns on the table's existing index, so only the terms already
# present from the unadjusted model are kept.
al_adjustments['with_adjustments'] = results.params

results.summary()

0,1,2,3
Dep. Variable:,TOTAL_SCORE,R-squared:,0.081
Model:,OLS,Adj. R-squared:,0.08
Method:,Least Squares,F-statistic:,687.7
Date:,"Thu, 09 Feb 2023",Prob (F-statistic):,0.0
Time:,15:44:02,Log-Likelihood:,-341200.0
No. Observations:,125689,AIC:,682400.0
Df Residuals:,125672,BIC:,682600.0
Df Model:,16,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-820.2054,11.447,-71.650,0.000,-842.642,-797.769
"C(SPA, Treatment(4))[T.1.0]",-0.4446,0.047,-9.435,0.000,-0.537,-0.352
"C(SPA, Treatment(4))[T.2.0]",-0.6075,0.035,-17.354,0.000,-0.676,-0.539
"C(SPA, Treatment(4))[T.3.0]",-0.3222,0.040,-8.094,0.000,-0.400,-0.244
"C(SPA, Treatment(4))[T.5.0]",0.1776,0.042,4.278,0.000,0.096,0.259
"C(SPA, Treatment(4))[T.6.0]",-1.6374,0.031,-52.705,0.000,-1.698,-1.577
"C(SPA, Treatment(4))[T.7.0]",-1.1507,0.046,-24.906,0.000,-1.241,-1.060
"C(SPA, Treatment(4))[T.8.0]",-1.2035,0.038,-31.839,0.000,-1.278,-1.129
"C(re, Treatment('White'))[T.Asian]",-0.9395,0.087,-10.773,0.000,-1.110,-0.769

0,1,2,3
Omnibus:,6855.134,Durbin-Watson:,1.959
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2615.606
Skew:,-0.009,Prob(JB):,0.0
Kurtosis:,2.294,Cond. No.,2240000.0


In [12]:
# Show the adult OLS coefficients side by side (unadjusted vs. adjusted).
al_adjustments

Unnamed: 0,no_adjustments,with_adjustments
Intercept,9.434113,-820.205413
"C(re, Treatment('White'))[T.Asian]",-0.871088,-0.939512
"C(re, Treatment('White'))[T.Black]",-0.829908,-0.523342
"C(re, Treatment('White'))[T.Latino]",-0.446717,-0.393314
"C(re, Treatment('White'))[T.Other]",-0.507556,-0.368288
"C(re, Treatment('White'))[T.Unknown]",-0.934867,-0.746816


## Adult Logistic Regression (High Acuity)

In [13]:
# Empty frame that collects the fitted coefficients of the adult high-acuity
# logistic models, one column per model specification.
alog_adjustments = pd.DataFrame()

In [14]:
# Unadjusted adult logit: is_high_acuity on race/ethnicity only, with 'White'
# as the reference category.
model = smf.logit(
    formula="""
    is_high_acuity ~ C(re, Treatment('White'))
""",
    data=individuals,
)
results = model.fit()

# Keep the coefficients for the side-by-side comparison table.
alog_adjustments['no_adjustments'] = results.params

results.summary()

Optimization terminated successfully.
         Current function value: 0.589566
         Iterations 5


0,1,2,3
Dep. Variable:,is_high_acuity,No. Observations:,127442.0
Model:,Logit,Df Residuals:,127436.0
Method:,MLE,Df Model:,5.0
Date:,"Thu, 09 Feb 2023",Pseudo R-squ.:,0.002823
Time:,15:44:03,Log-Likelihood:,-75135.0
converged:,True,LL-Null:,-75348.0
Covariance Type:,nonrobust,LLR p-value:,9.493999999999999e-90

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.7528,0.012,-60.709,0.000,-0.777,-0.728
"C(re, Treatment('White'))[T.Asian]",-0.3740,0.055,-6.834,0.000,-0.481,-0.267
"C(re, Treatment('White'))[T.Black]",-0.3109,0.016,-19.384,0.000,-0.342,-0.279
"C(re, Treatment('White'))[T.Latino]",-0.1893,0.017,-11.194,0.000,-0.222,-0.156
"C(re, Treatment('White'))[T.Other]",-0.2163,0.039,-5.486,0.000,-0.294,-0.139
"C(re, Treatment('White'))[T.Unknown]",-0.4286,0.039,-10.881,0.000,-0.506,-0.351


In [None]:
# Adjusted adult logit: adds veteran, senior and male flags, SPA (SPA 4 as the
# reference category) and assessment year to the race/ethnicity term.
model = smf.logit(
    formula="""
    is_high_acuity ~ is_veteran 
                + is_senior 
                + is_male 
                + C(SPA, Treatment(4)) 
                + AssessmentYear 
                + C(re, Treatment('White'))
""",
    data=individuals,
)
results = model.fit()

# Assignment aligns on the table's existing index, so only the terms already
# present from the unadjusted model are kept.
alog_adjustments['with_adjustments'] = results.params

results.summary()

In [None]:
# Show the adult high-acuity logit coefficients side by side
# (unadjusted vs. adjusted).
alog_adjustments

## Adult Logistic Regression (PSH Assessment Recommendation)

**NOTE:** While LAHSA policies use a score of 12 as a cutoff, the [VI-SPDAT survey](https://www.lahsa.org/documents?id=1306-form-1306-ces-survey-for-individuals-survey-packet.pdf) itself recommends an assessment for Permanent Supportive Housing/Housing First for scores of 8 or higher. We found that running a logistic regression with the 8-or-more cutoff produced results similar to those with the 12-or-more cutoff. You can read more about how we chose to categorize scores in the [“Acuity Group” Categorization](TK) section of our methodology.

In [None]:
# Empty frame that collects the fitted coefficients of the adult
# PSH-recommendation logistic models, one column per model specification.
apshlog_adjustments = pd.DataFrame()

In [None]:
# Unadjusted adult logit: is_psh_recommendation on race/ethnicity only, with
# 'White' as the reference category.
model = smf.logit(
    formula="""
    is_psh_recommendation ~ C(re, Treatment('White'))
""",
    data=individuals,
)
results = model.fit()

# Keep the coefficients for the side-by-side comparison table.
apshlog_adjustments['no_adjustments'] = results.params

results.summary()

In [None]:
# Adjusted adult logit for the PSH recommendation: adds veteran, senior and
# male flags, SPA (SPA 4 as the reference category) and assessment year to the
# race/ethnicity term.
model = smf.logit(
    formula="""
    is_psh_recommendation ~ is_veteran 
                + is_senior 
                + is_male 
                + C(SPA, Treatment(4)) 
                + AssessmentYear 
                + C(re, Treatment('White'))
""",
    data=individuals,
)
results = model.fit()

# Assignment aligns on the table's existing index, so only the terms already
# present from the unadjusted model are kept.
apshlog_adjustments['with_adjustments'] = results.params

results.summary()

In [None]:
# Show the adult PSH-recommendation logit coefficients side by side
# (unadjusted vs. adjusted).
apshlog_adjustments

## Youth Linear Regression

In [None]:
# Empty frame that collects the fitted coefficients of the youth linear
# models, one column per model specification.
yl_adjustments = pd.DataFrame()

In [None]:
# Unadjusted youth OLS: TOTAL_SCORE on race/ethnicity only, with 'White' as
# the reference category.
model = smf.ols(
    formula="""
    TOTAL_SCORE ~ C(re, Treatment('White'))
""",
    data=youth,
)
results = model.fit()

# Keep the coefficients for the side-by-side comparison table.
yl_adjustments['no_adjustments'] = results.params

results.summary()

In [None]:
# Adjusted youth OLS: adds veteran, minor and male flags, SPA (SPA 4 as the
# reference category) and assessment year to the race/ethnicity term.
model = smf.ols(
    formula="""
    TOTAL_SCORE ~ is_veteran 
                + is_minor 
                + is_male 
                + C(SPA, Treatment(4)) 
                + AssessmentYear 
                + C(re, Treatment('White'))
""",
    data=youth,
)
results = model.fit()

# Assignment aligns on the table's existing index, so only the terms already
# present from the unadjusted model are kept.
yl_adjustments['with_adjustments'] = results.params

results.summary()

In [None]:
# Show the youth OLS coefficients side by side (unadjusted vs. adjusted).
yl_adjustments

## Youth Logistic Regression (High Acuity)

In [None]:
# Empty frame that collects the fitted coefficients of the youth high-acuity
# logistic models, one column per model specification.
ylog_adjustments = pd.DataFrame()

In [None]:
# Unadjusted youth logit: is_high_acuity on race/ethnicity only, with 'White'
# as the reference category.
model = smf.logit(
    formula="""
    is_high_acuity ~ C(re, Treatment('White'))
""",
    data=youth,
)
results = model.fit()

# Keep the coefficients for the side-by-side comparison table.
ylog_adjustments['no_adjustments'] = results.params

results.summary()

In [None]:
# Adjusted youth logit: adds veteran, minor and male flags, SPA (SPA 4 as the
# reference category) and assessment year to the race/ethnicity term.
model = smf.logit(
    formula="""
    is_high_acuity ~ is_veteran 
                + is_minor 
                + is_male 
                + C(SPA, Treatment(4)) 
                + AssessmentYear 
                + C(re, Treatment('White'))
""",
    data=youth,
)
results = model.fit()

# Assignment aligns on the table's existing index, so only the terms already
# present from the unadjusted model are kept.
ylog_adjustments['with_adjustments'] = results.params

results.summary()

In [None]:
# Show the youth high-acuity logit coefficients side by side
# (unadjusted vs. adjusted).
ylog_adjustments