In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import openpyxl
from scipy.stats import ttest_ind

In [6]:
CHR = pd.read_excel("CountyHealthNew.xlsx")

Subset to counties with RUCC codes 4-9

In [8]:
CHRRur = CHR[CHR["RUCC"].isin([4, 5, 6, 7, 8, 9])].copy()

Median value of child poverty rate across 45 WI counties

In [9]:
poverty_col = "% Children in Poverty"
mhprov_col = "# Mental Health Providers"

median_poverty = CHRRur[poverty_col].median()
print("Median % Children in Poverty:", median_poverty)

Median % Children in Poverty: 15.8


Codes child poverty rate into 2 groups using the median cutoff (15.8)

In [10]:
cutoff = 15.8
CHRRur["Poverty"] = np.where(CHRRur[poverty_col] < cutoff, "Below", "Above")

T-test

In [None]:
t_stat, p_value = ttest_ind(
    CHRRur.loc[CHRRur["Poverty"] == "Below", mhprov_col],
    CHRRur.loc[CHRRur["Poverty"] == "Above", mhprov_col],
    equal_var=False,          # Welch (matches R default)
    alternative="less"        # H1: mean(Below) < mean(Above)
)

print("t =", t_stat)
print("one-sided p =", p_value)


t = 4.128526738483224
one-sided p = 0.9998731530544198


In [None]:


# ----------------------------
# T-test:  (# Mental Health Providers) ~ Poverty group, alternative = "less"
# In R: t.test(y ~ group, alternative="less") tests mean(Below) < mean(Above)
# ----------------------------
below = CHRRur.loc[CHRRur["Poverty"] == "Below", mhprov_col].dropna()
above = CHRRur.loc[CHRRur["Poverty"] == "Above", mhprov_col].dropna()

# Welch's t-test (matches R default: var.equal = FALSE)
t_stat, p_two_sided = stats.ttest_ind(below, above, equal_var=False)

# Convert to one-sided p-value for alternative="less" (Below < Above)
p_one_sided = p_two_sided / 2 if t_stat < 0 else 1 - (p_two_sided / 2)

print("\nWelch two-sample t-test (Below vs Above)")
print("t =", t_stat)
print("one-sided p (Below < Above) =", p_one_sided)

# ----------------------------
# Correlation test: cor.test(mh_providers, child_poverty, alternative="less")
# R's cor.test defaults to Pearson unless method specified
# alternative="less" tests correlation < 0
# ----------------------------
x = CHRRur[mhprov_col]
y = CHRRur[poverty_col]
df_corr = CHRRur[[mhprov_col, poverty_col]].dropna()
r, p_two_sided_corr = stats.pearsonr(df_corr[mhprov_col], df_corr[poverty_col])

# One-sided p-value for H1: r < 0
p_one_sided_corr = p_two_sided_corr / 2 if r < 0 else 1 - (p_two_sided_corr / 2)

print("\nPearson correlation test")
print("r =", r)
print("one-sided p (r < 0) =", p_one_sided_corr)

# ----------------------------
# Log-transform child poverty rate for viz
# (Note: log requires positive values; if any zeros exist, handle separately)
# ----------------------------
CHRRur["PovertyLog"] = np.log(CHRRur[poverty_col])

# ----------------------------
# Viz (scatter)
# ----------------------------
plt.figure()
plt.scatter(CHRRur["PovertyLog"], CHRRur[mhprov_col], s=60)  # size ~ 2.5 in ggplot-ish
plt.title("Relationship between % Children in Poverty and # of Mental Health Providers", fontweight="bold")
plt.xlabel("% Children in Poverty (log scale)")
plt.ylabel("# Mental\nHealth\nProviders")
plt.tight_layout()
plt.show()
