## Ingest

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn.objects as so

# Download the full CSV export hosted on data.cdc.gov.
url = "https://data.cdc.gov/api/views/8pt5-q6wp/rows.csv?accessType=DOWNLOAD"
df = pd.read_csv(url)

# Keep only the state-level rows whose period starts on 05/07/2020,
# restricted to a fixed set of columns.
is_state_row = df["Group"] == "By State"
in_target_period = df["Time Period Start Date"] == "05/07/2020"
keep_columns = [
    "Indicator",
    "Group",
    "State",
    "Time Period Start Date",
    "Time Period End Date",
    "Value",
    "High CI",
]
df_subset = df.loc[is_state_row & in_target_period, keep_columns]

# Census regions/divisions table, loaded from a CSV on GitHub; it is merged
# with the state-level data later in the notebook.
regions = pd.read_csv(
    "https://github.com/cphalpert/census-regions/raw/refs/heads/master/us%20census%20bureau%20regions%20and%20divisions.csv"
)


## EDA

View a sample of the dataset

In [None]:
# Preview the first five rows of the filtered, long-format table.
df_subset.head(n=5)

Pivot the dataset so that each state is an individual observation

In [None]:
# Reshape long -> wide: one row per state, one column per indicator,
# with the cells filled from the "Value" column.
pivoted = pd.pivot(
    df_subset,
    index="State",
    columns="Indicator",
    values="Value",
)
pivoted.head(n=5)

Validate that the pivot worked properly

In [None]:
# Spot-check the reshaped table itself rather than the long-format input:
# the California row must carry the expected value under each indicator column.
# A scalar .loc lookup raises KeyError when the row or column is missing, so
# these checks cannot pass vacuously the way `.all()` on an empty selection does.
expected_california = {
    "Symptoms of Anxiety Disorder": 31.4,
    "Symptoms of Depressive Disorder": 22.8,
    "Symptoms of Anxiety Disorder or Depressive Disorder": 34.4,
}
for indicator, expected_value in expected_california.items():
    pivoted_value = pivoted.loc["California", indicator]
    assert pivoted_value == expected_value, (
        f"California / {indicator}: expected {expected_value}, got {pivoted_value}"
    )


Get summary statistics for indicator values

In [None]:
# Display describe() summary statistics for each indicator column of the
# pivoted (one row per state) table.
pivoted.describe()

Plot relationship between anxiety and depressive symptoms by state using pivoted data

In [None]:
# Scatter of depressive vs. anxiety percentages, one labelled dot per state,
# drawn onto an explicitly sized matplotlib figure.
f = plt.figure(figsize=(12, 8))
(
    so.Plot(
        pivoted,
        x="Symptoms of Anxiety Disorder",
        y="Symptoms of Depressive Disorder",
    )
    .add(so.Dots())
    # Label each point with its "State" value.
    .add(so.Text(fontsize=8), text="State")
    .label(
        x="% of Sample Reporting Anxiety Symptoms",
        y="% of Sample Reporting Depressive Symptoms",
        title="Anxiety & Depression Symptoms By State During Early COVID",
    )
    .on(f)
    .show()
)



Merge the pivoted dataset with the lookup table of US states' census region and division labels

In [None]:
# Preview the first five rows of the regions/divisions lookup table.
regions.head(n=5)

In [None]:
# Outer-join the per-state indicators with the region labels on "State".
# indicator=True adds a "_merge" column recording which side each row came
# from, so the counts below show how many states matched on both sides.
merged = pivoted.merge(regions, how="outer", on="State", indicator=True)
merged["_merge"].value_counts()

In [None]:
# Preview the first five rows of the merged table.
merged.head(n=5)

Generate boxplots of anxiety symptoms by US regional division

In [None]:
# One box per value of the "Division" column, showing the spread of the
# state-level anxiety percentages within that division.
merged.boxplot(
    column="Symptoms of Anxiety Disorder",
    by="Division",
    figsize=(12, 8),
    grid=False,
)
plt.xlabel("Regional Division")
plt.ylabel("Anxiety Symptoms % Distribution")
# Tilt the tick labels by 45 degrees.
plt.xticks(rotation=45)
# Kept as the final statement so the cell's output value is unchanged.
plt.title("Anxiety Symptoms By Region During Early COVID")

Generate boxplots of depressive symptoms by US regional division

In [None]:
# One box per value of the "Division" column, showing the spread of the
# state-level depressive-symptom percentages within that division.
merged.boxplot(
    column="Symptoms of Depressive Disorder",
    by="Division",
    figsize=(12, 8),
    grid=False,
)
plt.xlabel("Regional Division")
plt.ylabel("Depressive Symptoms % Distribution")
# Tilt the tick labels by 45 degrees.
plt.xticks(rotation=45)
# Kept as the final statement so the cell's output value is unchanged.
plt.title("Depressive Symptoms By Region During Early COVID")