# Are you Better Off? 
> This notebook downloads, processes and charts data for a project about whether Americans are better off than they were four years ago. 

---

#### Import Python tools and Jupyter config

In [1]:
import os
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
from io import StringIO
import geopandas as gpd
import altair_cnn as altcnn
from IPython.display import Image
from datawrapper import Datawrapper
import statsmodels.api as sm
import numpy as np

In [2]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None
alt.themes.register("cnn", altcnn.theme)
alt.themes.enable("cnn")

ThemeRegistry.enable('cnn')

In [3]:
dw_token = os.environ.get("dw_api")
dw = Datawrapper(access_token=dw_token)
today = pd.Timestamp("today").strftime("%Y-%m-%d")

In [4]:
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
}

---

## Charts

#### 1. Home values [FRED](https://fred.stlouisfed.org/series/USAUCSFRCONDOSMSAMID)
> Zillow, Zillow Home Value Index (ZHVI) for All Homes Including Single-Family Residences, Condos, and CO-OPs in the United States of America [USAUCSFRCONDOSMSAMID], retrieved from FRED, Federal Reserve Bank of St. Louis; https://fred.stlouisfed.org/series/USAUCSFRCONDOSMSAMID, April 24, 2024.

In [5]:
hv_df = (
    pd.read_csv(
        "https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1318&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=USAUCSFRCONDOSMSAMID&scale=left&cosd=2000-01-01&coed=2024-03-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-03-01&line_index=1&transformation=lin&vintage_date=2024-04-25&revision_date=2024-04-25&nd=2000-01-01"
    )
    .round()
    .rename(columns={"DATE": "date", "USAUCSFRCONDOSMSAMID": "dollar_value"})
)

In [6]:
dates = ["2020-03-01", "2024-03-01"]
then_now = hv_df.query(f"date.isin({dates})").reset_index(drop=True)

In [7]:
hv_df.tail()

Unnamed: 0,date,dollar_value
286,2023-11-01,350990.0
287,2023-12-01,351423.0
288,2024-01-01,351854.0
289,2024-02-01,352643.0
290,2024-03-01,354179.0


In [8]:
percentage_change = (
    (
        (then_now.loc[1, "dollar_value"] - then_now.loc[0, "dollar_value"])
        / then_now.loc[0, "dollar_value"]
    )
    * 100
).round(0)

In [9]:
percentage_change

np.float64(42.0)

In [10]:
hv_df.to_csv("data/processed/home_value_index.csv", index=False)

In [11]:
hv_df_id = "BsW4U"
dw.add_data(chart_id=f"{hv_df_id}", data=hv_df)

True

#### 1a. Rentals [observed index from Zillow](https://www.zillow.com/research/data/)

In [None]:
rent_url = "https://files.zillowstatic.com/research/public_csvs/zori/Metro_zori_uc_sfrcondomfr_sm_sa_month.csv?t=1715190478"

In [143]:
rent_df = (
    pd.read_csv(rent_url)
    .query('RegionName == "United States"')
    .melt(var_name="date", value_name="value")
    .drop([0, 1, 2, 3, 4])
)

In [None]:
rent_df["date"] = pd.to_datetime(rent_df["date"])
rent_df = rent_df.query('date > "2016-12-31"')

In [152]:
rent_df.head(3)

Unnamed: 0,date,value
29,2017-01-31,1329.878581
30,2017-02-28,1335.849601
31,2017-03-31,1343.514733


In [145]:
rent_df.tail(3)

Unnamed: 0,date,value
113,2024-01-31,1966.384191
114,2024-02-29,1974.650704
115,2024-03-31,1982.677025


In [146]:
rent_then = round(rent_df.query("date == date.min()")["value"].iloc[0], 2)
rent_then

1329.88

In [147]:
rent_now = round(rent_df.query("date == date.max()")["value"].iloc[0], 2)
rent_now

1982.68

In [148]:
rent_pct_change = int(round((((rent_now - rent_then) / rent_now) * 100), 0))

In [149]:
rent_pct_change

33

In [150]:
rent_df.to_csv("data/processed/zillow_rent_index_2017_2024.csv", index=False)

In [151]:
rent_id = "OOYLk"
dw.add_data(
    chart_id=f"{rent_id}",
    data=rent_df,
)

True

#### 2. Impact of inflation [Gallup poll](https://datawrapper.dwcdn.net/FSi7K/5/)

In [23]:
url = "https://datawrapper.dwcdn.net/FSi7K/5/dataset.csv"
response = requests.get(url)

In [24]:
gallup_inflation = pd.read_csv(
    StringIO(response.text),
    sep="\t",
    names=["date", "value"],
    header=None,
    skiprows=1,
)

In [25]:
gallup_inflation.loc[len(gallup_inflation.index)] = ["4/1/2024", 41]

In [26]:
gallup_inflation["date"] = pd.to_datetime(gallup_inflation["date"], format="%m/%d/%Y")

In [27]:
gallup_inflation_id = "hF7HI"
dw.add_data(
    chart_id=f"{gallup_inflation_id}",
    data=gallup_inflation.query('date > "2012-04-01"').sort_values("date"),
)

True

#### 3. Stock market

In [153]:
sp_df = pd.read_json("https://ix.cnn.io/data/stock-market/snp_stock_market_index.json")

In [155]:
sp_df.tail()

Unnamed: 0,date,close
2972,2024-05-01,5018.39
2973,2024-05-02,5064.2
2974,2024-05-03,5127.79
2975,2024-05-06,5180.74
2976,2024-05-07,5187.7


In [156]:
sp_id = "QSGJa"
dw.add_data(
    chart_id=f"{sp_id}",
    data=sp_df[["date", "close"]],
)

True

#### 4. Groceries

In [31]:
grocery_url = "https://datasembly.com/wp-json/gpi/v3/chart?industry=36&start_date=2019-10-06&end_date=2024-04-14&location=all&eggs=true"

In [32]:
response = requests.get(grocery_url, headers=headers)

In [33]:
params = {
    "industry": "36",
    "start_date": "2019-10-06",
    "end_date": "2024-04-14",
    "location": "all",
    "eggs": "true",
}

response = requests.get(
    "https://datasembly.com/wp-json/gpi/v3/chart", params=params, headers=headers
)

json_data = response.json()

In [34]:
dates = response.json()["data"]["labels"]

all_data = pd.DataFrame()

for dataset in json_data["data"]["datasets"]:
    category = dataset["label"]
    values = dataset["data"]

    df = pd.DataFrame(data={"Date": dates, category: values})

    if all_data.empty:
        all_data = df
    else:
        all_data = pd.merge(all_data, df, on="Date", how="outer")
        all_data = all_data.rename(columns={"Meat | Alternative Meats": "Meats"})

In [35]:
all_data = all_data.drop(["Alternative Dairy"], axis=1)
# all_data.columns = all_data.columns.str.lower()

In [36]:
all_data["Date"] = pd.to_datetime(all_data["Date"], format="%b %d, %y")

In [37]:
grocery_id = "nCDAR"
dw.add_data(
    chart_id=f"{grocery_id}",
    data=all_data.query('Date > "2019-12-31"')[
        ["Date", "Frozen", "Produce", "Meats", "Dairy"]
    ],
)

True

#### 5. Mortgage rates

In [38]:
mort_df = pd.read_json(
    "https://ix.cnn.io/data/mortgage-rates/fred-30-year-fixed-mortgage-rates.json"
)

In [39]:
mort_df.head()

Unnamed: 0,date,value,value15yr,indicator,source,fetched,change
0,2000-01-07,8.15,7.73,US fixed rates mortgage average,https://www.freddiemac.com/pmms,2024-05-02T16:11:40+00:00,
1,2000-01-14,8.18,7.78,US fixed rates mortgage average,https://www.freddiemac.com/pmms,2024-05-02T16:11:40+00:00,0.37
2,2000-01-21,8.26,7.86,US fixed rates mortgage average,https://www.freddiemac.com/pmms,2024-05-02T16:11:40+00:00,0.98
3,2000-01-28,8.25,7.84,US fixed rates mortgage average,https://www.freddiemac.com/pmms,2024-05-02T16:11:40+00:00,-0.12
4,2000-02-04,8.25,7.85,US fixed rates mortgage average,https://www.freddiemac.com/pmms,2024-05-02T16:11:40+00:00,0.0


In [40]:
mort_id = "KYFbn"
dw.add_data(
    chart_id=f"{mort_id}",
    data=mort_df.query('date > "2015-12-31"')[["date", "value"]],
)

True

#### 6. Unemployment rates

In [41]:
unempl_states = pd.read_html(
    "https://www.bls.gov/web/laus/laumstrk.htm",
    storage_options=headers,
)[0].drop([51, 52])

In [42]:
unempl_states["period"] = unempl_states.columns[1].replace("(p) rate", "")

In [43]:
unempl_states.columns = ["state", "rate", "rank", "period"]

In [44]:
name_to_postal = us.states.mapping("name", "abbr")

In [45]:
unempl_states["state_abbr"] = unempl_states["state"].map(name_to_postal)
unempl_states.loc[unempl_states.state == "District of Columbia", "state_abbr"] = "DC"

In [46]:
unempl_states_id = "Y0osD"
dw.add_data(
    chart_id=f"{unempl_states_id}",
    data=unempl_states[["state", "rate", "period", "state_abbr"]],
)

True

#### 7. Job openings

In [47]:
jobs_url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1318&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=JTSJOL&scale=left&cosd=2015-12-31&coed={today}&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date={today}&revision_date={today}&nd=2015-12-31"

In [48]:
jobs_df = pd.read_csv(jobs_url, names=["date", "value"], header=0)

In [49]:
jobs_df["value_label"] = jobs_df["value"] * 1000

In [50]:
jobs_id = "4jPkY"
dw.add_data(
    chart_id=f"{jobs_id}",
    data=jobs_df,
)

True

#### 8. Job openings per person

In [51]:
positions_rate = pd.read_html(
    "https://www.bls.gov/charts/job-openings-and-labor-turnover/unemp-per-job-opening.htm",
    storage_options=headers,
)[0].rename(
    columns={
        "Month": "month",
        "Number of unemployed persons per job opening": "people_per_opening",
    }
)

In [52]:
positions_rate["month_clean"] = pd.to_datetime(positions_rate["month"], format="mixed")

In [53]:
jobs_rate_id = "E6vj3"
dw.add_data(
    chart_id=f"{jobs_rate_id}",
    data=positions_rate.query('month_clean > "2014-12-31"'),
)

True

#### 9. Poverty

In [54]:
sup_pov_meas = pd.read_excel(
    "https://www2.census.gov/programs-surveys/demo/tables/p60/280/tableB-2.xlsx",
    skipfooter=219,
    skiprows=6,
)[
    [
        "ALL RACES",
        "Unnamed: 4",
        "Unnamed: 9",
        "Unnamed: 14",
        "Unnamed: 19",
    ]
].rename(
    columns={
        "ALL RACES": "year",
        "Unnamed: 4": "all_rate",
        "Unnamed: 9": "rate_under_18",
        "Unnamed: 14": "rate_18_64",
        "Unnamed: 19": "rate_65_older",
    }
)

In [55]:
sup_pov_meas

Unnamed: 0,year,all_rate,rate_under_18,rate_18_64,rate_65_older
0,2022,12.4,12.4,11.9,14.1
1,2021,7.8,5.2,7.9,10.7
2,20202,9.2,9.7,8.9,9.5
3,20193,11.8,12.6,11.2,12.8
4,2019,11.7,12.5,11.2,12.8
5,2018,12.8,13.7,12.2,13.6
6,20174,13.0,14.2,12.4,13.6
7,2017,13.9,15.6,13.2,14.1
8,2016,14.0,15.2,13.3,14.5
9,2015,14.5,16.2,14.1,13.7


In [56]:
sup_pov_meas["year"] = sup_pov_meas["year"].astype(str).str[0:4]

In [57]:
sup_pov_meas = sup_pov_meas.drop_duplicates(subset="year")
sup_pov_meas

Unnamed: 0,year,all_rate,rate_under_18,rate_18_64,rate_65_older
0,2022,12.4,12.4,11.9,14.1
1,2021,7.8,5.2,7.9,10.7
2,2020,9.2,9.7,8.9,9.5
3,2019,11.8,12.6,11.2,12.8
5,2018,12.8,13.7,12.2,13.6
6,2017,13.0,14.2,12.4,13.6
8,2016,14.0,15.2,13.3,14.5
9,2015,14.5,16.2,14.1,13.7


In [58]:
pov_rate_id = "lyFsj"
dw.add_data(
    chart_id=f"{pov_rate_id}",
    data=sup_pov_meas.query('year > "2014"'),
)

True

In [59]:
sup_pov_meas

Unnamed: 0,year,all_rate,rate_under_18,rate_18_64,rate_65_older
0,2022,12.4,12.4,11.9,14.1
1,2021,7.8,5.2,7.9,10.7
2,2020,9.2,9.7,8.9,9.5
3,2019,11.8,12.6,11.2,12.8
5,2018,12.8,13.7,12.2,13.6
6,2017,13.0,14.2,12.4,13.6
8,2016,14.0,15.2,13.3,14.5
9,2015,14.5,16.2,14.1,13.7


#### 10. Freedom

In [60]:
def fetch_scores_for_years(start_year, end_year):
    base_url = "https://freedomhouse.org/api/map/fiw/{}/json"
    data_list = []

    for year in range(start_year, end_year + 1):
        url = base_url.format(year)
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            # Extract data for each country
            for country_code, details in data["countries"].items():
                if "current" in details:
                    # Prepare the data entry for each country
                    entry = {
                        "Country_Code": country_code,
                        "Country_Name": details["name"],
                        "Year": year,
                        **details[
                            "current"
                        ],  # Unpack the 'current' dictionary directly
                    }
                    data_list.append(entry)
                else:
                    print(f"No 'current' data available for {country_code} in {year}")
        else:
            print(
                f"Failed to fetch data for {year} with status code: {response.status_code}"
            )

    return pd.DataFrame(data_list).rename(
        columns={"pr": "political_rights", "cl": "civil_liberties"}
    )

In [61]:
# Fetch scores from 2017 to 2024
scores_df = fetch_scores_for_years(2017, 2024)

No 'current' data available for ATA in 2017
No 'current' data available for FLK in 2017
No 'current' data available for PRI in 2017
No 'current' data available for PSX in 2017
No 'current' data available for EDO in 2017
No 'current' data available for  in 2017
No 'current' data available for ATA in 2018
No 'current' data available for FLK in 2018
No 'current' data available for PRI in 2018
No 'current' data available for PSX in 2018
No 'current' data available for EDO in 2018
No 'current' data available for  in 2018
No 'current' data available for ATA in 2019
No 'current' data available for FLK in 2019
No 'current' data available for PRI in 2019
No 'current' data available for PSX in 2019
No 'current' data available for EDO in 2019
No 'current' data available for  in 2019
No 'current' data available for ATA in 2020
No 'current' data available for FLK in 2020
No 'current' data available for PRI in 2020
No 'current' data available for PSX in 2020
No 'current' data available for  in 2020


In [62]:
scores_df.columns = scores_df.columns.str.lower()

In [63]:
scores_df.query("year == 2024").to_csv(
    "data/processed/freedom_house_countries_2024.csv", index=False
)

In [64]:
pop_countries = [
    "India",
    "China",
    "United States",
    "Indonesia",
    "Pakistan",
    "Nigeria",
    "Brazil",
    "Mexico",
    "Russia",
    "Bangladesh",
]

In [65]:
scores_pivot = (
    scores_df.query(f"country_name.isin({pop_countries})")
    .pivot(
        index=["year"],
        columns=["country_name"],
        values="total",
    )
    .reset_index()
)

In [66]:
scores_pivot

country_name,year,Bangladesh,Brazil,China,India,Indonesia,Mexico,Nigeria,Pakistan,Russia,United States
0,2017,47,79,15,77,65,65,50,43,20,89
1,2018,45,78,14,77,64,62,50,43,20,86
2,2019,41,75,11,75,62,63,50,39,20,86
3,2020,39,75,10,71,61,62,47,38,20,86
4,2021,39,74,9,67,59,61,45,37,20,83
5,2022,39,73,9,66,59,60,43,37,19,83
6,2023,40,72,9,66,58,60,43,37,16,83
7,2024,40,72,9,66,57,60,44,35,13,83


In [67]:
alt.Chart(scores_df.query(f"country_name.isin({pop_countries})")).mark_line().encode(
    x="year:T",
    y="total:Q",
    color="country_name:N",
)

In [68]:
scores_pivot.to_csv("data/processed/freedom_house_countries_wide.csv", index=False)

#### 11. CPI - Food at home

In [69]:
food_cpi_url = "https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1318&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=CUSR0000SAF11&scale=left&cosd=2016-01-01&coed=2024-03-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=nbd&fgsnd=2016-01-01&line_index=1&transformation=lin&vintage_date=2024-04-30&revision_date=2024-04-30&nd=1952-01-01"

In [70]:
food_df = (
    pd.read_csv(food_cpi_url)
    .round(2)
    .rename(columns={"DATE": "date", "CUSR0000SAF11": "value"})
)

In [71]:
then = int(food_df.query("date == '2020-03-01'")["value"].iloc[0])

In [72]:
now = int(food_df.query("date == date.max()")["value"].iloc[0])

In [73]:
((now - then) / then) * 100

24.752475247524753

In [74]:
food_df_id = "fYpnn"
dw.add_data(
    chart_id=f"{food_df_id}",
    data=food_df,
)

True

#### 12. Covid-19 hospitalizations

In [75]:
url = "https://ix.cnn.io/data/cdc-covid-hospitalizations/processed.json"

In [76]:
covid_df = (
    pd.json_normalize(pd.read_json(url)["data"])
    .rename(columns={"weekEndDate": "date", "newHospitalizations": "value"})
    .dropna()
)

In [77]:
covid_df_id = "X6Lp8"
dw.add_data(
    chart_id=f"{covid_df_id}",
    data=covid_df,
)

True

#### 13. Gun violence

In [78]:
url = "https://ix.cnn.io/data/gun-violence-archive/gva-incidents.json"

In [79]:
response = requests.get(url)
data = response.json()

#### Into a dataframe

In [80]:
gun_df = pd.DataFrame(data["data"])

In [81]:
gun_df["date"] = pd.to_datetime(gun_df["timestampDate"]).dt.date
gun_df["month"] = pd.to_datetime(gun_df["timestampDate"]).dt.month
gun_df["year"] = pd.to_datetime(gun_df["timestampDate"]).dt.year

In [82]:
gun_recent = gun_df.query("year>2013")

In [83]:
grouped_df = (
    gun_recent.query("year>2013")
    .groupby(["year"])
    .agg(
        {
            "gvaIncidentId": "count",
            "victimsInjured": "sum",
            "victimsKilled": "sum",
        }
    )
    .reset_index()
).rename(
    columns={
        "gvaIncidentId": "incidents",
        "victimsInjured": "injured",
        "victimsKilled": "killed",
    }
)

In [84]:
grouped_df

Unnamed: 0,year,incidents,injured,killed
0,2014,272,1075,257
1,2015,332,1319,336
2,2016,383,1532,429
3,2017,347,1790,415
4,2018,335,1311,345
5,2019,414,1690,428
6,2020,610,2523,494
7,2021,689,2785,668
8,2022,644,2651,641
9,2023,655,2684,715


In [85]:
grouped_df.to_csv(
    "data/processed/gun_violence_incidents_aggregates_year.csv", index=False
)

In [86]:
grouped_df_id = "gbr4B"
dw.add_data(
    chart_id=f"{grouped_df_id}",
    data=grouped_df.query("year!=2024"),
)

True

#### 14. Happiness - Gallup poll

In [87]:
happy_url = "https://datawrapper.dwcdn.net/8Z3B3/35/dataset.csv"
response = requests.get(happy_url)

In [88]:
happy_df = pd.read_csv(
    StringIO(response.text),
    sep="\t",
    names=["date", "value"],
    header=None,
    skiprows=1,
)

In [89]:
happy_df["date"] = pd.to_datetime(happy_df["date"].str.strip(), format="%b %d %Y")

In [90]:
happy_df.set_index("date", inplace=True)

In [91]:
happy_df["rolling_mean"] = happy_df["value"].rolling(window=12).mean()

In [92]:
happy_df = happy_df.reset_index()

In [93]:
happy_df.to_csv("data/processed/gallup_satisfaction_1979-present.csv")

In [94]:
happy_dw = happy_df.query('date>"2015-12-31"')

In [95]:
happy_df_id = "IwQvj"
dw.add_data(
    chart_id=f"{happy_df_id}",
    data=happy_dw,
)

True