### setup

In [1]:
%load_ext lab_black

In [2]:
import requests
import urllib.request
import numpy as np
import datetime
import random
import json

In [3]:
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials

In [4]:
import altair as alt
import altair_stiles as altstiles

alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('grid')

In [5]:
from datawrapper import Datawrapper

dw = Datawrapper(
    access_token="FtIwtvFtoGLaRT9a3gjX69PLu4wSuRyKddoOz6SOPw3k9wWyNICMHTkcPhOGCR5Z"
)

In [6]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

In [7]:
scope = ["https://spreadsheets.google.com/feeds"]
# credentials = ServiceAccountCredentials.from_json_keyfile_name(
#    "jupyter-integration-credentials-redistricting.json", scope
# )
# gc = gspread.authorize(credentials)

source: https://twitter.com/ddiamond/status/1555969337068081154<br>
data series:<br>
'CES6562110001' (physicians offices)<br>
'CES6562140001' (outpatient care centers)<br>
'CES6562200001' (hospitals)<br>
'CES6562300001' (nursing homes)<br>

### read in bls data from FRED

Physicians offices

In [8]:
with urllib.request.urlopen(
    "https://api.stlouisfed.org/fred/series/observations?series_id=CES6562110001&api_key=b98b54e0ff7221aca6e1b161f5b12435&file_type=json"
) as url:
    data = json.loads(url.read().decode())

In [9]:
dataframes = []
for d in data["observations"]:
    dataframes.append(pd.DataFrame(d, index=[0]))

In [10]:
physicians_office = pd.concat(dataframes).reset_index(drop=True)

Outpatient care centers

In [11]:
with urllib.request.urlopen(
    "https://api.stlouisfed.org/fred/series/observations?series_id=CES6562140001&api_key=b98b54e0ff7221aca6e1b161f5b12435&file_type=json"
) as url:
    data = json.loads(url.read().decode())

In [12]:
dataframes = []
for d in data["observations"]:
    dataframes.append(pd.DataFrame(d, index=[0]))

In [13]:
outpatient = pd.concat(dataframes).reset_index(drop=True)

Hospitals

In [14]:
with urllib.request.urlopen(
    "https://api.stlouisfed.org/fred/series/observations?series_id=CES6562200001&api_key=b98b54e0ff7221aca6e1b161f5b12435&file_type=json"
) as url:
    data = json.loads(url.read().decode())

In [15]:
dataframes = []
for d in data["observations"]:
    dataframes.append(pd.DataFrame(d, index=[0]))

In [16]:
hospitals = pd.concat(dataframes).reset_index(drop=True)

Nursing homes

In [17]:
with urllib.request.urlopen(
    "https://api.stlouisfed.org/fred/series/observations?series_id=CES6562300001&api_key=b98b54e0ff7221aca6e1b161f5b12435&file_type=json"
) as url:
    data = json.loads(url.read().decode())

In [18]:
dataframes = []
for d in data["observations"]:
    dataframes.append(pd.DataFrame(d, index=[0]))

In [19]:
nursing_homes = pd.concat(dataframes).reset_index(drop=True)

### clean dfs

In [20]:
physicians_office = physicians_office.drop(columns=["realtime_start", "realtime_end"])
nursing_homes = nursing_homes.drop(columns=["realtime_start", "realtime_end"])
hospitals = hospitals.drop(columns=["realtime_start", "realtime_end"])
outpatient = outpatient.drop(columns=["realtime_start", "realtime_end"])

In [21]:
physicians_office["type"] = "Physicians' offices"
nursing_homes["type"] = "Nursing homes"
hospitals["type"] = "Hospitals"
outpatient["type"] = "Outpatient care"

#### combining each venue into single df and changing the types of variables

In [22]:
df_list = [physicians_office, nursing_homes, hospitals, outpatient]

In [23]:
combined = pd.concat(df_list)

In [24]:
combined["date"] = pd.to_datetime(combined["date"])
combined["value"] = combined["value"].astype(float)
combined["type"] = combined["type"].astype(str)

#### if you want to treat each venue separately

In [25]:
for df in df_list:
    df["date"] = pd.to_datetime(df["date"])
    df["value"] = df["value"].astype(float)
    df["type"] = df["type"].astype(str)

### charting

In [26]:
alt.Chart(combined[combined["date"] >= "2020-01-01"]).mark_line().encode(
    x=alt.X("date"), y=alt.Y("value"), color=alt.Color("type")
)

In [27]:
alt.Chart(combined[combined["date"] >= "2020-01-01"]).mark_line().encode(
    x=alt.X("date"), y=alt.Y("value")
).properties(width=400, height=200).facet(facet=alt.Facet("type:O"), columns=2)

### mess around with percent changes

In [28]:
for df in df_list:
    df.sort_values("date", ascending=True)
    df["pct_change"] = (df["value"] - df["value"].shift(periods=1)) / df["value"].shift(
        periods=1
    )

In [29]:
combined_pct_chg = pd.concat(df_list)

In [30]:
alt.Chart(
    combined_pct_chg[combined_pct_chg["date"] >= "2020-01-01"]
).mark_line().encode(x=alt.X("date"), y=alt.Y("pct_change"), color=alt.Color("type"))

In [31]:
alt.Chart(
    combined_pct_chg[combined_pct_chg["date"] >= "2020-01-01"]
).mark_line().encode(x=alt.X("date"), y=alt.Y("pct_change")).properties(
    width=400, height=200
).facet(
    facet=alt.Facet("type:O"), columns=2
)

In [32]:
alt.Chart(combined_pct_chg[combined_pct_chg["date"] >= "2020-01-01"]).mark_bar().encode(
    x=alt.X("date"),
    y=alt.Y("pct_change"),
    color=alt.condition(alt.datum.value > 0, alt.value("#00eeef"), alt.value("orange")),
).properties(width=300, height=200).facet(facet=alt.Facet("type:O"), columns=2)

#### is it worth comparing to the percent change from jan. 2020 for all?

In [33]:
# for df in df_list:
#     df.sort_values("date", ascending=True)
#     df["jan2020"] = (
#         df["value"] - df.loc[df["date"] == "2020-01-01", "value"]
#     ) / df.loc[df["date"] == "2020-01-01", "value"]

In [34]:
# hospitals.loc[hospitals["date"] == "2020-01-01", "value"]

In [35]:
benchmark = hospitals[hospitals["date"] == "2020-01-01"]["value"].iloc[0]
benchmark

5229.1

In [36]:
for df in df_list:
    df.sort_values("date", ascending=True)
    df["jan2020_change"] = (df["value"] - benchmark) / benchmark

In [37]:
combined_jan_2020 = pd.concat(df_list)

In [38]:
alt.Chart(
    combined_jan_2020[combined_jan_2020["date"] >= "2020-01-01"]
).mark_bar().encode(
    x=alt.X("date"),
    y=alt.Y("jan2020_change"),
    color=alt.condition(alt.datum.value > 0, alt.value("#00eeef"), alt.value("orange")),
).properties(
    width=300, height=200
).facet(
    facet=alt.Facet("type:O"), columns=2
)

### try slopes for jan 2020, may 2020, july 2022

I gave up, something to pair on maybe<br>
I was able to make the basic faceted chart with just these three datapoints as bars but it was not helpful <br>

In [39]:
# dates that I want
dates = ["2020-01-01", "2020-05-01", "2022-06-01"]

In [40]:
# failed attempt at layering the charts
# if this worked it would still be wrong bc the percent change in this df is from the previous month in the unfiltered df, not between the three points anyways
bars=alt.Chart(combined_pct_chg[combined_pct_chg["date"].isin(dates)]).mark_bar().encode(x=alt.X("date:O"), y=alt.Y("value"))

lines = alt.Chart(combined_pct_chg[combined_pct_chg["date"].isin(dates)]).mark_line(color="#005f66", size=2).encode(x=alt.X("date:O", title=""),y=alt.Y("value",title=""))

text=bars.mark_text(align='left', baseline='middle', dx=-11, dy=11).encode(text=alt.Text('pct_change', format='.0%'))

alt.layer((bars+lines+text),data=combined_pct_chg[combined_pct_chg["date"].isin(dates)], facet(facet=alt.Facet('type:N', columns=2))

SyntaxError: positional argument follows keyword argument (983299382.py, line 9)

ERROR:root:Cannot parse: 10:0: EOF in multi-line statement
Traceback (most recent call last):
  File "/Users/alexleedsmatthews/.local/share/virtualenvs/notebooks-HSWNtBAS/lib/python3.9/site-packages/lab_black.py", line 218, in format_cell
    formatted_code = _format_code(cell)
  File "/Users/alexleedsmatthews/.local/share/virtualenvs/notebooks-HSWNtBAS/lib/python3.9/site-packages/lab_black.py", line 29, in _format_code
    return format_str(src_contents=code, mode=FileMode())
  File "src/black/__init__.py", line 1163, in format_str
  File "src/black/__init__.py", line 1173, in _format_str_once
  File "src/black/parsing.py", line 128, in lib2to3_parse
black.parsing.InvalidInput: Cannot parse: 10:0: EOF in multi-line statement


In [None]:
short = combined[combined["date"].isin(dates)].copy()

In [None]:
#this is my attempt to do the grouped percent change (so I don't have to loop thru each individual df and concat them)
short["pct_change"] = (
    short.groupby(["date", "type", "value"]).pct_change(1) * 100
).round(2)

In [None]:
# this doesn't work for a few reasons
alt.Chart(short).mark_bar().encode(
    x=alt.X("date:N"),
    y=alt.Y("value"),
    color=alt.condition(alt.datum.value > 0, alt.value("#00eeef"), alt.value("orange")),
).properties(width=200, height=200).facet(facet=alt.Facet("type:O"), columns=2)