# LA County coronavirus vaccine administration by city/community

### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
import glob
import path
import os

In [3]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Create a timeseries from coronavirus-tracker data

In [67]:
path = "/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/vaccines/los-angeles/"
all_files = glob.glob(os.path.join(path, "*.csv"))

In [68]:
df_from_each_file = (
    pd.read_csv(f, encoding="ISO-8859-1", low_memory=False) for f in all_files
)
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)

In [69]:
df = concatenated_df.copy()

In [70]:
population = df[df["update_date"] == df["update_date"].max()][["area", "pop_2019"]]

In [71]:
population.head()

Unnamed: 0,area,pop_2019
696,City of Agoura Hills,18019
697,City of Alhambra,72940
698,City of Arcadia,51103
699,City of Artesia,14266
700,City of Avalon,3077


### Strip out the strings placed in columns by the county

In [72]:
df = df[
    (df.vaccinations.str.strip() != "Unreliable Data")
    & (df.pop_2019.str.strip() != "No Pop Data")
    & (df.vaccinations_pct.str.strip() != "Unreliable Data")
    & (df.vaccinations_pct.str.strip() != "No Pop Data")
    & (df.vaccinations.str.strip() != "<5")
]

In [73]:
df.head()

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date
0,City of Agoura Hills,5008,17447,28.7,2021-03-25
1,City of Alhambra,18947,71313,26.6,2021-03-25
2,City of Arcadia,13777,49666,27.7,2021-03-25
3,City of Artesia,3571,13877,25.7,2021-03-25
4,City of Avalon,317,3000,10.6,2021-03-25


In [74]:
df["week"] = pd.to_datetime(df["update_date"]).dt.week

In [75]:
df.week.value_counts()

13    2352
12    2352
11    2352
10    2342
9     2338
14    2008
8     1670
Name: week, dtype: int64

In [76]:
df.sort_values("update_date", ascending=False).head(10)

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date,week
862,Los Angeles - Pacific Palisades,9872,17576,56.2,2021-04-10,14
798,Los Angeles - Carthay,6268,12443,50.4,2021-04-10,14
800,Los Angeles - Century City,6319,10589,59.7,2021-04-10,14
801,Los Angeles - Century Palms/Cove,5408,24721,21.9,2021-04-10,14
802,Los Angeles - Chatsworth,14390,31934,45.1,2021-04-10,14
803,Los Angeles - Cheviot Hills,4708,7395,63.7,2021-04-10,14
804,Los Angeles - Chinatown,2707,7441,36.4,2021-04-10,14
805,Los Angeles - Cloverdale/Cochran,3649,11658,31.3,2021-04-10,14
806,Los Angeles - Country Club Park,5057,12607,40.1,2021-04-10,14
807,Los Angeles - Crenshaw District,4048,11229,36.0,2021-04-10,14


### Clean up data types

In [77]:
df["vaccinations"] = df["vaccinations"].astype(float)

In [78]:
df["pop_2019"] = df["pop_2019"].astype(float)

In [79]:
df["vaccinations_pct"] = ((df["vaccinations"] / df["pop_2019"]) * 100).round(2)

### Which week of the year? 

In [80]:
df["week"] = pd.to_datetime(df["update_date"]).dt.week

In [81]:
df.sort_values("week", ascending=False).head()

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date,week
3991,Los Angeles - North Hills,12542.0,46307.0,27.08,2021-04-06,14
3922,Los Angeles - Bel Air,3251.0,6968.0,46.66,2021-04-06,14
3938,Los Angeles - Country Club Park,4125.0,12311.0,33.51,2021-04-06,14
3937,Los Angeles - Cloverdale/Cochran,2769.0,11269.0,24.57,2021-04-06,14
3936,Los Angeles - Chinatown,2138.0,7292.0,29.32,2021-04-06,14


### Widen the data for weekly average columns

In [83]:
df_pivot = (
    pd.pivot_table(
        df,
        values="vaccinations_pct",
        index="area",
        columns="week",
        aggfunc="mean",
    )
    .round(2)
    .reset_index()
)

In [84]:
df_pivot.head()

week,area,8,9,10,11,12,13,14
0,City of Agoura Hills,21.82,21.82,23.79,28.7,33.08,38.91,41.67
1,City of Alhambra,18.68,18.68,20.93,26.57,31.19,37.35,39.93
2,City of Arcadia,20.9,20.9,22.85,27.74,31.99,37.66,40.47
3,City of Artesia,20.83,20.83,22.23,25.73,29.82,35.27,37.6
4,City of Avalon,10.0,10.0,10.16,10.57,10.44,10.27,10.32


In [85]:
df_pivot.columns = ["week_" + str(col) for col in df_pivot.columns]

In [86]:
df_pivot.rename(
    columns={"week_area": "area", "week_pop_2019": "population"}, inplace=True
)

In [87]:
df_pivot.head()

Unnamed: 0,area,week_8,week_9,week_10,week_11,week_12,week_13,week_14
0,City of Agoura Hills,21.82,21.82,23.79,28.7,33.08,38.91,41.67
1,City of Alhambra,18.68,18.68,20.93,26.57,31.19,37.35,39.93
2,City of Arcadia,20.9,20.9,22.85,27.74,31.99,37.66,40.47
3,City of Artesia,20.83,20.83,22.23,25.73,29.82,35.27,37.6
4,City of Avalon,10.0,10.0,10.16,10.57,10.44,10.27,10.32


### Merge to add population to each area

In [88]:
df_merge = pd.merge(df_pivot, population, on="area")

In [89]:
df_merge.head()

Unnamed: 0,area,week_8,week_9,week_10,week_11,week_12,week_13,week_14,pop_2019
0,City of Agoura Hills,21.82,21.82,23.79,28.7,33.08,38.91,41.67,18019
1,City of Alhambra,18.68,18.68,20.93,26.57,31.19,37.35,39.93,72940
2,City of Arcadia,20.9,20.9,22.85,27.74,31.99,37.66,40.47,51103
3,City of Artesia,20.83,20.83,22.23,25.73,29.82,35.27,37.6,14266
4,City of Avalon,10.0,10.0,10.16,10.57,10.44,10.27,10.32,3077


In [55]:
df_pivot["change_week8_to_14"] = (
    ((df_pivot["week_14"] - df_pivot["week_8"]) / df_pivot["week_8"]) * 100
).round()

In [56]:
df_pivot.sort_values("change_week8_to_14", ascending=False).head(10)

Unnamed: 0,area,week_8,week_9,week_10,week_11,week_12,week_13,week_14,change_week8_to_14
289,Unincorporated - Palmdale,2.21,2.21,2.69,3.9,6.52,10.02,10.41,371.0
229,Unincorporated - Angeles National Forest,6.05,6.05,6.63,8.09,13.75,21.3,23.2,283.0
200,Los Angeles - University Park,6.46,6.46,7.65,10.62,14.66,20.04,21.74,237.0
81,City of Vernon,25.23,25.23,28.83,37.84,57.91,84.68,84.68,236.0
279,Unincorporated - Littlerock/Pearblossom,5.59,5.59,6.51,8.81,12.0,16.25,18.08,223.0
260,Unincorporated - Florence-Firestone,6.76,6.76,7.93,10.87,14.79,20.01,21.79,222.0
264,Unincorporated - Hi Vista,5.28,5.28,5.78,7.04,10.66,15.49,17.0,222.0
194,Los Angeles - Thai Town,7.21,7.21,8.52,11.81,15.58,20.6,23.06,220.0
275,Unincorporated - Lennox,7.29,7.29,8.65,12.06,15.94,21.11,23.13,217.0
316,Unincorporated - Sun Village,6.23,6.23,7.24,9.77,13.17,17.7,19.44,212.0


In [57]:
df_pivot.sort_values("change_week8_to_14", ascending=False).tail(20)

Unnamed: 0,area,week_8,week_9,week_10,week_11,week_12,week_13,week_14,change_week8_to_14
106,Los Angeles - Cheviot Hills,37.08,37.08,39.52,45.61,50.14,56.17,58.67,58.0
154,Los Angeles - Mandeville Canyon,35.04,35.04,37.12,42.33,46.47,51.98,54.84,57.0
103,Los Angeles - Century City,34.26,34.26,36.1,40.71,44.81,50.27,53.41,56.0
33,City of Hidden Hills,25.38,25.38,26.66,29.86,32.75,36.6,39.51,56.0
274,Unincorporated - Lakewood,18.27,18.27,19.37,22.12,24.59,27.88,28.07,54.0
94,Los Angeles - Beverly Crest,32.56,32.56,34.37,38.9,42.68,47.71,49.82,53.0
302,Unincorporated - San Francisquito Canyon/Bouqu...,37.23,37.23,39.11,43.8,47.86,53.28,56.27,51.0
10,City of Beverly Hills,32.02,32.02,33.79,38.21,41.32,45.46,47.54,48.0
64,City of Rolling Hills,29.97,29.97,31.21,34.32,36.93,40.42,42.54,42.0
93,Los Angeles - Bel Air,34.3,34.3,35.83,39.67,42.67,46.66,48.67,42.0


In [58]:
larger_places = df_pivot[df_pivot["population"] > 5000]

KeyError: 'population'

### Which places with populations of more than 5,000 saw the largest pct increase from week 8 to week 14?

In [None]:
larger_places.sort_values("change_week8_to_14", ascending=False).head(10)

### Which places with populations of more than 5,000 saw the smallest pct increase from week 8 to week 14?

In [None]:
larger_places.sort_values("change_week8_to_14", ascending=False).tail(10)

---

### Export to csv

In [None]:
import datetime as dt

today = dt.datetime.today().strftime("%m-%d-%Y")

In [None]:
# df.to_csv(
#     "output/lacounty_vaccinations_by_city_community_" + today + ".csv", index=False
# )

In [None]:
concatenated_df.to_csv(
    "output/lacounty_vaccinations_by_city_community_timeseries_" + today + ".csv",
    index=False,
)

In [None]:
df_pivot.to_csv(
    "output/lacounty_vaccinations_by_city_community_weeks_" + today + ".csv",
    index=False,
)