# LA County coronavirus vaccine administration by city/community

A data analysis for a story in the L.A. Times, by Matt Stiles & Luke Money.

Questions? matt.stiles@latimes.com

### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import glob
import path
import os

In [3]:
# Widen pandas' display limits so wide/long frames render in full in the notebook.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

### Create a timeseries from the daily scrapes for the coronavirus tracker data

In [4]:
# Directory holding one CSV per daily scrape.
# NOTE(review): this is an absolute, machine-specific path, and the name `path`
# shadows the `path` module imported at the top of the notebook.
path = "/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/vaccines/los-angeles/"
# glob returns matches in arbitrary filesystem order; sort so the concatenated
# frame (and its row index) is reproducible from run to run.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))

In [5]:
# Lazily read every scraped CSV, then stack them into one frame with a fresh index.
df_from_each_file = map(
    lambda csv_file: pd.read_csv(csv_file, encoding="ISO-8859-1", low_memory=False),
    all_files,
)
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)

In [6]:
# Work on an independent copy so the raw concatenated table stays untouched for export.
df = concatenated_df.copy(deep=True)

### The county's population numbers changed earlier. Let's select the most-recent count

In [7]:
# Area and population columns for rows carrying the latest update date.
population = df.loc[
    df["update_date"] == df["update_date"].max(), ["area", "pop_2019"]
]

### Strip out the strings placed in columns by the county health officials for small places

In [8]:
# Drop rows where the county put a text placeholder in a numeric column.
# Missing values compare as "not a placeholder", so they are kept, as before.
# The trailing .copy() makes the result an independent frame: later cells assign
# new columns to `df`, which on a bare filtered slice triggers
# SettingWithCopyWarning.
df = df[
    ~df["vaccinations"].str.strip().isin(["Unreliable Data", "<5"])
    & df["pop_2019"].str.strip().ne("No Pop Data")
    & ~df["vaccinations_pct"].str.strip().isin(["Unreliable Data", "No Pop Data"])
].copy()

In [9]:
# Trim surrounding spaces, then drop the literal "City of " prefix from area names.
df["area"] = df["area"].str.strip(" ")
df["area"] = df["area"].str.replace("City of ", "", regex=False)

### Merge with population and demographic figures/rates from vaccine disparity project

In [10]:
# Load the grouped neighborhood table from the sibling vaccine-disparity project
# (relative path, so the notebook must run from its own directory).
disparity = pd.read_csv("../../vaccine-disparity/_workspace/neighborhoods_grouped.csv")

In [11]:
# Trim surrounding spaces from place names so they can be matched in the merge.
disparity = disparity.assign(name=disparity["name"].str.strip(" "))

In [12]:
# Keep only the demographic columns needed for the merge.
# "totalpop_65andup_percent" was previously listed twice, which produced two
# identically named columns in the result; it is now selected once.
disparity_clean = disparity[
    [
        "name",
        "total_pop",
        "totalpop_17andunder",
        "totalpop_18to64",
        "totalpop_65andup",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
    ]
]

In [13]:
# Inner-join demographics to the vaccination rows on place name; `_merge` records
# the match source.
# NOTE(review): df_pop_merge is not referenced again in the visible notebook.
df_pop_merge = disparity_clean.merge(
    df, left_on="name", right_on="area", indicator=True
)

### Add the week of the year from the update date column

In [14]:
# ISO week number of each update date. `Series.dt.week` is deprecated (and removed
# in pandas 2.0); `isocalendar().week` returns the same ISO week. Cast back to a
# plain int64 so the column has the dtype `.dt.week` used to produce.
df["week"] = pd.to_datetime(df["update_date"]).dt.isocalendar().week.astype("int64")

In [15]:
# Peek at the ten most recent rows.
df.sort_values(by="update_date", ascending=False).head(n=10)

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date,week
4259,West Covina,38583,88873,43.4,2021-04-12,15
4406,Unincorporated - Angeles National Forest,296,1096,27.0,2021-04-12,15
4404,Unincorporated - Altadena,17205,36812,46.7,2021-04-12,15
4403,Unincorporated - Agua Dulce,958,3736,25.6,2021-04-12,15
4402,Unincorporated - Acton,1652,6961,23.7,2021-04-12,15
4401,Los Angeles - Woodland Hills,27387,57284,47.8,2021-04-12,15
4400,Los Angeles - Winnetka,15821,41471,38.1,2021-04-12,15
4399,Los Angeles - Wilshire Center,13370,40599,32.9,2021-04-12,15
4398,Los Angeles - Wilmington,11236,42449,26.5,2021-04-12,15
4397,Los Angeles - Wholesale District,12332,32528,37.9,2021-04-12,15


In [16]:
# Second pass: remove any remaining literal "City of" (no trailing space) from area names.
df["area"] = df["area"].str.replace("City of", "", regex=False)

### Clean up data types

In [17]:
# Placeholder strings were filtered out earlier, so the counts can be cast to numbers.
df["vaccinations"] = df["vaccinations"].astype("float64")

In [18]:
# Cast the population column to numbers as well.
df["pop_2019"] = df["pop_2019"].astype("float64")

In [19]:
# Recompute coverage as vaccinations per 100 residents, rounded to two decimals,
# overwriting the county-supplied percentage.
df["vaccinations_pct"] = df["vaccinations"].div(df["pop_2019"]).mul(100).round(2)

### Which week of the year? 

In [20]:
# ISO week number of each update date. `Series.dt.week` is deprecated (and removed
# in pandas 2.0); `isocalendar().week` returns the same ISO week. Cast back to a
# plain int64 so the pivoted column labels stay simple integers.
df["week"] = pd.to_datetime(df["update_date"]).dt.isocalendar().week.astype("int64")

In [21]:
# English weekday name (e.g. "Monday") for each update date.
df["weekday"] = df["update_date"].pipe(pd.to_datetime).dt.day_name()

### Just get the updates each Monday

In [22]:
# Keep one snapshot per week: only rows whose update date falls on a Monday.
df = df[df["weekday"].eq("Monday")]

In [23]:
# Spot-check one city's weekly rows in chronological order.
df.loc[df["area"].str.contains("Culver City")].sort_values(by="update_date").head(
    n=100
)

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date,week,weekday
14637,Culver City,7445.0,32840.0,22.67,2021-03-01,9,Monday
6981,Culver City,7445.0,32840.0,22.67,2021-03-08,10,Monday
15333,Culver City,9969.0,32840.0,30.36,2021-03-15,11,Monday
5937,Culver City,9969.0,32840.0,30.36,2021-03-22,12,Monday
14289,Culver City,13819.0,32840.0,42.08,2021-03-29,13,Monday
1761,Culver City,13819.0,32840.0,42.08,2021-04-05,14,Monday
4197,Culver City,17718.0,33638.0,52.67,2021-04-12,15,Monday


### Widen the data for weekly average columns

In [24]:
# One row per area, one column per week number, holding the mean coverage
# percentage (two decimals); `area` is restored as a regular column.
df_pivot = (
    df.pivot_table(
        index="area",
        columns="week",
        values="vaccinations_pct",
        aggfunc="mean",
    )
    .round(2)
    .reset_index()
)

In [25]:
# Preview the first five rows of the widened table.
df_pivot.head(n=5)

week,area,9,10,11,12,13,14,15
0,Agoura Hills,21.82,21.82,28.7,28.7,38.91,38.91,47.18
1,Alhambra,18.68,18.68,26.57,26.57,37.35,37.35,45.1
2,Arcadia,20.9,20.9,27.74,27.74,37.66,37.66,46.08
3,Artesia,20.83,20.83,25.73,25.73,35.27,35.27,42.27
4,Avalon,10.0,10.0,10.57,10.57,10.27,10.27,10.43


In [26]:
# Prefix every column label (including "area") with "pct_week_".
# Not idempotent: re-running this cell prefixes the labels again.
df_pivot.columns = [f"pct_week_{label}" for label in df_pivot.columns]

In [27]:
# The blanket prefixing turned "area" into "pct_week_area"; restore it as "name".
# The former "week_pop_2019" -> "population" mapping was dropped: the pivot holds
# only the area and week columns, so that key never matched anything.
df_pivot = df_pivot.rename(columns={"pct_week_area": "name"})

In [28]:
# Preview the first five rows after renaming the columns.
df_pivot.head(n=5)

Unnamed: 0,name,pct_week_9,pct_week_10,pct_week_11,pct_week_12,pct_week_13,pct_week_14,pct_week_15
0,Agoura Hills,21.82,21.82,28.7,28.7,38.91,38.91,47.18
1,Alhambra,18.68,18.68,26.57,26.57,37.35,37.35,45.1
2,Arcadia,20.9,20.9,27.74,27.74,37.66,37.66,46.08
3,Artesia,20.83,20.83,25.73,25.73,35.27,35.27,42.27
4,Avalon,10.0,10.0,10.57,10.57,10.27,10.27,10.43


### Merge with population figures/rates from vaccine disparity project

In [29]:
# Read the grouped neighborhood table from the sibling vaccine-disparity project
# (relative path), replacing any earlier `disparity` frame.
disparity = pd.read_csv("../../vaccine-disparity/_workspace/neighborhoods_grouped.csv")

In [30]:
# Trim surrounding spaces from place names ahead of the merge on "name".
df_pivot = df_pivot.assign(name=df_pivot["name"].str.strip(" "))

In [31]:
# Demographic columns carried into the weekly coverage table.
disparity_clean = disparity.loc[
    :,
    [
        "name",
        "total_pop",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "nonwhite_percentage",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
    ],
]

In [32]:
# Left join: keep every place in the weekly table, attaching demographics where
# the name matches; `_merge` flags rows that found no match.
df_merge = df_pivot.merge(disparity_clean, how="left", on="name", indicator=True)

In [33]:
# Relative (percent) change in coverage between week 9 and week 15, rounded to a
# whole number.
df_merge["change_week9_to_15"] = (
    df_merge["pct_week_15"]
    .sub(df_merge["pct_week_9"])
    .div(df_merge["pct_week_9"])
    .mul(100)
    .round()
)

In [34]:
# Independent copy limited to the columns used in the analysis below.
df_merge_slim = df_merge.loc[
    :,
    [
        "name",
        "pct_week_9",
        "pct_week_15",
        "total_pop",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "nonwhite_percentage",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
        "change_week9_to_15",
    ],
].copy()

### Filter the dataframe to larger places

In [35]:
# Places with no demographic match have a missing total_pop after the left merge.
# Filling with "" made astype(int) raise ValueError on the empty string; fill with
# 0 instead so the cast succeeds and those places fall below the population
# threshold applied in the next step.
df_merge_slim["total_pop"] = df_merge_slim["total_pop"].fillna(0).astype(int)

In [36]:
# Independent copy of the places whose total population exceeds 5,000.
larger_places = df_merge_slim.loc[df_merge_slim["total_pop"].gt(5000)].copy()

### Which places with populations of more than 5,000 saw the largest pct increase from week 9 to week 15?

In [37]:
# Ten places with the biggest percent change, among those that have a value for
# both week 9 and week 15.
largest = (
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .sort_values(by="change_week9_to_15", ascending=False)
    .head(n=10)
)

In [38]:
# Show current coverage and demographics for the largest-increase places.
largest.loc[
    :,
    [
        "name",
        "pct_week_15",
        "majority_hispanic",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
    ],
]

Unnamed: 0,name,pct_week_15,majority_hispanic,totalpop_17andunder_percent,totalpop_65andup_percent
161,Los Angeles - University Park,25.15,False,0.1197,0.061099
273,Unincorporated - Littlerock/Pearblossom,21.74,True,0.284108,0.139551
155,Los Angeles - Thai Town,27.97,False,0.107368,0.130282
254,Unincorporated - Florence-Firestone,25.36,True,0.317636,0.068662
269,Unincorporated - Lennox,27.16,True,0.285876,0.064121
20,Cudahy,25.56,True,0.305616,0.069818
49,Los Angeles - Alsace,29.92,True,0.209612,0.089146
90,Los Angeles - Green Meadows,23.61,True,0.308907,0.074795
86,Los Angeles - Florence-Firestone,22.81,True,0.319442,0.055256
169,Los Angeles - Vernon Central,22.51,True,0.323692,0.062574


### Mean income and older population share in places with largest pct increases

In [39]:
# Average of the median household incomes across the ten largest-increase places.
round(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .sort_values(by="change_week9_to_15", ascending=False)
    .head(n=10)["median_householdincome"]
    .mean()
)

42400

In [40]:
# Average 65-and-up population share (as a percentage) across the ten
# largest-increase places.
round(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .sort_values(by="change_week9_to_15", ascending=False)
    .head(n=10)["totalpop_65andup_percent"]
    .mean()
    * 100,
    2,
)

8.15

### Mean income and older population share in places with smallest pct increases

In [41]:
# Average of the median household incomes across the ten smallest-increase places.
round(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .sort_values(by="change_week9_to_15", ascending=False)
    .tail(n=10)["median_householdincome"]
    .mean()
)

141297

In [42]:
# Average 65-and-up population share (as a percentage) across the ten
# smallest-increase places.
round(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .sort_values(by="change_week9_to_15", ascending=False)
    .tail(n=10)["totalpop_65andup_percent"]
    .mean()
    * 100,
    2,
)

22.77

### Which places with populations of more than 5,000 saw the smallest pct increase from week 9 to week 15?

In [43]:
# Ten places with the smallest percent change, among those that have a value for
# both week 9 and week 15 (the tail of the descending sort).
smallest = (
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .sort_values(by="change_week9_to_15", ascending=False)
    .tail(n=10)
)

In [44]:
# Show current coverage and demographics for the smallest-increase places.
smallest.loc[
    :,
    [
        "name",
        "pct_week_15",
        "majority_hispanic",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
    ],
]

Unnamed: 0,name,pct_week_15,majority_hispanic,totalpop_17andunder_percent,totalpop_65andup_percent
200,Rancho Palos Verdes,50.23,False,0.213775,0.24859
180,Los Angeles - Westwood,35.76,False,0.088921,0.119817
203,Rolling Hills Estates,55.39,False,0.222468,0.247619
195,Palos Verdes Estates,51.26,False,0.213455,0.270924
81,Los Angeles - Encino,54.11,False,0.207176,0.199472
64,Los Angeles - Century City,59.68,False,0.177379,0.234012
67,Los Angeles - Cheviot Hills,63.66,False,0.211283,0.202283
55,Los Angeles - Beverly Crest,54.03,False,0.196094,0.237505
10,Beverly Hills,51.69,False,0.20252,0.213604
54,Los Angeles - Bel Air,52.68,False,0.209939,0.303493


### Which places have the highest vaccination coverage right now?

In [45]:
# Ten places with the highest week-15 coverage (both weeks must be present).
(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .loc[
        :,
        [
            "name",
            "pct_week_15",
            "majority_hispanic",
            "totalpop_17andunder_percent",
            "totalpop_65andup_percent",
        ],
    ]
    .sort_values(by="pct_week_15", ascending=False)
    .head(n=10)
)

Unnamed: 0,name,pct_week_15,majority_hispanic,totalpop_17andunder_percent,totalpop_65andup_percent
67,Los Angeles - Cheviot Hills,63.66,False,0.211283,0.202283
136,Los Angeles - Rancho Park,62.71,False,0.207814,0.163806
64,Los Angeles - Century City,59.68,False,0.177379,0.234012
126,Los Angeles - Pacific Palisades,56.17,False,0.248622,0.236204
134,Los Angeles - Playa Vista,55.74,False,0.186668,0.104989
203,Rolling Hills Estates,55.39,False,0.222468,0.247619
81,Los Angeles - Encino,54.11,False,0.207176,0.199472
55,Los Angeles - Beverly Crest,54.03,False,0.196094,0.237505
212,Sierra Madre,53.69,False,0.182263,0.228925
208,San Marino,53.29,False,0.227649,0.20661


### Which places have the lowest vaccination coverage right now? 

In [46]:
# Ten places with the lowest week-15 coverage (both weeks must be present).
(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .loc[
        :,
        [
            "name",
            "pct_week_15",
            "majority_hispanic",
            "totalpop_17andunder_percent",
            "totalpop_65andup_percent",
        ],
    ]
    .sort_values(by="pct_week_15", ascending=False)
    .tail(n=10)
)

Unnamed: 0,name,pct_week_15,majority_hispanic,totalpop_17andunder_percent,totalpop_65andup_percent
179,Los Angeles - Westlake,22.88,True,0.23015,0.093303
86,Los Angeles - Florence-Firestone,22.81,True,0.319442,0.055256
169,Los Angeles - Vernon Central,22.51,True,0.323692,0.062574
146,Los Angeles - South Park,22.28,True,0.322496,0.056965
63,Los Angeles - Central,22.14,True,0.306173,0.065168
65,Los Angeles - Century Palms/Cove,21.88,True,0.311102,0.073659
273,Unincorporated - Littlerock/Pearblossom,21.74,True,0.284108,0.139551
172,Los Angeles - Watts,21.42,True,0.358358,0.05553
168,Los Angeles - Vermont Vista,20.7,True,0.319345,0.067
266,Unincorporated - Lake Los Angeles,19.11,True,0.302845,0.114554


### Merge the places with the largest and smallest increases in vax coverage for graphics

In [47]:
# Label each group before stacking them. `largest` and `smallest` come from
# head()/tail() slices, so assigning a column in place can raise
# SettingWithCopyWarning; assign() returns a new frame instead.
largest = largest.assign(category="Largest increase")
smallest = smallest.assign(category="Smallest increase")

In [48]:
# Stack the two groups; the original row labels are kept in an "index" column.
large_small = pd.concat((largest, smallest), axis=0).reset_index(drop=False)

In [49]:
# Build a display name by removing the jurisdiction prefixes, one literal at a time.
large_small["name_clean"] = large_small["name"]
for _prefix in ("City of", "Los Angeles - ", "Unincorporated - "):
    large_small["name_clean"] = large_small["name_clean"].str.replace(
        _prefix, "", regex=False
    )

In [50]:
# Independent copy holding only the columns needed for the graphic.
large_small_slim = large_small.loc[
    :,
    [
        "category",
        "name_clean",
        "pct_week_9",
        "pct_week_15",
        "change_week9_to_15",
    ],
].copy()

In [51]:
# Swap the working column names for reader-facing labels.
large_small_slim = large_small_slim.rename(
    columns={
        "change_week9_to_15": "% change",
        "pct_week_15": "Week 15",
        "pct_week_9": "Week 9",
        "name_clean": "Place",
        "category": "Category",
    }
)

---

### Merge with geography

In [52]:
# Read the input GeoJSON with geopandas; per its file name it holds L.A. County
# city, neighborhood and unincorporated-area boundaries.
places_geo = gpd.read_file(
    "input/cities-neighborhoods-unincorporated-la-county-no-islands.geojson"
)

In [53]:
# Reproject the geometries to EPSG:4326 (WGS 84 longitude/latitude).
places_geo = places_geo.to_crs("EPSG:4326")

### Clean up place names

In [54]:
# Build the join key: trim spaces, then remove each jurisdiction prefix literal.
larger_places["name_clean"] = larger_places["name"].str.strip(" ")
for _prefix in ("City of", "Los Angeles - ", "Unincorporated - "):
    larger_places["name_clean"] = larger_places["name_clean"].str.replace(
        _prefix, "", regex=False
    )

In [55]:
# Inner-join the boundaries to the coverage table on the cleaned place name.
# NOTE(review): stripping prefixes can make distinct places share a name_clean
# (e.g. "Los Angeles - Florence-Firestone" and "Unincorporated -
# Florence-Firestone"), which would duplicate rows here — confirm against the
# NAME values in the geo file.
df_geo = places_geo.merge(
    right=larger_places,
    left_on="NAME",
    right_on="name_clean",
)

### Slim down the geo file and simplify its features for graphics

In [56]:
# Keep only the name, the two coverage weeks, the percent change and the geometry
# for the map export.
df_geo_slim = df_geo[
    ["name_clean", "pct_week_9", "pct_week_15", "change_week9_to_15", "geometry"]
]

In [57]:
# Write the slimmed layer as GeoJSON. The column list is the same one used to
# build df_geo_slim, so the selection does not drop anything further.
df_geo_slim[
    ["name_clean", "pct_week_9", "pct_week_15", "change_week9_to_15", "geometry"]
].to_file("output/vaccine-community/df_geo.geojson", driver="GeoJSON")

In [58]:
# Export just the place name and percent change as a CSV, without the row index.
df_geo_slim.to_csv(
    "output/vaccine-community/df_geo_metadata_datawrapper.csv",
    columns=["name_clean", "change_week9_to_15"],
    index=False,
)

In [59]:
!mapshaper -i output/vaccine-community/df_geo.geojson -simplify percentage=.3 no-repair  -o output/vaccine-community/df_geo_simple.geojson \

[o] Wrote output/vaccine-community/df_geo_simple.geojson


---

## Exports

In [60]:
import datetime as dt

# Date stamp (MM-DD-YYYY, local time) appended to the export file names.
today = f"{dt.datetime.today():%m-%d-%Y}"

In [61]:
# Save the untouched concatenation of all daily scrapes, stamped with today's date.
concatenated_df.to_csv(
    f"output/vaccine-community/lac_vax_by_place_raw_table_{today}.csv",
    index=False,
)

In [62]:
# Save the wide, one-column-per-week coverage table, stamped with today's date.
df_pivot.to_csv(
    f"output/vaccine-community/lac_vax_by_place_weeks_{today}.csv",
    index=False,
)

In [63]:
# Export the ten places with the largest percent change (both weeks present).
(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .sort_values(by="change_week9_to_15", ascending=False)
    .head(n=10)
    .to_csv(
        f"output/vaccine-community/largest_vax_coverage_change_{today}.csv",
        index=False,
    )
)

In [64]:
# Export the ten places with the smallest percent change (both weeks present).
(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .sort_values(by="change_week9_to_15", ascending=False)
    .tail(n=10)
    .to_csv(
        f"output/vaccine-community/smallest_vax_coverage_change_{today}.csv",
        index=False,
    )
)

In [65]:
# Export the ten places with the lowest week-15 coverage (both weeks present).
(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .loc[:, ["name", "pct_week_15", "total_pop"]]
    .sort_values(by="pct_week_15", ascending=False)
    .tail(n=10)
    .to_csv(
        f"output/vaccine-community/lowest_coverage_{today}.csv",
        index=False,
    )
)

In [66]:
# Export the ten places with the highest week-15 coverage (both weeks present).
(
    larger_places.dropna(subset=["pct_week_9", "pct_week_15"])
    .loc[:, ["name", "pct_week_15", "total_pop"]]
    .sort_values(by="pct_week_15", ascending=False)
    .head(n=10)
    .to_csv(
        f"output/vaccine-community/highest_coverage_{today}.csv",
        index=False,
    )
)

In [67]:
# Save the combined largest/smallest-increase table used for the graphic.
large_small_slim.to_csv(
    path_or_buf="output/vaccine-community/large_small_slim.csv",
    index=False,
)