# LA County coronavirus vaccine administration by city/community

### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
import glob
import path
import os

In [3]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Create a timeseries from coronavirus-tracker data

In [4]:
path = "/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/vaccines/los-angeles/"
all_files = glob.glob(os.path.join(path, "*.csv"))

In [5]:
df_from_each_file = (
    pd.read_csv(f, encoding="ISO-8859-1", low_memory=False) for f in all_files
)
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)

In [6]:
df = concatenated_df.copy()

In [7]:
population = df[df["update_date"] == df["update_date"].max()][["area", "pop_2019"]]

In [8]:
population.head()

Unnamed: 0,area,pop_2019
4176,City of Agoura Hills,18019
4177,City of Alhambra,72940
4178,City of Arcadia,51103
4179,City of Artesia,14266
4180,City of Avalon,3077


### Strip out the strings placed in columns by the county

In [9]:
df = df[
    (df.vaccinations.str.strip() != "Unreliable Data")
    & (df.pop_2019.str.strip() != "No Pop Data")
    & (df.vaccinations_pct.str.strip() != "Unreliable Data")
    & (df.vaccinations_pct.str.strip() != "No Pop Data")
    & (df.vaccinations.str.strip() != "<5")
]

### Clean up places names

In [10]:
df.head()

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date
0,City of Agoura Hills,5008,17447,28.7,2021-03-25
1,City of Alhambra,18947,71313,26.6,2021-03-25
2,City of Arcadia,13777,49666,27.7,2021-03-25
3,City of Artesia,3571,13877,25.7,2021-03-25
4,City of Avalon,317,3000,10.6,2021-03-25


In [11]:
df.area = df.area.str.replace("City of", "", regex=False)

In [12]:
df.head()

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date
0,Agoura Hills,5008,17447,28.7,2021-03-25
1,Alhambra,18947,71313,26.6,2021-03-25
2,Arcadia,13777,49666,27.7,2021-03-25
3,Artesia,3571,13877,25.7,2021-03-25
4,Avalon,317,3000,10.6,2021-03-25


### Merge with population figures/rates from vaccine disparity project

In [13]:
disparity = pd.read_csv("../../vaccine-disparity/_workspace/neighborhoods_grouped.csv")

In [14]:
disparity_clean = disparity[
    [
        "name",
        "total_pop",
        "totalpop_17andunder",
        "totalpop_18to64",
        "totalpop_65andup",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "totalpop_65andup_percent",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
    ]
]

In [15]:
df_pop_merge = pd.merge(
    disparity_clean, df, left_on="name", right_on="area", indicator=True
)

In [16]:
df_pop_merge._merge.value_counts()

both          11924
right_only        0
left_only         0
Name: _merge, dtype: int64

In [17]:
df["week"] = pd.to_datetime(df["update_date"]).dt.week

In [18]:
df.week.value_counts()

13    2352
12    2352
11    2352
10    2342
14    2340
9     2338
8     1670
15     332
Name: week, dtype: int64

In [19]:
df.sort_values("update_date", ascending=False).head(10)

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date,week
4259,West Covina,38583,88873,43.4,2021-04-12,15
4406,Unincorporated - Angeles National Forest,296,1096,27.0,2021-04-12,15
4404,Unincorporated - Altadena,17205,36812,46.7,2021-04-12,15
4403,Unincorporated - Agua Dulce,958,3736,25.6,2021-04-12,15
4402,Unincorporated - Acton,1652,6961,23.7,2021-04-12,15
4401,Los Angeles - Woodland Hills,27387,57284,47.8,2021-04-12,15
4400,Los Angeles - Winnetka,15821,41471,38.1,2021-04-12,15
4399,Los Angeles - Wilshire Center,13370,40599,32.9,2021-04-12,15
4398,Los Angeles - Wilmington,11236,42449,26.5,2021-04-12,15
4397,Los Angeles - Wholesale District,12332,32528,37.9,2021-04-12,15


### Clean up data types

In [20]:
df["vaccinations"] = df["vaccinations"].astype(float)

In [21]:
df["pop_2019"] = df["pop_2019"].astype(float)

In [22]:
df["vaccinations_pct"] = ((df["vaccinations"] / df["pop_2019"]) * 100).round(2)

### Which week of the year? 

In [23]:
df["week"] = pd.to_datetime(df["update_date"]).dt.week

In [24]:
df.sort_values("week", ascending=False).head()

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date,week
4336,Los Angeles - Miracle Mile,6522.0,15523.0,42.02,2021-04-12,15
4283,Los Angeles - Cheviot Hills,4708.0,7395.0,63.66,2021-04-12,15
4285,Los Angeles - Cloverdale/Cochran,3649.0,11658.0,31.3,2021-04-12,15
4286,Los Angeles - Country Club Park,5057.0,12607.0,40.11,2021-04-12,15
4287,Los Angeles - Crenshaw District,4048.0,11229.0,36.05,2021-04-12,15


### Widen the data for weekly average columns

In [25]:
df_pivot = (
    pd.pivot_table(
        df,
        values="vaccinations_pct",
        index="area",
        columns="week",
        aggfunc="mean",
    )
    .round(2)
    .reset_index()
)

In [26]:
df_pivot.head()

week,area,8,9,10,11,12,13,14,15
0,Agoura Hills,21.82,21.82,23.79,28.7,33.08,38.91,42.45,47.18
1,Alhambra,18.68,18.68,20.93,26.57,31.19,37.35,40.67,45.1
2,Arcadia,20.9,20.9,22.85,27.74,31.99,37.66,41.27,46.08
3,Artesia,20.83,20.83,22.23,25.73,29.82,35.27,38.27,42.27
4,Avalon,10.0,10.0,10.16,10.57,10.44,10.27,10.34,10.43


In [27]:
df_pivot.columns = ["pct_week_" + str(col) for col in df_pivot.columns]

In [28]:
df_pivot.rename(
    columns={"pct_week_area": "name", "week_pop_2019": "population"}, inplace=True
)

In [29]:
df_pivot.head()

Unnamed: 0,name,pct_week_8,pct_week_9,pct_week_10,pct_week_11,pct_week_12,pct_week_13,pct_week_14,pct_week_15
0,Agoura Hills,21.82,21.82,23.79,28.7,33.08,38.91,42.45,47.18
1,Alhambra,18.68,18.68,20.93,26.57,31.19,37.35,40.67,45.1
2,Arcadia,20.9,20.9,22.85,27.74,31.99,37.66,41.27,46.08
3,Artesia,20.83,20.83,22.23,25.73,29.82,35.27,38.27,42.27
4,Avalon,10.0,10.0,10.16,10.57,10.44,10.27,10.34,10.43


### Merge with population figures/rates from vaccine disparity project

In [30]:
disparity = pd.read_csv("../../vaccine-disparity/_workspace/neighborhoods_grouped.csv")

In [31]:
disparity.head()

Unnamed: 0.1,Unnamed: 0,id,name,tract,tract_percent,Unnamed: 0_x,total_white,white_17andunder,white_18to64,white_65andup,total_hispanic,hispanic_17andunder,hispanic_18to64,hispanic_65andup,total_black,black_17andunder,black_18to64,black_65andup,total_native_american,native_american_17andunder,native_american_18to64,native_american_65andup,total_other,other_17andunder,other_18to64,...,total_asian_percent,asian_17andunder_percent,asian_18to64_percent,asian_65andup_percent,total_pacificislander_percent,pacificislander_17andunder_percent,pacificislander_18to64_percent,pacificislander_65andup_percent,nonwhite_percentage,totalpop_17andunder_percent,totalpop_18to64_percent,totalpop_65andup_percent,healthcare_workers_percent,majority_nonwhite,majority_black,majority_asian,majority_white,majority_american_indian_and_alaska_native,majority_hispanic,majority_18to64,majority_65andup,Unnamed: 0_y,neighborhood,median_householdincome,majorityhigherthanmedianincomeLA
0,0,1,Agoura Hills,56022.97,3.015807,15128,14294.386712,2565.000554,9240.747705,2488.638453,2060.384429,637.837458,1224.50195,2.1e-05,475.768824,167.211468,235.017585,73.539771,29.80088,16.466653,13.334227,0.0,1360.190418,599.562387,692.68505,...,0.0841,0.076485,0.085944,0.084423,0.0,0.0,0.0,0.0,0.256824,0.215044,0.630619,0.159371,0.058344,True,False,False,True,False,False,True,False,0.0,Agoura Hills,121292.2827,True
1,1,2,Alhambra,161868.09,20.001823,47335,7500.64187,595.828265,4755.725927,2149.087677,30438.112581,7119.58294,19780.332201,1.5e-05,1892.355281,466.04967,1181.319618,244.985992,400.171514,26.149556,248.238252,125.783706,19460.555741,5174.416696,12865.429555,...,0.511584,0.393858,0.517814,0.598009,0.002586,0.005788,0.001722,0.002694,0.911412,0.169381,0.651545,0.179751,0.098254,False,False,True,False,False,False,True,False,1.0,Alhambra,61897.39551,False
2,2,3,Arcadia,126258.98,12.228262,37169,13381.530815,2115.148625,6685.762725,4580.619465,8147.091902,2441.806335,4856.81538,6e-06,990.881657,334.528273,525.148452,131.204932,70.730779,0.017364,24.079301,46.634115,5085.192956,1626.462948,3048.032363,...,0.60552,0.596204,0.642072,0.495743,0.001375,0.000157,0.001546,0.002277,0.783141,0.222551,0.592985,0.185774,0.103864,False,False,True,False,False,False,True,False,2.0,Arcadia,92543.61666,True
3,3,4,Artesia,55463.91,3.000606,18036,2543.198368,249.844904,1424.499924,868.85354,6684.617774,1596.709299,4444.111268,9.7e-05,852.042661,301.822462,497.79144,52.42876,59.913886,0.003356,0.049661,59.860869,4909.63036,1361.455844,3131.351644,...,0.367121,0.321175,0.387681,0.341434,0.002986,2.4e-05,0.001575,0.01278,0.848194,0.201502,0.644574,0.153806,0.181542,False,False,False,False,False,False,True,False,3.0,Artesia,67486.31903,False
4,4,5,Avalon,11981.0,0.830664,3881,1298.646327,95.240319,830.798525,372.607483,1704.840285,714.150321,926.143353,0.000333,3.354092,0.0,3.354092,0.0,0.0,0.0,0.0,0.0,583.530943,294.597351,263.280451,...,0.000558,0.0,2.7e-05,0.003757,0.000324,0.0,0.000558,0.0,0.574229,0.265365,0.580198,0.14441,0.067722,False,False,False,False,False,True,True,False,4.0,Avalon,73854.46872,True


In [32]:
disparity_clean = disparity[
    [
        "name",
        "total_pop",
        #         "totalpop_17andunder_percent",
        #         "totalpop_18to64",
        #         "totalpop_65andup",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "nonwhite_percentage",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
    ]
]

In [33]:
df_merge = pd.merge(df_pivot, disparity_clean, on="name")

In [34]:
df_merge["change_week8_to_15"] = (
    ((df_merge["pct_week_15"] - df_merge["pct_week_8"]) / df_merge["pct_week_8"]) * 100
).round()

In [35]:
df_merge_slim = df_merge[
    [
        "name",
        "pct_week_8",
        "pct_week_15",
        "total_pop",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "nonwhite_percentage",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
        "change_week8_to_15",
    ]
]

In [36]:
df_merge_slim.sort_values("change_week8_to_15", ascending=False).tail(10)

Unnamed: 0,name,pct_week_8,pct_week_15,total_pop,totalpop_17andunder_percent,totalpop_65andup_percent,nonwhite_percentage,majority_white,majority_hispanic,median_householdincome,change_week8_to_15
6,Los Angeles - Bel Air,34.3,52.68,7089.875517,0.209939,0.303493,0.231194,True,False,200555.1398,54.0
219,Unincorporated - Santa Catalina Island,8.91,12.85,75.17025,0.08567,0.146566,0.328672,True,False,44555.3499,44.0
222,Unincorporated - Saugus/Canyon Country,39.16,50.6,387.871012,0.068194,0.201725,0.394058,True,False,86114.50759,29.0
201,Unincorporated - Padua Hills,32.03,38.12,23.307818,0.229093,0.231904,0.510892,False,False,200691.5032,19.0
150,Unincorporated - Bradbury,91.79,,17.528365,0.150437,0.338797,0.488834,True,False,,
154,Unincorporated - Charter Oak,,36.84,31.849237,0.206382,0.17859,0.797564,False,False,99545.65026,
165,Unincorporated - East Lancaster,,22.34,41.729479,0.187149,0.236239,0.527762,False,False,49757.10699,
221,Unincorporated - Saugus,34.59,,2935.136173,0.228273,0.140286,0.43701,True,False,109405.8172,
232,Unincorporated - Universal City,,,5.913468,0.133156,0.124041,0.265811,True,False,,
248,Unincorporated - Whittier Narrows,,,5493.652677,0.204398,0.163589,0.93544,False,True,65094.07573,


In [37]:
larger_places = df_merge_slim[df_merge_slim["total_pop"] > 5000]

### Which places with populations of more than 5,000 saw the largest pct increase from week 8 to week 15?

In [38]:
larger_places.sort_values("change_week8_to_15", ascending=False).head(10)

Unnamed: 0,name,pct_week_8,pct_week_15,total_pop,totalpop_17andunder_percent,totalpop_65andup_percent,nonwhite_percentage,majority_white,majority_hispanic,median_householdincome,change_week8_to_15
113,Los Angeles - University Park,6.46,25.15,25962.486834,0.1197,0.061099,0.785405,False,False,24656.25697,289.0
192,Unincorporated - Littlerock/Pearblossom,5.59,21.74,6786.857583,0.284108,0.139551,0.700575,False,True,48752.94395,289.0
107,Los Angeles - Thai Town,7.21,27.97,9314.899544,0.107368,0.130282,0.527663,False,False,42919.83666,288.0
173,Unincorporated - Florence-Firestone,6.76,25.36,63373.648603,0.317636,0.068662,0.995205,False,True,44489.45377,275.0
188,Unincorporated - Lennox,7.29,27.16,19025.322846,0.285876,0.064121,0.983711,False,True,51161.26784,273.0
1,Los Angeles - Alsace,8.49,29.92,10053.532747,0.209612,0.089146,0.939044,False,True,43093.10702,252.0
42,Los Angeles - Green Meadows,6.82,23.61,21718.398531,0.308907,0.074795,0.994338,False,True,41366.2288,246.0
38,Los Angeles - Florence-Firestone,6.59,22.81,48257.717134,0.319442,0.055256,0.994513,False,True,40483.50613,246.0
121,Los Angeles - Vernon Central,6.52,22.51,50398.002681,0.323692,0.062574,0.996621,False,True,40567.66783,245.0
29,Los Angeles - Echo Park,12.06,41.02,13606.416088,0.134642,0.106789,0.608251,False,False,80221.86169,240.0


### Mean income and older population share in places with largest pct increases

In [39]:
round(
    larger_places.sort_values("change_week8_to_15", ascending=False)
    .head(10)["median_householdincome"]
    .mean()
)

45771

In [40]:
round(
    larger_places.sort_values("change_week8_to_15", ascending=False)
    .head(10)["totalpop_65andup_percent"]
    .mean()
    * 100,
    2,
)

8.52

### Which places with populations of more than 5,000 saw the smallest pct increase from week 8 to week 14?

In [41]:
larger_places.sort_values("change_week8_to_15", ascending=False).tail(10)

Unnamed: 0,name,pct_week_8,pct_week_15,total_pop,totalpop_17andunder_percent,totalpop_65andup_percent,nonwhite_percentage,majority_white,majority_hispanic,median_householdincome,change_week8_to_15
78,Los Angeles - Pacific Palisades,30.4,56.17,18049.93585,0.248622,0.236204,0.151819,True,False,187493.7967,85.0
183,Unincorporated - Ladera Heights,25.83,47.48,12139.984947,0.162826,0.215393,0.787777,False,False,102493.2975,84.0
105,Los Angeles - Tarzana,26.13,47.64,29276.422436,0.199575,0.189168,0.333741,True,False,80716.71485,82.0
132,Los Angeles - Westwood,19.74,35.76,55923.818159,0.088921,0.119817,0.4889,True,False,73222.64454,81.0
33,Los Angeles - Encino,30.77,54.11,48064.57624,0.207176,0.199472,0.242364,True,False,96824.75157,76.0
16,Los Angeles - Century City,34.26,59.68,12434.936791,0.177379,0.234012,0.316788,True,False,119661.6992,74.0
19,Los Angeles - Cheviot Hills,37.08,63.66,8640.938035,0.211283,0.202283,0.26995,True,False,139097.5358,72.0
7,Los Angeles - Beverly Crest,32.56,54.03,10869.083843,0.196094,0.237505,0.134145,True,False,181638.5865,66.0
6,Los Angeles - Bel Air,34.3,52.68,7089.875517,0.209939,0.303493,0.231194,True,False,200555.1398,54.0
248,Unincorporated - Whittier Narrows,,,5493.652677,0.204398,0.163589,0.93544,False,True,65094.07573,


### Mean income and older population share in places with smalles pct increases

In [42]:
round(
    larger_places.sort_values("change_week8_to_15", ascending=False)
    .tail(10)["median_householdincome"]
    .mean()
)

124680

In [43]:
round(
    larger_places.sort_values("change_week8_to_15", ascending=False)
    .tail(10)["totalpop_65andup_percent"]
    .mean()
    * 100,
    2,
)

21.01

### Which places have the highest vaccination coverage right now? 

In [44]:
larger_places[["name", "pct_week_15"]].sort_values("pct_week_15", ascending=False).head(
    10
)

Unnamed: 0,name,pct_week_15
19,Los Angeles - Cheviot Hills,63.66
88,Los Angeles - Rancho Park,62.71
16,Los Angeles - Century City,59.68
78,Los Angeles - Pacific Palisades,56.17
86,Los Angeles - Playa Vista,55.74
33,Los Angeles - Encino,54.11
7,Los Angeles - Beverly Crest,54.03
6,Los Angeles - Bel Air,52.68
10,Los Angeles - Brentwood,52.49
8,Los Angeles - Beverlywood,52.0


### What's the relationship between these variables and vaccination rate increases?

In [45]:
df_corr = larger_places[
    [
        "name",
        "pct_week_15",
        "change_week8_to_15",
        "nonwhite_percentage",
        "totalpop_65andup_percent",
        "median_householdincome",
    ]
]

In [46]:
df_corr.corr(method="pearson")

Unnamed: 0,pct_week_15,change_week8_to_15,nonwhite_percentage,totalpop_65andup_percent,median_householdincome
pct_week_15,1.0,-0.6609,-0.619982,0.681467,0.72237
change_week8_to_15,-0.6609,1.0,0.53564,-0.669029,-0.6676
nonwhite_percentage,-0.619982,0.53564,1.0,-0.453173,-0.742385
totalpop_65andup_percent,0.681467,-0.669029,-0.453173,1.0,0.609078
median_householdincome,0.72237,-0.6676,-0.742385,0.609078,1.0


---

### Merge with geography

In [47]:
places_geo = gpd.read_file("../../vaccine-disparity/assets/lac-areas.json")

In [52]:
larger_places.head()

Unnamed: 0,name,pct_week_8,pct_week_15,total_pop,totalpop_17andunder_percent,totalpop_65andup_percent,nonwhite_percentage,majority_white,majority_hispanic,median_householdincome,change_week8_to_15
0,Los Angeles - Adams-Normandie,9.62,30.13,8343.156246,0.232311,0.08511,0.952524,False,True,49595.00637,213.0
1,Los Angeles - Alsace,8.49,29.92,10053.532747,0.209612,0.089146,0.939044,False,True,43093.10702,252.0
3,Los Angeles - Arleta,14.75,35.92,34535.950264,0.218514,0.119109,0.924363,False,True,81174.77394,144.0
4,Los Angeles - Atwater Village,18.52,46.78,14994.342344,0.158207,0.153406,0.703651,False,False,76128.19959,153.0
5,Los Angeles - Baldwin Hills,12.38,31.22,27194.67882,0.171565,0.156819,0.932933,False,False,38941.91553,152.0


---

### Export to csv

In [49]:
import datetime as dt

today = dt.datetime.today().strftime("%m-%d-%Y")

In [54]:
concatenated_df.to_csv(
    "output/vaccine-community/lacounty_vaccinations_by_city_community_timeseries_"
    + today
    + ".csv",
    index=False,
)

In [53]:
df_pivot.to_csv(
    "output/vaccine-community/lacounty_vaccinations_by_city_community_weeks_"
    + today
    + ".csv",
    index=False,
)