In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# load the cases dataset; it is used here for its per-area population column
cases_csv = "resources/covid19cases_test.csv"
new_cases = pd.read_csv(cases_csv)

In [3]:
# drop the placeholder areas so only the counties and the "California" row remain
area = new_cases["area"]
is_unknown = area == "Unknown"
is_out_of_state = area == "Out of state"
clean_cases = new_cases.loc[~(is_unknown | is_out_of_state)]

In [4]:
# collect the distinct population values, in order of first appearance
unique_populations = pd.unique(clean_cases["population"])
populations = unique_populations.tolist()
print(populations)

[1685886.0, 1117.0, 38531.0, 217769.0, 44289.0, 40129160.0, 22593.0, 1160099.0, 27558.0, 193098.0, 1032227.0, 29348.0, 134098.0, 191649.0, 18453.0, 927251.0, 156444.0, 64871.0, 30065.0, 10257557.0, 160089.0, 260800.0, 17795.0, 88439.0, 287420.0, 9475.0, 13961.0, 448732.0, 139652.0, 98710.0, 3228519.0, 400434.0, 18997.0, 2468145.0, 1567975.0, 64022.0, 2217398.0, 3370418.0, 892280.0, 782545.0, 278862.0, 778001.0, 456373.0, 1967585.0, 273999.0, 177925.0, 3115.0, 43956.0, 444255.0, 496668.0, 562303.0, 105747.0, 65885.0, 13354.0, 484423.0, 52351.0, 852747.0, 223612.0, 79290.0]


In [5]:
# load the vaccine dataset, then show its last five rows to see the most recent date
vacc_csv = "resources/covid19vaccinesbycounty.csv"
new_vacc = pd.read_csv(vacc_csv)
new_vacc.tail()

Unnamed: 0,county,administered_date,total_doses,cumulative_total_doses,pfizer_doses,cumulative_pfizer_doses,moderna_doses,cumulative_moderna_doses,jj_doses,cumulative_jj_doses,partially_vaccinated,total_partially_vaccinated,fully_vaccinated,cumulative_fully_vaccinated,at_least_one_dose,cumulative_at_least_one_dose,california_flag
5420,Amador,2021-03-15,179,14676,22,4311,146,10327,11,38,54,7017,125,3934,79,10951,California
5421,Amador,2021-03-16,453,15129,58,4369,391,10718,4,42,195,6964,255,4189,202,11153,California
5422,Amador,2021-03-17,171,15300,33,4402,135,10853,3,45,108,7014,63,4252,113,11266,California
5423,Amador,2021-03-18,132,15432,72,4474,57,10910,3,48,84,7058,47,4299,91,11357,California
5424,Amador,2021-03-19,136,15568,23,4497,106,11016,7,55,74,7080,61,4360,83,11440,California


In [6]:
# create variable for the last date
# Derived from the data instead of being typed in by hand, so the notebook
# keeps working when the CSV is refreshed with newer rows.
# The tail() output shows 'administered_date' as YYYY-MM-DD strings, for which
# the lexicographic maximum is also the latest date; dropna() keeps max() from
# comparing strings against NaN.
last_date = new_vacc["administered_date"].dropna().max()

In [7]:
# build two snapshots of 'cumulative_fully_vaccinated' for the most recent date
keep_cols = ['county', 'administered_date', 'cumulative_fully_vaccinated']
on_last_date = new_vacc['administered_date'] == f'{last_date}'
is_named_county = (new_vacc['county'] != 'Outside California') & (new_vacc['county'] != 'Unknown')
# cali_df: every row for that date, including the non-county rows
cali_df = new_vacc.loc[on_last_date, keep_cols]
# vacc_df: the same date, without 'Outside California' and 'Unknown'
vacc_df = new_vacc.loc[on_last_date & is_named_county, keep_cols]

In [8]:
# statewide total: sum over every row of cali_df, which still contains the
# 'Outside California' and 'Unknown' rows
total_full = cali_df['cumulative_fully_vaccinated'].sum()
# record for the California row that gets added to the vaccine DataFrame
vacc_ca = dict(
    county='California',
    administered_date=f'{last_date}',
    cumulative_fully_vaccinated=total_full,
)

In [9]:
# append a row for California data
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the row
# is wrapped in a one-row DataFrame and added with pd.concat instead.
# Any existing 'California' row is dropped first so that re-running this cell
# does not add the statewide total a second time.
county_rows = vacc_df.loc[vacc_df['county'] != 'California']
vacc_df = pd.concat([county_rows, pd.DataFrame([vacc_ca])], ignore_index=True)

In [10]:
# sort DataFrame alphabetically by county, then reset index
vacc_df = vacc_df.sort_values("county").reset_index(drop=True)

# add population data to DataFrame
# Population is looked up by area name rather than assigned by list position:
# a positional assignment silently misaligns (or fails on a length mismatch)
# if two areas share a population value or the two datasets are ordered
# differently.
population_by_area = (
    clean_cases
    .dropna(subset=["population"])
    .drop_duplicates(subset="area")
    .set_index("area")["population"]
)
vacc_df['population'] = vacc_df['county'].map(population_by_area)

# fail loudly when a county name has no match in the cases dataset, instead of
# carrying a NaN into the percentage calculation
unmatched = vacc_df.loc[vacc_df['population'].isna(), 'county'].tolist()
if unmatched:
    raise ValueError(f"no population found for: {unmatched}")

# convert population floats to integers
vacc_df['population'] = vacc_df['population'].astype(int)

In [11]:
# percent of each area's population that is fully vaccinated, to two decimals
fully_vaccinated_share = vacc_df['cumulative_fully_vaccinated'] / vacc_df['population']
vacc_df['percent_full'] = (fully_vaccinated_share * 100).round(2)

In [12]:
# verification: display vacc_df to eyeball the county, date, vaccinated count,
# population and percent_full columns before exporting
vacc_df

Unnamed: 0,county,administered_date,cumulative_fully_vaccinated,population,percent_full
0,Alameda,2021-03-19,242233,1685886,14.37
1,Alpine,2021-03-19,270,1117,24.17
2,Amador,2021-03-19,4360,38531,11.32
3,Butte,2021-03-19,31846,217769,14.62
4,Calaveras,2021-03-19,6137,44289,13.86
5,California,2021-03-19,5009299,40129160,12.48
6,Colusa,2021-03-19,1963,22593,8.69
7,Contra Costa,2021-03-19,181120,1160099,15.61
8,Del Norte,2021-03-19,2153,27558,7.81
9,El Dorado,2021-03-19,24871,193098,12.88


In [13]:
import json

In [14]:
# turn the DataFrame into a list with one dict per row ('records' orient);
# that list can be used as 'data.js', or passed to .insert_many with PyMongo
list_d = vacc_df.to_dict('records')

In [15]:
# serialize the records as JSON into a text file, to become data.js later
with open("weekly_dict.txt", "w") as out_file:
    serialized = json.dumps(list_d)
    out_file.write(serialized)