In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Weekly aggregation helper; step-by-step commentary for this code is in the previous notebooks.
def weekly(data, census_path="resources/CA_Counties_Census_Data.csv"):
    """Roll daily per-county case/death counts up into seven-day blocks.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns "county", "date", "newcountconfirmed" and
        "newcountdeaths". Rows whose county is "Unassigned" or
        "Out Of Country" are ignored.
    census_path : str or path-like, optional
        CSV with a "County" column (names may carry a " County" suffix) and a
        "Population" column. Defaults to the file shipped in ``resources/``.

    Returns
    -------
    pandas.DataFrame
        One row per (county, complete week) with the columns "County", "week",
        "week number", "total cases", "total deaths", "Population" (formatted
        with thousands separators), "average cases", "cases per cap",
        "average deaths" and "deaths per cap" (the last four rounded to three
        decimals; the per-cap values are daily averages per 100,000 people).
        Counties missing from the census file are dropped by the inner merge.

    Notes
    -----
    Weeks are cut positionally: rows 0-6 of each county form week 1, rows
    7-13 week 2, and so on, while the week label is taken from the overall
    list of unique dates. This assumes every county has one row per date in
    the same order -- TODO confirm against the raw data.
    A trailing block with fewer than seven dates is skipped and reported with
    a single "Not enough dates." message.
    """
    column_names = ["County", "week", "week number", "total cases", "total deaths"]

    # Keep only rows attributed to a named county.
    clean_cases = data.loc[(data["county"] != "Unassigned") & (data["county"] != "Out Of Country")]
    counties = clean_cases["county"].unique().tolist()
    dates = clean_cases["date"].unique().tolist()

    # Filter each county's rows once instead of once per week.
    county_frames = [
        (county, clean_cases.loc[clean_cases["county"] == county])
        for county in counties
    ]

    # Collect plain records and build the frame once; DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for week_number, start in enumerate(range(0, len(dates), 7), start=1):
        end = start + 6
        if end >= len(dates):
            # Incomplete trailing week: there is no closing date for the label.
            print("Not enough dates.")
            continue

        period = f"{dates[start]} to {dates[end]}"
        for county, county_rows in county_frames:
            window = county_rows.iloc[start:end + 1]
            records.append({
                "County": county,
                "week": period,
                "week number": week_number,
                "total cases": window["newcountconfirmed"].sum(),
                "total deaths": window["newcountdeaths"].sum(),
            })

    weekly_df = pd.DataFrame(records, columns=column_names)

    # Census names look like "Yolo County"; strip the suffix so they match.
    census = pd.read_csv(census_path)
    census["County"] = census["County"].str.replace(" County", "", regex=False)
    merged = pd.merge(weekly_df, census, on="County")

    # .copy() so the column assignments below never touch a view of `merged`.
    weekly_complete = merged[column_names + ["Population"]].copy()
    weekly_complete["average cases"] = weekly_complete["total cases"] / 7
    weekly_complete["cases per cap"] = (weekly_complete["average cases"] / weekly_complete["Population"]) * 100000
    weekly_complete["average deaths"] = weekly_complete["total deaths"] / 7
    weekly_complete["deaths per cap"] = (weekly_complete["average deaths"] / weekly_complete["Population"]) * 100000

    # Format the population only after it has been used in the arithmetic above.
    weekly_complete["Population"] = weekly_complete["Population"].map("{:,}".format)
    for column in ["average cases", "cases per cap", "average deaths", "deaths per cap"]:
        weekly_complete[column] = weekly_complete[column].astype(float).round(3)

    return weekly_complete

In [3]:
# Read the statewide case CSV, then build the weekly per-county summary from it.
new_cases = pd.read_csv(filepath_or_buffer="resources/statewide_cases.csv")
covid_weekly = weekly(data=new_cases)

In [4]:
# Display the weekly summary table built by weekly().
covid_weekly

Unnamed: 0,County,week,week number,total cases,total deaths,Population,average cases,cases per cap,average deaths,deaths per cap
0,Santa Clara,2020-03-18 to 2020-03-24,1,389,17,1922200,55.571,2.891,2.429,0.126
1,Santa Clara,2020-03-25 to 2020-03-31,2,388,13,1922200,55.429,2.884,1.857,0.097
2,Santa Clara,2020-04-01 to 2020-04-07,3,416,16,1922200,59.429,3.092,2.286,0.119
3,Santa Clara,2020-04-08 to 2020-04-14,4,368,20,1922200,52.571,2.735,2.857,0.149
4,Santa Clara,2020-04-15 to 2020-04-21,5,382,22,1922200,54.571,2.839,3.143,0.164
...,...,...,...,...,...,...,...,...,...,...
2373,Yolo,2020-11-25 to 2020-12-01,37,508,3,214977,72.571,33.758,0.429,0.199
2374,Yolo,2020-12-02 to 2020-12-08,38,632,7,214977,90.286,41.998,1.000,0.465
2375,Yolo,2020-12-09 to 2020-12-15,39,971,14,214977,138.714,64.525,2.000,0.930
2376,Yolo,2020-12-16 to 2020-12-22,40,857,10,214977,122.429,56.950,1.429,0.665


In [5]:
import json

In [6]:
# Turn the weekly table into a list with one dictionary per row ("records" orient).
# That list can serve as the contents of 'data.js' or be passed to PyMongo's insert_many.
list_d = covid_weekly.to_dict("records")

In [7]:
# Save the records as JSON text; this file is the source for data.js later on.
with open("weekly_dict.txt", mode="w") as out_file:
    serialized = json.dumps(list_d)
    out_file.write(serialized)

In [8]:
# Sample query: the raw row for Yuba county on 2020-04-23.
is_yuba = new_cases["county"].eq("Yuba")
on_april_23 = new_cases["date"].eq("2020-04-23")
new_cases[is_yuba & on_april_23]

Unnamed: 0,county,totalcountconfirmed,totalcountdeaths,newcountconfirmed,newcountdeaths,date
2618,Yuba,15.0,1.0,0,0,2020-04-23


In [None]:
# Chart inputs.
# `obj` maps each county to a one-element list that holds its weekly
# "cases per cap" values, so the series is read back as obj[county][0].
counties = covid_weekly["County"].unique().tolist()

obj = {}
for county in counties:
    county_rates = covid_weekly.loc[covid_weekly["County"] == county, "cases per cap"]
    obj[county] = [county_rates.tolist()]

# Shared x-axis: every distinct week number, in order of first appearance.
x_axis = covid_weekly["week number"].unique().tolist()

In [None]:
# Plot and save one weekly cases-per-capita chart for every county.

def _long_date(iso_day):
    """Format a date string such as '2020-03-18' as 'March 18, 2020'."""
    stamp = pd.to_datetime(iso_day)
    return f"{stamp:%B} {stamp.day}, {stamp.year}"


# savefig does not create missing folders, so make sure the target exists.
output_dir = Path("Output_data/county_maps")
output_dir.mkdir(parents=True, exist_ok=True)

# Derive the date range for the x-label from the "week" labels
# ("<first day> to <last day>") instead of hard-coding it, so the label
# always matches the weeks that are actually plotted.
if len(covid_weekly):
    ordered_weeks = covid_weekly.sort_values("week number")["week"]
    first_day = ordered_weeks.iloc[0].split(" to ")[0]
    last_day = ordered_weeks.iloc[-1].split(" to ")[-1]
    x_label = f"Weeks: {_long_date(first_day)} - {_long_date(last_day)}"
else:
    x_label = "Weeks"

for county in counties:
    # obj[county] is a one-element list wrapping the county's weekly series.
    y = obj[county][0]

    fig, ax = plt.subplots()
    ax.plot(x_axis, y, label=f"{county}")

    ax.set_title("Seven-Day Average COVID-19 Cases", fontweight="bold")
    ax.set_xlabel(x_label)
    ax.set_ylabel("Daily Rate per 100,000")
    ax.set_xticks(x_axis[::5])  # label every fifth week to keep the axis readable

    ax.legend(loc="best")
    fig.tight_layout()
    ax.grid()
    fig.savefig(output_dir / f"{county}_cases.jpg", transparent=True, dpi=300)
    # Close each figure so dozens of charts do not pile up in memory.
    plt.close(fig)