In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# weekly roll-up of the daily county data (step-by-step commentary is in the previous notebooks)
def weekly(data, census_path="resources/CA_Counties_Census_Data.csv"):
    """Roll daily county case/death counts up into seven-day totals and per-capita rates.

    Parameters
    ----------
    data : pandas.DataFrame
        Daily records with the columns ``county``, ``date``,
        ``newcountconfirmed`` and ``newcountdeaths``.
    census_path : str, optional
        CSV file with ``County`` and ``Population`` columns. A trailing
        " County" in the county names is stripped before merging.

    Returns
    -------
    pandas.DataFrame
        One row per county and complete week with the columns ``County``,
        ``week``, ``week number``, ``total cases``, ``total deaths``,
        ``Population``, ``average cases``, ``cases per cap``,
        ``average deaths`` and ``deaths per cap`` (per-capita values are
        daily averages per 100,000 residents).

    Notes
    -----
    * Rows whose county is "Unassigned" or "Out Of Country" are ignored.
    * Weeks are positional: week ``k`` sums rows ``7*(k-1) .. 7*k-1`` of each
      county (in input order) and is labelled with the matching unique dates
      of the whole data set. This assumes every county has one row per date,
      in the same order -- verify against the input file.
    * A trailing run of fewer than seven dates is dropped (reported once).
    * Counties that are missing from the census file are dropped by the
      inner merge.
    """
    days_per_week = 7
    column_names = ["County", "week", "week number", "total cases", "total deaths"]

    clean_cases = data.loc[~data["county"].isin(["Unassigned", "Out Of Country"])]
    counties = clean_cases["county"].unique().tolist()
    dates = clean_cases["date"].unique().tolist()

    # Filter and re-index each county once instead of once per week.
    county_frames = {
        county: clean_cases.loc[clean_cases["county"] == county].reset_index(drop=True)
        for county in counties
    }

    # Only complete weeks are reported; say so once rather than relying on an
    # IndexError being swallowed for every county.
    full_weeks, leftover_days = divmod(len(dates), days_per_week)
    if leftover_days:
        print(f"Not enough dates for a full week: dropping the last {leftover_days} day(s).")

    # Collect plain records and build the frame once (DataFrame.append was
    # removed in pandas 2.0 and grew the frame quadratically).
    records = []
    for week_number in range(1, full_weeks + 1):
        start = (week_number - 1) * days_per_week
        stop = start + days_per_week
        period = f"{dates[start]} to {dates[stop - 1]}"
        for county in counties:
            window = county_frames[county].iloc[start:stop]
            records.append({
                "County": county,
                "week": period,
                "week number": week_number,
                "total cases": window["newcountconfirmed"].sum(),
                "total deaths": window["newcountdeaths"].sum(),
            })
    weekly_df = pd.DataFrame(records, columns=column_names)

    census = pd.read_csv(census_path)
    census["County"] = census["County"].str.replace(" County", "", regex=False)
    weekly_complete = pd.merge(weekly_df, census, on="County")

    # .copy() so the derived columns are added to an independent frame.
    weekly_complete = weekly_complete[column_names + ["Population"]].copy()
    weekly_complete["average cases"] = weekly_complete["total cases"] / days_per_week
    weekly_complete["cases per cap"] = (weekly_complete["average cases"] / weekly_complete["Population"]) * 100000
    weekly_complete["average deaths"] = weekly_complete["total deaths"] / days_per_week
    weekly_complete["deaths per cap"] = (weekly_complete["average deaths"] / weekly_complete["Population"]) * 100000

    return weekly_complete

In [3]:
# load the daily statewide case records, then aggregate them per county and week
new_cases = pd.read_csv("resources/statewide_cases.csv")
covid_weekly = weekly(new_cases)

Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough

In [4]:
# display the weekly per-county frame returned by weekly()
covid_weekly

Unnamed: 0,County,week,week number,total cases,total deaths,Population,average cases,cases per cap,average deaths,deaths per cap
0,Santa Clara,2020-03-18 to 2020-03-24,1,389,17,1922200,55.5714,2.89103,2.42857,0.126343
1,Santa Clara,2020-03-25 to 2020-03-31,2,388,13,1922200,55.4286,2.8836,1.85714,0.0966155
2,Santa Clara,2020-04-01 to 2020-04-07,3,416,16,1922200,59.4286,3.0917,2.28571,0.118911
3,Santa Clara,2020-04-08 to 2020-04-14,4,368,20,1922200,52.5714,2.73496,2.85714,0.148639
4,Santa Clara,2020-04-15 to 2020-04-21,5,382,22,1922200,54.5714,2.83901,3.14286,0.163503
...,...,...,...,...,...,...,...,...,...,...
2083,Yolo,2020-10-21 to 2020-10-27,32,133,3,214977,19,8.83815,0.428571,0.199357
2084,Yolo,2020-10-28 to 2020-11-03,33,127,2,214977,18.1429,8.43944,0.285714,0.132905
2085,Yolo,2020-11-04 to 2020-11-10,34,230,3,214977,32.8571,15.284,0.428571,0.199357
2086,Yolo,2020-11-11 to 2020-11-17,35,314,6,214977,44.8571,20.866,0.857143,0.398714


In [5]:
import json

In [6]:
# use .to_dict with the 'records' orient to get a list with one dictionary per row
# the list can be used as 'data.js', or passed to .insert_many with PyMongo
list_d = covid_weekly.to_dict(orient='records')

In [7]:
# dump the weekly records as JSON text; this file feeds our data.js later
with open("weekly_dict.txt", "w") as handle:
    json.dump(list_d, handle)

In [9]:
# sample query: the daily record for Yuba on 2020-04-23
new_cases[new_cases["county"].eq("Yuba") & new_cases["date"].eq("2020-04-23")]

Unnamed: 0,county,totalcountconfirmed,totalcountdeaths,newcountconfirmed,newcountdeaths,date
2321,Yuba,15.0,1.0,0,0,2020-04-23


In [None]:
# charting prep: collect each county's weekly "cases per cap" series
counties = covid_weekly["County"].unique().tolist()

# obj maps county -> [list of cases per cap]; the values are wrapped in a
# one-element list, so the series itself is read back as obj[county][0]
obj = {}
for county in counties:
    county_df = covid_weekly[covid_weekly["County"] == county]
    obj[county] = [county_df["cases per cap"].tolist()]

# shared x values: the distinct week numbers, in order of appearance
x_axis = covid_weekly["week number"].unique().tolist()

In [None]:
# plot and save one chart for each county listed in `counties`
output_dir = "Output_data/county_maps"
# savefig does not create missing folders, so make sure the target exists first
os.makedirs(output_dir, exist_ok=True)

for county in counties:
    # obj[county] holds a single list with the county's weekly cases per cap
    y = obj[county][0]

    # a fresh figure per county keeps titles/legends from leaking between charts
    fig, ax = plt.subplots()
    ax.plot(x_axis, y, label=f"{county}")

    ax.set_title("Seven-Day Average COVID-19 Cases", fontweight="bold")
    ax.set_xlabel("Weeks: March 18, 2020 - November 17, 2020")
    ax.set_ylabel("Daily Rate per 100,000")
    ax.set_xticks(x_axis[::5])  # label every fifth week to keep the axis readable

    ax.legend(loc="best")
    ax.grid()
    fig.tight_layout()
    fig.savefig(f"{output_dir}/{county}_cases.jpg", transparent=True, dpi=300)
    # release the figure so looping over every county does not accumulate open figures
    plt.close(fig)