In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Detailed commentary for this code lives in the earlier notebooks on weekly data.
def weekly(data, census_path="resources/CA_Counties_Census_Data.csv"):
    """Roll daily per-county case counts up into seven-day totals and rates.

    Rows whose county is "Unassigned" or "Out Of Country" are dropped. The
    distinct values of the "date" column, in order of first appearance, are
    cut into consecutive seven-date windows; a trailing window with fewer
    than seven dates is reported and skipped. For every complete window and
    every county, the "newcountconfirmed" values whose date falls inside the
    window are summed (a county with no rows in a window gets 0), and the
    result is joined with county populations from the census file.

    Counts are matched to a week by their date value rather than by row
    position, so a county with missing dates cannot drift out of step with
    the printed period.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns "county", "date" and "newcountconfirmed".
        NOTE(review): windows follow the order in which dates first appear,
        so the input is presumably sorted chronologically -- confirm.
    census_path : str, path object or file-like, optional
        Anything ``pandas.read_csv`` accepts. The file must provide a
        "County" column (the text " County" is removed from each name before
        matching) and a "Population" column.

    Returns
    -------
    pandas.DataFrame
        One row per county and complete week with the columns "County",
        "week", "week number", "total cases", "Population", "average cases"
        (total / 7) and "cases per cap" (average per 100,000 population).
        Counties that are absent from the census file are dropped by the
        inner merge.
    """
    excluded = ["Unassigned", "Out Of Country"]
    clean_cases = data.loc[~data["county"].isin(excluded)]
    counties = clean_cases["county"].unique().tolist()
    dates = clean_cases["date"].unique().tolist()

    column_names = ["County", "week", "week number", "total cases"]
    # Collect plain records and build the frame once; growing a DataFrame
    # row by row is quadratic and DataFrame.append no longer exists in pandas 2.
    records = []
    for week_number, ref in enumerate(range(0, len(dates), 7), start=1):
        week_dates = dates[ref:ref + 7]
        if len(week_dates) < 7:
            # Only the final window can be short; skip it instead of failing.
            print("Not enough dates.")
            break

        period = f"{week_dates[0]} to {week_dates[-1]}"
        print(f"Processing {period}, week {week_number}")

        in_week = clean_cases.loc[clean_cases["date"].isin(week_dates)]
        totals = in_week.groupby("county")["newcountconfirmed"].sum()
        for county in counties:
            records.append({
                "County": county,
                "week": period,
                "week number": week_number,
                "total cases": totals.get(county, 0),
            })

    weekly_df = pd.DataFrame(records, columns=column_names)

    census = pd.read_csv(census_path)
    # Census names look like "<name> County"; strip the suffix literally so
    # they line up with the county names used in the case data.
    census["County"] = census["County"].str.replace(" County", "", regex=False)
    weekly_complete = pd.merge(weekly_df, census, on="County")

    # Copy the column subset so the assignments below write to an independent frame.
    weekly_complete = weekly_complete[column_names + ["Population"]].copy()
    weekly_complete["average cases"] = weekly_complete["total cases"] / 7
    weekly_complete["cases per cap"] = (
        weekly_complete["average cases"] / weekly_complete["Population"] * 100000
    )

    return weekly_complete

In [3]:
# Read the daily case file, then aggregate it into per-county weekly figures.
new_cases = pd.read_csv(filepath_or_buffer="resources/new_cases.csv")
covid_weekly = weekly(data=new_cases)

Processing 2020-03-18T00:00:00 to 2020-03-24T00:00:00, week 1
Processing 2020-03-25T00:00:00 to 2020-03-31T00:00:00, week 2
Processing 2020-04-01T00:00:00 to 2020-04-07T00:00:00, week 3
Processing 2020-04-08T00:00:00 to 2020-04-14T00:00:00, week 4
Processing 2020-04-15T00:00:00 to 2020-04-21T00:00:00, week 5
Processing 2020-04-22T00:00:00 to 2020-04-28T00:00:00, week 6
Processing 2020-04-29T00:00:00 to 2020-05-05T00:00:00, week 7
Processing 2020-05-06T00:00:00 to 2020-05-12T00:00:00, week 8
Processing 2020-05-13T00:00:00 to 2020-05-19T00:00:00, week 9
Processing 2020-05-20T00:00:00 to 2020-05-26T00:00:00, week 10
Processing 2020-05-27T00:00:00 to 2020-06-02T00:00:00, week 11
Processing 2020-06-03T00:00:00 to 2020-06-09T00:00:00, week 12
Processing 2020-06-10T00:00:00 to 2020-06-16T00:00:00, week 13
Processing 2020-06-17T00:00:00 to 2020-06-23T00:00:00, week 14
Processing 2020-06-24T00:00:00 to 2020-06-30T00:00:00, week 15
Processing 2020-07-01T00:00:00 to 2020-07-07T00:00:00, week 16
P

In [4]:
# Display the weekly roll-up returned by weekly().
covid_weekly

Unnamed: 0,County,week,week number,total cases,Population,average cases,cases per cap
0,Santa Clara,2020-03-18T00:00:00 to 2020-03-24T00:00:00,1,389,1922200,55.5714,2.89103
1,Santa Clara,2020-03-25T00:00:00 to 2020-03-31T00:00:00,2,388,1922200,55.4286,2.8836
2,Santa Clara,2020-04-01T00:00:00 to 2020-04-07T00:00:00,3,416,1922200,59.4286,3.0917
3,Santa Clara,2020-04-08T00:00:00 to 2020-04-14T00:00:00,4,368,1922200,52.5714,2.73496
4,Santa Clara,2020-04-15T00:00:00 to 2020-04-21T00:00:00,5,382,1922200,54.5714,2.83901
...,...,...,...,...,...,...,...
2025,Yolo,2020-10-14T00:00:00 to 2020-10-20T00:00:00,31,100,214977,14.2857,6.64523
2026,Yolo,2020-10-21T00:00:00 to 2020-10-27T00:00:00,32,133,214977,19,8.83815
2027,Yolo,2020-10-28T00:00:00 to 2020-11-03T00:00:00,33,127,214977,18.1429,8.43944
2028,Yolo,2020-11-04T00:00:00 to 2020-11-10T00:00:00,34,230,214977,32.8571,15.284


In [5]:
import json

In [7]:
# Turn the weekly frame into a list with one dictionary per row ("records" orient).
# The list can be saved as 'data.js' or passed to PyMongo's .insert_many.
list_d = covid_weekly.to_dict("records")

In [9]:
# Serialize the list of row dictionaries as JSON text into weekly_dict.txt.
with open("weekly_dict.txt", mode="w") as out_file:
    serialized = json.dumps(list_d)
    out_file.write(serialized)

In [None]:
# Charting prep: gather each county's "cases per cap" values.
# Each dictionary value is a one-element list wrapping that county's series,
# so the series itself is read back as obj[county][0].
counties = covid_weekly["County"].unique().tolist()

obj = {}
for county in counties:
    per_cap = covid_weekly.loc[covid_weekly["County"] == county, "cases per cap"]
    obj[county] = [per_cap.tolist()]

# Week numbers shared by every chart's horizontal axis.
x_axis = covid_weekly["week number"].unique().tolist()

In [None]:
# Plot and save one line chart per county: its "cases per cap" values
# against the week numbers in x_axis.
output_dir = Path("Output_data/county_maps")
# savefig does not create missing folders, so make sure the target exists.
output_dir.mkdir(parents=True, exist_ok=True)

for county in counties:
    y = obj[county][0]

    # A dedicated Figure/Axes per county keeps state from leaking between charts.
    fig, ax = plt.subplots()
    ax.plot(x_axis, y, label=f"{county}")

    ax.set_title("Seven-Day Average COVID-19 Cases", fontweight="bold")
    ax.set_xlabel("Weeks: March 18, 2020 - November 17, 2020")
    ax.set_ylabel("Daily Rate per 100,000")
    ax.set_xticks(x_axis[::5])  # label every fifth week to keep the axis readable

    ax.legend(loc="best")
    fig.tight_layout()
    ax.grid(True)
    # NOTE(review): JPEG has no alpha channel, so transparent=True presumably
    # has no visible effect on these files -- confirm or switch to PNG.
    fig.savefig(output_dir / f"{county}_cases.jpg", transparent=True, dpi=300)
    # Release the figure once it is on disk so open figures do not pile up.
    plt.close(fig)