In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### 4/8/2021: cumulative count columns have been temporarily removed, requiring adjustments

In [2]:
def weekly(new_cases):
    data = new_cases[["area", "cases", "deaths", "date", "population"]]
    data.columns = ["county", "newcountconfirmed", "newcountdeaths", "date", "population"]
    data = data.dropna()
    data = data.sort_values("date")
    clean_cases = data.loc[ (data["county"] != "Unknown") & (data["county"] != "Out of state")]
    counties = clean_cases["county"].unique().tolist()
    dates = clean_cases["date"].unique().tolist()
    weeks = np.arange(0, len(dates), 7).tolist()
    column_names = ["County", "week", "week number", "total cases", "total deaths", "Population"]
    weekly_df = pd.DataFrame(columns = column_names)
    counter = 1
    for ref in weeks:
        for county in counties:
            try:
                df = clean_cases.loc[clean_cases["county"] == county]
                df = df.reset_index(drop=True)
                df = df.loc[(df.index >= ref) & (df.index <= (ref+6))]
                weekly_cases = df["newcountconfirmed"].sum()
                weekly_deaths = df["newcountdeaths"].sum()
                period = f"{dates[ref]} to {dates[ref+6]}"
                pop = df["population"].unique()[0]
                data = pd.DataFrame({"County": [county], "week": period, "week number": counter,
                                     "total cases": weekly_cases, "total deaths": weekly_deaths, "Population": pop})
                weekly_df = weekly_df.append(data)
            except:
                print("Not enough dates.")
        counter += 1
    weekly_df["average cases"] = weekly_df["total cases"]/ 7
    weekly_df["cases per cap"] = (weekly_df["average cases"]/weekly_df["Population"]) * 100000
    weekly_df["average deaths"] = weekly_df["total deaths"]/ 7
    weekly_df["deaths per cap"] = (weekly_df["average deaths"]/weekly_df["Population"]) * 100000
    weekly_df["Population"] = weekly_df["Population"].map("{:,}".format)
    weekly_df["average cases"] = weekly_df["average cases"].astype(float).round(3)
    weekly_df["cases per cap"] = weekly_df["cases per cap"].astype(float).round(3)
    weekly_df["average deaths"] = weekly_df["average deaths"].astype(float).round(3)
    weekly_df["deaths per cap"] = weekly_df["deaths per cap"].astype(float).round(3)
    return weekly_df

In [3]:
new_cases = pd.read_csv("resources/covid19cases_test.csv")
covid_weekly = weekly(new_cases)

Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough dates.
Not enough

In [4]:
covid_weekly

Unnamed: 0,County,week,week number,total cases,total deaths,Population,average cases,cases per cap,average deaths,deaths per cap
0,Yuba,2020-03-01 to 2020-03-07,1,0.0,0.0,79290.0,0.000,0.000,0.000,0.000
0,Modoc,2020-03-01 to 2020-03-07,1,0.0,0.0,9475.0,0.000,0.000,0.000,0.000
0,Merced,2020-03-01 to 2020-03-07,1,1.0,0.0,287420.0,0.143,0.050,0.000,0.000
0,Mendocino,2020-03-01 to 2020-03-07,1,0.0,0.0,88439.0,0.000,0.000,0.000,0.000
0,Mariposa,2020-03-01 to 2020-03-07,1,0.0,0.0,17795.0,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...
0,San Francisco,2021-04-11 to 2021-04-17,59,180.0,0.0,892280.0,25.714,2.882,0.000,0.000
0,San Joaquin,2021-04-11 to 2021-04-17,59,434.0,0.0,782545.0,62.000,7.923,0.000,0.000
0,San Luis Obispo,2021-04-11 to 2021-04-17,59,130.0,0.0,278862.0,18.571,6.660,0.000,0.000
0,San Mateo,2021-04-11 to 2021-04-17,59,290.0,0.0,778001.0,41.429,5.325,0.000,0.000


In [5]:
import json

In [6]:
# use .to_dict and 'records' orient to make our desired list
# can be used as 'data.js', or .insert_many with PyMongo
list_d = covid_weekly.to_dict(orient='records')

In [7]:
# write to a text file for our data.js later
with open("weekly_dict.txt", "w") as file:
    file.write(json.dumps(list_d))

### Chart generator

In [None]:
# operations here are for charting purposes
# use a dictionary to create multiple empty lists to store cases per cap
obj = {}
counties = covid_weekly["County"].unique().tolist()
for county in counties:
    obj[county] = []

# loop through counties and append cases per cap
for county in counties:
    county_df = covid_weekly.loc[covid_weekly["County"] == county]
    obj[county].append(county_df["cases per cap"].tolist())
    
x_axis = covid_weekly["week number"].unique().tolist()

In [None]:
# for loop that will plot and save a chart for each county listed
for county in counties:
    y = obj[county][0]

    plt.plot(x_axis, y, label = f"{county}")

    plt.title("Seven-Day Average COVID-19 Cases", fontweight="bold")
    plt.xlabel("Weeks: January 1, 2020 - December 2, 2020")
    plt.ylabel("Daily Rate per 100,000")
    plt.xticks(x_axis[::5])

    plt.legend(loc="best")
    plt.tight_layout()
    plt.grid()
    plt.savefig(f"Output_data/county_maps/{county}_cases.jpg", transparent=True, dpi=300)
    plt.clf()