In [54]:
# Import all the packages

import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts
import numpy as np
import requests
import json

In [55]:
# Display the data frame

df_by_county = pd.read_csv("statewide_cases.csv")
df_by_county

Unnamed: 0,county,totalcountconfirmed,totalcountdeaths,newcountconfirmed,newcountdeaths,date
0,Santa Clara,151.0,6.0,151,6,2020-03-18
1,Santa Clara,183.0,8.0,32,2,2020-03-19
2,Santa Clara,246.0,8.0,63,0,2020-03-20
3,Santa Clara,269.0,10.0,23,2,2020-03-21
4,Santa Clara,284.0,13.0,15,3,2020-03-22
...,...,...,...,...,...,...
14400,Yolo,3522.0,63.0,30,0,2020-11-09
14401,Yolo,3573.0,64.0,51,1,2020-11-10
14402,Yolo,3606.0,64.0,33,0,2020-11-11
14403,Yolo,3646.0,64.0,40,0,2020-11-12


In [128]:
# Group by on the county name and use .max to show just the totals
# The "newcountconfirmed" column shows the highest single day cases that the county has experienced

df_county = df_by_county.groupby(["county"]).max()
df_county = df_county[["totalcountconfirmed","totalcountdeaths","newcountconfirmed"]]
df_county

Unnamed: 0_level_0,totalcountconfirmed,totalcountdeaths,newcountconfirmed
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alameda,25515.0,486.0,540
Alpine,12.0,0.0,3
Amador,397.0,16.0,29
Butte,3402.0,56.0,122
Calaveras,378.0,21.0,43
Colusa,598.0,6.0,36
Contra Costa,20799.0,253.0,558
Del Norte,208.0,1.0,21
El Dorado,1611.0,4.0,40
Fresno,33693.0,460.0,743


In [95]:
# Drop the rows that say 'Unassigned' and 'Out of country'

county_df = df_county.drop(["Out Of Country", "Unassigned"])
county_df.head()

Unnamed: 0_level_0,totalcountconfirmed,totalcountdeaths,newcountconfirmed
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alameda,25515.0,486.0,540
Alpine,12.0,0.0,3
Amador,397.0,16.0,29
Butte,3402.0,56.0,122
Calaveras,378.0,21.0,43


In [94]:
# Use the describe to show initial data of the data frame

county_df.describe()

Unnamed: 0,totalcountconfirmed,totalcountdeaths,newcountconfirmed
count,58.0,58.0,58.0
mean,17385.706897,314.155172,394.103448
std,45433.948786,982.527707,691.969391
min,11.0,0.0,3.0
25%,667.25,9.25,39.25
50%,3559.0,46.0,120.0
75%,15576.5,232.75,513.5
max,329364.0,7246.0,4493.0


In [126]:
# Create a data frame that shows the total number of cases for each county in the first seven days
# Again drop the rows that say unassigned and out of country

first_seven_df = df_by_county.loc[(df_by_county["date"] == "2020-03-18") | (df_by_county["date"] == "2020-03-19") | (df_by_county["date"] == "2020-03-20") | (df_by_county["date"] == "2020-03-21") | (df_by_county["date"] == "2020-03-22") | (df_by_county["date"] == "2020-03-23") | (df_by_county["date"] == "2020-03-24")]
first_seven_df = first_seven_df.groupby(["county"]).sum()
first_seven_df = first_seven_df[["newcountconfirmed"]]
first_seven_df = first_seven_df.rename(columns={"newcountconfirmed":"Total cases over first 7 days"})
first_seven = first_seven_df.drop(["Unassigned"])
first_seven.head()

Unnamed: 0_level_0,Total cases over first 7 days
county,Unnamed: 1_level_1
Alameda,96
Amador,1
Butte,3
Calaveras,2
Colusa,0


In [127]:
# Create a data frame that shows the total number of cases for each county in the last seven days
# Again drop the rows that say unassigned and out of country

last_seven_df = df_by_county.loc[(df_by_county["date"] == "2020-11-07") | (df_by_county["date"] == "2020-11-08") | (df_by_county["date"] == "2020-11-09") | (df_by_county["date"] == "2020-11-10") | (df_by_county["date"] == "2020-11-11") | (df_by_county["date"] == "2020-11-12") | (df_by_county["date"] == "2020-11-13")]
last_seven_df = last_seven_df.groupby(["county"]).sum()
last_seven_df = last_seven_df[["newcountconfirmed"]]
last_seven_df = last_seven_df.rename(columns={"newcountconfirmed":"Total cases over last 7 days"})
last_seven = last_seven_df.drop(["Out Of Country", "Unassigned"])
last_seven.head()

Unnamed: 0_level_0,Total cases over last 7 days
county,Unnamed: 1_level_1
Alameda,1126
Alpine,6
Amador,28
Butte,148
Calaveras,19


In [123]:
# Merge the two previous tables together on the index 'county' to show the differences in first 7 days and last 7 days

merge_seven_days = pd.merge(first_seven, last_seven, left_index=True, right_index=True)
merge_seven_days

Unnamed: 0_level_0,Total cases over first 7 days,Total cases over last 7 days
county,Unnamed: 1_level_1,Unnamed: 2_level_1
Alameda,96,1126
Amador,1,28
Butte,3,148
Calaveras,2,19
Colusa,0,18
Contra Costa,108,1055
Del Norte,0,14
El Dorado,3,136
Fresno,18,1415
Glenn,0,48


In [78]:
# Create a line plot to show the total confirmed cases

x_values = np.arange(len(county_df["totalcountconfirmed"]))
y_values = county_df["totalcountconfirmed"]
plt.plot(x_values, y_values)
plt.show()

In [73]:
# Create a line plot to show the total confirmed deaths

x_values = np.arange(len(county_df["totalcountdeaths"]))
y_values = county_df["totalcountdeaths"]
plt.plot(x_values, y_values)
plt.show()

In [74]:
# Create a line plot to show the new counts

x_values = np.arange(len(county_df["newcountconfirmed"]))
y_values = county_df["newcountconfirmed"]
plt.plot(x_values, y_values)
plt.show()

In [79]:
# Create a bar chart to show the total confirmed cases

x_axis = np.arange(len(county_df["totalcountconfirmed"]))
y_axis = county_df["totalcountconfirmed"]
plt.bar(x_axis, y_axis)
plt.show()

In [80]:
# Create a bar chart to show the total confirmed deaths

x_axis = np.arange(len(county_df["totalcountdeaths"]))
y_axis = county_df["totalcountdeaths"]
plt.bar(x_axis, y_axis)
plt.show()

In [81]:
# Create a bar chart to show the new confirmed cases

x_axis = np.arange(len(county_df["newcountconfirmed"]))
y_axis = county_df["newcountconfirmed"]
plt.bar(x_axis, y_axis)
plt.show()

In [90]:
# Create a pie chart to show the total confirmed cases

#labels = [county_df["county"]]
sizes = [county_df["totalcountconfirmed"]]
plt.pie(sizes, autopct="%1.1f%%", shadow=True, startangle=140)
plt.show()

In [93]:
# Create a pie chart to show the total confirmed deaths

#labels = [county_df["county"]]
sizes = [county_df["totalcountdeaths"]]
plt.pie(sizes, autopct="%1.1f%%", shadow=True, startangle=140)
plt.show()

In [92]:
# Create a pie chart to show the total confirmed cases

#labels = [county_df["county"]]
sizes = [county_df["newcountconfirmed"]]
plt.pie(sizes, autopct="%1.1f%%", shadow=True, startangle=140)
plt.show()

In [131]:
# Create a line plot to show the differences between the first seven days and the last seven days

x_axis = np.arange(
first_axis = [merge_seven_days["Total cases over first 7 days"]]
last_axis = [merge_seven_days["Total cases over last 7 days"]]
plt.plot(x_axis, first_axis, color="red")
plt.plot(x_axis, last_axis, color="blue")
plt.show()

ValueError: x and y must have same first dimension, but have shapes (56,) and (1, 56)