In [2]:
import glob
import os
import re
from datetime import datetime, timedelta
from os import path
from urllib.error import HTTPError

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [3]:
# Mirror the CSSE COVID-19 daily report CSVs into `data_dir`, walking backwards
# one day at a time from yesterday down to `min_date`. Files that already exist
# locally are not downloaded again, so re-running the cell is cheap.
date = datetime.today() - timedelta(days = 1)
min_date = datetime(year = 2020, month = 1, day = 22)

data_dir = "data"
# to_csv does not create missing directories; make sure the target exists so
# the cell also works on a fresh checkout.
os.makedirs(data_dir, exist_ok = True)

while date >= min_date:
    date_formatted = date.strftime("%m-%d-%Y")
    filepath = f"{data_dir}/{date_formatted}.csv"
    if not path.isfile(filepath):
        url = f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{date_formatted}.csv"
        try:
            dataframe = pd.read_csv(url)
        except HTTPError as error:
            # A 404 only means there is no report for this particular day;
            # keep going with the remaining days. Any other HTTP failure is
            # unexpected and is re-raised unchanged.
            if error.code != 404:
                raise
            print(f"Unavailable {filepath}")
        else:
            dataframe.to_csv(filepath, index = False)
            print(f"Saved {filepath}")
    else:
        print(f"Skipped {filepath}")
    date = date - timedelta(days = 1)

Saved data/06-14-2020.csv
Skipped data/06-13-2020.csv
Skipped data/06-12-2020.csv
Skipped data/06-11-2020.csv
Skipped data/06-10-2020.csv
Skipped data/06-09-2020.csv
Skipped data/06-08-2020.csv
Skipped data/06-07-2020.csv
Skipped data/06-06-2020.csv
Skipped data/06-05-2020.csv
Skipped data/06-04-2020.csv
Skipped data/06-03-2020.csv
Skipped data/06-02-2020.csv
Skipped data/06-01-2020.csv
Skipped data/05-31-2020.csv
Skipped data/05-30-2020.csv
Skipped data/05-29-2020.csv
Skipped data/05-28-2020.csv
Skipped data/05-27-2020.csv
Skipped data/05-26-2020.csv
Skipped data/05-25-2020.csv
Skipped data/05-24-2020.csv
Skipped data/05-23-2020.csv
Skipped data/05-22-2020.csv
Skipped data/05-21-2020.csv
Skipped data/05-20-2020.csv
Skipped data/05-19-2020.csv
Skipped data/05-18-2020.csv
Skipped data/05-17-2020.csv
Skipped data/05-16-2020.csv
Skipped data/05-15-2020.csv
Skipped data/05-14-2020.csv
Skipped data/05-13-2020.csv
Skipped data/05-12-2020.csv
Skipped data/05-11-2020.csv
Skipped data/05-10-2020.csv

In [None]:
# Load every downloaded daily report, stamp each row with the report date taken
# from the file name, and stack everything into one frame holding the
# "Country/Region", "Last Update" and "Deaths" columns.
data_files = glob.glob(f"{data_dir}/*.csv")

daily_dataframes = []
# Report files are named MM-DD-YYYY.csv. Capture all four year digits: matching
# only two would keep the century ("20") and date every later year as 2020.
p = re.compile(r'(\d{2}-\d{2}-\d{4})')

for file in data_files:
    # Match against the file name only, so digits in the directory part of the
    # path can never be mistaken for the report date.
    date_match = p.search(path.basename(file))
    if date_match is None:
        # A CSV without a date in its name is not a daily report; ignore it.
        continue
    daily_dataframe = pd.read_csv(file)
    # The column headers differ between reports; normalise to one spelling.
    daily_dataframe = daily_dataframe.rename({"Last_Update": "Last Update", "Country_Region": "Country/Region"}, axis=1)
    # Overwrite the per-row timestamp with the report date as a real datetime.
    daily_dataframe["Last Update"] = datetime.strptime(date_match.group(), "%m-%d-%Y")
    daily_dataframes.append(daily_dataframe)

combined_data = pd.concat(daily_dataframes, axis=0, ignore_index=True, sort=False)
combined_data = combined_data[["Country/Region", "Last Update", "Deaths"]]
# Sort on the parsed datetime (newest first); sorting the MM-DD-YY strings
# would order lexicographically by month rather than chronologically.
combined_data = combined_data.sort_values(by="Last Update", ascending=False)
# Unify country names that are spelled differently across reports.
combined_data["Country/Region"] = combined_data["Country/Region"].replace(
    {"United Kingdom": "UK", "Mainland China": "China"}
)

In [None]:
# Keep only the countries of interest, then total the numeric columns for each
# (report date, country) pair and show the last ten rows of the result.
countries_selected = [
    "UK",
    "Sweden",
    "France",
    "Italy",
    "China",
    "US",
]
is_selected = combined_data["Country/Region"].isin(countries_selected)
filtered_data = combined_data[is_selected]
group_keys = ["Last Update", "Country/Region"]
grouped_data = filtered_data.groupby(by=group_keys).sum()
print(grouped_data.tail(10))

In [None]:
# Draw the grouped totals as one line per country on two stacked panels:
# linear y-axis on top, logarithmic y-axis below, then save the figure.
fig, ax = plt.subplots(2, 1, figsize=(10,15))
# Pivot the country level of the index into columns so each country is a line.
clean_data = grouped_data.unstack()
clean_data.plot(kind="line", ax=ax[0], lw=2)
clean_data.plot(kind="line", ax=ax[1], lw=2)
ax[1].set_yscale("log")

# Label both panels so the figure can be read on its own.
ax[0].set_title("Deaths by country (linear scale)")
ax[1].set_title("Deaths by country (log scale)")
for panel in ax:
    panel.set_xlabel("Report date")
    panel.set_ylabel("Deaths")

# savefig does not create missing directories; make sure the target exists.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/line.png")