# Import

In [81]:
from flask import Flask, request, jsonify
import pandas as pd
from datetime import date, timedelta
import io
import requests

# Request data

In [83]:
# Default "no error" state; later cells replace it when a step fails.
error = dict(status=False, message="")

In [84]:
# Request a daily-report .csv file from GitHub
def request_file(date, timeout=10):
    """Download the CSSE COVID-19 daily report published for ``date``.

    Parameters
    ----------
    date : datetime.date
        Day whose report is requested. It is formatted as ``MM-DD-YYYY`` to
        build the file name. (Inside this function the parameter shadows the
        imported ``datetime.date`` class.)
    timeout : float, optional
        Seconds to wait for GitHub before giving up, so a stalled connection
        cannot hang the notebook. Defaults to 10.

    Returns
    -------
    tuple
        ``(response, error)``. ``response`` is the ``requests`` response
        object regardless of its HTTP status (callers inspect
        ``status_code`` themselves), or ``False`` when the request itself
        failed. ``error`` is a dict with ``"status"`` and ``"message"`` keys;
        ``"status"`` is ``True`` only when the request itself failed.

    Side effects
    ------------
    Prints the HTTP status code. Sets the global ``last_updated`` to ``date``,
    but only when the response is successful, so a 404 for a report that has
    not been published yet does not get recorded as the data's date.
    """
    global last_updated

    URL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/"
    path = URL + date.strftime("%m-%d-%Y") + ".csv"

    try:
        res = requests.get(path, timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP-layer failures are handled here; anything else
        # (including KeyboardInterrupt) propagates instead of being swallowed.
        error = {"status": True, "message": "Error retrieving file from GitHub"}
        print(error["message"])
        return False, error

    print("Request status code:", res.status_code)
    if res.ok:
        last_updated = date
    return res, {"status": False, "message": ""}

In [85]:
# First attempt: ask for the report dated today.
res, error = request_file(date=date.today())

Request status code: 404


# Clean data

In [4]:
# If today's data is not available yet, request data from yesterday.
# request_file returns False (not a response) when the request itself failed,
# so status_code may only be read from a real response. The check is an
# explicit `is not False` because a requests response is itself falsy for
# 4xx/5xx statuses.
if res is not False and res.status_code == 404:
    res, error = request_file(date.today() - timedelta(days=1))

# Create and clean df, then keep the US rows in `data` and write them to data.csv.
try:
    if res is False or res.status_code != 200:
        # Without a successful download there is no CSV body to parse.
        raise ValueError("no daily report available to parse")

    df = (
        pd.read_csv(io.StringIO(res.content.decode('utf-8')))
        .rename(columns={'Admin2': 'County', 'Province_State': 'State', 'Country_Region': 'Country',
                         'Last_Update': 'Update', 'Long_': 'Long', 'Combined_Key': 'Key'})
        .set_index('Key')
        .drop(columns=["FIPS", "Lat", "Long"])
    )
    # Normalise the place names: spaces become hyphens, everything lower-case.
    for column in ["County", "State"]:
        df[column] = df[column].str.replace(' ', '-', regex=False).str.lower()
    data = df[df["Country"] == "US"]
    data.to_csv('data.csv')
except Exception as exc:
    # Same error payload as before; the cause is printed too so a failure
    # is diagnosable instead of being silently swallowed.
    error = {"status": True, "message": "Error while handling df"}
    print(error["message"], "-", repr(exc))

Request status code: 404
Request status code: 200


# Get data for all states

In [114]:
# Keep only the state label and the three count columns.
df = data.loc[:, ["State", "Confirmed", "Deaths", "Recovered"]]

In [115]:
# Collapse the rows into one row of summed counts per state.
# NOTE(review): the data shown further down contains a "Recovered, US" row
# whose State is "recovered", so it becomes its own group here — confirm
# that is intended.
df = df.groupby(by="State").sum()

In [116]:
# Preview the first five per-state rows.
df.head(n=5)

Unnamed: 0_level_0,Confirmed,Deaths,Recovered
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
alabama,6026,209,0
alaska,339,9,0
arizona,6286,273,0
arkansas,2911,48,0
california,42368,1689,0


In [117]:
# df = df.reset_index().melt(id_vars=["State"], var_name="Type", value_name="Value")

In [118]:
# df.sort_values(by = "State", inplace = True)

In [119]:
#  df = df.groupby(["State", "Type"]).sum()

In [120]:
# Container for the per-state records, keyed by state name.
df_dict = dict()

In [131]:
# Store each state's row as a {column: value} dict under the state's name.
for state, totals in df.iterrows():
    df_dict[state] = totals.to_dict()

In [136]:
# Spot-check the record stored for a single state.
df_dict["california"]

{'Confirmed': 42368, 'Deaths': 1689, 'Recovered': 0}

In [137]:
# Attach the date held in last_updated, but only when there are records to stamp.
if df_dict:
    df_dict["Date"] = last_updated

# Get total

In [142]:
# Column-wise sums of the three count columns over every US row.
total = data.loc[:, ["Confirmed", "Deaths", "Recovered"]].sum()

In [143]:
# Display the totals as a plain {column: value} dict.
total.to_dict()

{'Confirmed': 938154, 'Deaths': 53755, 'Recovered': 100372}

In [147]:
# Show the US rows ordered from the highest Recovered count downwards.
data.sort_values("Recovered", ascending=False)

Unnamed: 0_level_0,County,State,Country,Update,Confirmed,Deaths,Recovered,Active
Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"Recovered, US",,recovered,US,2020-04-26 02:30:51,0,0,100372,-100372
"Abbeville, South Carolina, US",abbeville,south-carolina,US,2020-04-26 02:30:51,24,0,0,24
"Orange, Texas, US",orange,texas,US,2020-04-26 02:30:51,64,0,0,64
"Onondaga, New York, US",onondaga,new-york,US,2020-04-24 00:00:00,664,25,0,639
"Onslow, North Carolina, US",onslow,north-carolina,US,2020-04-26 02:30:51,44,1,0,43
...,...,...,...,...,...,...,...,...
"Grant, Kentucky, US",grant,kentucky,US,2020-04-26 02:30:51,18,3,0,15
"Grant, Louisiana, US",grant,louisiana,US,2020-04-26 02:30:51,11,0,0,11
"Grant, New Mexico, US",grant,new-mexico,US,2020-04-26 02:30:51,14,0,0,14
"Grant, North Dakota, US",grant,north-dakota,US,2020-04-26 02:30:51,1,0,0,1
