# Covid-19 Daily Tracking


In [113]:
import sys
import requests
import json
from datetime import datetime 
import pandas as pd

COUNTRY_LOC = 'vietnam'
API_URL = 'https://api.covid19api.com/total/country/' + COUNTRY_LOC

start_date = '2021-02-01'
to_date = datetime.today()
url_param = {'from': start_date, 'to': str(to_date)[:10]}

api_request = requests.get(API_URL, params=url_param)

if not api_request.ok:
    print('Server Request Failed :', api_request.status_code, api_request.reason)
    print('Exiting ....')
    sys.exit(0)


df = pd.DataFrame(api_request.json())
df.set_index('Date', inplace=True)

In [114]:
df

Unnamed: 0_level_0,Country,CountryCode,Province,City,CityCode,Lat,Lon,Confirmed,Deaths,Recovered,Active
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-22T00:00:00Z,Viet Nam,,,,,0,0,0,0,0,0
2020-01-23T00:00:00Z,Viet Nam,,,,,0,0,2,0,0,2
2020-01-24T00:00:00Z,Viet Nam,,,,,0,0,2,0,0,2
2020-01-25T00:00:00Z,Viet Nam,,,,,0,0,2,0,0,2
2020-01-26T00:00:00Z,Viet Nam,,,,,0,0,2,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...
2020-07-30T00:00:00Z,Viet Nam,,,,,0,0,509,0,373,136
2020-07-31T00:00:00Z,Viet Nam,,,,,0,0,558,3,373,182
2020-08-01T00:00:00Z,Viet Nam,,,,,0,0,590,3,373,214
2020-08-02T00:00:00Z,Viet Nam,,,,,0,0,621,6,373,242


In [115]:
# remove unwanted Columns
df.drop(columns=['Country', 'CountryCode', 'Province', 'City', 'CityCode', 'Lat', "Lon"], inplace=True)

In [116]:
# rename columns date to strip of time info
for item in df.index:
    df.rename(index={item: str(item)[:10]}, inplace=True)
df

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-22,0,0,0,0
2020-01-23,2,0,0,2
2020-01-24,2,0,0,2
2020-01-25,2,0,0,2
2020-01-26,2,0,0,2
...,...,...,...,...
2020-07-30,509,0,373,136
2020-07-31,558,3,373,182
2020-08-01,590,3,373,214
2020-08-02,621,6,373,242


In [117]:
# Population by Country data pulled from UN [source: https://population.un.org/wpp/Download/Standard/Population/] edited to conform to country's name
pop_df = pd.read_csv('../datas/population.csv')

for index, item in enumerate(pop_df['Pop.(\'000)']):
    # strip empty spaces and convert to integer (in millions)
    pop_df.loc[index, 'Pop.(\'000)'] = int(item.replace(' ', ''))
    pop_df.loc[index, 'Pop.(\'000)'] = pop_df.loc[index, 'Pop.(\'000)'] * 1000 

    # rename Country's name to conform to covid's dataframe
    pop_df.loc[index, 'Country'] = pop_df.loc[index, 'Country'].lower().replace(' ', '-')

pop_df.set_index('Country', inplace=True)

In [118]:
pop_df.loc['hong-kong-sar-china']

Pop.('000)    7497000
Name: hong-kong-sar-china, dtype: object

## Data Crunching to form new dataset


In [119]:
daily_cases = []
daily_deaths = []
daily_recovered = []
mortality_rate = []
recovered_rate = []
active_rate = []
cases_per_million_pop = []

def append_rate_data(index):
    """ to append into list after calculation of
            - mortality rate
            - recovered rate
            - active rate
            - cases per 1mil population
    """
    mortality_rate.append(round((df.loc[df.index[index], 'Deaths'] / df.loc[df.index[index], 'Confirmed']) * 100, 2))
    recovered_rate.append(round((df.loc[df.index[index], 'Recovered'] / df.loc[df.index[index], 'Confirmed']) * 100, 2))
    active_rate.append(round((df.loc[df.index[index], 'Active'] / df.loc[df.index[index], 'Confirmed']) * 100, 2))
    cases_per_million_pop.append(round((df.loc[df.index[index], 'Confirmed'] / pop_df.loc[COUNTRY_LOC, 'Pop.(\'000)']) * 1000000, 0))

for index, item in enumerate(df.index):
    if item == df.index[0]:
        new_case, new_death, new_recovered = df.loc[df.index[0], ['Confirmed', 'Deaths', 'Recovered']] 
        daily_cases.append(new_case)
        daily_deaths.append(new_death)
        daily_recovered.append(new_recovered)
        append_rate_data(0)
    else:
        new_case, new_death, new_recovered = df.loc[df.index[index], ['Confirmed', 'Deaths', 'Recovered']] \
                                            - df.loc[df.index[index - 1], ['Confirmed', 'Deaths', 'Recovered']]
        daily_cases.append(new_case)
        daily_deaths.append(new_death)
        daily_recovered.append(new_recovered)
        append_rate_data(index)
    
    df.loc[item, ['Daily Cases', 'Daily Deaths', 'Daily Recovered', 'Mortality Rate', \
                  'Recovered Rate', 'Active Rate', 'Cases per 1mil pop']] \
                  = daily_cases[index], daily_deaths[index], daily_recovered[index], mortality_rate[index], \
                    recovered_rate[index], active_rate[index], cases_per_million_pop[index]

# release unwanted objects from memory
del daily_cases, daily_deaths, daily_recovered, mortality_rate, recovered_rate, active_rate, cases_per_million_pop


In [124]:
# replace NaN to zero
df.fillna(0.0, inplace=True)
df.tail(7)

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active,Daily Cases,Daily Deaths,Daily Recovered,Mortality Rate,Recovered Rate,Active Rate,Cases per 1mil pop
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-07-28,446,0,369,77,15.0,0.0,4.0,0.0,82.74,17.26,5.0
2020-07-29,459,0,369,90,13.0,0.0,0.0,0.0,80.39,19.61,5.0
2020-07-30,509,0,373,136,50.0,0.0,4.0,0.0,73.28,26.72,5.0
2020-07-31,558,3,373,182,49.0,3.0,0.0,0.54,66.85,32.62,6.0
2020-08-01,590,3,373,214,32.0,0.0,0.0,0.51,63.22,36.27,6.0
2020-08-02,621,6,373,242,31.0,3.0,0.0,0.97,60.06,38.97,6.0
2020-08-03,652,7,374,271,31.0,1.0,1.0,1.07,57.36,41.56,7.0


In [121]:
# Max Daily Record
round(df[['Daily Cases', 'Daily Deaths', 'Daily Recovered']].max(), 2)

Daily Cases        50.0
Daily Deaths        3.0
Daily Recovered    30.0
dtype: float64

In [122]:
round(df[['Daily Cases', 'Daily Deaths', 'Daily Recovered']].mean(), 2)

Daily Cases        3.34
Daily Deaths       0.04
Daily Recovered    1.92
dtype: float64

In [123]:
last_entry_date = df.index[len(df.index) - 1]
df.loc[last_entry_date]

Confirmed             652.00
Deaths                  7.00
Recovered             374.00
Active                271.00
Daily Cases            31.00
Daily Deaths            1.00
Daily Recovered         1.00
Mortality Rate          1.07
Recovered Rate         57.36
Active Rate            41.56
Cases per 1mil pop      7.00
Name: 2020-08-03, dtype: float64