# Covid-19 Daily Tracking


In [23]:
import sys
import os.path
import requests
import json
from datetime import datetime 
import pandas as pd

# --- Configuration -----------------------------------------------------------
COUNTRY_LOC = 'Malaysia'
API_URL = 'https://api.covid19api.com/total/country/' + COUNTRY_LOC
API_WORLD = 'https://api.covid19api.com/world/total'
COVID_LOCAL = '../data/covid_19_' + COUNTRY_LOC + '.json'

start_date = '2021-02-01'
today_date = datetime.today()
url_param = {'from': start_date, 'to': str(today_date)[:10]}

# --- Decide whether the local cache can be used as-is ------------------------
# The cache is "current" when its modification date (YYYY-MM-DD) is not older
# than today's date; ISO-formatted dates compare correctly as strings.
cache_exists = os.path.isfile(COVID_LOCAL)
cache_is_current = False
if cache_exists:
    print('Exist')
    file_date = str(datetime.fromtimestamp(os.path.getmtime(COVID_LOCAL)))[:10]
    cache_is_current = file_date >= str(today_date)[:10]

# --- Refresh the cache from the web only when it is missing or outdated ------
# api_request stays None when no request was needed or when the request itself
# failed before a response was received.
api_request = None
if cache_is_current:
    print('Retrieving from current file....')
else:
    try:
        api_request = requests.get(API_URL, params=url_param, timeout=30)
    except requests.exceptions.RequestException as e:
        print('Server Request Failed :', e)

    if api_request is not None and api_request.ok:
        if cache_exists:
            print('Outdated File. Updating new data from web.....')
        else:
            print('Pulling data from web to new file....')
        with open(COVID_LOCAL, "wb") as file:
            file.write(api_request.content)
    else:
        if api_request is not None:
            print('Error: ', api_request.status_code)
        if not cache_exists:
            # No fresh download and nothing cached: fail loudly instead of
            # exiting with a success status.
            print('No Data File exist....quiting')
            raise FileNotFoundError(
                'No cached data at ' + COVID_LOCAL + ' and the download failed')
        if api_request is None:
            print('using existing file ....')
        else:
            print('Use existing file.....')

# --- Load the (possibly refreshed) cache -------------------------------------
# Every successful path ends with the data on disk, so it is read exactly once.
df = pd.read_json(COVID_LOCAL).set_index('Date')




Exist
Retrieving from current file....


In [24]:
# Johns Hopkins CSSE global time series; '{}' is replaced by the series name.
JHU_TIME_SERIES_URL = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_{}_global.csv'
)


def _load_country_totals(series_name):
    """Download one JHU series and return it as a date x country frame.

    series_name is the file-name fragment ('confirmed', 'deaths' or
    'recovered'). Province rows are summed per 'Country/Region' and the
    result is transposed so the date labels from the CSV header become the
    index.
    """
    raw = pd.read_csv(JHU_TIME_SERIES_URL.format(series_name))
    raw = raw.drop(columns=['Province/State', 'Lat', 'Long'])
    return raw.groupby(['Country/Region']).sum().transpose()


def _world_totals(country_frame, like_index):
    """Sum a date x country frame across countries, indexed by real dates.

    The JHU header labels are 'm/d/yy' strings; they are parsed explicitly
    and given the same timezone as like_index (if it has one) so the result
    aligns with that index on assignment.
    """
    totals = country_frame.sum(axis=1).astype(float)
    dates = pd.to_datetime(totals.index, format='%m/%d/%y')
    target_tz = getattr(like_index, 'tz', None)
    if target_tz is not None:
        dates = dates.tz_localize(target_tz)
    totals.index = dates
    return totals


cfm_df = _load_country_totals('confirmed')
death_df = _load_country_totals('deaths')
rec_df = _load_country_totals('recovered')

# Column assignment aligns on the index, so only dates already present in df
# receive a value and no extra rows are appended; re-running the cell simply
# overwrites the three columns.
for world_column, country_frame in (
        ('World Confirmed', cfm_df),
        ('World Deaths', death_df),
        ('World Recovered', rec_df)):
    df[world_column] = _world_totals(country_frame, df.index)
    

In [25]:
# Display the current contents of df as the cell output.
df

Unnamed: 0_level_0,Country,CountryCode,Province,City,CityCode,Lat,Lon,Confirmed,Deaths,Recovered,Active,World Confirmed,World Deaths,World Recovered
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-22 00:00:00+00:00,Malaysia,,,,,0,0,0,0,0,0,555.0,17.0,28.0
2020-01-23 00:00:00+00:00,Malaysia,,,,,0,0,0,0,0,0,654.0,18.0,30.0
2020-01-24 00:00:00+00:00,Malaysia,,,,,0,0,0,0,0,0,941.0,26.0,36.0
2020-01-25 00:00:00+00:00,Malaysia,,,,,0,0,3,0,0,3,1434.0,42.0,39.0
2020-01-26 00:00:00+00:00,Malaysia,,,,,0,0,4,0,0,4,2118.0,56.0,52.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-01 00:00:00+00:00,Malaysia,,,,,0,0,8985,125,8647,213,17850617.0,685113.0,10553585.0
2020-08-02 00:00:00+00:00,Malaysia,,,,,0,0,8999,125,8664,210,18079723.0,689417.0,10690555.0
2020-08-03 00:00:00+00:00,Malaysia,,,,,0,0,9001,125,8668,208,18282208.0,693754.0,10913000.0
2020-08-04 00:00:00+00:00,Malaysia,,,,,0,0,9002,125,8684,193,18540119.0,700708.0,11134735.0


In [26]:
# Remove columns that are not used in the analysis.
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
df = df.drop(
    columns=['Country', 'CountryCode', 'Province', 'City', 'CityCode', 'Lat', "Lon"],
    errors='ignore',
)

In [27]:
# Shorten every index label to its first 10 characters (the 'YYYY-MM-DD' part
# of a timestamp label), dropping the time information.
# The new index is built in one pass instead of one rename() call per label,
# and the index name is carried over explicitly.
df.index = pd.Index([str(label)[:10] for label in df.index], name=df.index.name)
df

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active,World Confirmed,World Deaths,World Recovered
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-22,0,0,0,0,555.0,17.0,28.0
2020-01-23,0,0,0,0,654.0,18.0,30.0
2020-01-24,0,0,0,0,941.0,26.0,36.0
2020-01-25,3,0,0,3,1434.0,42.0,39.0
2020-01-26,4,0,0,4,2118.0,56.0,52.0
...,...,...,...,...,...,...,...
2020-08-01,8985,125,8647,213,17850617.0,685113.0,10553585.0
2020-08-02,8999,125,8664,210,18079723.0,689417.0,10690555.0
2020-08-03,9001,125,8668,208,18282208.0,693754.0,10913000.0
2020-08-04,9002,125,8684,193,18540119.0,700708.0,11134735.0


In [28]:
# Population by country, pulled from the UN
# [source: https://population.un.org/wpp/Download/Standard/Population/]
# and edited so the country names conform to the covid data.
pop_df = pd.read_csv('../data/population.csv')

# Replace spaces in country names with '-'. regex=False makes the literal
# replacement explicit instead of depending on the pandas default.
pop_df['Country'] = pop_df['Country'].str.replace(' ', '-', regex=False)

# The source column is text with spaces inside the numbers and is expressed in
# thousands: strip the spaces, cast to int and scale by 1000. The column keeps
# its original name.
pop_df['Pop.(\'000)'] = (
    pop_df['Pop.(\'000)'].str.replace(' ', '', regex=False).astype(int) * 1000
)

pop_df = pop_df.set_index('Country')
pop_df

Unnamed: 0_level_0,Pop.('000)
Country,Unnamed: 1_level_1
Burundi,11891000
Comoros,870000
Djibouti,988000
Eritrea,3546000
Ethiopia,114964000
...,...
Bermuda,62000
Canada,37742000
Greenland,57000
Saint-Pierre-and-Miquelon,6000


## Data Crunching to form new dataset


In [29]:
# Map each cumulative column to the daily-change column derived from it.
CUMULATIVE_TO_DAILY = {
    'Confirmed': 'Daily Cases',
    'Deaths': 'Daily Deaths',
    'Recovered': 'Daily Recovered',
}

# Daily change = today's cumulative value minus the previous row's value.
# Shifting with fill_value=0 makes the first row's daily figure equal to its
# cumulative figure, which is how the first row was treated before.
cumulative = df[list(CUMULATIVE_TO_DAILY)]
daily = cumulative - cumulative.shift(1, fill_value=0)
for cumulative_name, daily_name in CUMULATIVE_TO_DAILY.items():
    df[daily_name] = daily[cumulative_name].astype(int)

# Each rate is the column's share of 'Confirmed', as a percentage.
# 0/0 produces NaN, which is reported as 0.0.
for rate_name, source_name in (
        ('Mortality Rates', 'Deaths'),
        ('Recovered Rates', 'Recovered'),
        ('Active Rates', 'Active')):
    df[rate_name] = round(df[source_name].divide(df['Confirmed']).fillna(0.0), 4) * 100

# Confirmed cases per one million inhabitants of COUNTRY_LOC, truncated to int.
df['Cases per 1mil pop'] = (
    (df['Confirmed'] / pop_df.loc[COUNTRY_LOC, 'Pop.(\'000)']) * 1000000
).astype(int)


df


Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active,World Confirmed,World Deaths,World Recovered,Daily Cases,Daily Deaths,Daily Recovered,Mortality Rates,Recovered Rates,Active Rates,Cases per 1mil pop
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-22,0,0,0,0,555.0,17.0,28.0,0,0,0,0.00,0.00,0.00,0
2020-01-23,0,0,0,0,654.0,18.0,30.0,0,0,0,0.00,0.00,0.00,0
2020-01-24,0,0,0,0,941.0,26.0,36.0,0,0,0,0.00,0.00,0.00,0
2020-01-25,3,0,0,3,1434.0,42.0,39.0,3,0,0,0.00,0.00,100.00,0
2020-01-26,4,0,0,4,2118.0,56.0,52.0,1,0,0,0.00,0.00,100.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-01,8985,125,8647,213,17850617.0,685113.0,10553585.0,9,0,3,1.39,96.24,2.37,277
2020-08-02,8999,125,8664,210,18079723.0,689417.0,10690555.0,14,0,17,1.39,96.28,2.33,278
2020-08-03,9001,125,8668,208,18282208.0,693754.0,10913000.0,2,0,4,1.39,96.30,2.31,278
2020-08-04,9002,125,8684,193,18540119.0,700708.0,11134735.0,1,0,16,1.39,96.47,2.14,278


In [30]:
# Disabled: blanket replacement of every NaN in df with zero.
#df.fillna(0.0, inplace=True)
# Show the last 7 rows of df.
df.tail(7)

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active,World Confirmed,World Deaths,World Recovered,Daily Cases,Daily Deaths,Daily Recovered,Mortality Rates,Recovered Rates,Active Rates,Cases per 1mil pop
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-07-30,8964,124,8617,223,17309805.0,673244.0,10170650.0,8,0,5,1.38,96.13,2.49,276
2020-07-31,8976,125,8644,207,17599905.0,679556.0,10369140.0,12,1,27,1.39,96.3,2.31,277
2020-08-01,8985,125,8647,213,17850617.0,685113.0,10553585.0,9,0,3,1.39,96.24,2.37,277
2020-08-02,8999,125,8664,210,18079723.0,689417.0,10690555.0,14,0,17,1.39,96.28,2.33,278
2020-08-03,9001,125,8668,208,18282208.0,693754.0,10913000.0,2,0,4,1.39,96.3,2.31,278
2020-08-04,9002,125,8684,193,18540119.0,700708.0,11134735.0,1,0,16,1.39,96.47,2.14,278
2020-08-05,9023,125,8702,196,18810392.0,707666.0,11356275.0,21,0,18,1.39,96.44,2.17,278


In [31]:
# Highest single-row value of each daily metric, rounded to 2 decimals
daily_max = df[['Daily Cases', 'Daily Deaths', 'Daily Recovered']].max()
daily_max.round(2)

Daily Cases        277
Daily Deaths         8
Daily Recovered    333
dtype: int64

In [32]:
# Average of each daily metric over all rows, rounded to 2 decimals
daily_mean = df[['Daily Cases', 'Daily Deaths', 'Daily Recovered']].mean()
daily_mean.round(2)

Daily Cases        45.80
Daily Deaths        0.63
Daily Recovered    44.17
dtype: float64

In [33]:
# Take the final index label and display the row stored under it.
last_entry_date = df.index[-1]
df.loc[last_entry_date]

Confirmed                 9023.00
Deaths                     125.00
Recovered                 8702.00
Active                     196.00
World Confirmed       18810392.00
World Deaths            707666.00
World Recovered       11356275.00
Daily Cases                 21.00
Daily Deaths                 0.00
Daily Recovered             18.00
Mortality Rates              1.39
Recovered Rates             96.44
Active Rates                 2.17
Cases per 1mil pop         278.00
Name: 2020-08-05, dtype: float64