# This notebook provides an example use case for the RtUpdate module, which is used to estimate Rt values based on exponential fitting.

In [1]:
# dependencies
import RtUpdate

In [2]:
# Just a function for updating and getting the most recent data on coronavirus for South Africa
def get_SA_province(ToUpdateSAProvince):
    
    '''
    Load the SA provincial cumulative confirmed cases, deaths and recoveries
    as a function of date, using the CSV files of the dsfsi/covid19za repo.
    
    Each file also has a national total column, which is renamed to
    ZAConfirmed, ZADead and ZARecovered respectively.
    
    Usage:
    
        get_SA_province('y') or get_SA_province('n')
    
    Input:
        Take a y or n (case-insensitive) - whether to update the dataset or not.
        'y' downloads fresh copies of the three CSV files into the current
        working directory (requires the wget package); anything else reads
        the copies already present there.
    
    Output:
        Returns a tuple of three dataframes indexed by date:
        (confirmed, deaths, recoveries)
    '''
    
    import os
    import ssl

    import pandas as pd

    # NOTE(security): this disables TLS certificate verification for the whole
    # process, not just for this function. It is kept because existing callers
    # may rely on it, but it should be removed once the environment has a
    # working CA bundle.
    ssl._create_default_https_context = ssl._create_unverified_context

    base_url = "https://raw.githubusercontent.com/dsfsi/covid19za/master/data/"
    # (file name, name given to that file's 'total' column), in return order
    datasets = (
        ("covid19za_provincial_cumulative_timeline_confirmed.csv", "ZAConfirmed"),
        ("covid19za_provincial_cumulative_timeline_deaths.csv", "ZADead"),
        ("covid19za_provincial_cumulative_timeline_recoveries.csv", "ZARecovered"),
    )

    update = ToUpdateSAProvince.lower() == 'y'
    if update:
        # Imported here so that reading local files does not require wget.
        import wget

        print("updating data")
        for filename, _ in datasets:
            # Each file is fetched from its own URL (the recoveries file was
            # previously downloaded from the deaths URL).
            saved_as = wget.download(base_url + filename, out=filename)
            # wget never overwrites: if the target exists it saves to
            # "name (1).csv" instead. Move the fresh copy over the canonical
            # name so that later local reads see the updated data.
            if saved_as != filename:
                os.replace(saved_as, filename)
    else:
        print("Reading local file... Please check data is uptodate")

    frames = [pd.read_csv(filename) for filename, _ in datasets]

    if not update:
        print(f"Note: File was last updated on {frames[0]['date'].values[-1]}")

    cleaned = []
    for frame, (_, total_name) in zip(frames, datasets):
        #
        # Drop the unwanted columns (date duplicate and source of information).
        # Checked per file, so a file lacking one of them does not raise.
        #
        frame = frame.drop(columns=['YYYYMMDD', 'source'], errors='ignore')
        #
        # Make the date the index (this also removes the date column)
        #
        frame['date'] = pd.to_datetime(frame['date'], format='%d-%m-%Y')
        frame = frame.set_index('date')
        #
        # Renaming the total column
        #
        cleaned.append(frame.rename(columns={'total': total_name}))

    return tuple(cleaned)

In [3]:
# Download the latest South African tables: confirmed cases, deaths and recoveries
ProvinConf, ProvinDead, ProvinRecover = get_SA_province('y')

# Discard rows with missing values, then store the case counts as 64-bit integers
ProvinConf = ProvinConf.dropna().astype('int64')
ProvinConf.head()

updating data


Unnamed: 0_level_0,EC,FS,GP,KZN,LP,MP,NC,NW,WC,UNKNOWN,ZAConfirmed
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-03-05,0,0,0,1,0,0,0,0,0,0,1
2020-03-07,0,0,1,1,0,0,0,0,0,0,2
2020-03-08,0,0,1,2,0,0,0,0,0,0,3
2020-03-09,0,0,1,6,0,0,0,0,0,0,7
2020-03-11,0,0,5,7,0,0,0,0,1,0,13


In [4]:
# Computing Rt estimates for the columns of ProvinConf (South Africa and its provinces).
# The result is displayed as the cell output: one row per column, with a Median and a 95%CI.
# NOTE(review): the original note says the fit covers the most recent 14 days; no window
# is passed here, so this is presumably Update_Rt's default - confirm against RtUpdate.
RtUpdate.Update_Rt(ProvinConf)

Unnamed: 0,Median,95%CI
EC,2.085278,"(1.7668733090350386, 2.444898074920848)"
FS,1.687656,"(1.510846266165105, 1.8902591104375372)"
GP,2.590693,"(2.1228186331664904, 3.1477572858135168)"
KZN,1.376419,"(1.2899930554215304, 1.4712723846613494)"
LP,1.825903,"(1.6050634944205773, 2.0645361216585285)"
MP,2.204746,"(1.855463130352806, 2.60647987454154)"
NC,2.033117,"(1.7578675899933351, 2.380757732969707)"
NW,2.955319,"(2.369755088017346, 3.734686399102935)"
WC,1.524354,"(1.4013195949902884, 1.6617108305374133)"
UNKNOWN,2.523237,"(2.083211338671766, 3.0164341562213917)"
