In [1]:
import pandas as pd
from os import listdir, getcwd
from datetime import datetime, timedelta
from pycovid import pycovidfunc as cv

In [2]:
def raw_data_formatter(file_list,file_dir):
    """Load daily report CSV files and return them as one formatted DataFrame.

    Parameters
    ----------
    file_list : iterable of str
        File names inside ``file_dir``. The part of each name before the
        first '.' must be a date written as ``MM-DD-YYYY``; it is stored in
        the ``Date`` column of every row read from that file.
    file_dir : str
        Directory that contains the files.

    Returns
    -------
    pandas.DataFrame
        All rows of all files (index reset to 0..n-1) with:
        * alternative header spellings merged into 'Country/Region',
          'Province/State', 'Last Update', 'Latitude' and 'Longitude';
        * the unused columns removed (those that are present);
        * NaN replaced by 0 in the numeric columns and by '-' elsewhere;
        * 'Year', 'Month', 'Week' (ISO week number) and 'Day' derived from
          'Date';
        * 'Active' recomputed and 'Mortality rate in %' added.
        An empty DataFrame is returned when ``file_list`` is empty.

    Raises
    ------
    ValueError
        If a file name does not start with a ``MM-DD-YYYY`` date.
    """
    from os import path

    # Alternative header -> canonical header. The order matches the order in
    # which the canonical columns are offered to combine_first below.
    header_aliases = {'Country_Region': 'Country/Region',
                      'Province_State': 'Province/State',
                      'Last_Update': 'Last Update',
                      'Lat': 'Latitude',
                      'Long_': 'Longitude'}
    unused_columns = ['Country_Region','Province_State','Last_Update',
                      'FIPS','Combined_Key','Long_','Lat','Admin2',
                      'Case-Fatality_Ratio','Incidence_Rate']

    # Read every file first and concatenate once: concatenating inside the
    # loop copies the accumulated frame on every iteration.
    frames = []
    for arquivo in file_list:
        csv_path = path.join(file_dir, arquivo)
        date = datetime.strptime(arquivo.split(sep='.')[0],'%m-%d-%Y')
        df_arquivo = pd.read_csv(csv_path)
        df_arquivo['Date'] = date
        frames.append(df_arquivo)

    if not frames:
        # Nothing to format.
        return pd.DataFrame()

    df = pd.concat(frames, ignore_index=True)

    # Merging the data from columns with same content but different headers.
    # Only the alternative headers that actually occur are used, so files
    # that carry just one of the two spellings do not raise.
    present_aliases = [name for name in header_aliases if name in df.columns]
    df_aux = df[present_aliases].rename(columns=header_aliases)
    # combine_first keeps existing canonical values and fills their gaps
    # from the alternative columns.
    df = df.combine_first(df_aux)

    # Dropping columns that won't be used (missing ones are skipped):
    df = df.drop(columns=unused_columns, errors='ignore')

    # Formatting datetime columns:
    df['Last Update'] = pd.to_datetime(df['Last Update'])

    # Replacing NaN values on numeric data with 0:
    new_values = {'Deaths': 0, 'Active': 0, 'Recovered': 0,
                  'Confirmed': 0,'Latitude': 0, 'Longitude': 0}
    df = df.fillna(value=new_values)

    # Replacing NaN values on non numeric data with '-':
    df = df.fillna(value='-')

    # Adding date columns. DatetimeIndex.week no longer exists in recent
    # pandas releases; isocalendar().week yields the same ISO week number.
    date_index = pd.DatetimeIndex(df['Date'])
    df['Year'] = date_index.year
    df['Month'] = date_index.strftime('%b')
    df['Week'] = date_index.isocalendar().week.to_numpy(dtype='int64')
    df['Day'] = date_index.day

    # Establishing number of active cases as Confirmed cases minus
    # Death cases minus Recovered cases:
    df['Active'] = df['Confirmed'] - df['Deaths'] - df['Recovered']

    # Calculating Mortality rate as the ratio between Deaths and
    # Confirmed cases for each day. Rows without confirmed cases get 0
    # (a plain division would give inf when Deaths > 0 and Confirmed == 0).
    confirmed = df['Confirmed']
    df['Mortality rate in %'] = (df['Deaths']
                                 .div(confirmed.where(confirmed != 0))
                                 .mul(100)
                                 .fillna(value=0))

    return df

In [4]:
# Read the config file to check for data file information:

if 'config.csv' in listdir(getcwd()):
    config = pd.read_csv('config.csv',index_col='var').fillna('-')
else:
    raise FileNotFoundError('No configuration file "config.csv" found.')

last_update = pd.to_datetime(config.loc['raw_data', 'last_update'])
who_data_dir = config.loc['who_data_dir', 'path']

# Collect the daily report files as (date, file name) pairs. A new list is
# built instead of removing entries from the list being iterated, which
# skips elements. Sorting by the parsed date is required because listdir()
# returns names in arbitrary order and 'MM-DD-YYYY' names do not sort
# chronologically across years.
dated_files = []
for file in listdir(who_data_dir):
    if not file.endswith('.csv'):
        continue
    try:
        file_date = datetime.strptime(file.split(sep='.')[0], '%m-%d-%Y')
    except ValueError:
        # CSV file whose name is not a 'MM-DD-YYYY' date: not a daily report.
        continue
    dated_files.append((file_date, file))
dated_files.sort()
who_file_list = [file for _, file in dated_files]

# Compare the WHO files to the raw data update information and select
# the files that are newer than the last update:

latest_who_file_date = dated_files[-1][0] if dated_files else last_update
list_of_new_files = [file for file_date, file in dated_files
                     if file_date > last_update]
files_to_update = len(list_of_new_files)

if files_to_update > 0:
    print('%d new files found. Adding the new information into the raw data file'
          % (files_to_update))

    # Generating a dataframe with new information:
    df = cv.raw_data_formatter(list_of_new_files,who_data_dir)

    # Appending the new data to the existing raw data file. The file is
    # appended without a header, so the column order of df must match the
    # one already stored in the raw data file.
    raw_data_path = config.loc['raw_data', 'path']
    df.to_csv(raw_data_path, mode='a', index=False, header=False)

    # Updating the raw data information in the config file only after the
    # append succeeded. A single .loc[row, column] assignment is used so the
    # value is written into config itself, and the index is written out so
    # the 'var' column is still there the next time the file is read.
    new_date = latest_who_file_date.strftime('%m-%d-%Y')
    config.loc['raw_data', 'last_update'] = new_date
    config.to_csv('config.csv')
else:
    print('0 new files found. No further action necessary')

0 new files found. No further action necessary
