# The University of Hong Kong
## DASC7600 Data Science Project 2024
## STD - Global - Case Counts

# Import Modules and Settings

In [1]:
import os
import sys

# Treat the parent of the current working directory as the project root and
# put it on the module search path (presumably so project-local modules such
# as covid_module can be imported).
project_dir = os.path.split(os.getcwd())[0]
sys.path.append(project_dir)

In [2]:
import datetime
import pandas as pd
import warnings

import covid_module

# Settings: silence every warning category for the remainder of the session
warnings.filterwarnings(action='ignore')

# Load Data

In [3]:
# Load the raw WHO global COVID-19 csv from the project's raw-data folder
covid_global_case_count_std = pd.read_csv(
    filepath_or_buffer=project_dir + '/data/raw_data/global/WHO-COVID-19-global-data.csv'
)

# Modify Column Names

In [4]:
# Map the source column headers to the standardised names used downstream;
# columns that are not listed keep their original names.
col_name_map = dict(
    Date_reported='report_date',
    Country='cntry',
    New_cases='case_count',
    Cumulative_cases='cuml_case_count',
    New_deaths='death_count',
    Cumulative_deaths='cuml_death_count',
)

# Apply the mapping directly on the loaded DataFrame
covid_global_case_count_std.rename(columns=col_name_map, inplace=True)

# Update Date Format

In [5]:
# Update date format of report_date from 'YYYY-MM-DD' to 'YYYYMMDD'; the same
# column name is passed as both the source and the target column.
# NOTE(review): the argument order (DataFrame, source column, source format,
# target column, target format) is inferred from this call site, and the
# return value is discarded, so the helper presumably modifies the DataFrame
# in place - confirm against covid_module.change_date_format.
covid_module.change_date_format(covid_global_case_count_std,
                                'report_date', '%Y-%m-%d',
                                'report_date', '%Y%m%d')

# Replace Values

In [6]:
# Replace missing values with 0 in the daily count columns
# ('case_count' and 'death_count', i.e. the renamed 'New_cases' / 'New_deaths')
for count_col in ('case_count', 'death_count'):
    covid_global_case_count_std[count_col] = covid_global_case_count_std[count_col].fillna(0)

# Modify Table Structure and Export to csv Files

In [7]:
# Count column -> base name of the csv file that receives its pivoted table
csv_name_by_count_col = {
    'case_count': 'covid_global_new_case_count_std',
    'cuml_case_count': 'covid_global_cuml_case_count_std',
    'death_count': 'covid_global_new_death_count_std',
    'cuml_death_count': 'covid_global_cuml_death_count_std',
}

for cnt_col, csv_name in csv_name_by_count_col.items():
    # Reshape to a wide table: one row per report_date, one column per cntry.
    # "min" keeps the smallest value if a (date, country) pair occurs more
    # than once; report_date is then restored as an ordinary column.
    df_temp = (
        covid_global_case_count_std
        .pivot_table(values=cnt_col,
                     index='report_date',
                     columns='cntry',
                     aggfunc="min")
        .reset_index()
    )

    # Write the wide table to the standardised-data folder without the row index
    df_temp.to_csv(project_dir + f'/data/std_data/global/{csv_name}.csv', index=False)