In [1]:
import pandas as pd
import numpy as np
import warnings
# Silence every warning category for the remainder of the notebook
warnings.simplefilter('ignore')

# Show all columns when a dataframe is displayed (no truncation)
pd.set_option('display.max_columns', None)

### Data Import

In [2]:
def _load_weekly_bulletin(year, header=1):
    """Read one year's weekly infectious-disease bulletin CSV into a DataFrame.

    `header` is forwarded to `pd.read_csv`; most yearly files are read with
    `header=1`.
    """
    csv_path = f'../datasets/dengue_cases/{year} weekly-infectious-bulletin_cases.csv'
    return pd.read_csv(csv_path, header=header)


# Dengue cases, one dataframe per year (2023 back to 2012)
dengue_2023 = _load_weekly_bulletin(2023)
dengue_2022 = _load_weekly_bulletin(2022)
# 2021 is the only file read with pandas' default header handling instead of header=1
dengue_2021 = _load_weekly_bulletin(2021, header='infer')
dengue_2020 = _load_weekly_bulletin(2020)
dengue_2019 = _load_weekly_bulletin(2019)
dengue_2018 = _load_weekly_bulletin(2018)
dengue_2017 = _load_weekly_bulletin(2017)
dengue_2016 = _load_weekly_bulletin(2016)
dengue_2015 = _load_weekly_bulletin(2015)
dengue_2014 = _load_weekly_bulletin(2014)
dengue_2013 = _load_weekly_bulletin(2013)
dengue_2012 = _load_weekly_bulletin(2012)

### Functions for cleaning

In [3]:
# Function used to clean dengue data for infections before 2022
def clean_dengue_data(dengue):
    """Return a two-column frame (`date`, `dengue_cases`) built from a raw bulletin.

    The input frame is not modified; all work happens on a copy so the
    function can be re-run on the same raw data.

    Parameters
    ----------
    dengue : pandas.DataFrame
        Raw weekly bulletin. Must contain a 'Start' column with day-first
        dates and the case counts under either 'Dengue' or 'Dengue Fever'.

    Returns
    -------
    pandas.DataFrame
        New frame with `date` converted to datetime and `dengue_cases`
        carried over unchanged.

    Raises
    ------
    KeyError
        If, after renaming, the `date` or `dengue_cases` column is missing.
    """
    # Rename the columns we're using (returns a new frame, the caller's data stays intact)
    renamed = dengue.rename(columns={'Start': 'date',
                                     'Dengue': 'dengue_cases',
                                     'Dengue Fever': 'dengue_cases'})

    # Keep only the columns we need; .copy() makes the result independent of
    # `renamed`, so the assignment below is not a write into a slice
    cleaned = renamed[['date', 'dengue_cases']].copy()

    # Convert the date to datetime format (source dates are day-first)
    cleaned['date'] = pd.to_datetime(cleaned['date'], dayfirst=True)

    return cleaned

In [4]:
# Function used to clean dengue data for infections 2022 onwards as MoH changed the CSV format
def clean_dengue_data_2(dengue):
    """Return a two-column frame (`date`, `dengue_cases`) from a new-format bulletin.

    In this format the week is stored as a range string
    ("dd/mm/yyyy - dd/mm/yyyy") in the column pandas names 'Unnamed: 1'.
    Only the first date of the range is kept. The input frame is not modified.

    Parameters
    ----------
    dengue : pandas.DataFrame
        Raw weekly bulletin containing 'Unnamed: 1' (week range) and the
        case counts under either 'Dengue' or 'Dengue Fever'.

    Returns
    -------
    pandas.DataFrame
        New frame with `date` (datetime, start of the week) and
        `dengue_cases` (int). Rows missing either value are dropped.

    Raises
    ------
    KeyError
        If, after renaming, `old_date` or `dengue_cases` is missing.
    """
    # Rename the columns we're using (returns a new frame, the caller's data stays intact)
    renamed = dengue.rename(columns={'Unnamed: 1': 'old_date',
                                     'Dengue': 'dengue_cases',
                                     'Dengue Fever': 'dengue_cases'})

    # Keep only the columns we need, as an independent copy
    cleaned = renamed[['old_date', 'dengue_cases']].copy()

    # Slice the weekly range from dd/mm/yyyy - dd/mm/yyyy format to just the
    # first date of the range (dd/mm/yyyy). The vectorised .str accessor
    # works for any index, passes missing ranges through as NaN, and avoids
    # chained assignment; strip() removes the space left before the '-'.
    week_start = cleaned['old_date'].str.split(pat='-').str[0].str.strip()

    # Convert the date to datetime format (source dates are day-first)
    cleaned['date'] = pd.to_datetime(week_start, dayfirst=True)

    # Drop old_date, drop the weeks that do not have data, and only then
    # cast dengue_cases to int (NaN cannot be represented as int)
    cleaned = (
        cleaned[['date', 'dengue_cases']]
        .dropna()
        .astype({'dengue_cases': int})
    )

    return cleaned

### Clean Dengue Cases Data

In [9]:
# Clean the yearly dengue datasets.
# Every year except 2022 goes through clean_dengue_data ...
(dengue_2023, dengue_2021, dengue_2020, dengue_2019, dengue_2018, dengue_2017,
 dengue_2016, dengue_2015, dengue_2014, dengue_2013, dengue_2012) = [
    clean_dengue_data(yearly_frame)
    for yearly_frame in (dengue_2023, dengue_2021, dengue_2020, dengue_2019,
                         dengue_2018, dengue_2017, dengue_2016, dengue_2015,
                         dengue_2014, dengue_2013, dengue_2012)
]
# ... while 2022 is handled by clean_dengue_data_2
dengue_2022 = clean_dengue_data_2(dengue_2022)


In [10]:
# Preview the first five rows of the cleaned 2023 data
dengue_2023.head(5)

Unnamed: 0,date,dengue_cases
0,2023-01-01,279
1,2023-01-08,278
2,2023-01-15,273
3,2023-01-22,186
4,2023-01-29,223


### Concatenate the yearly data into a single dataframe

In [11]:
# Stack the yearly frames (newest year first) into one dataframe;
# ignore_index=True discards the per-year indexes and renumbers the rows
yearly_frames = [
    dengue_2023, dengue_2022, dengue_2021, dengue_2020,
    dengue_2019, dengue_2018, dengue_2017, dengue_2016,
    dengue_2015, dengue_2014, dengue_2013, dengue_2012,
]
dengue = pd.concat(yearly_frames, ignore_index=True)

In [12]:
# Check that the concatenation was successful: shape is (rows, columns)
dengue.shape

(590, 2)

In [13]:
# Order the rows chronologically (oldest week first)
dengue = dengue.sort_values(by='date')

In [14]:
# Make the date column the index of the dataframe
dengue = dengue.set_index('date')

### Export the dengue data

In [15]:
# Write the merged dengue data to disk as CSV, keeping the date index as the first column
dengue.to_csv(path_or_buf='../datasets/dengue_merged_2012_2023.csv', index=True)

In [16]:
# Preview the first five rows of the merged, date-indexed data
dengue.head(5)

Unnamed: 0_level_0,dengue_cases
date,Unnamed: 1_level_1
2012-01-01,74.0
2012-01-08,64.0
2012-01-15,60.0
2012-01-22,50.0
2012-01-29,84.0
