In [17]:
# Import libraries
import pandas as pd
import numpy as np
# Raise the display limits so wide/long frames are shown without truncation.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 100

In [18]:
# Load the per-region dengue case counts and show (rows, columns) as a sanity check.
dengue = pd.read_csv(filepath_or_buffer='../Data/dengue_cases_region_final.csv')
dengue.shape

(4996, 3)

In [19]:
# Preview the first five rows of the dengue data.
dengue.head(n=5)

Unnamed: 0,region,recent_cases,date
0,Admiralty,5,2013-11-24
1,Admiralty,6,2013-12-01
2,Admiralty,8,2013-12-15
3,Admiralty,5,2013-12-22
4,Admiralty,3,2013-12-29


In [20]:
# Parse the 'date' strings into datetimes, then use that column as the index
# (split_df groups on the index named 'date').
dengue['date'] = pd.to_datetime(dengue['date'])
dengue = dengue.set_index('date')

In [21]:
# Collect the distinct region labels (in order of first appearance) and count them.
regions = list(dengue['region'].unique())
len(regions)

41

In [22]:
# Function to split the dengue dataframe based on region and sum up cases based on region
def split_df(dataframe, region):
    """Return the per-date case totals for a single region.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with 'region' and 'recent_cases' columns. It is grouped by
        'date', which must be the name of its index because only the
        'recent_cases' column is kept before grouping.
    region
        Label compared against the 'region' column to select rows.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'date', with the summed 'recent_cases' for that date and a
        'region' column filled with ``region``.
    """
    is_region = dataframe['region'] == region
    cases = dataframe.loc[is_region, ['recent_cases']]
    # Several rows may share a date; collapse them into one total per date.
    totals = cases.groupby('date').sum()
    return totals.assign(region=region)

In [23]:
# Aggregate every region with split_df and stack the results with a single concat.
# Collecting the frames first avoids re-copying the accumulated frame on every
# iteration and avoids concatenating onto an empty placeholder DataFrame.
region_frames = [split_df(dengue, r) for r in regions]
# pd.concat raises on an empty list, so fall back to an empty frame in that case.
dengues = pd.concat(region_frames, axis=0) if region_frames else pd.DataFrame()

In [24]:
# Move the 'date' index back into a regular column so it can serve as a merge key.
dengues = dengues.reset_index(drop=False)

In [25]:
# Load the Google Trends and weather tables.
trends = pd.read_csv(filepath_or_buffer='../Data/google_trends.csv')
weather = pd.read_csv(filepath_or_buffer='../Data/weather.csv')

In [26]:
# Preview the first five rows of the Google Trends data.
trends.head(n=5)

Unnamed: 0.1,Unnamed: 0,date,aedes,dengue,fever,headache,nosebleed,vomit
0,0,2016-01-03,0,78,75,81,0,42
1,1,2016-01-10,6,74,77,76,0,40
2,2,2016-01-17,13,68,88,79,48,70
3,3,2016-01-24,41,94,91,68,30,58
4,4,2016-01-31,23,85,96,52,31,56


In [27]:
# Remove the 'Unnamed: 0' column, which in the preview above just repeats the row number.
trends = trends.drop(columns=['Unnamed: 0'])

In [28]:
# Preview the first five rows of the weather data.
weather.head(n=5)

Unnamed: 0,date,daily_rainfall_total,highest_30_min_rainfall,highest_60_min_rainfall,highest_120_min_rainfall,mean_temperature,maximum_temperature,minimum_temperature,mean_wind_speed,max_wind_speed,year,region,daily_rainfall_total_14,highest_30_min_rainfall_14,highest_60_min_rainfall_14,highest_120_min_rainfall_14,mean_temperature_14,maximum_temperature_14,minimum_temperature_14,mean_wind_speed_14,max_wind_speed_14
0,2017-01-01,3.0,2.6,2.6,2.6,26.8,30.0,24.7,11.2,40.7,2017,Admiralty,,,,,,,,,
1,2017-01-02,47.2,32.6,42.2,45.2,26.1,30.5,24.1,6.1,32.0,2017,Admiralty,,,,,,,,,
2,2017-01-03,0.6,0.6,0.6,0.6,26.3,30.6,24.5,7.6,27.4,2017,Admiralty,,,,,,,,,
3,2017-01-04,2.6,2.4,2.4,2.4,26.6,30.2,23.5,9.0,33.5,2017,Admiralty,,,,,,,,,
4,2017-01-05,1.2,0.8,1.0,1.2,27.7,31.4,24.4,8.6,33.5,2017,Admiralty,,,,,,,,,


In [29]:
# Parse the 'date' column of both tables into datetimes so the merge keys share a dtype.
for frame in (trends, weather):
    frame['date'] = pd.to_datetime(frame['date'])

In [30]:
# Left-join onto the weather rows: trends are matched by date, dengue counts by date and region.
# NOTE(review): the previews above show daily weather rows but weekly trends/dengue rows,
# so weather rows without an exact date match keep NaN in the joined columns — confirm this is intended.
new_df = (
    weather
    .merge(trends, how='left', on='date')
    .merge(dengues, how='left', on=['date', 'region'])
)

In [31]:
# Preview the last five rows of the merged data.
new_df.tail(n=5)

Unnamed: 0,date,daily_rainfall_total,highest_30_min_rainfall,highest_60_min_rainfall,highest_120_min_rainfall,mean_temperature,maximum_temperature,minimum_temperature,mean_wind_speed,max_wind_speed,year,region,daily_rainfall_total_14,highest_30_min_rainfall_14,highest_60_min_rainfall_14,highest_120_min_rainfall_14,mean_temperature_14,maximum_temperature_14,minimum_temperature_14,mean_wind_speed_14,max_wind_speed_14,aedes,dengue,fever,headache,nosebleed,vomit,recent_cases
130921,2021-04-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021,Yishun,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,
130922,2021-04-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021,Yishun,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,
130923,2021-04-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021,Yishun,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,
130924,2021-04-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021,Yishun,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,
130925,2021-04-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021,Yishun,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,


In [32]:
# Save the merged frame for EDA and modelling.
# The row index is written as the first, unnamed column (pandas' default behaviour).
new_df.to_csv(path_or_buf='../Data/merged_data.csv', index=True)