In [140]:
# Import libraries
import pandas as pd
import numpy as np
# Raise pandas' display limits so long and wide frames are not truncated.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 100

In [141]:
# Read the dengue cases-by-region CSV from the shared Data folder.
dengue = pd.read_csv('../Data/dengue_cases_region_final.csv')
# Show (rows, columns) as a quick check that the file loaded as expected.
dengue.shape

(4996, 3)

In [142]:
# Preview the first rows of the dengue dataframe (head() shows five by default).
dengue.head()

Unnamed: 0,region,recent_cases,date
0,Admiralty,5,2013-11-24
1,Admiralty,6,2013-12-01
2,Admiralty,8,2013-12-15
3,Admiralty,5,2013-12-22
4,Admiralty,3,2013-12-29


In [143]:
# Parse the date strings into datetimes, then promote that column to the index.
dengue['date'] = pd.to_datetime(dengue['date'])
dengue = dengue.set_index('date')

In [144]:
# Collect the distinct region names (in order of first appearance) and count them.
regions = dengue['region'].unique().tolist()
len(regions)

41

In [145]:
# Extract a single region from the dengue dataframe and total its cases per date
def split_df(dataframe, region):
    """Return the per-date case totals for one region.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with 'region' and 'recent_cases' columns and an index named 'date'.
    region : str
        Value of the 'region' column to keep.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'date', with the summed 'recent_cases' for that date and a
        'region' column holding `region` on every row.
    """
    in_region = dataframe['region'] == region
    # Keep a one-column frame so the groupby result stays a DataFrame.
    cases = dataframe.loc[in_region, ['recent_cases']]
    # Rows sharing a date are collapsed into a single total.
    totals = cases.groupby('date').sum()
    totals['region'] = region
    return totals

In [146]:
# Aggregate every region with split_df, then stack the results into one dataframe.
# The per-region frames are collected first and concatenated in a single call:
# growing a DataFrame with pd.concat inside the loop re-copies all accumulated
# rows on every iteration and relies on how concat treats the empty seed frame.
region_frames = [split_df(dengue, region_name) for region_name in regions]
# pd.concat raises on an empty list, so fall back to an empty frame in that case.
dengues = pd.concat(region_frames, axis=0) if region_frames else pd.DataFrame()

In [147]:
# Move the 'date' index back into a regular column so it can be used as a
# merge key alongside 'region' when joining with the other dataframes.
dengues=dengues.reset_index()

In [148]:
# Read the Google Trends and weather CSVs from the shared Data folder.
trends = pd.read_csv('../Data/google_trends.csv')
weather = pd.read_csv('../Data/weather.csv')

In [149]:
# Display Google trends data
trends.head()

Unnamed: 0.1,Unnamed: 0,date,aedes,dengue,fever,headache,nosebleed,vomit
0,0,2016-01-03,0,78,75,81,0,42
1,1,2016-01-10,6,74,77,76,0,40
2,2,2016-01-17,13,68,88,79,48,70
3,3,2016-01-24,41,94,91,68,30,58
4,4,2016-01-31,23,85,96,52,31,56


In [150]:
# Drop the redundant 'Unnamed: 0' column (presumably an index saved with the CSV).
# Reassigning with errors='ignore' keeps this cell safe to re-run: the in-place
# drop raised KeyError on a second execution because the column was already gone.
trends = trends.drop(columns='Unnamed: 0', errors='ignore')

In [151]:
# Display weather data
weather.head()

Unnamed: 0,date,daily_rainfall_total,highest_30_min_rainfall,highest_60_min_rainfall,highest_120_min_rainfall,mean_temperature,maximum_temperature,minimum_temperature,mean_wind_speed,max_wind_speed,year,region,daily_rainfall_total_14,highest_30_min_rainfall_14,highest_60_min_rainfall_14,highest_120_min_rainfall_14,mean_temperature_14,maximum_temperature_14,minimum_temperature_14,mean_wind_speed_14,max_wind_speed_14
0,2022-10-01,22.6,18.6,21.4,21.4,26.8,31.8,24.4,6.0,27.8,2022,Admiralty,,,,,,,,,
1,2022-10-02,16.4,13.2,14.2,16.0,27.1,32.6,23.0,5.8,24.4,2022,Admiralty,,,,,,,,,
2,2022-10-03,44.2,18.6,33.4,39.6,26.3,31.8,23.1,5.7,52.6,2022,Admiralty,,,,,,,,,
3,2022-10-04,1.4,1.0,1.2,1.2,26.2,31.2,24.5,6.3,36.3,2022,Admiralty,,,,,,,,,
4,2022-10-05,49.2,17.6,29.2,44.8,26.0,28.6,22.8,7.2,33.5,2022,Admiralty,,,,,,,,,


In [152]:
# Parse the 'date' column of both frames into datetimes so the merge keys share a dtype.
for frame in (trends, weather):
    frame['date'] = pd.to_datetime(frame['date'])

In [153]:
# Build the combined dataframe with weather as the base table.
# Both joins are left joins, so every weather row is kept; trends columns are
# matched on date only, dengue cases on date and region. Weather rows without a
# matching date (and region) get NaN in the joined columns.
new_df = (
    weather
    .merge(trends, how='left', on=['date'])
    .merge(dengues, how='left', on=['date', 'region'])
)

In [154]:
# Inspect the last rows of the merged dataframe to check that the trends and
# dengue columns were attached (tail() shows five rows by default).
new_df.tail()

Unnamed: 0,date,daily_rainfall_total,highest_30_min_rainfall,highest_60_min_rainfall,highest_120_min_rainfall,mean_temperature,maximum_temperature,minimum_temperature,mean_wind_speed,max_wind_speed,year,region,daily_rainfall_total_14,highest_30_min_rainfall_14,highest_60_min_rainfall_14,highest_120_min_rainfall_14,mean_temperature_14,maximum_temperature_14,minimum_temperature_14,mean_wind_speed_14,max_wind_speed_14,aedes,dengue,fever,headache,nosebleed,vomit,recent_cases
130921,2016-01-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2016,Yishun,7.028571,3.928571,6.185714,6.628571,0.0,0.0,0.0,0.0,4.060244e-15,,,,,,,
130922,2016-01-28,15.4,9.2,10.8,14.8,0.0,0.0,0.0,0.0,0.0,2016,Yishun,8.028571,4.5,6.871429,7.6,0.0,0.0,0.0,0.0,4.060244e-15,,,,,,,
130923,2016-01-29,1.4,1.0,1.0,1.4,0.0,0.0,0.0,0.0,0.0,2016,Yishun,5.514286,3.157143,4.457143,5.157143,0.0,0.0,0.0,0.0,4.060244e-15,,,,,,,
130924,2016-01-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2016,Yishun,5.5,3.142857,4.442857,5.142857,0.0,0.0,0.0,0.0,4.060244e-15,,,,,,,
130925,2016-01-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2016,Yishun,2.042857,1.357143,1.528571,1.871429,0.0,0.0,0.0,0.0,4.060244e-15,23.0,85.0,96.0,52.0,31.0,56.0,32.0


In [155]:
# Export the merged dataframe to a CSV file for EDA and modelling.
# NOTE(review): to_csv writes the index by default, so the file gains an unnamed
# leading column that reads back as 'Unnamed: 0' — confirm downstream notebooks
# expect that before switching to index=False.
new_df.to_csv('../Data/merged_data.csv')