# CS3244 Machine Learning Team Project
## Prediction of the number of dengue cases in Singapore

First, we shall import necessary Python libraries.

In [77]:
# Import data processing tools
import pandas as pd
import numpy as np

import Levenshtein as lev

import csv, json, sys
from fuzzywuzzy import fuzz

import geopandas as gpd
from shapely.geometry import Point, Polygon

# Import visualization tools
import matplotlib.pyplot as plt

# Import utility tools
import requests
from math import *
from datetime import datetime
from datetime import timedelta

# Import ML tools
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

We shall read from various CSV files to extract dataframe for each feature in the following cells:
* Weekly **dengue** cases (*dengue_df*)
* Annual **population** (*population_df*)
* Monthly mean **sunshine** hours (*sunshine_df*)
* Monthly **surface air temperature** (*surface_air_temperature_df*)

In [None]:
# Read weekly dengue cases from csv files
raw_dengue_df = pd.read_csv(
    "Datasets/Dengue Cluster Data/weekly-number-of-dengue-and-dengue-haemorrhagic-fever-cases.csv")

# Clean data by filtering by type_dengue and remove the redundant column
dengue_df = raw_dengue_df[raw_dengue_df['type_dengue'].map(
    lambda x: str(x) == "Dengue")].drop('type_dengue', 1).reset_index()

# Rename eweek to week
dengue_df = dengue_df.rename(columns={"eweek": "week"}, errors="raise")
# Display first 5 rows of the dataframe
dengue_df.head()

In [None]:
# Read from CSV
raw_population_df = pd.read_csv(
    "Datasets/Population Data/singapore-residents-by-age-group-ethnic-group-and-sex-end-june-annual.csv")
# Only get data from 2014 to 2018
population_df = raw_population_df[raw_population_df['year'].map(
    lambda x: 2014 <= int(x) <= 2018)]
# Only get total residents for each year
population_df = population_df[population_df['level_1'].map(
    lambda x: str(x) == 'Total Residents')].drop('level_1', 1).drop('level_2', 1)
population_df = population_df.astype({'year' : 'int32', 'value': 'int32'})
population_df = population_df.groupby('year').sum()
population_df = population_df.rename(columns={"value": "population"}, errors="raise")
# Display dataframe
population_df.head()

In [None]:
# Read from CSV
raw_sunshine_df = pd.read_csv("Datasets/Sunshine Data/sunshine-duration-monthly-mean-daily-duration.csv")
# Split year_month into year and month column
raw_sunshine_df = raw_sunshine_df.rename(columns={"month": "year_month"}, errors="raise")
year_col = pd.Series([], dtype='int32')
month_col = pd.Series([], dtype='int32')
for i in range(len(raw_sunshine_df)):
    year_col[i] = int(raw_sunshine_df['year_month'][i].split('-')[0])
    month_col[i] = int(raw_sunshine_df['year_month'][i].split('-')[1])
raw_sunshine_df.insert(1, 'year', year_col)
raw_sunshine_df.insert(2, 'month', month_col)
sunshine_df = raw_sunshine_df.drop('year_month', 1)
# Display
sunshine_df.head()

In [None]:
# Read from CSV
raw_temperature_df = pd.read_csv(
    "Datasets/Surface Air Temperature Data/surface-air-temperature-monthly-mean-daily-maximum.csv")
# Split year_month into year and month column
raw_temperature_df = raw_temperature_df.rename(columns={"month": "year_month"}, errors="raise")
year_col = pd.Series([], dtype='int32')
month_col = pd.Series([], dtype='int32')
for i in range(len(raw_temperature_df)):
    year_col[i] = int(raw_temperature_df['year_month'][i].split('-')[0])
    month_col[i] = int(raw_temperature_df['year_month'][i].split('-')[1])
raw_temperature_df.insert(1, 'year', year_col)
raw_temperature_df.insert(2, 'month', month_col)
surface_air_temperature_df = raw_temperature_df.drop('year_month', 1)
# Display
surface_air_temperature_df.head()

Next, we shall merge the dataframes into one general dataframe **df** based on *year*, *month* and *week*.

In [None]:
# Merge dengue_df and population_df by year
df = dengue_df.merge(population_df, left_on='year', right_on='year').drop('index', 1)

# Create new column for cases_per_capita = number / population
# Create new column for month = (datetime(year, 1, 1) + timedelta(days=week*7)).momth
cases_per_capita = pd.Series([], dtype='float64')
month = pd.Series([], dtype='int32')
for i in range(len(df)):
    if isnan(df['number'][i]):
        cases_per_capita[i] = 0
    else:
        cases_per_capita[i] = int(df['number'][i]) / int(df['population'][i])
    year = int(df['year'][i])
    week = int(df['week'][i])
    if week > 52:
        month[i] = 12
    else:
        month[i] = (datetime(year, 1, 1) + timedelta(days=7*week)).month
df.insert(4, 'cases_per_capita', cases_per_capita)
df.insert(1, 'month', month)

# Merge sunshine and surface air temperature dataframe
df = pd.merge(df, sunshine_df, on=['year', 'month'])
df = pd.merge(df, surface_air_temperature_df, on=['year', 'month'])

# Display dataframe
df.head()

Now, we scrape the weather data from 2014 to 2019 for 5 weather stations:

In [None]:
#weather station
# Scrape from websites
base_url = "http://www.weather.gov.sg/files/dailydata/DAILYDATA_" #S24_201911.csv

# (East) Changi = 24, (West) Tuas South = 115, (North) Khatib = 122, (South) Marina Barrage = 108
# (Central) Ang Mo Kio = 109
station_dict = {24: 'Changi', 115: 'Tuas South', 122: 'Khatib', 108: 'Marina Barrage', 109: 'Ang Mo Kio'}
stations_list = [24, 115, 122, 108, 109]

# DataFrame template
df_template = pd.DataFrame(columns = ['Station', 'Year', 'Month', 'Day', 'Daily Rainfall Total (mm)',
       'Highest 30 Min Rainfall (mm)', 'Highest 60 Min Rainfall (mm)',
       'Highest 120 Min Rainfall (mm)', 'Mean Temperature (°C)',
       'Maximum Temperature (°C)', 'Minimum Temperature (°C)',
       'Mean Wind Speed (km/h)', 'Max Wind Speed (km/h)'])
weather_df = df_template.copy()

# Scrape stations and combine into a single csv file
for station in stations_list:
    station_string = "S"+str(station) if (station>9) else "S0"+str(station)
    station_df = df_template.copy()
    for year in range(2014,2020):
        for month in range(1,13):
            month_string = str(month) if (month>9) else "0"+str(month)
            url = base_url+station_string+"_"+str(year)+month_string+".csv"
            # Get csv files to temp files, and store into DataFrame
            try:
                r = requests.get(url, allow_redirects=True)
                open('temp.csv', 'wb').write(r.content)
                station_df = station_df.append(pd.read_csv("temp.csv", encoding = "ISO-8859-1"))
                weather_df = weather_df.append(pd.read_csv("temp.csv", encoding = "ISO-8859-1"))
            except:
                print(url)
                continue
    # Store into weather data folder
    csv_filename = 'Datasets/Weather Data/' + station_dict[station] + '.csv'
    station_df.to_csv(csv_filename, index=False)


We then add the week column to the weather data

In [None]:
#Weather station 
station_dict = {24: 'Changi', 115: 'Tuas South', 122: 'Khatib', 108: 'Marina Barrage', 109: 'Ang Mo Kio'}
stations_list = [24, 115, 122, 108, 109]

#add week column to each weather stations csv
for station in stations_list:
    week_col = pd.Series([], dtype='int32')
    df = pd.read_csv("Datasets/Weather Data/" + station_dict[station] + ".csv")
    for i in range(len(df)):
        week_col[i] = datetime(int(df['Year'][i]), int(df['Month'][i]), int(df['Day'][i])).isocalendar()[1]
    df.insert(3, 'Week', week_col)
    csv_filename = 'Datasets/Weather Data/' + station_dict[station] + '.csv'
    df.to_csv(csv_filename, index=False)

Convert PSI data to contain year, month, week and day columns:

In [2]:
#PSI
raw_psi_df = pd.read_csv("Datasets/PSI Data/psi_df_2016_2019.csv")

#split to year month and week
raw_psi_df = raw_psi_df.rename(columns={"timestamp": "year_month_week_day"}, errors="raise")
year_col = pd.Series([], dtype='int32')
month_col = pd.Series([], dtype='int32')
day_col = pd.Series([], dtype='int32')
week_col = pd.Series([], dtype='int32')
for i in range(len(raw_psi_df)):
    year_col[i] = int(raw_psi_df['year_month_week_day'][i].split('-')[0])
    month_col[i] = int(raw_psi_df['year_month_week_day'][i].split('-')[1])
    day_col[i] = int(raw_psi_df['year_month_week_day'][i].split('-')[2][0:2])
    week_col[i] = datetime(int(year_col[i]), int(month_col[i]), int(day_col[i])).isocalendar()[1]
raw_psi_df.insert(0, 'year', year_col)
raw_psi_df.insert(1, 'month', month_col)
raw_psi_df.insert(2, 'week', week_col)
raw_psi_df.insert(3, 'day', day_col)
psi_df = raw_psi_df.drop(['year_month_week_day'], 1)

#display
psi_df.head()


Unnamed: 0,year,month,week,day,national,south,north,east,central,west
0,2016,2,5,7,47,44,37,47,47,34
1,2016,2,6,8,59,57,54,59,57,56
2,2016,2,6,9,59,57,55,59,57,56
3,2016,2,6,9,59,57,55,59,56,56
4,2016,2,6,9,59,58,55,59,56,56


Merge 2014-2019 location dengue cluster into one csv
Add year, month, week and day columns
Split locality with '/' and '('
save as new csv

In [5]:
#merge the location data into one csv
raw_location_dengue_2014_df = pd.read_csv("Datasets/Dengue Location Cluster Data/2014-Dengue-Cluster.csv")
raw_location_dengue_2015_df = pd.read_csv("Datasets/Dengue Location Cluster Data/2015-Dengue-Cluster.csv")
raw_location_dengue_2016_df = pd.read_csv("Datasets/Dengue Location Cluster Data/2016-Dengue-Cluster.csv")
raw_location_dengue_2017_df = pd.read_csv("Datasets/Dengue Location Cluster Data/2017-Dengue-Cluster.csv", encoding='unicode_escape')
raw_location_dengue_2018_df = pd.read_csv("Datasets/Dengue Location Cluster Data/2018-Dengue-Cluster.csv")
raw_location_dengue_2019_df = pd.read_csv("Datasets/Dengue Location Cluster Data/2019-Dengue-Cluster.csv")

raw_location_dengue_df = raw_location_dengue_2014_df.append([raw_location_dengue_2015_df, raw_location_dengue_2016_df, raw_location_dengue_2017_df,
         raw_location_dengue_2018_df, raw_location_dengue_2019_df])

#save the raw csv
csv_filename = 'Datasets/Dengue Location Cluster Data/' + 'raw_location_dengue' + '.csv'
raw_location_dengue_df.to_csv(csv_filename, index=False)

raw_location_dengue_df = pd.read_csv("Datasets/Dengue Location Cluster Data/raw_location_dengue.csv")

#split year_month_day to year month and week columns
raw_location_dengue_df = raw_location_dengue_df.rename(columns={"date": "year_month_week_day"}, errors="raise")
year_col = pd.Series([], dtype='int32')
month_col = pd.Series([], dtype='int32')
day_col = pd.Series([], dtype='int32')
week_col = pd.Series([], dtype='int32')
locality_col = pd.Series([], dtype='str')
# shortcut_list = ['rd', 'jln', 'lor', 'dr', 'ave', 'st', 'pk', 'gdns', 'nth', "s'goon", 'ter']
# shortcut_dict = {'rd': 'Road', 'jln': 'Jalan', 'lor': 'Lorong', 'dr': 'Drive', 'ave': 'Avenue',
#                 'st': 'Street', 'pk' : 'Park', 'gdns' : 'Gardens', 'nth' : 'North', "s'goon" : 'Serangoon',
#                 'ter' : 'Terminal'}

for i in range(len(raw_location_dengue_df)):
#     address = []
    year_col[i] = 2000 + int(str(raw_location_dengue_df['year_month_week_day'][i])[0:2])
    month_col[i] = int(str(raw_location_dengue_df['year_month_week_day'][i])[2:4])
    day_col[i] = int(str(raw_location_dengue_df['year_month_week_day'][i])[4:])
    week_col[i] = datetime(int(year_col[i]), int(month_col[i]), int(day_col[i])).isocalendar()[1]
    #split location with '/'
    locality_col[i] = raw_location_dengue_df['locality'][i].split('/')[0]
    #split location with '('
    locality_col[i] = locality_col[i].split('(')[0]
    locality_col[i] = "".join([x if ord(x) < 128 else ' ' for x in locality_col[i]])
#     address = locality_col[i].split(' ')
#     for j in range(len(address)):
#         for shortcut in shortcut_list:
#             if(str.lower(address[j]) == shortcut):
#                 address[j] = shortcut_dict[shortcut]
#                 break
#     print(address)
#     locality_col[i] = address[0]
#     print(locality_col[i])
#     for j in range(len(address)):
#         if j != 0 and address[j] != '':
#             locality_col[i] += " " + address[j]
    
        
raw_location_dengue_df.insert(1, 'year', year_col)
raw_location_dengue_df.insert(2, 'month', month_col)
raw_location_dengue_df.insert(3, 'week', week_col)
raw_location_dengue_df.insert(4, 'day', day_col)
raw_location_dengue_df.insert(5, 'address', locality_col)
location_dengue_df = raw_location_dengue_df.drop(['year_month_week_day', 'locality'], 1)

#save
csv_filename = 'Datasets/Dengue Location Cluster Data/' + 'location_dengue' + '.csv'
location_dengue_df.to_csv(csv_filename, index=False)



Convert the buildings.json to a csv file (buildings.json contain all addresses in Singapore)

In [8]:
#path to json file
# df = pd.read_json("Datasets/buildings.json", orient="records")

# df.head()

fileInput = 'Datasets/buildings.json'
fileOutput = 'Datasets/buildings.csv'

inputFile = open(fileInput)
outputFile = open(fileOutput, 'w')
data = json.load(inputFile)
inputFile.close()
print(data)

output = csv.writer(outputFile)
output.writerow(data[0].keys()) 

for row in data:
    output.writerow(row.values())

 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Find unique locations in the location_dengue cluster csv

In [15]:
#csv of unique locations in dengue location cluster (2014 to 2019)
location_dengue_df = pd.read_csv("Datasets/Dengue Location Cluster Data/location_dengue.csv")
location_set = set()
for i in range(len(location_dengue_df)):
    location_set.add(location_dengue_df['address'][i])

fileOutput = 'Datasets/unique_locations.csv'
outputFile = open(fileOutput, 'w')
output = csv.writer(outputFile)
output.writerow(['unique location'])
for location in location_set:
    output.writerow([location])
    

In [None]:
#[IGNORE]
#add x,y coordinates to the unique locations
unique_location_df = pd.read_csv("Datasets/unique_locations.csv")
open_buildings_df = pd.read_csv("Datasets/buildings.csv")
buildings_df = open_buildings_df.drop(['POSTAL'], 1)
for i in range(len(unique_location_df)):
    for j in range(len(buildings_df)):
        if str.upper(str(unique_location_df['unique location'][i])) in str(buildings_df['ADDRESS'][j]):
            unique_location_df['X'][i] = buildings_df['X'][j]
            unique_location_df['Y'][i] = buildings_df['Y'][j]
            break

csv_filename = "Datasets/unique_locations_in_string_function_xy_new.csv"            
unique_location_df.to_csv(csv_filename, index=False)

In [None]:
#[IGNORE]
#add x,y coordinates to the unique locations
unique_location_df = pd.read_csv("Datasets/unique_locations.csv")
open_buildings_df = pd.read_csv("Datasets/buildings.csv")
buildings_df = open_buildings_df.drop(['POSTAL'], 1)
count = 0
locality_col = pd.Series([], dtype='str')

for i in range(len(unique_location_df)):
    for j in range(len(buildings_df)):
        if fuzz.token_set_ratio(str(unique_location_df['unique location'][i]), str(buildings_df['ADDRESS'][j])) >= 60:
            count += 1
            unique_location_df['X'][i] = buildings_df['X'][j]
            unique_location_df['Y'][i] = buildings_df['Y'][j]
            locality_col[i] = buildings_df['ADDRESS'][j]
            break

print(count)
csv_filename = "Datasets/unique_locations_fuzzywuzzy_60_xy.csv"
unique_location_df.insert(3, 'address from buildings', locality_col)
unique_location_df.to_csv(csv_filename, index=False)

Put in lat and long data to the unique locations

In [75]:
fileInput = 'Datasets/unique_location_lat_long.json'
unique_location_df = pd.read_csv("Datasets/unique_locations_after_processing.csv")

inputFile = open(fileInput)
data = json.load(inputFile)
inputFile.close()
lat_col = pd.Series([], dtype='float64')
long_col = pd.Series([], dtype='float64')

for i in range(len(unique_location_df)):
    if unique_location_df['unique location'][i] != 'nan':
        #print(unique_location_df['unique location'][i].strip())
        lat_col[i] = data[unique_location_df['unique location'][i].strip()]['lat']
        long_col[i] = data[unique_location_df['unique location'][i].strip()]['lng']

unique_location_df.insert(1, 'lat', lat_col)
unique_location_df.insert(2, 'long', long_col)

csv_filename = "Datasets/unique_locations_lat_long.csv"            
unique_location_df.to_csv(csv_filename, index=False)
    
#output.writerow(['unique location', 'lat', 'long'])
#print(data['Surin Ave\n']['lat'])

# for row in data:
#     output.writerow()
#     output.writerow(row.values())

Find nearest weather station to each unique location

In [None]:
unique_location_lat_long_df = pd.read_csv("Datasets/unique_locations_lat_long.csv")
weatherstation_lat_long_df = pd.read_csv("Datasets/weatherstation_lat_long.csv")

for i in range(len(unique_location_lat_long_df)):
    


Next, we can visualize the data.

In [None]:
# Copy the dataframe, not to corrupt the original
vis_df = df.copy()
x_data = range(0, vis_df.shape[0])

# columns to visualize
columns = ['cases_per_capita', 'mean_sunshine_hrs', 'temp_mean_daily_max']

# plot line graphs
for column in columns:
    f = plt.figure()
    ax = f.add_subplot()
    ax.plot(x_data, vis_df[column], label=column)
    ax.legend()

In [None]:
# fix random seed for reproducibility
np.random.seed(7)

# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)



Here we extract the min, max and avg of same week
First convert Ang Mo Kio station.

In [121]:
# Read from CSV
raw_AngMoKio_df = pd.read_csv(
    "Datasets/Weather Data/Ang Mo Kio.csv")
raw_Changi_df = pd.read_csv(
    "Datasets/Weather Data/Changi.csv")
raw_Khatib_df = pd.read_csv(
    "Datasets/Weather Data/Khatib.csv")
raw_MarinaBarrage_df = pd.read_csv(
    "Datasets/Weather Data/Marina Barrage.csv")
raw_TuasSouth_df = pd.read_csv(
    "Datasets/Weather Data/Tuas South.csv")

columns = ['Year', 'Week', 'Max Daily Rainfall Total (mm)', 'Min Daily Rainfall Total (mm)',
               'Average Daily Rainfall Total (mm)', 'Highest 30 Min Rainfall (mm)', 'Highest 60 Min Rainfall (mm)',
                  'Highest 120 Min Rainfall (mm)', 'Mean Temperature (°C)', 'Maximum Temperature (°C)',
                      'Minimum Temperature (°C)', 'Mean Wind Speed (km/h)', 'Max Wind Speed (km/h)']

del raw_AngMoKio_df['Station']
AngMoKio_df = pd.DataFrame(columns=columns)

def isfloat(value):
    try:
        float(value)
        return True
    except ValueError:
        return False

index = 0
for i in range(2014, 2020):
    for j in range(1, 54):
        temp_df = raw_AngMoKio_df[(raw_AngMoKio_df['Year'] == i) & (raw_AngMoKio_df['Week'] == j)]
        if len(temp_df) == 0:
            continue
            
        rainSerie = temp_df['Daily Rainfall Total (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d30RainSerie = temp_df['Highest 30 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d60RainSerie = temp_df['Highest 60 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d120RainSerie = temp_df['Highest 120 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanTempSerie = temp_df['Mean Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxTempSerie = temp_df['Maximum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        minTempSerie = temp_df['Minimum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanWindSerie = temp_df['Mean Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxWindSerie = temp_df['Max Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxRain = rainSerie.max()
        minRain = rainSerie.min()
        avgRain = rainSerie.sum() / len(rainSerie.dropna())
        max30Rain = d30RainSerie.max()
        max60Rain = d60RainSerie.max()
        max120Rain = d120RainSerie.max()
        avgTemp = meanTempSerie.sum() / len(meanTempSerie.dropna())
        maxTemp = maxTempSerie.max()
        minTemp = minTempSerie.min()
        avgWind = meanWindSerie.sum() / len(meanWindSerie.dropna())
        maxWind = maxWindSerie.max()
        AngMoKio_df.loc[index] = [i, j, maxRain, minRain, avgRain, max30Rain, max60Rain, max120Rain, avgTemp,
                                         maxTemp, minTemp, avgWind, maxWind]
        index = index + 1

AngMoKio_df


Unnamed: 0,Year,Week,Max Daily Rainfall Total (mm),Min Daily Rainfall Total (mm),Average Daily Rainfall Total (mm),Highest 30 Min Rainfall (mm),Highest 60 Min Rainfall (mm),Highest 120 Min Rainfall (mm),Mean Temperature (°C),Maximum Temperature (°C),Minimum Temperature (°C),Mean Wind Speed (km/h),Max Wind Speed (km/h)
0,2014.0,1.0,8.8,0.0,3.480000,8.6,8.8,8.8,26.160000,30.9,23.3,6.380000,32.0
1,2014.0,2.0,40.0,0.0,7.028571,8.4,9.4,13.8,26.157143,31.8,23.3,5.657143,35.6
2,2014.0,3.0,0.0,0.0,0.000000,0.0,0.0,0.0,25.742857,29.4,23.3,9.328571,41.4
3,2014.0,4.0,0.0,0.0,0.000000,0.0,0.0,0.0,25.342857,29.3,22.5,9.157143,38.9
4,2014.0,5.0,0.0,0.0,0.000000,0.0,0.0,0.0,25.728571,31.6,22.2,8.900000,40.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,2019.0,48.0,31.8,0.0,11.700000,18.0,26.4,31.8,27.833333,34.5,24.9,13.700000,38.9
309,2019.0,49.0,0.2,0.0,0.050000,0.2,0.2,0.2,27.050000,31.2,25.1,15.500000,50.4
310,2019.0,50.0,46.2,0.6,27.714286,13.2,17.4,25.6,24.671429,31.0,22.1,13.485714,45.0
311,2019.0,51.0,7.8,0.0,2.371429,7.4,7.4,7.4,26.214286,30.9,22.8,14.400000,38.5


Convert Changi station

In [122]:
del raw_Changi_df['Station']
Changi_df = pd.DataFrame(columns=columns)        
        
index = 0
for i in range(2014, 2020):
    for j in range(1, 54):
        temp_df = raw_Changi_df[(raw_Changi_df['Year'] == i) & (raw_Changi_df['Week'] == j)]
        if len(temp_df) == 0:
            continue
            
        rainSerie = temp_df['Daily Rainfall Total (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d30RainSerie = temp_df['Highest 30 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d60RainSerie = temp_df['Highest 60 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d120RainSerie = temp_df['Highest 120 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanTempSerie = temp_df['Mean Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxTempSerie = temp_df['Maximum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        minTempSerie = temp_df['Minimum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanWindSerie = temp_df['Mean Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxWindSerie = temp_df['Max Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxRain = rainSerie.max()
        minRain = rainSerie.min()
        avgRain = rainSerie.sum() / len(rainSerie.dropna())
        max30Rain = d30RainSerie.max()
        max60Rain = d60RainSerie.max()
        max120Rain = d120RainSerie.max()
        avgTemp = meanTempSerie.sum() / len(meanTempSerie.dropna())
        maxTemp = maxTempSerie.max()
        minTemp = minTempSerie.min()
        avgWind = meanWindSerie.sum() / len(meanWindSerie.dropna())
        maxWind = maxWindSerie.max()
        Changi_df.loc[index] = [i, j, maxRain, minRain, avgRain, max30Rain, max60Rain, max120Rain, avgTemp,
                                         maxTemp, minTemp, avgWind, maxWind]
        index = index + 1
Changi_df

Unnamed: 0,Year,Week,Max Daily Rainfall Total (mm),Min Daily Rainfall Total (mm),Average Daily Rainfall Total (mm),Highest 30 Min Rainfall (mm),Highest 60 Min Rainfall (mm),Highest 120 Min Rainfall (mm),Mean Temperature (°C),Maximum Temperature (°C),Minimum Temperature (°C),Mean Wind Speed (km/h),Max Wind Speed (km/h)
0,2014.0,1.0,18.4,0.0,3.680000,8.6,10.8,15.4,26.620000,31.1,23.3,8.960000,36.0
1,2014.0,2.0,31.2,0.0,8.142857,18.2,18.2,18.2,26.371429,31.8,23.7,7.514286,34.9
2,2014.0,3.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.242857,30.3,23.5,13.214286,40.0
3,2014.0,4.0,0.0,0.0,0.000000,0.0,0.0,0.0,25.700000,29.4,22.7,12.285714,41.4
4,2014.0,5.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.057143,31.6,22.4,13.671429,42.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,2019.0,48.0,73.4,0.0,13.371429,36.0,48.8,55.6,27.714286,33.0,24.0,7.357143,34.9
309,2019.0,49.0,59.6,0.0,8.971429,27.4,47.2,56.8,27.471429,31.9,23.9,12.000000,42.8
310,2019.0,50.0,74.2,3.4,43.400000,20.6,26.2,36.8,24.914286,31.1,22.7,7.471429,41.8
311,2019.0,51.0,34.8,0.0,7.442857,18.8,20.0,34.2,26.500000,31.1,23.2,7.657143,37.4


Convert Khatib station

In [127]:
Khatib_df = pd.DataFrame(columns=columns)        
        
index = 0
for i in range(2014, 2020):
    for j in range(1, 54):
        temp_df = raw_Khatib_df[(raw_Khatib_df['Year'] == i) & (raw_Khatib_df['Week'] == j)]
        if len(temp_df) == 0:
            continue
            
        rainSerie = temp_df['Daily Rainfall Total (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d30RainSerie = temp_df['Highest 30 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d60RainSerie = temp_df['Highest 60 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d120RainSerie = temp_df['Highest 120 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanTempSerie = temp_df['Mean Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxTempSerie = temp_df['Maximum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        minTempSerie = temp_df['Minimum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanWindSerie = temp_df['Mean Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxWindSerie = temp_df['Max Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxRain = rainSerie.max()
        minRain = rainSerie.min()
        avgRain = rainSerie.sum() / len(rainSerie.dropna())
        max30Rain = d30RainSerie.max()
        max60Rain = d60RainSerie.max()
        max120Rain = d120RainSerie.max()
        avgTemp = meanTempSerie.sum() / len(meanTempSerie.dropna())
        maxTemp = maxTempSerie.max()
        minTemp = minTempSerie.min()
        avgWind = meanWindSerie.sum() / len(meanWindSerie.dropna())
        maxWind = maxWindSerie.max()
        Khatib_df.loc[index] = [i, j, maxRain, minRain, avgRain, max30Rain, max60Rain, max120Rain, avgTemp,
                                         maxTemp, minTemp, avgWind, maxWind]
        index = index + 1
Khatib_df

  avgRain = rainSerie.sum() / len(rainSerie.dropna())
  avgTemp = meanTempSerie.sum() / len(meanTempSerie.dropna())
  avgWind = meanWindSerie.sum() / len(meanWindSerie.dropna())


Unnamed: 0,Year,Week,Max Daily Rainfall Total (mm),Min Daily Rainfall Total (mm),Average Daily Rainfall Total (mm),Highest 30 Min Rainfall (mm),Highest 60 Min Rainfall (mm),Highest 120 Min Rainfall (mm),Mean Temperature (°C),Maximum Temperature (°C),Minimum Temperature (°C),Mean Wind Speed (km/h),Max Wind Speed (km/h)
0,2014.0,1.0,19.8,0.0,5.080000,17.2,17.6,19.8,26.760000,32.4,23.7,5.660000,29.2
1,2014.0,2.0,61.8,0.0,12.685714,16.8,24.4,24.6,26.542857,32.7,23.5,4.985714,32.4
2,2014.0,3.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.371429,30.8,23.4,7.771429,34.6
3,2014.0,4.0,0.0,0.0,0.000000,0.0,0.0,0.0,25.857143,30.6,22.4,7.371429,31.7
4,2014.0,5.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.357143,33.0,21.9,7.800000,36.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,2019.0,48.0,97.2,0.0,23.000000,57.2,69.8,74.4,27.114286,33.3,23.0,7.100000,41.8
306,2019.0,49.0,45.4,0.0,6.485714,14.6,22.6,40.8,27.100000,32.7,21.8,8.714286,41.8
307,2019.0,50.0,66.4,2.0,28.342857,23.6,27.6,40.4,24.742857,31.6,22.9,6.328571,47.5
308,2019.0,51.0,15.0,0.0,2.714286,14.4,14.6,14.8,25.600000,30.4,22.1,6.785714,33.5


Convert Marina Barrage station

In [128]:
del raw_MarinaBarrage_df['Station']
MarinaBarrage_df = pd.DataFrame(columns=columns)        
        
index = 0
for i in range(2014, 2020):
    for j in range(1, 54):
        temp_df = raw_MarinaBarrage_df[(raw_MarinaBarrage_df['Year'] == i) & (raw_MarinaBarrage_df['Week'] == j)]
        if len(temp_df) == 0:
            continue
            
        rainSerie = temp_df['Daily Rainfall Total (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d30RainSerie = temp_df['Highest 30 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d60RainSerie = temp_df['Highest 60 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d120RainSerie = temp_df['Highest 120 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanTempSerie = temp_df['Mean Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxTempSerie = temp_df['Maximum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        minTempSerie = temp_df['Minimum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanWindSerie = temp_df['Mean Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxWindSerie = temp_df['Max Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxRain = rainSerie.max()
        minRain = rainSerie.min()
        avgRain = rainSerie.sum() / len(rainSerie.dropna())
        max30Rain = d30RainSerie.max()
        max60Rain = d60RainSerie.max()
        max120Rain = d120RainSerie.max()
        avgTemp = meanTempSerie.sum() / len(meanTempSerie.dropna())
        maxTemp = maxTempSerie.max()
        minTemp = minTempSerie.min()
        avgWind = meanWindSerie.sum() / len(meanWindSerie.dropna())
        maxWind = maxWindSerie.max()
        MarinaBarrage_df.loc[index] = [i, j, maxRain, minRain, avgRain, max30Rain, max60Rain, max120Rain, avgTemp,
                                         maxTemp, minTemp, avgWind, maxWind]
        index = index + 1
MarinaBarrage_df

  avgRain = rainSerie.sum() / len(rainSerie.dropna())
  avgTemp = meanTempSerie.sum() / len(meanTempSerie.dropna())
  avgWind = meanWindSerie.sum() / len(meanWindSerie.dropna())


Unnamed: 0,Year,Week,Max Daily Rainfall Total (mm),Min Daily Rainfall Total (mm),Average Daily Rainfall Total (mm),Highest 30 Min Rainfall (mm),Highest 60 Min Rainfall (mm),Highest 120 Min Rainfall (mm),Mean Temperature (°C),Maximum Temperature (°C),Minimum Temperature (°C),Mean Wind Speed (km/h),Max Wind Speed (km/h)
0,2014.0,1.0,21.8,0.0,6.960000,15.6,17.6,17.8,26.940000,31.9,23.8,8.140000,32.0
1,2014.0,2.0,25.0,0.0,6.942857,19.4,22.0,24.8,27.042857,34.0,24.1,7.442857,44.3
2,2014.0,3.0,1.0,0.0,0.142857,1.0,1.0,1.0,26.500000,31.0,24.4,12.471429,45.0
3,2014.0,4.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.085714,31.0,23.9,12.300000,45.4
4,2014.0,5.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.542857,34.3,23.7,11.842857,37.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,2019.0,48.0,20.0,0.0,4.171429,9.4,10.2,15.0,28.533333,34.5,24.9,6.985714,34.6
309,2019.0,49.0,52.8,0.0,10.560000,36.6,44.4,46.2,27.980000,32.9,23.2,12.340000,45.7
310,2019.0,50.0,54.0,25.6,37.333333,9.8,15.2,20.2,25.000000,29.9,23.7,6.566667,36.7
311,2019.0,51.0,6.6,0.0,1.520000,3.8,3.8,4.4,27.166667,31.1,23.8,7.560000,33.5


Convert Tuas South station

In [143]:
TuasSouth_df = pd.DataFrame(columns=columns)        
        
index = 0
for i in range(2014, 2020):
    for j in range(1, 54):
        temp_df = raw_TuasSouth_df[(raw_MarinaBarrage_df['Year'] == i) & (raw_MarinaBarrage_df['Week'] == j)]
        if len(temp_df) == 0:
            continue
            
        rainSerie = temp_df['Daily Rainfall Total (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d30RainSerie = temp_df['Highest 30 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d60RainSerie = temp_df['Highest 60 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        d120RainSerie = temp_df['Highest 120 Min Rainfall (mm)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanTempSerie = temp_df['Mean Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxTempSerie = temp_df['Maximum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        minTempSerie = temp_df['Minimum Temperature (°C)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        meanWindSerie = temp_df['Mean Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxWindSerie = temp_df['Max Wind Speed (km/h)'].map(lambda x : float(x) if isfloat(x) else np.nan)
        
        maxRain = rainSerie.max()
        minRain = rainSerie.min()
        avgRain = rainSerie.sum() / len(rainSerie.dropna())
        max30Rain = d30RainSerie.max()
        max60Rain = d60RainSerie.max()
        max120Rain = d120RainSerie.max()
        avgTemp = meanTempSerie.sum() / len(meanTempSerie.dropna())
        maxTemp = maxTempSerie.max()
        minTemp = minTempSerie.min()
        avgWind = meanWindSerie.sum() / len(meanWindSerie.dropna())
        maxWind = maxWindSerie.max()
        TuasSouth_df.loc[index] = [i, j, maxRain, minRain, avgRain, max30Rain, max60Rain, max120Rain, avgTemp,
                                         maxTemp, minTemp, avgWind, maxWind]
        index = index + 1

        
TuasSouth_df

  avgTemp = meanTempSerie.sum() / len(meanTempSerie.dropna())
  avgRain = rainSerie.sum() / len(rainSerie.dropna())
  avgWind = meanWindSerie.sum() / len(meanWindSerie.dropna())


Unnamed: 0,Year,Week,Max Daily Rainfall Total (mm),Min Daily Rainfall Total (mm),Average Daily Rainfall Total (mm),Highest 30 Min Rainfall (mm),Highest 60 Min Rainfall (mm),Highest 120 Min Rainfall (mm),Mean Temperature (°C),Maximum Temperature (°C),Minimum Temperature (°C),Mean Wind Speed (km/h),Max Wind Speed (km/h)
0,2014.0,1.0,57.6,0.0,15.040000,30.6,42.2,52.0,26.580000,32.0,23.3,6.340000,72.4
1,2014.0,2.0,21.2,0.0,4.857143,5.0,6.8,9.6,26.642857,31.7,23.2,6.700000,66.2
2,2014.0,3.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.585714,30.9,23.8,8.142857,28.1
3,2014.0,4.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.185714,30.5,23.4,8.300000,27.7
4,2014.0,5.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.742857,32.7,23.1,8.471429,30.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,2019.0,48.0,32.4,0.2,8.428571,12.6,19.4,22.6,27.614286,32.9,23.8,5.657143,40.7
309,2019.0,49.0,15.0,0.0,2.285714,6.8,9.0,11.2,27.771429,32.8,24.0,6.542857,42.8
310,2019.0,50.0,46.6,0.0,16.000000,13.4,22.2,25.0,25.528571,31.3,23.2,6.585714,101.9
311,2019.0,51.0,62.0,0.0,13.771429,32.2,48.2,58.8,27.133333,31.7,24.9,5.714286,28.8


Output weekly weather data of each stations.

In [133]:
AngMoKio_csv_filename = "Datasets/Weather Data/Weekly Ang Mo Kio.csv" 
Changi_csv_filename = "Datasets/Weather Data/Weekly Changi.csv" 
Khatib_csv_filename = "Datasets/Weather Data/Weekly Khatib.csv" 
MarinaBarrage_csv_filename = "Datasets/Weather Data/Weekly Marina Barrage.csv" 
TuasSouth_csv_filename = "Datasets/Weather Data/Weekly Tuas South.csv" 

AngMoKio_df.to_csv(AngMoKio_csv_filename, index=False)
Changi_df.to_csv(Changi_csv_filename, index=False)
Khatib_df.to_csv(Khatib_csv_filename, index=False)
MarinaBarrage_df.to_csv(MarinaBarrage_csv_filename, index=False)
TuasSouth_df.to_csv(TuasSouth_csv_filename, index=False)

Combine stations' data to weather data for whole SG.

In [145]:
Singapore_df = pd.DataFrame(columns=columns)
for i in range(303):
    rowAngMoKio = AngMoKio_df.iloc[i,:]
    rowChangi = Changi_df.iloc[i,:]
    rowKhatib = Khatib_df.iloc[i,:]
    rowMarinaBarrage = MarinaBarrage_df.iloc[i,:]
    rowTuasSouth = TuasSouth_df.iloc[i,:]
    
    maxRainData = [rowAngMoKio['Max Daily Rainfall Total (mm)'], rowChangi['Max Daily Rainfall Total (mm)'],
                   rowKhatib['Max Daily Rainfall Total (mm)'], rowMarinaBarrage['Max Daily Rainfall Total (mm)'],
                   rowTuasSouth['Max Daily Rainfall Total (mm)']]
    minRainData = [rowAngMoKio['Min Daily Rainfall Total (mm)'], rowChangi['Min Daily Rainfall Total (mm)'],
                   rowKhatib['Min Daily Rainfall Total (mm)'], rowMarinaBarrage['Min Daily Rainfall Total (mm)'],
                   rowTuasSouth['Min Daily Rainfall Total (mm)']]
    avgRainData = [rowAngMoKio['Average Daily Rainfall Total (mm)'], rowChangi['Average Daily Rainfall Total (mm)'],
                   rowKhatib['Average Daily Rainfall Total (mm)'], rowMarinaBarrage['Average Daily Rainfall Total (mm)'],
                   rowTuasSouth['Average Daily Rainfall Total (mm)']]
    max30RainData = [rowAngMoKio['Highest 30 Min Rainfall (mm)'], rowChangi['Highest 30 Min Rainfall (mm)'],
                   rowKhatib['Highest 30 Min Rainfall (mm)'], rowMarinaBarrage['Highest 30 Min Rainfall (mm)'],
                   rowTuasSouth['Highest 30 Min Rainfall (mm)']]
    max60RainData = [rowAngMoKio['Highest 60 Min Rainfall (mm)'], rowChangi['Highest 60 Min Rainfall (mm)'],
                   rowKhatib['Highest 60 Min Rainfall (mm)'], rowMarinaBarrage['Highest 60 Min Rainfall (mm)'],
                   rowTuasSouth['Highest 60 Min Rainfall (mm)']]
    max120RainData = [rowAngMoKio['Highest 120 Min Rainfall (mm)'], rowChangi['Highest 120 Min Rainfall (mm)'],
                   rowKhatib['Highest 120 Min Rainfall (mm)'], rowMarinaBarrage['Highest 120 Min Rainfall (mm)'],
                   rowTuasSouth['Highest 120 Min Rainfall (mm)']]
    avgTempData = [rowAngMoKio['Mean Temperature (°C)'], rowChangi['Mean Temperature (°C)'],
                   rowKhatib['Mean Temperature (°C)'], rowMarinaBarrage['Mean Temperature (°C)'],
                   rowTuasSouth['Mean Temperature (°C)']]
    maxTempData = [rowAngMoKio['Maximum Temperature (°C)'], rowChangi['Maximum Temperature (°C)'],
                   rowKhatib['Maximum Temperature (°C)'], rowMarinaBarrage['Maximum Temperature (°C)'],
                   rowTuasSouth['Maximum Temperature (°C)']]
    minTempData = [rowAngMoKio['Minimum Temperature (°C)'], rowChangi['Minimum Temperature (°C)'],
                   rowKhatib['Minimum Temperature (°C)'], rowMarinaBarrage['Minimum Temperature (°C)'],
                   rowTuasSouth['Minimum Temperature (°C)']]
    avgWindData = [rowAngMoKio['Mean Wind Speed (km/h)'], rowChangi['Mean Wind Speed (km/h)'],
                   rowKhatib['Mean Wind Speed (km/h)'], rowMarinaBarrage['Mean Wind Speed (km/h)'],
                   rowTuasSouth['Mean Wind Speed (km/h)']]
    maxWindData = [rowAngMoKio['Max Wind Speed (km/h)'], rowChangi['Max Wind Speed (km/h)'],
                   rowKhatib['Max Wind Speed (km/h)'], rowMarinaBarrage['Max Wind Speed (km/h)'],
                   rowTuasSouth['Max Wind Speed (km/h)']]
    
    maxRainSerie = pd.Series(maxRainData) 
    minRainSerie = pd.Series(minRainData) 
    avgRainSerie = pd.Series(avgRainData) 
    max30RainSerie = pd.Series(max30RainData) 
    max60RainSerie = pd.Series(max60RainData) 
    max120RainSerie = pd.Series(max120RainData) 
    avgTempSerie = pd.Series(avgTempData) 
    maxTempSerie = pd.Series(maxTempData) 
    minTempSerie = pd.Series(minTempData) 
    avgWindSerie = pd.Series(avgWindData) 
    maxWindSerie = pd.Series(maxWindData) 
    
    maxRain = maxRainSerie.max() 
    minRain = minRainSerie.min() 
    avgRain = avgRainSerie.sum() / len(avgRainSerie.dropna()) 
    max30Rain = max30RainSerie.max() 
    max60Rain = max60RainSerie.max() 
    max120Rain = max120RainSerie.max() 
    avgTemp = avgTempSerie.sum() / len(avgTempSerie.dropna())
    maxTemp = maxTempSerie.max()
    minTemp = minTempSerie.min() 
    avgWind = avgWindSerie.sum() / len(avgWindSerie.dropna())
    maxWind = maxWindSerie.max() 
    Singapore_df.loc[i] = [int(rowChangi['Year']), int(rowChangi['Week']), maxRain, minRain, avgRain, max30Rain, max60Rain, max120Rain, avgTemp,
                                         maxTemp, minTemp, avgWind, maxWind]
Singapore_df.to_csv(TuasSouth_csv_filename, index=False)

Unnamed: 0,Year,Week,Max Daily Rainfall Total (mm),Min Daily Rainfall Total (mm),Average Daily Rainfall Total (mm),Highest 30 Min Rainfall (mm),Highest 60 Min Rainfall (mm),Highest 120 Min Rainfall (mm),Mean Temperature (°C),Maximum Temperature (°C),Minimum Temperature (°C),Mean Wind Speed (km/h),Max Wind Speed (km/h)
0,2014.0,1.0,57.6,0.0,6.848000,30.6,42.2,52.0,26.612000,32.4,23.3,7.096000,72.4
1,2014.0,2.0,61.8,0.0,7.931429,19.4,24.4,24.8,26.551429,34.0,23.2,6.460000,66.2
2,2014.0,3.0,1.0,0.0,0.028571,1.0,1.0,1.0,26.288571,31.0,23.3,10.185714,45.0
3,2014.0,4.0,0.0,0.0,0.000000,0.0,0.0,0.0,25.834286,31.0,22.4,9.882857,45.4
4,2014.0,5.0,0.0,0.0,0.000000,0.0,0.0,0.0,26.285714,34.3,21.9,10.137143,42.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,2019.0,38.0,40.5,0.0,2.060000,31.0,40.0,40.5,28.653143,34.0,21.5,10.002857,46.1
299,2019.0,39.0,39.8,0.0,5.743810,14.6,23.6,28.2,28.153571,34.9,22.8,8.284286,47.2
300,2019.0,40.0,53.6,0.0,5.794286,24.8,29.8,30.2,27.523571,34.2,22.5,7.799524,60.5
301,2019.0,41.0,27.0,0.0,4.702857,19.5,23.0,27.0,27.998857,34.5,22.6,7.262857,47.5
