# explore_data_gov_sg_api

## Purpose:
Explore the weather-related APIs at https://developers.data.gov.sg.

## History:
- 2018-04-18 - Shi Yuan: Removed API Key requirement since Feb 2018, code changes
- 2017-05 - Benjamin S. Grandey
- 2017-05-29 - Moving from atmos-scripts repository to access-data-gov-sg repository, and renaming from data_gov_sg_explore.ipynb to explore_data_gov_sg_api.ipynb.

In [68]:
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from tqdm import tqdm

%matplotlib inline

## Meta-data for available meteorological APIs
[I added this section after exploring the wind-speed data - see below.]

In [2]:
# Meteorological variables
for variable in ['rainfall', 'wind-speed', 'wind-direction', 'air-temperature', 'relative-humidity']:
    print(variable)
    r = requests.get('https://api.data.gov.sg/v1/environment/{}'.format(variable))
    metadata = r.json()['metadata']
    for key in metadata.keys():
        if key != 'stations':  # don't print information about stations
            print('  {}: {}'.format(key, r.json()['metadata'][key]))

rainfall
  reading_type: TB1 Rainfall 5 Minute Total F
  reading_unit: mm
wind-speed
  reading_type: Wind Speed AVG(S)10M M1M
  reading_unit: knots
wind-direction
  reading_type: Wind Dir AVG (S) 10M M1M
  reading_unit: degrees
air-temperature
  reading_type: DBT 1M F
  reading_unit: deg C
relative-humidity
  reading_type: RH 1M F
  reading_unit: percentage


## Specify Date

In [95]:
date = '2018-03-24'
datetime = date + ' 00:05:00'
datetime

'2018-03-24 00:05:00'

## Wind-speed (5-min interval)
https://data.gov.sg/dataset/realtime-weather-readings?resource_id=16035f22-37b4-4a5c-b024-ca2381f11b48

In [77]:
pd.date_range(datetime, periods=(24*12+1), freq='5min')

DatetimeIndex(['2017-12-31 00:05:00', '2017-12-31 00:10:00',
               '2017-12-31 00:15:00', '2017-12-31 00:20:00',
               '2017-12-31 00:25:00', '2017-12-31 00:30:00',
               '2017-12-31 00:35:00', '2017-12-31 00:40:00',
               '2017-12-31 00:45:00', '2017-12-31 00:50:00',
               ...
               '2017-12-31 23:20:00', '2017-12-31 23:25:00',
               '2017-12-31 23:30:00', '2017-12-31 23:35:00',
               '2017-12-31 23:40:00', '2017-12-31 23:45:00',
               '2017-12-31 23:50:00', '2017-12-31 23:55:00',
               '2018-01-01 00:00:00', '2018-01-01 00:05:00'],
              dtype='datetime64[ns]', length=289, freq='5T')

In [38]:
# Get wind-speed at 5-min intervals on a specific date
# Note: if 'date' is used instead of 'date_time', the API appears to timeout
wind_speed_df = pd.DataFrame(columns=['station_id', 'wind-speed', 'timestamp (SGT)'])
for dt in pd.date_range(datetime, periods=(24*12+1), freq='5min'):
    r = requests.get('https://api.data.gov.sg/v1/environment/wind-speed',
                     params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
    temp_df = pd.DataFrame(r.json()['items'][0]['readings'])
    temp_df = temp_df.rename(columns={'value': 'wind-speed'})
    temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
    wind_speed_df = wind_speed_df.append(temp_df, ignore_index=True)
#wind_speed_df.head(15)

stations_df = pd.DataFrame(r.json()['metadata']['stations'])
stations_df['longitude'] = ''
stations_df['latitude'] = ''

for i in range(0,len(stations_df)):
    stations_df['longitude'][i] = stations_df['location'][i]['longitude']
    stations_df['latitude'][i] = stations_df['location'][i]['latitude']
    
stations_df = stations_df.rename(columns={'id': 'station_id'})
stations_df

Unnamed: 0,device_id,station_id,location,name,longitude,latitude
0,S109,S109,"{'latitude': 1.3764, 'longitude': 103.8492}",Ang Mo Kio Avenue 5,103.849,1.3764
1,S117,S117,"{'latitude': 1.256, 'longitude': 103.679}",Banyan Road,103.679,1.256
2,S50,S50,"{'latitude': 1.3337, 'longitude': 103.7768}",Clementi Road,103.777,1.3337
3,S107,S107,"{'latitude': 1.3135, 'longitude': 103.9625}",East Coast Parkway,103.963,1.3135
4,S43,S43,"{'latitude': 1.3399, 'longitude': 103.8878}",Kim Chuan Road,103.888,1.3399
5,S106,S106,"{'latitude': 1.4168, 'longitude': 103.9673}",Pulau Ubin,103.967,1.4168
6,S102,S102,"{'latitude': 1.189, 'longitude': 103.768}",Semakau Landfill,103.768,1.189
7,S122,S122,"{'latitude': 1.41731, 'longitude': 103.8249}",Sembawang Road,103.825,1.41731
8,S115,S115,"{'latitude': 1.29377, 'longitude': 103.61843}",Tuas South Avenue 3,103.618,1.29377
9,S24,S24,"{'latitude': 1.3678, 'longitude': 103.9826}",Upper Changi Road North,103.983,1.3678


In [39]:
# Union of wind-speed and station data
outer_df = pd.merge(wind_speed_df, stations_df, how='outer', on=['station_id'])
outer_df.head(15)

# drop duplicating info columns
outer_wind_speed_df = outer_df.drop(['location','device_id'], axis=1)
outer_wind_speed_df.head(15)

Unnamed: 0,station_id,wind-speed,timestamp (SGT),name,longitude,latitude
0,S109,5.3,2017-05-24 00:00:00,Ang Mo Kio Avenue 5,103.849,1.3764
1,S109,4.1,2017-05-24 00:05:00,Ang Mo Kio Avenue 5,103.849,1.3764
2,S109,4.7,2017-05-24 00:10:00,Ang Mo Kio Avenue 5,103.849,1.3764
3,S109,6.0,2017-05-24 00:15:00,Ang Mo Kio Avenue 5,103.849,1.3764
4,S109,9.3,2017-05-24 00:20:00,Ang Mo Kio Avenue 5,103.849,1.3764
5,S109,6.4,2017-05-24 00:25:00,Ang Mo Kio Avenue 5,103.849,1.3764
6,S109,2.9,2017-05-24 00:30:00,Ang Mo Kio Avenue 5,103.849,1.3764
7,S109,4.7,2017-05-24 00:35:00,Ang Mo Kio Avenue 5,103.849,1.3764
8,S109,3.1,2017-05-24 00:40:00,Ang Mo Kio Avenue 5,103.849,1.3764
9,S109,7.2,2017-05-24 00:45:00,Ang Mo Kio Avenue 5,103.849,1.3764


In [67]:
filename = 'wind_speed_{}.csv'.format(date)
filename

outer_wind_speed_df.to_csv(filename, index = False)

In [40]:
# export to csv
#outer_df.to_csv('output.csv', index = False)

In [23]:
#outer_df.info()

In [24]:
#outer_df.groupby('name').describe()

In [17]:
#wind_speed_df.info()

In [18]:
#wind_speed_df.groupby('station_id').describe()

## Rainfall

In [99]:
# Get rainfall at 5-min intervals on a specific date
rainfall_df = pd.DataFrame(columns=['station_id', 'rainfall', 'timestamp (SGT)'])
for dt in tqdm(pd.date_range(datetime, periods=(24*12+1), freq='5min')):  # I remember this was a wet day
    r = requests.get('https://api.data.gov.sg/v1/environment/rainfall',
                     params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
    try:
        temp_df = pd.DataFrame(r.json()['items'][0]['readings'])
    except ValueError:
        print('No data JSONDecodeError {}'.format(dt))
        continue
    except KeyError:
        print('No items KeyError {}'.format(dt))
        continue
    temp_df = temp_df.rename(columns={'value': 'rainfall'})
    temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
    rainfall_df = rainfall_df.append(temp_df, ignore_index=True)
#rainfall_df.head()

stations_df = pd.DataFrame(r.json()['metadata']['stations'])
stations_df['longitude'] = ''
stations_df['latitude'] = ''

for i in range(0,len(stations_df)):
    stations_df['longitude'][i] = stations_df['location'][i]['longitude']
    stations_df['latitude'][i] = stations_df['location'][i]['latitude']
    
stations_df = stations_df.rename(columns={'id': 'station_id'})
stations_df

100%|██████████| 289/289 [00:58<00:00,  4.97it/s]


Unnamed: 0,device_id,station_id,location,name,longitude,latitude
0,S77,S77,"{'latitude': 1.2937, 'longitude': 103.8125}",Alexandra Road,103.812,1.2937
1,S109,S109,"{'latitude': 1.3764, 'longitude': 103.8492}",Ang Mo Kio Avenue 5,103.849,1.3764
2,S117,S117,"{'latitude': 1.256, 'longitude': 103.679}",Banyan Road,103.679,1.256
3,S55,S55,"{'latitude': 1.3837, 'longitude': 103.886}",Buangkok Green,103.886,1.3837
4,S64,S64,"{'latitude': 1.3824, 'longitude': 103.7603}",Bukit Panjang Road,103.76,1.3824
5,S90,S90,"{'latitude': 1.3191, 'longitude': 103.8191}",Bukit Timah Road,103.819,1.3191
6,S61,S61,"{'latitude': 1.323, 'longitude': 103.9217}",Chai Chee Street,103.922,1.323
7,S114,S114,"{'latitude': 1.38, 'longitude': 103.73}",Choa Chu Kang Avenue 4,103.73,1.38
8,S11,S11,"{'latitude': 1.3746, 'longitude': 103.6938}",Choa Chu Kang Road,103.694,1.3746
9,S50,S50,"{'latitude': 1.3337, 'longitude': 103.7768}",Clementi Road,103.777,1.3337


In [96]:
# Get rainfall at 30-min intervals on a specific date
rainfall_df = pd.DataFrame(columns=['station_id', 'rainfall', 'timestamp (SGT)'])
for dt in tqdm(pd.date_range(datetime, periods=(24*(60/30)+1), freq='30min')):  # I remember this was a wet day
    r = requests.get('https://api.data.gov.sg/v1/environment/rainfall',
                     params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
    try:
        temp_df = pd.DataFrame(r.json()['items'][0]['readings'])
    except ValueError:
        print('No data JSONDecodeError {}'.format(dt))
        continue
    except KeyError:
        print('No items KeyError {}'.format(dt))
        continue
    temp_df = temp_df.rename(columns={'value': 'rainfall'})
    temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
    rainfall_df = rainfall_df.append(temp_df, ignore_index=True)
#rainfall_df.head()

stations_df = pd.DataFrame(r.json()['metadata']['stations'])
stations_df['longitude'] = ''
stations_df['latitude'] = ''

for i in range(0,len(stations_df)):
    stations_df['longitude'][i] = stations_df['location'][i]['longitude']
    stations_df['latitude'][i] = stations_df['location'][i]['latitude']
    
stations_df = stations_df.rename(columns={'id': 'station_id'})
stations_df

100%|██████████| 49/49 [00:09<00:00,  5.42it/s]


Unnamed: 0,device_id,station_id,location,name,longitude,latitude
0,S77,S77,"{'latitude': 1.2937, 'longitude': 103.8125}",Alexandra Road,103.812,1.2937
1,S109,S109,"{'latitude': 1.3764, 'longitude': 103.8492}",Ang Mo Kio Avenue 5,103.849,1.3764
2,S117,S117,"{'latitude': 1.256, 'longitude': 103.679}",Banyan Road,103.679,1.256
3,S55,S55,"{'latitude': 1.3837, 'longitude': 103.886}",Buangkok Green,103.886,1.3837
4,S64,S64,"{'latitude': 1.3824, 'longitude': 103.7603}",Bukit Panjang Road,103.76,1.3824
5,S90,S90,"{'latitude': 1.3191, 'longitude': 103.8191}",Bukit Timah Road,103.819,1.3191
6,S61,S61,"{'latitude': 1.323, 'longitude': 103.9217}",Chai Chee Street,103.922,1.323
7,S114,S114,"{'latitude': 1.38, 'longitude': 103.73}",Choa Chu Kang Avenue 4,103.73,1.38
8,S11,S11,"{'latitude': 1.3746, 'longitude': 103.6938}",Choa Chu Kang Road,103.694,1.3746
9,S50,S50,"{'latitude': 1.3337, 'longitude': 103.7768}",Clementi Road,103.777,1.3337


In [26]:
#pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])

In [100]:
rainfall_df['rainfall'] = rainfall_df['rainfall'].astype('float')  # convert to float

# Union of wind-speed and station data
outer_df = pd.merge(rainfall_df, stations_df, how='outer', on=['station_id'])
outer_df.head(15)

# drop duplicating info columns
outer_rainfall_df = outer_df.drop(['location','device_id'], axis=1)
outer_rainfall_df.head(15)

Unnamed: 0,station_id,rainfall,timestamp (SGT),name,longitude,latitude
0,S77,0.0,2018-03-24 00:05:00,Alexandra Road,103.812,1.2937
1,S77,0.0,2018-03-24 00:10:00,Alexandra Road,103.812,1.2937
2,S77,0.0,2018-03-24 00:15:00,Alexandra Road,103.812,1.2937
3,S77,0.0,2018-03-24 00:20:00,Alexandra Road,103.812,1.2937
4,S77,0.0,2018-03-24 00:20:00,Alexandra Road,103.812,1.2937
5,S77,0.0,2018-03-24 00:30:00,Alexandra Road,103.812,1.2937
6,S77,0.0,2018-03-24 00:35:00,Alexandra Road,103.812,1.2937
7,S77,0.0,2018-03-24 00:40:00,Alexandra Road,103.812,1.2937
8,S77,0.0,2018-03-24 00:45:00,Alexandra Road,103.812,1.2937
9,S77,0.0,2018-03-24 00:50:00,Alexandra Road,103.812,1.2937


In [101]:
outer_rainfall_df.describe()

Unnamed: 0,rainfall
count,14786.0
mean,0.046612
std,0.456185
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,11.4


In [29]:
#rainfall_df.info()

In [30]:
#rainfall_df['rainfall'] = rainfall_df['rainfall'].astype('float')  # convert to float
#rainfall_df.info()

In [84]:
filename = 'rainfall_{}.csv'.format(date)
filename

outer_rainfall_df.to_csv(filename, index = False)

## Temperature

In [42]:
# Get temperature at 5-min intervals on a specific date
temperature_df = pd.DataFrame(columns=['station_id', 'air-temperature', 'timestamp (SGT)'])
for dt in pd.date_range(datetime, periods=(24*12+1), freq='5min'):  # I remember this was a wet day
    r = requests.get('https://api.data.gov.sg/v1/environment/air-temperature',
                     params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
    temp_df = pd.DataFrame(r.json()['items'][0]['readings'])
    temp_df = temp_df.rename(columns={'value': 'air-temperature'})
    temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
    temperature_df = temperature_df.append(temp_df, ignore_index=True)
#temperature_df.head()

stations_df = pd.DataFrame(r.json()['metadata']['stations'])
stations_df['longitude'] = ''
stations_df['latitude'] = ''

for i in range(0,len(stations_df)):
    stations_df['longitude'][i] = stations_df['location'][i]['longitude']
    stations_df['latitude'][i] = stations_df['location'][i]['latitude']
    
stations_df = stations_df.rename(columns={'id': 'station_id'})
stations_df

Unnamed: 0,device_id,station_id,location,name,longitude,latitude
0,S109,S109,"{'latitude': 1.3764, 'longitude': 103.8492}",Ang Mo Kio Avenue 5,103.849,1.3764
1,S117,S117,"{'latitude': 1.256, 'longitude': 103.679}",Banyan Road,103.679,1.256
2,S50,S50,"{'latitude': 1.3337, 'longitude': 103.7768}",Clementi Road,103.777,1.3337
3,S107,S107,"{'latitude': 1.3135, 'longitude': 103.9625}",East Coast Parkway,103.963,1.3135
4,S43,S43,"{'latitude': 1.3399, 'longitude': 103.8878}",Kim Chuan Road,103.888,1.3399
5,S108,S108,"{'latitude': 1.2799, 'longitude': 103.8703}",Marina Gardens Drive,103.87,1.2799
6,S121,S121,"{'latitude': 1.37288, 'longitude': 103.72244}",Old Choa Chu Kang Road,103.722,1.37288
7,S106,S106,"{'latitude': 1.4168, 'longitude': 103.9673}",Pulau Ubin,103.967,1.4168
8,S102,S102,"{'latitude': 1.189, 'longitude': 103.768}",Semakau Landfill,103.768,1.189
9,S122,S122,"{'latitude': 1.41731, 'longitude': 103.8249}",Sembawang Road,103.825,1.41731


In [43]:
# Union of wind-speed and station data
outer_df = pd.merge(temperature_df, stations_df, how='outer', on=['station_id'])
outer_df.head(15)

# drop duplicating info columns
outer_temperature_df = outer_df.drop(['location','device_id'], axis=1)
outer_temperature_df.head(15)

Unnamed: 0,station_id,air-temperature,timestamp (SGT),name,longitude,latitude
0,S109,29.3,2017-05-24 00:00:00,Ang Mo Kio Avenue 5,103.849,1.3764
1,S109,29.3,2017-05-24 00:05:00,Ang Mo Kio Avenue 5,103.849,1.3764
2,S109,29.3,2017-05-24 00:10:00,Ang Mo Kio Avenue 5,103.849,1.3764
3,S109,29.3,2017-05-24 00:15:00,Ang Mo Kio Avenue 5,103.849,1.3764
4,S109,29.3,2017-05-24 00:20:00,Ang Mo Kio Avenue 5,103.849,1.3764
5,S109,29.3,2017-05-24 00:25:00,Ang Mo Kio Avenue 5,103.849,1.3764
6,S109,29.3,2017-05-24 00:30:00,Ang Mo Kio Avenue 5,103.849,1.3764
7,S109,29.3,2017-05-24 00:35:00,Ang Mo Kio Avenue 5,103.849,1.3764
8,S109,29.3,2017-05-24 00:40:00,Ang Mo Kio Avenue 5,103.849,1.3764
9,S109,29.3,2017-05-24 00:45:00,Ang Mo Kio Avenue 5,103.849,1.3764


In [65]:
filename = 'temperature_{}.csv'.format(date)
filename

outer_temperature_df.to_csv(filename, index = False)

In [44]:
#r.json()['items'][0]

## Relative Humidity

In [45]:
# Get relative humidity at 5-min intervals on a specific date
humidity_df = pd.DataFrame(columns=['station_id', 'relative-humidity', 'timestamp (SGT)'])
for dt in pd.date_range(datetime, periods=(24*12+1), freq='5min'):  # I remember this was a wet day
    r = requests.get('https://api.data.gov.sg/v1/environment/relative-humidity',
                     params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
    temp_df = pd.DataFrame(r.json()['items'][0]['readings'])
    temp_df = temp_df.rename(columns={'value': 'relative-humidity'})
    temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
    humidity_df = humidity_df.append(temp_df, ignore_index=True)
#humidity_df.head()

stations_df = pd.DataFrame(r.json()['metadata']['stations'])
stations_df['longitude'] = ''
stations_df['latitude'] = ''

for i in range(0,len(stations_df)):
    stations_df['longitude'][i] = stations_df['location'][i]['longitude']
    stations_df['latitude'][i] = stations_df['location'][i]['latitude']
    
stations_df = stations_df.rename(columns={'id': 'station_id'})
stations_df

Unnamed: 0,device_id,station_id,location,name,longitude,latitude
0,S109,S109,"{'latitude': 1.3764, 'longitude': 103.8492}",Ang Mo Kio Avenue 5,103.849,1.3764
1,S117,S117,"{'latitude': 1.256, 'longitude': 103.679}",Banyan Road,103.679,1.256
2,S50,S50,"{'latitude': 1.3337, 'longitude': 103.7768}",Clementi Road,103.777,1.3337
3,S107,S107,"{'latitude': 1.3135, 'longitude': 103.9625}",East Coast Parkway,103.963,1.3135
4,S43,S43,"{'latitude': 1.3399, 'longitude': 103.8878}",Kim Chuan Road,103.888,1.3399
5,S108,S108,"{'latitude': 1.2799, 'longitude': 103.8703}",Marina Gardens Drive,103.87,1.2799
6,S121,S121,"{'latitude': 1.37288, 'longitude': 103.72244}",Old Choa Chu Kang Road,103.722,1.37288
7,S102,S102,"{'latitude': 1.189, 'longitude': 103.768}",Semakau Landfill,103.768,1.189
8,S115,S115,"{'latitude': 1.29377, 'longitude': 103.61843}",Tuas South Avenue 3,103.618,1.29377
9,S24,S24,"{'latitude': 1.3678, 'longitude': 103.9826}",Upper Changi Road North,103.983,1.3678


In [46]:
# Union of wind-speed and station data
outer_df = pd.merge(humidity_df, stations_df, how='outer', on=['station_id'])
outer_df.head(15)

# drop duplicating info columns
outer_humidity_df = outer_df.drop(['location','device_id'], axis=1)
outer_humidity_df.head(15)

Unnamed: 0,station_id,relative-humidity,timestamp (SGT),name,longitude,latitude
0,S109,78.0,2017-05-24 00:00:00,Ang Mo Kio Avenue 5,103.849,1.3764
1,S109,78.2,2017-05-24 00:05:00,Ang Mo Kio Avenue 5,103.849,1.3764
2,S109,77.7,2017-05-24 00:10:00,Ang Mo Kio Avenue 5,103.849,1.3764
3,S109,78.1,2017-05-24 00:15:00,Ang Mo Kio Avenue 5,103.849,1.3764
4,S109,78.2,2017-05-24 00:20:00,Ang Mo Kio Avenue 5,103.849,1.3764
5,S109,78.3,2017-05-24 00:25:00,Ang Mo Kio Avenue 5,103.849,1.3764
6,S109,78.3,2017-05-24 00:30:00,Ang Mo Kio Avenue 5,103.849,1.3764
7,S109,78.4,2017-05-24 00:35:00,Ang Mo Kio Avenue 5,103.849,1.3764
8,S109,78.7,2017-05-24 00:40:00,Ang Mo Kio Avenue 5,103.849,1.3764
9,S109,78.7,2017-05-24 00:45:00,Ang Mo Kio Avenue 5,103.849,1.3764


In [66]:
filename = 'humidity_{}.csv'.format(date)
filename

outer_humidity_df.to_csv(filename, index = False)

## Merge wind-speed, rainfall, temperature & humidity DataFrames

In [57]:
outer_df = ""
# Union of data
outer_df = pd.merge(outer_wind_speed_df, outer_rainfall_df, how='outer', on=['station_id', 'timestamp (SGT)', 'name', 'longitude', 'latitude'])
outer_df = pd.merge(outer_df, outer_temperature_df, how='outer', on=['station_id', 'timestamp (SGT)'])
outer_df = pd.merge(outer_df, outer_humidity_df, how='outer', on=['station_id', 'timestamp (SGT)'])

# drop duplicating info columns
outer_df = outer_df.filter(['timestamp (SGT)','station_id', 'name','longitude','latitude','wind-speed','rainfall','air-temperature','relative-humidity'], axis=1)

outer_df

Unnamed: 0,timestamp (SGT),station_id,name,longitude,latitude,wind-speed,rainfall,air-temperature,relative-humidity
0,2017-05-24 00:00:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,5.3,,29.3,78.0
1,2017-05-24 00:05:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,4.1,,29.3,78.2
2,2017-05-24 00:10:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,4.7,,29.3,77.7
3,2017-05-24 00:15:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,6.0,,29.3,78.1
4,2017-05-24 00:15:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,,0.0,29.3,78.1
5,2017-05-24 00:20:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,9.3,,29.3,78.2
6,2017-05-24 00:20:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,,0.0,29.3,78.2
7,2017-05-24 00:25:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,6.4,,29.3,78.3
8,2017-05-24 00:25:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,,0.0,29.3,78.3
9,2017-05-24 00:30:00,S109,Ang Mo Kio Avenue 5,103.849,1.3764,2.9,,29.3,78.3


In [58]:
outer_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13762 entries, 0 to 13761
Data columns (total 9 columns):
timestamp (SGT)      13762 non-null datetime64[ns]
station_id           13762 non-null object
name                 3770 non-null object
longitude            3770 non-null object
latitude             3770 non-null object
wind-speed           3660 non-null float64
rainfall             13196 non-null float64
air-temperature      4435 non-null float64
relative-humidity    3946 non-null float64
dtypes: datetime64[ns](1), float64(4), object(4)
memory usage: 1.0+ MB


In [59]:
outer_df.groupby('name').describe()

Unnamed: 0_level_0,air-temperature,air-temperature,air-temperature,air-temperature,air-temperature,air-temperature,air-temperature,air-temperature,rainfall,rainfall,...,relative-humidity,relative-humidity,wind-speed,wind-speed,wind-speed,wind-speed,wind-speed,wind-speed,wind-speed,wind-speed
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Ang Mo Kio Avenue 5,448.0,26.684152,1.751559,23.5,25.4,25.8,28.8,29.5,180.0,0.045556,...,96.0,100.8,268.0,4.758955,2.751825,0.6,2.7,4.5,6.2,15.7
Banyan Road,289.0,26.732872,1.582563,24.1,25.5,26.3,28.6,29.2,289.0,0.116263,...,92.7,98.2,289.0,6.411764,3.3461,0.4,3.9,6.2,8.5,16.5
Clementi Road,289.0,26.049827,1.74141,23.2,25.0,25.5,27.9,28.9,289.0,0.143945,...,96.2,99.3,289.0,5.933218,3.711479,0.6,2.9,5.6,7.8,21.6
East Coast Parkway,268.0,27.587313,1.819142,24.4,26.2,27.5,29.4,29.9,263.0,0.061597,...,94.025,100.8,268.0,7.429477,3.005486,1.4,5.1,7.8,9.35,20.2
Kim Chuan Road,289.0,27.175779,1.905612,24.1,25.9,26.3,29.4,30.3,286.0,0.08042,...,93.4,97.8,289.0,4.515917,2.546394,0.6,2.5,4.5,6.0,16.9
Marina Gardens Drive,289.0,26.976125,1.918265,24.0,25.5,26.1,29.1,30.1,285.0,0.107368,...,94.9,97.9,0.0,,,,,,,
Old Choa Chu Kang Road,289.0,26.238754,1.704595,23.2,24.8,25.8,28.1,29.0,264.0,0.200758,...,97.8,99.2,0.0,,,,,,,
Semakau Landfill,284.0,27.398239,1.560544,25.1,25.9,27.0,29.2,29.5,258.0,0.072868,...,94.6,100.9,284.0,10.772183,4.551849,3.1,8.15,9.5,12.1,25.7
Tuas South Avenue 3,289.0,26.613149,1.823201,23.8,25.0,26.1,28.8,29.5,289.0,0.102422,...,92.3,93.1,289.0,3.43564,2.84804,0.4,1.7,2.7,4.1,17.7
Upper Changi Road North,289.0,26.726644,1.824967,24.0,25.3,25.8,28.7,29.7,229.0,0.034061,...,93.9,96.4,289.0,6.128373,2.876525,0.6,4.1,6.6,8.2,15.2


In [60]:
outer_df.to_csv('output.csv', index = False)