In [62]:
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from tqdm import tqdm

%matplotlib inline

stations_df = pd.read_csv("stations.csv")
stations_df.head()

Unnamed: 0,station_id,name,latitude,longitude
0,S7,Macritchie Reservoir,1.3417,103.8338
1,S8,Lower Peirce Reservoir,1.3701,103.8271
2,S11,Choa Chu Kang (West),1.3746,103.6938
3,S24,Changi Climate Station,1.3678,103.9826
4,S29,Pasir Ris (West),1.3863,103.9412


## Define function

In [63]:
def rain_data(date):
    # Get rainfall at 30-min intervals on a specific date
    start_datetime = date.strftime("%Y-%m-%d") + ' 07:05:00'
    end_datetime = date.strftime("%Y-%m-%d") + ' 09:35:00'
    
    rainfall_df = pd.DataFrame(columns=['station_id', 'rainfall', 'timestamp (SGT)'])
    for dt in pd.date_range(start_datetime, end_datetime, freq='30min'):  # I remember this was a wet day
        r = requests.get('https://api.data.gov.sg/v1/environment/rainfall',
                         params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
        try:
            temp_df = pd.DataFrame(r.json()['items'][0]['readings'])
        except ValueError:
            print('No data JSONDecodeError {}'.format(dt))
            continue
        except KeyError:
            print('No items KeyError {}'.format(dt))
            continue
        temp_df = temp_df.rename(columns={'value': 'rainfall'})
        temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
        rainfall_df = rainfall_df.append(temp_df, ignore_index=True)
    #rainfall_df.head()

    rainfall_df['rainfall'] = rainfall_df['rainfall'].astype('float')  # convert to float
    
    rainfallmean_df = rainfall_df.groupby('station_id').mean()
    rainfallmean_df['date'] = date
    rainfallmean_df = rainfallmean_df.reset_index()
    
    return rainfallmean_df

## Specify dates

In [64]:
start_date = '2018-01-01'
end_date = '2018-01-31'

date_list =pd.date_range(start_date, end_date, freq='1D')
date_list

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
               '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
               '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
               '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
               '2018-01-29', '2018-01-30', '2018-01-31'],
              dtype='datetime64[ns]', freq='D')

In [57]:
#date_list[0].strftime("%Y-%m-%d")

## Main program

In [65]:
#date_list =pd.date_range('2018-01-01', '2018-01-02', freq='1D')
rainfallmean_df = pd.DataFrame()

for date in tqdm(date_list):
    temp_df = rain_data(date)
    rainfallmean_df = rainfallmean_df.append(temp_df, ignore_index=True)

rainfallmean_df

# Union of wind-speed and station data
outer_df = pd.merge(rainfallmean_df, stations_df, how='inner', on=['station_id'])
outer_df.head(15)

filename = 'rainfallmean_{}to{}.csv'.format(start_date, end_date)
filename

#outer_rainfall_df.to_csv(filename, index = False)
outer_df.to_csv(filename, index = False)

100%|██████████| 31/31 [00:37<00:00,  1.21s/it]


## Debug - backup

In [15]:
all_date_list = []
all_date_list

[]

In [13]:
date_list

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
               '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
               '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
               '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
               '2018-01-29', '2018-01-30', '2018-01-31'],
              dtype='datetime64[ns]', freq='D')

In [9]:
#pd.date_range(datetime, periods=(24*(60/30)+1), freq='30min')
pd.date_range(start_datetime, end_datetime, freq='30min')

DatetimeIndex(['2018-01-08 07:05:00', '2018-01-08 07:35:00',
               '2018-01-08 08:05:00', '2018-01-08 08:35:00',
               '2018-01-08 09:05:00', '2018-01-08 09:35:00'],
              dtype='datetime64[ns]', freq='30T')

In [None]:

# Get rainfall at 30-min intervals on a specific date
rainfall_df = pd.DataFrame(columns=['station_id', 'rainfall', 'timestamp (SGT)'])
for dt in tqdm(pd.date_range(start_datetime, end_datetime, freq='30min')):  # I remember this was a wet day
    r = requests.get('https://api.data.gov.sg/v1/environment/rainfall',
                     params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
    try:
        temp_df = pd.DataFrame(r.json()['items'][0]['readings'])
    except ValueError:
        print('No data JSONDecodeError {}'.format(dt))
        continue
    except KeyError:
        print('No items KeyError {}'.format(dt))
        continue
    temp_df = temp_df.rename(columns={'value': 'rainfall'})
    temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
    rainfall_df = rainfall_df.append(temp_df, ignore_index=True)
#rainfall_df.head()

rainfall_df['rainfall'] = rainfall_df['rainfall'].astype('float')  # convert to float

# Union of wind-speed and station data
outer_df = pd.merge(rainfall_df, stations_df, how='outer', on=['station_id'])
outer_df.head(15)

filename = 'rainfall_{}.csv'.format(date)
filename

#outer_rainfall_df.to_csv(filename, index = False)
outer_df.to_csv(filename, index = False)

In [16]:
# Get rainfall at 30-min intervals on a specific date
rainfall_df = pd.DataFrame(columns=['station_id', 'rainfall', 'timestamp (SGT)'])
for dt in tqdm(pd.date_range(start_datetime, end_datetime, freq='30min')):  # I remember this was a wet day
    r = requests.get('https://api.data.gov.sg/v1/environment/rainfall',
                     params={'date_time': dt.strftime('%Y-%m-%dT%H:%M:%S')})
    try:
        temp_df = pd.DataFrame(r.json()['items'][0]['readings'])
    except ValueError:
        print('No data JSONDecodeError {}'.format(dt))
        continue
    except KeyError:
        print('No items KeyError {}'.format(dt))
        continue
    temp_df = temp_df.rename(columns={'value': 'rainfall'})
    temp_df['timestamp (SGT)'] = pd.to_datetime(r.json()['items'][0]['timestamp'].split('+')[0])
    rainfall_df = rainfall_df.append(temp_df, ignore_index=True)
#rainfall_df.head()

rainfall_df['rainfall'] = rainfall_df['rainfall'].astype('float')  # convert to float

100%|██████████| 6/6 [00:01<00:00,  3.34it/s]


In [48]:
rainfallmean_df = rainfall_df.groupby('station_id').mean()
rainfallmean_df['date'] = date
rainfallmean_df = rainfallmean_df.reset_index()

In [52]:
# Union of wind-speed and station data
outer_df = pd.merge(rainfallmean_df, stations_df, how='inner', on=['station_id'])
outer_df.head(15)

filename = 'rainfallmean_{}.csv'.format(date.strftime("%Y-%m-%d"))
filename

#outer_rainfall_df.to_csv(filename, index = False)
outer_df.to_csv(filename, index = False)

In [54]:
# Union of wind-speed and station data
outer_df = pd.merge(rainfallmean_df, stations_df, how='inner', on=['station_id'])
outer_df.head(15)

filename = 'rainfallmean_{}to{}.csv'.format(start_date, end_date)
filename

#outer_rainfall_df.to_csv(filename, index = False)
outer_df.to_csv(filename, index = False)