## Import libraries

In [25]:
from datetime import timedelta, date
import requests
import json
import pandas as pd

## Date list generation

In [26]:
def date_gen(start_date, end_date):
    """Yield each day from ``start_date`` through ``end_date``, both included.

    Nothing is yielded when ``end_date`` falls before ``start_date``.
    """
    day_count = (end_date - start_date).days + 1
    offset = 0
    while offset < day_count:
        yield start_date + timedelta(days=offset)
        offset += 1
        
def date_lst_gen(first_date, last_date):
    """Return the inclusive date range as a list of ``YYYY-MM-DD`` strings.

    The days themselves come from ``date_gen``; this only formats them.
    """
    return [day.strftime("%Y-%m-%d") for day in date_gen(first_date, last_date)]


## Input start and end dates

In [23]:
# Ask for the start date first, then the end date, one component per prompt.
# Tuple elements are evaluated left to right, so the prompt order is fixed.
yr_in, mth_in, day_in = input("Start yr: "), input("Start mth: "), input("Start day: ")
yr_out, mth_out, day_out = input("End yr: "), input("End mth: "), input("End day: ")

# int() raises ValueError on non-numeric text; date() raises ValueError on
# an impossible calendar date.
first_date = date(*map(int, (yr_in, mth_in, day_in)))
last_date = date(*map(int, (yr_out, mth_out, day_out)))

# Last expression of the cell: shows the resulting list of date strings.
date_lst_gen(first_date, last_date)

Start yr: 2017
Start mth: 11
Start day: 8
End yr: 2017
End mth: 11
End day: 8


['2017-11-08']

## Hourly time list generation


In [6]:
# One "THH:01" label per hour of the day, with the hour zero-padded to two
# digits ('T00:01' ... 'T23:01').
time_lst = ["T{:02d}:01".format(hour) for hour in range(24)]

print(time_lst)

['T00:01', 'T01:01', 'T02:01', 'T03:01', 'T04:01', 'T05:01', 'T06:01', 'T07:01', 'T08:01', 'T09:01', 'T10:01', 'T11:01', 'T12:01', 'T13:01', 'T14:01', 'T15:01', 'T16:01', 'T17:01', 'T18:01', 'T19:01', 'T20:01', 'T21:01', 'T22:01', 'T23:01']


## Define data extraction functions

### Get data from URL

In [7]:
def get_data(url, date_lst):
    """Fetch, for each date, the items whose timestamp matches an entry of ``time_lst``.

    For every date string in ``date_lst`` the endpoint ``url`` is requested with
    the module-level ``params`` dict, whose ``'date'`` entry is overwritten on
    each iteration. From the ``'items'`` list of the JSON response an item is
    kept when ``item['timestamp'][10:16]`` equals one of the entries of the
    module-level ``time_lst``.

    A date is reported as unsuccessful when the request fails, the body is not
    valid JSON, or the payload does not have the expected structure. Records
    are added only after the whole day has been parsed, so a date listed as
    unsuccessful contributes no partial records. Any other exception
    (including KeyboardInterrupt) propagates instead of being swallowed.

    Args:
        url: Endpoint to query.
        date_lst: Iterable of date strings sent as the ``date`` query parameter.

    Returns:
        list of ``{'timestamp': ..., 'readings': ...}`` dicts, in response order.
        The unsuccessful dates and the number of records are printed.
    """
    hr_data_lst = []
    no_data = []

    for day in date_lst:
        # Deliberately writes into the shared module-level dict, as before.
        params['date'] = day
        try:
            resp = requests.get(url, params=params)
            items = resp.json()['items']
            # 'readings' is only looked up for items that pass the time filter.
            day_records = [
                {'timestamp': item['timestamp'], 'readings': item['readings']}
                for item in items
                if item['timestamp'][10:16] in time_lst
            ]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, non-JSON body, or unexpected payload shape.
            no_data.append(day)
        else:
            hr_data_lst.extend(day_records)

    print("Unsuccessful dates: ", no_data)
    print(len(hr_data_lst))
    return hr_data_lst

## To get all data (not just hourly)

In [21]:
def get_all_data(url, date_lst):
    """Fetch every item returned for each date, with no time-of-day filter.

    For every date string in ``date_lst`` the endpoint ``url`` is requested with
    the module-level ``params`` dict, whose ``'date'`` entry is overwritten on
    each iteration. Every entry of the response's ``'items'`` list is kept.

    A date is reported as unsuccessful when the request fails, the body is not
    valid JSON, or the payload does not have the expected structure. Records
    are added only after the whole day has been parsed, so a date listed as
    unsuccessful contributes no partial records. Any other exception
    (including KeyboardInterrupt) propagates instead of being swallowed.

    Args:
        url: Endpoint to query.
        date_lst: Iterable of date strings sent as the ``date`` query parameter.

    Returns:
        list of ``{'timestamp': ..., 'readings': ...}`` dicts, in response order.
        The unsuccessful dates and the number of records are printed.
    """
    all_records = []
    no_data = []

    for day in date_lst:
        # Deliberately writes into the shared module-level dict, as before.
        params['date'] = day
        try:
            resp = requests.get(url, params=params)
            day_records = [
                {'timestamp': item['timestamp'], 'readings': item['readings']}
                for item in resp.json()['items']
            ]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, non-JSON body, or unexpected payload shape.
            no_data.append(day)
        else:
            all_records.extend(day_records)

    print("Unsuccessful dates: ", no_data)
    print(len(all_records))
    return all_records

### Clean data into usable format

In [8]:
def get_clean_data(hr_data_lst):
    """Flatten per-timestamp records into one row per station reading.

    Each input record is read as
    ``{'timestamp': ..., 'readings': [{'station_id': ..., 'value': ...}, ...]}``.
    Every reading becomes a dict with the keys ``'timestamp'``,
    ``'station_id'`` and the current value of the module-level ``label``
    (which holds the reading's ``'value'``).

    A record with a missing key or an unexpected shape is reported as
    unsuccessful and contributes no rows at all; rows already built from it
    are discarded rather than leaking into the next record's batch.

    Args:
        hr_data_lst: Iterable of record dicts as returned by the fetch helpers.

    Returns:
        list of flat row dicts, in input order. The rejected records are printed.
    """
    data_lst = []
    rec = []

    for record in hr_data_lst:
        try:
            # Built in a fresh list so a failure part-way leaves nothing behind.
            rows = [
                {
                    'timestamp': record['timestamp'],
                    'station_id': reading['station_id'],
                    label: reading['value'],
                }
                for reading in record['readings']
            ]
        except (KeyError, TypeError):
            # Only malformed data is tolerated; a missing `label` (NameError)
            # or a user interrupt is allowed to surface.
            rec.append(record)
        else:
            data_lst.extend(rows)

    print("Unsuccessful clean data: ", rec)
    return data_lst


## Enter URL

In [14]:
# Endpoint to query, e.g. https://api.data.gov.sg/v1/environment/air-temperature
# Whitespace picked up by a copy-paste is dropped so it cannot end up in the
# request URL or in the label derived below.
url = input("Enter url: ").strip()

# Query parameters dict read by get_data / get_all_data, which set its
# 'date' entry before each request.
params = {}

# The last path segment names the value column and the output CSV file.
# Trailing slashes are ignored so ".../air-temperature/" does not produce an
# empty label.
label = url.rstrip('/').split('/')[-1]
print(label)

Enter url: https://api.data.gov.sg/v1/environment/air-temperature
air-temperature


## Get data in dataframe

In [28]:
# Hourly pipeline: build the date strings for the chosen range, fetch the
# items matching time_lst for each date, then flatten them to one row per
# station reading.
results = get_clean_data(get_data(url, date_lst_gen(first_date, last_date)))

Unsuccessful dates:  []
23
Unsuccessful clean data:  []


## For all data

In [27]:
# Same pipeline without the time filter: every item returned for each date
# is kept before flattening.
# NOTE(review): this rebinds `results`, replacing whatever the hourly cell
# produced; run whichever of the two cells is wanted.
results = get_clean_data(get_all_data(url, date_lst_gen(first_date, last_date)))

Unsuccessful dates:  []
1358
Unsuccessful clean data:  []


In [16]:
# One DataFrame row per dict in `results`; the dict keys become the columns.
df = pd.DataFrame(results)

In [17]:
# Last expression of the cell, so Jupyter renders the readings table.
df

Unnamed: 0,timestamp,station_id,air-temperature
0,2018-01-13T00:01:00+08:00,S109,22.7
1,2018-01-13T00:01:00+08:00,S117,23.6
2,2018-01-13T00:01:00+08:00,S50,22.6
3,2018-01-13T00:01:00+08:00,S107,23.7
4,2018-01-13T00:01:00+08:00,S43,23.5
...,...,...,...
347,2018-01-13T23:01:00+08:00,S60,22.9
348,2018-01-13T23:01:00+08:00,S115,23.5
349,2018-01-13T23:01:00+08:00,S24,22.6
350,2018-01-13T23:01:00+08:00,S116,23.3


In [12]:
#num_days = (last_date - first_date).days +1

# Output name pattern: <label>_<first date>_<last date>.csv, written relative
# to the current working directory.
filename = "_".join([label, str(first_date), str(last_date)]) + ".csv"
df.to_csv(filename)
print("Saved!")


Saved!


## Get Station data

In [9]:
import requests
import json
import pandas as pd
# The station list is read from the 'metadata' section of a single response,
# so one request for the first day of the selected range is enough.
# `date_lst` is never assigned in this notebook (the original lookup raised
# NameError on a fresh kernel), so the date string is derived from
# `first_date` in the same YYYY-MM-DD format the other requests use.
j = first_date.strftime("%Y-%m-%d")
params = {}
params['date'] = j
resp = requests.get("https://api.data.gov.sg/v1/environment/air-temperature",params)
station_py = resp.json()
station_data = station_py['metadata']['stations']

# Copy the nested coordinates to top-level keys so each station dict carries
# 'long' and 'lat' directly.
for i in station_data:
    i['long'] = i['location']['longitude']
    i['lat'] = i['location']['latitude']
    
#print(station_data)

In [10]:
# One row per station dict, including the 'long' and 'lat' keys set on each
# dict by the loop over station_data.
df_station = pd.DataFrame(station_data)

In [11]:
# Render the station table; the nested 'location' column is still present here.
df_station

Unnamed: 0,id,device_id,name,location,long,lat
0,S109,S109,Ang Mo Kio Avenue 5,"{'latitude': 1.3764, 'longitude': 103.8492}",103.8492,1.3764
1,S117,S117,Banyan Road,"{'latitude': 1.256, 'longitude': 103.679}",103.679,1.256
2,S107,S107,East Coast Parkway,"{'latitude': 1.3135, 'longitude': 103.9625}",103.9625,1.3135
3,S43,S43,Kim Chuan Road,"{'latitude': 1.3399, 'longitude': 103.8878}",103.8878,1.3399
4,S108,S108,Marina Gardens Drive,"{'latitude': 1.2799, 'longitude': 103.8703}",103.8703,1.2799
5,S44,S44,Nanyang Avenue,"{'latitude': 1.34583, 'longitude': 103.68166}",103.68166,1.34583
6,S121,S121,Old Choa Chu Kang Road,"{'latitude': 1.37288, 'longitude': 103.72244}",103.72244,1.37288
7,S106,S106,Pulau Ubin,"{'latitude': 1.4168, 'longitude': 103.9673}",103.9673,1.4168
8,S06,S06,S06,"{'latitude': 1.3524, 'longitude': 103.9007}",103.9007,1.3524
9,S102,S102,Semakau Landfill,"{'latitude': 1.189, 'longitude': 103.768}",103.768,1.189


In [12]:
# 'location' duplicates the 'long' / 'lat' columns, so remove it.
# Rebinding (rather than inplace=True) together with errors='ignore' keeps
# this cell re-runnable: a second run no longer raises KeyError because the
# column is already gone.
df_station = df_station.drop(columns='location', errors='ignore')

In [13]:
# Render the station table again to check the result of dropping 'location'.
df_station

Unnamed: 0,id,device_id,name,long,lat
0,S109,S109,Ang Mo Kio Avenue 5,103.8492,1.3764
1,S117,S117,Banyan Road,103.679,1.256
2,S107,S107,East Coast Parkway,103.9625,1.3135
3,S43,S43,Kim Chuan Road,103.8878,1.3399
4,S108,S108,Marina Gardens Drive,103.8703,1.2799
5,S44,S44,Nanyang Avenue,103.68166,1.34583
6,S121,S121,Old Choa Chu Kang Road,103.72244,1.37288
7,S106,S106,Pulau Ubin,103.9673,1.4168
8,S06,S06,S06,103.9007,1.3524
9,S102,S102,Semakau Landfill,103.768,1.189


In [None]:
# Write the station table to "weather_station.csv", a path relative to the
# current working directory.
df_station.to_csv("weather_station.csv")