In [1]:
import requests
import numpy as np
import pandas as pd
import json
import logging
import time
from requests.exceptions import HTTPError
from pprint import pprint
from functools import wraps
from geopy.distance import geodesic as GD

In [13]:
# Load the three static CSV extracts and stack them row-wise into one frame.
# Each file keeps its own row index (concat is called without ignore_index),
# so index labels in error_df are not guaranteed to be unique.
df1, df2, df3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "static/2022_01.csv",
        "static/2022_02.csv",
        "static/2022_03.csv",
    )
)

error_df = pd.concat([df1, df2, df3])
error_df

Unnamed: 0,_id,resale_price,year,month,timeseries_month,region,town,rooms,avg_storey,floor_area_sqm,remaining_lease,dist_to_marina_bay,latitude,longitude,nearest_station_0,dist_to_station_0
0,99276,282000.0,2022,4,2022-04-01,North-East,Ang Mo Kio,2.0,11.0,44.0,55.083333,10.18,1.373346,103.836459,Mayflower MRT,0.21
1,101537,773000.0,2022,4,2022-04-01,North,Yishun,5.5,8.0,142.0,64.916667,14.79,1.415452,103.833091,Yishun MRT,1.56
2,100671,655000.0,2022,4,2022-04-01,North-East,Punggol,5.0,17.0,113.0,92.000000,13.49,1.400720,103.898485,Sumang LRT,0.86
3,101536,739388.0,2022,4,2022-04-01,North,Yishun,5.5,2.0,142.0,64.833333,14.67,1.414174,103.832454,Yishun MRT,1.71
4,101535,920000.0,2022,4,2022-04-01,North,Yishun,5.5,2.0,164.0,69.166667,15.21,1.419667,103.834891,Yishun MRT,1.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8895,110501,360000.0,2022,9,2022-09-01,North-East,Ang Mo Kio,3.0,14.0,67.0,55.916667,8.64,1.362663,103.856516,Ang Mo Kio MRT,1.12
8896,110500,380000.0,2022,9,2022-09-01,North-East,Ang Mo Kio,3.0,5.0,73.0,52.666667,9.31,1.366558,103.841624,Mayflower MRT,0.78
8897,110499,580000.0,2022,9,2022-09-01,North-East,Ang Mo Kio,3.0,8.0,70.0,89.083333,9.02,1.364612,103.844851,Ang Mo Kio MRT,0.79
8898,110498,365000.0,2022,9,2022-09-01,North-East,Ang Mo Kio,3.0,5.0,67.0,54.416667,9.50,1.367396,103.838150,Mayflower MRT,0.48


In [9]:
# Wrapper for timing function calls:
def timeit(func):
    '''
    Wrapper to time function call

    Every successful call prints, and logs at INFO level, the same message:
    the function name, the local wall-clock time (HH:MM:SS) at which the call
    started, and the elapsed time measured with time.perf_counter().

    ## Parameters
    func: callable
        function to be timed
    Returns the wrapped function, which returns whatever func returns.
    If func raises, nothing is reported and the exception propagates unchanged.
    '''
    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        # *args and **kwargs are forwarded untouched, so the wrapper works for
        # any signature without having to declare the parameters here.
        current_time = time.strftime("%H:%M:%S", time.localtime())
        start = time.perf_counter()
        result = func(*args, **kwargs)
        time_taken = time.perf_counter() - start

        # Build the message once so stdout and the log always agree
        # (the log line previously dropped the "called at" timestamp).
        message = f'{func.__name__}() called at \t{current_time} \texecution time: {time_taken:.4f} seconds'
        print(message)
        logging.info(message)
        return result
    return timeit_wrapper

def error_handler(func=None, max_attempts=3, delay=120):
    '''
    Wrapper to catch and handle errors

    Calls func up to max_attempts times:
      * HTTPError: logged and printed, then retried. When the response status
        is 429 the wrapper sleeps for `delay` seconds before retrying; other
        statuses are retried immediately. If the last attempt also fails, an
        HTTPError('Exceeded max tries of N') is raised, chained to the last
        error and carrying its response.
      * any other Exception: logged and printed, no retry, and the wrapper
        returns None (best-effort behaviour, callers must handle None).

    Usable both as `@error_handler` and as
    `@error_handler(max_attempts=5, delay=10)`.

    ## Parameters
    func: callable
        function to wrap (omitted when the decorator is called with options)
    max_attempts: int
        total number of calls made before giving up on HTTP errors
    delay: int | float
        seconds to sleep after a 429 response before the next attempt
    Returns the wrapped function (or a decorator when func is omitted).
    '''
    if func is None:
        # Called with options only: hand back a decorator that still needs func.
        return lambda wrapped: error_handler(wrapped, max_attempts=max_attempts, delay=delay)

    @wraps(func)
    def error_handler_wrapper(*args, **kwargs):
        # *args and **kwargs are forwarded untouched, so the wrapper works for
        # any signature without having to declare the parameters here.
        for attempt in range(1, max_attempts + 1):
            try:
                return func(*args, **kwargs)
            except HTTPError as err:
                logging.error(f'{func.__name__}() encountered {err}')
                # Raise exception if we reach max tries. Attempts are counted
                # from 1 so the final one is actually detected.
                if attempt == max_attempts:
                    raise HTTPError(
                        f'Exceeded max tries of {max_attempts}',
                        response=getattr(err, 'response', None),
                    ) from err
                print(f'{func.__name__}() encountered {err}')

                # err.response is the requests Response object (it can be None
                # when the error was raised without one), .status_code is an int.
                status_code = getattr(getattr(err, 'response', None), 'status_code', None)
                if status_code == 429:
                    print(f'Sleeping for {delay} seconds', end = '\t')
                    time.sleep(delay)
                    print('Retrying...', end='\t')
            except Exception as err:
                # Non-HTTP failures are not retried: report and give up.
                logging.error(f'{func.__name__}() encountered {err}')
                print(f'{func.__name__}() encountered {err}')
                return None
        return None
    return error_handler_wrapper

def cache_calls():
    '''
    Placeholder for a call-caching helper; not implemented yet.
    Takes no arguments and returns None.
    '''
    return None

In [18]:
@timeit
@error_handler
def get_token(location: str):
    '''
    Function to request an API token from OneMap and update the stored token if it changed

    The JSON file at `location` is read, its content is posted as form data to
    the OneMap getToken endpoint, and the returned access token is compared
    with the one stored in the file. When they differ, the file is rewritten
    in place with the new 'access_token' and 'expiry_timestamp'.
    NOTE(review): presumably the file also holds the credentials the endpoint
    expects - confirm against the OneMap API docs.

    ##Parameters
        location: filepath (str)
    Returns API token : str (always the token currently stored in the file
    after any update)
    Raises requests.exceptions.HTTPError on a non-2xx response, so that the
    error_handler wrapper can retry.
    '''
    with open(location, 'r+') as fp:
        data = json.load(fp)
        response = requests.post("https://developers.onemap.sg/privateapi/auth/post/getToken", data=data)
        # Surface HTTP failures as HTTPError instead of a KeyError further down.
        response.raise_for_status()
        token = response.json()
        # .get(): the file may not contain a token yet on first use.
        if token['access_token'] != data.get('access_token'):
            print("New token found")
            data['access_token'] = token['access_token']
            data['expiry_timestamp'] = token['expiry_timestamp']
            fp.seek(0)
            json.dump(data, fp = fp, indent=4)
            # Drop leftover bytes if the new JSON is shorter than the old content.
            fp.truncate()
            print('Updated token json')
        # `data` already holds the fresh token; re-parsing the text read
        # earlier would return the outdated one.
        return data['access_token']

def _build_datagovsg_url(url, sort, limit, months, years, ids):
    '''
    Append the sort, filters and limit query parameters to the base API url.
    The filters value is a JSON object built from whichever of the
    month / _id filters were requested, so it is valid JSON in every combination.
    '''
    filters = {}
    if years:
        # None means "every month"; an explicit list restricts the months.
        wanted_months = range(1, 13) if months is None else months
        year_months = [f'{year}-{str(month).zfill(2)}' for year in years for month in wanted_months]
        if year_months:
            filters['month'] = year_months
    if ids:
        # ids are sent as strings, as before.
        filters['_id'] = [str(record_id) for record_id in ids]

    if sort:
        url = url + f'&sort={sort}'
    if filters:
        url = url + f'&filters={json.dumps(filters)}'
    if limit: # API call's default is 100 even without specifying
        url = url + f'&limit={limit}'
    return url


@timeit
@error_handler
def datagovsg_api_call(url: str, sort: str = 'month desc', limit: int = 100, 
                       months:list =[1,2,3,4,5,6,7,8,9,10,11,12], 
                       years:list =None, 
                       ids:list =None)-> pd.DataFrame:
    '''
    Function to build the API call and construct the pandas dataframe
    ## Parameters
    url: str
        url for API, with resource_id parameters
    sort: str
        field, by ascending/desc, default by Latest month
    limit: int
        maximum entries (the API returns 100 by default if not specified)
    months: list
        months desired, int between 1-12; only used together with `years`
    years: list
        years desired, int; every year is combined with every month into a
        "YYYY-MM" filter on the 'month' field
    ids: list
        record `_id` values to keep; combined with the month filter when both
        are given
    Returns Dataframe of data : pd.DataFrame
    Raises requests.exceptions.HTTPError on a non-2xx response.
    '''
    if limit:
        print(f'Call limit : {limit}')

    url = _build_datagovsg_url(url, sort, limit, months, years, ids)

    pprint(f'API call = {url}')
    response = requests.get(url)
    response.raise_for_status()
    data = response.json()
    df = pd.DataFrame(data['result']['records'])
    return df

In [12]:
# `_id` values of the records under investigation; the cells below use this
# list to filter on the `_id` column.
errors = [
    98209, 116017, 118198, 144671, 153348, 98161,
    115987, 115986, 98191, 102611, 116006, 144653,
    102528, 144570, 153265, 95993, 106877, 109268,
    113930, 115956, 144588, 151074, 151075, 151076,
]

In [21]:
# Fetch the flagged `_id`s from the data.gov.sg datastore for 2022-2023.
df = datagovsg_api_call(
    'https://data.gov.sg/api/action/datastore_search?resource_id=f1765b54-a209-4718-8d38-a39237f502b3',
    limit=100000,
    years=[2022, 2023],
    ids=errors,
)
df

Call limit : 100000
('API call = '
 'https://data.gov.sg/api/action/datastore_search?resource_id=f1765b54-a209-4718-8d38-a39237f502b3&sort=month '
 'desc&filters={"month":["2022-01", "2022-02", "2022-03", "2022-04", '
 '"2022-05", "2022-06", "2022-07", "2022-08", "2022-09", "2022-10", "2022-11", '
 '"2022-12", "2023-01", "2023-02", "2023-03", "2023-04", "2023-05", "2023-06", '
 '"2023-07", "2023-08", "2023-09", "2023-10", "2023-11", '
 '"2023-12"],"_id":["98209", "116017", "118198", "144671", "153348", "98161", '
 '"115987", "115986", "98191", "102611", "116006", "144653", "102528", '
 '"144570", "153265", "95993", "106877", "109268", "113930", "115956", '
 '"144588", "151074", "151075", "151076"]}&limit=100000')
datagovsg_api_call() called at 	20:16:12 	execution time: 1.0558 seconds


Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,street_name,resale_price,month,remaining_lease,lease_commence_date,storey_range,_id,block
0,KALLANG/WHAMPOA,3 ROOM,New Generation,82,GEYLANG BAHRU,440000,2023-05,54 years 08 months,1979,13 TO 15,153265,94
1,PASIR RIS,4 ROOM,Model A,112,PASIR RIS DR 10,540000,2023-05,71 years 09 months,1996,04 TO 06,153348,705
2,KALLANG/WHAMPOA,3 ROOM,Model A,67,JLN TENTERAM,568000,2023-04,93 years 08 months,2017,13 TO 15,151074,117A
3,KALLANG/WHAMPOA,3 ROOM,Model A,67,JLN TENTERAM,550000,2023-04,93 years 07 months,2017,16 TO 18,151075,117A
4,KALLANG/WHAMPOA,3 ROOM,Improved,65,KALLANG BAHRU,375000,2023-04,56 years 09 months,1981,10 TO 12,151076,65
5,KALLANG/WHAMPOA,3 ROOM,Model A,67,JLN TENTERAM,559000,2023-01,93 years 11 months,2017,16 TO 18,144588,117A
6,KALLANG/WHAMPOA,2 ROOM,Standard,50,JLN BATU,278000,2023-01,46 years 06 months,1970,07 TO 09,144570,7
7,KALLANG/WHAMPOA,5 ROOM,Standard,117,NTH BRIDGE RD,765000,2023-01,51 years 01 month,1975,16 TO 18,144653,12
8,MARINE PARADE,5 ROOM,Standard,126,MARINE CRES,890000,2023-01,51 years 03 months,1975,19 TO 21,144671,28
9,QUEENSTOWN,3 ROOM,Premium Apartment,65,DAWSON RD,530000,2022-01,93 years 07 months,2016,04 TO 06,118198,87


In [14]:
# Rows of the local extract whose `_id` is in the flagged list.
error_df.loc[error_df['_id'].isin(errors)]

Unnamed: 0,_id,resale_price,year,month,timeseries_month,region,town,rooms,avg_storey,floor_area_sqm,remaining_lease,dist_to_marina_bay,latitude,longitude,nearest_station_0,dist_to_station_0
3416,98161,525000.0,2022,3,2022-03-01,Central,Kallang/Whampoa,4.0,5.0,91.0,59.166667,7.86,1.280597,103.79046,Haw Par Villa MRT,0.99
3446,98191,788888.0,2022,3,2022-03-01,Central,Kallang/Whampoa,5.0,17.0,127.0,59.25,7.83,1.280287,103.790785,Haw Par Villa MRT,1.04
3462,98209,800000.0,2022,3,2022-03-01,Central,Marine Parade,5.0,8.0,126.0,52.0,24.34,1.314112,103.644248,Tuas Crescent MRT,0.93
5212,95993,310000.0,2022,2,2022-02-01,Central,Kallang/Whampoa,3.0,5.0,60.0,46.0,16.66,1.391731,103.755741,Yew Tee MRT,1.13
1226,109268,340000.0,2022,8,2022-08-01,Central,Kallang/Whampoa,3.0,5.0,60.0,45.5,16.66,1.391731,103.755741,Yew Tee MRT,1.13
3360,106877,300000.0,2022,7,2022-07-01,Central,Kallang/Whampoa,3.0,2.0,60.0,45.583333,16.66,1.391731,103.755741,Yew Tee MRT,1.13
7727,102611,798888.0,2022,5,2022-05-01,Central,Kallang/Whampoa,5.0,26.0,118.0,59.166667,7.83,1.280287,103.790785,Haw Par Villa MRT,1.04
7812,102528,272000.0,2022,5,2022-05-01,Central,Kallang/Whampoa,2.0,8.0,48.0,47.166667,16.55,1.392531,103.757872,Yew Tee MRT,1.29
1645,118198,910000.0,2022,12,2022-12-01,Central,Marine Parade,5.0,23.0,126.0,51.333333,24.34,1.314112,103.644248,Tuas Crescent MRT,0.93
3226,115956,320000.0,2022,11,2022-11-01,Central,Kallang/Whampoa,3.0,2.0,60.0,45.25,16.66,1.391731,103.755741,Yew Tee MRT,1.13
