In [1]:
import json
import re
from urllib.parse import urljoin

import numpy as np
import pandas as pd
# import polars as pl
import requests

# Custom scripts
- Script to retrieve additional pages, taking the total record count into account

In [2]:
def fetch_data(url: str, dataset_id: str, timeout: float = 30.0) -> pd.DataFrame:
    """
    Fetches every page of a dataset from the Data.gov.sg API and returns one DataFrame.

    The first request goes to ``url + dataset_id``. Every following request uses
    the ``result['_links']['next']`` link of the previous response, resolved
    against the URL that was just requested (the dataset ID is NOT appended
    again, because the "next" link is already a complete query).

    Paging stops when any of the following holds:
      * a page contains no records,
      * the response has no "next" link,
      * the ``offset`` in the "next" link has reached ``result['total']``,
      * the "next" link points at a URL that was already requested,
      * a request or its decoding fails (the error is printed and whatever was
        collected so far is returned).

    Args:
        url (str): The base URL for the API request; ``dataset_id`` is appended to it.
        dataset_id (str): The ID of the dataset to fetch.
        timeout (float): Seconds to wait for each HTTP request. Defaults to 30.

    Returns:
        pd.DataFrame: The records of all fetched pages, concatenated with a
        fresh index. An empty DataFrame if no page could be fetched.
    """
    pages = []  # One DataFrame per successfully fetched page
    visited = set()  # URLs already requested; guards against a looping "next" link
    request_url = url + dataset_id

    while request_url not in visited:
        visited.add(request_url)

        try:
            response = requests.get(request_url, timeout=timeout)
            # Surface HTTP errors (e.g. 404/429) instead of failing later on a missing key
            response.raise_for_status()
            result = json.loads(response.text)['result']
            records = result['records']
            next_link = result.get('_links', {}).get('next')
            total_records = result.get('total')
        except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as e:
            # Best effort: report the problem and keep the pages collected so far
            print(f"Error: {e}")
            break

        # An empty page means we have paged past the end of the dataset
        if not records:
            break
        pages.append(pd.DataFrame.from_dict(records))

        if not next_link:
            break

        # Stop as soon as the next offset reaches the total, avoiding an empty request
        match = re.search(r'offset=(\d+)', next_link)
        if match and total_records is not None and int(match.group(1)) >= total_records:
            break

        # Resolve the (possibly relative) link against the URL just requested
        request_url = urljoin(request_url, next_link)

    # pd.concat raises on an empty list, so return an empty frame explicitly
    if not pages:
        return pd.DataFrame()

    return pd.concat(pages, ignore_index=True)

# Data Gov Sg data

## Private Residential Property Transactions in Rest of Central Region, Quarterly
### New Launch/Resale/SubSale Counts
- https://data.gov.sg/datasets/d_5785799d63a9da091f4e0b456291eeb8/view

In [3]:
# Download the dataset through fetch_data, then store the raw frame as Parquet.
# NOTE(review): the variable name is misspelled ("transasctions"); it is kept
# unchanged here in case other cells refer to it.
property_transasctions = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_5785799d63a9da091f4e0b456291eeb8",
)
property_transasctions.to_parquet(
    r"../data/raw_data/datagov_general_sale.parquet"
)

### Private Residential Property Rental Index (Base Quarter 2009-Q1 = 100), Quarterly
- https://data.gov.sg/datasets/d_8e4c50283fb7052a391dfb746a05c853/view

In [4]:
# Download the rental index dataset through fetch_data and store it as Parquet.
rental_index = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_8e4c50283fb7052a391dfb746a05c853",
)
rental_index.to_parquet(
    r"../data/raw_data/datagov_rental_index.parquet"
)

### Private Residential Property Price Index (Base Quarter 2009-Q1 = 100), Quarterly
- https://data.gov.sg/datasets/d_97f8a2e995022d311c6c68cfda6d034c/view

In [5]:
# Download the price index dataset through fetch_data and store it as Parquet.
price_index = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_97f8a2e995022d311c6c68cfda6d034c",
)
price_index.to_parquet(
    r"../data/raw_data/datagov_price_index.parquet"
)

### Median Annual Value and Property Tax By Type of Private Residential Property
- Median Annual Value and Property Tax
- https://data.gov.sg/datasets/d_774a81df45dca33112e59207e6dae1af/view

In [6]:
# Download the median annual value / property tax dataset through fetch_data
# and store it as Parquet.
median_val_property_tax = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_774a81df45dca33112e59207e6dae1af",
)
median_val_property_tax.to_parquet(
    r"../data/raw_data/datagov_median_price_via_property_type.parquet"
)

### Private Residential Property Transactions in the Whole of Singapore, Quarterly
- Private Residential Property Transactions
- https://data.gov.sg/datasets/d_7c69c943d5f0d89d6a9a773d2b51f337/view

In [7]:
# Download the whole-of-Singapore transactions dataset through fetch_data
# and store it as Parquet.
private_residential_transactions_whole = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_7c69c943d5f0d89d6a9a773d2b51f337",
)
private_residential_transactions_whole.to_parquet(
    r"../data/raw_data/datagov_private_transactions_property_type.parquet"
)

## Resale Flat Prices
- https://data.gov.sg/collections/189/view
- Downloaded manually; the cell below only converts the CSV files to Parquet

In [18]:
# Read both resale-flat CSV files first, then write each frame back out as Parquet.
resale_flat_2015 = pd.read_csv(
    filepath_or_buffer='../data/raw_data/csv/ResaleFlatPricesBasedonRegistrationDateFromJan2015toDec2016.csv'
)
resale_flat_2017 = pd.read_csv(
    filepath_or_buffer='../data/raw_data/csv/ResaleflatpricesbasedonregistrationdatefromJan2017onwards.csv'
)

# Parquet copies are written next to the other raw data files
resale_flat_2015.to_parquet(
    r"../data/raw_data/datagov_ResaleFlatPricesBasedonRegistrationDateFromJan2015toDec2016.parquet"
)
resale_flat_2017.to_parquet(
    r"../data/raw_data/datagov_ResaleflatpricesbasedonregistrationdatefromJan2017onwards.parquet"
)

In [10]:
# NOTE(review): this fetch is disabled. The recorded output of this cell is a
# KeyboardInterrupt, so the download was presumably stopped by hand — confirm.
# While it stays disabled, nothing in the visible notebook assigns `resale_flat_df`.
# resale_flat_df = fetch_data(
#     url="https://data.gov.sg/api/action/datastore_search?resource_id=", 
#     dataset_id="d_8b84c4ee58e3cfc0ece0d773c8ca6abc"
#     )

KeyboardInterrupt: 

In [None]:
# NOTE(review): `resale_flat_df` is only assigned in the commented-out cell above,
# so on a fresh kernel this line raises NameError unless the name is defined
# somewhere not shown here. Re-enable the fetch or remove this cell.
resale_flat_df.to_parquet(r"../data/raw_data/datagov_resale_flat_price_2017onwards.parquet")

In [13]:
# NOTE(review): looks like leftover debugging. In the visible notebook `response`
# is only a local variable inside fetch_data, so this cell depends on a
# notebook-level `response` created elsewhere (hidden state) — confirm or remove.
response.json()

{'help': 'https://data.gov.sg/api/3/action/help_show?name=datastore_search',
 'success': True,
 'result': {'resource_id': 'd_ea9ed51da2787afaf8e51f827c304208',
  'fields': [{'type': 'text', 'id': 'month'},
   {'type': 'text', 'id': 'town'},
   {'type': 'text', 'id': 'flat_type'},
   {'type': 'text', 'id': 'block'},
   {'type': 'text', 'id': 'street_name'},
   {'type': 'text', 'id': 'storey_range'},
   {'type': 'numeric', 'id': 'floor_area_sqm'},
   {'type': 'text', 'id': 'flat_model'},
   {'type': 'numeric', 'id': 'lease_commence_date'},
   {'type': 'text', 'id': 'remaining_lease'},
   {'type': 'numeric', 'id': 'resale_price'},
   {'type': 'int4', 'id': '_id'}],
  'records': [{'_id': 1,
    'month': '2015-01',
    'town': 'ANG MO KIO',
    'flat_type': '3 ROOM',
    'block': '174',
    'street_name': 'ANG MO KIO AVE 4',
    'storey_range': '07 TO 09',
    'floor_area_sqm': '60',
    'flat_model': 'Improved',
    'lease_commence_date': '1986',
    'remaining_lease': '70',
    'resale_pr

In [11]:
# Download the resale flat dataset through fetch_data, then store it as Parquet.
resale_flat_df_jan2015 = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_ea9ed51da2787afaf8e51f827c304208",
)

resale_flat_df_jan2015.to_parquet(
    r"../data/raw_data/datagov_resale_flat_price_2015_2016.parquet"
)

KeyboardInterrupt: 

## Private Residential Property Transactions in Outside Central Region, Quarterly
- https://data.gov.sg/datasets/d_1a7823f3d31e7db4b426833833762bab/view

In [None]:
# Download the Outside Central Region transactions dataset through fetch_data.
private_residential_transactions_outside_central = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_1a7823f3d31e7db4b426833833762bab",
)

## Private Residential Property Transactions in Core Central Region, Quarterly
- https://data.gov.sg/datasets/d_c287c8be114bfa7d055b27ab2c87de83/view

In [None]:
# Download the Core Central Region transactions dataset through fetch_data.
private_residential_transactions_central = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_c287c8be114bfa7d055b27ab2c87de83",
)

## Demand for Rental and Sold Flats
- https://data.gov.sg/datasets/d_4b4ee36346b27fe35c529588900340b2/view

In [None]:
# Download the rental/sold flat demand dataset through fetch_data.
demand_rental_sold_flats = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_4b4ee36346b27fe35c529588900340b2",
)

## Number of Sold and Rented HDB Residential Units
- https://data.gov.sg/datasets/d_67966e5fd5dce14cf9fa5f0bc5164faf/view

In [None]:
# Download the sold/rented HDB residential units dataset through fetch_data.
sold_rented_hdb = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_67966e5fd5dce14cf9fa5f0bc5164faf",
)

## Price Range of HDB Flats Offered
- https://data.gov.sg/datasets/d_2d493bdcc1d9a44828b6e71cb095b88d/view

In [None]:
# Download the HDB flat price range dataset through fetch_data.
price_range_hdb = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_2d493bdcc1d9a44828b6e71cb095b88d",
)

## HDB Resale Price Index (1Q2009 = 100), Quarterly
- https://data.gov.sg/datasets/d_14f63e595975691e7c24a27ae4c07c79/view

In [None]:
# Download the HDB resale price index dataset through fetch_data.
price_index_hdb = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_14f63e595975691e7c24a27ae4c07c79",
)

## Renting Out of Flats 2024 (CSV)
- https://data.gov.sg/datasets/d_c9f57187485a850908655db0e8cfe651/view

In [None]:
# Download the "Renting Out of Flats" dataset through fetch_data.
renting_flats = fetch_data(
    "https://data.gov.sg/api/action/datastore_search?resource_id=",
    "d_c9f57187485a850908655db0e8cfe651",
)