## Overview

This Jupyter notebook makes it easy to:

1. Get the dataset and column metadata programmatically
2. Load CSV files automatically into a pandas DataFrame so you can get straight to the fun part: exploring the data

## Setup
1. Paste the dataset ID you copied into the cell below
2. Run All Cells

In [None]:
# ID of the dataset to inspect and download. It is interpolated into the
# metadata, initiate-download and poll-download API URLs used by later cells.
DATASET_ID = "d_8575e84912df3c28995b8e6e0e05205a" # e.g. "d_69b3380ad7e51aff3a7dcc84eba52b8a"

## Dataset and Column Metadata

In [None]:
import json
import requests

# Build the metadata endpoint URL for the configured dataset and show it so the
# reader can open it directly when debugging.
base_url = "https://api-production.data.gov.sg"
url = base_url + f"/v2/public/api/datasets/{DATASET_ID}/metadata"
print(url)

# A timeout keeps the notebook from hanging on an unresponsive server, and
# raise_for_status() turns HTTP 4xx/5xx replies (e.g. an unknown dataset ID)
# into an immediate, readable error instead of a KeyError further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()['data']

# Separate the per-column metadata from the dataset-level metadata so the two
# can be printed independently. `columnMetadata` is None when the key is absent.
columnMetadata = data.pop('columnMetadata', None)

print("Dataset Metadata:")
print(json.dumps(data, indent=2))

# Guard against a missing/empty columnMetadata (or a missing 'map' entry):
# subscripting None would raise TypeError; print an empty column list instead.
column_names = list((columnMetadata or {}).get('map', {}).values())
print("\nColumns:\n", column_names)


https://api-production.data.gov.sg/v2/public/api/datasets/d_07bae9a30cd02f43f1475b134188060b/metadata
Dataset Metadata:
{
  "datasetId": "d_07bae9a30cd02f43f1475b134188060b",
  "createdAt": "2023-12-19T17:47:55+08:00",
  "name": "Government Gazette Supplement 2024",
  "collectionIds": [],
  "format": "CSV",
  "lastUpdatedAt": "2024-07-31T17:04:29+08:00",
  "managedBy": "Ministry of Digital Development and Information",
  "coverageStart": "2024-02-27T08:00:00+08:00",
  "coverageEnd": "2024-07-31T08:00:00+08:00",
  "contactEmails": [
    "MDDI_egazette@mddi.gov.sg"
  ],
  "datasetSize": "966"
}

Columns:
 ['Notification_No', 'Subject', 'Published_Date']


## Download File

In [None]:
# Initiate download: ask the API to start preparing the dataset file.
initiate_download_response = requests.get(
    f"https://api-open.data.gov.sg/v1/public/api/datasets/{DATASET_ID}/initiate-download",
    headers={"Content-Type":"application/json"},
    json={},
    timeout=30,  # do not let an unresponsive server hang the notebook
)
# Fail here, with the HTTP status in the message, rather than later when the
# body turns out not to be the expected JSON payload.
initiate_download_response.raise_for_status()

# NOTE: the printed payload can include a pre-signed download URL carrying
# temporary credentials; clear this cell's output before sharing the notebook.
print(initiate_download_response.json())

{'code': 0, 'data': {'message': 'Download successfully initiated. Proceed to poll download', 'url': 'https://s3.ap-southeast-1.amazonaws.com/table-downloads-ingest.data.gov.sg/d_07bae9a30cd02f43f1475b134188060b/93ef5aeae7d4910881fa4c22cf1995082a73c13bb3efd24b06ec21bdf98790db.csv?AWSAccessKeyId=ASIAU7LWPY2WEWWG4F2N&Expires=1728367406&Signature=iZHZFMUD78ltDouApB3oRXnomKw%3D&X-Amzn-Trace-Id=Root%3D1-6704bd1e-398093b71550dcf81756aa05%3BParent%3D393aad617d753ce4%3BSampled%3D0%3BLineage%3D1%3Ab9934a3d%3A0&response-content-disposition=attachment%3B%20filename%3D%22GovernmentGazetteSupplement2024.csv%22&x-amz-security-token=IQoJb3JpZ2luX2VjEOX%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDmFwLXNvdXRoZWFzdC0xIkcwRQIgfih28PqEKud07ZHwX%2BSjVUKeGRX1BMGHh352dbKgaAECIQD7DuCxygdpPoDmf1s27zST15NS6BUdeWQOm44W2lHrfiqmAwg%2BEAQaDDM0MjIzNTI2ODc4MCIMgHGochmBNY%2Bnxpk6KoMDsPMtqaoo82NhkeJZdjyjqQcF%2FDmqHfbWXGTo5rLBjLTI8GJsb7CZwinZ%2F6h9mxzaoDFi7JXgqs06eNVt1Kml4Rj4n8vyjX7EPn8TbMzLLC3z9S14h7ty%2F4R5ClvfjUoVIOwQbG%2Ba6%2FS

In [None]:
import time

import pandas as pd

# Polling budget: the download URL may not be available on the first request,
# so ask a bounded number of times, pausing between attempts. Tune as needed.
MAX_POLLS = 5
POLL_INTERVAL_SECONDS = 3

DOWNLOAD_URL = None
for attempt in range(1, MAX_POLLS + 1):
    response = requests.get(
        f"https://api-open.data.gov.sg/v1/public/api/datasets/{DATASET_ID}/poll-download",
        headers={"Content-Type":"application/json"},
        json={},
        timeout=30,  # do not let an unresponsive server hang the notebook
    )
    response.raise_for_status()

    # The URL is only usable once it appears under data.url; tolerate a
    # missing/empty 'data' object while the file is still being prepared.
    DOWNLOAD_URL = (response.json().get('data') or {}).get('url')
    if DOWNLOAD_URL:
        break

    # Skip the sleep after the final attempt; there is nothing left to wait for.
    if attempt < MAX_POLLS:
        time.sleep(POLL_INTERVAL_SECONDS)

if not DOWNLOAD_URL:
    raise RuntimeError(
        f"No download URL returned for dataset {DATASET_ID} after {MAX_POLLS} polls; "
        "re-run the initiate-download cell and try again."
    )

# Load the CSV straight from the returned URL and preview the first rows.
df = pd.read_csv(DOWNLOAD_URL)
df.head()

Unnamed: 0,Notification_No,Subject,Published_Date
0,5,"<a href=""https://storage.egazette.gov.sg/getFi...",2024-07-31
1,4,"<a href=""https://storage.egazette.gov.sg/getFi...",2024-07-31
2,3,"<a href=""https://storage.egazette.gov.sg/getFi...",2024-07-17
3,2,"<a href=""https://storage.egazette.gov.sg/getFi...",2024-03-27
4,1,"<a href=""https://storage.egazette.gov.sg/getFi...",2024-02-27


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the loaded
# dataframe; with pandas defaults only numeric columns are summarised here.
df.describe()

Unnamed: 0,Notification_No
count,5.0
mean,3.0
std,1.581139
min,1.0
25%,2.0
50%,3.0
75%,4.0
max,5.0
