# Pandas: 
### - Calling APIs using Python

<strong> <h2> Run The Following Code </h2> </strong>

Hit SHIFT + ENTER when your cusor is inside the cell of code.<br/>

> Import the packages

In [1]:
import pandas as pd
import requests

## Calling API Example 1

> 1. Open this url in your browser: https://data.gov.sg/dataset/list-of-government-markets-hawker-centres
> 2. Click on the "Data API" button on top rigth corner to see resource_id for this dataset
> 3. Use **requests** package to call this API and get all first 5 rows of data

In [2]:
# Option A - Manually construct the request URL
url_full = 'https://data.gov.sg/api/action/datastore_search?resource_id=8f6bba57-19fc-4f36-8dcf-c0bda382364d&limit=5'

response = requests.get(url_full)

In [3]:
# Option B - (Recommneded) passing a dictionary to .get() method in requests to construct the request url
url_base = 'https://data.gov.sg/api/action/datastore_search'

parameters = {
    'resource_id' : '8f6bba57-19fc-4f36-8dcf-c0bda382364d',
    'limit': '5'
}
response = requests.get(url_base, params=parameters)

In [4]:
# Check the url that sent to the API server
response.url

'https://data.gov.sg/api/action/datastore_search?resource_id=8f6bba57-19fc-4f36-8dcf-c0bda382364d&limit=5'

In [5]:
# Check the status of the request
response.status_code

200

In [6]:
# View the json text returned by the API server
response_dict = response.json()
response_dict

{'help': 'https://data.gov.sg/api/3/action/help_show?name=datastore_search',
 'success': True,
 'result': {'resource_id': '8f6bba57-19fc-4f36-8dcf-c0bda382364d',
  'fields': [{'type': 'int4', 'id': '_id'},
   {'type': 'text', 'id': 'name_of_centre'},
   {'type': 'text', 'id': 'location_of_centre'},
   {'type': 'text', 'id': 'type_of_centre'},
   {'type': 'text', 'id': 'owner'},
   {'type': 'numeric', 'id': 'no_of_stalls'},
   {'type': 'numeric', 'id': 'no_of_cooked_food_stalls'},
   {'type': 'numeric', 'id': 'no_of_mkt_produce_stalls'}],
  'records': [{'location_of_centre': '2, Adam Road, S(289876)',
    'no_of_cooked_food_stalls': '32',
    'no_of_mkt_produce_stalls': '0',
    'name_of_centre': 'Adam Road Food Centre',
    'type_of_centre': 'HC',
    'no_of_stalls': '32',
    'owner': 'Government',
    '_id': 1},
   {'location_of_centre': 'National Development Building, Annex B, Telok Ayer Street, S(069111)',
    'no_of_cooked_food_stalls': '134',
    'no_of_mkt_produce_stalls': '1',


> The json text above is a nested-dictionary <br>
> Picture below shows the structure in a visual form

> ![](hawker_structure.png)

In [7]:
# Traverse the dictionary to the level where the main records are located
response_dict['result']['total']

107

In [8]:
# Traverse the dictionary to the level where the main records are located
records = response_dict['result']['records']
records

[{'location_of_centre': '2, Adam Road, S(289876)',
  'no_of_cooked_food_stalls': '32',
  'no_of_mkt_produce_stalls': '0',
  'name_of_centre': 'Adam Road Food Centre',
  'type_of_centre': 'HC',
  'no_of_stalls': '32',
  'owner': 'Government',
  '_id': 1},
 {'location_of_centre': 'National Development Building, Annex B, Telok Ayer Street, S(069111)',
  'no_of_cooked_food_stalls': '134',
  'no_of_mkt_produce_stalls': '1',
  'name_of_centre': 'Amoy Street Food Centre',
  'type_of_centre': 'HC',
  'no_of_stalls': '135',
  'owner': 'Government',
  '_id': 2},
 {'location_of_centre': '1, Bedok Road, S(469572)',
  'no_of_cooked_food_stalls': '32',
  'no_of_mkt_produce_stalls': '0',
  'name_of_centre': 'Bedok Food Centre',
  'type_of_centre': 'HC',
  'no_of_stalls': '32',
  'owner': 'Government',
  '_id': 3},
 {'location_of_centre': '38A, Beo Crescent, S(169982)',
  'no_of_cooked_food_stalls': '32',
  'no_of_mkt_produce_stalls': '62',
  'name_of_centre': 'Beo Crescent Market',
  'type_of_centre'

In [9]:
# Check the number of records
len(records)

5

In [10]:
# Use .json_normalize from Pandas to convert the dictionary into a DataFrame
pd.json_normalize(records)

Unnamed: 0,location_of_centre,no_of_cooked_food_stalls,no_of_mkt_produce_stalls,name_of_centre,type_of_centre,no_of_stalls,owner,_id
0,"2, Adam Road, S(289876)",32,0,Adam Road Food Centre,HC,32,Government,1
1,"National Development Building, Annex B, Telok ...",134,1,Amoy Street Food Centre,HC,135,Government,2
2,"1, Bedok Road, S(469572)",32,0,Bedok Food Centre,HC,32,Government,3
3,"38A, Beo Crescent, S(169982)",32,62,Beo Crescent Market,MHC,94,Government,4
4,"166, Jalan Besar, S(208877)",66,0,Berseh Food Centre,HC,66,Government,5


In [11]:
# Use .json_normalize from Pandas to convert the dictionary into a DataFrame 
# Store into a variable called "df_hawker"
df_hawker = pd.json_normalize(records)

## Calling API Example 2: with Parameters

> 1. Open this url in your browser: https://www.onemap.gov.sg/docs/#onemap-rest-apis
> 2. Understand the parameters (required and optional) of the "search" endpoint from OneMap API
> 3. Use **requests** package to call this API and get all first 5 rows of data

> ![apiexp](https://i.imgur.com/2bwoYYl.png)

In [12]:
# Try to call the API (Case-sensitive Parameters not match)

url_base = 'https://developers.onemap.sg/commonapi/search'

parameters = {
    'searchval' : 'Adam Road Food Centre',
    'returngeom': 'Y',
    'getAddrDetails': 'Y'
}
response = requests.get(url_base, params=parameters)
response.status_code


# Outcome is 400 status code

400

In [13]:
# Try to call the API (Required Parameters not provided)

url_base = 'https://developers.onemap.sg/commonapi/search'

parameters = {
    'searchval' : 'Adam Road Food Centre',
}
response = requests.get(url_base, params=parameters)
response.status_code


# Outcome is 400 series status code

400

In [14]:
# Try to call the API (Correctly)

url_base = 'https://developers.onemap.sg/commonapi/search'

parameters = {
    'searchVal' : 'Adam Road Food Centre',
    'returnGeom': 'Y',
    'getAddrDetails': 'Y'
}
response = requests.get(url_base, params=parameters)
response.status_code

200

In [15]:
# View the json text returned by the server
response.json()

{'found': 1,
 'totalNumPages': 1,
 'pageNum': 1,
 'results': [{'SEARCHVAL': 'ADAM ROAD FOOD CENTRE',
   'BLK_NO': '2',
   'ROAD_NAME': 'ADAM ROAD',
   'BUILDING': 'ADAM ROAD FOOD CENTRE',
   'ADDRESS': '2 ADAM ROAD ADAM ROAD FOOD CENTRE SINGAPORE 289876',
   'POSTAL': '289876',
   'X': '25870.3018411137',
   'Y': '34035.8644314632',
   'LATITUDE': '1.3240827139625',
   'LONGITUDE': '103.814182099841',
   'LONGTITUDE': '103.814182099841'}]}

In [16]:
# Use .json_normalize from Pandas to convert the dictionary into a DataFrame 
pd.json_normalize(response.json()['results'])

Unnamed: 0,SEARCHVAL,BLK_NO,ROAD_NAME,BUILDING,ADDRESS,POSTAL,X,Y,LATITUDE,LONGITUDE,LONGTITUDE
0,ADAM ROAD FOOD CENTRE,2,ADAM ROAD,ADAM ROAD FOOD CENTRE,2 ADAM ROAD ADAM ROAD FOOD CENTRE SINGAPORE 28...,289876,25870.3018411137,34035.8644314632,1.3240827139625,103.814182099841,103.814182099841


> 1. Loop through the **df_hawker** and find the geoinfo of each hawker from OneMap's SEARCH endpoint
> 2. Store all the geoinfo as a new DataFrame

In [17]:
# Alternative A: Using iterrows() method from DataFrame to step through every row
df_hawker_geoinfo = []

for row_index, row in df_hawker.iterrows():
    # Create a dictionary of the parameters
    parameters = {
        'searchVal' : row['name_of_centre'],
        'returnGeom': 'Y',
        'getAddrDetails': 'Y'
    }

    # Send the request via .get() method
    response = requests.get(url_base, params=parameters)

    # Check if the request is successful
    if response.status_code == 200:
        response_dict = response.json()
        # Check if there is at least 1 record
        if len(response_dict['results']) > 0:
            df_temp = pd.json_normalize(response_dict['results'])
            df_hawker_geoinfo.append(df_temp)
    else:
        print('The request failed and returned status code:' + str(response.status_code))

# Concatenate the records stored in multiple DataFrames into a single DataFrame
df_hawker_geoinfo = pd.concat(df_hawker_geoinfo, axis=0, ignore_index=True)
df_hawker_geoinfo

Unnamed: 0,SEARCHVAL,BLK_NO,ROAD_NAME,BUILDING,ADDRESS,POSTAL,X,Y,LATITUDE,LONGITUDE,LONGTITUDE
0,ADAM ROAD FOOD CENTRE,2,ADAM ROAD,ADAM ROAD FOOD CENTRE,2 ADAM ROAD ADAM ROAD FOOD CENTRE SINGAPORE 28...,289876,25870.3018411137,34035.8644314632,1.3240827139625,103.814182099841,103.814182099841
1,OCBC AMOY STREET FOOD CENTRE,7,MAXWELL ROAD,OCBC AMOY STREET FOOD CENTRE,7 MAXWELL ROAD OCBC AMOY STREET FOOD CENTRE SI...,69111,29480.2599858707,29076.4117134903,1.27923120961067,103.846619273705,103.846619273705
2,AMOY STREET FOOD CENTRE,7,MAXWELL ROAD,AMOY STREET FOOD CENTRE,7 MAXWELL ROAD AMOY STREET FOOD CENTRE SINGAPO...,69111,29483.955767219,29088.4261356071,1.2793398636571,103.846652482254,103.846652482254
3,DBS AMOY STREET FOOD CENTRE,7,MAXWELL ROAD,DBS AMOY STREET FOOD CENTRE,7 MAXWELL ROAD DBS AMOY STREET FOOD CENTRE SIN...,69111,29480.2599858707,29076.4117134903,1.27923120961067,103.846619273705,103.846619273705
4,BEDOK FOOD CENTRE,1,BEDOK ROAD,BEDOK FOOD CENTRE,1 BEDOK ROAD BEDOK FOOD CENTRE SINGAPORE 469572,469572,41595.4345044643,33623.132626934,1.32034716834128,103.955480570427,103.955480570427
5,BEO CRESCENT MARKET,38A,BEO CRESCENT,BEO CRESCENT MARKET,38A BEO CRESCENT BEO CRESCENT MARKET SINGAPORE...,169982,27336.1809281698,30137.891893005,1.28883089150258,103.827353892228,103.827353892228
6,DBS BERSEH FOOD CENTRE,166,JALAN BESAR,DBS BERSEH FOOD CENTRE,166 JALAN BESAR DBS BERSEH FOOD CENTRE SINGAPO...,208877,30623.1447876785,32184.9995951913,1.30734410918998,103.856888783497,103.856888783497
7,BERSEH FOOD CENTRE,166,JALAN BESAR,BERSEH FOOD CENTRE,166 JALAN BESAR BERSEH FOOD CENTRE SINGAPORE 2...,208877,30623.1448179465,32184.9996254593,1.30734410946371,103.856888783769,103.856888783769


In [18]:
# Alternative A: Using apply() method from DataFrame to apply a custom function
def fetch_geoinfo(name_of_centre):
    # Create a dictionary of the parameters
    parameters = {
        'searchVal' : name_of_centre,
        'returnGeom': 'Y',
        'getAddrDetails': 'Y'
    }

    # Send the request via .get() method
    response = requests.get(url_base, params=parameters)

    # Check if the request is successful
    if response.status_code == 200:
        response_dict = response.json()
        # Check if there is at least 1 record
        results = response_dict.get('results')
        if results:
            df_temp = pd.json_normalize(results)  # use your appropriate function here
            return df_temp
    else:
        print(f'The request failed and returned status code: {response.status_code}')

# Apply function to df_hawker
df_hawker_geoinfo = df_hawker['name_of_centre'].apply(fetch_geoinfo)

# Concatenate dataframes in list
df_hawker_geoinfo = pd.concat(list(df_hawker_geoinfo), axis=0, ignore_index=True)

# !! Your Turn !!

## Replace the code marked as <..> COMPLETELY with your own.
- The placeholder <..> is meant to be guidance for your answer.
- It should not be restricting your solutions for the questions.
- Feel free to add more lines or use less lines,
- One placeholder does not mean that you can only insert one line of codes.

# Question 1
> - Continue from the **df_hawker** and **df_hawker_geoinfo**, produce a new DataFrame **df_hawker_center**, <br>
> where each row is a unique hawker center with all the columns both **df_hawker** and **df_hawker_geoinfo**
> - The four columns "X", "Y", "LONGTITUDE", and "_id" must be excluded in the **df_hawker_center**
> - Change all the column names into lower case

In [None]:
<..>

In [28]:
# Combine df_hawker and df_hawker_geoinfo DataFrames
df_hawker_center = pd.concat([df_hawker, df_hawker_geoinfo], axis=1)

# Drop unnecessary columns
columns_to_drop = ['X', 'Y', 'LONGTITUDE', '_id']
df_hawker_center = df_hawker_center.drop(columns=columns_to_drop)

# Change column names to lowercase
df_hawker_center.columns = df_hawker_center.columns.str.lower()

# Print the resulting DataFrame
print(df_hawker_center)


                                  location_of_centre no_of_cooked_food_stalls  \
0                            2, Adam Road, S(289876)                       32   
1  National Development Building, Annex B, Telok ...                      134   
2                           1, Bedok Road, S(469572)                       32   
3                       38A, Beo Crescent, S(169982)                       32   
4                        166, Jalan Besar, S(208877)                       66   
5                                                NaN                      NaN   
6                                                NaN                      NaN   
7                                                NaN                      NaN   

  no_of_mkt_produce_stalls           name_of_centre type_of_centre  \
0                        0    Adam Road Food Centre             HC   
1                        1  Amoy Street Food Centre             HC   
2                        0        Bedok Food Centre         

# Question 2


The cell below is to download a single csv file to the working directory
in your google colab session.

It uses the same `get()` method from the "requests" package that we are using to call APIs.

In [29]:
# This is a pre-populated code cell. Please do not change anything in this cell,
# unless you know what you're doing.

url = 'https://d17lzt44idt8rf.cloudfront.net/data-week-06-MRT.csv'
response = requests.get(url)

# Make sure the request was successful
if response.status_code == 200:

  # Write the content to a file
  with open('data-week-06-MRT.csv', 'wb') as f:
      f.write(response.content)

> Part A) <br>
> Store the first 200 records from the Excel file located at **data > 0_raw_data > hdb_postal_to_stations.xlsx** into a DataFrame, called **df_mrt_subset**

In [None]:
df_mrt_subset = <..>

In [37]:
# Read the first 200 records from the CSV file
df_mrt_subset = pd.read_csv('data-week-06-MRT.csv', nrows=200)

# Print the resulting DataFrame
print(df_mrt_subset)


    Type  postal   Station Name
0    MRT  609690    Jurong East
1    MRT  659958    Bukit Batok
2    MRT  659083   Bukit Gombak
3    MRT  689810  Choa Chu Kang
4    MRT  689715        Yew Tee
..   ...     ...            ...
135  MRT  529683  Tampines West
136  MRT  529538       Tampines
137  MRT  529623  Tampines East
138  MRT  485990   Upper Changi
139  MRT  485985           Expo

[140 rows x 3 columns]


> Part B) <br>
> - Retrieve the geoinfo using OneMap's API Endpoint 'https://developers.onemap.sg/commonapi/search', <br>
> using the postal codes of the MRT stations
> - Store the geoinfo for the MRT stations' postal code into DataFrame **df_mrt_geoinfo** <br>
> 💡Hint: You will need to the remove the decimal point at the end of each postal code

In [None]:
<..>

<..>

<..>

In [40]:
import requests
import pandas as pd

# Postal codes of the MRT stations (remove decimal points)
mrt_postal_codes = df_mrt_subset['postal'].astype(int).astype(str).str.replace('.', '')

# URL base for the OneMap API
url_base = 'https://developers.onemap.sg/commonapi/search'

# List to store the geoinfo for MRT stations
mrt_geoinfo = []

# Iterate over the postal codes
for postal_code in mrt_postal_codes:
    # Create the parameters for the API request
    parameters = {
        'searchVal': postal_code,
        'returnGeom': 'Y',
        'getAddrDetails': 'N'
    }
    
    # Send the request to the OneMap API
    response = requests.get(url_base, params=parameters)
    
    # Check if the request is successful
    if response.status_code == 200:
        # Extract the geoinfo from the response
        geoinfo = response.json()
        if 'results' in geoinfo:
            mrt_geoinfo.append(geoinfo['results'])
    else:
        print(f'Request failed for postal code {postal_code} with status code {response.status_code}')

# Flatten the list of geoinfo dictionaries into a single DataFrame
df_mrt_geoinfo = pd.json_normalize([item for sublist in mrt_geoinfo for item in sublist])

# Print the resulting DataFrame
print(df_mrt_geoinfo)


  mrt_postal_codes = df_mrt_subset['postal'].astype(int).astype(str).str.replace('.', '')


                                       SEARCHVAL                 X  \
0                   OCBC JURONG EAST MRT STATION   17869.080600149   
1           JURONG EAST MRT STATION (EW24 / NS1)  17869.0570516568   
2                    UOB JURONG EAST MRT STATION   17854.317512162   
3                    DBS JURONG EAST MRT STATION   17854.317512162   
4              CITIBANK JURONG EAST SMRT STATION   17854.317512162   
..                                           ...               ...   
584             TAMPINES EAST MRT STATION (DT33)  41501.0748521937   
585  690 UPPER CHANGI ROAD EAST SINGAPORE 485990  42221.1936496524   
586              UPPER CHANGI MRT STATION (DT34)   42262.187971171   
587                2 EXPO DRIVE SINGAPORE 485985  42302.1120167859   
588                      EXPO MRT STATION (DT35)  42362.6034600167   

                    Y          LATITUDE         LONGITUDE        LONGTITUDE  
0    35038.9471976664  1.33315261987295  103.742286544006  103.742286544006  
1  

# Question 3

> Part A) <br>
> - Download all the records for HDB carpark info from the API https://data.gov.sg/dataset/hdb-carpark-information
> - Store the records into a DataFrame, called **df_carpark**

In [None]:
<..>

In [1]:
import requests
import pandas as pd

# API endpoint URL
url = 'https://data.gov.sg/api/action/datastore_search?resource_id=139a3035-e624-4f56-b63f-89ae28d4ae4c&limit=50000'

# Send GET request to the API endpoint
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Extract the records from the response
    records = response.json()['result']['records']
    
    # Create DataFrame from the records
    df_carpark = pd.DataFrame(records)
    
    # Print the DataFrame
    print(df_carpark)
else:
    print('Failed to retrieve carpark information')



     short_term_parking          car_park_type     y_coord     x_coord  \
0             WHOLE DAY      BASEMENT CAR PARK  31490.4942  30314.7936   
1             WHOLE DAY  MULTI-STOREY CAR PARK  33695.5198  33758.4143   
2             WHOLE DAY       SURFACE CAR PARK  34500.3599  29257.7203   
3               7AM-7PM       SURFACE CAR PARK  39012.6664  28185.4359   
4                    NO       SURFACE CAR PARK  38684.1754  29482.0290   
...                 ...                    ...         ...         ...   
2194          WHOLE DAY       SURFACE CAR PARK  45686.2734  27772.9219   
2195          WHOLE DAY       COVERED CAR PARK  45330.3961  30167.5966   
2196          WHOLE DAY  MULTI-STOREY CAR PARK  45535.3488  30194.3665   
2197          WHOLE DAY  MULTI-STOREY CAR PARK  45679.7181  29935.5818   
2198          WHOLE DAY       SURFACE CAR PARK  45507.8047  28077.2305   

                 free_parking gantry_height car_park_basement night_parking  \
0                          NO   

> Part B)** - Challenging Question [Optional] <br>
> - Check the number of records in **df_carpark**
> - You will realize the total records on data.gov.sg is 2,183 records, but **df_carpark**only has 100 records
> - This is because it's common for API return a "page" of results per call. In this case, the API returns 100 records per page.
> - If you check *response.json()['result']['_links']*, you will see there are two links, the "next" link shows how to retrieve the next 100 records
> - Find a way to download all 2,183 records and store into the dataframe **df_carparks**

> hint: you can use anything that you have learnt so far to achieve this

In [2]:
response.json()['result']['_links']

{'start': '/api/action/datastore_search?limit=50000&resource_id=139a3035-e624-4f56-b63f-89ae28d4ae4c',
 'next': '/api/action/datastore_search?offset=50000&limit=50000&resource_id=139a3035-e624-4f56-b63f-89ae28d4ae4c'}

In [None]:
<..>

In [11]:
import requests
import pandas as pd

# API endpoint URL
base_url = 'https://data.gov.sg/api/action/datastore_search'

# Parameters for the initial request
params = {
    'resource_id': '139a3035-e624-4f56-b63f-89ae28d4ae4c',
    'limit': 100,  # Number of records per page
    'offset': 0  # Initial offset
}

# Initialize an empty list to store all the records
all_records = []

# Send GET requests to retrieve all pages of records
while True:
    # Send GET request to the API endpoint with the parameters
    response = requests.get(base_url, params=params)

    # Check if the request was successful
    if response.status_code == 200:
        # Extract the records from the response
        data = response.json()['result']

        # Extract the records and append them to the list
        records = data['records']
        all_records.extend(records)

        # Check if there is a next page
        if 'links' in data and 'next' in data['links']:
            # Get the URL for the next page
            next_url = data['links']['next']

            # Update the URL and parameters for the next page
            base_url = next_url
            params = None

        else:
            # No more pages, break the loop
            break
    else:
        print('Failed to retrieve carpark information')
        break

# Create DataFrame from all the records
df_carparks = pd.DataFrame(all_records)

# Print the number of records
print("Total records:", len(df_carparks))


Total records: 100


In [12]:
len(df_carparks)

100