# In [2]:
import requests
import pandas as pd
import geopandas as gpd
from dotenv import load_dotenv
import os

def get_bus_info(api_url, api_key, batch_size=500, timeout=30):
    """
    Function to fetch bus data from the LTA DataMall API and handle pagination.

    Use cases:
    - 2.2 Bus Services
    - 2.3 Bus Routes
    - 2.4 Bus Stops

    Pages are requested one after another with an increasing ``$skip`` offset.
    Fetching stops when a page is empty, when a page holds fewer than
    ``batch_size`` records, when the response has no 'value' key, or when the
    server answers with a non-200 status. In the last two cases a message is
    printed and whatever was collected so far is returned.

    Parameters:
    - api_url (str): The API endpoint URL for the data.
    - api_key (str): Your LTA DataMall API key.
    - batch_size (int): The number of records a full page is expected to hold
      (default is 500). Must be at least 1. It is not sent to the server; it is
      only used to recognise the last page, so a value larger than the number
      of records the server actually returns per call ends pagination after
      the first page.
    - timeout (float or tuple): Seconds to wait for each HTTP request before
      giving up (default is 30). Passed straight to ``requests.get``.

    Returns:
    - pd.DataFrame: A pandas DataFrame containing the data (empty if nothing
      was retrieved).

    Raises:
    - ValueError: If batch_size is smaller than 1.
    """
    # A non-positive batch size can never satisfy the "short page" stop
    # condition below, which previously led to endless re-requests.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")

    # Set up the headers with the API key
    headers = {
        'AccountKey': api_key,
        'accept': 'application/json'
    }

    pages = []  # One DataFrame per successfully fetched page
    skip = 0  # Offset of the next record to request

    while True:
        # The '$skip' parameter is kept literal in the URL so the request is
        # byte-for-byte what this function has always sent.
        paginated_url = f"{api_url}?$skip={skip}"

        # Without a timeout, requests would wait indefinitely on a stalled
        # connection.
        response = requests.get(paginated_url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        data = response.json()

        if 'value' not in data:
            print("No 'value' key found in the response.")
            break

        page_df = pd.json_normalize(data['value'])
        received = len(page_df)

        # An empty page means there is nothing left to read; do not keep it.
        if received == 0:
            break

        pages.append(page_df)

        # A short page is the last one.
        if received < batch_size:
            break

        # Advance by what was actually received so that a batch_size smaller
        # than the server's page size does not re-fetch overlapping rows.
        skip += received

    # Concatenate all the data into a single DataFrame
    if pages:
        return pd.concat(pages, ignore_index=True)
    return pd.DataFrame()  # Nothing was retrieved

# In [42]:
def get_bus_arrival(api_url, api_key, bus_stop_code, timeout=30):
    """
    Function to fetch real-time bus arrival information from the LTA DataMall API.

    A single GET request is made with the bus stop code as the
    ``BusStopCode`` query parameter. On a non-200 status, or when the response
    has no 'Services' key, a message is printed and an empty DataFrame is
    returned.

    Parameters:
    - api_url (str): The API endpoint URL for the bus arrival data.
    - api_key (str): Your LTA DataMall API key.
    - bus_stop_code (str): The bus stop code to retrieve bus arrival data for.
    - timeout (float or tuple): Seconds to wait for the HTTP request before
      giving up (default is 30). Passed straight to ``requests.get``.

    Returns:
    - pd.DataFrame: A pandas DataFrame containing the bus arrival information,
      with an extra 'BusStopCode' column holding the requested code.
    """
    # Set up the headers with the API key
    headers = {
        'AccountKey': api_key,
        'accept': 'application/json'
    }

    # Let requests build the query string: the value is URL-encoded and an
    # api_url that already has a query part is handled correctly. The timeout
    # prevents waiting indefinitely on a stalled connection.
    response = requests.get(
        api_url,
        headers=headers,
        params={'BusStopCode': bus_stop_code},
        timeout=timeout,
    )

    if response.status_code != 200:
        print(f"Failed to retrieve data for bus stop code {bus_stop_code}: {response.status_code}")
        return pd.DataFrame()

    data = response.json()

    if 'Services' not in data:
        print(f"No 'Services' data found for bus stop code: {bus_stop_code}")
        return pd.DataFrame()

    # Flatten the 'Services' list into columns
    services_df = pd.json_normalize(data['Services'])
    services_df['BusStopCode'] = bus_stop_code  # Add the bus stop code for reference
    return services_df