# Real-Time Taxi Supply & Repositioning System (Interim)

### Objective: 
#### Analyze Singapore cab pickup and dropoff data to identify high-demand areas, busiest times, and optimize driver allocation using SQL for data management and Python for analysis and visualization.

### Data Source: 
##### [1. Real-time - LTA Data Mall (Taxi Availability)](https://datamall.lta.gov.sg/content/datamall/en/search_datasets.html?searchText=taxi)
##### [2. Excel - Singapore City Geo-Coordinates](https://www.kaggle.com/datasets/shymammoth/singapore-city-geo-coordinates-more-reliable?resource=download)

In [None]:
import pandas as pd
import requests
from sqlalchemy import create_engine
from datetime import datetime

#### Function 1: Extract taxi availability data

In [None]:
def taxi_availability_extract(api_key):

    """
    Placeholder for the extract step that pulls live taxi availability from the LTA API.

    NOTE(review): not implemented yet. `api_key` is never used, and `taxi_data`
    is not assigned anywhere in this function, so the call raises NameError
    unless a global named `taxi_data` happens to exist at call time.

    Args:
        api_key: presumably the LTA DataMall account key - TODO confirm.

    Returns:
        Intended: a pandas DataFrame of available taxi coordinates with a time stamp.
        Currently: whatever the name `taxi_data` resolves to when called.
    """

    # `taxi_data` is looked up as a global here; nothing above assigns it.
    return taxi_data

# Cell-level trial call; the return value is discarded.
# NOTE(review): `api_key` is not defined in the visible source - confirm it is set in an earlier cell.
taxi_availability_extract(api_key)

#### Function 2: Extract Singapore geo-coordinates data

In [None]:
def load_geolocations(csv_path):
    """
    Load the Singapore geo-coordinates CSV and attach a surrogate key.

    Args:
        csv_path: Path (or any source accepted by ``pandas.read_csv``) of the
            geo-coordinates CSV. It must contain 'latitude' and 'longitude'
            columns; an 'area_name' column is kept when present.

    Returns:
        pandas DataFrame with columns
        ['location_id', 'area_name', 'latitude', 'longitude'] when the file has
        an 'area_name' column, otherwise ['location_id', 'latitude', 'longitude'].
        'location_id' numbers the rows 1..N in file order.

    Raises:
        KeyError: if 'latitude' or 'longitude' is missing from the CSV.
    """
    df = pd.read_csv(csv_path)

    # Fail with a message that names the missing columns instead of the
    # generic KeyError pandas raises from the column selection below.
    missing = [name for name in ('latitude', 'longitude') if name not in df.columns]
    if missing:
        raise KeyError(f"CSV is missing required column(s): {missing}")

    # Add surrogate primary key (1-based, following row order in the file)
    df['location_id'] = range(1, len(df) + 1)

    # 'area_name' is optional; keep it only when the file provides it
    if 'area_name' in df.columns:
        columns = ['location_id', 'area_name', 'latitude', 'longitude']
    else:
        columns = ['location_id', 'latitude', 'longitude']

    # Selecting with a list drops every other column and fixes the column order
    geolocations_data = df[columns]
    return geolocations_data


# Cell-level trial call; the returned DataFrame is discarded.
# NOTE(review): `KAGGLE_CSV_PATH` is not defined in the visible source - confirm it is set in an earlier cell.
load_geolocations(KAGGLE_CSV_PATH)

#### Function 3: Transform data

In [None]:
def transform_data(taxi_data, geo_data):

    """
    Placeholder for the transform step that reshapes the extracted data for
    visualisation and for loading into Postgres.

    NOTE(review): not implemented yet. Neither argument is used, and
    `transformed_data` is not assigned anywhere in this function, so the call
    raises NameError unless a global of that name exists at call time.

    Args:
        taxi_data: presumably the output of the taxi availability extract step - TODO confirm.
        geo_data: presumably the output of the geo-coordinates extract step - TODO confirm.

    Returns:
        Intended: DataFrame with columns
        ['time_stamp', 'town_name', 'region_name', 'longitude', 'latitude'].
        Currently: whatever the name `transformed_data` resolves to when called.
    """
    # `transformed_data` is looked up as a global here; nothing above assigns it.
    return transformed_data

# Cell-level trial call; the return value is discarded.
# NOTE(review): `taxi_data` and `geo_data` are never assigned at module level in the visible source - confirm they exist before running this cell.
transform_data(taxi_data, geo_data)

#### Function 4: Data Visualisation

In [None]:
def geospatial_visualisation(transformed_data):
    """
    Placeholder for the step that plots live taxi locations via the Folium library.

    NOTE(review): not implemented yet - the body holds only this docstring, so
    `transformed_data` is ignored and nothing is plotted.

    Args:
        transformed_data: presumably the DataFrame produced by the transform
            step - TODO confirm once the plotting code is written.

    Returns:
        None.
    """

# Cell-level trial call.
# NOTE(review): `transformed_data` is never assigned at module level in the visible source - confirm it exists before running this cell.
geospatial_visualisation(transformed_data)

#### Function 5: Load Postgres

In [None]:
def load_postgres(transformed_data):

    """
    Placeholder for the load step that writes the transformed data to Postgres.

    NOTE(review): not implemented yet - the body holds only this docstring, so
    `transformed_data` is ignored, nothing is written, and the function
    returns None.

    Args:
        transformed_data: presumably the DataFrame produced by the transform
            step - TODO confirm once the load code is written.
    """

# Cell-level trial call.
# NOTE(review): `transformed_data` is never assigned at module level in the visible source - confirm it exists before running this cell.
load_postgres(transformed_data)

#### Function 6: Orchestrating function

In [None]:
def run_pipeline():
    """
    Run the extract -> transform -> visualise -> load pipeline in an endless loop.

    Every pass calls taxi_availability_extract, transform_data,
    geospatial_visualisation and load_postgres in that order, handing each step
    the value returned by the previous one. The loop has no exit condition, so
    this function never returns normally; stop it by interrupting the process.

    Reads the module-level names `api_key` and `KAGGLE_CSV_PATH`, which must be
    defined before calling.

    NOTE(review): the "save to Postgres by batches when a condition is met"
    behaviour and any delay between polls are not implemented - every pass
    loads immediately and the next pass starts straight away.
    """
    # Function 2: depends only on the CSV path, not on the live feed, so it is
    # read once instead of on every pass.
    geo_data = load_geolocations(KAGGLE_CSV_PATH)

    while True:
        taxi_data = taxi_availability_extract(api_key)  # Function 1

        transformed_data = transform_data(taxi_data, geo_data)  # Function 3

        geospatial_visualisation(transformed_data)  # Function 4

        load_postgres(transformed_data)  # Function 5
