In [1]:
import json
import logging
from datetime import datetime
from rich import print as prt
from typing import Dict, Any, List, Optional

# Notebook-wide logging setup: INFO and above, timestamped records, emitted through
# a stream handler so the messages are rendered in the notebook output.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
_notebook_handler = logging.StreamHandler()
logging.basicConfig(handlers=[_notebook_handler], format=_LOG_FORMAT, level=logging.INFO)

# Logger shared by the cells below, named after the running module.
logger = logging.getLogger(__name__)

In [2]:
def get_source_objects(data_path: str) -> List[Dict[str, Any]]:
    """
    Load the raw source objects from a JSON file.

    The file is expected to hold a top-level JSON array. Failures are logged
    and reported to the caller as an empty list rather than raised.

    Args:
        data_path (str): Path of the JSON file to read.

    Returns:
        List[Dict[str, Any]]: The decoded top-level list, or an empty list when the
        file is missing, is not valid JSON, or its top-level value is not a list.
    """
    try:
        # Decode explicitly as UTF-8: without an encoding argument open() falls back
        # to the platform locale, which can mis-decode non-ASCII JSON text.
        with open(data_path, "r", encoding="utf-8") as f:
            source_objects = json.load(f)
    except FileNotFoundError:
        logger.error("Source objects file not found.")
        return []
    except json.JSONDecodeError:
        logger.error("Error decoding JSON from source objects file.")
        return []

    # Only the container type is validated here; the elements are not inspected.
    if not isinstance(source_objects, list):
        logger.error("Source objects should be a list of dictionaries.")
        return []

    logger.info("Source objects loaded successfully.")
    return source_objects

In [3]:
def extract_ism(acm: Optional[dict]) -> dict:
    """
    Extract the reduced 'ism' structure from an ACM dict.

    Args:
        acm (Optional[dict]): The ACM mapping. ``None`` or any non-dict value is
            treated like an empty mapping.

    Returns:
        dict: The five ISM fields, each ``None`` when the ACM does not provide it.
    """
    # A JSON null (or any other non-dict value) is handled like a missing ACM
    # instead of failing on .get().
    if not isinstance(acm, dict):
        acm = {}

    # (output key, ACM key) pairs, in output order.
    field_map = (
        ("banner", "banner"),
        ("classification", "classif"),
        ("ownerProducer", "owner_prod"),
        ("releaseableTo", "rel_to"),
        ("disseminationControls", "dissem_ctrls"),
    )
    return {ism_key: acm.get(acm_key) for ism_key, acm_key in field_map}

In [4]:
def build_location() -> Dict[str, Any]:
    """
    Create a blank location record laid out according to the schema.

    Every scalar field is None; "altitude" and "elevation" are nested objects whose
    own fields (including the nested "units" value) are None as well.

    Returns:
        Dict[str, Any]: A new, fully unpopulated location structure.
    """
    def _blank_measurement() -> Dict[str, Any]:
        # A new dict on every call so altitude and elevation never share state.
        return {"value": None, "quality": None, "error": None, "units": {"value": None}}

    leading_fields = (
        "ism",
        "id",
        "timestamp",
        "latitude",
        "longitude",
        "semiMajorError",
        "semiMinorError",
        "errorOrientation",
    )
    trailing_fields = ("derivation", "quality", "locationName", "reason", "custom")

    # Insertion order matches the schema: scalars, the two measurements, then the rest.
    location: Dict[str, Any] = dict.fromkeys(leading_fields)
    location["altitude"] = _blank_measurement()
    location["elevation"] = _blank_measurement()
    location.update(dict.fromkeys(trailing_fields))
    return location

In [5]:
def extract_elevation(source_object: Dict[str, Any]) -> Optional[Any]:
    """
    Retrieves the elevation value from the source object, handling variations
    in the attribute name (e.g., "Elevation", "Elevation(m)", "Elevation (m)").

    Name matching ignores case and surrounding whitespace. Entries under
    ``attributes.data`` that are not dictionaries, or whose ``attributeName`` is
    not a string, are skipped so that one malformed entry cannot hide a valid
    elevation that appears later in the list.

    Args:
        source_object (Dict[str, Any]): The source JSON-like object.

    Returns:
        Optional[Any]: The first non-None elevation value found (returned as stored,
        without conversion), otherwise None.
    """
    # Accepted spellings of the elevation attribute, compared in lower case.
    elevation_variations = {"elevation", "elevation(m)", "elevation (m)"}

    if not isinstance(source_object, dict):
        logger.warning("Error occurred while retrieving elevation: source object must be a dictionary.")
        return None

    attributes = source_object.get("attributes")
    entries = attributes.get("data") if isinstance(attributes, dict) else None
    if not isinstance(entries, (list, tuple)):
        logger.warning(
            "Error occurred while retrieving elevation: "
            "source object does not contain the expected 'attributes.data' structure."
        )
        return None

    for entry in entries:
        if not isinstance(entry, dict):
            continue  # malformed entry: ignore it and keep searching

        name = entry.get("attributeName")
        if not isinstance(name, str):
            continue  # missing or null name cannot match any variation

        value = entry.get("attributeValue")
        if value is not None and name.strip().lower() in elevation_variations:
            return value

    return None

In [6]:
def parse_location(source_object: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Builds a location record from the input object's latest known location.

    Args:
        source_object (Dict[str, Any]): The input object containing location data
            under "latestKnownLocation".

    Returns:
        Optional[Dict[str, Any]]: A single location dictionary based on the
        build_location() template, or None when the object has no location, no
        geometry, coordinates that are not a two-element sequence, or when an
        unexpected error occurs (the error is logged, not raised).
    """
    # Resolve the id up front and defensively, so the error handler below can never
    # fail itself when source_object is not a dictionary.
    object_id = source_object.get("id") if isinstance(source_object, dict) else None

    try:
        # "or {}" / "or []" also cover keys that are present but null.
        location_data = source_object.get("latestKnownLocation") or {}
        if not location_data:
            logger.debug(f"No location data found for object {object_id}")
            return None

        geometry_data = location_data.get("geometry") or {}
        if not geometry_data:
            logger.debug(f"No geometry data found for object {object_id}")
            return None

        # Only two-element positions are accepted.
        coords = geometry_data.get("coordinates") or []
        if len(coords) != 2:
            logger.debug(f"Invalid coordinates for object {object_id}")
            return None

        # A null or non-dict "acm" / "lastVerified" is treated like a missing one
        # instead of discarding the whole location.
        acm = location_data.get("acm")
        location_ism = extract_ism(acm if isinstance(acm, dict) else {})

        last_verified = location_data.get("lastVerified")
        timestamp = last_verified.get("timestamp") if isinstance(last_verified, dict) else None

        # Elevation may be stored as text; convert it, dropping unparseable values.
        elevation_value = extract_elevation(source_object)
        if isinstance(elevation_value, str):
            try:
                elevation_value = float(elevation_value)
            except ValueError as e:
                logger.warning(f"Error transforming elevation into float: {e}")
                elevation_value = None

        # Start from the blank template and fill in only the fields that are known;
        # everything else keeps the template's None defaults.
        location = build_location()
        location.update({
            "ism": location_ism,
            "id": location_data.get("id"),
            "timestamp": timestamp,
            "latitude": coords[1],  # Latitude is second value
            "longitude": coords[0],  # Longitude is first value
            "derivation": geometry_data.get("type"),  # Get the type from geometry_data
        })
        location["elevation"]["value"] = elevation_value

        return location
    except Exception as e:
        logger.error(f"Unexpected error writing location values for object {object_id}: {str(e)}")
        return None

In [7]:
# Load the raw source objects from the JSON file under ../data/1_raw.
# get_source_objects logs the problem and returns an empty list when the file is
# missing, is not valid JSON, or does not contain a top-level list.
data_path = "../data/1_raw/source_objects.json"
source_objects = get_source_objects(data_path)

2025-08-29 13:12:31,532 - __main__ - INFO - Source objects loaded successfully.


In [8]:
# Parse the first source object as a sample. get_source_objects returns [] when
# loading fails, so guard the lookup instead of letting source_objects[0] raise
# IndexError; location_data is None in that case.
location_data = parse_location(source_objects[0]) if source_objects else None

In [9]:
# Pretty-print the parsed location with rich's print (imported as prt);
# this shows None when parse_location could not build a location.
prt(location_data)