In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from datetime import datetime, timezone
import uuid

StatementMeta(, 4b098266-90a5-4f88-93d6-225645a75c84, 3, Finished, Available, Finished)

In [2]:
%run "./nb_config"

StatementMeta(, 4b098266-90a5-4f88-93d6-225645a75c84, 4, Finished, Available, Finished)

Config Loaded


In [3]:
# -------------------------------
# Spark session
# -------------------------------
# Obtain the SparkSession through the builder (getOrCreate); it is used by
# write_log to build the one-row log DataFrames.
spark = SparkSession.builder.getOrCreate()

# -------------------------------
# PATH & RUN ID
# -------------------------------
# BASE_PATH is not defined in this notebook; presumably it is provided by the
# `%run "./nb_config"` cell above — TODO confirm.
LOG_PATH = f"{BASE_PATH}/logs"
# Random UUID4 string; write_log stamps it on every row in the `run_id` column.
RUN_ID = str(uuid.uuid4())

# -------------------------------
# Helper
# -------------------------------
def get_timestamp():
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now

# -------------------------------
# Log schema
# -------------------------------
# Column layout of one log row. Every column is nullable; `timestamp` is a
# Spark timestamp, all remaining columns are strings.
log_schema = (
    StructType()
    .add("timestamp", TimestampType(), True)
    .add("level", StringType(), True)
    .add("source", StringType(), True)
    .add("process", StringType(), True)
    .add("message", StringType(), True)
    .add("details", StringType(), True)
    .add("run_id", StringType(), True)
)

# -------------------------------
# Core logging function
# -------------------------------
def write_log(level, source, process, message, details=None):
    """Append one log record to the Parquet data at LOG_PATH and echo it to stdout.

    Args:
        level: Label stored in the ``level`` column (the helpers in this
            notebook pass "INFO", "WARNING", "ERROR", "START" and "END").
        source: Value stored in the ``source`` column.
        process: Value stored in the ``process`` column; also shown in the
            console line.
        message: Text stored in the ``message`` column.
        details: Optional extra context. Any value other than ``None`` is
            converted with ``str()`` before it is handed to Spark, so the
            stored text is the Python string form and does not depend on how
            Spark handles arbitrary Python objects (exceptions, dicts, ...).
            ``None`` is stored as a null.

    Side effects:
        Builds a single-row DataFrame with ``log_schema``, appends it to
        LOG_PATH in Parquet format, then prints ``[level] process - message``.
    """
    # The `details` column is a string column: normalise here rather than
    # relying on callers to pre-format exceptions or dicts.
    details_text = None if details is None else str(details)

    log_data = [{
        "timestamp": get_timestamp(),
        "level": level,
        "source": source,
        "process": process,
        "message": message,
        "details": details_text,
        "run_id": RUN_ID
    }]

    df = spark.createDataFrame(log_data, schema=log_schema)
    df.write.mode("append").parquet(LOG_PATH)

    print(f"[{level}] {process} - {message}")

# -------------------------------
# Public log helpers
# -------------------------------
def log_info(source, process, message, details=None):
    """Record an "INFO" entry by delegating to ``write_log``."""
    write_log(
        level="INFO",
        source=source,
        process=process,
        message=message,
        details=details,
    )

def log_warning(source, process, message, details=None):
    """Record a "WARNING" entry by delegating to ``write_log``."""
    write_log(
        level="WARNING",
        source=source,
        process=process,
        message=message,
        details=details,
    )

def log_error(source, process, message, details=None):
    """Record an "ERROR" entry by delegating to ``write_log``."""
    write_log(
        level="ERROR",
        source=source,
        process=process,
        message=message,
        details=details,
    )

def log_process_start(source, process):
    """Record a "START" entry with the fixed message "Process started"."""
    write_log(
        level="START",
        source=source,
        process=process,
        message="Process started",
    )

def log_process_end(source, process):
    """Record an "END" entry with the fixed message "Process completed"."""
    write_log(
        level="END",
        source=source,
        process=process,
        message="Process completed",
    )

# Confirmation line printed once the logging definitions above have executed.
print("Logging System Loaded")

StatementMeta(, 4b098266-90a5-4f88-93d6-225645a75c84, 5, Finished, Available, Finished)

Logging System Loaded


# -------------------------------
# Logging Setup
# -------------------------------
# NOTE(review): LOG_PATH and RUN_ID are also assigned earlier in this notebook;
# executing this code rebinds both, and RUN_ID receives a new random UUID.
# BASE_PATH is not defined in this notebook — presumably it comes from
# nb_config; TODO confirm.
LOG_PATH = f"{BASE_PATH}/logs"
RUN_ID = str(uuid.uuid4())

def get_timestamp():
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS`` text.

    NOTE(review): an earlier definition of ``get_timestamp`` in this notebook
    returns a ``datetime`` object; this definition replaces it once executed.
    """
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.strftime("%Y-%m-%d %H:%M:%S")

# Column layout of one log row: seven nullable string columns.
# NOTE(review): the earlier log_schema in this notebook types `timestamp` as
# TimestampType while this one uses StringType, and both write_log versions
# append to the same LOG_PATH — confirm which layout is intended.
log_schema = StructType([
    StructField(column_name, StringType(), True)
    for column_name in (
        "timestamp",
        "level",
        "source",
        "process",
        "message",
        "details",
        "run_id",
    )
])

def write_log(level, source, process, message, details=None):
    """Append one log record to the Parquet data at LOG_PATH and echo it to stdout.

    Args:
        level: Label stored in the ``level`` column.
        source: Value stored in the ``source`` column; also shown in the
            console line.
        process: Value stored in the ``process`` column.
        message: Text stored in the ``message`` column.
        details: Optional extra context. Any value other than ``None`` is
            stored as ``str(details)``; only ``None`` is stored as a null, so
            falsy values such as ``0``, ``False`` or ``""`` are kept.

    Side effects:
        Builds a single-row DataFrame with ``log_schema``, appends it to
        LOG_PATH in Parquet format, then prints ``[level] source - message``.
    """
    # Test for None explicitly: a plain truthiness check would silently drop
    # meaningful falsy details (0, False, empty string/collection).
    details_text = str(details) if details is not None else None

    log_data = [{
        "timestamp": get_timestamp(),
        "level": level,
        "source": source,
        "process": process,
        "message": message,
        "details": details_text,
        "run_id": RUN_ID
    }]
    df = spark.createDataFrame(log_data, schema=log_schema)
    df.write.mode("append").parquet(LOG_PATH)
    # NOTE(review): this console line shows `source`, whereas the earlier
    # write_log in this notebook shows `process` — confirm which is intended.
    print(f"[{level}] {source} - {message}")

def log_process_start(source, process):
    """Record a "START" entry with the fixed message "Process Started"."""
    write_log(
        level="START",
        source=source,
        process=process,
        message="Process Started",
    )

def log_process_end(source, process):
    """Record an "END" entry with the fixed message "Process Completed"."""
    write_log(
        level="END",
        source=source,
        process=process,
        message="Process Completed",
    )

def log_error(source, process, message, details=None):
    """Record an "ERROR" entry by delegating to ``write_log``."""
    write_log(
        level="ERROR",
        source=source,
        process=process,
        message=message,
        details=details,
    )
