# Generic Bronze → Silver Notebook

In [1]:
%run "./nb_functions"

StatementMeta(, 2facbdf7-5e2d-4876-a8e4-278e525045e2, 14, Finished, Available, Finished)

Config Loaded
Logging System Loaded
Functions Loaded


In [2]:
# ===============================
# NB: Bronze → Silver Notebook
# ===============================

from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import col, explode, to_date, md5, concat_ws
from pyspark.sql.types import *
import uuid
from datetime import datetime, timezone
import re

StatementMeta(, 2facbdf7-5e2d-4876-a8e4-278e525045e2, 15, Finished, Available, Finished)

In [3]:
# -------------------------------
# Spark Session
# -------------------------------
# Obtain the notebook's SparkSession through the builder (getOrCreate).
spark = (
    SparkSession
    .builder
    .getOrCreate()
)


StatementMeta(, 2facbdf7-5e2d-4876-a8e4-278e525045e2, 16, Finished, Available, Finished)

In [4]:
# -------------------------------
# Capsules (Bronze → Silver)
# -------------------------------
# One configuration entry per dataset, keyed by capsule name:
#   source_path   - PATHS entry the bronze JSON is read from
#   target_path   - PATHS entry the silver output is saved to
#   timestamp_col - column handed to the create_time_series_frame step
#   key_cols      - columns handed to the generate_surrogate_key step
#                   (no capsule currently lists that step, so this is unused)
#   steps         - transformation step names, applied in list order
# Each row below is (capsule name, bronze PATHS key, silver PATHS key, timestamp column).
capsules = {
    capsule_name: {
        "source_path": PATHS[bronze_key],
        "target_path": PATHS[silver_key],
        "timestamp_col": timestamp_col,
        "key_cols": ["date", "city"],
        "steps": ["explode_and_flatten", "clean_column_names", "create_time_series_frame"],
    }
    for capsule_name, bronze_key, silver_key, timestamp_col in (
        ("weather", "weather_bronze", "weather_silver", "hourly_time"),
        ("air", "air_bronze", "air_silver", "hourly_time"),
        ("energy", "energy_bronze", "energy_silver", "prices_readingdate"),
    )
}

StatementMeta(, 2facbdf7-5e2d-4876-a8e4-278e525045e2, 17, Finished, Available, Finished)

In [5]:
# -------------------------------
# Bronze → Silver Processing
# -------------------------------
# For every capsule: read its bronze JSON, apply the configured steps in order,
# and save the result to the capsule's target path. A failure in one capsule is
# logged and recorded, and the remaining capsules are still processed.

# Step name -> callable(frame, capsule_config). Lambdas defer the helper lookup
# until a capsule actually runs that step, and let each step pull the extra
# configuration value it needs (timestamp column / key columns).
step_handlers = {
    "explode_and_flatten": lambda frame, cfg: explode_and_flatten(frame),
    "clean_column_names": lambda frame, cfg: clean_column_names(frame),
    "create_time_series_frame": lambda frame, cfg: create_time_series_frame(frame, cfg["timestamp_col"]),
    "generate_surrogate_key": lambda frame, cfg: generate_surrogate_key(frame, cfg["key_cols"]),
}

# Names of capsules whose processing raised, used for the final status line.
failed_capsules = []

for capsule_name, capsule in capsules.items():
    log_process_start(capsule_name, "bronze_to_silver")
    try:
        # Fail fast on misspelled or unsupported step names. Previously such a
        # step was skipped silently and the untransformed frame was still saved.
        unknown_steps = [step for step in capsule["steps"] if step not in step_handlers]
        if unknown_steps:
            raise ValueError(
                f"Unknown step(s) {unknown_steps}; expected one of {sorted(step_handlers)}"
            )

        # Bronze files are read as multi-line JSON documents.
        df = spark.read.option("multiline", "true").json(capsule["source_path"])

        # Apply the transformations in the order listed in the capsule config.
        for step in capsule["steps"]:
            df = step_handlers[step](df, capsule)

        save_to_lakehouse(df, capsule["target_path"], capsule_name)
    except Exception as e:
        # Best effort: record and log the failure, then continue with the next
        # capsule. The log text is Turkish for "An error occurred".
        failed_capsules.append(capsule_name)
        log_error(capsule_name, "bronze_to_silver", f"Hata oluştu: {e}")

# Report success only when every capsule was processed without an exception.
if failed_capsules:
    print(f"[FAILED] Bronze → Silver processing finished with errors in: {', '.join(failed_capsules)}")
else:
    print("[COMPLETE] Bronze → Silver processing done")

StatementMeta(, 2facbdf7-5e2d-4876-a8e4-278e525045e2, 18, Finished, Available, Finished)

[START] bronze_to_silver - Process started
[START] save_to_lakehouse - Process started
[END] save_to_lakehouse - Process completed
[START] bronze_to_silver - Process started
[START] save_to_lakehouse - Process started
[END] save_to_lakehouse - Process completed
[START] bronze_to_silver - Process started
[START] save_to_lakehouse - Process started
[END] save_to_lakehouse - Process completed
[COMPLETE] Bronze → Silver processing done
