In [0]:
# import
import re

from py4j.protocol import Py4JJavaError

In [0]:
# ——————————————————————————————
#  COMMON SETUP (shared by all notebooks)
# ——————————————————————————————
# Resolve the target environment from the "pipeline.env" widget; fall back to
# "dev" when reading the widget fails.
try:
    env = dbutils.widgets.get("pipeline.env")
except Exception as widget_error:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate; the fallback is reported, not silent.
    env = "dev"
    print(
        f"Widget 'pipeline.env' není k dispozici ({type(widget_error).__name__}), "
        f"používám výchozí prostředí '{env}'."
    )

catalog = "principal_lab_db"
silver_schema = f"{env}_silver"
gold_schema = f"{env}_gold"

# Make the catalog and the silver schema the session defaults.
spark.sql(f"USE CATALOG {catalog}")
spark.sql(f"USE SCHEMA {silver_schema}")

In [0]:
# Helper that runs the view-creation queries and logs progress

# Captures the identifier that follows the VIEW keyword (optionally preceded by
# IF NOT EXISTS). Matching is case-insensitive and stops at whitespace or "(",
# so neither the AS keyword nor a column list ends up in the name.
_VIEW_NAME_RE = re.compile(
    r"\bVIEW\s+(?:IF\s+NOT\s+EXISTS\s+)?([^\s(]+)",
    re.IGNORECASE,
)


def _extract_view_name(query):
    """Return the identifier following the first VIEW keyword, or None if absent."""
    match = _VIEW_NAME_RE.search(query)
    return match.group(1) if match else None


def run_gold_view_queries(view_queries):
    """Execute each view-creation query via ``spark.sql`` and print progress.

    Failures are reported with ``print`` and do not stop the loop, so the
    remaining queries are still attempted.

    Args:
        view_queries: Sequence of SQL strings, each expected to contain a
            ``VIEW <name>`` clause from which the view name is read for logging.

    Returns:
        None.
    """
    total = len(view_queries)
    for position, query in enumerate(view_queries, start=1):
        # Bind a label before the try block so the except handlers can always
        # report something, even when the name cannot be parsed from the query.
        view_name = f"<dotaz {position}>"
        try:
            parsed_name = _extract_view_name(query)
            if parsed_name is None:
                # Queries without a recognizable view name are not executed.
                raise ValueError("Z dotazu se nepodařilo určit název view.")
            view_name = parsed_name
            print(f"[{position}/{total}] Vytvářím view: {view_name}")

            spark.sql(query)

            print(f"View '{view_name}' bylo úspěšně vytvořeno.\n")

        except Py4JJavaError as e:
            # Use the message of the underlying Java exception for the report.
            msg = e.java_exception.getMessage()
            print(f"Chyba při vytváření view '{view_name}': {msg}\n")

        except Exception as e:
            print(f"Chyba u view '{view_name}': {str(e)}\n")

In [0]:
# ——————————————————————————————
#  CREATE GOLD VIEW
# ——————————————————————————————
gold_view_queries = []

# agent_performance_view
# Each fact table is aggregated to one row per policy BEFORE it is joined.
# Joining both fact tables to policies directly pairs every premium row with
# every claim row of the same policy, which inflates both SUM() totals.
# NOTE(review): assumes dim_policies has one row per policy_id and that a
# claim_id belongs to a single policy — TODO confirm against the silver model.
gold_view_queries.append(f"""
CREATE OR REPLACE VIEW {catalog}.{gold_schema}.agent_performance_view AS
WITH premium_by_policy AS (
    SELECT
        policy_id,
        SUM(premium_amount) AS premium_amount
    FROM {catalog}.{silver_schema}.fact_premium_transactions
    GROUP BY policy_id
),
claims_by_policy AS (
    SELECT
        policy_id,
        COUNT(DISTINCT claim_id) AS num_claims,
        SUM(amount) AS claim_amount
    FROM {catalog}.{silver_schema}.fact_claims
    GROUP BY policy_id
)
SELECT
    a.agent_id,
    a.first_name,
    a.last_name,
    COUNT(DISTINCT p.policy_id) AS num_policies,
    ROUND(SUM(pt.premium_amount), 2) AS total_premium,
    COALESCE(SUM(c.num_claims), 0) AS num_claims,
    ROUND(SUM(c.claim_amount), 2) AS total_claim_amount
FROM {catalog}.{silver_schema}.dim_agents a
LEFT JOIN {catalog}.{silver_schema}.dim_policies p USING (agent_id)
LEFT JOIN premium_by_policy pt USING (policy_id)
LEFT JOIN claims_by_policy c USING (policy_id)
GROUP BY a.agent_id, a.first_name, a.last_name
""")

# agent_language_distribution
# One row per language code with the number of distinct agents listing it.
gold_view_queries.append(f"""
CREATE OR REPLACE VIEW {catalog}.{gold_schema}.agent_language_distribution AS
SELECT
  lang AS language_code,
  COUNT(DISTINCT agent_id) AS num_agents
FROM {catalog}.{silver_schema}.dim_agents
LATERAL VIEW explode(languages) AS lang
GROUP BY lang
ORDER BY num_agents DESC
""")




In [0]:
run_gold_view_queries(gold_view_queries)