In [0]:
# Imports
import re

from py4j.protocol import Py4JJavaError

In [0]:
# ——————————————————————————————
#  COMMON SETUP (shared by all notebooks)
# ——————————————————————————————
# Read the target environment from the "pipeline.env" widget and fall back to
# "dev" whenever that lookup raises.
try:
    env = dbutils.widgets.get("pipeline.env")
except Exception:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt / SystemExit
    # still propagate and can stop the run.
    env = "dev"

catalog = "principal_lab_db"
silver_schema = f"{env}_silver"
gold_schema = f"{env}_gold"

# Make the catalog and the silver schema the session defaults.
spark.sql(f"USE CATALOG {catalog}")
spark.sql(f"USE SCHEMA {silver_schema}")

In [0]:
# Helper that runs the view statements and logs the outcome of each one

def run_gold_view_queries(view_queries):
    """Execute every SQL statement in ``view_queries`` and print its outcome.

    Each statement is run through ``spark.sql``. A failure is printed and the
    loop continues with the next statement, so one broken view does not stop
    the remaining ones from being created.

    Args:
        view_queries: Sequence of SQL strings, normally
            ``CREATE [OR REPLACE] VIEW <name> AS ...`` statements.

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    total = len(view_queries)
    for position, query in enumerate(view_queries, start=1):
        # Fallback label, so the error branches always have a name that belongs
        # to the current statement (never a stale one from a previous iteration).
        view_name = f"<query {position}>"
        try:
            # Extract the view name for logging only. The match is
            # case-insensitive because statements may be written as
            # "create or replace view"; a failed match must not prevent the
            # statement from being executed.
            match = re.search(
                r"\bVIEW\s+(.+?)\s+AS\b", query, re.IGNORECASE | re.DOTALL
            )
            if match:
                view_name = match.group(1).strip()
            print(f"[{position}/{total}] Vytvářím view: {view_name}")

            # Execute the statement
            spark.sql(query)

            print(f"View '{view_name}' bylo úspěšně vytvořeno.\n")

        except Py4JJavaError as e:
            # JVM-side failure: report the Java exception message.
            msg = e.java_exception.getMessage()
            print(f"Chyba při vytváření view '{view_name}': {msg}\n")

        except Exception as e:
            print(f"Chyba u view '{view_name}': {str(e)}\n")

In [0]:
# ——————————————————————————————
#  CREATE GOLD VIEW
# ——————————————————————————————
# Statements are collected here and executed later in one pass.
gold_view_queries = []

# agent_performance_view
# Per agent: number of policies, total premium, number of claims and total
# claim amount.
# Premium transactions and claims are both one-to-many on policy_id. Joining
# the two fact tables to the same policy row directly multiplies their rows
# against each other and inflates both SUMs, so each fact is aggregated to one
# row per policy first and only then joined.
# NOTE(review): num_claims is now the sum of per-policy distinct claim counts;
# this equals the per-agent distinct count as long as a claim_id belongs to a
# single policy — confirm.
# NOTE(review): assumes dim_agents_mask holds one row per agent_id — confirm.
gold_view_queries.append(f"""
CREATE OR REPLACE VIEW {catalog}.{gold_schema}.agent_performance_view AS
WITH agent_policies AS (
    SELECT DISTINCT agent_id, policy_id
    FROM {catalog}.{silver_schema}.dim_policies_mask
),
policy_premiums AS (
    SELECT
        policy_id,
        SUM(premium_amount) AS premium_amount
    FROM {catalog}.{silver_schema}.fact_premium_transactions_mask
    GROUP BY policy_id
),
policy_claims AS (
    SELECT
        policy_id,
        COUNT(DISTINCT claim_id) AS num_claims,
        SUM(amount) AS claim_amount
    FROM {catalog}.{silver_schema}.fact_claims_mask
    GROUP BY policy_id
)
SELECT
    a.agent_id,
    a.first_name,
    a.last_name,
    COUNT(DISTINCT p.policy_id) AS num_policies,
    ROUND(SUM(pp.premium_amount), 2) AS total_premium,
    COALESCE(SUM(pc.num_claims), 0) AS num_claims,
    ROUND(SUM(pc.claim_amount), 2) AS total_claim_amount
FROM {catalog}.{silver_schema}.dim_agents_mask a
LEFT JOIN agent_policies p ON p.agent_id = a.agent_id
LEFT JOIN policy_premiums pp ON pp.policy_id = p.policy_id
LEFT JOIN policy_claims pc ON pc.policy_id = p.policy_id
GROUP BY a.agent_id, a.first_name, a.last_name
""")

# agent_language_distribution
# One row per value produced by explode(languages), with the number of
# distinct agents that have that value.
agent_language_distribution_sql = f"""
CREATE OR REPLACE VIEW {catalog}.{gold_schema}.agent_language_distribution AS
SELECT
  lang AS language_code,
  COUNT(DISTINCT agent_id) AS num_agents
FROM {catalog}.{silver_schema}.dim_agents_mask
LATERAL VIEW explode(languages) AS lang
GROUP BY lang
ORDER BY num_agents DESC
"""
gold_view_queries.append(agent_language_distribution_sql)

# customer_income_distribution
# Sums income per (first_name, last_name, email, address) over rows whose
# `__END_AT` is null and labels the sum as 'low' (< 250000),
# 'medium' (< 1000000) or 'high'.
# NOTE(review): the source table is read from {gold_schema}, whereas the
# agent views read their *_mask tables from {silver_schema} — confirm that
# dim_customers_mask really lives in the gold schema.
customer_income_distribution_sql = f"""
create or replace view {catalog}.{gold_schema}.customer_income_distribution AS
select  
first_name
, last_name
, email
, address
, sum(income) as income
, case 
    when sum(income) < 250000 then 'low'
    when sum(income) < 1000000 then 'medium'
    else 'high' 
    end as income_level
from {catalog}.{gold_schema}.dim_customers_mask
where `__END_AT` is null
group by all
order by income desc
"""
gold_view_queries.append(customer_income_distribution_sql)

# agents_with_flagged_payments
# Union of two red-flag rules over paid premium transactions:
#   * 'neobvykly pocet plateb'        - payment count of an (agent_id, avg_cnt)
#                                       group exceeds 2x that avg_cnt, where
#                                       avg_cnt is the average per-agent
#                                       payment count for the payment_date
#   * 'vyssi objem prijatych platieb' - agent's total payment count is above
#                                       the approximate 95th percentile
# Fix: flagged_payments previously joined `avg_payment_mask`, which is not a
# CTE of this statement; the CTE is named `avg_payment`.
# NOTE(review): flagged_payments groups by (agent_id, avg_cnt), not by
# (agent_id, payment_date), so different dates that share the same average are
# counted together — confirm this is intended.
# NOTE(review): fact_premium_transactions_mask is read from {gold_schema} here
# but from {silver_schema} in agent_performance_view, and dim_agents has no
# `_mask` suffix — confirm the intended source tables.
gold_view_queries.append(f"""
create or replace view {catalog}.{gold_schema}.agents_with_flagged_payments AS
with agent as (
    select
        agent_id
        , payment_date
        , count(*) as payment_count
    from {catalog}.{gold_schema}.fact_premium_transactions_mask
    where paid_flag = true
      and payment_date is not null
    group by agent_id, payment_date
),

avg_payment as (
    select
        payment_date
        , avg(payment_count) as avg_cnt
    from agent
    group by payment_date
),

flagged_payments as (
    select 
        fpt.agent_id
        , count(*) as payment_count
        , ap.avg_cnt
        , 'neobvykly pocet plateb' as payment_red_flag
    from {catalog}.{gold_schema}.fact_premium_transactions_mask fpt
    left join avg_payment ap on fpt.payment_date = ap.payment_date
    where fpt.paid_flag = true
      and fpt.payment_date is not null
    group by fpt.agent_id, ap.avg_cnt
    having count(*) > 2 * ap.avg_cnt
),

agent_totals as (
    select
        agent_id
        , count(*) as payment_count
    from {catalog}.{gold_schema}.fact_premium_transactions_mask
    where paid_flag = true
    group by agent_id
),

percentile_val as (
    select
        percentile_approx(payment_count, 0.95) as p95
    from agent_totals
),

unusual_by_volume as (
    select distinct
        a.agent_id
        , 'vyssi objem prijatych platieb' as payment_red_flag
    from agent_totals a
    join percentile_val p on a.payment_count > p.p95
),

unusual_by_count as (
    select distinct
        fp.agent_id
        , fp.payment_red_flag
    from flagged_payments fp
)

select 
    u.agent_id
    , da.first_name
    , da.last_name
    , u.payment_red_flag
from unusual_by_count u
left join {catalog}.{gold_schema}.dim_agents da on u.agent_id = da.agent_id

union

select 
    u.agent_id
    , da.first_name
    , da.last_name
    , u.payment_red_flag
from unusual_by_volume u
left join {catalog}.{gold_schema}.dim_agents da on u.agent_id = da.agent_id
""")

In [0]:
# Execute every collected gold-view statement; the outcome of each is printed.
run_gold_view_queries(view_queries=gold_view_queries)