## Common setup

In [0]:
# ------------------------------------------------------------------
#  Shared configuration used by the cells below
# ------------------------------------------------------------------
# The catalog name is fixed; both schema names are derived from the environment.
catalog = "principal_lab_db"
# Environment name comes from the Spark conf key "pipeline.env" ("dev" when unset).
env = spark.conf.get("pipeline.env", "dev")
silver_schema, gold_schema = f"{env}_silver", f"{env}_gold"

# Make the catalog and the silver schema the session defaults, in that order.
for use_statement in (f"USE CATALOG {catalog}", f"USE SCHEMA {silver_schema}"):
    spark.sql(use_statement)

## Agent performance view

In [0]:
# ——————————————————————————————
#  CREATE GOLD VIEW
# ——————————————————————————————
# Builds the DDL for agent_performance_view: one row per distinct
# (agent_id, first_name, last_name) in dim_agents with the agent's policy count,
# summed premium, claim count and summed claim amount.
#
# Premiums and claims are aggregated in separate CTEs before being joined to the
# agent. Joining both fact tables to the policy in a single SELECT pairs every
# premium row with every claim row of the same policy, which multiplies both
# SUMs (the COUNT(DISTINCT ...) columns hide the duplication, the SUMs do not).
#
# Agents without policies / premiums / claims keep the original output shape:
# counts are 0 and the summed amounts are NULL.
gold_view_sql = f"""
CREATE OR REPLACE VIEW {catalog}.{gold_schema}.agent_performance_view AS
WITH agent_policies AS (
    -- one row per (agent, policy) so repeated policy rows cannot multiply the facts
    SELECT DISTINCT agent_id, policy_id
    FROM {catalog}.{silver_schema}.dim_policies
),
policy_stats AS (
    SELECT agent_id, COUNT(DISTINCT policy_id) AS num_policies
    FROM agent_policies
    GROUP BY agent_id
),
premium_stats AS (
    SELECT ap.agent_id, SUM(pt.premium_amount) AS total_premium
    FROM agent_policies ap
    JOIN {catalog}.{silver_schema}.fact_premium_transactions pt USING (policy_id)
    GROUP BY ap.agent_id
),
claim_stats AS (
    SELECT
        ap.agent_id,
        COUNT(DISTINCT c.claim_id) AS num_claims,
        SUM(c.amount) AS total_claim_amount
    FROM agent_policies ap
    JOIN {catalog}.{silver_schema}.fact_claims c USING (policy_id)
    GROUP BY ap.agent_id
)
SELECT
    a.agent_id,
    a.first_name,
    a.last_name,
    COALESCE(ps.num_policies, 0) AS num_policies,
    ROUND(pr.total_premium, 2) AS total_premium,
    COALESCE(cs.num_claims, 0) AS num_claims,
    ROUND(cs.total_claim_amount, 2) AS total_claim_amount
FROM (
    SELECT DISTINCT agent_id, first_name, last_name
    FROM {catalog}.{silver_schema}.dim_agents
) a
LEFT JOIN policy_stats ps ON ps.agent_id = a.agent_id
LEFT JOIN premium_stats pr ON pr.agent_id = a.agent_id
LEFT JOIN claim_stats cs ON cs.agent_id = a.agent_id
"""


In [0]:
# Execute the CREATE OR REPLACE VIEW statement held in gold_view_sql.
spark.sql(gold_view_sql)

## Customer income distribution

In [0]:
# Builds the DDL for customer_income_distribution: summed income per
# (first_name, last_name, email, address) plus an income_level bucket
# ('low' < 250000 <= 'medium' < 1000000 <= 'high').
#
# The 'high' bucket is an explicit ">= 1000000" branch rather than ELSE: when
# sum(income) is NULL both "<" comparisons evaluate to NULL, so an ELSE branch
# would label customers with unknown income as 'high'. With the explicit branch
# such rows get a NULL income_level instead.
#
# Only rows whose `__END_AT` is NULL are included (presumably the current
# version of each customer record; confirm against how dim_customers is built).
# NOTE(review): dim_customers is read from the gold schema, while the other
# dim tables in this notebook are read from the silver schema. Confirm this is intended.
gold_view_sql_cust_disti = f"""
create or replace view {catalog}.{gold_schema}.customer_income_distribution AS
select
first_name
, last_name
, email
, address
, sum(income) as income
, case
    when sum(income) < 250000 then 'low'
    when sum(income) < 1000000 then 'medium'
    when sum(income) >= 1000000 then 'high'
    end as income_level
from {catalog}.{gold_schema}.dim_customers
where `__END_AT` is null
group by all
order by income desc
"""

In [0]:
# Execute the CREATE OR REPLACE VIEW statement held in gold_view_sql_cust_disti.
spark.sql(gold_view_sql_cust_disti)

## Agents analysis

In [0]:
%sql
-- Average premium amount over all rows of fact_premium_transactions.
-- NOTE(review): this cell previously called avg() with no argument, which fails
-- before the query can run. premium_amount is assumed to be the intended
-- measure (the agent performance view sums that column from the table of the
-- same name); confirm.
-- NOTE(review): the schema ondra_test_silver is hard-coded here, unlike the
-- environment-derived schema names used by the Python cells.
select
    avg(premium_amount) as avg_premium_amount
from principal_lab_db.ondra_test_silver.fact_premium_transactions

In [0]:
%sql
-- For each payment date: the average number of paid transactions per agent.
-- The average is taken over agents that have at least one paid transaction on
-- that date, because agents with none produce no row in the CTE.
--
-- The CTE carries no ORDER BY: the outer query re-aggregates its rows, so a sort
-- there cannot affect the result. The final result is sorted by date instead,
-- and the aggregate has an explicit column name.
with daily_agent_payments as (
    select
        agent_id,
        payment_date,
        count(*) as payment_count
    from principal_lab_db.ondra_test_silver.fact_premium_transactions
    where
        paid_flag = true
        and payment_date is not null
    group by
        agent_id,
        payment_date
)
select
    payment_date,
    avg(payment_count) as avg_payments_per_agent
from daily_agent_payments
group by payment_date
order by payment_date