In [None]:
# Register a text widget named 'catalog' (with a default value) and read its
# current value into the `catalog` variable.
dbutils.widgets.text('catalog', 'ddavis_hls_sql')
catalog = dbutils.widgets.get('catalog')
# Echo the resolved value so it is visible in the cell output.
print('catalog = {}'.format(catalog))

In [None]:
%sql
-- Make the catalog chosen through the notebook widget the session's current catalog.
USE CATALOG ${catalog};

In [None]:
%sql
-- (Re)create the training table keyed by beneficiary_code.
-- It carries the beneficiary attribute columns plus claim_amount.
-- CREATE OR REPLACE replaces any existing table of the same name.
CREATE OR REPLACE TABLE ai.training_beneficiary (
    beneficiary_code                       STRING NOT NULL,
    date_of_birth                          DATE,
    date_of_death                          DATE,
    gender                                 STRING,
    race                                   STRING,
    esrd_flag                              STRING,
    state                                  STRING,
    county_code                            STRING,
    heart_failure_flag                     STRING,
    cronic_kidney_disease_flag             STRING,
    cancer_flag                            STRING,
    copd_flag                              STRING,
    depression_flag                        STRING,
    diabetes_flag                          STRING,
    ischemic_heart_disease_flag            STRING,
    osteoporosis_flag                      STRING,
    asrheumatoid_arthritis_flag            STRING,
    stroke_transient_ischemic_attack_flag  STRING,
    claim_amount                           DOUBLE,
    CONSTRAINT training_beneficiary_pk PRIMARY KEY (beneficiary_code)
)
-- Change data feed is switched on for this table.
TBLPROPERTIES (delta.enableChangeDataFeed = true);

In [None]:
%sql
-- Source view for the merge into ai.training_beneficiary.
-- Joins beneficiary attributes from cms.gold_dim_beneficiary to the summed
-- claim payment amount per beneficiary_code, for at most 30000 beneficiaries.
CREATE OR REPLACE TEMP VIEW vw_training_beneficiary
AS
SELECT
    a.beneficiary_code,
    a.date_of_birth,
    a.date_of_death,
    a.gender,
    a.race,
    a.esrd_flag,
    a.state,
    a.county_code,
    a.heart_failure_flag,
    a.cronic_kidney_disease_flag,
    a.cancer_flag,
    a.copd_flag,
    a.depression_flag,
    a.diabetes_flag,
    a.ischemic_heart_disease_flag,
    a.osteoporosis_flag,
    a.asrheumatoid_arthritis_flag,
    a.stroke_transient_ischemic_attack_flag,
    b.claim_amount
FROM
    cms.gold_dim_beneficiary AS a
INNER JOIN (
    -- Total claim payment per beneficiary_code; claims are resolved to a
    -- beneficiary_code through the dimension's beneficiary_key.
    SELECT
        d.beneficiary_code,
        SUM(f.claim_payment_amount) AS claim_amount
    FROM
        cms.gold_fact_patient_claims AS f
    JOIN
        cms.gold_dim_beneficiary AS d ON f.beneficiary_key = d.beneficiary_key
    GROUP BY
        d.beneficiary_code
    -- A LIMIT without ORDER BY returns an arbitrary subset that can differ
    -- between runs; ordering by the key makes the 30000-row sample repeatable.
    ORDER BY
        d.beneficiary_code
    LIMIT 30000
) AS b ON b.beneficiary_code = a.beneficiary_code
WHERE
    -- Keep only dimension rows with no end marker
    -- (presumably the current version of each beneficiary -- TODO confirm).
    a.__END_AT IS NULL

In [None]:
%sql
-- Upsert the rows of vw_training_beneficiary into ai.training_beneficiary,
-- matching on beneficiary_code: matched rows are overwritten, new rows inserted.
MERGE INTO ai.training_beneficiary AS tgt
USING vw_training_beneficiary AS src
    ON src.beneficiary_code = tgt.beneficiary_code
WHEN MATCHED THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *

In [None]:
%sql
-- Preview up to 100 rows of the training table.
SELECT *
FROM ai.training_beneficiary
LIMIT 100

In [None]:
%sql
-- (Re)create the feature table keyed by beneficiary_code.
-- It carries the beneficiary attribute columns only; there is no claim_amount column.
-- CREATE OR REPLACE replaces any existing table of the same name.
CREATE OR REPLACE TABLE ai.feature_beneficiary (
    beneficiary_code                       STRING NOT NULL,
    date_of_birth                          DATE,
    date_of_death                          DATE,
    gender                                 STRING,
    race                                   STRING,
    esrd_flag                              STRING,
    state                                  STRING,
    county_code                            STRING,
    heart_failure_flag                     STRING,
    cronic_kidney_disease_flag             STRING,
    cancer_flag                            STRING,
    copd_flag                              STRING,
    depression_flag                        STRING,
    diabetes_flag                          STRING,
    ischemic_heart_disease_flag            STRING,
    osteoporosis_flag                      STRING,
    asrheumatoid_arthritis_flag            STRING,
    stroke_transient_ischemic_attack_flag  STRING,
    CONSTRAINT feature_beneficiary_pk PRIMARY KEY (beneficiary_code)
)
-- Change data feed is switched on for this table.
TBLPROPERTIES (delta.enableChangeDataFeed = true);

In [None]:
%sql
-- Source view for the merge into ai.feature_beneficiary.
-- Projects only the columns that the feature table declares. claim_amount is
-- intentionally NOT selected: the target table has no such column, and with
-- Delta automatic schema evolution enabled an `UPDATE SET * / INSERT *` merge
-- would otherwise add it to the feature table.
CREATE OR REPLACE TEMP VIEW vw_feature_beneficiary AS
SELECT
    a.beneficiary_code,
    a.date_of_birth,
    a.date_of_death,
    a.gender,
    a.race,
    a.esrd_flag,
    a.state,
    a.county_code,
    a.heart_failure_flag,
    a.cronic_kidney_disease_flag,
    a.cancer_flag,
    a.copd_flag,
    a.depression_flag,
    a.diabetes_flag,
    a.ischemic_heart_disease_flag,
    a.osteoporosis_flag,
    a.asrheumatoid_arthritis_flag,
    a.stroke_transient_ischemic_attack_flag
FROM
    cms.gold_dim_beneficiary AS a
INNER JOIN (
    -- Beneficiary codes that have at least one claim row. This keeps the same
    -- row filter the previous aggregated join applied, without computing the sum.
    SELECT DISTINCT
        d.beneficiary_code
    FROM
        cms.gold_fact_patient_claims AS f
    JOIN
        cms.gold_dim_beneficiary AS d ON f.beneficiary_key = d.beneficiary_key
) AS b ON b.beneficiary_code = a.beneficiary_code
WHERE
    -- Keep only dimension rows with no end marker
    -- (presumably the current version of each beneficiary -- TODO confirm).
    a.__END_AT IS NULL

In [None]:
%sql
-- Upsert the rows of vw_feature_beneficiary into ai.feature_beneficiary,
-- matching on beneficiary_code: matched rows are overwritten, new rows inserted.
MERGE INTO ai.feature_beneficiary AS tgt
USING vw_feature_beneficiary AS src
    ON src.beneficiary_code = tgt.beneficiary_code
WHEN MATCHED THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *