In [1]:
from sqlalchemy import create_engine


In [2]:
import pandas as pd

In [3]:
engine = create_engine("postgresql://postgres:123456@localhost:5432/Project1")


In [None]:

import pandas as pd

df = pd.read_sql_query("SELECT * FROM gold1.presentation", engine)

DISEASE_FLAGS = [
    'high_blood_pressure','high_cholesterol','diagnosed_diabetes',
    'had_heart_attack','had_coronary_heart_disease','had_stroke',
    'has_asthma','had_skin_cancer','had_other_cancer','has_copd',
    'has_arthritis','has_depression','had_kidney_disease'
]

def build_burden_kpis(df):
    for c in DISEASE_FLAGS + ['state']:
        df[c] = pd.to_numeric(df[c], errors='coerce')
    df['disease_count'] = (df[DISEASE_FLAGS] == 1).sum(axis=1)
    df['has_any_disease'] = (df['disease_count'] > 0).astype('int8')

    sgrp = df.groupby('state')
    
    out = (pd.DataFrame({
        'respondents': sgrp.size(),
        'total_diseases': sgrp['disease_count'].sum(),
        'avg_disease_count': sgrp['disease_count'].mean(),
        'prevalence_any_disease': (sgrp['has_any_disease'].sum() / sgrp.size())
    }).reset_index())

    # ‚úÖ Round for readability
    out['avg_disease_count'] = out['avg_disease_count'].round(2)
    out['prevalence_any_disease'] = out['prevalence_any_disease'].round(2)
    return out

burden_state = build_burden_kpis(df)

burden_state.to_sql(
    "metrics_burden_by_state",
    engine,
    schema="gold1",
    if_exists="replace",
    index=False,
    method="multi",
    chunksize=50_000
)


52

In [5]:

def build_condition_counts_by_state(df, flags):
    for c in flags + ['state']:
        df[c] = pd.to_numeric(df[c], errors='coerce')
    out = (df.groupby('state')[(flags)]
             .apply(lambda g: (g == 1).sum())
             .reset_index())
    out = out.rename(columns={c: f"{c}_cases" for c in flags})
    return out

cond_state = build_condition_counts_by_state(df, DISEASE_FLAGS)
cond_state.to_sql("condition_counts_by_state", engine, schema="gold1", if_exists="replace", index=False, method="multi", chunksize=50_000)


52

In [6]:

def access_vs_burden_by_state(df):
    for c in ['has_health_plan','has_personal_doctor'] + DAY_METRICS + DISEASE_FLAGS + ['state']:
        df[c] = pd.to_numeric(df[c], errors='coerce')
    df['disease_count'] = (df[DISEASE_FLAGS] == 1).sum(axis=1)

    g = df.groupby('state')
    out = pd.DataFrame({
        'pct_no_health_plan': (1 - (g['has_health_plan'].mean())),
        'pct_no_doctor':     (1 - (g['has_personal_doctor'].mean())),
        'avg_poor_health_days': g['poor_health_days'].mean(),
        'avg_disease_count': g['disease_count'].mean()
    }).reset_index()
    return out

access_burden = access_vs_burden_by_state(df)
access_burden.to_sql("access_vs_burden_state", engine, schema="gold1", if_exists="replace", index=False, method='multi', chunksize=50_000)


NameError: name 'DAY_METRICS' is not defined

In [None]:

# -*- coding: utf-8 -*-
"""
Gold (Presentation) Layer Pipeline
----------------------------------
- Samples 200,000 rows from silver1.cleaned
- Enriches sample with basic disease burden columns
- Writes sample to gold1.presentation
- Aggregates state & state+month metrics (rounded to 2 dp)
- Writes to gold1.metrics_by_state and gold1.metrics_by_state_month (NUMERIC(10,2))
- Creates indexes and logs diagnostics

Requirements:
  pip install pandas numpy sqlalchemy psycopg2-binary
"""

import pandas as pd
import numpy as np
import random
import re
from sqlalchemy import create_engine, text
from sqlalchemy.types import Integer, Numeric, String

# ---------------------
# CONFIG
# ---------------------
SILVER_SCHEMA = 'silver1'
SILVER_TABLE  = 'cleaned'

GOLD_SCHEMA   = 'gold1'
GOLD_TABLE    = 'presentation'           # sample landing table
GOLD_AGG_STATE        = 'metrics_by_state'
GOLD_AGG_STATE_MONTH  = 'metrics_by_state_month'

SAMPLE_SIZE = 200000  # 2 lakh
YES_VALUE   = 1        # assumed "Yes" code

# Columns available in gold presentation table (as per your Silver)
GOLD_COLS = [
    'state','survey_month','survey_date','month','day','year',
    'disposition_code','sequence_number','primary_sampling_unit',
    'telephone_number','private_residence','state_residence','cell_phone',
    'num_of_adults','num_of_men','num_of_women',
    'general_health','physical_health_days','mental_health_days','poor_health_days',
    'has_health_plan','has_personal_doctor','medical_cost_issue','last_checkup',
    'high_blood_pressure','high_cholesterol','cholesterol_check','diagnosed_diabetes',
    'had_heart_attack','had_coronary_heart_disease','had_stroke','has_asthma',
    'had_skin_cancer','had_other_cancer','has_copd','has_arthritis',
    'has_depression','had_kidney_disease'
]

# Indicator columns (treated as binary: YES_VALUE means positive)
INDICATORS = [
    'has_health_plan','has_personal_doctor','medical_cost_issue',
    'high_blood_pressure','high_cholesterol','cholesterol_check','diagnosed_diabetes',
    'had_heart_attack','had_coronary_heart_disease','had_stroke','has_asthma',
    'had_skin_cancer','had_other_cancer','has_copd','has_arthritis',
    'has_depression','had_kidney_disease'
]

DAY_METRICS = ['physical_health_days','mental_health_days','poor_health_days']

# Programmatic target column sets for Gold aggregate tables
STATE_AGG_COLS = (
    ['state', 'rows'] +
    [f'{c}_prev' for c in INDICATORS] +
    [f'{c}_avg' for c in DAY_METRICS] +
    [f'{c}_null_pct' for c in INDICATORS + DAY_METRICS]
)
STATE_MONTH_AGG_COLS = (
    ['state', 'month', 'rows'] +
    [f'{c}_prev' for c in INDICATORS] +
    [f'{c}_avg' for c in DAY_METRICS]
)

# Only the true disease flags (exclude plan/doctor/cost/cholesterol_check)
DISEASE_FLAGS = [
    'high_blood_pressure','high_cholesterol','diagnosed_diabetes',
    'had_heart_attack','had_coronary_heart_disease','had_stroke',
    'has_asthma','had_skin_cancer','had_other_cancer','has_copd',
    'has_arthritis','has_depression','had_kidney_disease'
]

# Bucketing thresholds (simple & clear)
HIGH_MIN   = 3          # High if total_disease >= 3
MEDIUM_MIN = 1          # Medium if total_disease 1..2
MEDIUM_MAX = 2
LOW_MAX    = 0          # Low if total_disease == 0

# Rounding config
DECIMALS = 2            # round aggregates to 2 decimals

# ---------------------
# DB helpers
# ---------------------
def get_engine(uri: str):
    return create_engine(uri)

def ensure_schemas(engine):
    with engine.begin() as conn:
        conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {SILVER_SCHEMA};"))
        conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {GOLD_SCHEMA};"))

def table_rowcount(engine, schema: str, table: str) -> int:
    with engine.connect() as conn:
        return conn.exec_driver_sql(f"SELECT COUNT(*) FROM {schema}.{table};").scalar()

def get_table_columns(engine, schema: str, table: str):
    # Fetch one row to get column names
    df = pd.read_sql_query(text(f"SELECT * FROM {schema}.{table} LIMIT 1"), con=engine)
    return df.columns.tolist()

# ---------------------
# Sampling
# ---------------------
def fetch_random_sample(engine, sample_size=SAMPLE_SIZE) -> pd.DataFrame:
    """
    Fast sampler for large tables:
    - Uses LIMIT + random OFFSET (pseudo-random, very fast).
    - Avoids ORDER BY RANDOM() which is slow on millions of rows.
    """
    total_df = pd.read_sql_query(
        sql=text(f"SELECT COUNT(*) AS c FROM {SILVER_SCHEMA}.{SILVER_TABLE}"),
        con=engine
    )
    
    total = int(total_df['c'][0])
    if total == 0:
        raise RuntimeError(f"{SILVER_SCHEMA}.{SILVER_TABLE} is empty.")

    offset = max(0, random.randint(0, max(0, total - sample_size)))
    query = f"""
        SELECT {', '.join(GOLD_COLS)}
        FROM {SILVER_SCHEMA}.{SILVER_TABLE}
        OFFSET {offset}
        LIMIT {sample_size}
    """
    df = pd.read_sql_query(sql=text(query), conn =engine)

    # ‚úÖ Enrich before writing to gold
    df = enrich_gold_presentation(df)
    return df

# ---------------------
# Enrich sample with basic burden columns
# ---------------------
def enrich_gold_presentation(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds row-level columns to the same gold1.presentation table:
      - total_disease
      - has_any_disease
      - multimorbidity_flag
      - severe_multimorbidity_flag
      - disease_rank (Low/Medium/High)
      - risk_bucket (friendly labels)
    """
    # Coerce disease flags safely
    for c in DISEASE_FLAGS:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors='coerce')
        else:
            raise KeyError(f"Missing expected disease flag: {c}")

    # Total diseases per respondent (sum of 1s across disease flags)
    total = (df[DISEASE_FLAGS] == YES_VALUE).sum(axis=1)
    df['total_disease'] = total.astype('int16')

    # Simple binary helpers
    df['has_any_disease']            = (df['total_disease'] > 0).astype('int8')
    df['multimorbidity_flag']        = (df['total_disease'] >= 2).astype('int8')
    df['severe_multimorbidity_flag'] = (df['total_disease'] >= 3).astype('int8')

    # Rank buckets
    def _rank(x):
        if pd.isna(x): return np.nan
        if x >= HIGH_MIN: return "High"
        if MEDIUM_MIN <= x <= MEDIUM_MAX: return "Medium"
        if x <= LOW_MAX: return "Low"
        return "Medium"

    df['disease_rank'] = df['total_disease'].apply(_rank).astype('string')

    # Friendly bucket label
    def _bucket(x):
        if x == 0: return "No disease"
        if x == 1: return "1 disease"
        if x == 2: return "2 diseases"
        return "‚â•3 diseases"

    df['risk_bucket'] = df['total_disease'].apply(_bucket).astype('string')

    return df

# ---------------------
# Writing sample to Gold
# ---------------------
def write_sample_to_gold(engine, df: pd.DataFrame) -> int:
    # Ensure column order & presence
    existing = [c for c in GOLD_COLS if c in df.columns]
    # Include the new columns too when writing
    extra_cols = ['total_disease','has_any_disease','multimorbidity_flag',
                  'severe_multimorbidity_flag','disease_rank','risk_bucket']
    for c in extra_cols:
        if c not in df.columns:
            raise KeyError(f"Missing expected enriched column: {c}")

    df = df[existing + extra_cols]

    with engine.begin() as conn:
        df.to_sql(
            name=GOLD_TABLE,
            con=conn,
            schema=GOLD_SCHEMA,
            if_exists='replace',   # overwrite each run (deterministic)
            index=False,
            method='multi',
            chunksize=50_000
        )
    return len(df)

# ---------------------
# Aggregation helpers
# ---------------------
def prevalence(series: pd.Series, yes_value=YES_VALUE):
    """Share of YES among non-null values."""
    s = pd.to_numeric(series, errors='coerce').dropna()
    return (s == yes_value).mean() if len(s) else np.nan

def align_columns(df: pd.DataFrame, target_cols: list) -> pd.DataFrame:
    """Ensure df has all target columns (add missing as NaN) and reorder."""
    for c in target_cols:
        if c not in df.columns:
            df[c] = np.nan
    return df[target_cols]

def month_alignment_stats(df: pd.DataFrame):
    """Optional sanity: compare 'survey_month' and 'month' fields."""
    for col in ['survey_month', 'month', 'state']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    df['month_mismatch'] = np.where(
        df['survey_month'].notna() & df['month'].notna() & (df['survey_month'] != df['month']),
        1, 0
    )
    by_state = (df.groupby('state')['month_mismatch']
                  .mean()
                  .reset_index()
                  .rename(columns={'month_mismatch': 'month_mismatch_rate'}))
    overall = df['month_mismatch'].mean()
    return overall, by_state

def round_agg_for_presentation(df: pd.DataFrame, decimals=2) -> pd.DataFrame:
    """
    Round only presentation KPIs in aggregates:
      - *_prev, *_avg, *_null_pct ‚Üí round to 2 decimals
    """
    for col in df.columns:
        if (
            col.endswith('_prev') or
            col.endswith('_avg') or
            col.endswith('_null_pct')
        ):
            if pd.api.types.is_float_dtype(df[col]):
                df[col] = df[col].round(decimals)
    return df

def dtype_numeric_2dp(df: pd.DataFrame):
    """
    Build dtype map: ints stay ints; floats become NUMERIC(10,2) for clean storage.
    """
    dtypes = {}
    for col in df.columns:
        s = df[col]
        if pd.api.types.is_integer_dtype(s):
            dtypes[col] = Integer()
        elif pd.api.types.is_float_dtype(s):
            dtypes[col] = Numeric(10, 2)
        elif pd.api.types.is_string_dtype(s):
            dtypes[col] = String()
        # leave bool/datetime to inference
    return dtypes

# ---------------------
# BUILD AGGREGATIONS
# ---------------------
def build_profiles(df: pd.DataFrame):
    """Build overall null profiles, and aggregations by state and by (state, month)."""
    # Null % overall (sample)
    null_pct_overall = (df.isnull().mean() * 100).sort_values(ascending=False)

    # Coerce numeric for grouping columns + metrics
    for col in INDICATORS + DAY_METRICS + ['state', 'survey_month', 'month', 'year']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # --- By state ---
    agg_dict_state = {}
    for col in INDICATORS:
        agg_dict_state[col + '_prev'] = (col, prevalence)
    for col in DAY_METRICS:
        agg_dict_state[col + '_avg'] = (col, 'mean')

    agg_frame_state = df.groupby('state').agg(
        rows=('state', 'size'),
        **agg_dict_state
    ).reset_index()

    # Attach null% per column per state
    for col in INDICATORS + DAY_METRICS:
        s_null = (
            df.groupby('state')[col]
              .apply(lambda s: s.isna().mean() * 100)
              .reset_index()
              .rename(columns={col: f'{col}_null_pct'})
        )
        agg_frame_state = agg_frame_state.merge(s_null, on='state', how='left')

    # Align to target column order (ensures DDL match)
    agg_frame_state = align_columns(agg_frame_state, STATE_AGG_COLS)
    # ‚úÖ Round presentation KPIs to 2 decimals
    agg_frame_state = round_agg_for_presentation(agg_frame_state, decimals=DECIMALS)

    # --- By (state, month) ---
    agg_dict_state_month = {}
    for col in INDICATORS:
        agg_dict_state_month[col + '_prev'] = (col, prevalence)
    for col in DAY_METRICS:
        agg_dict_state_month[col + '_avg'] = (col, 'mean')

    agg_frame_state_month = df.groupby(['state', 'month']).agg(
        rows=('state', 'size'),
        **agg_dict_state_month
    ).reset_index()

    # Align to target column order & round
    agg_frame_state_month = align_columns(agg_frame_state_month, STATE_MONTH_AGG_COLS)
    agg_frame_state_month = round_agg_for_presentation(agg_frame_state_month, decimals=DECIMALS)

    return null_pct_overall, agg_frame_state, agg_frame_state_month

# ---------------------
# CREATE GOLD TABLES (DDL)
# ---------------------
def ensure_gold_agg_tables(engine):
    """
    Create aggregate tables with NUMERIC(10,2) for presentation columns to ensure
    clean display (no long floating-point tails).
    """
    # DDL for metrics_by_state
    ddl_state = f"""
    CREATE TABLE IF NOT EXISTS {GOLD_SCHEMA}.{GOLD_AGG_STATE} (
        state int,
        rows bigint,
        has_health_plan_prev      NUMERIC(10,2),
        has_personal_doctor_prev  NUMERIC(10,2),
        medical_cost_issue_prev   NUMERIC(10,2),
        high_blood_pressure_prev  NUMERIC(10,2),
        high_cholesterol_prev     NUMERIC(10,2),
        cholesterol_check_prev    NUMERIC(10,2),
        diagnosed_diabetes_prev   NUMERIC(10,2),
        had_heart_attack_prev     NUMERIC(10,2),
        had_coronary_heart_disease_prev NUMERIC(10,2),
        had_stroke_prev           NUMERIC(10,2),
        has_asthma_prev           NUMERIC(10,2),
        had_skin_cancer_prev      NUMERIC(10,2),
        had_other_cancer_prev     NUMERIC(10,2),
        has_copd_prev             NUMERIC(10,2),
        has_arthritis_prev        NUMERIC(10,2),
        has_depression_prev       NUMERIC(10,2),
        had_kidney_disease_prev   NUMERIC(10,2),
        physical_health_days_avg  NUMERIC(10,2),
        mental_health_days_avg    NUMERIC(10,2),
        poor_health_days_avg      NUMERIC(10,2),
        has_health_plan_null_pct      NUMERIC(10,2),
        has_personal_doctor_null_pct  NUMERIC(10,2),
        medical_cost_issue_null_pct   NUMERIC(10,2),
        high_blood_pressure_null_pct  NUMERIC(10,2),
        high_cholesterol_null_pct     NUMERIC(10,2),
        cholesterol_check_null_pct    NUMERIC(10,2),
        diagnosed_diabetes_null_pct   NUMERIC(10,2),
        had_heart_attack_null_pct     NUMERIC(10,2),
        had_coronary_heart_disease_null_pct NUMERIC(10,2),
        had_stroke_null_pct           NUMERIC(10,2),
        has_asthma_null_pct           NUMERIC(10,2),
        had_skin_cancer_null_pct      NUMERIC(10,2),
        had_other_cancer_null_pct     NUMERIC(10,2),
        has_copd_null_pct             NUMERIC(10,2),
        has_arthritis_null_pct        NUMERIC(10,2),
        has_depression_null_pct       NUMERIC(10,2),
        had_kidney_disease_null_pct   NUMERIC(10,2),
        physical_health_days_null_pct NUMERIC(10,2),
        mental_health_days_null_pct   NUMERIC(10,2),
        poor_health_days_null_pct     NUMERIC(10,2)
    );
    """
    # DDL for metrics_by_state_month
    ddl_state_month = f"""
    CREATE TABLE IF NOT EXISTS {GOLD_SCHEMA}.{GOLD_AGG_STATE_MONTH} (
        state int,
        month int,
        rows bigint,
        has_health_plan_prev      NUMERIC(10,2),
        has_personal_doctor_prev  NUMERIC(10,2),
        medical_cost_issue_prev   NUMERIC(10,2),
        high_blood_pressure_prev  NUMERIC(10,2),
        high_cholesterol_prev     NUMERIC(10,2),
        cholesterol_check_prev    NUMERIC(10,2),
        diagnosed_diabetes_prev   NUMERIC(10,2),
        had_heart_attack_prev     NUMERIC(10,2),
        had_coronary_heart_disease_prev NUMERIC(10,2),
        had_stroke_prev           NUMERIC(10,2),
        has_asthma_prev           NUMERIC(10,2),
        had_skin_cancer_prev      NUMERIC(10,2),
        had_other_cancer_prev     NUMERIC(10,2),
        has_copd_prev             NUMERIC(10,2),
        has_arthritis_prev        NUMERIC(10,2),
        has_depression_prev       NUMERIC(10,2),
        had_kidney_disease_prev   NUMERIC(10,2),
        physical_health_days_avg  NUMERIC(10,2),
        mental_health_days_avg    NUMERIC(10,2),
        poor_health_days_avg      NUMERIC(10,2)
    );
    """
    with engine.begin() as conn:
        conn.execute(text(ddl_state))
        conn.execute(text(ddl_state_month))

def write_aggs_to_gold(engine, agg_state: pd.DataFrame, agg_state_month: pd.DataFrame):
    ensure_gold_agg_tables(engine)
    with engine.begin() as conn:
        # Truncate existing data and append (keeps DDL intact)
        conn.execute(text(f"TRUNCATE TABLE {GOLD_SCHEMA}.{GOLD_AGG_STATE};"))
        conn.execute(text(f"TRUNCATE TABLE {GOLD_SCHEMA}.{GOLD_AGG_STATE_MONTH};"))

        # Append with explicit dtype mapping (floats -> NUMERIC(10,2))
        agg_state.to_sql(
            name=GOLD_AGG_STATE, schema=GOLD_SCHEMA, con=conn,
            if_exists='append', index=False, method='multi', chunksize=50_000,
            dtype=dtype_numeric_2dp(agg_state)
        )
        agg_state_month.to_sql(
            name=GOLD_AGG_STATE_MONTH, schema=GOLD_SCHEMA, con=conn,
            if_exists='append', index=False, method='multi', chunksize=50_000,
            dtype=dtype_numeric_2dp(agg_state_month)
        )

def create_gold_indexes(engine):
    stmts = [
        f"CREATE INDEX IF NOT EXISTS ix_{GOLD_SCHEMA}_{GOLD_TABLE}_state ON {GOLD_SCHEMA}.{GOLD_TABLE}(state);",
        f"CREATE INDEX IF NOT EXISTS ix_{GOLD_SCHEMA}_{GOLD_TABLE}_survey_month ON {GOLD_SCHEMA}.{GOLD_TABLE}(survey_month);",
        f"CREATE INDEX IF NOT EXISTS ix_{GOLD_SCHEMA}_{GOLD_AGG_STATE}_state ON {GOLD_SCHEMA}.{GOLD_AGG_STATE}(state);",
        f"CREATE INDEX IF NOT EXISTS ix_{GOLD_SCHEMA}_{GOLD_AGG_STATE_MONTH}_state_month ON {GOLD_SCHEMA}.{GOLD_AGG_STATE_MONTH}(state, month);",
    ]
    with engine.begin() as conn:
        for s in stmts:
            conn.execute(text(s))

# ---------------------
# MAIN orchestration
# ---------------------
def run_gold_pipeline(engine):
    try:
        ensure_schemas(engine)

        # Diagnostics
        with engine.connect() as conn:
            version = conn.exec_driver_sql("SELECT version();").scalar()
            print("PostgreSQL version:", version)
        silver_rows = table_rowcount(engine, SILVER_SCHEMA, SILVER_TABLE)
        print(f"Silver table rowcount: {silver_rows:,}")

        # 1) Sample 200k rows
        print("\nüîπ Sampling 200,000 rows from silver1.cleaned ...")
        df = fetch_random_sample(engine, SAMPLE_SIZE)
        print(f"‚úÖ Sample fetched: {len(df):,} rows")

        if df.empty:
            raise RuntimeError("Sample is empty ‚Äî check table name/columns or sampler.")

        # 2) Write sample to Gold (with enriched columns)
        print("\nüîπ Writing sampled rows to gold1.presentation ...")
        written = write_sample_to_gold(engine, df)
        print(f"‚úÖ Written to gold1.presentation: {written:,} rows")

        # 3) Build analytics
        print("\nüîπ Building analytics (null profiles, prevalence, averages) ...")
        null_pct_overall, agg_state, agg_state_month = build_profiles(df)
        print(f"‚úÖ State metrics rows: {len(agg_state)} | State-month metrics rows: {len(agg_state_month)}")

        # 4) Write aggregates
        print("\nüîπ Writing aggregated outputs to Gold tables ...")
        write_aggs_to_gold(engine, agg_state, agg_state_month)
        create_gold_indexes(engine)
        print("‚úÖ Gold layer analytics written and indexed.")

    except Exception as e:
        import traceback
        print("‚ùå ERROR in pipeline:", e)
        traceback.print_exc()
        raise

if __name__ == "__main__":
    # üîê Set your actual URI below:
    ENGINE_URI = "postgresql+psycopg2://postgres:123456@localhost:5432/Project1"
    engine = get_engine(ENGINE_URI)
    run_gold_pipeline(engine)


PostgreSQL version: PostgreSQL 18.1 on x86_64-windows, compiled by msvc-19.44.35219, 64-bit
Silver table rowcount: 2,000,000

üîπ Sampling 200,000 rows from silver1.cleaned ...
‚úÖ Sample fetched: 200,000 rows

üîπ Writing sampled rows to gold1.presentation ...
‚úÖ Written to gold1.presentation: 200,000 rows

üîπ Building analytics (null profiles, prevalence, averages) ...
‚úÖ State metrics rows: 52 | State-month metrics rows: 229

üîπ Writing aggregated outputs to Gold tables ...
‚úÖ Gold layer analytics written and indexed.
