In [0]:
import pandas as pd
import networkx as nx

# Column names per table, trimmed to what the ERD needs.
# Maps table name -> ordered list of column names.
table_schemas = {
    "access_log_silver": [
        "access_id", "user_id", "system", "access_date", "action", "success",
    ],
    "admissions_silver": [
        "admission_id", "member_id", "facility_id", "admit_date",
        "discharge_date", "drg_code", "readmission_30d",
    ],
    "appeals_silver": [
        "appeal_id", "member_id", "claim_id", "level", "received_date",
        "decision_date", "outcome",
    ],
    "ar_aging_silver": [
        "snapshot_date", "customer_type", "customer_id", "invoice_id",
        "aging_bucket", "amount",
    ],
    "audits_silver": [
        "audit_id", "area", "finding_severity", "finding_desc", "opened_date",
        "closed_date", "status",
    ],
    "authorizations_silver": [
        "auth_id", "member_id", "provider_id", "service_category",
        "request_date", "decision_date", "decision", "units_approved",
    ],
    "brokers_silver": [
        "broker_id", "broker_name", "channel", "agency",
    ],
    "calls_silver": [
        "call_id", "member_id", "reason", "call_date", "handle_time_sec",
        "resolved", "first_contact_resolution",
    ],
    "campaigns_silver": [
        "campaign_id", "campaign_name", "channel_id", "start_date", "end_date",
    ],
    "care_management_silver": [
        "care_case_id", "member_id", "program", "start_date", "end_date",
        "risk_tier", "outcome",
    ],
    "channels_silver": [
        "channel_id", "channel_name",
    ],
    "claim_lines_silver": [
        "claim_line_id", "claim_id", "cpt_code", "diagnosis_code", "rev_code",
        "units", "line_billed", "line_allowed", "line_paid",
        "denial_reason_code",
    ],
    "claims_silver": [
        "claim_id", "member_id", "provider_id", "facility_id", "service_start",
        "service_end", "claim_type", "billed_amt", "allowed_amt", "paid_amt",
        "status", "received_date", "adjudicated_date",
    ],
    "contracts_silver": [
        "contract_id", "provider_id", "effective_date", "fee_schedule",
        "reimbursement_method", "rate",
    ],
    "data_quality_issues_silver": [
        "dq_issue_id", "dataset", "issue_type", "detected_date",
        "resolved_date", "severity", "record_count",
    ],
    "drugs_silver": [
        "drug_id", "ndc11", "generic_name", "brand_name", "specialty_flag",
        "tier",
    ],
    "eligibility_silver": [
        "eligibility_id", "member_id", "coverage_start", "coverage_end",
        "status",
    ],
    "employers_silver": [
        "employer_id", "employer_name", "market_id", "segment",
    ],
    "enrollment_silver": [
        "enrollment_id", "member_id", "product_id", "plan_id", "employer_id",
        "broker_id", "channel_id", "effective_date", "term_date",
        "premium_amount",
    ],
    "facilities_silver": [
        "facility_id", "facility_name", "facility_type", "market_id",
    ],
    "fwa_cases_silver": [
        "fwa_id", "case_open_date", "case_close_date", "type",
        "amount_identified", "amount_recovered", "status",
    ],
    "gl_accounts_silver": [
        "gl_account_id", "account_name", "type",
    ],
    "gl_transactions_silver": [
        "gl_txn_id", "gl_account_id", "txn_date", "amount", "description",
    ],
    "grievances_silver": [
        "grievance_id", "member_id", "received_date", "category",
        "resolved_date", "outcome",
    ],
    "initiatives_silver": [
        "initiative_id", "name", "owner", "start_date", "target_date",
        "budget", "spent", "status", "kpi1", "kpi2",
    ],
    "markets_silver": [
        "market_id", "market_name", "region",
    ],
    "members_silver": [
        "member_id", "subscriber_id", "first_name", "last_name", "dob",
        "gender", "market_id", "product_id", "plan_id", "employer_id",
        "risk_score", "start_date", "end_date",
    ],
    "network_adequacy_silver": [
        "market_id", "specialty", "provider_count", "members_per_provider",
        "avg_distance_miles",
    ],
    "network_providers_silver": [
        "network_id", "provider_id",
    ],
    "networks_silver": [
        "network_id", "network_name", "market_id",
    ],
    "payments_silver": [
        "payment_id", "invoice_id", "amount", "payment_date", "method",
        "status",
    ],
    "pbm_contracts_silver": [
        "pbm_contract_id", "manufacturer", "start_date", "end_date",
        "rebate_rate",
    ],
    "pharmacy_claims_silver": [
        "rx_claim_id", "member_id", "pharmacy_provider_id", "drug_id",
        "fill_date", "days_supply", "quantity", "billed", "allowed", "paid",
        "status",
    ],
    "plans_silver": [
        "plan_id", "product_id", "plan_name", "actuarial_value", "csr_variant",
    ],
    "policies_silver": [
        "policy_id", "member_id", "product_id", "plan_id", "employer_id",
        "effective_date", "term_date", "premium",
    ],
    "premium_billing_silver": [
        "invoice_id", "policy_id", "employer_id", "member_id", "invoice_date",
        "due_date", "amount_due", "status",
    ],
    "privacy_incidents_silver": [
        "incident_id", "date", "type", "records_affected",
        "reported_to_regulator", "status",
    ],
    "products_silver": [
        "product_id", "lob", "product_name", "metal_level", "effective_date",
    ],
    "provider_scorecards_silver": [
        "provider_id", "period_start", "period_end", "cost_index",
        "quality_index", "utilization_index", "total_patients",
    ],
    "providers_silver": [
        "provider_id", "npi", "provider_name", "specialty", "group_name",
        "market_id",
    ],
    "quality_measures_silver": [
        "measure_id", "member_id", "measure_code", "measurement_year",
        "numerator", "denominator", "compliant",
    ],
    "quotes_silver": [
        "quote_id", "opportunity_id", "product_id", "plan_id", "quote_date",
        "quoted_rate",
    ],
    "rebates_silver": [
        "rebate_id", "drug_id", "manufacturer", "period", "amount",
        "guaranteed_min",
    ],
    "regulatory_filings_silver": [
        "filing_id", "regulator", "program", "due_date", "submitted_date",
        "status",
    ],
    "risk_adjustment_silver": [
        "ra_id", "member_id", "program", "year", "raf", "risk_score",
    ],
    "sales_opportunities_silver": [
        "opportunity_id", "employer_id", "broker_id", "market_id",
        "product_id", "plan_id", "status", "created_date", "closed_date",
    ],
    "statutory_financials_silver": [
        "stat_fin_id", "year", "market_id", "revenue", "expenses",
        "net_income",
    ],
    "tickets_silver": [
        "ticket_id", "member_id", "category", "opened_date", "closed_date",
        "status",
    ],
    "underwriting_silver": [
        "underwriting_id", "employer_id", "product_id", "plan_id", "status",
        "decision_date",
    ],
    "users_silver": [
        "user_id", "username", "role", "created_date", "last_login",
    ],
    "vbc_contracts_silver": [
        "vbc_contract_id", "provider_id", "effective_date", "end_date",
        "payment_model", "quality_metrics",
    ],
}

# Identify primary keys and foreign keys.
#
# table_schemas carries no constraint metadata, so key detection is heuristic:
#   * primary key  -> the first column listed for the table
#   * foreign keys -> every other column whose name ends in '_id'
#
# NOTE(review): this assumes each schema lists its identifying column first,
# which holds for the table_schemas literal in this file -- confirm before
# reusing with other schemas.
#
# Matching '_id' columns against the first word of the table name is not
# reliable: it keys pharmacy_claims_silver on 'pharmacy_provider_id', and it
# folds 'claim_id' / 'gl_account_id' into the keys of claim_lines_silver /
# gl_transactions_silver, which hides those tables' references to their
# parent tables.
primary_keys = {}
foreign_keys = {}
for table, fields in table_schemas.items():
    # Slice rather than fields[0] so an empty schema yields "no key"
    # instead of raising IndexError.
    pk = fields[:1]
    primary_keys[table] = pk
    foreign_keys[table] = [f for f in fields if f.endswith('_id') and f not in pk]

# Build ERD relationship table: one row per foreign-key column that can be
# resolved to a different table's primary key.
def _table_matches_column(table, column):
    """Return True when *table* is named after the entity in *column*.

    'provider_id' matches 'providers_silver' but not
    'provider_scorecards_silver'; simple plural forms (s / es / y -> ies)
    of the column stem are accepted.
    """
    stem = column[:-len('_id')] if column.endswith('_id') else column
    base = table[:-len('_silver')] if table.endswith('_silver') else table
    accepted = {stem, stem + 's', stem + 'es'}
    if stem.endswith('y'):
        accepted.add(stem[:-1] + 'ies')
    return base in accepted


# Index each key column to the tables (in schema order) keyed on it, so the
# lookup below does not rescan every table for every foreign key.
pk_owners = {}
for t, pk in primary_keys.items():
    for col in pk:
        pk_owners.setdefault(col, []).append(t)

rows = []
for table in table_schemas:
    for fk in foreign_keys[table]:
        candidates = [t for t in pk_owners.get(fk, []) if t != table]
        if not candidates:
            continue  # no other table is keyed on this column
        # Several tables can be keyed on the same column (e.g. 'provider_id'
        # for both provider_scorecards_silver and providers_silver). Prefer
        # the table named after the column; otherwise keep the first match.
        ref_table = next(
            (t for t in candidates if _table_matches_column(t, fk)),
            candidates[0],
        )
        rows.append({
            'Table Name': table,
            'Joins To': ref_table,
            'Primary Key': ', '.join(primary_keys[table]),
            'Foreign Key': fk
        })

erd_df = pd.DataFrame(rows, columns=['Table Name', 'Joins To', 'Primary Key', 'Foreign Key'])
# display is not defined in this cell; presumably the notebook-provided helper.
display(erd_df)