In [1]:
# Cell 1: Imports and Load Prepared Datasets

import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans


# Directory containing the prepared parquet files read by this cell.
PREPARED_PATH = "./data/prepared/"

# Maps the dataset key used throughout the notebook to its parquet file name.
files = {
    "pbj_nurse": "pbj_nurse_prepared.parquet",
    "pbj_non_nurse": "pbj_non_nurse_prepared.parquet",
    "qrp_provider": "qrp_provider_prepared.parquet",
    "nh_survey": "nh_survey_prepared.parquet",
    "nh_quality_mds": "nh_quality_mds_prepared.parquet",
    "nh_ownership": "nh_ownership_prepared.parquet",
    "nh_citations": "nh_citations_prepared.parquet"
}

# Datasets that are bound to module-level names below; the cell cannot
# complete without them, so their absence is reported explicitly.
REQUIRED_DATASETS = (
    "pbj_nurse",
    "pbj_non_nurse",
    "nh_survey",
    "qrp_provider",
    "nh_ownership",
)

loaded_datasets = {}
load_errors = {}  # key -> exception raised while reading that dataset
for key, filename in files.items():
    file_path = os.path.join(PREPARED_PATH, filename)
    try:
        dataset = pd.read_parquet(file_path)
    except Exception as e:
        # Loading stays best-effort per file: report the problem, remember it,
        # and keep going so every failure is visible in a single run.
        load_errors[key] = e
        print(f"Error loading {key}: {e}")
    else:
        loaded_datasets[key] = dataset
        print(f"Loaded {key} with shape {dataset.shape}")

# Fail with a descriptive error instead of a bare KeyError from the
# dictionary lookups below when a required dataset did not load.
missing_required = [key for key in REQUIRED_DATASETS if key not in loaded_datasets]
if missing_required:
    details = "; ".join(
        f"{key}: {load_errors.get(key, 'not listed in files')}"
        for key in missing_required
    )
    raise RuntimeError(f"Required datasets failed to load -> {details}")

pbj_nurse = loaded_datasets['pbj_nurse']
pbj_non_nurse = loaded_datasets['pbj_non_nurse']
nh_survey = loaded_datasets['nh_survey']
qrp_provider = loaded_datasets['qrp_provider']
nh_ownership = loaded_datasets['nh_ownership']

print("=== Data Loading Complete ===")

Loaded pbj_nurse with shape (1325324, 33)
Loaded pbj_non_nurse with shape (1325324, 82)
Loaded qrp_provider with shape (710016, 16)
Loaded nh_survey with shape (44189, 41)
Loaded nh_quality_mds with shape (251464, 23)
Loaded nh_ownership with shape (144651, 13)
Loaded nh_citations with shape (406789, 23)
=== Data Loading Complete ===


In [2]:
# Bind the frames used by later cells to short module-level names, looking
# each one up by its key in `loaded_datasets`.
(
    pbj_nurse,
    pbj_non_nurse,
    nh_survey,
    qrp_provider,
    nh_ownership,
) = (
    loaded_datasets[dataset_key]
    for dataset_key in (
        'pbj_nurse',
        'pbj_non_nurse',
        'nh_survey',
        'qrp_provider',
        'nh_ownership',
    )
)

print("=== Data Loading Complete ===")

=== Data Loading Complete ===


In [3]:
# Directory that receives one CSV file per entry in `loaded_datasets`.
EXPORT_PATH = "./data/final-csv/"

# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(EXPORT_PATH, exist_ok=True)

for key, df in loaded_datasets.items():
    # Each dataset is written to "<key>.csv" inside EXPORT_PATH.
    export_file_path = os.path.join(EXPORT_PATH, f"{key}.csv")
    # index=False keeps the DataFrame's row index out of the CSV.
    df.to_csv(export_file_path, index=False)
    print(f"Exported {key} to {export_file_path}")

print("=== Data Export Complete ===")

Exported pbj_nurse to ./data/final-csv/pbj_nurse.csv
Exported pbj_non_nurse to ./data/final-csv/pbj_non_nurse.csv
Exported qrp_provider to ./data/final-csv/qrp_provider.csv
Exported nh_survey to ./data/final-csv/nh_survey.csv
Exported nh_quality_mds to ./data/final-csv/nh_quality_mds.csv
Exported nh_ownership to ./data/final-csv/nh_ownership.csv
Exported nh_citations to ./data/final-csv/nh_citations.csv
=== Data Export Complete ===
