In [5]:
import pandas as pd
from pathlib import Path

In [12]:
# Every location is derived from the process's current working directory.
NOTEBOOK_DIR = Path.cwd()
# One level above the working directory; this should be the 'vertexcare' directory.
MODULE_ROOT = NOTEBOOK_DIR.parent
# Parquet file that the data-loading cell reads.
CLUSTERED_DATA_PATH = MODULE_ROOT.joinpath(
    "data",
    "03_primary",
    "clustered_patients.parquet",
)

In [13]:
# Columns whose per-cluster mean is reported by the analysis cell.
FEATURES_TO_ANALYZE = [
    # Columns without the `llm_` prefix.
    "age", "sumcomorbidities", "hypertension", "diabetes",
    # How likely is this cluster to be readmitted?
    "readmitted",
    # Columns carrying the `llm_` prefix.
    "llm_transportation_issue", "llm_financial_concern",
    "llm_sentiment_positive", "llm_sentiment_negative",
]

In [14]:
# --- Load the Data ---
# Announce the source path, read the parquet file into `df`, then print a
# confirmation line followed by a 30-character dashed rule.
print(f"Loading data from: {CLUSTERED_DATA_PATH}")
df = pd.read_parquet(CLUSTERED_DATA_PATH)
print("Data loaded successfully.", "-" * 30, sep="\n")

Loading data from: /workspaces/vertex-care/vertexcare/data/03_primary/clustered_patients.parquet
Data loaded successfully.
------------------------------


In [15]:
# --- Analyze the Clusters ---
print("Analyzing Cluster Profiles...")

# Group rows by the 'cluster' column, average each analyzed feature within
# every group, and round the means to two decimal places.
cluster_profiles = (
    df.groupby("cluster")[FEATURES_TO_ANALYZE]
    .mean()
    .round(2)
)

# Heading and table are emitted on consecutive lines of plain-text output.
print("Cluster Profiles (Mean Values):", cluster_profiles, sep="\n")

Analyzing Cluster Profiles...
Cluster Profiles (Mean Values):
           age  sumcomorbidities  hypertension  diabetes  readmitted  \
cluster                                                                
0        56.84              0.62          0.32      0.00        0.27   
1        55.69              0.51          0.38      0.31        0.30   
2        56.91              0.72          0.29      0.28        0.29   
3        58.57              0.76          0.36      1.00        0.28   
4        56.14              0.68          0.30      0.00        0.29   

         llm_transportation_issue  llm_financial_concern  \
cluster                                                    
0                             0.0                    0.0   
1                             0.0                    1.0   
2                             1.0                    0.0   
3                             0.0                    0.0   
4                             0.0                    0.0   

         llm