# K-means results — quick viewer
This notebook loads the k-means clustering outputs you generated and shows:

- Metrics by *k*: Silhouette, Calinski–Harabasz (CH), Davies–Bouldin index (DBI), and Inertia.

- Chosen solution **k=2**: cluster sizes, original-unit profiles, z-centroids, and discriminative ranking.

- External label distributions (`is_fair`, `clasification_fairness`, `model_type`) per cluster (k=2).


> All files are read from `/mnt/data`, which is already populated in this session.


In [None]:

import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Root directory holding every artifact this notebook reads.
base = Path('/mnt/data')

# Metrics-by-k summary table and the four per-metric plots.
p_resumen = base.joinpath('kmeans_resumen_por_k.csv')
p_sil = base.joinpath('silhouette_vs_k.png')
p_ch = base.joinpath('ch_vs_k.png')
p_dbi = base.joinpath('dbi_vs_k.png')
p_elb = base.joinpath('elbow_inertia.png')

# Artifacts for the chosen k=2 solution: size plot, then the three tables.
p_sizes_k2 = base.joinpath('cluster_sizes_k2.png')
p_centroids_k2 = base.joinpath('centroides_zscores_k2.csv')
p_perfil_k2 = base.joinpath('perfil_unidades_originales_k2.csv')
p_rank_k2 = base.joinpath('ranking_max_abs_z_k2.csv')

# Per-cluster distributions of the external labels (k=2).
p_dist_fair_k2 = base.joinpath('distrib_is_fair_por_cluster_k2.csv')
p_dist_cls_k2 = base.joinpath('distrib_clasification_fairness_por_cluster_k2.csv')
p_dist_type_k2 = base.joinpath('distrib_model_type_por_cluster_k2.csv')

# Convenience display
def show_img(path):
    """Render the image file at ``path`` as rich output.

    ``path`` is converted with ``str()`` before being handed to
    ``IPython.display.Image``, so any object whose string form is a
    filename works. Returns ``None``.
    """
    # Imported at call time, as in the original helper.
    from IPython import display as ipy_display

    image = ipy_display.Image(filename=str(path))
    ipy_display.display(image)


## 1) Metrics by *k*

In [None]:

# Metrics summary table: show at most the first 12 rows, or report the absent file.
if not p_resumen.exists():
    print("Missing:", p_resumen)
else:
    resumen = pd.read_csv(p_resumen)
    display(resumen.head(12))


In [None]:

# Show the saved metric plots in order; a missing file is reported and skipped.
for p in (p_sil, p_ch, p_dbi, p_elb):
    if not p.exists():
        print("Missing:", p)
        continue
    show_img(p)


## 2) Chosen solution: **k = 2**

In [None]:

# Cluster-size plot for the chosen k=2 solution.
if p_sizes_k2.exists():
    show_img(p_sizes_k2)
else:
    # Previously this cell produced no output at all when the file was absent;
    # report it with the same "Missing:" message the other cells print.
    print("Missing:", p_sizes_k2)


### Profiles in original units (k=2)

In [None]:

# Cluster profiles table (k=2): load and show it in full, or report the absent file.
if not p_perfil_k2.exists():
    print("Missing:", p_perfil_k2)
else:
    perfil = pd.read_csv(p_perfil_k2)
    display(perfil)


### Z-centroids (k=2)

In [None]:

# Z-score centroids table (k=2): load and show it in full, or report the absent file.
if not p_centroids_k2.exists():
    print("Missing:", p_centroids_k2)
else:
    cent = pd.read_csv(p_centroids_k2)
    display(cent)


### Discriminative ranking by max |z| (k=2)

In [None]:

# Ranking table (k=2): load and show it in full, or report the absent file.
if not p_rank_k2.exists():
    print("Missing:", p_rank_k2)
else:
    rank = pd.read_csv(p_rank_k2)
    display(rank)


## 3) External label distributions (k=2)

In [None]:

# One table per external label: print the file name, then show the CSV
# with its first column used as the index. Missing files are reported and skipped.
for path in (p_dist_fair_k2, p_dist_cls_k2, p_dist_type_k2):
    if not path.exists():
        print("Missing:", path)
        continue
    print(path.name)
    display(pd.read_csv(path, index_col=0))
