# Fair-Centroid

#### Libraries

In [1]:
from importlib import import_module

from src import init, metrics
from src.algorithms import faircentroid

#### Experiment configuration

In [2]:
# Submodule of src.datasets whose load() function reads the data.
DATASET_MODULE = 'adult'
# YAML file handed to the dataset loader as `yamlpath`.
DATASET_CONFIG = 'data/datasets/Adult_race/adult.yaml'
# Number of clusters used for both centroid initialisation and Fair-Centroid.
N_CLUSTERS = 5
# Seed forwarded to the centroid initialisation routine.
RANDOM_STATE = 0
# Name of the function looked up in src.init to create the starting centroids.
INIT_METHOD = 'kmeans_plusplus'
# Forwarded to faircentroid.run as `lambda_`
# (presumably the utility/fairness trade-off weight; confirm in src.algorithms.faircentroid).
LAMBDA_ = 0.5

#### Load dataset

In [3]:
# Resolve the loader by name so that switching datasets only requires
# editing DATASET_MODULE / DATASET_CONFIG in the configuration cell.
dataset_module = import_module(f"src.datasets.{DATASET_MODULE}")
dataset = dataset_module.load(yamlpath=DATASET_CONFIG)

# Pull out the three entries used by the rest of the notebook.
# NOTE(review): X looks like the feature matrix and s the per-sample sensitive
# attribute, judging by how they are passed on; confirm against the loader.
sensitive_groups, X, s = (
    dataset[key] for key in ('sensitive_groups', 'X', 's')
)

Loading dataset configuration from 'data/datasets/Adult_race/adult.yaml'
Loading processed Adult dataset (Adult_race) from 'data/datasets/Adult_race/adult.csv'
  shape: (46033, 28)
╭────────────┬───────────┬──────────────────┬───────────────────────┬────────────────────╮
│    dataset │   samples │   dimensionality │   sensitive attribute │   sensitive groups │
├────────────┼───────────┼──────────────────┼───────────────────────┼────────────────────┤
│ Adult_race │     46033 │               26 │                  race │                  5 │
╰────────────┴───────────┴──────────────────┴───────────────────────┴────────────────────╯


#### Initialise cluster centroids

In [4]:
# Look the initialiser up on the `init` module that the imports cell already
# brought in, rather than importing "src.init" a second time by string.
init_method = getattr(init, INIT_METHOD)
init_centroids = init_method(X=X, n_clusters=N_CLUSTERS, random_state=RANDOM_STATE)

# Fail fast if the initialiser did not return one centroid per requested
# cluster; the later cells pass N_CLUSTERS and init_centroids together.
assert init_centroids.shape[0] == N_CLUSTERS, (
    f"expected {N_CLUSTERS} centroids, got {init_centroids.shape[0]}"
)
print(f"shape: {init_centroids.shape}")

shape: (5, 26)


#### Run Fair-Centroid

In [5]:
# Run Fair-Centroid from the initial centroids computed in the previous cell.
# The call returns a pair, unpacked here as `c` and `centroids`; both are
# handed to metrics.evaluate in the evaluation cell.
c, centroids = faircentroid.run(
    n_clusters=N_CLUSTERS,
    X=X,
    s=s,
    init_centroids=init_centroids,
    sensitive_groups=sensitive_groups,
    lambda_=LAMBDA_,
)

Configuration:
╭───────────────┬──────────────┬────────────┬────────┬───────┬───────┬────────────┬───────────╮
│     algorithm │   n_clusters │   max_iter │    tol │   eta │   phi │   patience │   lambda_ │
├───────────────┼──────────────┼────────────┼────────┼───────┼───────┼────────────┼───────────┤
│ Fair-Centroid │            5 │        200 │ 0.0001 │ 0.001 │  1000 │         10 │       0.5 │
╰───────────────┴──────────────┴────────────┴────────┴───────┴───────┴────────────┴───────────╯
Initialising centroids
Running algorithm
╭──────┬───────────────┬───────────┬───────────┬───────────────┬────────────────╮
│ iter │       utility |  fairness | objective │ reassignments │ centroid shift │
├──────┼───────────────┼───────────┼───────────┼───────────────┼────────────────┤
│    1 │  133509.77497 │  28.29486 │   4.27964 │         46033 │              - │
│    2 │   83451.38689 │  12.84802 │   2.19123 │          1122 │    3.70556e+00 │
│    3 │   83494.75307 │  12.55909 │   2.16281 │      

#### Evaluate

In [6]:
# Bind the result to a descriptive name instead of `_`: IPython uses `_` for
# the most recent cell output, and assigning to it by hand overrides that
# variable. The assignment still keeps the return value from being echoed
# below the table that evaluate() prints.
evaluation = metrics.evaluate(X=X, s=s, c=c, centroids=centroids)

Evaluating against all metrics
╭───────────────────────────┬────────────╮
│ average cluster disparity │ 0.00876847 │
│ k-means objective         │ 1.81232    │
│ fair centroid objective   │ 2.5053     │
│ fair k-means objective    │ 2.11248    │
╰───────────────────────────┴────────────╯
