# Fair-Lloyd (with gradient descent)

#### Libraries

In [1]:
from importlib import import_module

from src import init, metrics
from src.algorithms import fairlloyd_gd

#### Experiment configuration

In [2]:
# Dataset: name of the loader submodule under `src.datasets`, and the YAML
# configuration file handed to that loader.
DATASET_MODULE = 'adult'
DATASET_CONFIG = 'data/datasets/Adult_race/adult.yaml'

# Clustering: number of clusters, the name of the `src.init` function used to
# seed the centroids, and the random state passed to that function.
N_CLUSTERS = 5
INIT_METHOD = 'kmeans_plusplus'
RANDOM_STATE = 0

#### Load dataset

In [3]:
# Resolve the dataset loader from the configured module name, then load the
# dataset described by the YAML configuration file.
dataset_module = import_module("src.datasets." + DATASET_MODULE)
dataset = dataset_module.load(yamlpath=DATASET_CONFIG)

# Pull out the two entries used by the cells below.
# NOTE(review): presumably X is the feature matrix and s the sensitive-group
# label per sample -- confirm against the loader.
X, s = dataset['X'], dataset['s']

Loading dataset configuration from 'data/datasets/Adult_race/adult.yaml'
Loading processed Adult dataset (Adult_race) from 'data/datasets/Adult_race/adult.csv'
  shape: (46033, 28)
╭────────────┬───────────┬──────────────────┬───────────────────────┬────────────────────╮
│    dataset │   samples │   dimensionality │   sensitive attribute │   sensitive groups │
├────────────┼───────────┼──────────────────┼───────────────────────┼────────────────────┤
│ Adult_race │     46033 │               26 │                  race │                  5 │
╰────────────┴───────────┴──────────────────┴───────────────────────┴────────────────────╯


#### Initialise cluster centroids

In [4]:
# `init` is already imported from `src` in the Libraries cell, so the seeding
# function named by INIT_METHOD is looked up on it directly instead of
# importing "src.init" a second time.
init_method = getattr(init, INIT_METHOD)

# Seed the centroids reproducibly; these are passed to the Fair-Lloyd run below.
init_centroids = init_method(X=X, n_clusters=N_CLUSTERS, random_state=RANDOM_STATE)
print(f"shape: {init_centroids.shape}")

shape: (5, 26)


#### Run Fair-Lloyd

In [5]:
# Run Fair-Lloyd from the centroids computed above. Only these four arguments
# are supplied; the remaining settings shown in the configuration table that
# run() prints are not set by this notebook.
c, centroids = fairlloyd_gd.run(
    X=X,
    s=s,
    n_clusters=N_CLUSTERS,
    init_centroids=init_centroids,
)

Configuration:
╭───────────────────────────────┬──────────────┬────────────┬────────┬───────┬───────┬────────────╮
│                     algorithm │   n_clusters │   max_iter │    tol │   eta │   phi │   patience │
├───────────────────────────────┼──────────────┼────────────┼────────┼───────┼───────┼────────────┤
│ Fair-Lloyd (Gradient Descent) │            5 │        200 │ 0.0001 │ 0.001 │  1000 │         10 │
╰───────────────────────────────┴──────────────┴────────────┴────────┴───────┴───────┴────────────╯
Initialising centroids
Running algorithm
╭──────┬───────────┬───────────────┬────────────────╮
│ iter │ objective │ reassignments │ centroid shift │
├──────┼───────────┼───────────────┼────────────────┤
│    1 │   3.23269 │         46033 │              - │
│    2 │   1.97823 │          1320 │    3.26896e+00 │
│    3 │   1.92283 │          2169 │    3.32609e-01 │
│    4 │   1.88240 │          1669 │    2.55338e-01 │
│    5 │   1.87664 │           467 │    1.17450e-01 │
│    6 │   1

#### Evaluate

In [6]:
# evaluate() emits its own metrics table; binding the return value to `_`
# keeps the notebook from echoing it as the cell's output.
_ = metrics.evaluate(
    X=X,
    s=s,
    c=c,
    centroids=centroids,
)

Evaluating against all metrics
╭───────────────────────────┬───────────╮
│ average cluster disparity │ 0.0247837 │
│ k-means objective         │ 1.83009   │
│ fair centroid objective   │ 2.93149   │
│ fair k-means objective    │ 1.87444   │
╰───────────────────────────┴───────────╯
