In [1]:
import numpy as np

In [2]:
import fado

In [3]:
import numpy as np
from sklearn.datasets import make_classification

# Synthetic classification data: 1000 samples with 20 features, of which
# 2 are informative and 10 are redundant; the seed is fixed for repeatability.
x, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    random_state=42,
)

# Store the class labels as floating-point values
y = y.astype(np.float64)

x, y

(array([[-0.49693203, -0.33912228,  0.22914552, ..., -1.17927302,
          0.45780561,  0.35600629],
        [ 0.01249932,  1.00744397,  0.14009566, ..., -1.76385344,
          0.93791554,  0.45936344],
        [-0.72021202, -1.24448645, -0.47229097, ...,  0.41394755,
         -0.5387903 , -0.26636053],
        ...,
        [ 1.01027459, -0.13092581,  1.18030384, ..., -1.98959095,
         -0.80579965,  0.88702644],
        [-1.20775939, -0.3807634 ,  0.12906749, ..., -0.77725344,
          0.59910333,  0.22671772],
        [ 1.14356604, -0.42459817, -0.31394763, ...,  1.24769039,
         -0.30704834, -0.40246308]]),
 array([0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1.,
        0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0.,
        0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1.,
        1., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1.,
        1., 0., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1., 1., 1., 0., 0

In [4]:
from sklearn.neighbors import NearestNeighbors


def consistency_score(x: np.ndarray, y: np.ndarray, n_neighbors=5, **kwargs) -> float:
    """
    Compute the Consistency Score as defined in Learning Fair Representations (Zemel et al. 2013).

    This score measures the consistency of the output `y` with respect to nearest neighbors in `x`:
    for every sample, its output is compared with the mean output of its `n_neighbors` nearest
    neighbors, and the score is one minus the mean absolute gap.
    A higher score indicates more fairness.

    The neighbor search is queried with the same data it was fitted on, so a sample is
    typically part of its own neighbor set.

    Parameters
    ----------
    x : np.ndarray
        Array representing the input data, one row per sample. It is handed to
        scikit-learn's ``NearestNeighbors`` unchanged.
    y : np.ndarray
        Array (or other array-like, e.g. a list) with one entry per row of `x`,
        representing the output data.
    n_neighbors : int, optional
        Number of neighbors to consider. Default is 5.
    **kwargs
        Additional keyword arguments. These are not currently used.

    Returns
    -------
    float
        The consistency score. Higher values indicate more fairness.

    Raises
    ------
    ValueError
        If `y` does not contain exactly one entry per row of `x`.
    """
    # Lists and pandas objects do not support indexing with a 2-D integer array,
    # so normalise the outputs to an ndarray before using them below.
    y = np.asarray(y)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree')
    nbrs.fit(x)
    indices = nbrs.kneighbors(x, return_distance=False)

    # Fail with a clear message instead of an opaque broadcasting / index error.
    if y.shape[0] != indices.shape[0]:
        raise ValueError(
            f"x and y must contain the same number of samples "
            f"(got {indices.shape[0]} and {y.shape[0]})"
        )

    # y[indices] has one row of neighbor outputs per sample; average each row.
    neighbor_mean = np.mean(y[indices], axis=1)
    return 1 - np.mean(np.abs(y - neighbor_mean))

In [12]:
# Scratch check: build a 5-neighbour ball-tree index on x, using the same
# settings as the defaults of consistency_score.
nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
nbrs.fit(x)

In [13]:
# Query the index with the data it was fitted on; only neighbour indices are requested.
indices = nbrs.kneighbors(x, return_distance=False)

In [15]:
# Display the neighbour index matrix (one row per sample, one column per neighbour).
indices

array([[  0, 221,  78, 757, 696],
       [  1, 474, 137, 952, 216],
       [  2, 540, 783, 835, 829],
       ...,
       [997, 780, 413, 825, 555],
       [998,  88, 248, 474, 504],
       [999, 914, 512, 399, 161]])

In [19]:
# Sanity check: averaging the neighbours' outputs over axis 1 leaves one value
# per sample, so the difference against y is a 1-D array again.
(y - np.mean(y[indices], axis=1)).shape

(1000,)

In [37]:
def consistency_score_knn(y_pred, X, n_neighbors=5):
    """
    Calculate the consistency score using K-Nearest Neighbors.

    Every prediction is compared with the predictions of its `n_neighbors`
    nearest *other* samples in `X`. The score is one minus the mean absolute
    difference over all (sample, neighbor) pairs.

    Args:
        y_pred: predictions from a model, one entry per row of X (any
            array-like; values are converted to float).
        X: features used to make predictions.
        n_neighbors: the number of neighbors to consider. scikit-learn
            requires it to be positive and smaller than the number of samples.

    Returns:
        The consistency score. For 1-D predictions this is a scalar; for 2-D
        predictions it is one score per prediction column.
    """
    # work on a float array: unsigned labels would wrap around on subtraction
    # and boolean labels do not support the `-` operator at all
    preds = np.asarray(y_pred, dtype=float)

    # fit the KNN model
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(X)

    # kneighbors() without a query returns the neighbors of the fitted samples
    # with each sample excluded from its own list. Querying with X and slicing
    # off column 0 is not equivalent: with duplicate rows the sample itself is
    # not guaranteed to come first, so a real neighbor could be dropped instead.
    indices = nbrs.kneighbors(return_distance=False)

    # preds[indices] holds the neighbors' predictions (one row per sample);
    # broadcasting against a column of the samples' own predictions gives
    # every pairwise difference at once
    differences = np.abs(preds[indices] - preds[:, None])

    # average over samples and neighbors only, keeping any trailing axes
    return 1 - differences.mean(axis=(0, 1))

# test the function
# The toy data is bound to the names used in the call below, so the cell does
# not depend on y_pred / X left over from some other cell.
y_pred = np.array([1, 0, 1, 0, 1, 0])
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]])

print(consistency_score_knn(y_pred, X, n_neighbors=5))

0.4


In [5]:

# Six-sample toy data: alternating labels, feature rows of the form [k, k + 1]
y = np.array(
    [1, 0, 1, 0, 1, 0]
)
x = np.array(
    [
        [1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
        [5, 6],
        [6, 7],
    ]
)

In [8]:
# Probe: call the packaged fado.metrics.consistency_score with an extra keyword
# argument. A TypeError is caught and printed rather than left to abort the
# cell, so the notebook can still be executed from top to bottom.
try:
    print(fado.metrics.consistency_score(x, y, a=1))
except TypeError as exc:
    print(f"TypeError: {exc}")

TypeError: consistency_score() got an unexpected keyword argument 'a'

In [7]:
# Evaluate fado.metrics.consistency_score_objective on the six-sample toy data.
fado.metrics.consistency_score_objective(x, y)

0.6

In [47]:
def consistency_score_knn(y_pred, X, n_neighbors=5):
    """
    Calculate the consistency score using K-Nearest Neighbors.

    The score is one minus the mean absolute difference between each
    prediction and the predictions of its `n_neighbors` nearest other samples
    in `X`.

    Args:
        y_pred: predictions from a model, one entry per row of X (any
            array-like; values are converted to float).
        X: features used to make predictions.
        n_neighbors: the number of neighbors to consider. scikit-learn
            requires it to be positive and smaller than the number of samples.

    Returns:
        The consistency score.
    """
    # lists cannot be indexed with an index matrix, and unsigned / boolean
    # labels misbehave under subtraction, so normalise to a float array first
    labels = np.asarray(y_pred, dtype=float)

    # fit the KNN model
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(X)

    # ask for the neighbors of the fitted samples themselves: without a query
    # argument each sample is left out of its own neighbor list, which stays
    # correct for duplicate rows (dropping column 0 of a self-query does not,
    # because the sample itself is then not guaranteed to be listed first)
    indices = nbrs.kneighbors(return_distance=False)

    # labels[indices] has shape (n_samples, n_neighbors); the added axis lets
    # each sample's own label broadcast across its neighbors
    differences = np.abs(labels[indices] - labels[:, None])

    return 1 - np.mean(differences)

# Smoke test on six samples with alternating predictions
y_pred = np.array([1, 0, 1, 0, 1, 0])
X = np.array([
    [1, 2], [2, 3], [3, 4],
    [4, 5], [5, 6], [6, 7],
])

print(consistency_score_knn(y_pred=y_pred, X=X, n_neighbors=5))

[[1 0 1 0 1]
 [1 1 0 1 0]
 [1 1 0 0 1]
 [1 1 0 0 1]
 [1 1 0 1 0]
 [1 0 1 0 1]]
0.4


In [46]:
# Indexing with None appends a length-1 axis, turning the 1-D vector into a
# column that can broadcast against a 2-D array.
y[:, None].shape

(1000, 1)

In [22]:
def consistency_score_knn(y_pred, X, n_neighbors=5):
    """
    Calculate the consistency score using K-Nearest Neighbors.

    Each prediction is compared with the predictions of the `n_neighbors`
    samples closest to it in `X` (the sample itself is not counted); the
    result is one minus the mean absolute difference.

    Args:
        y_pred: predictions from a model, one entry per row of X (any
            array-like; values are converted to float).
        X: features used to make predictions.
        n_neighbors: the number of neighbors to consider. scikit-learn
            requires it to be positive and smaller than the number of samples.

    Returns:
        The consistency score.
    """
    # Float array: supports index-matrix lookup and avoids unsigned wrap-around
    # or boolean-subtraction errors in the difference below
    preds = np.asarray(y_pred, dtype=float)

    # Fit the KNN model and find neighbors. With no query argument,
    # kneighbors() leaves every sample out of its own neighbor list, so there
    # is no need to request one extra neighbor and drop the first column
    # (which can discard a real neighbor when X contains duplicate rows).
    indices = NearestNeighbors(n_neighbors=n_neighbors).fit(X).kneighbors(return_distance=False)

    # Calculate differences between each sample and each of its neighbors
    differences = np.abs(preds[indices] - preds[:, None])

    # Return consistency score
    return 1 - np.mean(differences)

# Quick check on a six-sample toy problem: labels alternate 1, 0 and the
# feature rows are [1, 2], [2, 3], ..., [6, 7]
y_pred = np.tile([1, 0], 3)
X = np.column_stack((np.arange(1, 7), np.arange(2, 8)))

print(consistency_score_knn(y_pred, X, n_neighbors=5))

0.4
