In [8]:
import numpy as np
import pandas as pd

In [9]:
from folktables import ACSDataSource, ACSEmployment, ACSPublicCoverage
# Fetch the 2018 one-year, person-level ACS sample for New York
# (download=True asks folktables to download the data).
data_source = ACSDataSource(survey_year=2018, horizon='1-Year', survey='person')
acs_data = data_source.get_data(states=['NY'], download=True)

# Convert the survey frame into arrays via the ACSPublicCoverage task definition.
X, y, group = ACSPublicCoverage.df_to_numpy(acs_data)

# Column labels attached to X below.
# NOTE(review): presumably this must match the feature order used by
# ACSPublicCoverage.df_to_numpy — confirm against the folktables task definition.
columns = [
    'AGEP', 'SCHL', 'MAR', 'SEX', 'DIS', 'ESP', 'CIT', 'MIG', 'MIL', 'ANC',
    'NATIVITY', 'DEAR', 'DEYE', 'DREM', 'PINCP', 'ESR', 'ST', 'FER', 'RAC1P',
]

# Re-wrap the feature array as a labelled DataFrame so columns can be selected by name.
X = pd.DataFrame(X, columns=columns)

In [10]:
from sklearn.linear_model import LinearRegression 
from sklearn.metrics import log_loss

def LinReg(X, y, wts):
    """Fit a weighted linear regression and return the fitted estimator.

    Args:
        X: feature matrix handed to ``LinearRegression.fit``.
        y: target values.
        wts: per-sample weights, forwarded as ``sample_weight``.

    Returns:
        The fitted ``LinearRegression`` instance.
    """
    # ``fit`` returns the estimator itself, so construction and fitting chain.
    return LinearRegression().fit(X, y, sample_weight=wts)

def filter_grp(x, y, g_):
    """Restrict features and targets to the members of one group.

    Args:
        x: indexable feature container.
        y: indexable target container.
        g_: index object applied identically to ``x`` and ``y``.

    Returns:
        Tuple ``(x[g_], y[g_])``.
    """
    x_grp = x[g_]
    y_grp = y[g_]
    return x_grp, y_grp

def eps_k(h, L, x_k, y_k):
    """Evaluate the loss of model ``h`` on one group's data.

    Args:
        h: object exposing ``predict(x)``.
        L: loss callable invoked as ``L(y_true, y_pred)``.
        x_k: the group's features.
        y_k: the group's targets.

    Returns:
        Whatever ``L`` returns for the group's targets and ``h``'s predictions.
    """
    predictions = h.predict(x_k)
    return L(y_k, predictions)

def weights_updater(lr, err, old_wts, g_):
    """Scale one group's sample weights by ``exp(lr * err)``.

    The update is written back into ``old_wts`` itself; the returned object is
    that same array, not a copy.

    Args:
        lr: learning rate multiplying the error in the exponent.
        err: the group's error.
        old_wts: weight array, modified in place.
        g_: index object selecting the group's entries in ``old_wts``.

    Returns:
        ``old_wts`` after the in-place update.
    """
    factor = np.exp(lr * err)
    boosted = old_wts[g_] * factor
    old_wts[g_] = boosted
    return old_wts

In [11]:
# NumPy-array form of the feature DataFrame `X`; `X` itself is left unchanged.
X_ = X.to_numpy()

In [16]:

# Number of iterations
T = 30

# Loss used to score each group.
# NOTE(review): log_loss expects probability predictions, while LinReg produces
# unbounded regression outputs — confirm this pairing is intended.
L = log_loss

# Number of groups
k = 3

# One np.where(...) index tuple per group, selecting rows of X by category code.
# NOTE(review): presumably RAC1P 6 / 2 are race categories and NATIVITY 2 is
# foreign-born per the ACS PUMS coding — confirm against the data dictionary.
G = [
    np.where(X['RAC1P'] == 6),
    np.where(X['RAC1P'] == 2),
    np.where(X['NATIVITY'] == 2),
]


def MinimaxFair(X, y, G, L, T, H_, lr=None):
    """Train ``T`` models, re-weighting samples by per-group loss between rounds.

    Every round fits a model on the current sample weights, evaluates its loss
    on each group, and multiplies the weights of that group's samples by
    ``exp(lr * loss)`` (via ``weights_updater``).  With a positive ``lr`` a
    group with larger loss therefore receives a larger multiplier.  Samples
    that belong to several groups are scaled once per group.

    Args:
        X: feature array; must be indexable by every entry of ``G``.
        y: target array aligned with ``X``; ``len(y)`` defines the sample count.
        G: iterable of index objects (e.g. ``np.where`` results), one per group.
        L: loss callable invoked as ``L(y_true, y_pred)``.
        T: number of rounds.
        H_: learner callable ``H_(X, y, weights)`` returning a model that
            exposes ``predict``.
        lr: learning rate of the exponential update.  ``None`` (the default)
            selects ``log(N) / T``; any explicit number, including ``0``, is
            used as given.

    Returns:
        List with the model fitted in each round, in round order.
    """
    N = len(y)
    # Test against None rather than truthiness so that an explicit lr=0.0 is
    # honoured instead of being silently replaced by the default.
    if lr is None:
        lr = np.log(N) / T
    # Start from uniform weights.
    weights = np.ones(N) / N

    H = []
    for _ in range(T):
        h_t = H_(X, y, weights)
        H.append(h_t)
        # Group losses depend only on h_t, so the per-group updates can be
        # applied one after another within the round.
        for g_k in G:
            err = eps_k(h_t, L, *filter_grp(X, y, g_k))
            weights = weights_updater(lr, err, weights, g_k)
    return H

# Run the training outside the assert so that it still executes when
# assertions are disabled (python -O) and the fitted models stay available.
models = MinimaxFair(X_, y, G, L, T, LinReg)

# One model per round; compare against T rather than a hard-coded count so the
# check keeps passing when the number of iterations is changed.
assert len(models) == T