In [None]:
# imports (project modules, NumPy, scikit-learn) and matplotlib font setup
import numpy as np
from fairness import *
from data_prep import *
from retention import *
from opt_retention import *
from regression_retention import *
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib
import matplotlib.ticker as mtick
from matplotlib.ticker import FormatStrFormatter

# Figure fonts: 'cm' math text and Times New Roman for all other text.
matplotlib.rcParams.update({
    'mathtext.fontset': 'cm',
    'font.family': 'Times New Roman',
})

%matplotlib inline
from sklearn.linear_model import LinearRegression

### Example 4

In [None]:
from scipy.optimize import minimize

# Find the optimal theta
def minimize_objective(shifted_true_0, shifted_true_1, weight_0=0.3, weight_1=0.7, grid_size=5001):
    """
    Grid-search the scalar theta in [0, 1] that minimises the weighted absolute loss

        weight_0 * |theta - shifted_true_0| + weight_1 * |theta - shifted_true_1|

    Parameters
    ----------
    shifted_true_0, shifted_true_1 : float
        Target values of the two groups.
    weight_0, weight_1 : float, optional
        Weights of the two groups in the objective. The defaults (0.3, 0.7)
        reproduce the previously hard-coded values.
    grid_size : int, optional
        Number of evenly spaced candidates on [0, 1] (endpoints included).
        The default 5001 gives a spacing of 0.0002.

    Returns
    -------
    theta_min : float
        Candidate with the smallest weighted loss (the first one on ties,
        following np.argmin).
    loss_a : float
        |theta_min - shifted_true_0|
    loss_b : float
        |theta_min - shifted_true_1|

    Raises
    ------
    ValueError
        If grid_size is smaller than 1 (there would be no candidate to pick).
    """
    if grid_size < 1:
        raise ValueError("grid_size must be at least 1")

    candidates = np.linspace(0, 1, grid_size)
    # Evaluate the objective on the whole grid at once.
    losses = (weight_0 * np.abs(candidates - shifted_true_0)
              + weight_1 * np.abs(candidates - shifted_true_1))
    theta_min = candidates[np.argmin(losses)]

    # Per-group (unweighted) absolute losses at the chosen theta.
    loss_a = abs(theta_min - shifted_true_0)
    loss_b = abs(theta_min - shifted_true_1)
    return theta_min, loss_a, loss_b

minimize_objective(0.5,0.5)


In [None]:
# Example 4: ten rounds of "fit theta, then let both group targets react to it".
t0, t1 = 0.5, 0.5
theta_list, la_list, lb_list = [], [], []
for i in range(10):
    # Fit theta against the current targets and record it with the per-group losses.
    theta, la, lb = minimize_objective(t0, t1)
    theta_list.append(theta)
    la_list.append(la)
    lb_list.append(lb)
    # Next round's targets move with the deployed theta: slightly for the
    # first group (factor 0.01), much more for the second (factor 0.2).
    t0, t1 = 0.5 + 0.01*theta, 0.5 + 0.2*theta

# Round to four decimals; this also turns both loss lists into NumPy arrays.
la_list, lb_list = np.round(la_list, 4), np.round(lb_list, 4)
    

In [None]:
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib
import matplotlib.ticker as mtick
from matplotlib.ticker import FormatStrFormatter

# Same font settings as in the first cell, repeated so this cell works on its own.
matplotlib.rcParams.update({
    'mathtext.fontset': 'cm',
    'font.family': 'Times New Roman',
})

%matplotlib inline

# Example 4 figure: 1 - loss per round for each group, saved to e4.pdf.
fig, ax = plt.subplots(figsize=(2.4, 1.8))
ax.plot(1 - la_list, label='group a', color='red')
ax.plot(1 - lb_list, label='group b', color='blue')
ax.set_xticks([0, 3, 6, 9])
ax.set_xlabel(r'$t$', fontsize=12)
ax.set_ylabel('Acc', fontsize=12)
ax.legend(handlelength=0.5, fontsize=10)
fig.tight_layout()
fig.savefig('e4.pdf', bbox_inches='tight')


### Example 3

In [None]:
# Example 3 configuration (read as module-level globals by E3_reg).
# Initial participation rates of group 0 / group 1.
syn_p0, syn_p1 = 0.3, 0.7
# mu_1, mu_2 and sd arguments forwarded to Gaussian_mean_data.
mu_1, mu_2 = 0.0, 1.0
sd = 0.0
# Samples drawn per round, and number of rounds (including the initial one).
syn_num, num_iter = 1000, 10

In [None]:
def new_map(metric, pt, groups = [0,1]):
    """
    Participation-rate update for the next round (two groups).

    Group 0's rate moves by one tenth of the gap metric[1] - metric[0], is
    clamped to [0, 1], and group 1 receives the remainder.

    metric : per-group values; only entries 0 and 1 are read
    pt     : current participation rates; only pt[0] and len(pt) are used
    groups : accepted but not used by this function

    Returns an array of length len(pt) holding the new rates in positions
    0 and 1 (any further positions stay 0).
    """
    rates = np.zeros(len(pt))
    gap = metric[1] - metric[0]
    # np.clip keeps NaN as NaN, exactly like a pair of "<" / ">" checks would.
    rates[0] = np.clip(pt[0] + 1/10*gap, 0, 1)
    rates[1] = 1 - rates[0]
    return rates

def calc_new_MSE_prev(X,Y,Z,theta):
    """
    Mean squared error of the linear predictor X @ theta within each group.

    One entry is produced per distinct label in Z, in sorted order. If label 0
    is absent an np.inf is placed in front; if label 1 is absent an np.inf is
    placed at the end, so a missing group shows up as infinite loss.

    Returns the losses as a NumPy array.
    """
    present = np.unique(Z)

    per_group = []
    for label in present:
        mask = Z == label
        targets = Y[mask]
        residuals = targets - X[mask] @ theta
        per_group.append(1/len(targets) * np.sum(residuals**2))

    # Placeholders for absent groups: group 0 goes first, group 1 goes last.
    head = [] if 0 in present else [np.inf]
    tail = [] if 1 in present else [np.inf]
    return np.array(head + per_group + tail)

In [None]:
def E3_reg(seeds=[0]):
    """
    regular RRM for regression (RRM: presumably "repeated risk minimization" - confirm)

    For every seed, simulates num_iter rounds in which a regression model is
    refit on data whose group proportions react to the previous round's
    group-wise losses. Reads the module-level settings syn_p0, syn_p1, mu_1,
    mu_2, sd, syn_num and num_iter.

    Parameters
    ----------
    seeds : list of seeds, one simulation run per entry (default: a single run
        with seed 0). The default list is shared between calls but is never
        mutated here.

    Returns
    -------
    Five arrays of shape (len(seeds), num_iter), one row per seed:
    rrm_p0_list    : participation rate of group 0 per round
    rrm_p1_list    : participation rate of group 1 per round
    rrm_l_list     : value of calc_curr_l per round (column 0 is never written and stays 0)
    rrm_theta_list : fitted theta per round (stored in a scalar slot, so theta is
                     assumed to hold a single value - TODO confirm)
    rrm_ldisp_list : value of l_disparity_reg per round (column 0 is never written and stays 0)
    """
    k = len(seeds)
    rrm_p0_list, rrm_p1_list, rrm_l_list = np.zeros((k,num_iter)),np.zeros((k,num_iter)),np.zeros((k,num_iter))
    rrm_theta_list, rrm_ldisp_list = np.zeros((k,num_iter)),np.zeros((k,num_iter))

    for i in range(len(seeds)):
        seed = seeds[i]
        # initial sample drawn with the configured starting participation rates
        X, Y, Z, = Gaussian_mean_data(mu_1=mu_1,mu_2=mu_2,sd=sd,p_0=syn_p0,p_1=syn_p1,num_samples=syn_num,seed=seed)
        p_0 = syn_p0
        p_1 = syn_p1  

        # initial round
        # NOTE(review): n and d are not used anywhere below
        n,d = X.shape[0], X.shape[1]
        theta_initial = linear_regression_ana(X, Y)
        # group-wise losses of the initial model; they drive the first participation update
        rrm_lprev = calc_new_MSE_prev(X,Y,Z,theta_initial)
        rrm_theta = np.copy(theta_initial)
        rrm_p0_list[i][0] = p_0
        rrm_p1_list[i][0] = p_1
        rrm_theta_list[i][0] = rrm_theta

        # RRM process
        for t in range(1,num_iter):
            # first get the participation rate induced by theta_t
            # (computed from the previous round's group losses and participation rates)
            rrm_p_t = new_map(rrm_lprev,[rrm_p0_list[i][t-1], rrm_p1_list[i][t-1]])
            rrm_p0_list[i][t] = rrm_p_t[0]
            rrm_p1_list[i][t] = rrm_p_t[1]

            # generate new features
            # (X, Y, Z are overwritten; the same seed is passed again in every round)
            X, Y, Z = Gaussian_mean_data(mu_1=mu_1,mu_2=mu_2,sd=sd,p_0=rrm_p_t[0],p_1=rrm_p_t[1],num_samples=syn_num,seed=seed)
            
            # get the corresponding expected accuracy/loss
            # rrm_phat_t holds the empirical group fractions of the new sample; rrm_lprev
            # still holds the losses of the previous round's model at this point
            rrm_phat_t = [len(Z[Z == 0])/len(Z), len(Z[Z==1])/len(Z)]
            rrm_l_list[i][t] = calc_curr_l(rrm_phat_t, rrm_lprev)

            # perform ERM with the new X, Y, Z parametrized by p_t, get theta
            rrm_theta_new = linear_regression_ana(X, Y)

            # get the new group-wise loss of the refit model
            # NOTE(review): the earlier comment here said the loss is taken on the original
            # sample, not the resampled one, but X, Y, Z were overwritten by the resampled
            # data above, so the resampled data is what is used - confirm which is intended
            rrm_lprev = calc_new_MSE_prev(X,Y,Z,rrm_theta_new)
            rrm_theta_list[i][t] = np.copy(rrm_theta_new)
            rrm_ldisp_list[i][t] = l_disparity_reg(X,Y,Z,rrm_theta_new)
    
    return rrm_p0_list, rrm_p1_list, rrm_l_list, rrm_theta_list, rrm_ldisp_list


In [None]:
# Run the Example 3 simulation with the default single seed ([0]); every returned array has one row.
rrm_p0_list, rrm_p1_list, rrm_l_list, rrm_theta_list, rrm_ldisp_list = E3_reg()

In [None]:
# Example 3 figure: participation rate of each group per round (first seed), saved to e3.pdf.
# NOTE(review): the index-1 series is labelled 'group a' and the index-0 series
# 'group b', whereas the Example 4 plot labels the index-0 quantity 'group a'.
# Confirm this mapping is intended.
fig, ax = plt.subplots(figsize=(2.4, 1.8))
ax.plot(rrm_p1_list[0], label='group a', color='blue')
ax.plot(rrm_p0_list[0], label='group b', color='red')
ax.set_xticks([0, 3, 6, 9])
ax.set_xlabel(r'$t$', fontsize=12)
ax.set_ylabel('Group fraction', fontsize=12)
ax.legend(handlelength=0.5, fontsize=10)
fig.tight_layout()
fig.savefig('e3.pdf', bbox_inches='tight')