In [1]:
# Import all utility functions
from utility import *
from matplotlib.ticker import LogLocator

## Income Data: Dynamics of $a_t$, $q_t$ and $\Delta$

In [None]:
# Load the income embedding table; the cells below read its x1, x2, y and z columns.
INCOME_CSV = "income_embeddings.csv"
df = pd.read_csv(INCOME_CSV)
df

### Read Data

In [None]:
def _subset(label, group):
    '''Rows of df whose y column equals label and whose z column equals group.'''
    return df[(df['y'] == label) & (df['z'] == group)]


# Naming: a / b is the group (z == 0 / z == 1), n / p is the label (y == 0 / y == 1).
df_a_n, df_a_p = _subset(0, 0), _subset(1, 0)
df_b_n, df_b_p = _subset(0, 1), _subset(1, 1)

# Share of y == 1 rows inside each group.
alpha_a = len(df_a_p) / (len(df_a_p) + len(df_a_n))
alpha_b = len(df_b_p) / (len(df_b_p) + len(df_b_n))
print(alpha_a, alpha_b)

In [4]:
# Observed range (min, max) of each feature over the whole table.
lx1, rx1 = df['x1'].min(), df['x1'].max()
lx2, rx2 = df['x2'].min(), df['x2'].max()

In [None]:
# Show the observed bounds: min and max of x1, then min and max of x2.
lx1, rx1, lx2, rx2

### Fit Beta Distribution

In [6]:
def plot_beta(x_range, a, b, mu=0, sigma=1, cdf=False, **kwargs):
    '''
    Plots a beta distribution curve over x_range on the current axes.

    a and b are the two beta shape parameters. mu and sigma are forwarded
    as the 4th and 5th positional arguments of beta.pdf / beta.cdf
    (location and scale, assuming `beta` is scipy.stats.beta); with the
    defaults 0 and 1 the standard beta on [0, 1] is drawn.
    If cdf=True the cumulative distribution is plotted instead of the density.
    Any extra keyword arguments are passed to matplotlib's plot function.
    '''
    curve = beta.cdf if cdf else beta.pdf
    plt.plot(x_range, curve(x_range, a, b, mu, sigma), **kwargs)

In [None]:
# plot an overall distribution
# Pool both labels of group a (the z == 0 rows) and fit a beta distribution to
# each feature with location and scale held fixed.
df_a = pd.concat([df_a_n, df_a_p])
all_params_1 = beta.fit(df_a['x1'], floc=0.3, fscale=0.3)
all_params_2 = beta.fit(df_a['x2'], floc=0.55, fscale=0.35)

# Fitted CDF of x1 over the fitted support [0.3, 0.3 + 0.3].
x = np.linspace(0.3, 0.6, 1000)
plot_beta(x, all_params_1[0], all_params_1[1], 0.3, 0.3, cdf=True, color='black', label=r"$F_{X_1}$")
plt.xlabel(r'$X_1$')
# The curve carries a label, so draw the legend that displays it.
plt.legend()
plt.show()

In [None]:
# save the beta distribution parameters
# Samples keyed first by group ('a' / 'b') and then by label ('p' / 'n').
_samples = {
    'a': {'p': df_a_p, 'n': df_a_n},
    'b': {'p': df_b_p, 'n': df_b_n},
}

# fit parameters
# One beta fit per (group, label); location and scale are held fixed at
# (0.3, 0.3) for x1 and at (0.55, 0.35) for x2.
params_1 = {
    grp: {lab: beta.fit(rows['x1'], floc=0.3, fscale=0.3) for lab, rows in by_label.items()}
    for grp, by_label in _samples.items()
}
params_2 = {
    grp: {lab: beta.fit(rows['x2'], floc=0.55, fscale=0.35) for lab, rows in by_label.items()}
    for grp, by_label in _samples.items()
}

print(params_1, params_2)

In [None]:
# plot the distribution
x = np.linspace(0, 1, 500)
fig = plt.figure(figsize=(12, 2.5))


def _draw_feature_panel(position, fitted, neg, pos, loc, scale, title, xlabel):
    '''
    Draws panel number `position` of the 1x4 figure.

    fitted holds the beta fits under keys 'n' and 'p'; neg / pos are the
    matching samples, shown as density histograms (blue for 'n', red for 'p')
    underneath the fitted density curves. loc and scale are the fixed
    location and scale the fits were made with.
    '''
    plt.subplot(1, 4, position)
    plt.ylim(0.1, 16.6)
    plt.xlim(0, 1)
    plot_beta(x, fitted['n'][0], fitted['n'][1], loc, scale, color='blue', lw=2, ls='-')
    plot_beta(x, fitted['p'][0], fitted['p'][1], loc, scale, color='red', lw=2, ls='-')
    plt.hist(neg, density=True, color='blue', bins=10, label='unqualified', alpha=0.5)
    plt.hist(pos, density=True, bins=100, color='red', label='qualified', alpha=0.5)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.legend()


# One row per panel, left to right: group a on X1 and X2, then group b on X1 and X2.
_panels = [
    (params_1['a'], df_a_n['x1'], df_a_p['x1'], 0.3, 0.3, r"Group $i$", r'$X_1$'),
    (params_2['a'], df_a_n['x2'], df_a_p['x2'], 0.55, 0.35, r"Group $i$", r'$X_2$'),
    (params_1['b'], df_b_n['x1'], df_b_p['x1'], 0.3, 0.3, r"Group $j$", r'$X_1$'),
    (params_2['b'], df_b_n['x2'], df_b_p['x2'], 0.55, 0.35, r"Group $j$", r'$X_2$'),
]
for _position, _panel in enumerate(_panels, start=1):
    _draw_feature_panel(_position, *_panel)

plt.tight_layout()
# Save before showing, the same order the monotone figure cell uses.
fig.savefig('plots_new/feature_dist_real_income.pdf')
plt.show()

### Verify the monotone likelihood ratio property

In [None]:
def plot_monotone(x_range, a0, b0, a1, b1, mu1=0, sigma1=1, **kwargs):
    '''
    Plots the likelihood ratio of two beta densities on a log-scaled y axis.

    The curve is pdf(x; a1, b1) / pdf(x; a0, b0) evaluated over x_range.
    Both densities share mu1 and sigma1, which are forwarded as the 4th and
    5th positional arguments of beta.logpdf (location and scale, assuming
    `beta` is scipy.stats.beta); the defaults give the standard beta.
    Where both densities are zero the ratio is nan and nothing is drawn.
    Any extra keyword arguments are passed to matplotlib's plot function.
    '''
    x = x_range
    # Form the ratio from log-densities: it stays finite where both pdfs
    # underflow to zero, and the 0/0 and x/0 cases outside the support no
    # longer emit RuntimeWarnings (they still evaluate to nan / inf).
    with np.errstate(all='ignore'):
        log_ratio = beta.logpdf(x, a1, b1, mu1, sigma1) - beta.logpdf(x, a0, b0, mu1, sigma1)
        ratio_10 = np.exp(log_ratio)
    plt.plot(x, ratio_10, **kwargs)
    # The ratio spans many orders of magnitude, so use a log y axis.
    plt.yscale('log')


fig = plt.figure(figsize=(6,2.5))
x = np.linspace(0, 1, 500)


def _draw_ratio_panel(position, params, loc, scale, xlabel):
    '''
    Draws the likelihood-ratio curves of group a and group b for one feature.

    params maps group -> label ('n' / 'p') -> fitted beta parameters; loc and
    scale are the fixed location and scale those fits were made with.
    Returns the axes that was created.
    '''
    ax = fig.add_subplot(position)
    plt.xlim(0, 1)
    plt.ylim(1e-4, 10**15)
    for group in ('a', 'b'):
        neg, pos = params[group]['n'], params[group]['p']
        plot_monotone(x, neg[0], neg[1], pos[0], pos[1], loc, scale,
                      lw=1.5, ls='-', label=f'Group {group}')
    ax.yaxis.set_major_locator(LogLocator(base=10**5))
    ax.set_xlabel(xlabel)
    plt.legend()
    # Called once per panel so both axes drop their minor ticks, not only the last one.
    plt.minorticks_off()
    return ax


ax = _draw_ratio_panel(121, params_1, 0.3, 0.3, r'$X_1$')
ax = _draw_ratio_panel(122, params_2, 0.55, 0.35, r'$X_2$')
plt.tight_layout()
fig.savefig('plots_new/monotone.pdf')
plt.show()

### Experiment Begins


- Note: the exact numbers may differ slightly because older (now deprecated) versions of sklearn implement the logistic classifier differently, but the qualitative results should remain the same.

In [7]:
# Settings shared by the simulation runs below; all of them are passed
# positionally to simulation(Q, N, n, T, alphas, bias, mag, tp, r, ...).
# NOTE(review): the meaning of n, T, N and tp is defined by `simulation` in
# utility and is not visible here; confirm before changing them.
n, T, N, tp = 10, 15, 2000, 4
Q = np.diag([5, 5])
# Share of y == 1 rows in each group, computed from the data above.
alphas = dict(a=alpha_a, b=alpha_b)

Group $i$

In [None]:
# ratio = 0.1
# Seed NumPy's global RNG before the run.
np.random.seed(2)
ratio = 0.1
r, bias, mag = 0.1, 'up', 0.1
# Tag describing this setting; it is handed to both plotting helpers.
des = "income_setting_ratio{}_bias{}_mag{}".format(r, bias, mag)
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alphas, bias, mag, tp, r,
    params_1=params_1, params_2=params_2, sd=True,
)
plot_save_single(At, Qt, des, False)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, False)

Group $j$

In [None]:
# ratio = 0.1
# Seed NumPy's global RNG before the run.
np.random.seed(2)
r, bias, mag = 0.1, 'down', 0.1
# Tag describing this setting; it is handed to both plotting helpers.
des = "income_setting_ratio{}_bias{}_mag{}".format(r, bias, mag)
At, Qt, At_sd, Qt_sd = simulation(
    Q, N, n, T, alphas, bias, mag, tp, r,
    params_1=params_1, params_2=params_2, group='b', sd=True,
)
plot_save_single(At, Qt, des, False)
plot_save_single_err(At, Qt, At_sd, Qt_sd, des, False)