# 882 Replication Paper: Flabbi 2010

- Data from CPS, 1995
- Estimation strategy from Flabbi 2010 

## Import Packages

In [None]:
# Data Manipulation 
import numpy as np
import pandas as pd

# General
import pdb

# Estimation
from scipy.optimize import minimize
import scipy.stats as stats
# import numdifftools as ndt

# Data Visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn.apionly as sns
from pylab import *

## Import Data 

- CPS data on gender, wages, duration of unemployment
- M: males
- F: females
- U: unemployed
- E: employed 
- {M,F} X {E,U} = {males,females} X {employed, unemployed}

In [None]:
# Load the CPS extract and give its four columns short names.
data=pd.read_csv('../data/est_c.csv')
data.columns = ['dur', 'wage', 'empl', 'women']

# Split by gender and by employment status (trailing counts refer to the untrimmed sample).
M = data[data['women']==0] #1186 men 
F = data[data['women']==1] #993 women 
U = data[data['empl']==0] #45 unemployed
E = data[data['empl']==1] #2134 employed

# Gender x employment cells: {M,F} x {E,U}.
ME = M[M['empl']==1] #1168 employed men
MU = M[M['empl']==0] #18 unemployed men
FE = F[F['empl']==1] #966 employed women
FU = F[F['empl']==0] #27 unemployed women

### Summary Statistics, without trimming

In [None]:
# Statistics computed for each variable in the summary tables (passed to DataFrame.agg).
agg_dict = {
    'wage': ['mean', 'std', 'count'],
    'dur': ['mean', 'std', 'count'],
}

In [None]:
# LaTeX summary table for the untrimmed sample, by gender and employment status.
print(data.groupby(['women', 'empl']).agg(agg_dict).to_latex()) # by gender

In [None]:
# LaTeX summary table for the untrimmed sample, by employment status only.
print(data.groupby(['empl']).agg(agg_dict).to_latex()) # all

### Summary Statistics, with trimming

In [None]:
# 5th percentile of employed men's wages; the trimming cell hard-codes a wage cutoff for men.
print(np.percentile(ME['wage'], 5))

In [None]:
# 5th percentile of employed women's wages; the trimming cell hard-codes a wage cutoff for women.
print(np.percentile(FE['wage'], 5))

In [None]:
# Wage cutoffs are typed in as literals so they do not change when the cells above are
# re-run after ME / FE have been rebound to the trimmed sample.
M_WAGE_COND = (data['wage'] > 7.27) #hard coded percentile so it does not continually update
M_COND = (data['women'] == 0)

F_WAGE_COND = (data['wage'] > 5.75625) #hard coded percentile so it does not continually update
F_COND = (data['women'] == 1)

# Any row with a positive duration is kept regardless of its wage.
# NOTE(review): presumably these are exactly the unemployed rows - confirm against the data.
DUR_COND = (data['dur'] > 0)

# Keep a row if it clears its gender's wage cutoff OR has a positive duration.
trim = data[ (M_COND & M_WAGE_COND) | (F_COND & F_WAGE_COND) | DUR_COND ]


# Rebind the subsample names to the trimmed data; every later cell uses these versions.
M = trim[trim['women']==0] #1127 men 
F = trim[trim['women']==1] #944 women 
U = trim[trim['empl']==0] #45 unemployed
E = trim[trim['empl']==1] #2026 employed

ME = M[M['empl']==1] #1109 employed men
MU = M[M['empl']==0] #18 unemployed men
FE = F[F['empl']==1] #917 employed women
FU = F[F['empl']==0] #27 unemployed women

In [None]:
# LaTeX summary table for the trimmed sample, by gender and employment status.
print(trim.groupby(['women', 'empl']).agg(agg_dict).to_latex()) # by gender

In [None]:
# LaTeX summary table for the trimmed sample, by employment status only.
print(trim.groupby(['empl']).agg(agg_dict).to_latex()) # all

## Figures 

- Distribution of wages, men and women

In [None]:
# Seaborn
# Two stacked panels: wages of employed men (top) and employed women (bottom).
fig, ax = plt.subplots(2, 1, figsize=(12, 8))

# NOTE(review): distplot and the seaborn.apionly import are only available in older
# seaborn releases - confirm the pinned version before re-running.
sns.distplot(ME['wage'], color='#4B9CD3', hist_kws={'alpha' : .3}, bins=50, ax=ax[0])
sns.distplot(FE['wage'], color='#4B9CD3', hist_kws={'alpha' : .3}, bins=50, ax=ax[1])

ax[0].legend(['Men'])
ax[1].legend(['Women'])

# Identical axis limits on both panels so the two distributions can be compared by eye.
ax[0].set_xlim([0,75])
ax[1].set_xlim([0,75])

ax[0].set_ylim([0,0.07])
ax[1].set_ylim([0,0.07])

plt.tight_layout()

In [None]:
# Save the two-panel wage figure as a PNG with a transparent background.
fig.savefig('./figures/fig1_2.png', bbox_inches='tight', transparent=True)

## Estimation 

### Initial Conditions

In [None]:
# Starting values on the natural scale. Suffix _0 is the pooled value,
# _M / _F are the gender-specific values.

# λ: enters the hazard functions as the multiplier on the acceptance probability.
λ_0 = 0.22
λ_M = 0.18
λ_F = 0.28

# h: not referenced by the estimation cells shown in this notebook.
h_0 = 0.234
h_M = 0.203
h_F = 0.260

# η: enters the likelihood through log(h/(h+η)) and log(η).
η_0 = 0.005
η_M = 0.003
η_F = 0.0077

# μ, σ: location and scale of log productivity in the lognormal helpers.
μ_0 = 3.433
μ_M = 3.456
μ_F = 3.454

σ_0 = 0.523
σ_M = 0.558
σ_F = 0.423

# p and d are the two arguments of hazardF / dens_accepted_prej that the men's equations lack.
p_0 = 0.5
d_0 = 15 #leads to successful termination (13 leads to unsuccessful)

### Parameters without distributional assumptions

In [None]:
# wstar is set to the lowest observed wage among the (trimmed) employed of each gender.
wstarM = min(ME['wage'])
wstarF = min(FE['wage'])

# α is held fixed rather than estimated; every likelihood function reads this global.
α = 0.5

### Accepted Wage Distribution as in Eq. 13 (Lognormal as a Transformation of the Normal)

In [None]:
def lognormpdf(x: np.array, μ: float, σ: float):
    """
    Evaluate the lognormal density at x without a stats package.

    x is the point (scalar or array) at which to evaluate; μ and σ are the
    mean and standard deviation of log(x). The value returned is
    exp(-(ln x - μ)² / (2σ²)) / (x · σ · sqrt(2π)).
    """

    # np.pi is used explicitly so the function does not depend on the bare
    # name `pi` that is only in scope through `from pylab import *`.
    log_dev = np.log(x) - μ
    normaliser = x * σ * np.sqrt(2 * np.pi)

    return np.exp(-(log_dev ** 2) / (2 * σ * σ)) / normaliser

In [None]:
def lognormsf(x: np.array, μ: float, σ: float):
    """
    Lognormal survival function, P(X > x), via the standard normal.

    x is the point (scalar or array) at which to evaluate; μ and σ are the
    mean and standard deviation of log(x).
    """

    z = (np.log(x) - μ) / σ

    # norm.sf is used instead of 1 - norm.cdf: the subtraction rounds to
    # exactly 0 far in the upper tail, and callers take logs of this value.
    return stats.norm.sf(z)

In [None]:
def dens_accepted(wage: np.array, α: float, μ: float, σ: float, wstar: float):
    """
    Density of accepted wages under the lognormal assumption (end of eq 13).

    The wage is mapped to (wage - (1-α)·wstar)/α, the lognormal pdf is
    evaluated there, and the result is divided by α times the lognormal
    survival function evaluated at wstar.
    """

    implied_x = (wage - (1-α)*wstar)/α

    density = lognormpdf(implied_x, μ, σ)
    accept_prob = lognormsf(wstar, μ, σ)

    return density / (α * accept_prob)

In [None]:
def dens_accepted_prej(wage: np.array, α: float, μ: float, σ: float, wstar: float, d: float):
    """
    Density of accepted wages when prejudice is present (end of eq 13).

    Same construction as dens_accepted, except that the pdf argument is
    (wage + α·d - (1-α)·wstar)/α and the survival function is evaluated
    at wstar + d.
    """

    implied_x = (wage + α*d - (1-α)*wstar)/α

    density = lognormpdf(implied_x, μ, σ)
    accept_prob = lognormsf(wstar+d, μ, σ)

    return density / (α * accept_prob)

In [None]:
def hazardM(λ: float, wstar: float, α: float, μ: float, σ: float):
    """
    Hazard rate for men (eq 10 of the paper).

    Returns λ times the lognormal survival function at wstar. α is accepted
    for a signature parallel to the other helpers but is not used here.
    """

    return λ * lognormsf(wstar, μ, σ)

In [None]:
def hazardF(λ: float, wstar: float, α: float, μ: float, σ: float, p: float, d: float):
    """
    Hazard rate for women (eq 10 of the paper).

    Returns λ times a p-weighted mixture of two lognormal survival
    probabilities: weight (1-p) on the one evaluated at wstar and weight p on
    the one evaluated at wstar + d. α is not used in the computation.
    """

    accept_plain = lognormsf(wstar, μ, σ)
    accept_shifted = lognormsf(wstar+d, μ, σ)

    return λ * ((1-p)*accept_plain + p*accept_shifted)

### Test Statistics

In [None]:
def teststats (hess_inv: np.ndarray, lnL: float, nparams: int):
    """
    Calculates the standard errors and p value from the LR tests
    """
    se = np.sqrt(np.diag(hess_inv))
    
    loglik_H0 = lnL_6
    
    LR = 2 * (lnL - loglik_H0)
    pval = stats.chi2.pdf(LR, nparams)
    
    return [se.tolist(), [lnL], [pval]]

### Likelihood Functions

#### Estimation 6

In [None]:
def loglik_6( params: list ):
    """
    Negative log likelihood for Estimation 6 (prejudice and productivity differences).

    params is ordered as
    [log λM, log λF, log ηM, log ηF, μM, log σM, μF, log σF, log d, log p];
    every entry except μM and μF is passed through exp() before use.

    Reads the module-level samples M, MU, ME, F, FU, FE and the constants
    wstarM, wstarF and α. Returns minus the summed log likelihood so the
    function can be handed to a minimiser.
    """

    λM, λF, ηM, ηF = (np.exp(params[k]) for k in (0, 1, 2, 3))
    μM, μF = params[4], params[6]
    σM, σF = np.exp(params[5]), np.exp(params[7])
    d = np.exp(params[8])
    # NOTE(review): exp() keeps p positive but does not keep it below 1; a logistic
    # transform exp(x)/(1+exp(x)) would bound it to (0, 1).
    p = np.exp(params[9])

    # Men's equations
    hM = hazardM(λM, wstarM, α, μM, σM)

    men_exit = M['dur'].count() * np.log(hM/(hM+ηM))
    men_sep = MU['dur'].count() * np.log(ηM)
    # Column 0 of the frame is 'dur', so this sums the unemployed men's durations.
    men_dur = - hM * np.sum(MU.values[:,0])
    # NOTE(review): dens_accepted already divides by α; confirm the extra 1/α against eq 13.
    men_wage = np.sum( np.log( (1/α) * dens_accepted( ME['wage'], α, μM, σM, wstarM) ) )

    # Women's equations
    hF = hazardF(λF, wstarF, α, μF, σF, p, d)

    women_exit = F['dur'].count() * np.log(hF/(hF+ηF))
    women_sep = FU['dur'].count() * np.log(ηF)
    women_dur = - hF * np.sum(FU.values[:,0])

    # Wage density for women is a (1-p, p) mixture of the plain and the prejudice densities.
    plain = ((1-p)/α) * dens_accepted( FE['wage'], α, μF, σF, wstarF)
    shifted = (p/α) * dens_accepted_prej( FE['wage'], α, μF, σF, wstarF, d)
    women_wage = np.sum( np.log( plain + shifted ) )

    return -(men_exit + men_sep + men_dur + men_wage
             + women_exit + women_sep + women_dur + women_wage)

In [None]:
# Check on log-likelihood

# Natural-scale starting values, in the order loglik_6 unpacks them.
param6 = [λ_M, λ_F, η_M, η_F, μ_M, σ_M, μ_F, σ_F, d_0, p_0]

# Map onto the optimiser's scale: logs everywhere except μ_M and μ_F (indices 4 and 6),
# mirroring the exp() calls inside loglik_6.
b6_0 = np.log(param6[0])
b6_1 = np.log(param6[1])
b6_2 = np.log(param6[2])
b6_3 = np.log(param6[3])
# b6_4 = np.log(param6[4])
b6_4 = param6[4]
b6_5 = np.log(param6[5])
# b6_6 = np.log(param6[6])
b6_6 = param6[6]
b6_7 = np.log(param6[7])
b6_8 = np.log(param6[8])
# NOTE(review): this starts p at exp(log(1)) = 1, not at p_0 stored in param6[9].
b6_9 = np.log(1)

init6 = [b6_0, b6_1, b6_2, b6_3, b6_4, b6_5, b6_6, b6_7, b6_8, b6_9]

# Evaluate the objective once at the starting point as a sanity check.
print(loglik_6(init6))

In [None]:
# Show the transformed starting vector handed to the optimiser.
print(init6)

In [None]:
# Minimise the negative log likelihood with scipy's default solver (no method, bounds
# or constraints are passed); est_6.x, est_6.fun and est_6.hess_inv are used below.
est_6 = minimize(loglik_6, init6)

est_6

In [None]:
# Coefficients
# Undo the transformation used inside loglik_6: exp() for everything except μM and μF.

λM_6 = np.exp(est_6.x[0])
λF_6 = np.exp(est_6.x[1])
ηM_6 = np.exp(est_6.x[2])
ηF_6 = np.exp(est_6.x[3])
# μM_6 = np.exp(est_6.x[4])
μM_6 = est_6.x[4]
σM_6 = np.exp(est_6.x[5])
# σM_6 = est_6.x[5]
# μF_6 = np.exp(est_6.x[6])
μF_6 = est_6.x[6]
σF_6 = np.exp(est_6.x[7])
# σF_6 = est_6.x[7]
d_6  = np.exp(est_6.x[8])
# Must stay in step with the transform of params[9] in loglik_6.
p_6  = np.exp(est_6.x[9])
# p_6  = np.exp(est_6.x[9])/(1+np.exp(est_6.x[9]))

coeff_6 = [λM_6, λF_6, ηM_6, ηF_6, μM_6, σM_6, μF_6, σF_6, d_6, p_6]

print(coeff_6)

In [None]:
# est_6.fun is the minimised objective, i.e. the NEGATIVE log likelihood.
# teststats reads this module-level name as its reference value.
lnL_6 = est_6.fun

print(lnL_6)

In [None]:
# Standard errors, objective value and LR p value; 10 is the number of parameters in loglik_6.
ts_6 = teststats(est_6.hess_inv, lnL_6, 10)

print(ts_6)

#### Estimation 3

In [None]:
def loglik_3( params: list ):
    """
    Negative log likelihood for Estimation 3 (prejudice and productivity
    differences, with λ and η shared by men and women).

    params is ordered as
    [log λ, log η, μM, log σM, μF, log σF, log d, log p];
    every entry except μM and μF is passed through exp() before use.

    Reads the module-level samples M, MU, ME, F, FU, FE and the constants
    wstarM, wstarF and α. Returns minus the summed log likelihood so the
    function can be handed to a minimiser.
    """

    λ = np.exp(params[0]) # same lambda for both genders
    η = np.exp(params[1]) # same eta for both genders
    μM, μF = params[2], params[4]
    σM, σF = np.exp(params[3]), np.exp(params[5])
    d = np.exp(params[6])
    # NOTE(review): exp() keeps p positive but does not keep it below 1.
    p = np.exp(params[7])

    # Men's equations
    hM = hazardM(λ, wstarM, α, μM, σM)

    men_exit = M['dur'].count() * np.log(hM/(hM+η))
    men_sep = MU['dur'].count() * np.log(η)
    # Column 0 of the frame is 'dur', so this sums the unemployed men's durations.
    men_dur = - hM * np.sum(MU.values[:,0])
    men_wage = np.sum( np.log( (1/α) * dens_accepted( ME['wage'], α, μM, σM, wstarM) ) )

    # Women's equations. The hazard uses the women's own (μF, σF), matching
    # loglik_6 and the women's wage densities below; it previously received
    # the men's (μM, σM).
    hF = hazardF(λ, wstarF, α, μF, σF, p, d)

    women_exit = F['dur'].count() * np.log(hF/(hF+η))
    women_sep = FU['dur'].count() * np.log(η)
    women_dur = - hF * np.sum(FU.values[:,0])

    # Wage density for women is a (1-p, p) mixture of the plain and the prejudice densities.
    plain = ((1-p)/α) * dens_accepted( FE['wage'], α, μF, σF, wstarF)
    shifted = (p/α) * dens_accepted_prej( FE['wage'], α, μF, σF, wstarF, d)
    women_wage = np.sum( np.log( plain + shifted ) )

    return -(men_exit + men_sep + men_dur + men_wage
             + women_exit + women_sep + women_dur + women_wage)

In [None]:
# Check on log-likelihood

# Natural-scale starting values, in the order loglik_3 unpacks them.
param3 = [λ_0, η_0, μ_M, σ_M, μ_F, σ_F, d_0, p_0]

# Logs everywhere except μ_M and μ_F (indices 2 and 4), mirroring loglik_3.
b3_0 = np.log(param3[0])
b3_1 = np.log(param3[1])
# b3_2 = np.log(param3[2])
b3_2 = param3[2]
b3_3 = np.log(param3[3])
# b3_4 = np.log(param3[4])
b3_4 = param3[4]
b3_5 = np.log(param3[5])
b3_6 = np.log(param3[6])
# NOTE(review): this starts p at exp(log(1)) = 1, not at p_0 stored in param3[7].
b3_7 = np.log(1)

init3 = [b3_0, b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7]

# Evaluate the objective once at the starting point as a sanity check.
print(loglik_3(init3))

In [None]:
# Minimise the Estimation 3 objective with scipy's default solver.
est_3 = minimize(loglik_3, init3)

est_3

In [None]:
# Coefficients
# Undo the transformation used inside loglik_3: exp() for everything except μM and μF.

λ_3 = np.exp(est_3.x[0])
η_3 = np.exp(est_3.x[1])
μM_3 = est_3.x[2]
σM_3 = np.exp(est_3.x[3])
μF_3 = est_3.x[4]
σF_3 = np.exp(est_3.x[5])
d_3 = np.exp(est_3.x[6])
p_3 = np.exp(est_3.x[7])

coeff_3 = [λ_3, η_3, μM_3, σM_3, μF_3, σF_3, d_3, p_3]

print(coeff_3)

In [None]:
# Minimised objective for Estimation 3, i.e. the NEGATIVE log likelihood.
lnL_3 = est_3.fun

print(lnL_3)

In [None]:
# Standard errors, objective value and LR p value; 8 is the number of parameters in loglik_3.
ts_3 = teststats(est_3.hess_inv, lnL_3, 8)

print(ts_3)

#### Estimation 5

In [None]:
def loglik_5( params: list ):
    """
    Negative log likelihood for Estimation 5 (prejudice, no productivity differences).

    params is ordered as
    [log λM, log λF, log ηM, log ηF, μ, log σ, log d, log p];
    every entry except μ is passed through exp() before use, and the single
    (μ, σ) pair is used for both genders.

    Reads the module-level samples M, MU, ME, F, FU, FE and the constants
    wstarM, wstarF and α. Returns minus the summed log likelihood.
    """

    λM, λF, ηM, ηF = (np.exp(params[k]) for k in (0, 1, 2, 3))
    μ = params[4]
    σ = np.exp(params[5])
    d = np.exp(params[6])
    # NOTE(review): exp() keeps p positive but does not keep it below 1.
    p = np.exp(params[7])

    # Men's equations
    hM = hazardM(λM, wstarM, α, μ, σ)

    men_exit = M['dur'].count() * np.log(hM/(hM+ηM))
    men_sep = MU['dur'].count() * np.log(ηM)
    # Column 0 of the frame is 'dur', so this sums the unemployed men's durations.
    men_dur = - hM * np.sum(MU.values[:,0])
    men_wage = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μ, σ, wstarM) ) )

    # Women's equations
    hF = hazardF(λF, wstarF, α, μ, σ, p, d)

    women_exit = F['dur'].count() * np.log(hF/(hF+ηF))
    women_sep = FU['dur'].count() * np.log(ηF)
    women_dur = - hF * np.sum(FU.values[:,0])

    # Wage density for women is a (1-p, p) mixture of the plain and the prejudice densities.
    plain = ((1-p)/α) * dens_accepted(FE['wage'], α, μ, σ, wstarF)
    shifted = (p/α) * dens_accepted_prej(FE['wage'], α, μ, σ, wstarF, d)
    women_wage = np.sum( np.log( plain + shifted ) )

    return -(men_exit + men_sep + men_dur + men_wage
             + women_exit + women_sep + women_dur + women_wage)

In [None]:
# Check on log-likelihood

# Natural-scale starting values, in the order loglik_5 unpacks them.
param5 = [λ_M, λ_F, η_M, η_F, μ_0, σ_0, d_0, p_0]

# Logs everywhere except μ (index 4), mirroring loglik_5. The values are read
# from param5; they were previously read from param6, which started μ and σ
# at the male values and d at μ_F instead of d_0.
b5_0 = np.log(param5[0])
b5_1 = np.log(param5[1])
b5_2 = np.log(param5[2])
b5_3 = np.log(param5[3])
b5_4 = param5[4]
b5_5 = np.log(param5[5])
b5_6 = np.log(param5[6])
# NOTE(review): this starts p at exp(log(1)) = 1, not at p_0 stored in param5[7].
b5_7 = np.log(1)

init5 = [b5_0, b5_1, b5_2, b5_3, b5_4, b5_5, b5_6, b5_7]

# Evaluate the objective once at the starting point as a sanity check.
print(loglik_5(init5))

In [None]:
# Minimise the Estimation 5 objective with scipy's default solver.
est_5 = minimize(loglik_5, init5)

est_5

In [None]:
# Coefficients
# Undo the transformation used inside loglik_5: exp() for everything except μ.

λM_5 = np.exp(est_5.x[0])
λF_5 = np.exp(est_5.x[1])
ηM_5 = np.exp(est_5.x[2])
ηF_5 = np.exp(est_5.x[3])
μ_5 = est_5.x[4]
σ_5 = np.exp(est_5.x[5])
d_5 = np.exp(est_5.x[6])
p_5 = np.exp(est_5.x[7])

coeff_5 = [λM_5, λF_5, ηM_5, ηF_5, μ_5, σ_5, d_5, p_5]

print(coeff_5)

In [None]:
# Minimised objective for Estimation 5, i.e. the NEGATIVE log likelihood.
lnL_5 = est_5.fun

print(lnL_5)

In [None]:
# Standard errors, objective value and LR p value; 8 is the number of parameters in loglik_5.
ts_5 = teststats(est_5.hess_inv, lnL_5, 8)

print(ts_5)

#### Estimation 2

In [None]:
def loglik_2( params: list ):
    """
    Negative log likelihood for Estimation 2 (prejudice, no productivity
    differences, with λ and η shared by men and women).

    params is ordered as [log λ, log η, μ, log σ, log d, log p];
    every entry except μ is passed through exp() before use.

    Reads the module-level samples M, MU, ME, F, FU, FE and the constants
    wstarM, wstarF and α. Returns minus the summed log likelihood.
    """

    # One arrival rate is estimated; it is used for men and women alike.
    λM = λF = np.exp(params[0])
    η = np.exp(params[1])
    μ = params[2]
    σ = np.exp(params[3])
    d = np.exp(params[4])
    # NOTE(review): exp() keeps p positive but does not keep it below 1.
    p = np.exp(params[5])

    # Men's equations
    hM = hazardM(λM, wstarM, α, μ, σ)

    men_exit = M['dur'].count() * np.log(hM/(hM+η))
    men_sep = MU['dur'].count() * np.log(η)
    # Column 0 of the frame is 'dur', so this sums the unemployed men's durations.
    men_dur = - hM * np.sum(MU.values[:,0])
    men_wage = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μ, σ, wstarM) ) )

    # Women's equations
    hF = hazardF(λF, wstarF, α, μ, σ, p, d)

    women_exit = F['dur'].count() * np.log(hF/(hF+η))
    women_sep = FU['dur'].count() * np.log(η)
    women_dur = - hF * np.sum(FU.values[:,0])

    # Wage density for women is a (1-p, p) mixture of the plain and the prejudice densities.
    plain = ((1-p)/α) * dens_accepted(FE['wage'], α, μ, σ, wstarF)
    shifted = (p/α) * dens_accepted_prej(FE['wage'], α, μ, σ, wstarF, d)
    women_wage = np.sum( np.log( plain + shifted ) )

    return -(men_exit + men_sep + men_dur + men_wage
             + women_exit + women_sep + women_dur + women_wage)

In [None]:
# Check on log-likelihood

# Natural-scale starting values, in the order loglik_2 unpacks them.
param2 = [λ_0, η_0, μ_0, σ_0, d_0, p_0]

# Logs everywhere except μ (index 2), mirroring loglik_2.
b2_0 = np.log(param2[0])
b2_1 = np.log(param2[1])
# b2_2 = np.log(param2[2])
b2_2 = param2[2]
b2_3 = np.log(param2[3])
b2_4 = np.log(param2[4])
# NOTE(review): this starts p at exp(log(1)) = 1, not at p_0 stored in param2[5].
b2_5 = np.log(1)

init2 = [b2_0, b2_1, b2_2, b2_3, b2_4, b2_5]

# Evaluate the objective once at the starting point as a sanity check.
print(loglik_2(init2))

In [None]:
# Minimise the Estimation 2 objective with scipy's default solver.
est_2 = minimize(loglik_2, init2)

est_2

In [None]:
# Coefficients
# Undo the transformation used inside loglik_2: exp() for everything except μ.

λ_2 = np.exp(est_2.x[0])
η_2 = np.exp(est_2.x[1])
μ_2 = est_2.x[2]
σ_2 = np.exp(est_2.x[3])
d_2 = np.exp(est_2.x[4])
p_2 = np.exp(est_2.x[5])

coeff_2 = [λ_2, η_2, μ_2, σ_2, d_2, p_2]

print(coeff_2)

In [None]:
# Minimised objective for Estimation 2, i.e. the NEGATIVE log likelihood.
lnL_2 = est_2.fun

print(lnL_2)

In [None]:
# Standard errors, objective value and LR p value; 6 is the number of parameters in loglik_2.
ts_2 = teststats(est_2.hess_inv, lnL_2, 6)

print(ts_2)

#### Estimation 4

In [None]:
def loglik_4( params: list ):
    """
    Negative log likelihood for Estimation 4 (productivity differences, no prejudice).

    params is ordered as
    [log λM, log λF, log ηM, log ηF, μM, log σM, μF, log σF];
    every entry except μM and μF is passed through exp() before use.
    p and d are fixed at 0, so the women's hazard and wage density have the
    same form as the men's.

    Reads the module-level samples M, MU, ME, F, FU, FE and the constants
    wstarM, wstarF and α. Returns minus the summed log likelihood.
    """

    λM, λF, ηM, ηF = (np.exp(params[k]) for k in (0, 1, 2, 3))
    μM, μF = params[4], params[6]
    σM, σF = np.exp(params[5]), np.exp(params[7])
    no_prejudice = 0  # value used for both p and d

    # Men's equations
    hM = hazardM(λM, wstarM, α, μM, σM)

    men_exit = M['dur'].count() * np.log(hM/(hM+ηM))
    men_sep = MU['dur'].count() * np.log(ηM)
    # Column 0 of the frame is 'dur', so this sums the unemployed men's durations.
    men_dur = - hM * np.sum(MU.values[:,0])
    men_wage = np.sum( np.log( (1/α) * dens_accepted( ME['wage'], α, μM, σM, wstarM) ) )

    # Women's equations
    hF = hazardF(λF, wstarF, α, μF, σF, no_prejudice, no_prejudice)

    women_exit = F['dur'].count() * np.log(hF/(hF+ηF))
    women_sep = FU['dur'].count() * np.log(ηF)
    women_dur = - hF * np.sum(FU.values[:,0])
    women_wage = np.sum( np.log( (1/α) * dens_accepted( FE['wage'], α, μF, σF, wstarF) ) )

    return -(men_exit + men_sep + men_dur + men_wage
             + women_exit + women_sep + women_dur + women_wage)

In [None]:
# Check on log-likelihood

# Natural-scale starting values, in the order loglik_4 unpacks them.
param4 = [λ_M, λ_F, η_M, η_F, μ_M, σ_M, μ_F, σ_F]

# Logs everywhere except μ_M and μ_F (indices 4 and 6), mirroring loglik_4.
b4_0 = np.log(param4[0])
b4_1 = np.log(param4[1])
b4_2 = np.log(param4[2])
b4_3 = np.log(param4[3])
b4_4 = param4[4]
b4_5 = np.log(param4[5])
b4_6 = param4[6]
b4_7 = np.log(param4[7])

init4 = [b4_0, b4_1, b4_2, b4_3, b4_4, b4_5, b4_6, b4_7]

# Evaluate the objective once at the starting point as a sanity check.
print(loglik_4(init4))

In [None]:
# Show the transformed starting vector handed to the optimiser.
print(init4)

In [None]:
# Minimise the Estimation 4 objective with scipy's default solver.
est_4 = minimize(loglik_4, init4)

est_4

In [None]:
# Coefficients
# Undo the transformation used inside loglik_4: exp() for everything except μM and μF.

λM_4 = np.exp(est_4.x[0])
λF_4 = np.exp(est_4.x[1])
ηM_4 = np.exp(est_4.x[2])
ηF_4 = np.exp(est_4.x[3])
μM_4 = est_4.x[4]
σM_4 = np.exp(est_4.x[5])
μF_4 = est_4.x[6]
σF_4 = np.exp(est_4.x[7])

coeff_4 = [λM_4, λF_4, ηM_4, ηF_4, μM_4, σM_4, μF_4, σF_4]

print(coeff_4)

In [None]:
# Minimised objective for Estimation 4, i.e. the NEGATIVE log likelihood.
lnL_4 = est_4.fun

print(lnL_4)

In [None]:
# Standard errors, objective value and LR p value; 8 is the number of parameters in loglik_4.
ts_4 = teststats(est_4.hess_inv, lnL_4, 8)

print(ts_4)

#### Estimation 1

In [None]:
def loglik_1( params: list ):
    """
    Negative log likelihood for Estimation 1 (productivity differences, no
    prejudice, with λ and η shared by men and women).

    params is ordered as [log λ, log η, μM, log σM, μF, log σF];
    every entry except μM and μF is passed through exp() before use.
    p and d are fixed at 0.

    Reads the module-level samples M, MU, ME, F, FU, FE and the constants
    wstarM, wstarF and α. Returns minus the summed log likelihood.
    """

    λ = np.exp(params[0]) # one arrival rate for both genders
    η = np.exp(params[1]) # one eta for both genders
    μM, μF = params[2], params[4]
    σM, σF = np.exp(params[3]), np.exp(params[5])
    no_prejudice = 0  # value used for both p and d

    # Men's equations
    hM = hazardM(λ, wstarM, α, μM, σM)

    men_exit = M['dur'].count() * np.log(hM/(hM+η))
    men_sep = MU['dur'].count() * np.log(η)
    # Column 0 of the frame is 'dur', so this sums the unemployed men's durations.
    men_dur = - hM * np.sum(MU.values[:,0])
    men_wage = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM) ) )

    # Women's equations
    hF = hazardF(λ, wstarF, α, μF, σF, no_prejudice, no_prejudice)

    women_exit = F['dur'].count() * np.log(hF/(hF+η))
    women_sep = FU['dur'].count() * np.log(η)
    women_dur = - hF * np.sum(FU.values[:,0])
    women_wage = np.sum( np.log( (1/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF) ) )

    return -(men_exit + men_sep + men_dur + men_wage
             + women_exit + women_sep + women_dur + women_wage)

In [None]:
# Check on log-likelihood

# Natural-scale starting values, in the order loglik_1 unpacks them.
param1 = [λ_0, η_0, μ_M, σ_M, μ_F, σ_F]

# Logs everywhere except μ_M and μ_F (indices 2 and 4), mirroring loglik_1.
b1_0 = np.log(param1[0])
b1_1 = np.log(param1[1])
# b1_2 = np.log(param1[2])
b1_2 = param1[2]
b1_3 = np.log(param1[3])
# b1_3 = param1[3]
# b1_4 = np.log(param1[4])
b1_4 = param1[4]
b1_5 = np.log(param1[5])
# b1_5 = param1[5]

init1 = [b1_0, b1_1, b1_2, b1_3, b1_4, b1_5]

# Evaluate the objective once at the starting point as a sanity check.
print(loglik_1(init1))

In [None]:
# Minimise the Estimation 1 objective with scipy's default solver.
est_1 = minimize(loglik_1, init1)

est_1

In [None]:
# Coefficients
# Undo the transformation used inside loglik_1: exp() for everything except μM and μF.

λ_1 = np.exp(est_1.x[0])
η_1 = np.exp(est_1.x[1])
μM_1 = est_1.x[2]
σM_1 = np.exp(est_1.x[3])
μF_1 = est_1.x[4]
σF_1 = np.exp(est_1.x[5])

coeff_1 = [λ_1, η_1, μM_1, σM_1, μF_1, σF_1]

print(coeff_1)

In [None]:
# Minimised objective for Estimation 1, i.e. the NEGATIVE log likelihood.
lnL_1 = est_1.fun

print(lnL_1)

In [None]:
# Standard errors, objective value and LR p value; 6 is the number of parameters in loglik_1.
ts_1 = teststats(est_1.hess_inv, lnL_1, 6)

print(ts_1)

### Output to Latex

In [None]:
# Displays the integers 1 through 9; the result is not assigned or used by the cells shown here.
np.arange(1, 10 )

In [None]:
# Displays the integers 10 through 18; the result is not assigned or used by the cells shown here.
np.arange(10, 19)

In [None]:
# Flatten each teststats result ([standard errors], [lnL], [p value]) into one flat list.
ts1 = [entry for part in ts_1 for entry in part]
ts2 = [entry for part in ts_2 for entry in part]
ts3 = [entry for part in ts_3 for entry in part]
ts4 = [entry for part in ts_4 for entry in part]
ts5 = [entry for part in ts_5 for entry in part]
ts6 = [entry for part in ts_6 for entry in part]

In [None]:
# NA pad coef arrays
def pad(coeff: np.array, length: int = 12):
    """
    Right-pad the list coeff with None, in place, until it has `length` entries.

    The same list object is returned. A list that already has `length` or more
    entries is returned unchanged; the earlier `!=` loop condition never
    terminated for a list longer than the target.
    """
    shortfall = length - len(coeff)
    if shortfall > 0:
        coeff.extend([None] * shortfall)
    return coeff

In [None]:
# Pad all vectors for dataframe. 
# pad() extends each list in place, so the return values are not needed here.
# After this cell every list has 12 entries and can be used as a DataFrame column.
pad(coeff_1)
pad(ts1)
pad(coeff_2)
pad(ts2)
pad(coeff_3)
pad(ts3)
pad(coeff_4)
pad(ts4)
pad(coeff_5)
pad(ts5)
pad(coeff_6)
pad(ts6)

In [None]:
# One column of coefficients and one column of test statistics per specification.
# NOTE(review): rows are positional. The specifications list their parameters in different
# orders and the 'se' columns append lnL and the p value after a varying number of standard
# errors, so a given row does not refer to the same quantity in every column.
out = pd.DataFrame({
    '(1)': coeff_1,
    'se1': ts1,
    '(2)': coeff_2,
    'se2': ts2,
    '(3)': coeff_3,
    'se3': ts3,
    '(4)': coeff_4,
    'se4': ts4,
    '(5)': coeff_5,
    'se5': ts5,
    '(6)': coeff_6,
    'se6': ts6
})

In [None]:
# Write the table values to CSV with four decimals; no header argument is passed,
# so column names are not written.
np.savetxt('./output.csv', out, fmt='%1.4f', delimiter=',')

## Scratch

In [None]:
def lambdaM(h: float, wstarM: float, α: float, μ: float, σ: float):
    """
    Back out lambda for men as h divided by a survival probability.

    The survival probability is stats.lognorm.sf with shape 1,
    loc = α·μ + (1-α)·wstarM and scale = α·σ, evaluated at the standardised
    point (wstarM - loc)/scale.
    """

    loc = (α*μ) + ((1-α)*wstarM)
    scale = α * σ
    standardised = (wstarM-loc)/scale

    return h/stats.lognorm.sf(standardised, 1, loc, scale)

In [None]:
def lambdaF(h: float, wstarF: float, α: float, μ: float, σ: float, p: float, d: float):
    """
    Back out lambda for women as h divided by a mixture of survival probabilities.

    Two stats.lognorm.sf values (shape 1, scale α·σ) are combined with weights
    (1-p) and p: the first uses loc = α·μ + (1-α)·wstarF, the second lowers
    that loc by α·d. Each is evaluated at (wstarF - loc)/scale.
    """

    scale = α * σ
    loc_plain = (α*μ) + ((1-α)*wstarF)
    loc_shifted = (α*μ) + ((1-α)*wstarF) - α*d

    sf_plain = stats.lognorm.sf((wstarF-loc_plain)/scale, 1, loc_plain, scale)
    sf_shifted = stats.lognorm.sf((wstarF-loc_shifted)/scale, 1, loc_shifted, scale)

    return h/((1-p)*sf_plain + p*sf_shifted)

### Accepted Wage Function and Distributions as in Eqs. 17 and 18

In [None]:
def wagefxn(wage: np.array, α: float, wstar: float):
    """
    Invert the wage equation (equation 8): returns (wage - (1-α)·wstar) / α.
    """

    return (wage - (1-α)*wstar)/α

In [None]:
def dens_accepted(wage: np.array, α: float, μ: float, σ: float, wstar: float):
    """
    Density of accepted wages in the location/scale form (eq 17 and first part of eq 18).

    Numerator: stats.lognorm.pdf at wage with shape 1, loc = α·μ + (1-α)·wstar
    and scale = α·σ. Denominator: the shape-1 lognormal survival function
    (loc 0, scale 1) at the standardised point (wstar - loc)/scale.
    """

    loc = (α*μ) + ((1-α)*wstar)
    scale = α*σ
    standardised = (wstar-loc)/scale

    density = stats.lognorm.pdf(wage, 1, loc, scale)
    accept_prob = stats.lognorm.sf(standardised, 1, loc=0, scale=1)

    return density / accept_prob

In [None]:
def dens_accepted_prej(wage: np.array, α: float, μ: float, σ: float, wstar: float, d: float):
    """
    Density of accepted wages with prejudice in the location/scale form (second part of eq 18).

    Same construction as the no-prejudice version, with loc lowered by α·d and
    the survival function evaluated at (wstar + d - loc)/scale.
    """

    loc = (α*μ) + ((1-α)*wstar) - (α*d)
    scale = α*σ
    standardised = (wstar+d-loc)/scale

    density = stats.lognorm.pdf(wage,1,loc,scale)
    accept_prob = stats.lognorm.sf(standardised, 1, loc=0, scale=1)

    return density / accept_prob

In [None]:
def hazardM(λ: float, wstar: float, α: float, μ: float, σ: float):
    """
    Hazard rate for men (eq 10 of the paper), location/scale form.

    Returns λ times the shape-1 lognormal survival function (loc 0, scale 1)
    at (wstar - loc)/scale, where loc = α·μ + (1-α)·wstar and scale = α·σ.
    """

    loc = (α*μ) + ((1-α)*wstar)
    scale = α*σ
    standardised = (wstar-loc)/scale

    return λ * stats.lognorm.sf(standardised, 1, loc=0, scale=1)

In [None]:
def hazardF(λ: float, wstar: float, α: float, μ: float, σ: float, p: float, d: float):
    """
    Hazard rate for women (eq 10 of the paper), location/scale form.

    Returns λ times a (1-p, p) mixture of two shape-1 lognormal survival
    values (loc 0, scale 1): one at (wstar - loc1)/scale and one at
    (wstar + d - loc2)/scale, where loc2 is loc1 lowered by α·d.
    """

    scale = α*σ
    loc_plain = (α*μ) + ((1-α)*wstar)
    loc_shifted = (α*μ) + ((1-α)*wstar) - (α*d)

    sf_plain = stats.lognorm.sf((wstar-loc_plain)/scale, 1, loc=0, scale=1)
    sf_shifted = stats.lognorm.sf((wstar+d-loc_shifted)/scale, 1, loc=0, scale=1)

    return λ * ((1-p)*sf_plain + p*sf_shifted)

### Accepted Wage Distribution as in Eq. 13

In [None]:
def dens_accepted(wage: np.array, α: float, μ: float, σ: float, wstar: float):
    """
    Density of accepted wages using scipy's lognormal distribution (end of eq 13).

    The lognormal pdf is evaluated at (wage - (1-α)·wstar)/α and divided by
    α times the lognormal survival function at wstar. μ and σ are the mean and
    standard deviation of log productivity.
    """

    pdf_in = (wage - (1-α)*wstar)/α
    sf_in = wstar

    # scipy parameterises the lognormal with shape s = σ and scale = exp(μ);
    # passing (1, μ, σ) positionally would set shape=1, loc=μ, scale=σ instead.
    scale = np.exp(μ)

    num = stats.lognorm.pdf(pdf_in, σ, loc=0, scale=scale)
    denom = stats.lognorm.sf(sf_in, σ, loc=0, scale=scale)

    return num / (α * denom)

In [None]:
def dens_accepted_prej(wage: np.array, α: float, μ: float, σ: float, wstar: float, d: float):
    """
    Density of accepted wages with prejudice using scipy's lognormal distribution (end of eq 13).

    The lognormal pdf is evaluated at (wage + α·d - (1-α)·wstar)/α and divided
    by α times the lognormal survival function at wstar + d. μ and σ are the
    mean and standard deviation of log productivity.
    """

    pdf_in = (wage + α*d - (1-α)*wstar)/α
    sf_in = wstar+d

    # scipy parameterises the lognormal with shape s = σ and scale = exp(μ);
    # passing (1, μ, σ) positionally would set shape=1, loc=μ, scale=σ instead.
    scale = np.exp(μ)

    num = stats.lognorm.pdf(pdf_in, σ, loc=0, scale=scale)
    denom = stats.lognorm.sf(sf_in, σ, loc=0, scale=scale)

    return num / (α * denom)

In [None]:
def hazardM(λ: float, wstar: float, α: float, μ: float, σ: float):
    """
    Hazard rate for men (eq 10 of the paper) using scipy's lognormal distribution.

    Returns λ times the lognormal survival function at wstar, where μ and σ
    are the mean and standard deviation of log productivity. α is not used.
    """

    # scipy parameterises the lognormal with shape s = σ and scale = exp(μ);
    # passing (1, μ, σ) positionally would set shape=1, loc=μ, scale=σ instead.
    mult = stats.lognorm.sf(wstar, σ, loc=0, scale=np.exp(μ))

    return λ * mult

In [None]:
def hazardF(λ: float, wstar: float, α: float, μ: float, σ: float, p: float, d: float):
    """
    Hazard rate for women (eq 10 of the paper) using scipy's lognormal distribution.

    Returns λ times a (1-p, p) mixture of the lognormal survival function at
    wstar and at wstar + d, where μ and σ are the mean and standard deviation
    of log productivity. α is not used.
    """

    sf_in1 = wstar
    sf_in2 = wstar+d

    # scipy parameterises the lognormal with shape s = σ and scale = exp(μ);
    # passing (1, μ, σ) positionally would set shape=1, loc=μ, scale=σ instead.
    scale = np.exp(μ)

    mult = ((1-p)*stats.lognorm.sf(sf_in1, σ, loc=0, scale=scale)
            + p*stats.lognorm.sf(sf_in2, σ, loc=0, scale=scale))

    return λ * mult