# 882 Replication Paper: Flabbi 2010

- Read in data
- Define log-normal pdf & cdf 
- Estimate men and women separately to allow the parameters to vary by gender

## Import Packages

In [1]:
# Data Manipulation 
import numpy as np
import pandas as pd

# General
import pdb

# Estimation
from scipy.optimize import minimize
import scipy.stats as stats
# import numdifftools as ndt

# Data Visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn.apionly as sns
from pylab import *



## Import Data 

- CPS data on gender, wages, duration of unemployment
- M: males
- F: females
- U: unemployed
- E: employed 
- {M,F} X {E,U} = {males,females} X {employed, unemployed}

In [2]:
# Load the raw extract and name its four columns
data = pd.read_csv('../data/est_c.csv')
data.columns = ['dur', 'wage', 'empl', 'women']

# Row masks over the full sample
_is_man = data['women'] == 0
_is_woman = data['women'] == 1
_no_job = data['empl'] == 0
_has_job = data['empl'] == 1

# One-way splits (counts are the ones recorded by the author)
M = data[_is_man]    # 1186 men
F = data[_is_woman]  # 993 women
U = data[_no_job]    # 45 unemployed
E = data[_has_job]   # 2134 employed

# Gender x employment cells
# NOTE(review): employed men were recorded as 1109, but 1186 - 18 = 1168 — confirm
ME = data[_is_man & _has_job]    # 1109 employed men
MU = data[_is_man & _no_job]     # 18 unemployed men
FE = data[_is_woman & _has_job]  # 966 employed women
FU = data[_is_woman & _no_job]   # 27 unemployed women

In [None]:
# Descriptive statistics for every column of the untrimmed sample
data.describe()

### Summary Statistics, without trimming

In [None]:
# Mean and standard deviation are reported for both wage and duration
agg_dict = {column: ['mean', 'std'] for column in ('wage', 'dur')}

In [None]:
# LaTeX table of wage/duration mean and std for each (women, empl) cell of the raw data
print(data.groupby(['women', 'empl']).agg(agg_dict).to_latex()) # by gender

In [None]:
# Same table pooled over gender: one row per employment status
print(data.groupby(['empl']).agg(agg_dict).to_latex()) # all

### Summary Statistics, with trimming

In [None]:
# 5th percentile of employed men's wages.
# ME is rebound to the trimmed sample further down, so re-running this cell
# afterwards gives a different number.
print(np.percentile(ME['wage'], 5))

In [None]:
# 5th percentile of employed women's wages.
# FE is rebound to the trimmed sample further down, so re-running this cell
# afterwards gives a different number.
print(np.percentile(FE['wage'], 5))

In [3]:
# Gender indicators
M_COND = (data['women'] == 0)
F_COND = (data['women'] == 1)

# Wage cutoffs are frozen literals (hard-coded percentiles) so that they do
# not move when the notebook is re-run.
M_WAGE_COND = (data['wage'] > 7.27)
F_WAGE_COND = (data['wage'] > 5.75625)

# Rows with a positive duration are always kept
DUR_COND = (data['dur'] > 0)

# Keep a row when it has a positive duration or clears its gender's cutoff.
# Equivalent grouping:
#   (M_COND & (M_WAGE_COND | DUR_COND)) | (F_COND & (F_WAGE_COND | DUR_COND))
_keep = DUR_COND | (M_COND & M_WAGE_COND) | (F_COND & F_WAGE_COND)
trim = data[_keep]

# Masks over the trimmed sample
_t_man = trim['women'] == 0
_t_woman = trim['women'] == 1
_t_no_job = trim['empl'] == 0
_t_has_job = trim['empl'] == 1

# One-way splits ("??" marks counts the author had not confirmed)
M = trim[_t_man]      # 1071 ?? men
F = trim[_t_woman]    # 944 ?? women
U = trim[_t_no_job]   # 45 unemployed
E = trim[_t_has_job]  # 1970 ?? employed

# Gender x employment cells
ME = trim[_t_man & _t_has_job]    # 1053 ?? employed men
MU = trim[_t_man & _t_no_job]     # 18 unemployed men
FE = trim[_t_woman & _t_has_job]  # 917 employed women
FU = trim[_t_woman & _t_no_job]   # 27 unemployed women

In [None]:
# LaTeX table of wage/duration mean and std for each (women, empl) cell of the trimmed data
print(trim.groupby(['women', 'empl']).agg(agg_dict).to_latex()) # by gender

In [None]:
# Same table for the trimmed data pooled over gender
print(trim.groupby(['empl']).agg(agg_dict).to_latex()) # all

## Figures 

- Distribution of wages, men and women

In [None]:
# Seaborn
# Two stacked panels: wages of employed men (top) and employed women (bottom)
fig, ax = plt.subplots(2, 1, figsize=(12, 8))

# Both panels use the same colour, histogram transparency and 50 bins
sns.distplot(ME['wage'], color='#4B9CD3', hist_kws={'alpha' : .3}, bins=50, ax=ax[0])
sns.distplot(FE['wage'], color='#4B9CD3', hist_kws={'alpha' : .3}, bins=50, ax=ax[1])

# The legend is the only thing that tells the panels apart
ax[0].legend(['Men'])
ax[1].legend(['Women']) 

plt.tight_layout()

In [None]:
# Write the figure built in the previous cell to disk with a transparent background
fig.savefig('./figures/fig1_2.png', bbox_inches='tight', transparent=True)

## Estimation 

- Estimations 1-3: Same arrival and termination rates
- Estimations 4-6: Gender specific arrival and termination rates

In [None]:
# Initial Conditions

# The lowest observed wage of the employed in each gender is used as wstar
wstarM = min(ME['wage'])
wstarF = min(FE['wage'])

# Starting values on the natural (untransformed) scale.  The loglik*
# functions below apply exp() to the λ, η, σ and d slots and a logistic map
# to the p slot of their `params` argument.
λ_M = 0.22
λ_F = 0.22
η_M = 0.005
η_F = 0.005
# α = 0.5  #FIXED
μ_M = 3.456
μ_F= 3.454
σ_M = 0.558
σ_F = 0.423
p = 0.5
d_F = 10
# d_M = 0   #FIXED

### Wage Function

In [None]:
def f_w(wage: np.array, α: float, wstar: float):
    """
    Invert the wage equation.

    Subtracts the (1-α)-weighted `wstar` from `wage` and scales the
    remainder by 1/α.  Works elementwise on arrays.
    """
    outside_part = (1-α)*wstar
    scale_up = 1/α
    return (wage - outside_part)*scale_up

### Estimation 1: Same arrival and termination, Productivity Differences, No Prejudice

In [None]:
def loglik1(params: list):
    """
    Negative log likelihood for Estimation 1 (log-normal productivity).

    Common arrival rate λ and termination rate η, gender-specific
    productivity distributions, no prejudice term.

    params (unconstrained scale, in this order):
        log λ, log η, μ_M, log σ_M, μ_F, log σ_F

    Reads the module-level samples MU, ME, FU, FE (column 0 = duration,
    column 1 = wage), the thresholds wstarM, wstarF and the helper f_w.

    Returns the NEGATIVE log likelihood so it can be handed to `minimize`.
    """
    λ = np.exp(params[0])
    η = np.exp(params[1])
    μ_M = params[2]
    σ_M = np.exp(params[3])
    μ_F = params[4]
    σ_F = np.exp(params[5])
    α = 0.5

    # scipy's lognorm takes (s, loc, scale): s is the log-scale std and
    # scale is exp(log-scale mean).  Passing (μ, σ) positionally would set
    # s = μ and loc = σ.
    G_M = stats.lognorm(s=σ_M, scale=np.exp(μ_M))
    G_F = stats.lognorm(s=σ_F, scale=np.exp(μ_F))

    dur_M, wage_M = MU.values[:, 0], ME.values[:, 1]
    dur_F, wage_F = FU.values[:, 0], FE.values[:, 1]

    # --- Men ---
    hM = λ * G_M.sf(wstarM)
    # Each unemployed observation contributes η/(η+h) * h * exp(-h*t);
    # summed in logs so exp(-h*Σt) cannot underflow to zero.
    L00 = dur_M.size * np.log(η * hM / (η + hM)) - hM * np.sum(dur_M)
    # Each employed observation contributes h/(η+h) * accepted-wage density
    L01a = G_M.logpdf(f_w(wage_M, α, wstarM)) - np.log(α) - G_M.logsf(wstarM)
    L01 = wage_M.size * np.log(hM / (η + hM)) + np.sum(L01a)

    # --- Women (no prejudice: p = 0, d_F = 0) ---
    hF = λ * G_F.sf(wstarF)
    L10 = dur_F.size * np.log(η * hF / (η + hF)) - hF * np.sum(dur_F)
    L11a = G_F.logpdf(f_w(wage_F, α, wstarF)) - np.log(α) - G_F.logsf(wstarF)
    L11 = wage_F.size * np.log(hF / (η + hF)) + np.sum(L11a)

    results = L00 + L01 + L10 + L11

    return -1*results

In [None]:
# loglik1 exponentiates the λ, η and σ slots of its argument, so the
# natural-scale starting values must be passed in logs (μ is untransformed).
init1 = [np.log(λ_M), np.log(η_M), μ_M, np.log(σ_M), μ_F, np.log(σ_F)]

est1 = minimize(loglik1, init1, method='nelder-mead', options={'maxiter': 10000})

In [None]:
# est1 default
# NOTE(review): est1 above is fitted with method='nelder-mead'; this label may be stale.

# Solution vector (on the transformed scale loglik1 expects) and the log
# likelihood there (loglik1 returns the negative, hence the sign flip)
print(est1.x)
print(-loglik1(est1.x))

In [None]:
# est1 nelder

# Solution vector (on the transformed scale loglik1 expects) and the log
# likelihood there (loglik1 returns the negative, hence the sign flip)
print(est1.x)
print(-loglik1(est1.x))

#### Test Statistics

In [None]:
# Standard Errors

# A Nelder-Mead result carries no `hess_inv`.  When it is missing, polish
# the optimum with BFGS, which reports an approximate inverse Hessian.
# The standard errors refer to the transformed parameters loglik1 receives.
if 'hess_inv' in est1:
    vcv_mle1 = est1.hess_inv
else:
    vcv_mle1 = minimize(loglik1, est1.x, method='BFGS').hess_inv
se1 = np.sqrt(np.diag(vcv_mle1))
print('Standard errors are given in ', se1)

# Likelihood Ratio Test
# NOTE(review): this compares the optimum with the starting values rather
# than with a nested restricted model — confirm that is the intended H0.

LR_val_1 = 2 * (-loglik1(est1.x) + loglik1(init1))
pval_h0_1 = 1.0 - stats.chi2.cdf(LR_val_1, 5)
print('LR value = ', LR_val_1, '. Chi squared of H0 with 5 degrees of freedom p-value = ', pval_h0_1)

### Estimation 2: Same arrival and termination, Prejudice, No Productivity Differences

In [None]:
def loglik2(params: list):
    """
    Negative log likelihood for Estimation 2 (log-normal productivity).

    Common arrival rate λ and termination rate η, one productivity
    distribution for both genders, prejudice term for women.

    params (unconstrained scale, in this order):
        log λ, log η, μ, log σ, logit p, log d_F

    Reads the module-level samples MU, ME, FU, FE (column 0 = duration,
    column 1 = wage), the thresholds wstarM, wstarF and the helper f_w.

    Returns the NEGATIVE log likelihood so it can be handed to `minimize`.
    """
    λ = np.exp(params[0])
    η = np.exp(params[1])
    μ = params[2]
    σ = np.exp(params[3])
    d_F = np.exp(params[5])
    p = 1.0 / (1.0 + np.exp(-params[4]))  # logistic map, avoids inf/inf for large inputs
    α = 0.5

    # scipy's lognorm takes (s, loc, scale): s is the log-scale std and
    # scale is exp(log-scale mean).  Passing (μ, σ) positionally would set
    # s = μ and loc = σ.
    G = stats.lognorm(s=σ, scale=np.exp(μ))

    dur_M, wage_M = MU.values[:, 0], ME.values[:, 1]
    dur_F, wage_F = FU.values[:, 0], FE.values[:, 1]

    # --- Men ---
    hM = λ * G.sf(wstarM)
    # Each unemployed observation contributes η/(η+h) * h * exp(-h*t);
    # summed in logs so exp(-h*Σt) cannot underflow to zero.
    L00 = dur_M.size * np.log(η * hM / (η + hM)) - hM * np.sum(dur_M)
    # Log densities are used directly; adding 1 inside the log (as a guard
    # against log(0)) would change the likelihood being maximised.
    L01a = G.logpdf(f_w(wage_M, α, wstarM)) - np.log(α) - G.logsf(wstarM)
    L01 = wage_M.size * np.log(hM / (η + hM)) + np.sum(L01a)

    # --- Women: two-component mixture with weights (1-p) and p ---
    hF = λ * ((1 - p) * G.sf(wstarF) + p * G.sf(wstarF + d_F))
    L10 = dur_F.size * np.log(η * hF / (η + hF)) - hF * np.sum(dur_F)
    L11a = np.log((1 - p) / α) + G.logpdf(f_w(wage_F, α, wstarF)) - G.logsf(wstarF)
    L11b = (np.log(p / α) + G.logpdf(f_w(wage_F + α * d_F, α, wstarF))
            - G.logsf(wstarF + d_F))
    # logaddexp(log a, log b) == log(a + b) without leaving log space
    L11 = wage_F.size * np.log(hF / (η + hF)) + np.sum(np.logaddexp(L11a, L11b))

    results = L00 + L01 + L10 + L11

    return -1*results

In [None]:
# loglik2 exponentiates the λ, η, σ and d_F slots and applies a logistic map
# to the p slot, so the natural-scale starting values are transformed here.
init2 = [np.log(λ_M), np.log(η_M), μ_M, np.log(σ_M), np.log(p / (1 - p)), np.log(d_F)]

est2 = minimize(loglik2, init2, method='nelder-mead', options={'maxiter': 10000})

In [None]:
# est2

# Solution vector (on the transformed scale loglik2 expects) and the log
# likelihood there (loglik2 returns the negative, hence the sign flip)
print(est2.x)
print(-loglik2(est2.x))

#### Test Statistics

In [None]:
# Standard Errors

# A Nelder-Mead result carries no `hess_inv`.  When it is missing, polish
# the optimum with BFGS, which reports an approximate inverse Hessian.
# The standard errors refer to the transformed parameters loglik2 receives.
if 'hess_inv' in est2:
    vcv_mle2 = est2.hess_inv
else:
    vcv_mle2 = minimize(loglik2, est2.x, method='BFGS').hess_inv
se2 = np.sqrt(np.diag(vcv_mle2))
print('Standard errors are given in ', se2)

# Likelihood Ratio Test
# NOTE(review): this compares the optimum with the starting values rather
# than with a nested restricted model — confirm that is the intended H0.

LR_val_2 = 2 * (-loglik2(est2.x) + loglik2(init2))
pval_h0_2 = 1.0 - stats.chi2.cdf(LR_val_2, 5)
print('LR value = ', LR_val_2, '. Chi squared of H0 with 5 degrees of freedom p-value = ', pval_h0_2)


### Estimation 3: Same arrival and termination, Productivity Differences and Prejudice

In [None]:
def loglik3(params: list):
    """
    Negative log likelihood for Estimation 3 (log-normal productivity).

    Common arrival rate λ and termination rate η, gender-specific
    productivity distributions, prejudice term for women.

    params (unconstrained scale, in this order):
        log λ, log η, μ_M, log σ_M, μ_F, log σ_F, logit p, log d_F

    Reads the module-level samples MU, ME, FU, FE (column 0 = duration,
    column 1 = wage), the thresholds wstarM, wstarF and the helper f_w.

    Returns the NEGATIVE log likelihood so it can be handed to `minimize`.
    """
    λ = np.exp(params[0])
    η = np.exp(params[1])
    μ_M = params[2]
    σ_M = np.exp(params[3])
    μ_F = params[4]
    σ_F = np.exp(params[5])
    d_F = np.exp(params[7])
    p = 1.0 / (1.0 + np.exp(-params[6]))  # logistic map, avoids inf/inf for large inputs
    α = 0.5

    # scipy's lognorm takes (s, loc, scale): s is the log-scale std and
    # scale is exp(log-scale mean).  Passing (μ, σ) positionally would set
    # s = μ and loc = σ.
    G_M = stats.lognorm(s=σ_M, scale=np.exp(μ_M))
    G_F = stats.lognorm(s=σ_F, scale=np.exp(μ_F))

    dur_M, wage_M = MU.values[:, 0], ME.values[:, 1]
    dur_F, wage_F = FU.values[:, 0], FE.values[:, 1]

    # --- Men ---
    hM = λ * G_M.sf(wstarM)
    # Each unemployed observation contributes η/(η+h) * h * exp(-h*t);
    # summed in logs so exp(-h*Σt) cannot underflow to zero.
    L00 = dur_M.size * np.log(η * hM / (η + hM)) - hM * np.sum(dur_M)
    # Each employed observation contributes h/(η+h) * accepted-wage density
    L01a = G_M.logpdf(f_w(wage_M, α, wstarM)) - np.log(α) - G_M.logsf(wstarM)
    L01 = wage_M.size * np.log(hM / (η + hM)) + np.sum(L01a)

    # --- Women: two-component mixture with weights (1-p) and p ---
    hF = λ * ((1 - p) * G_F.sf(wstarF) + p * G_F.sf(wstarF + d_F))
    L10 = dur_F.size * np.log(η * hF / (η + hF)) - hF * np.sum(dur_F)
    L11a = np.log((1 - p) / α) + G_F.logpdf(f_w(wage_F, α, wstarF)) - G_F.logsf(wstarF)
    L11b = (np.log(p / α) + G_F.logpdf(f_w(wage_F + α * d_F, α, wstarF))
            - G_F.logsf(wstarF + d_F))
    # logaddexp(log a, log b) == log(a + b) without leaving log space
    L11 = wage_F.size * np.log(hF / (η + hF)) + np.sum(np.logaddexp(L11a, L11b))

    results = L00 + L01 + L10 + L11

    return -1*results

In [None]:
# loglik3 exponentiates the λ, η, σ and d_F slots and applies a logistic map
# to the p slot, so the natural-scale starting values are transformed here.
init3 = [np.log(λ_M), np.log(η_M), μ_M, np.log(σ_M), μ_F, np.log(σ_F),
         np.log(p / (1 - p)), np.log(d_F)]

est3 = minimize(loglik3, init3, options={'maxiter': 10000})

In [None]:
# Solution vector (on the transformed scale loglik3 expects) and the log
# likelihood there (loglik3 returns the negative, hence the sign flip)
print(est3.x)
print(-loglik3(est3.x))

#### Test Statistics

In [None]:
# Standard Errors

# est3 was fitted without an explicit `method`; this cell relies on the
# result exposing `hess_inv` (scipy's default quasi-Newton solver does).
# The standard errors refer to the transformed parameters loglik3 receives.
vcv_mle3 = est3.hess_inv
se3 = np.sqrt(np.diag(vcv_mle3))
print('Standard errors are given in ', se3)

# Likelihood Ratio Test
# NOTE(review): this compares the optimum with the starting values rather
# than with a nested restricted model — confirm that is the intended H0.
# NOTE(review): 5 degrees of freedom are used although loglik3 has 8
# parameters; the 8-parameter Estimation 4 cell uses 7 — confirm.

LR_val_3 = 2 * (-loglik3(est3.x) + loglik3(init3))
pval_h0_3 = 1.0 - stats.chi2.cdf(LR_val_3, 5)
print('LR value = ', LR_val_3, '. Chi squared of H0 with 5 degrees of freedom p-value = ', pval_h0_3)


### Estimation 4: Productivity Differences, No Prejudice

In [None]:
def loglik4(params: list):
    """
    Negative log likelihood for Estimation 4 (log-normal productivity).

    Gender-specific arrival rates λ and termination rates η,
    gender-specific productivity distributions, no prejudice term.

    params (unconstrained scale, in this order):
        log λ_M, log λ_F, log η_M, log η_F, μ_M, log σ_M, μ_F, log σ_F

    Reads the module-level samples MU, ME, FU, FE (column 0 = duration,
    column 1 = wage), the thresholds wstarM, wstarF and the helper f_w.

    Returns the NEGATIVE log likelihood so it can be handed to `minimize`.
    """
    λ_M = np.exp(params[0])
    λ_F = np.exp(params[1])
    η_M = np.exp(params[2])
    η_F = np.exp(params[3])
    μ_M = params[4]
    σ_M = np.exp(params[5])
    μ_F = params[6]
    σ_F = np.exp(params[7])
    α = 0.5

    # scipy's lognorm takes (s, loc, scale): s is the log-scale std and
    # scale is exp(log-scale mean).  Passing (μ, σ) positionally would set
    # s = μ and loc = σ.
    G_M = stats.lognorm(s=σ_M, scale=np.exp(μ_M))
    G_F = stats.lognorm(s=σ_F, scale=np.exp(μ_F))

    dur_M, wage_M = MU.values[:, 0], ME.values[:, 1]
    dur_F, wage_F = FU.values[:, 0], FE.values[:, 1]

    # --- Men ---
    hM = λ_M * G_M.sf(wstarM)
    # Each unemployed observation contributes η/(η+h) * h * exp(-h*t);
    # summed in logs so exp(-h*Σt) cannot underflow to zero.
    L00 = dur_M.size * np.log(η_M * hM / (η_M + hM)) - hM * np.sum(dur_M)
    # Each employed observation contributes h/(η+h) * accepted-wage density
    L01a = G_M.logpdf(f_w(wage_M, α, wstarM)) - np.log(α) - G_M.logsf(wstarM)
    L01 = wage_M.size * np.log(hM / (η_M + hM)) + np.sum(L01a)

    # --- Women (no prejudice: p = 0, d_F = 0) ---
    hF = λ_F * G_F.sf(wstarF)
    L10 = dur_F.size * np.log(η_F * hF / (η_F + hF)) - hF * np.sum(dur_F)
    L11a = G_F.logpdf(f_w(wage_F, α, wstarF)) - np.log(α) - G_F.logsf(wstarF)
    L11 = wage_F.size * np.log(hF / (η_F + hF)) + np.sum(L11a)

    results = L00 + L01 + L10 + L11

    return -1*results

In [None]:
# loglik4 exponentiates the λ, η and σ slots of its argument, so the
# natural-scale starting values must be passed in logs (μ is untransformed).
init4 = [np.log(λ_M), np.log(λ_F), np.log(η_M), np.log(η_F),
         μ_M, np.log(σ_M), μ_F, np.log(σ_F)]

est4 = minimize(loglik4, init4, method='nelder-mead', options={'maxiter': 10000})

In [None]:
# Solution vector (on the transformed scale loglik4 expects) and the log
# likelihood there (loglik4 returns the negative, hence the sign flip)
print(est4.x)
print(-loglik4(est4.x))

#### Test Statistics

In [None]:
# Standard Errors

# A Nelder-Mead result carries no `hess_inv`.  When it is missing, polish
# the optimum with BFGS, which reports an approximate inverse Hessian.
# The standard errors refer to the transformed parameters loglik4 receives.
if 'hess_inv' in est4:
    vcv_mle4 = est4.hess_inv
else:
    vcv_mle4 = minimize(loglik4, est4.x, method='BFGS').hess_inv
se4 = np.sqrt(np.diag(vcv_mle4))
print('Standard errors are given in ', se4)

# Likelihood Ratio Test
# NOTE(review): this compares the optimum with the starting values rather
# than with a nested restricted model — confirm that is the intended H0.

LR_val_4 = 2 * (-loglik4(est4.x) + loglik4(init4))
pval_h0_4 = 1.0 - stats.chi2.cdf(LR_val_4, 7)
print('LR value = ', LR_val_4, '. Chi squared of H0 with 7 degrees of freedom p-value = ', pval_h0_4)


### Estimation 5: Prejudice, No Productivity Differences

In [None]:
def loglik5(params: list):
    """
    Negative log likelihood for Estimation 5 (log-normal productivity).

    Gender-specific arrival rates λ and termination rates η, one
    productivity distribution for both genders, prejudice term for women.

    params (unconstrained scale, in this order):
        log λ_M, log λ_F, log η_M, log η_F, μ, log σ, logit p, log d_F

    Reads the module-level samples MU, ME, FU, FE (column 0 = duration,
    column 1 = wage), the thresholds wstarM, wstarF and the helper f_w.

    Returns the NEGATIVE log likelihood so it can be handed to `minimize`.
    """
    λ_M = np.exp(params[0])
    λ_F = np.exp(params[1])
    η_M = np.exp(params[2])
    η_F = np.exp(params[3])
    μ = params[4]
    σ = np.exp(params[5])
    d_F = np.exp(params[7])
    p = 1.0 / (1.0 + np.exp(-params[6]))  # logistic map, avoids inf/inf for large inputs
    α = 0.5

    # scipy's lognorm takes (s, loc, scale): s is the log-scale std and
    # scale is exp(log-scale mean).  Passing (μ, σ) positionally would set
    # s = μ and loc = σ.
    G = stats.lognorm(s=σ, scale=np.exp(μ))

    dur_M, wage_M = MU.values[:, 0], ME.values[:, 1]
    dur_F, wage_F = FU.values[:, 0], FE.values[:, 1]

    # --- Men ---
    hM = λ_M * G.sf(wstarM)
    # Each unemployed observation contributes η/(η+h) * h * exp(-h*t);
    # summed in logs so exp(-h*Σt) cannot underflow to zero.
    L00 = dur_M.size * np.log(η_M * hM / (η_M + hM)) - hM * np.sum(dur_M)
    # Log densities are used directly; adding 1 inside the log (as a guard
    # against log(0)) would change the likelihood being maximised.
    L01a = G.logpdf(f_w(wage_M, α, wstarM)) - np.log(α) - G.logsf(wstarM)
    L01 = wage_M.size * np.log(hM / (η_M + hM)) + np.sum(L01a)

    # --- Women: two-component mixture with weights (1-p) and p ---
    hF = λ_F * ((1 - p) * G.sf(wstarF) + p * G.sf(wstarF + d_F))
    L10 = dur_F.size * np.log(η_F * hF / (η_F + hF)) - hF * np.sum(dur_F)
    L11a = np.log((1 - p) / α) + G.logpdf(f_w(wage_F, α, wstarF)) - G.logsf(wstarF)
    L11b = (np.log(p / α) + G.logpdf(f_w(wage_F + α * d_F, α, wstarF))
            - G.logsf(wstarF + d_F))
    # logaddexp(log a, log b) == log(a + b) without leaving log space
    L11 = wage_F.size * np.log(hF / (η_F + hF)) + np.sum(np.logaddexp(L11a, L11b))

    results = L00 + L01 + L10 + L11

    return -1*results

In [None]:
# loglik5 exponentiates the λ, η, σ and d_F slots and applies a logistic map
# to the p slot, so the natural-scale starting values are transformed here.
init5 = [np.log(λ_M), np.log(λ_F), np.log(η_M), np.log(η_F),
         μ_M, np.log(σ_M), np.log(p / (1 - p)), np.log(d_F)]

est5 = minimize(loglik5, init5, method='nelder-mead', options={'maxiter': 10000})

In [None]:
# Solution vector for Estimation 5, on the transformed scale loglik5 expects
est5.x

### Estimation 6: Productivity Differences and Prejudice

In [None]:
def loglik6(params: list):
    """
    Negative log likelihood for Estimation 6 (log-normal productivity).

    Gender-specific arrival rates λ and termination rates η,
    gender-specific productivity distributions, prejudice term for women.

    params (unconstrained scale, in this order):
        log λ_M, log λ_F, log η_M, log η_F,
        μ_M, log σ_M, μ_F, log σ_F, logit p, log d_F

    Reads the module-level samples MU, ME, FU, FE (column 0 = duration,
    column 1 = wage), the thresholds wstarM, wstarF and the helper f_w.

    Returns the NEGATIVE log likelihood so it can be handed to `minimize`.
    """
    λ_M = np.exp(params[0])
    λ_F = np.exp(params[1])
    η_M = np.exp(params[2])
    η_F = np.exp(params[3])
    μ_M = params[4]
    σ_M = np.exp(params[5])
    μ_F = params[6]
    σ_F = np.exp(params[7])
    d_F = np.exp(params[9])
    p = 1.0 / (1.0 + np.exp(-params[8]))  # logistic map, avoids inf/inf for large inputs
    α = 0.5

    # scipy's lognorm takes (s, loc, scale): s is the log-scale std and
    # scale is exp(log-scale mean).  Passing (μ, σ) positionally would set
    # s = μ and loc = σ.
    G_M = stats.lognorm(s=σ_M, scale=np.exp(μ_M))
    G_F = stats.lognorm(s=σ_F, scale=np.exp(μ_F))

    dur_M, wage_M = MU.values[:, 0], ME.values[:, 1]
    dur_F, wage_F = FU.values[:, 0], FE.values[:, 1]

    # --- Men ---
    hM = λ_M * G_M.sf(wstarM)
    # Each unemployed observation contributes η/(η+h) * h * exp(-h*t);
    # summed in logs so exp(-h*Σt) cannot underflow to zero.
    L00 = dur_M.size * np.log(η_M * hM / (η_M + hM)) - hM * np.sum(dur_M)
    # Log densities are used directly; adding 1 inside the log (as a guard
    # against log(0)) would change the likelihood being maximised.
    L01a = G_M.logpdf(f_w(wage_M, α, wstarM)) - np.log(α) - G_M.logsf(wstarM)
    L01 = wage_M.size * np.log(hM / (η_M + hM)) + np.sum(L01a)

    # --- Women: two-component mixture with weights (1-p) and p ---
    hF = λ_F * ((1 - p) * G_F.sf(wstarF) + p * G_F.sf(wstarF + d_F))
    L10 = dur_F.size * np.log(η_F * hF / (η_F + hF)) - hF * np.sum(dur_F)
    L11a = np.log((1 - p) / α) + G_F.logpdf(f_w(wage_F, α, wstarF)) - G_F.logsf(wstarF)
    L11b = (np.log(p / α) + G_F.logpdf(f_w(wage_F + α * d_F, α, wstarF))
            - G_F.logsf(wstarF + d_F))
    # logaddexp(log a, log b) == log(a + b) without leaving log space
    L11 = wage_F.size * np.log(hF / (η_F + hF)) + np.sum(np.logaddexp(L11a, L11b))

    results = L00 + L01 + L10 + L11

    return -1*results

In [None]:
# loglik6 exponentiates the λ, η, σ and d_F slots and applies a logistic map
# to the p slot, so the natural-scale starting values are transformed here.
init6 = [np.log(λ_M), np.log(λ_F), np.log(η_M), np.log(η_F),
         μ_M, np.log(σ_M), μ_F, np.log(σ_F),
         np.log(p / (1 - p)), np.log(d_F)]

est6 = minimize(loglik6, init6)#options={'maxiter': 10000})

In [None]:
# Solution vector for Estimation 6, on the transformed scale loglik6 expects
est6.x

In [None]:
# One column per estimation.  The solution vectors have different lengths
# (6 to 10 entries), so each is wrapped in a Series and shorter columns are
# padded with NaN.  Row k holds the k-th entry of each model's own parameter
# vector, so rows are not the same parameter across columns.
pd.DataFrame({
    name: pd.Series(est.x)
    for name, est in [('est1', est1), ('est2', est2), ('est3', est3),
                      ('est4', est4), ('est5', est5), ('est6', est6)]
})

## Scratch

### Initial Conditions

In [4]:
# Natural-scale starting values for the scratch likelihoods below.
# These rebind the module-level names set in the earlier "Initial Conditions" cell.
λ_M = 0.18
λ_F = 0.28
η_M = 0.003
η_F = 0.0077
μ_M = 3.456
μ_F= 3.454
σ_M = 0.558
σ_F = 0.423
p = 0.5
# Module-level `d`; lambdaF below reads this name directly
d = 15

### Parameters without distributional assumptions

In [5]:
# Lowest observed wage among the employed, by gender
wstarM = min(ME['wage'])
wstarF = min(FE['wage'])

# Number of unemployed divided by their summed durations (column 0 = 'dur'):
# men, women, and pooled
hM = MU['dur'].count()/sum(MU.values[:,0])
hF = FU['dur'].count()/sum(FU.values[:,0])
h = U['dur'].count()/sum(U.values[:,0])

# The corresponding h scaled by the unemployed/employed head-count ratio
ηM = hM * (MU['dur'].count()/ME['empl'].count())
ηF = hF * (FU['dur'].count()/FE['empl'].count())
η = h * (U['dur'].count()/E['empl'].count())

# Held fixed rather than estimated
α = 0.5

### Accepted Wage Function and Distributions

In [6]:
def dens_accepted(wage: np.array, α: float, μ: float, σ: float, wstar: float):
    """
    Density of accepted wages.

    Evaluates a unit-shape scipy log-normal with location
    α*μ + (1-α)*wstar and scale α*σ at `wage`, divided by the same
    distribution's survival function at the standardised threshold
    (wstar - location) / scale.

    NOTE(review): the threshold is standardised before being passed to a
    survival function that applies loc/scale again — confirm this is intended.
    """
    location = (α*μ) + ((1-α)*wstar)
    spread = α * σ
    offer_dist = stats.lognorm(1, location, spread)

    threshold = (wstar - location)/spread
    numerator = offer_dist.pdf(wage)
    denominator = offer_dist.sf(threshold)

    return numerator / denominator

In [7]:
def dens_accepted_prej(wage: np.array, α: float, μ: float, σ: float, wstar: float, d: float):
    """
    Density of accepted wages when prejudice is present.

    Same construction as the no-prejudice density, except that the
    location is lowered by α*d: a unit-shape scipy log-normal with
    location α*μ + (1-α)*wstar - α*d and scale α*σ, evaluated at `wage`
    and divided by its survival function at (wstar - location) / scale.

    NOTE(review): the threshold is standardised before being passed to a
    survival function that applies loc/scale again — confirm this is intended.
    """
    location = (α*μ) + ((1-α)*wstar) - α*d
    spread = α * σ
    offer_dist = stats.lognorm(1, location, spread)

    threshold = (wstar - location)/spread
    numerator = offer_dist.pdf(wage)
    denominator = offer_dist.sf(threshold)

    return numerator / denominator

In [8]:
def lambdaM(h: float, wstarM: float, α: float, μ: float, σ: float):
    """
    Back out lambda for men from h.

    Divides `h` by the survival function of a unit-shape scipy log-normal
    (location α*μ + (1-α)*wstarM, scale α*σ) evaluated at the
    standardised threshold (wstarM - location) / scale.

    NOTE(review): the threshold is standardised before being passed to a
    survival function that applies loc/scale again — confirm this is intended.
    """
    location = (α*μ) + ((1-α)*wstarM)
    spread = α * σ
    threshold = (wstarM-location)/spread

    accept_prob = stats.lognorm(1, location, spread).sf(threshold)
    return h/accept_prob

In [9]:
def lambdaF(h: float, wstarF: float, α: float, μ: float, σ: float, p: float, d: float = None):
    """
    Back out lambda for women from h.

    Divides `h` by a (1-p, p) mixture of two survival functions.  Both
    are unit-shape scipy log-normals with scale α*σ; the first has
    location α*μ + (1-α)*wstarF and the second has that location lowered
    by α*d.  Each is evaluated at its own standardised threshold
    (wstarF - location) / scale.

    Parameters
    ----------
    h, wstarF, α, μ, σ, p : float
        As described above.
    d : float, optional
        Prejudice shift.  When omitted, the module-level `d` is used,
        which is what this function always did before the parameter
        existed.

    NOTE(review): the threshold is standardised before being passed to a
    survival function that applies loc/scale again — confirm this is intended.
    """
    if d is None:
        # Backward compatibility: fall back to the notebook-level `d`
        d = globals()['d']

    l1 = (α*μ) + ((1-α)*wstarF)
    l2 = (α*μ) + ((1-α)*wstarF) - α*d
    s = α * σ
    shape = 1

    sf_in1 = (wstarF-l1)/s
    sf_in2 = (wstarF-l2)/s

    denom = (1-p)*stats.lognorm.sf(sf_in1, shape, l1, s) + p*stats.lognorm.sf(sf_in2, shape, l2, s)

    return h/denom

In [None]:
# def hM(λM: float, wstarM: float, α: float, μ: float, σ: float):
    
#     l = (α*μ) + ((1-α)*wstarM)
#     s = α * σ
#     shape = .5
    
#     return λM * stats.lognorm.sf(wstarM,shape, l, s)

In [None]:
# def hF(λF: float, wstarF: float, α: float, μ: float, σ: float, p: float):
    
#     l1 = (α*μ) + ((1-α)*wstarF)
#     l2 = (α*μ) + ((1-α)*wstarF) - α*d
#     s = α * σ
#     shape = .5
    
#     mult = (1-p)*stats.lognorm.sf(wstarF, shape, l1, s) + p*stats.lognorm.sf(wstarF, shape, l2, s)
    
#     return λF * mult

### Likelihood Functions

In [10]:
def loglik_6( params: list ):
    """
    Calculates log likelihood with prejudice and productivity differences 
    
    Estimation 6 (ηM, ηF, hM, hF)
    
    params (unconstrained scale, in this order):
        log λM, log λF, log ηM, log ηF, μM, log σM, μF, log σF, log d, logit p

    Reads the module-level hM, hF, wstarM, wstarF, α and the samples
    M, F, MU, FU, ME, FE.

    Returns the log likelihood itself (NOT its negative): negate it before
    passing it to a minimiser.

    NOTE(review): hM and hF are module-level constants and λM/λF are
    recomputed from them but never enter the returned value, so the result
    does not depend on params[0] or params[1].
    """
    
    λM = np.exp(params[0])
    λF = np.exp(params[1])
    ηM = np.exp(params[2])
    ηF = np.exp(params[3])
    μM = params[4]
    σM = np.exp(params[5])
    μF = params[6]
    σF = np.exp(params[7])
    d = np.exp(params[8])
    p = np.exp(params[9])/(1+np.exp(params[9]))

    
    # Men's equations 
    λM = lambdaM(hM, wstarM, α, μM, σM)
    
    a = M['dur'].count() * np.log(hM/(hM+ηM))
    b = MU['dur'].count() * np.log(ηM)
    c = - hM * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM) ) )
    
    # Women's equations
    # Arguments follow lambdaF's signature (h, wstarF, α, μ, σ, p) and use
    # the women's μ/σ; the previous call passed p in the wstarF slot.
    λF = lambdaF(hF, wstarF, α, μF, σF, p)
    
    f = F['dur'].count() * np.log(hF/(hF+ηF))
    g = FU['dur'].count() * np.log(ηF)
    h = - hF * np.sum(FU.values[:,0])
    
    # Mixture of the no-prejudice and prejudice densities, weights (1-p) and p
    y = ((1-p)/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF)
    z = (p/α) * dens_accepted_prej(FE['wage'], α, μF, σF, wstarF, d)

    i = np.sum( np.log( y + z ) )
    
    return (a + b + c + e + f + g + h + i)

In [17]:
# Check on log-likelihood

# Natural-scale values in the order loglik_6 expects (d before p)
param6 = [λ_M, λ_F, η_M, η_F, μ_M, σ_M, μ_F, σ_F, d, p]

# Map to the unconstrained scale: logs for the slots loglik_6 exponentiates,
# μ left as is
b0 = np.log(param6[0])
b1 = np.log(param6[1])
b2 = np.log(param6[2])
b3 = np.log(param6[3])
b4 = param6[4]
b5 = np.log(param6[5])
b6 = param6[6]
b7 = np.log(param6[7])
b8 = np.log(param6[8])
# log(1) = 0, which loglik_6's logistic map turns into p = 0.5; param6[9] is not read
b9 = np.log(1)

# Rebinds the `init6` name used by the Estimation 6 cell earlier in the notebook
init6 = [b0, b1, b2, b3, b4, b5, b6, b7, b8,b9]

print(loglik_6(init6))

5947.5045035078465


In [None]:
# loglik_6 returns the log likelihood itself (not its negative), so the
# objective is negated here; minimising it directly would look for the
# worst-fitting parameters.
est_6 = minimize(lambda b: -loglik_6(b), init6)

est_6

In [None]:
# Coefficients

# Undo the transformations loglik_6 applies to its argument
_sol = est_6.x

# Rates enter in logs
λM, λF, ηM, ηF = np.exp(_sol[0]), np.exp(_sol[1]), np.exp(_sol[2]), np.exp(_sol[3])

# μ is untransformed, σ enters in logs
μM, σM = _sol[4], np.exp(_sol[5])
μF, σF = _sol[6], np.exp(_sol[7])

# d enters in logs, p through a logistic map
d = np.exp(_sol[8])
p = np.exp(_sol[9])/(1+np.exp(_sol[9]))

print(λM, λF, ηM, ηF, μM, σM, μF, σF, d, p)

In [None]:
def loglik_3( params: list ):
    """
    Calculates log likelihood with prejudice and productivity differences 
    
    Estimation 3 (η and h not gender specific)
    
    Parameters to estimate: 
        λ # see fxn elsewhere
        μM
        σM
        μF
        σF
        d
        p

    NOTE(review): `params` is not read yet; h, η, α, μM, σM, μF, σF, d, p,
    wstarM and wstarF are all taken from module-level names.

    Returns the log likelihood itself (not its negative).
    """

    
    # Men's equations 
    a = M['dur'].count() * np.log(h/(h+η))
    b = MU['dur'].count() * np.log(η)
    c = - h * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM) ) )
    
    
    # Women's equations
    f = F['dur'].count() * np.log(h/(h+η))
    g = FU['dur'].count() * np.log(η)
    # Stored under a new name: assigning to `h` here would make `h` local to
    # the function and the reads above would raise UnboundLocalError.
    k = - h * np.sum(FU.values[:,0])
    
    y = ((1-p)/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF)
    z = (p/α) * dens_accepted_prej(FE['wage'], α, μF, σF, wstarF, d)

    i = np.sum( np.log( y + z ) )
    
    
    return a + b + c + e + f + g + k + i

In [None]:
def loglik_5( params: list ):
    """
    Calculates log likelihood with prejudice, no productivity differences. 
    
    Estimation 5 (ηM, ηF, hM, hF)
    
    Parameters to estimate: 
        λM # see fxn elsewhere
        λF # see fxn elsewhere
        μ
        σ
        p
        d

    NOTE(review): `params` is not read yet; hM, hF, ηM, ηF, α, μ, σ, p, d,
    wstarM and wstarF are all taken from module-level names, and no
    module-level μ or σ is defined in the visible part of the notebook.

    Returns the log likelihood itself (not its negative).
    """

    
    # Men's equations 
    a = M['dur'].count() * np.log(hM/(hM+ηM))
    b = MU['dur'].count() * np.log(ηM)
    c = - hM * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μ, σ, wstarM) ) )
    
    
    # Women's equations
    f = F['dur'].count() * np.log(hF/(hF+ηF))
    # .count must be called; multiplying the bound method raises TypeError
    g = FU['dur'].count() * np.log(ηF)
    h = - hF * np.sum(FU.values[:,0])
    
    y = ((1-p)/α) * dens_accepted(FE['wage'], α, μ, σ, wstarF)
    z = (p/α) * dens_accepted_prej(FE['wage'], α, μ, σ, wstarF, d)

    i = np.sum( np.log( y + z ) )
    
    
    return a + b + c + e + f + g + h + i

In [None]:
def loglik_2( params: list ):
    """
    Calculates log likelihood with prejudice, no productivity differences. 
    
    Estimation 2 (η and h not gender specific)
    
    Parameters to estimate: 
        λ # see fxn elsewhere
        μ
        σ
        p
        d

    NOTE(review): `params` is not read yet; h, η, α, μ, σ, p, d, wstarM and
    wstarF are all taken from module-level names, and no module-level μ or
    σ is defined in the visible part of the notebook.

    Returns the log likelihood itself (not its negative).
    """

    
    # Men's equations 
    a = M['dur'].count() * np.log(h/(h+η))
    b = MU['dur'].count() * np.log(η)
    c = - h * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μ, σ, wstarM) ) )
    
    
    # Women's equations
    f = F['dur'].count() * np.log(h/(h+η))
    # .count must be called; multiplying the bound method raises TypeError
    g = FU['dur'].count() * np.log(η)
    # Stored under a new name: assigning to `h` here would make `h` local to
    # the function and the reads above would raise UnboundLocalError.
    k = - h * np.sum(FU.values[:,0])
    
    y = ((1-p)/α) * dens_accepted(FE['wage'], α, μ, σ, wstarF)
    z = (p/α) * dens_accepted_prej(FE['wage'], α, μ, σ, wstarF, d)

    i = np.sum( np.log( y + z ) )
    
    
    return a + b + c + e + f + g + k + i

In [None]:
def loglik_4( params: list ):
    """
    Calculates log likelihood with productivity differences, no prejudice
    
    Estimation 4 (ηM, ηF, hM, hF)
    
    Parameters to estimate: 
        λM # see fxn elsewhere
        λF # see fxn elsewhere
        μM
        σM
        μF
        σF

    NOTE(review): `params` is not read yet; hM, hF, ηM, ηF, α, μM, σM, μF,
    σF, wstarM and wstarF are all taken from module-level names.

    Returns the log likelihood itself (not its negative).
    """

    
    # Men's equations 
    a = M['dur'].count() * np.log(hM/(hM+ηM))
    b = MU['dur'].count() * np.log(ηM)
    c = - hM * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM) ) )
    
    
    # Women's equations
    f = F['dur'].count() * np.log(hF/(hF+ηF))
    # .count must be called; multiplying the bound method raises TypeError
    g = FU['dur'].count() * np.log(ηF)
    h = - hF * np.sum(FU.values[:,0])
    
    y = (1/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF)

    i = np.sum( np.log( y ) )
    
    
    return a + b + c + e + f + g + h + i

In [None]:
def loglik_1( params: list ):
    """
    Calculates log likelihood with productivity differences, no prejudice
    
    Estimation 1 (η and h not gender specific)
    
    Parameters to estimate: 
        λ
        μM
        σM
        μF
        σF

    NOTE(review): `params` is not read yet; h, η, α, μM, σM, μF, σF, wstarM
    and wstarF are all taken from module-level names.

    Returns the log likelihood itself (not its negative).
    """

    
    # Men's equations 
    a = M['dur'].count() * np.log(h/(h+η))
    b = MU['dur'].count() * np.log(η)
    c = - h * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM) ) )
    
    
    # Women's equations
    f = F['dur'].count() * np.log(h/(h+η))
    # .count must be called; multiplying the bound method raises TypeError
    g = FU['dur'].count() * np.log(η)
    # Stored under a new name: assigning to `h` here would make `h` local to
    # the function and the reads above would raise UnboundLocalError.
    k = - h * np.sum(FU.values[:,0])
    
    y = (1/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF)

    i = np.sum( np.log( y ) )
    
    
    return a + b + c + e + f + g + k + i