# 882 Replication with SOEP Data

- SOEP from CPS, 2017
- Estimation strategy from Flabbi 2010 

- Need to add education and race for homogeneity in types

## Import Packages

In [2]:
# Data Manipulation 
import numpy as np
import pandas as pd

# General
import pdb

# Estimation
from scipy.optimize import minimize
import scipy.stats as stats
# import numdifftools as ndt

# Data Visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn.apionly as sns
from pylab import *



## Import Data 

- SOEP (Germany) data on gender, wages, duration of unemployment
- M: males
- F: females
- U: unemployed
- E: employed 
- {M,F} X {E,U} = {males,females} X {employed, unemployed}

In [9]:
# Load the merged SOEP panel, keep the 2017 wave, build indicator columns for
# sex / employment status / education, and restrict the sample to ages 30-55
# who are either full-time employed or not employed.
#
# NOTE(review): only df, df1, df_sex, df_empl, df_edu, df2, df3 and df4 are
# actually assigned in this cell. Later cells read `df5` and `df7`, which are
# only produced by the commented-out code below, so the notebook does not
# survive Restart & Run All as written.
df=pd.read_stata('../data/merged.dta') # 624,562
df.columns = ['pid', 'year', 'inc_gro', 'inc_net', 'pos', 'emplsta', 'change', 'change_reason', 'dur', 'sex', 'age', 'hours', 'edu']

# Only the year 2017
df1 = df[df['year']==2017] # 31,630

# Turning categorical into dummies
df_sex = pd.get_dummies(df1, columns=['sex'])
# 1 if man, 2 if woman. 
df_empl = pd.get_dummies(df_sex, columns=['emplsta'])
# 5 if unemployed, 1 if full time
df_edu = pd.get_dummies(df_empl, columns=['edu'])
# 6 if bachelors, 7 if masters, 8 if doctorate

# Keeping only the necessary variables 
# (the dummy column names are built from the category labels stored in the .dta file)
df2 = df_edu[['inc_net', 'dur', 'age', 'sex_[2] weiblich', 'emplsta_[1] Voll erwerbstaetig', 'emplsta_[5] Nicht erwerbstaetig', 'hours', 'edu_[6] Bachelor s or equivalent level', 'edu_[7] Master s or equivalent level', 'edu_[8] Doctoral or equivalent level']]
df2.columns = ['wage', 'dur', 'age', 'women', 'ft_empl', 'unempl', 'hours', 'college', 'masters', 'phd']

# Age restriction: strictly older than 29 and strictly younger than 56
AGE_LOW = (df2['age']>29)
AGE_HIGH = (df2['age']<56)
AGE_COND = AGE_LOW & AGE_HIGH

df3 = df2[AGE_COND==True] # 15,443

# Creating condition for FT employment or unemployed only 
FT_COND = (df3['ft_empl']==1)
UN_COND = (df3['unempl']==1)
EMPL_COND = FT_COND | UN_COND

df4 = df3[EMPL_COND==True] # 11,533

# ---------------------------------------------------------------------------
# Everything below is disabled. It is kept because it is the only place where
# df5, df6 and df7 (and the 'empl' column used by later cells) are defined.
# NOTE(review): two different `df5 = ...` assignments appear below; confirm
# which one produced the recorded `df5.describe()` output.
# ---------------------------------------------------------------------------

# # Ensuring positive hours, wages for employed; positive unemployment duration for unemployed
# POS_WAGE = (df4['wage']>0)
# POS_HOURS = (df4['hours']>0)
# EMPL = (df4['ft_empl']==1)
# EMPLVAR_COND = POS_WAGE & POS_HOURS & EMPL

# POS_DUR = (df4['dur']>0)
# UNEMPL = (df4['unempl']==1)
# UNEMPLVAR_COND = POS_DUR & UNEMPL

# CONSIST_COND = EMPLVAR_COND | UNEMPLVAR_COND

# df5 = df4[CONSIST_COND==True]

# # Remove (-2) for "does not apply" in wage, hours for unemployed; dur for employed. Change wage to hourly for employed
# NOTE(review): UN_COND is a boolean Series, so `if(UN_COND==True)` would need
# a row-wise mask (e.g. .loc[...]) rather than a plain `if` if this is revived.
# if(UN_COND==True):
#     df4.loc[df4['wage']<0, 'wage']=0
#     df4.loc[df4['hours']<0, 'hours']=0
# else:
#     df4.loc[df4['dur']<0, 'dur']=0 
#     df4['wage'] = df4['wage'] / df4['hours']

# # Keeping only college or more educated 
# BS_COND = (df4['college']==1) 
# MS_COND = (df4['masters']==1) 
# PHD_COND = (df4['phd']==1)
# EDU_COND = BS_COND | MS_COND | PHD_COND

# df5 = df4[EDU_COND==True] #3,083


# # Dropping unnecessary columns (age, unempl, edu, hours) and reorganizing to be consistent with CPS organization
# NOTE(review): 'empl' is not among the columns assigned to df2 above
# ('ft_empl' / 'unempl' are); it would have to be created before this selection.
# df6 = df5[['dur', 'wage', 'empl', 'women']]

# # Ensure not unemployment spell for employed
# UNEMPL_COND = (df6['empl']==0)
# POS_DUR_COND2 = (df6['dur']>0)
# ZERO_DUR_COND = (df6['dur']==0)
# POS_WAGE_COND = (df6['wage']>0)
# EMPL_COND2 = (df6['empl']==1)
# ZERO_WAGE_COND = (df6['wage']==0)
# CONSIST_COND = (EMPL_COND2 & ZERO_DUR_COND & POS_WAGE_COND) | (UNEMPL_COND & POS_DUR_COND2 & ZERO_WAGE_COND)

# df7 = df6[CONSIST_COND==True] #6910

In [10]:
# Summary statistics of the filtered sample.
# NOTE(review): `df5` is only assigned inside commented-out code in the data
# preparation cell, so this raises NameError on a fresh kernel.
df5.describe()

Unnamed: 0,wage,dur,age,women,ft_empl,unempl,hours,college,masters,phd
count,8439.0,8439.0,8439.0,8439.0,8439.0,8439.0,8439.0,8439.0,8439.0,8439.0
mean,1637.395426,1.635618,42.824978,0.369831,0.707311,0.292689,27.070255,0.160683,0.084015,0.019078
std,1418.370184,3.489719,7.362296,0.482787,0.455024,0.455024,18.973017,0.36726,0.277426,0.136808
min,-2.0,-1.0,30.0,0.0,0.0,0.0,-2.0,0.0,0.0,0.0
25%,-2.0,0.0,37.0,0.0,0.0,0.0,-2.0,0.0,0.0,0.0
50%,1670.0,0.4,43.0,0.0,1.0,0.0,38.5,0.0,0.0,0.0
75%,2400.0,1.4,49.0,1.0,1.0,1.0,40.0,0.0,0.0,0.0
max,20000.0,33.0,55.0,1.0,1.0,1.0,75.0,1.0,1.0,1.0


In [None]:
# Change wage to be hourly 
# (disabled: the conversion below would divide by 40 hours * 4 weeks)

# df7['wage'] = df7['wage'] / (40*4)

# NOTE(review): `df7` is only assigned inside commented-out code in the data
# preparation cell, so this raises NameError on a fresh kernel.
df7.describe()

In [None]:
# Split the estimation sample by gender and employment status, using the same
# short names as the CPS analysis: {M, F} x {E, U}.

data = df7

# One-way splits
M = data.loc[data['women'] == 0]  # men
F = data.loc[data['women'] == 1]  # women
U = data.loc[data['empl'] == 0]   # unemployed
E = data.loc[data['empl'] == 1]   # employed

# Two-way splits, derived from the gender frames
ME = M.loc[M['empl'] == 1]  # employed men
MU = M.loc[M['empl'] == 0]  # unemployed men
FE = F.loc[F['empl'] == 1]  # employed women
FU = F.loc[F['empl'] == 0]  # unemployed women

In [None]:
# Summary statistics for the unemployed subsample.
U.describe()

### Summary Statistics, without trimming

In [None]:
# Aggregations for the summary tables: mean, standard deviation and number of
# observations, for both wages and unemployment durations.
agg_dict = {column: ['mean', 'std', 'count'] for column in ('wage', 'dur')}

In [None]:
# LaTeX table of wage/duration summary statistics by gender and employment status (untrimmed sample).
print(data.groupby(['women', 'empl']).agg(agg_dict).to_latex()) # by gender

In [None]:
# LaTeX table of wage/duration summary statistics by employment status only (untrimmed sample).
print(data.groupby(['empl']).agg(agg_dict).to_latex()) # all

In [None]:
# Seaborn: wage distribution of the employed, one panel per gender
# (men on top, women below).
fig, ax = plt.subplots(2, 1, figsize=(12, 8))

for panel, wages, label in zip(ax, (ME['wage'], FE['wage']), ('Men', 'Women')):
    sns.distplot(wages, color='#4B9CD3', hist_kws={'alpha' : .3}, bins=50, ax=panel)
    panel.legend([label])

plt.tight_layout()

### Summary Statistics, with trimming

**TODO:** Winsorize instead of trimming, as a robustness check on the trimming amount.

In [None]:
# 1st percentile of employed men's wages.
print(np.percentile(ME['wage'], 1))

In [None]:
# 95th percentile of employed men's wages.
print(np.percentile(ME['wage'],95))

In [None]:
# 1st percentile of employed women's wages.
print(np.percentile(FE['wage'], 1))

In [None]:
# 95th percentile of employed women's wages.
print(np.percentile(FE['wage'],95))

In [None]:
# # Only trim bottom 

# M_WAGE_COND = (data['wage'] > 7.786249999999999) #hard coded percentile so it does not continually update
# M_COND = (data['women'] == 0)

# F_WAGE_COND = (data['wage'] > 6.01125) #hard coded percentile so it does not continually update
# F_COND = (data['women'] == 1)

# DUR_COND = (data['dur'] > 0)

# trim = data[ (M_COND & M_WAGE_COND) | (F_COND & F_WAGE_COND) | DUR_COND ]

# M = trim[trim['women']==0] 
# F = trim[trim['women']==1] 
# U = trim[trim['empl']==0] 
# E = trim[trim['empl']==1] 

# ME = M[M['empl']==1] 
# MU = M[M['empl']==0] 
# FE = F[F['empl']==1] 
# FU = F[F['empl']==0] 

In [None]:
# Trim both ends of the wage distribution, separately by gender.
# The cutoffs are hard coded (rather than recomputed from the percentiles) so
# that re-running the cell on already-trimmed data does not keep shrinking the
# sample.

M_WAGE_COND_L = (data['wage'] > 7.786249999999999) #hard coded percentile so it does not continually update
M_WAGE_COND_H = (data['wage'] < 35)
M_COND = (data['women'] == 0)

F_WAGE_COND_L = (data['wage'] > 6.01125) #hard coded percentile so it does not continually update
F_WAGE_COND_H = (data['wage'] < 26.21249999999999)
F_COND = (data['women'] == 1)

# Unemployed observations are kept regardless of their wage value.
DUR_COND = (data['empl'] == 0)

# Fixed: the women's upper bound previously used M_WAGE_COND_H (the men's
# cutoff of 35), so F_WAGE_COND_H was defined but never applied.
# NOTE(review): the row count recorded before this fix (6,519) no longer applies.
trim = data[ (M_COND & M_WAGE_COND_L & M_WAGE_COND_H) | (F_COND & F_WAGE_COND_L & F_WAGE_COND_H) | DUR_COND ]

# Rebuild the subgroup frames from the trimmed sample.
M = trim[trim['women']==0] 
F = trim[trim['women']==1] 
U = trim[trim['empl']==0] 
E = trim[trim['empl']==1] 

ME = M[M['empl']==1] 
MU = M[M['empl']==0] 
FE = F[F['empl']==1] 
FU = F[F['empl']==0] 

In [None]:
# LaTeX table of wage/duration summary statistics by gender and employment status (trimmed sample).
print(trim.groupby(['women', 'empl']).agg(agg_dict).to_latex()) # by gender

In [None]:
# LaTeX table of wage/duration summary statistics by employment status only (trimmed sample).
print(trim.groupby(['empl']).agg(agg_dict).to_latex()) # all

## Figures 

- Distribution of wages, men and women

In [None]:
# Seaborn: wage distribution of the employed, one panel per gender
# (men on top, women below). ME / FE are whatever the preceding cells last
# assigned to those names.
fig, ax = plt.subplots(2, 1, figsize=(12, 8))

for panel, wages, label in zip(ax, (ME['wage'], FE['wage']), ('Men', 'Women')):
    sns.distplot(wages, color='#4B9CD3', hist_kws={'alpha' : .3}, bins=50, ax=panel)
    panel.legend([label])

plt.tight_layout()

In [None]:
# Write the most recently created two-panel wage figure to disk (tight bounding box, transparent background).
fig.savefig('./figures/fig1_2_SOEP.png', bbox_inches='tight', transparent=True)

## Estimation 

### Initial Conditions

In [None]:
# Starting values for the optimizer. Each row lists the pooled value followed
# by the men's (_M) and women's (_F) value.
λ, λ_M, λ_F = 0.25530507424781373, 0.29441739346048834, 0.22246729559007514
η, η_M, η_F = 0.15574303121563346, 0.13822412838520579, 0.20334901237530306
μ, μ_M, μ_F = 15.922555, 16.959963, 13.678350
σ, σ_M, σ_F = 6.040666, 6.094529, 5.266233

# p and d are the two extra parameters of the prejudice specification.
p, d = 0.5, 10

### Parameters without distributional assumptions

In [None]:
# wstar: lowest observed wage among the employed, by gender
# (presumably the reservation-wage estimate - confirm against the paper).
wstarM = min(ME['wage'])
wstarF = min(FE['wage'])

# h: number of unemployment spells divided by total unemployment duration.
# The duration column is selected by label: the previous `.values[:,0]` only
# returned durations when 'dur' happened to be the first column of the frame,
# and the bare `sum` resolved to whatever `from pylab import *` put in scope.
hM = MU['dur'].count()/np.sum(MU['dur'].values)
hF = FU['dur'].count()/np.sum(FU['dur'].values)
h = U['dur'].count()/np.sum(U['dur'].values)

# η: h scaled by the ratio of unemployed to employed observations.
ηM = hM * (MU['dur'].count()/ME['empl'].count())
ηF = hF * (FU['dur'].count()/FE['empl'].count())
η = h * (U['dur'].count()/E['empl'].count())

# α is held fixed at 0.5 and read as a global by the likelihood functions.
α = 0.5

In [None]:
# Display the women's minimum observed wage.
wstarF

### Accepted Wage Function and Distributions

In [None]:
def dens_accepted(wage: np.array, α: float, μ: float, σ: float, wstar: float):
    """
    Density of accepted wages: a lognormal density truncated from below at
    `wstar`.

    The lognormal has shape 1, location ``α*μ + (1-α)*wstar`` and scale
    ``α*σ`` (scipy's loc/scale parameterization).

    Parameters
    ----------
    wage : array-like
        Wages at which the density is evaluated.
    α, μ, σ : float
        Enter only through the location and scale above.
    wstar : float
        Truncation point.

    Returns
    -------
    ``pdf(wage) / sf(wstar)``, both under the same lognormal, with the same
    shape as `wage`.
    """

    l = (α*μ) + ((1-α)*wstar)
    s = α * σ
    shape = 1

    # The survival function is evaluated at wstar itself: scipy standardizes
    # with loc/scale internally. The previous code passed the already
    # standardized value (wstar - l)/s together with loc and scale, which
    # standardized twice and disagreed with dens_accepted_prej.
    return stats.lognorm.pdf(wage, shape, l, s) / stats.lognorm.sf(wstar, shape, l, s)

In [None]:
# Smoke test: evaluate the accepted-wage density on employed women's wages with ad-hoc parameter values.
dens_accepted(FE['wage'],.5,13,5,6)

In [None]:
def dens_accepted_prej(wage: np.array, α: float, μ: float, σ: float, wstar: float, d: float):
    """
    Density of accepted wages when prejudice is present: a lognormal density
    truncated from below at `wstar`, with the location shifted down by ``α*d``.

    The lognormal has shape 1, location ``α*μ + (1-α)*wstar - α*d`` and scale
    ``α*σ`` (scipy's loc/scale parameterization).

    Parameters
    ----------
    wage : array-like
        Wages at which the density is evaluated.
    α, μ, σ : float
        Enter only through the location and scale above.
    wstar : float
        Truncation point.
    d : float
        Prejudice parameter; lowers the location by ``α*d``.

    Returns
    -------
    ``pdf(wage) / sf(wstar)``, both under the same lognormal, with the same
    shape as `wage`.
    """

    l = (α*μ) + ((1-α)*wstar) - α*d
    s = α * σ
    shape = 1

    # (An unused standardized value `sf_in` was computed here before; it has
    # been removed because scipy standardizes with loc/scale itself.)
    return stats.lognorm.pdf(wage,shape,l,s) / stats.lognorm.sf(wstar, shape, l, s)

In [None]:
def lambdaM(h: float, wstarM: float, α: float, μ: float, σ: float):
    """
    Estimates lambda for men as ``h / sf(wstarM)``.

    The survival function is that of a lognormal with shape 1, location
    ``α*μ + (1-α)*wstarM`` and scale ``α*σ``.

    Parameters
    ----------
    h : float
        Numerator (the hazard computed from unemployment durations).
    wstarM : float
        Point at which the survival function is evaluated.
    α, μ, σ : float
        Enter only through the location and scale above.

    Returns
    -------
    ``h`` divided by the probability that a draw exceeds `wstarM`.
    """

    l = (α*μ) + ((1-α)*wstarM)
    s = α * σ
    shape = 1

    # Evaluate at wstarM directly: scipy applies loc/scale itself. Passing the
    # pre-standardized (wstarM - l)/s together with loc and scale, as before,
    # standardized twice.
    denom = stats.lognorm.sf(wstarM, shape, l, s)

    return h/denom

In [None]:
# lambda for men at the starting values.
lambdaM(hM, wstarM, α, μ_M, σ_M)

In [None]:
# Same formula with the pooled h, μ, σ.
# NOTE(review): this uses the women's wstarF as the cutoff - confirm that is intended.
lambdaM(h,wstarF,α,μ,σ)

In [None]:
def lambdaF(h: float, wstarF: float, α: float, μ: float, σ: float, p: float, d: float = None):
    """
    Estimates lambda for women as ``h`` divided by a p-weighted mixture of two
    survival probabilities evaluated at `wstarF`.

    Both components are lognormal with shape 1 and scale ``α*σ``:
      * weight ``1-p``: location ``α*μ + (1-α)*wstarF``
      * weight ``p``:   the same location lowered by ``α*d``

    Parameters
    ----------
    h : float
        Numerator (the hazard computed from unemployment durations).
    wstarF : float
        Point at which the survival functions are evaluated.
    α, μ, σ : float
        Enter only through the locations and scale above.
    p : float
        Mixture weight on the shifted component.
    d : float, optional
        Location shift of the second component. When omitted, the
        module-level ``d`` is used, which is what this function always did
        before the parameter existed. Callers that estimate ``d`` should pass
        it explicitly.

    Returns
    -------
    ``h / ((1-p)*sf1(wstarF) + p*sf2(wstarF))``
    """

    if d is None:
        # Backward-compatible fallback to the notebook-level value.
        d = globals()["d"]

    l1 = (α*μ) + ((1-α)*wstarF)
    l2 = (α*μ) + ((1-α)*wstarF) - α*d
    s = α * σ
    shape = 1

    # Evaluate at wstarF directly: scipy applies loc/scale itself. Passing the
    # pre-standardized (wstarF - l)/s together with loc and scale, as before,
    # standardized twice.
    denom = (1-p)*stats.lognorm.sf(wstarF, shape, l1, s) + p*stats.lognorm.sf(wstarF, shape, l2, s)

    return h/denom

In [None]:
# lambda for women at the starting values, with p = 0.5.
lambdaF(hF, wstarF, α, μ_F, σ_F, 0.5)

### Test Statistics

In [None]:
def teststats (hess_inv : np.ndarray, lnL : float, nparams : int):
    """
    Calculates the standard errors and p value from the LR tests
    """
    se = np.sqrt(np.diag(hess_inv))
    
    loglik_H0 = lnL_6
    
    LR = 2 * (lnL - loglik_H0)
    pval = stats.chi2.pdf(LR, nparams)
    
    return print('standard errors: ', se ,'. p = ', pval)

### Likelihood Functions

#### Estimation 6

In [None]:
def loglik_6( params: list ):
    """
    Calculates log likelihood with prejudice and productivity differences 
    
    Estimation 6 (ηM, ηF, hM, hF gender specific)
    
    Layout of `params` (unconstrained scale):
        [0] log λM   (ignored: λM is derived from hM through lambdaM)
        [1] log λF   (ignored: λF is derived from hF through lambdaF)
        [2] log ηM
        [3] log ηF
        [4] μM
        [5] log σM
        [6] μF
        [7] log σF
        [8] log d
        [9] logit p

    Reads M, MU, ME, F, FU, FE, hM, hF, wstarM, wstarF and α from the
    notebook namespace. Durations are taken from the first column of MU / FU.

    Returns the sum of the men's and women's log-likelihood contributions.
    """
    
    ηM = np.exp(params[2])
    ηF = np.exp(params[3])
    μM = params[4]
    σM = np.exp(params[5])
    μF = params[6]
    σF = np.exp(params[7])
    d = np.exp(params[8])
    p = np.exp(params[9])/(1+np.exp(params[9]))

    # Men's equations 
    # Derived quantity; it does not enter the returned value.
    λM = lambdaM(hM, wstarM, α, μM, σM)
    
    a = M['dur'].count() * np.log(hM/(hM+ηM))
    b = MU['dur'].count() * np.log(ηM)
    c = - hM * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM) ) )
    
    # Women's equations
    # Fixed: this was evaluated with the men's μM, σM.
    # Derived quantity; it does not enter the returned value.
    λF = lambdaF(hF, wstarF, α, μF, σF, p)
    
    f = F['dur'].count() * np.log(hF/(hF+ηF))
    g = FU['dur'].count() * np.log(ηF)
    dur_F = - hF * np.sum(FU.values[:,0])
    
    # Accepted-wage density for women: mixture of the unprejudiced (weight 1-p)
    # and prejudiced (weight p) components.
    y = ((1-p)/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF)
    z = (p/α) * dens_accepted_prej(FE['wage'], α, μF, σF, wstarF, d)

    i = np.sum( np.log( y + z ) )
    
    return (a + b + c + e + f + g + dur_F + i)

In [None]:
# Check on log-likelihood: move the starting values onto the unconstrained
# scale expected by loglik_6 and evaluate it once.

param6 = [λ_M, λ_F, η_M, η_F, μ_M, σ_M, μ_F, σ_F, d, p]

# Positive parameters enter in logs, the two μ's enter untransformed, and the
# last entry is on the logit scale (log(1) = 0, i.e. p = 0.5).
(b6_0, b6_1, b6_2, b6_3, b6_4,
 b6_5, b6_6, b6_7, b6_8, b6_9) = (
    np.log(param6[0]),
    np.log(param6[1]),
    np.log(param6[2]),
    np.log(param6[3]),
    param6[4],
    np.log(param6[5]),
    param6[6],
    np.log(param6[7]),
    np.log(param6[8]),
    np.log(1),
)

init6 = [b6_0, b6_1, b6_2, b6_3, b6_4, b6_5, b6_6, b6_7, b6_8, b6_9]

print(loglik_6(init6))

In [None]:
# Show the transformed starting vector for Estimation 6.
print(init6)

In [None]:
# scipy's `minimize` searches for a minimum, while loglik_6 returns the
# log-likelihood itself, so the objective is negated to obtain the maximum
# likelihood estimate. est_6.fun is therefore the NEGATIVE log-likelihood at
# the optimum, and est_6.hess_inv approximates the inverse information matrix.
est_6 = minimize(lambda b: -loglik_6(b), init6)

est_6

In [None]:
# Coefficients: map the optimizer's solution back to the model scale
# (exp for the log-parameterized entries, logistic for p).

λM_6, λF_6, ηM_6, ηF_6 = (np.exp(est_6.x[k]) for k in range(4))
μM_6, μF_6 = est_6.x[4], est_6.x[6]
σM_6, σF_6 = np.exp(est_6.x[5]), np.exp(est_6.x[7])
d_6 = np.exp(est_6.x[8])
p_6 = np.exp(est_6.x[9])/(1+np.exp(est_6.x[9]))

print(λM_6, λF_6, ηM_6, ηF_6, μM_6, σM_6, μF_6, σF_6, d_6, p_6)

In [None]:
# Objective value at the optimum, as reported by `minimize`.
# teststats reads this name as its default reference value.
lnL_6 = est_6.fun

print(lnL_6)

In [None]:
# Print standard errors and the LR-test p value for Estimation 6.
# teststats only prints, so ts_6 is None.
ts_6 = teststats(est_6.hess_inv, lnL_6, 10)

#### Estimation 3

In [None]:
def loglik_3( params: list ):
    """
    Calculates log likelihood with prejudice and productivity differences 
    
    Estimation 3 (η and h not gender specific)
    
    Layout of `params` (unconstrained scale):
        [0] log λ    (ignored: λM, λF are derived from h through lambdaM / lambdaF)
        [1] log η
        [2] μM
        [3] log σM
        [4] μF
        [5] log σF
        [6] log d
        [7] logit p

    Reads M, MU, ME, F, FU, FE, h, wstarM, wstarF and α from the notebook
    namespace. Durations are taken from the first column of MU / FU.

    Returns the sum of the men's and women's log-likelihood contributions.
    """
    
    η = np.exp(params[1]) #same eta
    μM = params[2]
    σM = np.exp(params[3])
    μF = params[4]
    σF = np.exp(params[5])
    d = np.exp(params[6])
    p = np.exp(params[7])/(1+np.exp(params[7]))
    
    # Men's equations
    # Derived quantity; it does not enter the returned value.
    λM = lambdaM(h, wstarM, α, μM, σM)
    
    a = M['dur'].count() * np.log(h/(h+η))
    b = MU['dur'].count() * np.log(η)
    c = - h * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM) ) )
    
    
    # Women's equations
    # Fixed: this was evaluated with the men's μM, σM.
    # Derived quantity; it does not enter the returned value.
    λF = lambdaF(h, wstarF, α, μF, σF, p)
    
    f = F['dur'].count() * np.log(h/(h+η))
    g = FU['dur'].count() * np.log(η)
    i = - h * np.sum(FU.values[:,0])
    
    # Accepted-wage density for women: mixture of the unprejudiced (weight 1-p)
    # and prejudiced (weight p) components.
    y = ((1-p)/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF)
    z = (p/α) * dens_accepted_prej(FE['wage'], α, μF, σF, wstarF, d)

    j = np.sum( np.log( y + z ) )
    
    
    return a + b + c + e + f + g + i + j

In [None]:
# Check on log-likelihood: move the starting values onto the unconstrained
# scale expected by loglik_3 and evaluate it once.

param3 = [λ, η, μ_M, σ_M, μ_F, σ_F, d, p]

# Positive parameters enter in logs, the two μ's enter untransformed, and the
# last entry is on the logit scale (log(1) = 0, i.e. p = 0.5).
(b3_0, b3_1, b3_2, b3_3,
 b3_4, b3_5, b3_6, b3_7) = (
    np.log(param3[0]),
    np.log(param3[1]),
    param3[2],
    np.log(param3[3]),
    param3[4],
    np.log(param3[5]),
    np.log(param3[6]),
    np.log(1),
)

init3 = [b3_0, b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7]

print(loglik_3(init3))

In [None]:
# scipy's `minimize` searches for a minimum, while loglik_3 returns the
# log-likelihood itself, so the objective is negated to obtain the maximum
# likelihood estimate. est_3.fun is therefore the NEGATIVE log-likelihood at
# the optimum, and est_3.hess_inv approximates the inverse information matrix.
est_3 = minimize(lambda b: -loglik_3(b), init3)

est_3

In [None]:
# Coefficients: map the optimizer's solution back to the model scale
# (exp for the log-parameterized entries, logistic for p).

λ_3, η_3 = np.exp(est_3.x[0]), np.exp(est_3.x[1])
μM_3, μF_3 = est_3.x[2], est_3.x[4]
σM_3, σF_3 = np.exp(est_3.x[3]), np.exp(est_3.x[5])
d_3 = np.exp(est_3.x[6])
p_3 = np.exp(est_3.x[7])/(1+np.exp(est_3.x[7]))

print(λ_3, η_3, μM_3, σM_3, μF_3, σF_3, d_3, p_3)

In [None]:
# Objective value at the optimum, as reported by `minimize`.
lnL_3 = est_3.fun

print(lnL_3)

In [None]:
# Print standard errors and the LR-test p value for Estimation 3.
# teststats only prints, so ts_3 is None.
ts_3 = teststats(est_3.hess_inv, lnL_3, 8)

#### Estimation 5

In [None]:
def loglik_5( params: list ):
    """
    Log likelihood for Estimation 5: prejudice, no productivity differences
    (one μ and σ for both genders), with gender-specific ηM, ηF, hM, hF.

    Layout of `params` (unconstrained scale):
        [0] log λM   (overwritten by the value derived through lambdaM)
        [1] log λF   (overwritten by the value derived through lambdaF)
        [2] log ηM
        [3] log ηF
        [4] μ
        [5] log σ
        [6] log d
        [7] logit p

    Reads M, MU, ME, F, FU, FE, hM, hF, wstarM, wstarF and α from the
    notebook namespace. Durations are taken from the first column of MU / FU.

    Returns the sum of the men's and women's log-likelihood contributions.
    """

    log_λM, log_λF, log_ηM, log_ηF, μ, log_σ, log_d, logit_p = (params[k] for k in range(8))

    λM = np.exp(log_λM)
    λF = np.exp(log_λF)
    ηM = np.exp(log_ηM)
    ηF = np.exp(log_ηF)
    σ = np.exp(log_σ)
    d = np.exp(log_d)
    p = np.exp(logit_p)/(1+np.exp(logit_p))

    # --- Men -----------------------------------------------------------
    λM = lambdaM(hM, wstarM, α, μ, σ)

    total = M['dur'].count() * np.log(hM/(hM+ηM))
    total = total + MU['dur'].count() * np.log(ηM)
    total = total + (- hM * np.sum(MU.values[:,0]))
    men_wage_dens = (1/α) * dens_accepted(ME['wage'], α, μ, σ, wstarM)
    total = total + np.sum( np.log( men_wage_dens ) )

    # --- Women ---------------------------------------------------------
    λF = lambdaF(hF, wstarF, α, μ, σ, p)

    total = total + F['dur'].count() * np.log(hF/(hF+ηF))
    total = total + FU['dur'].count() * np.log(ηF)
    total = total + (- hF * np.sum(FU.values[:,0]))

    # Mixture of the unprejudiced (weight 1-p) and prejudiced (weight p) densities
    unprejudiced = ((1-p)/α) * dens_accepted(FE['wage'], α, μ, σ, wstarF)
    prejudiced = (p/α) * dens_accepted_prej(FE['wage'], α, μ, σ, wstarF, d)
    total = total + np.sum( np.log( unprejudiced + prejudiced ) )

    return total

In [None]:
# Check on log-likelihood: move the starting values onto the unconstrained
# scale expected by loglik_5 and evaluate it once.

param5 = [λ_M, λ_F, η_M, η_F, μ, σ, d, p]

# Fixed: these were read from param6, whose layout differs from index 4 on
# (μ_M, σ_M, μ_F instead of μ, σ, d), so the optimizer started from the wrong
# values and `param5` was never used.
b5_0 = np.log(param5[0])
b5_1 = np.log(param5[1])
b5_2 = np.log(param5[2])
b5_3 = np.log(param5[3])
b5_4 = param5[4]          # μ enters untransformed
b5_5 = np.log(param5[5])
b5_6 = np.log(param5[6])
b5_7 = np.log(1)          # logit scale: 0 corresponds to p = 0.5

init5 = [b5_0, b5_1, b5_2, b5_3, b5_4, b5_5, b5_6, b5_7]

print(loglik_5(init5))

In [None]:
# scipy's `minimize` searches for a minimum, while loglik_5 returns the
# log-likelihood itself, so the objective is negated to obtain the maximum
# likelihood estimate. est_5.fun is therefore the NEGATIVE log-likelihood at
# the optimum, and est_5.hess_inv approximates the inverse information matrix.
est_5 = minimize(lambda b: -loglik_5(b), init5)

est_5

In [None]:
# Coefficients: map the optimizer's solution back to the model scale
# (exp for the log-parameterized entries, logistic for p).

λM_5, λF_5, ηM_5, ηF_5 = (np.exp(est_5.x[k]) for k in range(4))
μ_5 = est_5.x[4]
σ_5, d_5 = np.exp(est_5.x[5]), np.exp(est_5.x[6])
p_5 = np.exp(est_5.x[7])/(1+np.exp(est_5.x[7]))

print(λM_5, λF_5, ηM_5, ηF_5, μ_5, σ_5, d_5, p_5)

In [None]:
# Objective value at the optimum, as reported by `minimize`.
lnL_5 = est_5.fun

print(lnL_5)

In [None]:
# Print standard errors and the LR-test p value for Estimation 5.
# teststats only prints, so ts_5 is None.
ts_5 = teststats(est_5.hess_inv, lnL_5, 8)

#### Estimation 2

In [None]:
def loglik_2( params: list ):
    """
    Log likelihood for Estimation 2: prejudice, no productivity differences
    (one μ and σ for both genders), with η and h not gender specific.

    Layout of `params` (unconstrained scale):
        [0] log λ    (overwritten by the values derived through lambdaM / lambdaF)
        [1] log η
        [2] μ
        [3] log σ
        [4] log d
        [5] logit p

    Reads M, MU, ME, F, FU, FE, h, wstarM, wstarF and α from the notebook
    namespace. Durations are taken from the first column of MU / FU.

    Returns the sum of the men's and women's log-likelihood contributions.
    """

    log_λ, log_η, μ, log_σ, log_d, logit_p = (params[k] for k in range(6))

    λM = np.exp(log_λ)
    λF = np.exp(log_λ)
    η = np.exp(log_η)
    σ = np.exp(log_σ)
    d = np.exp(log_d)
    p = np.exp(logit_p)/(1+np.exp(logit_p))

    # --- Men -----------------------------------------------------------
    λM = lambdaM(h, wstarM, α, μ, σ)

    total = M['dur'].count() * np.log(h/(h+η))
    total = total + MU['dur'].count() * np.log(η)
    total = total + (- h * np.sum(MU.values[:,0]))
    men_wage_dens = (1/α) * dens_accepted(ME['wage'], α, μ, σ, wstarM)
    total = total + np.sum( np.log( men_wage_dens ) )

    # --- Women ---------------------------------------------------------
    λF = lambdaF(h, wstarF, α, μ, σ, p)

    total = total + F['dur'].count() * np.log(h/(h+η))
    total = total + FU['dur'].count() * np.log(η)
    total = total + (- h * np.sum(FU.values[:,0]))

    # Mixture of the unprejudiced (weight 1-p) and prejudiced (weight p) densities
    unprejudiced = ((1-p)/α) * dens_accepted(FE['wage'], α, μ, σ, wstarF)
    prejudiced = (p/α) * dens_accepted_prej(FE['wage'], α, μ, σ, wstarF, d)
    total = total + np.sum( np.log( unprejudiced + prejudiced ) )

    return total

In [None]:
# Check on log-likelihood: move the starting values onto the unconstrained
# scale expected by loglik_2 and evaluate it once.

param2 = [λ, η, μ, σ, d, p]

# Positive parameters enter in logs, μ enters untransformed, and the last
# entry is on the logit scale (log(1) = 0, i.e. p = 0.5).
b2_0, b2_1, b2_2, b2_3, b2_4, b2_5 = (
    np.log(param2[0]),
    np.log(param2[1]),
    param2[2],
    np.log(param2[3]),
    np.log(param2[4]),
    np.log(1),
)

init2 = [b2_0, b2_1, b2_2, b2_3, b2_4, b2_5]

print(loglik_2(init2))

In [None]:
# scipy's `minimize` searches for a minimum, while loglik_2 returns the
# log-likelihood itself, so the objective is negated to obtain the maximum
# likelihood estimate. est_2.fun is therefore the NEGATIVE log-likelihood at
# the optimum, and est_2.hess_inv approximates the inverse information matrix.
est_2 = minimize(lambda b: -loglik_2(b), init2)

est_2

In [None]:
# Coefficients: map the optimizer's solution back to the model scale
# (exp for the log-parameterized entries, logistic for p).

λ_2, η_2 = np.exp(est_2.x[0]), np.exp(est_2.x[1])
μ_2 = est_2.x[2]
σ_2, d_2 = np.exp(est_2.x[3]), np.exp(est_2.x[4])
p_2 = np.exp(est_2.x[5])/(1+np.exp(est_2.x[5]))

print(λ_2, η_2, μ_2, σ_2, d_2, p_2)

In [None]:
# Objective value at the optimum, as reported by `minimize`.
lnL_2 = est_2.fun

print(lnL_2)

In [None]:
# Print standard errors and the LR-test p value for Estimation 2.
# teststats only prints, so ts_2 is None.
ts_2 = teststats(est_2.hess_inv, lnL_2, 6)

#### Estimation 4

In [None]:
def loglik_4( params: list ):
    """
    Calculates log likelihood with productivity differences, no prejudice
    
    Estimation 4 (ηM, ηF, hM, hF)
    
    Layout of `params` (unconstrained scale):
        [0] log λM   (ignored: λM is derived from hM through lambdaM)
        [1] log λF   (ignored: λF is derived from hF through lambdaF)
        [2] log ηM
        [3] log ηF
        [4] μM
        [5] log σM
        [6] μF
        [7] log σF

    Reads M, MU, ME, F, FU, FE, hM, hF, wstarM, wstarF and α from the
    notebook namespace. Durations are taken from the first column of MU / FU.

    Returns the sum of the men's and women's log-likelihood contributions.
    """

    ηM = np.exp(params[2])
    ηF = np.exp(params[3])
    μM = params[4]
    σM = np.exp(params[5])
    μF = params[6]
    # Fixed: σF was read from params[6] (the slot of μF), which tied σF to
    # exp(μF) and left params[7] unused.
    σF = np.exp(params[7])
    p = 0  # no prejudice in this specification
    
    # Men's equations 
    # Derived quantity; it does not enter the returned value.
    λM = lambdaM(hM, wstarM, α, μM, σM)
    
    a = M['dur'].count() * np.log(hM/(hM+ηM))
    b = MU['dur'].count() * np.log(ηM)
    c = - hM * np.sum(MU.values[:,0])
    e = np.sum( np.log( (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM) ) )
    
    
    # Women's equations
    # Derived quantity; it does not enter the returned value.
    λF = lambdaF(hF, wstarF, α, μF, σF, p)
    
    f = F['dur'].count() * np.log(hF/(hF+ηF))
    g = FU['dur'].count() * np.log(ηF)
    dur_F = - hF * np.sum(FU.values[:,0])
    
    y = (1/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF)

    i = np.sum( np.log( y ) )
    
    
    return a + b + c + e + f + g + dur_F + i

In [None]:
# Check on log-likelihood: move the starting values onto the unconstrained
# scale expected by loglik_4 and evaluate it once.

param4 = [λ_M, λ_F, η_M, η_F, μ_M, σ_M, μ_F, σ_F]

# Positive parameters enter in logs; the two μ's enter untransformed.
(b4_0, b4_1, b4_2, b4_3,
 b4_4, b4_5, b4_6, b4_7) = (
    np.log(param4[0]),
    np.log(param4[1]),
    np.log(param4[2]),
    np.log(param4[3]),
    param4[4],
    np.log(param4[5]),
    param4[6],
    np.log(param4[7]),
)

init4 = [b4_0, b4_1, b4_2, b4_3, b4_4, b4_5, b4_6, b4_7]

print(loglik_4(init4))

In [None]:
# scipy's `minimize` searches for a minimum, while loglik_4 returns the
# log-likelihood itself, so the objective is negated to obtain the maximum
# likelihood estimate. est_4.fun is therefore the NEGATIVE log-likelihood at
# the optimum, and est_4.hess_inv approximates the inverse information matrix.
est_4 = minimize(lambda b: -loglik_4(b), init4)

est_4

In [None]:
# Coefficients: map the optimizer's solution back to the model scale
# (exp for the log-parameterized entries).

λM_4, λF_4, ηM_4, ηF_4 = (np.exp(est_4.x[k]) for k in range(4))
μM_4, μF_4 = est_4.x[4], est_4.x[6]
σM_4, σF_4 = np.exp(est_4.x[5]), np.exp(est_4.x[7])

print(λM_4, λF_4, ηM_4, ηF_4, μM_4, σM_4, μF_4, σF_4)

In [None]:
# Objective value at the optimum, as reported by `minimize`.
lnL_4 = est_4.fun

print(lnL_4)

In [None]:
# Print standard errors and the LR-test p value for Estimation 4.
# teststats only prints, so ts_4 is None.
ts_4 = teststats(est_4.hess_inv, lnL_4, 8)

#### Estimation 1

In [None]:
def loglik_1( params: list ):
    """
    Log likelihood for Estimation 1: productivity differences, no prejudice,
    with η and h not gender specific.

    Layout of `params` (unconstrained scale):
        [0] log λ    (overwritten by the values derived through lambdaM / lambdaF)
        [1] log η
        [2] μM
        [3] log σM
        [4] μF
        [5] log σF

    Reads M, MU, ME, F, FU, FE, h, wstarM, wstarF and α from the notebook
    namespace. Durations are taken from the first column of MU / FU.

    Returns the sum of the men's and women's log-likelihood contributions.
    """

    log_λ, log_η, μM, log_σM, μF, log_σF = (params[k] for k in range(6))

    λM = np.exp(log_λ)  # same lambda for both genders
    λF = np.exp(log_λ)
    η = np.exp(log_η)   # same eta for both genders
    σM = np.exp(log_σM)
    σF = np.exp(log_σF)
    p = 0               # no prejudice in this specification

    # --- Men -----------------------------------------------------------
    λM = lambdaM(h, wstarM, α, μM, σM)

    total = M['dur'].count() * np.log(h/(h+η))
    total = total + MU['dur'].count() * np.log(η)
    total = total + (- h * np.sum(MU.values[:,0]))
    men_wage_dens = (1/α) * dens_accepted(ME['wage'], α, μM, σM, wstarM)
    total = total + np.sum( np.log( men_wage_dens ) )

    # --- Women ---------------------------------------------------------
    λF = lambdaF(h, wstarF, α, μF, σF, p)

    total = total + F['dur'].count() * np.log(h/(h+η))
    total = total + FU['dur'].count() * np.log(η)
    total = total + (- h * np.sum(FU.values[:,0]))
    women_wage_dens = (1/α) * dens_accepted(FE['wage'], α, μF, σF, wstarF)
    total = total + np.sum( np.log( women_wage_dens ) )

    return total

In [None]:
# Check on log-likelihood: move the starting values onto the unconstrained
# scale expected by loglik_1 and evaluate it once.

param1 = [λ, η, μ_M, σ_M, μ_F, σ_F]

# Positive parameters enter in logs; the two μ's enter untransformed.
b1_0, b1_1, b1_2, b1_3, b1_4, b1_5 = (
    np.log(param1[0]),
    np.log(param1[1]),
    param1[2],
    np.log(param1[3]),
    param1[4],
    np.log(param1[5]),
)

init1 = [b1_0, b1_1, b1_2, b1_3, b1_4, b1_5]

print(loglik_1(init1))

In [None]:
# scipy's `minimize` searches for a minimum, while loglik_1 returns the
# log-likelihood itself, so the objective is negated to obtain the maximum
# likelihood estimate. est_1.fun is therefore the NEGATIVE log-likelihood at
# the optimum, and est_1.hess_inv approximates the inverse information matrix.
est_1 = minimize(lambda b: -loglik_1(b), init1)

est_1

In [None]:
# Coefficients: map the optimizer's solution back to the model scale
# (exp for the log-parameterized entries).

λ_1, η_1 = np.exp(est_1.x[0]), np.exp(est_1.x[1])
μM_1, μF_1 = est_1.x[2], est_1.x[4]
σM_1, σF_1 = np.exp(est_1.x[3]), np.exp(est_1.x[5])

print(λ_1, η_1, μM_1, σM_1, μF_1, σF_1)

In [None]:
# Objective value at the optimum, as reported by `minimize`.
lnL_1 = est_1.fun

print(lnL_1)

In [None]:
# Print standard errors and the LR-test p value for Estimation 1.
# teststats only prints, so ts_1 is None.
ts_1 = teststats(est_1.hess_inv, lnL_1, 6)