In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
df_bids = pd.read_csv('employer_wage_bids.csv', index_col='employer')
df_guesses = pd.read_csv('applicant_wage_guesses.csv', index_col='guesser')
df_app = pd.read_csv('applicant_data_clean.csv', index_col='applicant')

In [3]:
def make_table(fitted_models, title=None):
    tables = []
    for name, fitted in fitted_models.items():
        tables.append(
            pd.DataFrame(
                np.stack((fitted.params, fitted.bse, fitted.pvalues), axis=1),
                index=fitted.params.index,
                columns = pd.MultiIndex.from_arrays([
                    [name, name, name],
                    ['coef.', '(s.e.)', 'p']
                ]),
            )
        )
    df = pd.concat(tables, axis=1)
    df = df.loc[[i for i in df.index if not i.startswith('fe')]]
    sty = df.style.format(
        subset=pd.IndexSlice[:, pd.IndexSlice[:, 'coef.']],
        formatter=lambda x: f'{x:.2g}',
        na_rep = ''
    ).format(
        subset=pd.IndexSlice[:, pd.IndexSlice[:, '(s.e.)']],
        formatter=lambda x: f'({x:.2g})',
        na_rep = ''
    ).format(
        subset=pd.IndexSlice[:, pd.IndexSlice[:, 'p']],
        formatter=lambda x: f'{x:.2f}',
        na_rep = ''
    ).format_index(
        escape="latex", axis=0
    )
    
    return sty.to_latex(
        column_format = 'l' + '|'.join(['rlc']*len(fitted_models)),
        multicol_align='p{12em}',
        hrules = True,
        caption = title,
        position_float='centering',
    )

In [4]:
def hyp1_3_table(promote_type = 1):
    data = df_bids[
        (df_bids['treatment'] == 1) & (df_bids['promote_type_seen'] == promote_type)
    ]
    X = sm.add_constant(
        data[f'app_promote{promote_type}']
    )
    X.columns=['const', 'Self-promotion']

    fitted1 = sm.OLS(data['bid'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    data = df_bids[
        (df_bids['treatment'] == 2) & (df_bids['promote_type_seen'] == promote_type)
    ]
    X = pd.DataFrame(
        sm.add_constant(
            np.stack(
                (
                    data[f'app_promote{promote_type}'],
                    data['app_is_female'],
                    data['app_is_female'] * data[f'app_promote{promote_type}']
                ),
                axis=1
            )
        ),
        columns = ['const', 'Self-evaluation', 'Female', 'Self-evaluation x Female'],
        index = data.index
    )
    fitted2 = sm.OLS(data['bid'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    data = df_bids[
        (df_bids['treatment'] == 3) & (df_bids['promote_type_seen'] == promote_type)
    ]
    X = pd.DataFrame(
        sm.add_constant(
            np.concatenate(
                (
                    np.stack(
                        (
                            data[f'app_promote{promote_type}'],
                            data['app_is_female'],
                            data['app_is_female'] * data[f'app_promote{promote_type}']
                        ),
                        axis=1
                    ),
                    pd.get_dummies(data['app_eval_correct']).to_numpy()[:, :-1]
                ),
                axis=1
            )
        ),
        columns = [
            'const', 'Self-evaluation', 'Female', 'Self-evaluation x Female',
        ] + [f"fe{i}" for i in range(9)],
        index = data.index
    )
    fitted3 = sm.OLS(data['bid'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    return make_table(
        {
            'Self-evaluation': fitted1,
            'Self-evaluation + gender': fitted2,
            'Self-evaluation + gender + performance*': fitted3
        },
        title = f'Employer bids, with {"first" if promote_type == 1 else "second"} self-evaluation type',
    )
    

In [5]:
print(hyp1_3_table(1))

\begin{table}
\centering
\caption{Employer bids, with first self-evaluation type}
\begin{tabular}{lrlc|rlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{Self-evaluation} & \multicolumn{3}{p{12em}}{Self-evaluation + gender} & \multicolumn{3}{p{12em}}{Self-evaluation + gender + performance*} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 0.22 & (0.14) & 0.12 & 0.29 & (0.2) & 0.15 & 1.4 & (0.22) & 0.00 \\
Self-promotion & 0.2 & (0.029) & 0.00 &  &  &  &  &  &  \\
Self-evaluation &  &  &  & 0.15 & (0.045) & 0.00 & 0.07 & (0.04) & 0.08 \\
Female &  &  &  & 0.29 & (0.27) & 0.29 & 0.45 & (0.26) & 0.08 \\
Self-evaluation x Female &  &  &  & -0.07 & (0.06) & 0.24 & -0.075 & (0.057) & 0.19 \\
\bottomrule
\end{tabular}
\end{table}



In [6]:
print(hyp1_3_table(2))

\begin{table}
\centering
\caption{Employer bids, with second self-evaluation type}
\begin{tabular}{lrlc|rlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{Self-evaluation} & \multicolumn{3}{p{12em}}{Self-evaluation + gender} & \multicolumn{3}{p{12em}}{Self-evaluation + gender + performance*} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 0.41 & (0.19) & 0.03 & 0.044 & (0.17) & 0.80 & 1.1 & (0.52) & 0.04 \\
Self-promotion & 0.01 & (0.002) & 0.00 &  &  &  &  &  &  \\
Self-evaluation &  &  &  & 0.012 & (0.0023) & 0.00 & -0.00024 & (0.0027) & 0.93 \\
Female &  &  &  & 0.17 & (0.21) & 0.43 & -0.033 & (0.3) & 0.91 \\
Self-evaluation x Female &  &  &  & -0.0012 & (0.0028) & 0.67 & 0.0005 & (0.0033) & 0.88 \\
\bottomrule
\end{tabular}
\end{table}



In [7]:
def get_hyp7_fit(treatment=None, promote_type=1):
    data = df_app if treatment is None else df_app[df_app['treatment'] == treatment]
    X = pd.DataFrame(
        sm.add_constant(
            np.concatenate(
                (
                    data['female'].to_numpy().reshape(-1, 1),
                    pd.get_dummies(data['eval_correct']).to_numpy()[:, :-1]
                ),
                axis=1
            )
        ),
        columns = ['const', 'Female'] + [f"fe{i}" for i in range(9)],
        index = data.index
    )
    return sm.OLS(data[f'promote{promote_type}'], X).fit(
        cov_type='HC1'
    )

def hyp7_table(promote_type=1):
    fits = [get_hyp7_fit(t, promote_type) for t in [None, 1, 2, 3]]
    return make_table(
        {
            'All treatments': fits[0],
            'Self-evaluation': fits[2],
            'Self-evaluation + gender': fits[2],
            'Self-evaluation + gender + performance*': fits[3]
        },
        title = f'Applicant self-evaluation, with {"first" if promote_type == 1 else "second"} self-evaluation type',
    )

In [8]:
print(hyp7_table(1))

\begin{table}
\centering
\caption{Applicant self-evaluation, with first self-evaluation type}
\begin{tabular}{lrlc|rlc|rlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{All treatments} & \multicolumn{3}{p{12em}}{Self-evaluation} & \multicolumn{3}{p{12em}}{Self-evaluation + gender} & \multicolumn{3}{p{12em}}{Self-evaluation + gender + performance*} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 5.2 & (0.21) & 0.00 & 5.6 & (0.36) & 0.00 & 5.6 & (0.36) & 0.00 & 5.1 & (0.13) & 0.00 \\
Female & 0.12 & (0.12) & 0.31 & 0.073 & (0.22) & 0.74 & 0.073 & (0.22) & 0.74 & -0.15 & (0.23) & 0.50 \\
\bottomrule
\end{tabular}
\end{table}



In [9]:
print(hyp7_table(2))

\begin{table}
\centering
\caption{Applicant self-evaluation, with second self-evaluation type}
\begin{tabular}{lrlc|rlc|rlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{All treatments} & \multicolumn{3}{p{12em}}{Self-evaluation} & \multicolumn{3}{p{12em}}{Self-evaluation + gender} & \multicolumn{3}{p{12em}}{Self-evaluation + gender + performance*} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 92 & (2) & 0.00 & 92 & (4.9) & 0.00 & 92 & (4.9) & 0.00 & 96 & (4.9) & 0.00 \\
Female & 3.2 & (2.3) & 0.16 & 4.3 & (4) & 0.28 & 4.3 & (4) & 0.28 & -1.8 & (4.5) & 0.68 \\
\bottomrule
\end{tabular}
\end{table}



In [10]:
def hyp4_fit(promote_type=1):
    data = df_guesses[(df_guesses['treatment'] == 1) & (df_guesses['promote_type_seen'] == promote_type)]
    X = pd.DataFrame(
            sm.add_constant(
            np.stack(
                (
                    data[f'other_promote{promote_type}'],
                    data['guesser_is_female'],
                    data['guesser_is_female'] * data[f'other_promote{promote_type}']
                ),
                axis=1
            )
        ),
        columns = ['const', 'Self-evaluation', 'Female guesser', 'Self-evaluation x Female guesser'],
        index = data.index
    )
    return sm.OLS(data['wage_guess'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

def hyp4_table():
    return make_table(
        {
            'First self-promotion type': hyp4_fit(1),
            'Second self-promotion type': hyp4_fit(2),
        },
        title = 'Wage guesses (self-evaluation-only treatment)'
    )

In [11]:
print(hyp4_table())

\begin{table}
\centering
\caption{Wage guesses (self-evaluation-only treatment)}
\begin{tabular}{lrlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{First self-promotion type} & \multicolumn{3}{p{12em}}{Second self-promotion type} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 0.7 & (0.059) & 0.00 & 0.64 & (0.049) & 0.00 \\
Self-evaluation & 0.12 & (0.012) & 0.00 & 0.0091 & (0.00062) & 0.00 \\
Female guesser & 0.15 & (0.14) & 0.29 & 0.23 & (0.098) & 0.02 \\
Self-evaluation x Female guesser & 0.0024 & (0.03) & 0.94 & -0.0017 & (0.0016) & 0.29 \\
\bottomrule
\end{tabular}
\end{table}



In [12]:
def hyp5_6_table(female = 1, promote_type = 1):
    data = df_guesses[
        (df_guesses['treatment'] == 2) & (df_guesses['promote_type_seen'] == promote_type) & (df_guesses['guesser_is_female'] == female)
    ]
    X = pd.DataFrame(
        sm.add_constant(
            np.stack(
                (
                    data[f'other_promote{promote_type}'],
                    data['other_is_female'],
                    data['other_is_female'] * data[f'other_promote{promote_type}']
                ),
                axis=1
            ),
        ),
        columns = ['const', 'Self-evaluation', 'Female', 'Self-evaluation x Female'],
        index = data.index
    )

    fitted1 = sm.OLS(data['wage_guess'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    data = df_guesses[(df_guesses['treatment'] == 3) & (df_guesses['promote_type_seen'] == promote_type) & df_guesses['guesser_is_female'] == female]
    X = pd.DataFrame(
        sm.add_constant(
            np.concatenate(
                (
                    np.stack(
                        (
                            data['other_promote1'],
                            data['other_is_female'],
                            data['other_is_female'] * data['other_promote1']
                        ),
                        axis=1
                    ),
                    pd.get_dummies(data['other_eval_correct']).to_numpy()[:, :-1]
                ),
                axis=1
            )
        ),
        columns = [
            'const', 'Self-evaluation', 'Female', 'Self-evaluation x Female'
        ] + [f"fe{i}" for i in range(10)],
        index = data.index
    )
    fitted2 = sm.OLS(data['wage_guess'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    return make_table(
        {
            'Self-evaluation + gender': fitted1,
            'Self-evaluation + gender + performance*': fitted2
        },
        title = f'Wage guesses, with {"first" if promote_type == 1 else "second"} self-evaluation type and {"female" if female == 1 else "male"} guessers only',
    )

In [13]:
print(hyp5_6_table(1, 1))

\begin{table}
\centering
\caption{Wage guesses, with first self-evaluation type and female guessers only}
\begin{tabular}{lrlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{Self-evaluation + gender} & \multicolumn{3}{p{12em}}{Self-evaluation + gender + performance*} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 0.65 & (0.16) & 0.00 & 1.3 & (0.13) & 0.00 \\
Self-evaluation & 0.093 & (0.036) & 0.01 & 0.092 & (0.032) & 0.00 \\
Female & -0.31 & (0.23) & 0.18 & 0.3 & (0.18) & 0.10 \\
Self-evaluation x Female & 0.11 & (0.049) & 0.03 & -0.053 & (0.048) & 0.27 \\
\bottomrule
\end{tabular}
\end{table}



In [14]:
print(hyp5_6_table(1, 2))

\begin{table}
\centering
\caption{Wage guesses, with second self-evaluation type and female guessers only}
\begin{tabular}{lrlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{Self-evaluation + gender} & \multicolumn{3}{p{12em}}{Self-evaluation + gender + performance*} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 0.68 & (0.11) & 0.00 & 1.7 & (0.16) & 0.00 \\
Self-evaluation & 0.0075 & (0.0018) & 0.00 & -0.019 & (0.03) & 0.53 \\
Female & -0.35 & (0.13) & 0.00 & 0.13 & (0.18) & 0.47 \\
Self-evaluation x Female & 0.0059 & (0.0019) & 0.00 & -0.0049 & (0.045) & 0.91 \\
\bottomrule
\end{tabular}
\end{table}



In [15]:
print(hyp5_6_table(0, 1))

\begin{table}
\centering
\caption{Wage guesses, with first self-evaluation type and male guessers only}
\begin{tabular}{lrlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{Self-evaluation + gender} & \multicolumn{3}{p{12em}}{Self-evaluation + gender + performance*} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 0.84 & (0.11) & 0.00 & 1.1 & (0.039) & 0.00 \\
Self-evaluation & 0.12 & (0.027) & 0.00 & 0.043 & (0.0085) & 0.00 \\
Female & -0.0047 & (0.12) & 0.97 & 0.001 & (0.047) & 0.98 \\
Self-evaluation x Female & -0.021 & (0.03) & 0.50 & -0.008 & (0.011) & 0.47 \\
\bottomrule
\end{tabular}
\end{table}



In [16]:
print(hyp5_6_table(0, 2))


\begin{table}
\centering
\caption{Wage guesses, with second self-evaluation type and male guessers only}
\begin{tabular}{lrlc|rlc}
\toprule
 & \multicolumn{3}{p{12em}}{Self-evaluation + gender} & \multicolumn{3}{p{12em}}{Self-evaluation + gender + performance*} \\
 & coef. & (s.e.) & p & coef. & (s.e.) & p \\
\midrule
const & 0.76 & (0.08) & 0.00 & 1.1 & (0.038) & 0.00 \\
Self-evaluation & 0.0087 & (0.0014) & 0.00 & 0.048 & (0.0086) & 0.00 \\
Female & -0.011 & (0.16) & 0.95 & 0.0056 & (0.05) & 0.91 \\
Self-evaluation x Female & -0.0009 & (0.0023) & 0.70 & -0.01 & (0.013) & 0.42 \\
\bottomrule
\end{tabular}
\end{table}



In [17]:
print('\n(*) Includes fixed effects on performance')
print('\nStandard errors robust to clustering')

(*) Includes fixed effects on performance

Standard errors robust to clustering
