In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import re

In [2]:
df_bids = pd.read_csv('employer_wage_bids.csv', index_col='employer')
df_bids['bid'] = df_bids['bid'] * 100
df_guesses = pd.read_csv('applicant_wage_guesses.csv', index_col='guesser')
df_guesses['wage_guess'] = df_guesses['wage_guess'] * 100
df_app = pd.read_csv('applicant_data_clean.csv', index_col='applicant')

In [3]:
def signif_level(pvalue):
    if pvalue < 0.01:
        return "***"
    elif pvalue < 0.05:
        return "**"
    elif pvalue < 0.1:
        return "*"
    else:
        return ""

def make_table(fitted_models, title=None, notes=None, colwidth=8):
    tables = []
    for name, fitted in fitted_models.items():
        tables.append(
            pd.DataFrame(
                [
                    rf"\shortstack{{{param:.3g} ({bse:.2g}){signif_level(p)}}}"
                    for param, bse, p in zip(fitted.params, fitted.bse, fitted.pvalues)
                ],
                index=fitted.params.index,
                columns = [name.replace('+', r'\newline +').replace('*', r'$^\dagger$')],
            )
        )
    df = pd.concat(tables, axis=1)
    df = df.loc[[i for i in df.index if not i.startswith('fe')]]
    sty = df.style.format(
        na_rep = ''
    ).format_index(
        escape="latex", axis=0
    )
    
    ncols = len(fitted_models)
    tab = sty.to_latex(
        column_format = 'l' + f'p{{{colwidth}em}}'*ncols,
        hrules = True,
        caption = title,
        position_float='centering',
    )

    if notes is not None:
        tab = re.sub(
            r'(?=\n\\end{tabular})',
            "\n" + rf'\\multicolumn{{{ncols}}}{{p{{{10 + 5 * ncols}em}}}}{{\\textit{{Notes}}: ' + r' \\newline\\quad '.join(notes) + '}', tab)
    return tab

In [4]:
def hyp1_3_table(promote_type = 1):
    data = df_bids[
        (df_bids['treatment'] == 1) & (df_bids['promote_type_seen'] == promote_type)
    ]
    X = sm.add_constant(
        data[f'app_promote{promote_type}']
    )
    X.columns=['const', 'Self-evaluation']

    fitted1 = sm.OLS(data['bid'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    data = df_bids[
        (df_bids['treatment'] == 2) & (df_bids['promote_type_seen'] == promote_type)
    ]
    X = pd.DataFrame(
        sm.add_constant(
            np.stack(
                (
                    data[f'app_promote{promote_type}'],
                    data['app_is_female'],
                    data['app_is_female'] * data[f'app_promote{promote_type}']
                ),
                axis=1
            )
        ),
        columns = ['const', 'Self-evaluation', 'Female', 'Self-evaluation x Female'],
        index = data.index
    )
    fitted2 = sm.OLS(data['bid'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    data = df_bids[
        (df_bids['treatment'] == 3) & (df_bids['promote_type_seen'] == promote_type)
    ]
    X = pd.DataFrame(
        sm.add_constant(
            np.concatenate(
                (
                    np.stack(
                        (
                            data[f'app_promote{promote_type}'],
                            data['app_is_female'],
                            data['app_is_female'] * data[f'app_promote{promote_type}']
                        ),
                        axis=1
                    ),
                    pd.get_dummies(data['app_eval_correct']).to_numpy()[:, :-1]
                ),
                axis=1
            )
        ),
        columns = [
            'const', 'Self-evaluation', 'Female', 'Self-evaluation x Female',
        ] + [f"fe{i}" for i in range(9)],
        index = data.index
    )
    fitted3 = sm.OLS(data['bid'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    return make_table(
        {
            'Self-evaluation': fitted1,
            r'Self-evaluation + gender': fitted2,
            r'Self-evaluation + gender + performance*': fitted3
        },
        title = f'Employer bids, with {"first" if promote_type == 1 else "second"} self-evaluation type',
        notes = ['*$p<0.1$, **$p<0.05$, ***$p<0.01$.', 'Standard errors clustered by employer.', '(†) indicates inclusion of performance fixed effects.']
    )
    

In [5]:
print(hyp1_3_table(1))

\begin{table}
\centering
\caption{Employer bids, with first self-evaluation type}
\begin{tabular}{lp{8em}p{8em}p{8em}}
\toprule
 & Self-evaluation & Self-evaluation \newline + gender & Self-evaluation \newline + gender \newline + performance$^\dagger$ \\
\midrule
const & \shortstack{21.7 (14)} & \shortstack{29.1 (20)} & \shortstack{142 (22)***} \\
Self-evaluation & \shortstack{19.8 (2.9)***} & \shortstack{15.5 (4.5)***} & \shortstack{7.01 (4)*} \\
Female &  & \shortstack{28.9 (27)} & \shortstack{45 (26)*} \\
Self-evaluation x Female &  & \shortstack{-7.03 (6)} & \shortstack{-7.47 (5.7)} \\
\bottomrule
\multicolumn{3}{p{25em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by employer. \newline\quad (†) indicates inclusion of performance fixed effects.}
\end{tabular}
\end{table}



In [6]:
print(hyp1_3_table(2))

\begin{table}
\centering
\caption{Employer bids, with second self-evaluation type}
\begin{tabular}{lp{8em}p{8em}p{8em}}
\toprule
 & Self-evaluation & Self-evaluation \newline + gender & Self-evaluation \newline + gender \newline + performance$^\dagger$ \\
\midrule
const & \shortstack{40.8 (19)**} & \shortstack{4.39 (17)} & \shortstack{107 (52)**} \\
Self-evaluation & \shortstack{1 (0.2)***} & \shortstack{1.23 (0.23)***} & \shortstack{-0.0243 (0.27)} \\
Female &  & \shortstack{16.8 (21)} & \shortstack{-3.29 (30)} \\
Self-evaluation x Female &  & \shortstack{-0.122 (0.28)} & \shortstack{0.0497 (0.33)} \\
\bottomrule
\multicolumn{3}{p{25em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by employer. \newline\quad (†) indicates inclusion of performance fixed effects.}
\end{tabular}
\end{table}



In [7]:
def get_hyp7_fit(treatment=None, promote_type=1):
    data = df_app if treatment is None else df_app[df_app['treatment'] == treatment]
    X = pd.DataFrame(
        sm.add_constant(
            np.concatenate(
                (
                    data['female'].to_numpy().reshape(-1, 1),
                    pd.get_dummies(data['eval_correct']).to_numpy()[:, :-1]
                ),
                axis=1
            )
        ),
        columns = ['const', 'Female'] + [f"fe{i}" for i in range(9)],
        index = data.index
    )
    return sm.OLS(data[f'promote{promote_type}'], X).fit(
        cov_type='HC1'
    )

def hyp7_table(promote_type=1):
    fits = [get_hyp7_fit(t, promote_type) for t in [None, 1, 2, 3]]
    return make_table(
        {
            'All treatments': fits[0],
            'Self-evaluation': fits[2],
            'Self-evaluation + gender': fits[2],
            'Self-evaluation + gender + performance*': fits[3]
        },
        title = f'Applicant self-evaluation, with {"first" if promote_type == 1 else "second"} self-evaluation type',
        notes = ['*$p<0.1$, **$p<0.05$, ***$p<0.01$.', 'Standard errors clustered by applicant.', '(†) indicates inclusion of performance fixed effects.'],
    )

In [8]:
print(hyp7_table(1))

\begin{table}
\centering
\caption{Applicant self-evaluation, with first self-evaluation type}
\begin{tabular}{lp{8em}p{8em}p{8em}p{8em}}
\toprule
 & All treatments & Self-evaluation & Self-evaluation \newline + gender & Self-evaluation \newline + gender \newline + performance$^\dagger$ \\
\midrule
const & \shortstack{5.17 (0.21)***} & \shortstack{5.59 (0.36)***} & \shortstack{5.59 (0.36)***} & \shortstack{5.08 (0.13)***} \\
Female & \shortstack{0.117 (0.12)} & \shortstack{0.0728 (0.22)} & \shortstack{0.0728 (0.22)} & \shortstack{-0.152 (0.23)} \\
\bottomrule
\multicolumn{4}{p{30em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by applicant. \newline\quad (†) indicates inclusion of performance fixed effects.}
\end{tabular}
\end{table}



In [9]:
print(hyp7_table(2))

\begin{table}
\centering
\caption{Applicant self-evaluation, with second self-evaluation type}
\begin{tabular}{lp{8em}p{8em}p{8em}p{8em}}
\toprule
 & All treatments & Self-evaluation & Self-evaluation \newline + gender & Self-evaluation \newline + gender \newline + performance$^\dagger$ \\
\midrule
const & \shortstack{92.5 (2)***} & \shortstack{92.3 (4.9)***} & \shortstack{92.3 (4.9)***} & \shortstack{95.9 (4.9)***} \\
Female & \shortstack{3.18 (2.3)} & \shortstack{4.32 (4)} & \shortstack{4.32 (4)} & \shortstack{-1.83 (4.5)} \\
\bottomrule
\multicolumn{4}{p{30em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by applicant. \newline\quad (†) indicates inclusion of performance fixed effects.}
\end{tabular}
\end{table}



In [10]:
def hyp4_fit(promote_type=1):
    data = df_guesses[(df_guesses['treatment'] == 1) & (df_guesses['promote_type_seen'] == promote_type)]
    X = pd.DataFrame(
            sm.add_constant(
            np.stack(
                (
                    data[f'other_promote{promote_type}'],
                    data['guesser_is_female'],
                    data['guesser_is_female'] * data[f'other_promote{promote_type}']
                ),
                axis=1
            )
        ),
        columns = ['const', 'Self-evaluation', 'Female guesser', 'Self-evaluation x Female guesser'],
        index = data.index
    )
    return sm.OLS(data['wage_guess'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

def hyp4_table():
    return make_table(
        {
            r'Second \newline self-evaluation type': hyp4_fit(2),
        },
        title = 'Wage guesses (self-evaluation-only treatment)',
        notes = ['*$p<0.1$, **$p<0.05$, ***$p<0.01$.', 'Standard errors clustered by guesser.',],
        colwidth = 12,
    )

In [11]:
print(hyp4_table())

\begin{table}
\centering
\caption{Wage guesses (self-evaluation-only treatment)}
\begin{tabular}{lp{12em}}
\toprule
 & Second \newline self-evaluation type \\
\midrule
const & \shortstack{63.9 (4.9)***} \\
Self-evaluation & \shortstack{0.912 (0.062)***} \\
Female guesser & \shortstack{23.5 (9.8)**} \\
Self-evaluation x Female guesser & \shortstack{-0.168 (0.16)} \\
\bottomrule
\multicolumn{1}{p{15em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by guesser.}
\end{tabular}
\end{table}



In [12]:
def hyp5_6_table(female = 1, promote_type = 1):
    data = df_guesses[
        (df_guesses['treatment'] == 2) & (df_guesses['promote_type_seen'] == promote_type) & (df_guesses['guesser_is_female'] == female)
    ]
    X = pd.DataFrame(
        sm.add_constant(
            np.stack(
                (
                    data[f'other_promote{promote_type}'],
                    data['other_is_female'],
                    data['other_is_female'] * data[f'other_promote{promote_type}']
                ),
                axis=1
            ),
        ),
        columns = ['const', 'Self-evaluation', 'Female', 'Self-evaluation x Female'],
        index = data.index
    )

    fitted1 = sm.OLS(data['wage_guess'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    data = df_guesses[(df_guesses['treatment'] == 3) & (df_guesses['promote_type_seen'] == promote_type) & df_guesses['guesser_is_female'] == female]
    X = pd.DataFrame(
        sm.add_constant(
            np.concatenate(
                (
                    np.stack(
                        (
                            data['other_promote1'],
                            data['other_is_female'],
                            data['other_is_female'] * data['other_promote1']
                        ),
                        axis=1
                    ),
                    pd.get_dummies(data['other_eval_correct']).to_numpy()[:, :-1]
                ),
                axis=1
            )
        ),
        columns = [
            'const', 'Self-evaluation', 'Female', 'Self-evaluation x Female'
        ] + [f"fe{i}" for i in range(10)],
        index = data.index
    )
    fitted2 = sm.OLS(data['wage_guess'], X).fit(
        cov_type='cluster', cov_kwds={'groups': data.index}
    )

    return make_table(
        {
            'Self-evaluation + gender': fitted1,
            'Self-evaluation + gender + performance*': fitted2
        },
        title = f'Wage guesses, with {"first" if promote_type == 1 else "second"} self-evaluation type and {"female" if female == 1 else "male"} guessers only',
        notes = ['*$p<0.1$, **$p<0.05$, ***$p<0.01$.', 'Standard errors clustered by guesser.', '(†) indicates inclusion of performance fixed effects.'],
    )

In [13]:
print(hyp5_6_table(1, 1))

\begin{table}
\centering
\caption{Wage guesses, with first self-evaluation type and female guessers only}
\begin{tabular}{lp{8em}p{8em}}
\toprule
 & Self-evaluation \newline + gender & Self-evaluation \newline + gender \newline + performance$^\dagger$ \\
\midrule
const & \shortstack{65 (16)***} & \shortstack{132 (13)***} \\
Self-evaluation & \shortstack{9.26 (3.6)***} & \shortstack{9.25 (3.2)***} \\
Female & \shortstack{-31.3 (23)} & \shortstack{29.9 (18)} \\
Self-evaluation x Female & \shortstack{10.6 (4.9)**} & \shortstack{-5.31 (4.8)} \\
\bottomrule
\multicolumn{2}{p{20em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by guesser. \newline\quad (†) indicates inclusion of performance fixed effects.}
\end{tabular}
\end{table}



In [14]:
print(hyp5_6_table(1, 2))

\begin{table}
\centering
\caption{Wage guesses, with second self-evaluation type and female guessers only}
\begin{tabular}{lp{8em}p{8em}}
\toprule
 & Self-evaluation \newline + gender & Self-evaluation \newline + gender \newline + performance$^\dagger$ \\
\midrule
const & \shortstack{68.2 (11)***} & \shortstack{173 (16)***} \\
Self-evaluation & \shortstack{0.755 (0.18)***} & \shortstack{-1.9 (3)} \\
Female & \shortstack{-35.4 (13)***} & \shortstack{12.9 (18)} \\
Self-evaluation x Female & \shortstack{0.589 (0.19)***} & \shortstack{-0.491 (4.5)} \\
\bottomrule
\multicolumn{2}{p{20em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by guesser. \newline\quad (†) indicates inclusion of performance fixed effects.}
\end{tabular}
\end{table}



In [15]:
print(hyp5_6_table(0, 1))

\begin{table}
\centering
\caption{Wage guesses, with first self-evaluation type and male guessers only}
\begin{tabular}{lp{8em}p{8em}}
\toprule
 & Self-evaluation \newline + gender & Self-evaluation \newline + gender \newline + performance$^\dagger$ \\
\midrule
const & \shortstack{83.9 (11)***} & \shortstack{114 (3.9)***} \\
Self-evaluation & \shortstack{11.8 (2.7)***} & \shortstack{4.35 (0.85)***} \\
Female & \shortstack{-0.467 (12)} & \shortstack{0.1 (4.7)} \\
Self-evaluation x Female & \shortstack{-2.06 (3)} & \shortstack{-0.804 (1.1)} \\
\bottomrule
\multicolumn{2}{p{20em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by guesser. \newline\quad (†) indicates inclusion of performance fixed effects.}
\end{tabular}
\end{table}



In [16]:
print(hyp5_6_table(0, 2))

\begin{table}
\centering
\caption{Wage guesses, with second self-evaluation type and male guessers only}
\begin{tabular}{lp{8em}p{8em}}
\toprule
 & Self-evaluation \newline + gender & Self-evaluation \newline + gender \newline + performance$^\dagger$ \\
\midrule
const & \shortstack{76.3 (8)***} & \shortstack{113 (3.8)***} \\
Self-evaluation & \shortstack{0.873 (0.14)***} & \shortstack{4.85 (0.86)***} \\
Female & \shortstack{-1.06 (16)} & \shortstack{0.555 (5)} \\
Self-evaluation x Female & \shortstack{-0.0898 (0.23)} & \shortstack{-1.01 (1.3)} \\
\bottomrule
\multicolumn{2}{p{20em}}{\textit{Notes}: *$p<0.1$, **$p<0.05$, ***$p<0.01$. \newline\quad Standard errors clustered by guesser. \newline\quad (†) indicates inclusion of performance fixed effects.}
\end{tabular}
\end{table}

