In [1]:
import os # type:ignore # isort:skip # fmt:skip # noqa # nopep8
import sys # type:ignore # isort:skip # fmt:skip # noqa # nopep8
from pathlib import Path # type:ignore # isort:skip # fmt:skip # noqa # nopep8

mod = sys.modules[__name__]

# Locate the project's code directory (a directory whose name contains
# `code_dir_name`) and make it importable by adding it to sys.path.
code_dir = None
code_dir_name = 'Code'
unwanted_subdir_name = 'Analysis'

cwd = Path.cwd()
if code_dir_name in cwd.name:
    # The notebook is already running from inside the code directory.
    code_dir = str(cwd)
else:
    # Inspect at most the five nearest ancestors. Slicing the list (instead of
    # indexing parents[0..4]) avoids an IndexError when the working directory
    # is fewer than five levels deep. `Path.name` is the last path component,
    # independent of the platform's path separator.
    for parent in list(cwd.parents)[:5]:
        if code_dir_name in parent.name and unwanted_subdir_name not in parent.name:
            code_dir = str(parent)
            break

# Only extend sys.path when a directory was actually found: appending None
# would leave a non-string entry on sys.path. Skipping entries that are
# already present keeps re-runs of this cell from piling up duplicates.
if code_dir is not None and code_dir not in sys.path:
    sys.path.append(code_dir)

# %load_ext autoreload
# %autoreload 2


In [2]:
from setup_module.imports import * # type:ignore # isort:skip # fmt:skip # noqa # nopep8
from estimators_get_pipe import * # type:ignore # isort:skip # fmt:skip # noqa # nopep8


Using MPS


0it [00:00, ?it/s]

Using MPS


<Figure size 640x480 with 0 Axes>

### Functions

In [3]:
def save_df_full_summary_excel(
    df_full_summary,
    title,
    text_to_add_list,
    file_save_path,
    sheet_name=None,
    startrow=None,
    startcol=None,
):
    '''
    Write a summary table to a formatted Excel workbook at `{file_save_path}.xlsx`.

    df_full_summary: pandas DataFrame holding the table body; NAs are written as empty strings
    title: str, written in a merged, italic row on top of the table
    text_to_add_list: list of str (or None), each written as a merged note row below a 'Note.' row
    file_save_path: str, output path without the '.xlsx' extension
    sheet_name: str, worksheet name (default 'All')
    startrow: int, zero-based row where the table header is written (default 1)
    startcol: int, zero-based column where the DataFrame index is written (default 1)

    Returns None. The worksheet calls used here (set_column, merge_range,
    add_format) are XlsxWriter API, so that engine is requested explicitly.
    '''
    if sheet_name is None:
        sheet_name = 'All'
    if startrow is None:
        startrow = 1
    if startcol is None:
        startcol = 1
    if text_to_add_list is None:
        text_to_add_list = []

    # Table geometry: one header row, then the body; the index occupies `startcol`
    header_range = 1
    n_rows, n_cols = df_full_summary.shape
    endrow = startrow + header_range + n_rows  # first row below the body
    endcol = startcol + n_cols                 # last body column

    # Remove NAs
    df_full_summary = df_full_summary.fillna('')

    # The context manager saves and closes the workbook even if formatting raises
    with pd.ExcelWriter(f'{file_save_path}.xlsx', engine='xlsxwriter') as writer:
        df_full_summary.to_excel(writer, sheet_name=sheet_name, merge_cells=True, startrow=startrow, startcol=startcol)
        workbook = writer.book
        worksheet = writer.sheets[sheet_name]

        # Hide the index column. set_column takes (first_col, last_col, ...); the
        # original passed `startrow` and a literal 1 here, which only addressed
        # the index column for the default startrow == startcol == 1.
        worksheet.set_column(startcol, startcol, None, None, {'hidden': True})

        # Title: merged across the table on the header row written by to_excel.
        # The original hard-coded row 1, which equals `startrow` only by default.
        worksheet.merge_range(startrow, startcol, startrow, endcol, title, workbook.add_format({'italic': True, 'font_name': 'Times New Roman', 'font_size': 12, 'font_color': 'black', 'align': 'left', 'top': True, 'bottom': True, 'left': False, 'right': False}))

        # Column widths: the first body column is wider than the rest
        for c in range(n_cols):
            worksheet.set_column(startcol + 1 + c, startcol + 1 + c, 15 if c == 0 else 10)

        # Main body. A cell's format depends only on whether it sits in the first
        # row, the last row and/or the first column, so the (at most eight)
        # format objects are created once and reused instead of one per cell.
        body_format_cache = {}
        for c, r in tqdm_product(range(n_cols), range(n_rows)):
            row_to_write = startrow + header_range + r
            col_to_write = startcol + 1 + c # 1 is for index

            format_key = (r == 0, r == n_rows - 1, c == 0)
            if format_key not in body_format_cache:
                body_formats = {'num_format': '0.00', 'font_name': 'Times New Roman', 'font_size': 12, 'font_color': 'black', 'align': 'center', 'text_wrap': True, 'left': False, 'right': False}
                if r == 0:
                    body_formats |= {'top': True, 'bottom': True, 'left': False, 'right': False}
                if r == n_rows - 1:
                    body_formats |= {'bottom': True}
                if c == 0:
                    body_formats |= {'align': 'left'}
                body_format_cache[format_key] = workbook.add_format(body_formats)

            worksheet.write(row_to_write, col_to_write, df_full_summary.iloc[r, c], body_format_cache[format_key])

        # Notes: a 'Note.' row directly below the body, then one merged row per text
        note_format = workbook.add_format({'italic': True, 'font_name': 'Times New Roman', 'font_size': 10, 'font_color': 'black', 'align': 'left', 'left': False, 'right': False})
        worksheet.merge_range(endrow, startcol, endrow, endcol, 'Note.', note_format)
        for i, text in enumerate(text_to_add_list):
            worksheet.merge_range(endrow + 1 + i, startcol, endrow + 1 + i, endcol, text, note_format)


In [4]:
def make_full_report(
    results, dv, dvs_name, dv_type,
    regression_info_dict=None, regressor_order=None, text_to_add_list=None, title=None, model_names=None
):
    '''
    Make a full report for a regression analysis and save it as CSV, LaTeX and Excel.

    results: statsmodels regression results object or list of results objects
    dv: str, dependent variable name (used in the default title and the file name)
    dvs_name: accepted for interface compatibility; not used inside this function
    dv_type: str, label placed in front of the default title and the file name
    regression_info_dict: dict mapping row label -> callable(result) -> str, passed to
        summary_col as info_dict; a default set of statistics is used when None
    regressor_order: list of regressor names passed to summary_col, or None
    text_to_add_list: list of str notes; a default note is used when None.
        The caller's list is not modified.
    title: str, table title; defaults to '{dv_type} OLS Regression {dv}'
    model_names: list of str column headers; derived from the endog name when None

    Returns the statsmodels summary object, or None when an IndexError is
    raised while the summary is built or saved.

    Files are written to '{table_save_path}{dv_type} OLS Regression {dv}' with the
    extensions .csv, .tex and .xlsx (table_save_path is read from the notebook globals).
    '''
    # Local import so the function does not depend on the notebook's star imports
    from io import StringIO

    if regression_info_dict is None:
        # Regression info dict
        # NOTE(review): the '[0]' entries below read the parameter at position 0,
        # i.e. the first exog column. When the constant is prepended (the
        # sm.add_constant default) that is the intercept - confirm this is the
        # coefficient the report is meant to show.
        regression_info_dict = {
            'Model Name': lambda x: f'{x.model.__class__.__name__}',
            'N': lambda x: f'{int(x.nobs):d}',
            'R-squared': lambda x: f'{x.rsquared:.5f}',
            'R-squared Adj.': lambda x: f'{x.rsquared_adj:.5f}',
            'Log-Likelihood': lambda x: f'{x.llf:.5f}',
            'Pseudo R2': lambda x: f'{x.prsquared:.5f}',
            'F': lambda x: f'{x.fvalue:.5f}',
            'F (p-value)': lambda x: f'{x.f_pvalue:.5f}',
            'df_model': lambda x: f'{x.df_model:.0f}',
            'df_total': lambda x: f'{x.df_resid + x.df_model + 1:.0f}',
            'df_resid': lambda x: f'{x.df_resid:.0f}',
            'AIC': lambda x: f'{x.aic:.5f}',
            'BIC': lambda x: f'{x.bic:.5f}',
            'ICC': lambda x: f'{x.rsquared / (x.rsquared + (x.nobs - 1) * x.mse_resid):.5f}',
            'RMSE': lambda x: f'{x.mse_resid ** 0.5:.5f}',
            'RMSE (std)': lambda x: f'{x.mse_resid ** 0.5 / x.model.endog.std():.5f}',
            'Omnibus': lambda x: f'{sms.omni_normtest(x.resid).statistic:.5f}',
            'Omnibus (p-value)': lambda x: f'{sms.omni_normtest(x.resid).pvalue:.5f}',
            'Skew': lambda x: f'{sms.jarque_bera(x.resid)[-2]:.5f}',
            'Kurtosis': lambda x: f'{sms.jarque_bera(x.resid)[-1]:.5f}',
            'Jarque-Bera (JB)': lambda x: f'{sms.jarque_bera(x.resid)[0]:.5f}',
            'Jarque-Bera (p-value)': lambda x: f'{sms.jarque_bera(x.resid)[1]:.5f}',
            'Intercept': lambda x: f'{x.params["const"]:.5f}',
            'Intercept (std)': lambda x: f'{x.bse["const"]:.5f}',
            'Intercept t': lambda x: f'{x.tvalues["const"]:.5f}',
            'Intercept t (p-value)': lambda x: f'{x.pvalues["const"]:.5f}',
            'Intercept (95% CI)': lambda x: f'{x.conf_int().loc["const"][0]:.5f} - {x.conf_int().loc["const"][1]:.5f}',
            # .iloc makes the positional lookup explicit; plain [0] on a
            # label-indexed Series is deprecated in recent pandas.
            'Unstandardized Coefficent B (b)': lambda x: f'{x.params.iloc[0]:.5f}',
            'Standard Error (SE)': lambda x: f'{x.bse.iloc[0]:.5f}',
            'Standardized Coefficient b* (β)': lambda x: f'{x.params.iloc[0] / x.model.endog.std():.5f}',
            't': lambda x: f'{x.tvalues.iloc[0]:.5f}',
            't (p-value)': lambda x: f'{x.pvalues.iloc[0]:.5f}',
            # Lower bound is column 0, upper bound column 1 (the original
            # printed the upper bound twice).
            '95% CI': lambda x: f'{x.conf_int().iloc[0, 0]:.5f} - {x.conf_int().iloc[0, 1]:.5f}',
        }

    if model_names is None:
        if isinstance(results, list):
            # One name per model; the first model is labelled as the full model.
            # (The original iterated over the length of the endog *name string*,
            # producing the wrong number of names.)
            base_name = results[0].model.endog_names.split('_')[0]
            model_names = [f'{base_name} Full Model'] + [
                f'{base_name} Model {i}' for i in range(1, len(results))
            ]
        else:
            model_names = [results.model.endog_names.split('_')[0]]

    # Work on a private copy so the caller's list is never extended in place
    if text_to_add_list is None:
        if regressor_order is not None:
            user_notes = ['Models are ordered by independent variable type.']
        else:
            user_notes = ['Models are ordered by coefficient size, largest to smallest.']
    else:
        user_notes = list(text_to_add_list)

    if title is None:
        title = f'{dv_type} OLS Regression {dv}'

    try:
        # Statsmodels summary_col
        full_summary = summary_col(
            results,
            stars=True,
            info_dict=regression_info_dict,
            regressor_order=regressor_order,
            float_format='%0.3f',
            model_names=model_names,
        )
        if isinstance(results, list) and len(results) > 4:
            # Blank the single-coefficient rows in the full-model column. A single
            # .loc[rows, column] assignment writes into the table itself, whereas
            # the chained table[column].loc[rows] form may write to a temporary.
            summary_table = full_summary.tables[0]
            full_model_col = summary_table.filter(regex='Full Model').columns[0]
            summary_table.loc['Unstandardized Coefficent B (b)': '95% CI', full_model_col] = ''

        # Add title and notes. The notes summary_col generated itself are captured
        # first so they are passed on to the Excel sheet without being added to
        # the summary a second time.
        full_summary.add_title(title)
        summary_notes = list(full_summary.extra_txt)
        for text in user_notes:
            full_summary.add_text(text)
        excel_notes = user_notes + summary_notes

        # Save
        save_name = f'{table_save_path}{dv_type} OLS Regression {dv}'
        print(f'Saving {save_name}...')
        # read_html expects a file-like object; literal HTML strings are deprecated
        df_full_summary = pd.read_html(StringIO(full_summary.as_html()))[0]
        df_full_summary.to_csv(f'{save_name}.csv')
        df_full_summary.style.to_latex(f'{save_name}.tex', hrules=True)
        save_df_full_summary_excel(df_full_summary, title, excel_notes, save_name)

        return full_summary
    except IndexError as error:
        # Best-effort behaviour is kept (None is returned), but no longer silently
        print(f'Could not build the full report for {dv_type} {dv}: {error!r}')
        return None


In [5]:
def get_standardized_coefficients(results):
    '''
    Build a coefficient table with an added scaled-coefficient column.

    results: fitted statsmodels results object whose params/bse/tvalues/pvalues
        are pandas Series and whose conf_int() returns a DataFrame with columns 0 and 1

    Returns (tt, df_std_coef):
        tt: result of results.t_test(np.diag(std)), where std holds the column
            standard deviations of the exog matrix (set to 1 for the first
            column when a 'const' parameter exists)
        df_std_coef: DataFrame with one row per parameter and string-formatted
            (5 decimals) columns 'Variable', 'Unstandardized Coefficent B (b)',
            'Standard Error', 'Standardized Coefficient b* (β)', 't-value',
            'p-value', '95% CI Lower', '95% CI Upper'

    For MixedLM results (or when 'Group Var' is among the exog names) the last
    parameter is left out of the table.
    '''
    # t-test on the coefficients scaled by the exog standard deviations
    std = results.model.exog.std(0)
    if 'const' in results.params:
        std[0] = 1
    tt = results.t_test(np.diag(std))

    # Drop the trailing entry for mixed models, keep everything otherwise
    is_mixed = results.model.__class__.__name__ == 'MixedLM' or 'Group Var' in results.model.exog_names
    offset = slice(None, -1) if is_mixed else slice(None, None)
    tt.c_names = results.model.exog_names[offset]

    # Column standard deviations over ALL observations. The original applied
    # `offset` to the exog matrix itself, which slices rows: for mixed models it
    # dropped the last observation rather than a parameter. The number of
    # columns, and therefore the alignment with the parameters, is unchanged.
    # NOTE(review): this divides b by sd(x), so a constant column (sd = 0)
    # yields inf/nan, while the t-test above multiplies by sd(x). Confirm which
    # definition of the standardized coefficient is intended.
    exog_std = results.model.exog.std(axis=0)

    conf_int = results.conf_int()
    five_decimals = '{:.5f}'.format

    # Make df with standardized and unstandardized coefficients
    df_std_coef = pd.DataFrame(
        {
            'Unstandardized Coefficent B (b)': results.params.iloc[offset].map(five_decimals),
            'Standard Error': results.bse.iloc[offset].map(five_decimals),
            'Standardized Coefficient b* (β)': (results.params.iloc[offset] / exog_std).map(five_decimals),
            't-value': results.tvalues.iloc[offset].map(five_decimals),
            'p-value': results.pvalues.iloc[offset].map(five_decimals),
            '95% CI Lower': conf_int[0].iloc[offset].map(five_decimals),
            '95% CI Upper': conf_int[1].iloc[offset].map(five_decimals),
        }
    )
    # Move the parameter names into a leading 'Variable' column; naming the axis
    # first makes this independent of whatever the index was called.
    df_std_coef = df_std_coef.rename_axis('Variable').reset_index()

    return tt, df_std_coef


### READ DATA

In [6]:
# Read the expected number of rows from the text file stored next to the data
with open(f'{data_dir}df_manual_len.txt', 'r') as f:
    df_manual_len = int(f.read())

# Load the pickled dataframe and check that its length matches the expected count.
# NOTE(review): read_pickle can execute arbitrary code - only load files from a trusted source.
df_manual = pd.read_pickle(f'{df_save_dir}df_manual_for_training.pkl')
assert len(df_manual) == df_manual_len, f'DATAFRAME MISSING DATA! DF SHOULD BE OF LENGTH {df_manual_len} BUT IS OF LENGTH {len(df_manual)}'
print(f'Dataframe df_manual_for_training loaded with shape: {df_manual.shape}')


Dataframe df_manual_for_training loaded with shape: (5947, 72)


In [7]:
# Read the expected number of rows from the text file stored next to the data
with open(f'{data_dir}df_jobs_len.txt', 'r') as f:
    df_jobs_len = int(f.read())

# Load the pickled dataframe and check that its length matches the expected count.
# NOTE(review): read_pickle can execute arbitrary code - only load files from a trusted source.
df_jobs = pd.read_pickle(f'{df_save_dir}df_jobs_classified.pkl')
assert len(df_jobs) == df_jobs_len, f'DATAFRAME MISSING DATA! DF SHOULD BE OF LENGTH {df_jobs_len} BUT IS OF LENGTH {len(df_jobs)}'
print(f'Dataframe df_jobs_classified loaded with shape: {df_jobs.shape}')


Dataframe df_jobs_classified loaded with shape: (307154, 92)


## Check biased and unbiased regression models using human-annotated and classifier-predicted Warmth and Competence
Source: https://mochenyang.github.io/mochenyangblog/research/2022/01/10/ForestIV.html

### Get job ad sentences that are shared between df_manual and df_jobs

In [8]:
# Keep only the sentences present in both frames (matched on job ID and
# sentence text). Columns that exist on both sides get the '_actual' suffix for
# the df_manual values and '_predicted' for the df_jobs values.
df_intersecting = df_manual[
    ['Job ID', 'Job Description spacy_sentencized', 'Warmth', 'Competence']
].merge(
    df_jobs[
        ['Job ID', 'Job Description spacy_sentencized', 'Warmth', 'Competence']
        + ivs_dummy_perc_and_perc_interactions
        + controls[:2]
    ],
    on=['Job ID', 'Job Description spacy_sentencized'],
    how='inner',
    suffixes=('_actual', '_predicted'),
)


In [9]:
# Preview the first rows of the merged frame
df_intersecting.head()


Unnamed: 0,Job ID,Job Description spacy_sentencized,Warmth_actual,Competence_actual,Warmth_predicted,Competence_predicted,Gender_Female,Gender_Mixed,Gender_Male,Gender_Female_% per Sector,Gender_Male_% per Sector,Age_Older,Age_Mixed,Age_Younger,Age_Older_% per Sector,Age_Younger_% per Sector,Interaction_Female_Older_% per Sector,Interaction_Female_Younger_% per Sector,Interaction_Male_Older_% per Sector,Interaction_Male_Younger_% per Sector,% Sector per Workforce,Job Description spacy_sentencized_num_words
0,3768944208,Work closely with our Sales and Product leader...,1,1,1,1,1,0,0,84.3,15.63,0,1,0,46.13,53.73,3888.45,4529.66,721.04,839.94,5.67,23
1,3768944208,Assist Marketing & Product to position our pro...,1,1,1,1,1,0,0,84.3,15.63,0,1,0,46.13,53.73,3888.45,4529.66,721.04,839.94,5.67,17
2,3768944208,Were growing our Sales team for the EMEA Marke...,1,0,0,0,1,0,0,84.3,15.63,0,1,0,46.13,53.73,3888.45,4529.66,721.04,839.94,5.67,19
3,3768944208,We don't believe in a complex hierarchy.,1,0,1,0,1,0,0,84.3,15.63,0,1,0,46.13,53.73,3888.45,4529.66,721.04,839.94,5.67,7
4,3768944208,Fun secondary benefits that we would love to h...,1,0,1,0,1,0,0,84.3,15.63,0,1,0,46.13,53.73,3888.45,4529.66,721.04,839.94,5.67,12


In [10]:
# Column dtypes and non-null counts of the merged frame
df_intersecting.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3843 entries, 0 to 3842
Data columns (total 22 columns):
 #   Column                                       Non-Null Count  Dtype  
---  ------                                       --------------  -----  
 0   Job ID                                       3843 non-null   object 
 1   Job Description spacy_sentencized            3843 non-null   object 
 2   Warmth_actual                                3843 non-null   int64  
 3   Competence_actual                            3843 non-null   int64  
 4   Warmth_predicted                             3843 non-null   int64  
 5   Competence_predicted                         3843 non-null   int64  
 6   Gender_Female                                3843 non-null   int64  
 7   Gender_Mixed                                 3843 non-null   int64  
 8   Gender_Male                                  3843 non-null   int64  
 9   Gender_Female_% per Sector                   3843 non-null   float64
 10  

In [11]:
# Descriptive statistics of the numeric columns in the merged frame
df_intersecting.describe()


Unnamed: 0,Warmth_actual,Competence_actual,Warmth_predicted,Competence_predicted,Gender_Female,Gender_Mixed,Gender_Male,Gender_Female_% per Sector,Gender_Male_% per Sector,Age_Older,Age_Mixed,Age_Younger,Age_Older_% per Sector,Age_Younger_% per Sector,Interaction_Female_Older_% per Sector,Interaction_Female_Younger_% per Sector,Interaction_Male_Older_% per Sector,Interaction_Male_Younger_% per Sector,% Sector per Workforce,Job Description spacy_sentencized_num_words
count,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0,3843.0
mean,0.32,0.51,0.36,0.53,0.18,0.62,0.2,46.11,53.87,0.12,0.37,0.51,38.64,61.12,1793.74,2807.24,2069.63,3303.82,4.51,14.42
std,0.47,0.5,0.48,0.5,0.39,0.49,0.4,17.25,17.28,0.32,0.48,0.5,8.76,8.82,906.49,954.25,936.54,1096.44,5.2,8.97
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.44,15.63,0.0,0.0,0.0,18.94,44.44,903.69,864.2,721.04,839.94,0.11,3.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,42.75,56.87,0.0,0.0,0.0,31.96,53.14,1366.56,2843.37,1829.74,2830.2,2.14,8.0
50%,0.0,1.0,0.0,1.0,0.0,1.0,0.0,42.75,57.25,0.0,0.0,1.0,31.96,67.83,1366.56,2900.09,1829.74,3883.05,3.85,13.0
75%,1.0,1.0,1.0,1.0,0.0,1.0,0.0,43.13,57.25,0.0,1.0,1.0,46.68,67.83,2100.27,2900.09,1938.77,3883.05,3.85,19.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,84.3,80.56,1.0,1.0,1.0,58.33,80.81,3888.45,4529.66,4699.07,4869.2,31.39,68.0


#### Check that the human-annotated (actual) and classifier-predicted Warmth and Competence labels differ

In [12]:
# True only if the two Warmth columns are element-wise identical
df_intersecting['Warmth_actual'].equals(df_intersecting['Warmth_predicted'])


False

In [13]:
# True only if the two Competence columns are element-wise identical
df_intersecting['Competence_actual'].equals(df_intersecting['Competence_predicted'])


False

### Unbiased and biased OLS regressions: human-annotated (actual) vs. classifier-predicted Warmth and Competence as DVs, with all IVs

In [14]:
def compare_actual_and_predicted(df):
    '''
    Fit and report OLS models on the actual and the predicted version of every DV.

    df: pandas DataFrame containing, for every dv in the global `dvs`, the columns
        '{dv}_actual' and '{dv}_predicted', plus the regressors listed in
        ivs_dummy_perc_and_perc_interactions + controls[:2]

    For each DV two models are fitted on the same regressors (with an added
    constant): 'Unbiased' uses '{dv}_actual', 'Biased' uses '{dv}_predicted'.
    Each model is printed, passed to make_full_report, and saved as CSV/LaTeX
    under '{table_save_path}{dv_type} OLS Regression {dv}'. The two R-squared
    values are printed and compared per DV. Returns None.
    '''
    exog_names = ivs_dummy_perc_and_perc_interactions + controls[:2]
    constant = sm.add_constant(df[exog_names])

    for dv in dvs:
        # Each entry starts as [endog column name]; the model's R-squared is appended after fitting
        endog_names_dict = {'Unbiased': [f'{dv}_actual'], 'Biased': [f'{dv}_predicted']}

        for dv_type, endog_names in endog_names_dict.items():
            # OLS takes the arrays directly; it has no `data` parameter (that
            # belongs to the formula interface), so the stray keyword is dropped.
            results = sm.OLS(endog=df[endog_names[0]], exog=constant).fit()
            _, df_std_coef = get_standardized_coefficients(results)
            full_summary = make_full_report(
                results, dv, dvs_name=dv, dv_type=dv_type, title=f'{dv_type} OLS Regression {dv}'
            )
            endog_names.append(results.rsquared)

            # Build the summary once and reuse it for printing and for the CSV export
            results_summary = results.summary()

            print('\n')
            print('-'*20)
            print(f'{dv_type.upper()} {dv}\n')
            print('-'*20)
            print('\n')
            print(f'{dv_type.upper()} SUMMARY RESULTS:')
            print(results_summary)
            print(full_summary)
            print('\n')
            print('-'*20)
            print(f'{dv_type.upper()} STANDARDIZED BETA REGRESSION COEFFICIENTS FOR {dv}:\n{df_std_coef}')
            print('\n')
            print('-'*20)

            # NOTE(review): make_full_report has just written '{save_name}.csv' and
            # '{save_name}.tex' for the same save_name; the two writes below replace
            # those files. Confirm which table is meant to live under this name.
            save_name = f'{table_save_path}{dv_type} OLS Regression {dv}'
            df_summary_results = pd.DataFrame(csv.reader(results_summary.as_csv().split('\n'), delimiter=','))
            df_summary_results.to_csv(f'{save_name}.csv')
            df_summary_results.style.to_latex(f'{save_name}.tex', hrules=True)
            df_std_coef.to_csv(f'{save_name} - standardized coefficients.csv')
            df_std_coef.style.to_latex(f'{save_name} - standardized coefficients.tex', hrules=True)

        # Compare the R-squared of the first and the last model type
        model_types = list(endog_names_dict)
        first_type, last_type = model_types[0], model_types[-1]
        first_r2, last_r2 = endog_names_dict[first_type][-1], endog_names_dict[last_type][-1]

        if first_r2 != last_r2:
            print('\n')
            print('-'*20)
            print(f'{dv} {first_type} R-Squared does not equal {last_type} R-Squared:')
            print(f'{dv} {first_type} = {first_r2:.3f}')
            print(f'{dv} {last_type} = {last_r2:.3f}')
            print('\n')
            print('-'*20)
        print('\n')
        print('-'*20)
        print(f'{dv} R-Squared:\n')
        print(endog_names_dict)
        print('\n')
        print('-'*20)


In [15]:
compare_actual_and_predicted(df_intersecting)


Saving /Users/nyxinsane/Documents/Work - UvA/Automating Equity/Automating_Equity1/Automating_Equity1_Code/data/output tables/Unbiased OLS Regression Warmth...


  0%|          | 0/140 [00:00<?, ?it/s]



--------------------
UNBIASED Warmth

--------------------


UNBIASED SUMMARY RESULTS:
                            OLS Regression Results                            
Dep. Variable:          Warmth_actual   R-squared:                       0.056
Model:                            OLS   Adj. R-squared:                  0.052
Method:                 Least Squares   F-statistic:                     16.15
Date:                Sat, 28 Oct 2023   Prob (F-statistic):           4.47e-39
Time:                        16:59:22   Log-Likelihood:                -2410.8
No. Observations:                3843   AIC:                             4852.
Df Residuals:                    3828   BIC:                             4945.
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
---------

  0%|          | 0/140 [00:00<?, ?it/s]



--------------------
BIASED Warmth

--------------------


BIASED SUMMARY RESULTS:
                            OLS Regression Results                            
Dep. Variable:       Warmth_predicted   R-squared:                       0.071
Model:                            OLS   Adj. R-squared:                  0.068
Method:                 Least Squares   F-statistic:                     21.04
Date:                Sat, 28 Oct 2023   Prob (F-statistic):           2.27e-52
Time:                        16:59:22   Log-Likelihood:                -2499.7
No. Observations:                3843   AIC:                             5029.
Df Residuals:                    3828   BIC:                             5123.
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
-------------

  0%|          | 0/140 [00:00<?, ?it/s]



--------------------
UNBIASED Competence

--------------------


UNBIASED SUMMARY RESULTS:
                            OLS Regression Results                            
Dep. Variable:      Competence_actual   R-squared:                       0.081
Model:                            OLS   Adj. R-squared:                  0.077
Method:                 Least Squares   F-statistic:                     23.96
Date:                Sat, 28 Oct 2023   Prob (F-statistic):           2.89e-60
Time:                        16:59:22   Log-Likelihood:                -2627.1
No. Observations:                3843   AIC:                             5284.
Df Residuals:                    3828   BIC:                             5378.
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                                                  coef    std err          t      P>|t|      [0.025      0.975]
-----

  0%|          | 0/140 [00:00<?, ?it/s]



--------------------
BIASED Competence

--------------------


BIASED SUMMARY RESULTS:
                             OLS Regression Results                             
Dep. Variable:     Competence_predicted   R-squared:                       0.093
Model:                              OLS   Adj. R-squared:                  0.090
Method:                   Least Squares   F-statistic:                     28.12
Date:                  Sat, 28 Oct 2023   Prob (F-statistic):           2.01e-71
Time:                          16:59:23   Log-Likelihood:                -2592.0
No. Observations:                  3843   AIC:                             5214.
Df Residuals:                      3828   BIC:                             5308.
Df Model:                            14                                         
Covariance Type:              nonrobust                                         
                                                  coef    std err          t      P>|t|      [0.025  

# Make Instrumental Variables