In [1]:
import os  # type:ignore # isort:skip # fmt:skip # noqa # nopep8
import sys  # type:ignore # isort:skip # fmt:skip # noqa # nopep8
from pathlib import Path  # type:ignore # isort:skip # fmt:skip # noqa # nopep8

mod = sys.modules[__name__]

code_dir = None
code_dir_name = 'Code'
unwanted_subdir_name = 'Analysis'

for _ in range(5):

    parent_path = str(Path.cwd().parents[_]).split('/')[-1]

    if (code_dir_name in parent_path) and (unwanted_subdir_name not in parent_path):

        code_dir = str(Path.cwd().parents[_])

        if code_dir is not None:
            break

sys.path.append(code_dir)
# %load_ext autoreload
# %autoreload 2


In [2]:
from setup_module.imports import *  # type:ignore # isort:skip # fmt:skip # noqa # nopep8
from setup_module import researchpy_fork as rp # type:ignore # isort:skip # fmt:skip # noqa # nopep8
from setup_module import specification_curve_fork as specy # type:ignore # isort:skip # fmt:skip # noqa # nopep8


Using MPS


0it [00:00, ?it/s]

<Figure size 640x480 with 0 Axes>

# Functions

In [3]:
# Function to order categories
def categorize_df_results_gender_age(df, gender_order=None, age_order=None, ivs=None):
    if gender_order is None:
        gender_order = ['Female', 'Mixed Gender', 'Male']
    if age_order is None:
        age_order = ['Older', 'Mixed Age', 'Younger']
    if ivs is None:
        ivs = ['Gender', 'Age']
    # Arrange Categories
    for iv in ivs:
        if iv == 'Gender':
            order = gender_order
        elif iv == 'Age':
            order = age_order
        try:
            df[iv] = df[iv].astype('category').cat.reorder_categories(order, ordered=True)

            df[iv] = pd.Categorical(
                df[iv], categories=order, ordered=True
            )
            df[f'{iv}_Num'] = pd.to_numeric(df[iv].cat.codes).astype('int64')
        except ValueError as e:
            print(e)

    return df


# READ DATA

In [4]:
with open(f'{data_dir}df_manual_len.txt', 'r') as f:
    df_manual_len = int(f.read())

df_manual = pd.read_pickle(f'{df_save_dir}df_manual_for_training.pkl')
assert len(df_manual) == df_manual_len, f'DATAFRAME MISSING DATA! DF SHOULD BE OF LENGTH {df_manual_len} BUT IS OF LENGTH {len(df_manual)}'
print(f'Dataframe loaded with shape: {df_manual.shape}')


Dataframe loaded with shape: (5947, 68)


In [5]:
with open(f'{data_dir}df_jobs_len.txt', 'r') as f:
    df_jobs_len = int(f.read())

# df_jobs = pd.read_pickle(f'{df_save_dir}df_jobs_for_analysis.pkl')
df_jobs = pd.read_pickle(f'{df_save_dir}df_jobs_for_analysis.pkl')
assert len(df_jobs) == df_jobs_len, f'DATAFRAME MISSING DATA! DF SHOULD BE OF LENGTH {df_jobs_len} BUT IS OF LENGTH {len(df_jobs)}'
print(f'Dataframe loaded with shape: {df_jobs.shape}')


Dataframe loaded with shape: (307300, 83)


In [6]:
df_manual = categorize_df_results_gender_age(df_manual)

In [7]:
df_jobs = categorize_df_results_gender_age(df_jobs)

## Set dataframes

In [8]:
dataframes = {
    'df_manual': df_manual,
    'df_jobs': df_jobs,
}

# Analysis plan:

1. ## [Descriptives, visualizations, and tables](./1.%20descriptives_visualization_and_tables.ipynb)
2. ## [Frequencies and Normality tests](./2.%20frequencies_and_normality_test.ipynb)
   1. ### Frequencies, histograms, and QQ plots
      * Normal test
      * Kurtosis test
      * Shapiro
      * Anderson
      * Bartlett
   2. ### Correlation between independent variables (ivs) and control variables and Multicolinarity test
      * Pearson's R
      * VIF
     - ***ivs_dummy*** (binary nominal) = Social category designation (Female, Male, Mixed Gender)
     - ***ivs_perc*** (continous ratio) = Social category percentage per sector (0-100)
     - ***% Sector per Workforce*** (continous ratio) = Sector percentage per worksforce (0-100)
     - ***num_words*** (continous ratio) = Number of words in job description
     - ***English Requirement in Job Ad*** (binary nominal) = English requirement in job description (0 vs. 1)
     - ***Dutch Requirement in Job Ad*** (binary nominal) = Dutch requirement in job description (0 vs. 1)
     - ***Platform*** (binary dummy) = LinkedIn (0 vs. 1), Indeed (0 vs. 1), Glassdoor (0 vs. 1)

3. ## [Secondary Analysis](./3.%20chisqt_anova_and_regression.ipynb)

   1. ### Chi-square
      * **df_manual:**
        - ***dvs*** (binary nominal) = 'Warmth' and 'Competence' (0 vs. 1)
        - ***ivs*** (binary nominal) = Social category designation (Female, Male, Mixed Gender)
      * **df_jobs:**
        - ***dvs*** (binary nominal) = 'Warmth' and 'Competence' (0 vs. 1)
        - ***ivs*** (binary nominal) = Social category designation (Female, Male, Mixed Gender)

   2. ### One-way ANOVA, interactions, and post-hoc test
      * **df_manual:**
        - ***dvs*** (binary nominal) = 'Warmth' and 'Competence' (0 vs. 1)
        - ***ivs*** (binary nominal) = Social category designation (Female, Male, Mixed Gender)
          - If Levene's test is *not significant*, use classic ANOVA and Tukey's post hoc test
          - If Levene's test is *significant*, use Welch's and Kruskal-Wallis ANOVA and Games Howell's post hoc test
      * **df_jobs:**
         - ***dvs_prob*** (continous ratio) = 'Warmth' and 'Competence' probabilities (0-1)
         - ***ivs*** (binary nominal) = Social category designation (Female, Male, Mixed Gender)
           - If Levene's test is *not significant*, use classic ANOVA and Tukey's post hoc test
           - If Levene's test is *significant*, use Welch's and Kruskal-Wallis ANOVA and Games Howell's post hoc test

   3. ### Logistic Regression  with all interaction (smf):
      * **df_manual:**
        - ***dvs*** (binary nominal) = 'Warmth' and 'Competence' (0 vs. 1)
        - ***ivs_perc*** (continous ratio) = Social category percentage per sector (0-100)
      * **df_jobs:**
        - ***dvs*** (binary nominal) = 'Warmth' and 'Competence' (0 vs. 1)
        - ***ivs_perc*** (continous ratio) = Social category percentage per sector (0-100)
   4. ### OLS Regression with all interaction:
      * **df_jobs:**
        - ***dvs_prob*** (continous ratio) = 'Warmth' and 'Competence' probabilities (0-1)
        - ***ivs_perc*** (continous ratio) = Social category percentage per sector (0-100)
   5. ### Multilevel OLS Regression with all interaction:
      * **df_jobs:**
        - ***dvs_prob*** (continous ratio) = 'Warmth' and 'Competence' probabilities (0-1)
        - ***ivs_perc*** (continous ratio) = Social category percentage per sector (0-100)

4. ## [Main Analysis](./4.%20specification_curve_analysis.ipynb)

   1. ### Logistic Specification Curve Analysis:
      * **df_manual:**
        - ***dvs*** (binary nominal) = 'Warmth' and 'Competence' (0 vs. 1)
        - ***ivs_perc*** (continous ratio) = Social category percentage per sector (0-100)
      * **df_jobs:**
        - ***dvs*** (binary nominal) = 'Warmth' and 'Competence' (0 vs. 1)
        - ***ivs_perc*** (continous ratio) = Social category percentage per sector (0-100)
   1. ### OLS Specification Curve Analysis:
      * **df_jobs:**
        - ***dvs_prob*** (continous ratio) = 'Warmth' and 'Competence' probabilities (0-1)
        - ***ivs_perc*** (continous ratio) = Social category percentage per sector (0-100)


# Chi-square

In [9]:
for df_name, df in dataframes.items():
    print('+'*120)
    print(f'{"="*50} RESULTS FOR {df_name} {"="*50}')

    for iv, dv in tqdm_product(ivs_dummy, dvs):

        # Chi-square
        chisqt = pd.crosstab(df[iv], df[dv])
        pearson_r, p_value, dof, expected = scipy.stats.chi2_contingency(chisqt)
        reject_H0 = p_value > alpha
        if not reject_H0:
            print('+'*120)
            print(f'Dependent Variable: {dv}\nIndependent Variable: {iv}')
            print('+'*120)
            print('\n')
            print('~' * 20)
            print(f"The Pearsons's R value: {pearson_r:.3f}\nDegree of freedom: {dof}")
            print('-'*20)
            print(f'Observed Count:\n{chisqt}\n')
            print('-'*20)
            print(f'Expected Count:\n{expected}\n')
            print('-'*20)
            print('-' * 20)
            print(f"Pearsons's R p-value: {p_value:.3f}. Rejected: {p_value < 0.05}")
            print('~' * 20)

            # # Plot acceptance region distribution
            # x = np.linspace(0, 10, 100)
            # fig,ax = plt.subplots(1,1, figsize=(15,10))
            # #plotting vertical line for critical value 
            # plt.axvline(x=scipy.stats.chi2.isf(0.05,dof), ymin=0, ymax= 0.3,label='X-Critical',color='black')
            # #plotting vertical line for calculated value. 
            # plt.axvline(x=stat, ymin=0, ymax= 0.3,label='X-calculated',color='blue')
            # #plotting distribution graph for our calculated degrees of freedom
            # ax.plot(x, scipy.stats.chi2.pdf(x, dof), label=f'df = {str(dof)}', color='red')
            # ax.set_xlabel('Value',fontsize=12, fontweight='bold')
            # ax.set_ylabel('Probability Distribution',fontsize=12,fontweight='bold')
            # ax.set_title(f'Chi-Square Distribution for {dv} x {iv}',fontsize=16,fontweight='bold')
            # plt.xlim(0, 10)
            # plt.ylim(0, 0.6)
            # plt.ion()
            # plt.legend()
            # plt.show()


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


  0%|          | 0/12 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dependent Variable: Competence
Independent Variable: Gender_Female
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


~~~~~~~~~~~~~~~~~~~~
The Pearsons's R value: 3.961
Degree of freedom: 1
--------------------
Observed Count:
Competence       0     1 
Gender_Female            
0              2638  2349
1               542   418

--------------------
Expected Count:
[[2666.66554565 2320.33445435]
 [ 513.33445435  446.66554565]]

--------------------
--------------------
Pearsons's R p-value: 0.047. Rejected: True
~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dependent Variable: Warmth
Independent Variable: Gender_Mixed
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

  0%|          | 0/12 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dependent Variable: Competence
Independent Variable: Gender_Female
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


~~~~~~~~~~~~~~~~~~~~
The Pearsons's R value: 468.625
Degree of freedom: 1
--------------------
Observed Count:
Competence        0       1  
Gender_Female                
0              108101  121103
1               40334   37762

--------------------
Expected Count:
[[110712.31936219 118491.68063781]
 [ 37722.68063781  40373.31936219]]

--------------------
--------------------
Pearsons's R p-value: 0.000. Rejected: True
~~~~~~~~~~~~~~~~~~~~
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dependent Variable: Warmth
Independent Variable: Gender_Mixed
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# ANOVA

In [10]:
for df_name, df in dataframes.items():

    print('+'*120)
    print(f'{"="*50} RESULTS FOR {df_name} {"="*50}')
    if df_name == 'df_manual':
        dvs_ = dvs
    elif df_name == 'df_jobs':
        dvs_ = dvs_all

    for iv, dv in tqdm_product(ivs, dvs_):
        print('+'*120)
        print(f'Dependent Variable: {dv}\nIndependent Variable: {iv}')
        print('+'*120)

        # LEVENE'S TESTS
        print("LEVENE'S TEST")
        print('\n')
        print('~' * 20)
        print(f'{iv} x {dv}')
        levene = pg.homoscedasticity(data=df, dv=dv, group=iv, method='levene').round(3) #dv
        equal_var_levene = bool(levene['equal_var'].to_string(index=False))
        print(f"{iv} x {dv} Levene's test:\n{levene}")
        levene.to_csv(f"{table_save_path}levene's {df_name} - {iv} x {dv}.csv")
        print('~' * 20)
        print('\n')

        # SCIPY ANOVAS
        print('ANOVA SIGNIFICANCE')
        print('\n')
        print('~' * 20)
        print(f'{iv} x {dv}')
        f_statistic, p_value = f_oneway(
            df[dv][df[iv] == ivs_dict[iv][0]],
            df[dv][df[iv] == ivs_dict[iv][1]],
            df[dv][df[iv] == ivs_dict[iv][2]]
        )
        reject_H0 = p_value > alpha
        print('-' * 20)
        print(f'One-way ANOVA p-value: {p_value}. Rejected: {p_value < 0.05}')
        print('~' * 20)

        if not reject_H0:
            # INTERACTION MODEL
            print(f'INTEACTION ANOVA {dv}')
            print('\n')
            print('~' * 20)
            print(f'{iv} x {dv}')
            formula = f'{dv} ~ C({ivs[0]})*C({ivs[1]})'
            model = ols(data = df, formula = formula).fit()
            anova_interaction = sm.stats.anova_lm(model, typ=2).round(3)
            print(f'{iv} x {dv} ANOVA INTERACTION:\n{anova_interaction}')
            print('~' * 20)
            print('\n')

            if equal_var_levene:
                # ONE-WAY ANOVA
                print('ONE-WAY ANOVA')
                print('\n')
                print('~' * 20)
                print(f'{iv} x {dv}')
                anova1 = pg.anova(data=df, dv=dv, between=iv, detailed=True).round(3)
                print(f'{iv} x {dv} ONE-WAY ANOVA:\n{anova1}')
                anova1.to_csv(f'{table_save_path}one-way anova {df_name} - {iv} x {dv}.csv')
                print('~' * 20)
                print('\n')

                # TWO-WAY ANOVA
                print('TWO-WAY ANOVA')
                print('\n')
                print('~' * 20)
                print(f'{iv} x {dv}')
                anova2 = pg.anova(data=df, dv=dv, between=ivs, detailed=True).round(3)
                print(f'{iv} x {dv} TWO-WAY ANOVA:\n{anova2}')
                anova2.to_csv(f'{table_save_path}two-way anova {df_name} - {ivs[0]} and {ivs[1]} x {dv}.csv')
                print('~' * 20)
                print('\n')

                # TUKEY POST HOC
                print("POST HOC TUKEY'S ANOVA")
                print('\n')
                print('~' * 20)
                print(f'{iv} x {dv}')
                anova_pairwise_tukey = pg.pairwise_tukey(
                    data=df, dv=dv, between=iv, effsize='eta-square'
                ).round(3)
                pg.print_table(anova_pairwise_tukey)
                anova_pairwise_tukey.to_csv(f'{table_save_path}post hoc tukey {df_name} - {iv} x {dv}.csv')
                print('~' * 20)
                print('\n')

            if not equal_var_levene:
                # WELCH ANOVA
                print('WELCH ANOVA')
                print('\n')
                print('~' * 20)
                print(f'{iv} x {dv}')
                anova_welch = pg.welch_anova(data=df, dv=dv, between=iv).round(3)
                pg.print_table(anova_welch)
                anova_welch.to_csv(f'{table_save_path}welch anova {df_name} - {iv} x {dv}.csv')
                print('~' * 20)
                print('\n')

                # ## INTERACTION ANOVA
                # print('WELCH INTERACTION ANOVA')
                # print('\n')
                # print('~' * 20)
                # print(f'{ivs[0]} and {ivs[1]} x {dv}')
                # anova_welch_interaction = pg.welch_anova(data=df, dv=dv, between=ivs).round(3)
                # pg.print_table(anova_welch_interaction)
                # anova_welch_interaction.to_csv(f'{table_save_path}welch interaction anova {df_name} - {ivs} x {dv}.csv')
                # print('~' * 20)
                # print('\n')

                # KRUSKAL-WALLIS ANOVA
                print('KRUSKAL-WALLIS ANOVA')
                print('\n')
                print('~' * 20)
                print(f'{iv} x {dv}')
                anova_kruskal = pg.kruskal(data=df, dv=dv, between=iv).round(3)
                pg.print_table(anova_kruskal)
                anova_kruskal.to_csv(f'{table_save_path}kruskal-wallis anova {df_name} - {iv} x {dv}.csv')
                print('~' * 20)
                print('\n')

                # ## INTERACTION ANOVA
                # print('KRUSKAL-WALLIS INTERACTION ANOVA')
                # print('\n')
                # print('~' * 20)
                # print(f'{ivs[0]} and {ivs[1]} x {dv}')
                # anova_kruskal_interaction = pg.kruskal(data=df, dv=dv, between=ivs).round(3)
                # pg.print_table(anova_kruskal_interaction)
                # anova_kruskal_interaction.to_csv(f'{table_save_path}kruskal-wallis interaction anova {df_name} - {ivs} x {dv}.csv')
                # print('~' * 20)
                # print('\n')

                # GAMES HOWELL POST HOC
                print('POST HOC GAMES HOWELL ANOVA')
                print('\n')
                print('~' * 20)
                print(f'{iv} x {dv}')
                anova_games_posthoc = pg.pairwise_gameshowell(
                    data=df, dv=dv, between=iv, effsize='eta-square'
                ).round(3)
                pg.print_table(anova_games_posthoc)
                anova_games_posthoc.to_csv(f'{table_save_path}post hoc gameshowell {df_name} - {iv} x {dv}.csv')
                print('~' * 20)
                print('\n')
                print('+'*120)
                print('\n')


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


  0%|          | 0/4 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dependent Variable: Warmth
Independent Variable: Gender
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
LEVENE'S TEST


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth
Gender x Warmth Levene's test:
         W   pval  equal_var
levene 6.17  0.00    False  
~~~~~~~~~~~~~~~~~~~~


ANOVA SIGNIFICANCE


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth
--------------------
One-way ANOVA p-value: 0.0020999575987719604. Rejected: True
~~~~~~~~~~~~~~~~~~~~
INTEACTION ANOVA Warmth


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth




Gender x Warmth ANOVA INTERACTION:
                  sum_sq    df      F   PR(>F)
C(Gender)           0.00    2.00  0.00   1.00 
C(Age)             -0.00    2.00 -0.00   1.00 
C(Gender):C(Age)    2.74    4.00  3.47   0.03 
Residual         1172.09 5939.00   NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth
Gender x Warmth ONE-WAY ANOVA:
   Source    SS     DF   MS    F   p-unc  np2
0  Gender    2.44     2 1.22 6.17  0.00  0.00
1  Within 1173.98  5944 0.20  NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth




Gender x Warmth TWO-WAY ANOVA:
      Source       SS      DF     MS     F   p-unc   np2
0        Gender   -0.00    2.00 -0.00 -0.00  1.00  -0.00
1           Age    0.00    2.00  0.00  0.00  1.00   0.00
2  Gender * Age    2.70    4.00  0.68  3.42  0.02   0.00
3      Residual 1172.73 5938.00  0.20   NaN   NaN    NaN
~~~~~~~~~~~~~~~~~~~~


POST HOC TUKEY'S ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth

POST HOC TESTS

A             B               mean(A)    mean(B)    diff     se       T    p-tukey    eta-square
------------  ------------  ---------  ---------  ------  -----  ------  ---------  ------------
Female        Mixed Gender      0.281      0.282  -0.000  0.016  -0.018      1.000         0.000
Female        Male              0.281      0.230   0.051  0.019   2.626      0.024         0.003
Mixed Gender  Male              0.282      0.230   0.051  0.015   3.435      0.002         0.003

~~~~~~~~~~~~~~~~~~~~


+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++



Age x Warmth ANOVA INTERACTION:
                  sum_sq    df      F   PR(>F)
C(Gender)           0.00    2.00  0.00   1.00 
C(Age)             -0.00    2.00 -0.00   1.00 
C(Gender):C(Age)    2.74    4.00  3.47   0.03 
Residual         1172.09 5939.00   NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Warmth
Age x Warmth ONE-WAY ANOVA:
   Source    SS     DF   MS     F   p-unc  np2
0     Age    4.04     2 2.02 10.24  0.00  0.00
1  Within 1172.38  5944 0.20   NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Warmth




Age x Warmth TWO-WAY ANOVA:
      Source       SS      DF     MS     F   p-unc   np2
0        Gender   -0.00    2.00 -0.00 -0.00  1.00  -0.00
1           Age    0.00    2.00  0.00  0.00  1.00   0.00
2  Gender * Age    2.70    4.00  0.68  3.42  0.02   0.00
3      Residual 1172.73 5938.00  0.20   NaN   NaN    NaN
~~~~~~~~~~~~~~~~~~~~


POST HOC TUKEY'S ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Warmth

POST HOC TESTS

A          B            mean(A)    mean(B)    diff     se       T    p-tukey    eta-square
---------  ---------  ---------  ---------  ------  -----  ------  ---------  ------------
Older      Mixed Age      0.200      0.276  -0.077  0.020  -3.897      0.000         0.008
Older      Younger        0.200      0.284  -0.084  0.019  -4.487      0.000         0.009
Mixed Age  Younger        0.276      0.284  -0.007  0.013  -0.596      0.822         0.000

~~~~~~~~~~~~~~~~~~~~


+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

  0%|          | 0/8 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dependent Variable: Warmth
Independent Variable: Gender
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
LEVENE'S TEST


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth
Gender x Warmth Levene's test:
          W    pval  equal_var
levene 371.20  0.00    False  
~~~~~~~~~~~~~~~~~~~~


ANOVA SIGNIFICANCE


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth
--------------------
One-way ANOVA p-value: 9.60272638498814e-162. Rejected: True
~~~~~~~~~~~~~~~~~~~~
INTEACTION ANOVA Warmth


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth




Gender x Warmth ANOVA INTERACTION:
                  sum_sq      df       F   PR(>F)
C(Gender)           -0.00      2.00 -0.00   1.00 
C(Age)               0.00      2.00  0.00   1.00 
C(Gender):C(Age)     1.08      4.00  1.27   0.28 
Residual         65573.55 307292.00   NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth
Gender x Warmth ONE-WAY ANOVA:
   Source    SS       DF     MS     F    p-unc  np2
0  Gender   158.48       2 79.24 371.20  0.00  0.00
1  Within 65599.19  307297  0.21    NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth
Gender x Warmth TWO-WAY ANOVA:
      Source       SS        DF      MS     F   p-unc   np2
0        Gender    -0.00      2.00 -0.00 -0.00  1.00  -0.00
1           Age    -0.00      2.00 -0.00 -0.00  1.00  -0.00
2  Gender * Age     0.72      4.00  0.18  0.85  0.49   0.00
3      Residual 65573.56 307291.00  0.21   NaN   NaN    NaN
~~~~~~~~~~~~~~~~~~~~


POST HOC TUKEY'S ANOVA



Gender x Competence ANOVA INTERACTION:
                  sum_sq      df      F   PR(>F)
C(Gender)            0.00      2.00 0.00   1.00 
C(Age)               0.00      2.00 0.00   1.00 
C(Gender):C(Age)     1.36      4.00 1.36   0.26 
Residual         76535.76 307292.00  NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Competence
Gender x Competence ONE-WAY ANOVA:
   Source    SS       DF     MS     F    p-unc  np2
0  Gender   143.59       2 71.80 288.05  0.00  0.00
1  Within 76592.91  307297  0.25    NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Competence
Gender x Competence TWO-WAY ANOVA:
      Source       SS        DF      MS     F   p-unc   np2
0        Gender    -0.00      2.00 -0.00 -0.00  1.00  -0.00
1           Age    -0.00      2.00 -0.00 -0.00  1.00  -0.00
2  Gender * Age     0.38      4.00  0.09  0.38  0.83   0.00
3      Residual 76535.31 307291.00  0.25   NaN   NaN    NaN
~~~~~~~~~~~~~~~~~~~~


POST HO



Gender x Warmth_Probability ANOVA INTERACTION:
                  sum_sq      df       F   PR(>F)
C(Gender)           -0.00      2.00 -0.00   1.00 
C(Age)               0.00      2.00  0.00   1.00 
C(Gender):C(Age)     0.77      4.00  1.38   0.25 
Residual         43167.11 307292.00   NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth_Probability
Gender x Warmth_Probability ONE-WAY ANOVA:
   Source    SS       DF     MS     F    p-unc  np2
0  Gender   124.91       2 62.45 444.38  0.00  0.00
1  Within 43187.67  307297  0.14    NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Warmth_Probability
Gender x Warmth_Probability TWO-WAY ANOVA:
      Source       SS        DF      MS     F   p-unc   np2
0        Gender    -0.00      2.00 -0.00 -0.00  1.00  -0.00
1           Age    -0.00      2.00 -0.00 -0.00  1.00  -0.00
2  Gender * Age     0.35      4.00  0.09  0.63  0.64   0.00
3      Residual 43167.19 307291.00  0.14   Na



Gender x Competence_Probability ANOVA INTERACTION:
                  sum_sq      df       F   PR(>F)
C(Gender)           -0.00      2.00 -0.00   1.00 
C(Age)               0.00      2.00  0.00   1.00 
C(Gender):C(Age)     0.71      4.00  1.46   0.23 
Residual         37457.47 307292.00   NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Competence_Probability
Gender x Competence_Probability ONE-WAY ANOVA:
   Source    SS       DF     MS     F    p-unc  np2
0  Gender    75.91       2 37.95 311.08  0.00  0.00
1  Within 37492.71  307297  0.12    NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Gender x Competence_Probability
Gender x Competence_Probability TWO-WAY ANOVA:
      Source       SS        DF     MS    F   p-unc  np2
0        Gender     0.00      2.00 0.00 0.00  1.00  0.00
1           Age     0.00      2.00 0.00 0.00  1.00  0.00
2  Gender * Age     0.09      4.00 0.02 0.18  0.95  0.00
3      Residual 37457.25 307291.00 0.



Age x Warmth ANOVA INTERACTION:
                  sum_sq      df       F   PR(>F)
C(Gender)           -0.00      2.00 -0.00   1.00 
C(Age)               0.00      2.00  0.00   1.00 
C(Gender):C(Age)     1.08      4.00  1.27   0.28 
Residual         65573.55 307292.00   NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Warmth
Age x Warmth ONE-WAY ANOVA:
   Source    SS       DF     MS     F    p-unc  np2
0     Age    70.41       2 35.21 164.71  0.00  0.00
1  Within 65687.26  307297  0.21    NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Warmth
Age x Warmth TWO-WAY ANOVA:
      Source       SS        DF      MS     F   p-unc   np2
0        Gender    -0.00      2.00 -0.00 -0.00  1.00  -0.00
1           Age    -0.00      2.00 -0.00 -0.00  1.00  -0.00
2  Gender * Age     0.72      4.00  0.18  0.85  0.49   0.00
3      Residual 65573.56 307291.00  0.21   NaN   NaN    NaN
~~~~~~~~~~~~~~~~~~~~


POST HOC TUKEY'S ANOVA


~~~~~~~~~~~~



Age x Competence ANOVA INTERACTION:
                  sum_sq      df      F   PR(>F)
C(Gender)            0.00      2.00 0.00   1.00 
C(Age)               0.00      2.00 0.00   1.00 
C(Gender):C(Age)     1.36      4.00 1.36   0.26 
Residual         76535.76 307292.00  NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Competence
Age x Competence ONE-WAY ANOVA:
   Source    SS       DF     MS     F    p-unc  np2
0     Age    99.10       2 49.55 198.68  0.00  0.00
1  Within 76637.40  307297  0.25    NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Competence
Age x Competence TWO-WAY ANOVA:
      Source       SS        DF      MS     F   p-unc   np2
0        Gender    -0.00      2.00 -0.00 -0.00  1.00  -0.00
1           Age    -0.00      2.00 -0.00 -0.00  1.00  -0.00
2  Gender * Age     0.38      4.00  0.09  0.38  0.83   0.00
3      Residual 76535.31 307291.00  0.25   NaN   NaN    NaN
~~~~~~~~~~~~~~~~~~~~


POST HOC TUKEY'S ANOVA



Age x Warmth_Probability ANOVA INTERACTION:
                  sum_sq      df       F   PR(>F)
C(Gender)           -0.00      2.00 -0.00   1.00 
C(Age)               0.00      2.00  0.00   1.00 
C(Gender):C(Age)     0.77      4.00  1.38   0.25 
Residual         43167.11 307292.00   NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Warmth_Probability
Age x Warmth_Probability ONE-WAY ANOVA:
   Source    SS       DF     MS     F    p-unc  np2
0     Age    50.93       2 25.46 180.87  0.00  0.00
1  Within 43261.65  307297  0.14    NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Warmth_Probability
Age x Warmth_Probability TWO-WAY ANOVA:
      Source       SS        DF      MS     F   p-unc   np2
0        Gender    -0.00      2.00 -0.00 -0.00  1.00  -0.00
1           Age    -0.00      2.00 -0.00 -0.00  1.00  -0.00
2  Gender * Age     0.35      4.00  0.09  0.63  0.64   0.00
3      Residual 43167.19 307291.00  0.14   NaN   NaN    NaN




Age x Competence_Probability ANOVA INTERACTION:
                  sum_sq      df       F   PR(>F)
C(Gender)           -0.00      2.00 -0.00   1.00 
C(Age)               0.00      2.00  0.00   1.00 
C(Gender):C(Age)     0.71      4.00  1.46   0.23 
Residual         37457.47 307292.00   NaN    NaN 
~~~~~~~~~~~~~~~~~~~~


ONE-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Competence_Probability
Age x Competence_Probability ONE-WAY ANOVA:
   Source    SS       DF     MS     F    p-unc  np2
0     Age    59.27       2 29.63 242.77  0.00  0.00
1  Within 37509.36  307297  0.12    NaN   NaN   NaN
~~~~~~~~~~~~~~~~~~~~


TWO-WAY ANOVA


~~~~~~~~~~~~~~~~~~~~
Age x Competence_Probability
Age x Competence_Probability TWO-WAY ANOVA:
      Source       SS        DF     MS    F   p-unc  np2
0        Gender     0.00      2.00 0.00 0.00  1.00  0.00
1           Age     0.00      2.00 0.00 0.00  1.00  0.00
2  Gender * Age     0.09      4.00 0.02 0.18  0.95  0.00
3      Residual 37457.25 307291.00 0.12  NaN   NaN  

# Regressions

## Logistic Regression

In [11]:
# Edit variable names for formula use: remove % and replace spaces with underscores
ivs_perc_ = list(map(lambda x: x.replace('%', 'percentage').replace(' ', '_'), ivs_perc))
print('-'*20)
print(f'IVs to use:\n{ivs_perc_}')
print('\n')
controls_ = list(map(lambda x: x.replace('%', 'percentage').replace(' ', '_'), controls))
print('-'*20)
print(f'All controls:\n{controls_}')
print('\n')
controls_for_formula = ' + '.join(controls_[:6])
print('-'*20)
print(f'Controls to use:\n{controls_for_formula}')
print('\n')

--------------------
IVs to use:
['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


--------------------
All controls:
['percentage_Sector_per_Workforce', 'Job_Description_num_words', 'English_Requirement_in_Job_Ad_Yes', 'Dutch_Requirement_in_Job_Ad_Yes', 'Platform_LinkedIn', 'Platform_Indeed', 'Platform_Glassdoor', 'English_Requirement_in_Job_Ad', 'Dutch_Requirement_in_Job_Ad', 'Platform', 'Job_Description_num_unique_words', 'Job_Description_num_chars', 'Job_Description_num_chars_no_whitespact_and_punt', 'Industry', 'Sector_n']


--------------------
Controls to use:
percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed




In [12]:
dataframes_ = {
    'df_manual_': df_manual.copy().rename(columns={x: x.replace('%', 'percentage').replace(' ', '_') for x in df_manual.columns}),
    'df_jobs': df_jobs.copy().rename(columns={x: x.replace('%', 'percentage').replace(' ', '_') for x in df_jobs.columns}),
}


### Logistic Regression with Social Category Dummies

In [13]:
%%time
# Logistic Regression for 0:1 Warmth and Competence x 0:1 Gender and Age
for df_name, df in dataframes_.items():

    print('+'*120)
    print(f'{"="*50} RESULTS FOR {df_name} {"="*50}')
    for dv in tqdm.tqdm(dvs):
        print('+'*120)
        print('\n')
        print(f'DEPENDENT VARIABLE: {dv}\n\nINDEPENDENT VARIABLE: {ivs_dummy[0]} + {ivs_dummy[2]} + {ivs_dummy[3]} + {ivs_dummy[5]}')
        print('\n')
        print('+'*120)

        # model = sm.Logit(endog=df[dv], exog=df[ivs_perc], data=df)
        # formula = f'{dv} ~ {ivs_dummy[0]}*{ivs_dummy[3]} + {ivs_dummy[0]}*{ivs_dummy[4]} + {ivs_dummy[0]}*{ivs_dummy[5]} + {ivs_dummy[1]}*{ivs_dummy[3]} + {ivs_dummy[1]}*{ivs_dummy[4]} + {ivs_dummy[1]}*{ivs_dummy[5]} + {ivs_dummy[2]}*{ivs_dummy[3]} + {ivs_dummy[2]}*{ivs_dummy[4]} + {ivs_dummy[2]}*{ivs_dummy[5]} + {controls_[0]} + {controls_[1]} + C({controls_[2]}) + C({controls_[3]})'
        # formula = f'{dv} ~ {ivs_dummy[0]} + {ivs_dummy[1]} + {ivs_dummy[2]} + {ivs_dummy[3]} + {ivs_dummy[4]} + {ivs_dummy[5]} + {controls_[0]} + {controls_[1]} + {controls_[2]} + {controls_[3]} + {controls_[4]} + {controls_[5]}'
        # formula = f'{dv} ~ {ivs_dummy[0]} + {ivs_dummy[2]} + {ivs_dummy[3]} + {ivs_dummy[5]} + {controls_[0]} + {controls_[1]} + {controls_[2]} + {controls_[3]} + {controls_[4]} + {controls_[5]}'
        # formula = f'{dv} ~ {ivs_dummy[0]} + {ivs_dummy[1]} + {ivs_dummy[2]} + {ivs_dummy[3]} + {ivs_dummy[4]} + {ivs_dummy[5]}'

        formula = f'{dv} ~ {ivs_dummy[0]} + {ivs_dummy[2]} + {ivs_dummy[3]} + {ivs_dummy[5]} + {controls_for_formula}'

        print('-'*20)
        print(f'Using formula: {formula}')
        print('-'*20)

        with contextlib.suppress(np.linalg.LinAlgError):
            model = smf.logit(formula=formula, data=df)
            results = model.fit()
            df_summary_results = pd.DataFrame(csv.reader(results.summary().as_csv().split('\n'), delimiter=','))

            # Display Results
            print('~'*20)
            print('\n')
            print(f'SUMMARY RESULTS:\n{results.summary()}\n')
            print('~'*20)
            # print(f'SUMMARY RESULTS2:\n{results.summary2()}')
            # print('-'*20)
            # print(f'y = {results.params.const:.2f} + {results.params.x:.2f} * x')
            # print('-'*20)
            # print(f'COEFFICIENT:\n{results.params}')
            # print('-'*20)
            # print(f'CONFIDENCE INTERVALS:\n{results.conf_int()}')
            # print(f'P-VALUES:\n{results.pvalues}')
            # print('-'*20)
            # print(f'ODDS RATIOS:\n{np.exp(results.params)}')
            # print(f'AIC:\n{results.aic:.2f}')
            # print('-'*20)
            # print(f'BIC:\n{results.bic:.2f}')
            # print('-'*20)
            # print(f'Coehn\'s F2:\n{results.prsquared:.3f}')
            # print('-'*20)

            # save results
            df_summary_results = pd.DataFrame(csv.reader(results.summary().as_csv().split('\n'), delimiter=','))
            df_summary_results.to_csv(f'{table_save_path}logistic regression on categories {df_name} - {dv} x Social Category Percentages.csv', index=False)


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


  0%|          | 0/2 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Warmth

INDEPENDENT VARIABLE: Gender_Female + Gender_Male + Age_Older + Age_Younger


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Warmth ~ Gender_Female + Gender_Male + Age_Older + Age_Younger + percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed
--------------------
Optimization terminated successfully.
         Current function value: 0.547905
         Iterations 6


 50%|█████     | 1/2 [00:17<00:17, 17.26s/it]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Competence

INDEPENDENT VARIABLE: Gender_Female + Gender_Male + Age_Older + Age_Younger


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Competence ~ Gender_Female + Gender_Male + Age_Older + Age_Younger + percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed
--------------------
Optimization terminated successfully.
         Current function value: 0.637758
         Iterations 5


100%|██████████| 2/2 [00:34<00:00, 17.20s/it]


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


  0%|          | 0/2 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Warmth

INDEPENDENT VARIABLE: Gender_Female + Gender_Male + Age_Older + Age_Younger


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Warmth ~ Gender_Female + Gender_Male + Age_Older + Age_Younger + percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed
--------------------
Optimization terminated successfully.
         Current function value: 0.561992
         Iterations 6


 50%|█████     | 1/2 [02:34<02:34, 154.01s/it]

~~~~~~~~~~~~~~~~~~~~


SUMMARY RESULTS:
                           Logit Regression Results                           
Dep. Variable:                 Warmth   No. Observations:               307300
Model:                          Logit   Df Residuals:                   307289
Method:                           MLE   Df Model:                           10
Date:                Sun, 30 Apr 2023   Pseudo R-squ.:                 0.09251
Time:                        21:43:24   Log-Likelihood:            -1.7270e+05
converged:                       True   LL-Null:                   -1.9030e+05
Covariance Type:            nonrobust   LLR p-value:                     0.000
                                        coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Intercept                            -1.5031      0.019    -79.270      0.000      -1.540      -1.466
Gender_Female         

100%|██████████| 2/2 [04:32<00:00, 136.39s/it]

~~~~~~~~~~~~~~~~~~~~


SUMMARY RESULTS:
                           Logit Regression Results                           
Dep. Variable:             Competence   No. Observations:               307300
Model:                          Logit   Df Residuals:                   307289
Method:                           MLE   Df Model:                           10
Date:                Sun, 30 Apr 2023   Pseudo R-squ.:                  0.1038
Time:                        21:45:22   Log-Likelihood:            -1.9075e+05
converged:                       True   LL-Null:                   -2.1283e+05
Covariance Type:            nonrobust   LLR p-value:                     0.000
                                        coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Intercept                            -0.8180      0.018    -45.889      0.000      -0.853      -0.783
Gender_Female         




### Logistic Regression with Social Category percentage per Sector

In [14]:
%%time
# Logistic Regression for 0:1 Warmth and Competence x percentage Gender and Age
for df_name, df in dataframes_.items():

    print('+'*120)
    print(f'{"="*50} RESULTS FOR {df_name} {"="*50}')
    for dv in tqdm.tqdm(dvs):
        print('+'*120)
        print('\n')
        print(f'DEPENDENT VARIABLE: {dv}\n\nINDEPENDENT VARIABLE: {ivs_perc_}')
        print('\n')
        print('+'*120)

        # model = sm.Logit(endog=df[dv], exog=df[ivs_perc], data=df)
        # formula = f'{dv} ~ {ivs_perc_[0]} + {ivs_perc_[1]} + {ivs_perc_[2]} + {ivs_perc_[3]} + {ivs_perc_[0]}:{ivs_perc_[2]} + {ivs_perc_[0]}:{ivs_perc_[3]} + {ivs_perc_[1]}:{ivs_perc_[2]} + {ivs_perc_[1]}:{ivs_perc_[3]}'
        # formula = f'{dv} ~ {ivs_perc_[0]}*{ivs_perc_[2]} + {ivs_perc_[0]}*{ivs_perc_[3]} + {ivs_perc_[1]}*{ivs_perc_[2]} + {ivs_perc_[1]}*{ivs_perc_[3]} + {controls_[0]} + {controls_[1]} + {controls_[2]} + {controls_[3]} + {controls_[4]} + {controls_[5]}'

        formula = f'{dv} ~ {ivs_perc_[0]}*{ivs_perc_[2]} + {ivs_perc_[0]}*{ivs_perc_[3]} + {ivs_perc_[1]}*{ivs_perc_[2]} + {ivs_perc_[1]}*{ivs_perc_[3]}'

        print('-'*20)
        print(f'Using formula: {formula}')
        print('-'*20)

        model = smf.logit(formula=formula, data=df)
        results = model.fit()

        # Display Results
        print('~'*20)
        print('\n')
        print(f'SUMMARY RESULTS:\n{results.summary()}\n')
        print('~'*20)
        # print(f'SUMMARY RESULTS2:\n{results.summary2()}')
        # print('-'*20)
        # print(f'y = {results.params.const:.2f} + {results.params.x:.2f} * x')
        # print('-'*20)
        # print(f'COEFFICIENT:\n{results.params}')
        # print('-'*20)
        # print(f'CONFIDENCE INTERVALS:\n{results.conf_int()}')
        # print(f'P-VALUES:\n{results.pvalues}')
        # print('-'*20)
        # print(f'ODDS RATIOS:\n{np.exp(results.params)}')
        # print(f'AIC:\n{results.aic:.2f}')
        # print('-'*20)
        # print(f'BIC:\n{results.bic:.2f}')
        # print('-'*20)
        # print(f'Coehn\'s F2:\n{results.prsquared:.3f}')
        # print('-'*20)

        # save results
        df_summary_results = pd.DataFrame(csv.reader(results.summary().as_csv().split('\n'), delimiter=','))
        df_summary_results.to_csv(f'{table_save_path}logistic regression on percentages {df_name} - {dv} x Social Category Percentages.csv', index=False)


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


  0%|          | 0/2 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Warmth

INDEPENDENT VARIABLE: ['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Warmth ~ Gender_Female_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Female_percentage_per_Sector*Age_Younger_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Younger_percentage_per_Sector
--------------------
         Current function value: 0.581892
         Iterations: 35




~~~~~~~~~~~~~~~~~~~~


SUMMARY RESULTS:
                           Logit Regression Results                           
Dep. Variable:                 Warmth   No. Observations:                 5947
Model:                          Logit   Df Residuals:                     5938
Method:                           MLE   Df Model:                            8
Date:                Sun, 30 Apr 2023   Pseudo R-squ.:                0.004989
Time:                        21:46:44   Log-Likelihood:                -3460.5
converged:                      False   LL-Null:                       -3477.9
Covariance Type:            nonrobust   LLR p-value:                 3.030e-05
                                                                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------------------------------------
Intercept                                           

 50%|█████     | 1/2 [01:21<01:21, 81.72s/it]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Competence

INDEPENDENT VARIABLE: ['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Competence ~ Gender_Female_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Female_percentage_per_Sector*Age_Younger_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Younger_percentage_per_Sector
--------------------
         Current function value: 0.687667
         Iterations: 35


100%|██████████| 2/2 [02:37<00:00, 78.70s/it]


~~~~~~~~~~~~~~~~~~~~


SUMMARY RESULTS:
                           Logit Regression Results                           
Dep. Variable:             Competence   No. Observations:                 5947
Model:                          Logit   Df Residuals:                     5938
Method:                           MLE   Df Model:                            8
Date:                Sun, 30 Apr 2023   Pseudo R-squ.:                0.004439
Time:                        21:48:00   Log-Likelihood:                -4089.6
converged:                      False   LL-Null:                       -4107.8
Covariance Type:            nonrobust   LLR p-value:                 1.440e-05
                                                                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------------------------------------
Intercept                                           

  0%|          | 0/2 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Warmth

INDEPENDENT VARIABLE: ['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Warmth ~ Gender_Female_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Female_percentage_per_Sector*Age_Younger_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Younger_percentage_per_Sector
--------------------
         Current function value: 0.617386
         Iterations: 35




~~~~~~~~~~~~~~~~~~~~




 50%|█████     | 1/2 [10:03<10:03, 603.39s/it]

SUMMARY RESULTS:
                           Logit Regression Results                           
Dep. Variable:                 Warmth   No. Observations:               307300
Model:                          Logit   Df Residuals:                   307291
Method:                           MLE   Df Model:                            8
Date:                Sun, 30 Apr 2023   Pseudo R-squ.:                0.003059
Time:                        21:58:03   Log-Likelihood:            -1.8972e+05
converged:                      False   LL-Null:                   -1.9030e+05
Covariance Type:            nonrobust   LLR p-value:                5.407e-246
                                                                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------------------------------------
Intercept                                                             -1698



~~~~~~~~~~~~~~~~~~~~




100%|██████████| 2/2 [18:06<00:00, 543.20s/it]

SUMMARY RESULTS:
                           Logit Regression Results                           
Dep. Variable:             Competence   No. Observations:               307300
Model:                          Logit   Df Residuals:                   307291
Method:                           MLE   Df Model:                            8
Date:                Sun, 30 Apr 2023   Pseudo R-squ.:                0.002328
Time:                        22:06:06   Log-Likelihood:            -2.1233e+05
converged:                      False   LL-Null:                   -2.1283e+05
Covariance Type:            nonrobust   LLR p-value:                1.394e-208
                                                                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------------------------------------
Intercept                                                               -86




## Linear Regression

In [15]:
# Edit variable names for formula use: remove % and replace spaces with underscores
ivs_perc_ = list(map(lambda x: x.replace('%', 'percentage').replace(' ', '_'), ivs_perc))
print('-'*20)
print(f'IVs to use:\n{ivs_perc_}')
print('\n')
controls_ = list(map(lambda x: x.replace('%', 'percentage').replace(' ', '_'), controls))
print('-'*20)
print(f'All controls:\n{controls_}')
print('\n')
controls_for_formula = ' + '.join(controls_[:6])
print('-'*20)
print(f'Controls to use:\n{controls_for_formula}')
print('\n')


--------------------
IVs to use:
['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


--------------------
All controls:
['percentage_Sector_per_Workforce', 'Job_Description_num_words', 'English_Requirement_in_Job_Ad_Yes', 'Dutch_Requirement_in_Job_Ad_Yes', 'Platform_LinkedIn', 'Platform_Indeed', 'Platform_Glassdoor', 'English_Requirement_in_Job_Ad', 'Dutch_Requirement_in_Job_Ad', 'Platform', 'Job_Description_num_unique_words', 'Job_Description_num_chars', 'Job_Description_num_chars_no_whitespact_and_punt', 'Industry', 'Sector_n']


--------------------
Controls to use:
percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed




In [16]:
dataframes_ = {
    'df_manual_': df_manual.copy().rename(columns={x: x.replace('%', 'percentage').replace(' ', '_') for x in df_manual.columns}),
    'df_jobs': df_jobs.copy().rename(columns={x: x.replace('%', 'percentage').replace(' ', '_') for x in df_jobs.columns}),
}


### OLS Regression

In [17]:
for df_name, df in dataframes_.items():

    if df_name == 'df_manual_':
        dvs_ = dvs
    elif df_name == 'df_jobs_':
        dvs_ = dvs_all

    print('+'*120)
    print(f'{"="*50} RESULTS FOR {df_name} {"="*50}')
    for dv in tqdm.tqdm(dvs_):
        print('+'*120)
        print('\n')
        print(f'DEPENDENT VARIABLE: {dv}\n\nINDEPENDENT VARIABLE: {ivs_perc_}')
        print('\n')
        print('+'*120)

        # constant = sm.add_constant(df[ivs_perc_])
        # model = sm.OLS(df[dv], constant)
        # formula = f'{dv} ~ {ivs_perc_[0]} + {ivs_perc_[1]} + {ivs_perc_[2]} + {ivs_perc_[3]} + {ivs_perc_[0]}:{ivs_perc_[2]} + {ivs_perc_[0]}:{ivs_perc_[3]} + {ivs_perc_[1]}:{ivs_perc_[2]} + {ivs_perc_[1]}:{ivs_perc_[3]} + {controls_[0]} + {controls_[1]} + C({controls_[2]}) + C({controls_[3]})'
        formula = f'{dv} ~ {ivs_perc_[0]}*{ivs_perc_[2]} + {ivs_perc_[0]}*{ivs_perc_[3]} + {ivs_perc_[1]}*{ivs_perc_[2]} + {ivs_perc_[1]}*{ivs_perc_[3]} + {controls_for_formula}'

        print('-'*20)
        print(f'Using formula: {formula}')
        print('-'*20)

        model = smf.ols(formula=formula, data=df)
        results = model.fit()

        # Display Results
        print('~'*20)
        print('\n')
        print(f'SUMMARY RESULTS:\n{results.summary()}\n')
        print('~'*20)
        # print(f'SUMMARY RESULTS2:\n{results.summary2()}')
        # print('-'*20)
        # print(f'y = {results.params.const:.2f} + {results.params.x:.2f} * x')
        # print('-'*20)
        # print(f'COEFFICIENT:\n{results.params}')
        # print('-'*20)
        # print(f'CONFIDENCE INTERVALS:\n{results.conf_int()}')
        # print(f'P-VALUES:\n{results.pvalues}')
        # print('-'*20)
        # print(f'ODDS RATIOS:\n{np.exp(results.params)}')
        # print(f'AIC:\n{results.aic:.2f}')
        # print('-'*20)
        # print(f'BIC:\n{results.bic:.2f}')
        # print('-'*20)
        # print(f'Coehn\'s F2:\n{results.rsquared_adj:.3f}')
        print('-'*20)
        table = sm.stats.anova_lm(results, typ=2)
        print(f'ANOVA:\n{table}')
        print('-'*20)

        # # Boxplot
        # boxplot = df.boxplot([dv], by = [ivs_perc_[2], ivs_perc_[0]],
        #                     figsize = (16, 9),
        #                     showmeans = True,
        #                     notch = True)

        # boxplot.set_xlabel('Categories')
        # boxplot.set_ylabel(dv)
        # # Creating a path to save the plot.
        # plt.ion()
        # plt.show()
        # plt.pause(.001)
        # # for image_save_format in ['eps', 'png', 'svg']:
        # #     save_path = f'{plot_save_path}Probability Boxplot - {df_name} - {dv} x Social Category Percentages.{image_save_format}'
        # #     boxplot.figure.savefig(
        # #         save_path, format=image_save_format,
        # #     )
        # plt.close()


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


  0%|          | 0/2 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Warmth

INDEPENDENT VARIABLE: ['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Warmth ~ Gender_Female_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Female_percentage_per_Sector*Age_Younger_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Younger_percentage_per_Sector + percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed
--------------------
~~~~~~~~~~~~~~~~~~~~


SUMMARY RESULTS:
 

  return np.sqrt(eigvals[0]/eigvals[-1])
  F /= J
 50%|█████     | 1/2 [00:00<00:00,  1.55it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Competence

INDEPENDENT VARIABLE: ['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Competence ~ Gender_Female_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Female_percentage_per_Sector*Age_Younger_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Younger_percentage_per_Sector + percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed
--------------------


  return np.sqrt(eigvals[0]/eigvals[-1])
  F /= J
100%|██████████| 2/2 [00:01<00:00,  1.21it/s]


~~~~~~~~~~~~~~~~~~~~


SUMMARY RESULTS:
                            OLS Regression Results                            
Dep. Variable:             Competence   R-squared:                       0.099
Model:                            OLS   Adj. R-squared:                  0.097
Method:                 Least Squares   F-statistic:                     50.17
Date:                Sun, 30 Apr 2023   Prob (F-statistic):          5.67e-124
Time:                        22:06:11   Log-Likelihood:                -3991.8
No. Observations:                5947   AIC:                             8012.
Df Residuals:                    5933   BIC:                             8105.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                                                                            coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------

  0%|          | 0/2 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Warmth

INDEPENDENT VARIABLE: ['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Warmth ~ Gender_Female_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Female_percentage_per_Sector*Age_Younger_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Younger_percentage_per_Sector + percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed
--------------------
~~~~~~~~~~~~~~~~~~~~


SUMMARY RESULTS:
 

 50%|█████     | 1/2 [00:01<00:01,  1.48s/it]

ANOVA:
                                                    sum_sq      df        F     PR(>F)
Gender_Female_percentage_per_Sector                    8.65      1.00    45.56   0.00 
Age_Older_percentage_per_Sector                       15.57      1.00    81.98   0.00 
Gender_Female_percentage_per_Sector:Age_Older_p...    13.94      1.00    73.42   0.00 
Age_Younger_percentage_per_Sector                     17.70      1.00    93.22   0.00 
Gender_Female_percentage_per_Sector:Age_Younger...    14.45      1.00    76.09   0.00 
Gender_Male_percentage_per_Sector                      8.67      1.00    45.67   0.00 
Gender_Male_percentage_per_Sector:Age_Older_per...    13.95      1.00    73.44   0.00 
Gender_Male_percentage_per_Sector:Age_Younger_p...    14.45      1.00    76.08   0.00 
percentage_Sector_per_Workforce                       10.19      1.00    53.65   0.00 
Job_Description_num_words                           7060.43      1.00 37177.83   0.00 
English_Requirement_in_Job_Ad_Yes   

100%|██████████| 2/2 [00:03<00:00,  1.75s/it]

SUMMARY RESULTS:
                            OLS Regression Results                            
Dep. Variable:             Competence   R-squared:                       0.115
Model:                            OLS   Adj. R-squared:                  0.115
Method:                 Least Squares   F-statistic:                     2843.
Date:                Sun, 30 Apr 2023   Prob (F-statistic):               0.00
Time:                        22:06:14   Log-Likelihood:            -2.0414e+05
No. Observations:              307300   AIC:                         4.083e+05
Df Residuals:                  307285   BIC:                         4.085e+05
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                                                                            coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------




### Multi-level OLS Regression

In [18]:
# Edit variable names for formula use: remove % and replace spaces with underscores
ivs_perc_ = list(map(lambda x: x.replace('%', 'percentage').replace(' ', '_'), ivs_perc))
print('-'*20)
print(f'IVs to use:\n{ivs_perc_}')
print('\n')
controls_ = list(map(lambda x: x.replace('%', 'percentage').replace(' ', '_'), controls))
print('-'*20)
print(f'All controls:\n{controls_}')
print('\n')
controls_for_formula = ' + '.join(controls_[:6])
print('-'*20)
print(f'Controls to use:\n{controls_for_formula}')
print('\n')


--------------------
IVs to use:
['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


--------------------
All controls:
['percentage_Sector_per_Workforce', 'Job_Description_num_words', 'English_Requirement_in_Job_Ad_Yes', 'Dutch_Requirement_in_Job_Ad_Yes', 'Platform_LinkedIn', 'Platform_Indeed', 'Platform_Glassdoor', 'English_Requirement_in_Job_Ad', 'Dutch_Requirement_in_Job_Ad', 'Platform', 'Job_Description_num_unique_words', 'Job_Description_num_chars', 'Job_Description_num_chars_no_whitespact_and_punt', 'Industry', 'Sector_n']


--------------------
Controls to use:
percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed




In [19]:
dataframes_ = {
    'df_manual_': df_manual.copy().rename(columns={x: x.replace('%', 'percentage').replace(' ', '_') for x in df_manual.columns}),
    'df_jobs_': df_jobs.copy().rename(columns={x: x.replace('%', 'percentage').replace(' ', '_') for x in df_jobs.columns}),
}


In [21]:
for df_name, df in dataframes_.items():

    if df_name == 'df_manual_':
        dvs_ = dvs
    elif df_name == 'df_jobs_':
        dvs_ = dvs_all

    df['Intercept'] = 1

    print('+'*120)
    print(f'{"="*50} RESULTS FOR {df_name} {"="*50}')
    for dv in tqdm.tqdm(dvs_):
        print('+'*120)
        print('\n')
        print(f'DEPENDENT VARIABLE: {dv}\n\nINDEPENDENT VARIABLE: {ivs_perc_}')
        print('\n')
        print('+'*120)

        save_name = f'Multilevel Logistic Regression {df_name} - {list(iter(ivs_dict))[0]} + {list(iter(ivs_dict))[1]} x {dv}'
        # endog = df[dv]
        # exog0 = df[['Intercept', f'{list(iter(ivs_dict))[0]}']]
        # exog1 = df[['Intercept', f'{list(iter(ivs_dict))[1]}']]
        # iv_1 = list(iter(ivs_dict))[0]
        # iv_1_treatment = ivs_dict[iv_1][0]
        # iv_2 = list(iter(ivs_dict))[1]
        # iv_2_treatment = ivs_dict[iv_2][0]

        # formula = f'{dv} ~ {ivs_perc_[0]} + {ivs_perc_[1]} + {ivs_perc_[2]} + {ivs_perc_[3]} + {controls_for_formula}'
        # formula = f'{dv} ~ {ivs_perc_[0]} + {ivs_perc_[1]} + {ivs_perc_[2]} + {ivs_perc_[3]} + {controls_for_formula}'
        # formula = f'{dv} ~ {ivs_perc_[0]} + {ivs_perc_[1]} + {ivs_perc_[2]} + {ivs_perc_[3]} + {ivs_perc_[0]}:{ivs_perc_[2]} + {ivs_perc_[0]}:{ivs_perc_[3]} + {ivs_perc_[1]}:{ivs_perc_[2]} + {ivs_perc_[1]}:{ivs_perc_[3]} + {controls_for_formula}'
        formula = f'{dv} ~ {ivs_perc_[0]}*{ivs_perc_[2]} + {ivs_perc_[0]}*{ivs_perc_[3]} + {ivs_perc_[1]}*{ivs_perc_[2]} + {ivs_perc_[1]}*{ivs_perc_[3]} + {controls_[0]} + {controls_for_formula}'

        print('-'*20)
        print(f'Using formula: {formula}')
        print('-'*20)

        vc_formula = {f'{controls_[1]}': f'0 + {controls_[1]}'}
        re_formula = f'1 + {controls_[1]}'

        model = smf.mixedlm(formula=formula, data=df, groups='Job_ID',) #vc_formula=vc_formula, re_formula=re_formula)
        results = model.fit(method='lbfgs')
        gradient = model.score(results.params_object)

        # Display Results
        print('~'*20)
        print(f'Gradient:\n{gradient}')
        print('\n')
        print(f'SUMMARY RESULTS:\n{results.summary()}\n')
        print('~'*20)
        # print(f'SUMMARY RESULTS2:\n{results.summary2()}')
        # print('-'*20)
        # print(f'y = {results.params.const:.2f} + {results.params.x:.2f} * x')
        # print('-'*20)
        # print(f'COEFFICIENT:\n{results.params}')
        # print('-'*20)
        # print(f'CONFIDENCE INTERVALS:\n{results.conf_int()}')
        # print(f'P-VALUES:\n{results.pvalues}')
        # print('-'*20)
        # print(f'ODDS RATIOS:\n{np.exp(results.params)}')
        # print(f'AIC:\n{results.aic:.2f}')
        # print('-'*20)
        # print(f'BIC:\n{results.bic:.2f}')
        # print('-'*20)
        # print(f'Coehn\'s F2:\n{results.rsquared_adj:.3f}')
        # print('-'*20)
        # table = sm.stats.anova_lm(results, typ=2)
        # print(f'ANOVA:\n{table}')
        # print('-'*20)

        # df_results = pd.DataFrame(index=['Descriptives', 'Results'], columns=[f'{save_name}'])
        # df_results[f'{save_name}']['Descriptives'] = results.summary().tables[0]
        # df_results[f'{save_name}']['Results'] = results.summary().tables[1]

        # df_results.to_csv(f'{table_save_path}{save_name.split(" results")[0].lower() + " results" + save_name.split(" results")[1]}.csv', header=True, index=True, index_label=['Index col: Descriptives and Results'])

        # # Normality Tests (https://www.pythonfordatascience.org/mixed-effects-regression-python/)
        # ## Residual and Kernal Density Estimate (KDE) Plot for Homoskedasticity
        # fig = plt.figure(figsize = (16, 9))

        # ax = sns.distplot(results.resid, hist = True, kde_kws = {"shade" : True, "lw": 1}, fit = scipy.stats.norm, kde=True, palette='colorblind')

        # ax.set_title(f"Kernal Density Estimate (KDE) Plot of Model Residuals (Blue) and Normal Distribution (Black)\n{save_name}")
        # ax.set_xlabel("Residuals")
        # plt.ion()
        # fig.show('notebook')
        # plt.pause(.001)

        # # Q-Q Plot
        # fig = plt.figure(figsize = (16, 9))
        # ax = fig.add_subplot(111)

        # qq = sm.qqplot(results.resid, dist = scipy.stats.norm, line = 's', ax = ax, color='blue', markerfacecolor='blue')
        # ax.set_title(f"Q-Q Plot\n{save_name}",fontsize=15)
        # ax.xaxis.get_label().set_fontsize(12)
        # ax.yaxis.get_label().set_fontsize(12)
        # ax.get_lines()[0].set_color('black')
        # ax.get_lines()[0].set_linewidth('2')
        # ax.get_lines()[1].set_color('black')
        # ax.get_lines()[1].set_linewidth('2')
        # plt.ion()
        # fig.show('notebook')
        # plt.pause(.001)

        # # Test of Normality
        # norm = scipy.stats.normaltest(results.resid)

        # print('='*80)
        # print(f'{dv} Test of Normality:')
        # print('-'*80)
        # for key, val in dict(zip(normality_tests_labels, norm)).items():
        #     print(key,': ', val) # Significant
        # print('\n')

        # # Skewness-Kurtosis Test of Normality
        # norm_sk = scipy.stats.kurtosistest(results.resid)

        # print('='*80)
        # print(f'{dv} Skewness-Kurtosis Test of Normality:')
        # print('-'*80)
        # for key, val in dict(zip(normality_tests_labels, norm_sk)).items():
        #     print(key,': ', val) # Significant
        # print('\n')

        # # Shapir-Wilk Test of Normality
        # norm_res = scipy.stats.shapiro(results.resid)

        # print('='*80)
        # print(f'{dv} Shapir-Wilk Test of Normality:')
        # print('-'*80)
        # for key, val in dict(zip(normality_tests_labels, norm_res)).items():
        #     print(key,': ', val) # Significant
        # print('\n')

        # # Anderson-Darling Test of Normality
        # norm_and = scipy.stats.anderson(results.resid)

        # print('='*80)
        # print(f'{dv} Anderson-Darling Test of Normality:')
        # print('-'*80)
        # for key, val in dict(zip(normality_tests_labels, norm_and)).items():
        #     print(key,': ', val) # Significant
        # print('\n')

        # # Residuals versus Fitted values (RVF) Plot for Homoskedasticity
        # fig = plt.figure(figsize = (16, 9))

        # ax = sns.scatterplot(y = results.resid, x = results.fittedvalues, palette='colorblind')

        # ax.set_title(f"Residuals versus Fitted values (RVF) Plot\n{save_name}")
        # ax.set_xlabel("Fitted Values")
        # ax.set_ylabel("Residuals")
        # plt.ion()
        # fig.show('notebook')
        # plt.pause(.001)

        # # White’s Lagrange Multiplier Test for Heteroscedasticity
        # het_white_res = het_white(results.resid, results.model.exog)

        # het_white_labels = ["LM Statistic", "LM-Test p-value", "F-Statistic", "F-Test p-value"]

        # print('='*80)
        # print('White’s Lagrange Multiplier Test for Heteroscedasticity')
        # print('-'*80)
        # for key, val in dict(zip(het_white_labels, het_white_res)).items():
        #     print(key, val)
        # print('\n')
        # print('\n')
        # print('+'*120)
        # print('\n')


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


  0%|          | 0/2 [00:00<?, ?it/s]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


DEPENDENT VARIABLE: Warmth

INDEPENDENT VARIABLE: ['Gender_Female_percentage_per_Sector', 'Gender_Male_percentage_per_Sector', 'Age_Older_percentage_per_Sector', 'Age_Younger_percentage_per_Sector']


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--------------------
Using formula: Warmth ~ Gender_Female_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Female_percentage_per_Sector*Age_Younger_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Older_percentage_per_Sector + Gender_Male_percentage_per_Sector*Age_Younger_percentage_per_Sector + percentage_Sector_per_Workforce + percentage_Sector_per_Workforce + Job_Description_num_words + English_Requirement_in_Job_Ad_Yes + Dutch_Requirement_in_Job_Ad_Yes + Platform_LinkedIn + Platform_Indeed
--------------------


  0%|          | 0/2 [00:20<?, ?it/s]
