In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from scipy.stats import poisson, norm
from statsmodels.iolib.summary2 import summary_col
import scipy.stats
import scipy as sc
import matplotlib.pyplot as plt
import math
%matplotlib inline
import statsmodels.api as sm
from statsmodels.discrete.discrete_model import Probit
from patsy import dmatrices

In [2]:
# Load the employment sample from the project's clean-data folder
# (path is relative to the notebook's working directory).
data = pd.read_csv(filepath_or_buffer="../clean_data/employ_sample.csv")

In [3]:
# Regressor sets for the two probit specifications. Both begin with the
# intercept column 'const' followed by the five trait columns.
trait = [
    'const',
    'openness',
    'conscientiousness',
    'extraversion',
    'agreeableness',
    'neuroticism',
]

# The control specification keeps the trait regressors and appends the
# additional covariates. `+` builds a new list, so `trait` itself is untouched.
trait_control = trait + [
    'age',
    'age_sqr',
    'sex',
    'marital_status',
    'german_nationality',
    'hh_income_satisfaction',
]

In [4]:
# Add the intercept column (named 'const') that the regressor lists refer to.
# The arguments below are statsmodels' defaults, spelled out for the reader:
# the constant is placed first, and it is not added again if the frame
# already contains a constant column.
data = sm.add_constant(data, prepend=True, has_constant='skip')

In [5]:
# Baseline probit: employment_status regressed on the columns listed in `trait`.
probit_simple = sm.Probit(
    endog=data['employment_status'],
    exog=data[trait],
).fit()

# Full statsmodels summary of the fitted model.
print(probit_simple.summary())

Optimization terminated successfully.
         Current function value: 0.607358
         Iterations 5
                          Probit Regression Results                           
Dep. Variable:      employment_status   No. Observations:                 5171
Model:                         Probit   Df Residuals:                     5165
Method:                           MLE   Df Model:                            5
Date:                Wed, 20 Mar 2019   Pseudo R-squ.:                 0.01549
Time:                        22:33:11   Log-Likelihood:                -3140.6
converged:                       True   LL-Null:                       -3190.1
                                        LLR p-value:                 9.261e-20
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                -0.4867      0.019    -26.115      0.000      -0.523      -0.450
openness

In [6]:
# Row order used for the regression tables (passed as `regressor_order`):
# intercept first, then the trait columns, then the control covariates.
Order = (
    ['const']
    + ['openness', 'conscientiousness', 'extraversion', 'agreeableness', 'neuroticism']
    + ['age', 'age_sqr', 'sex', 'marital_status', 'german_nationality', 'hh_income_satisfaction']
)

In [7]:
# Extra table row: the number of observations of each model, as an integer string.
info_dict = {'No. observations': lambda res: format(int(res.nobs), 'd')}

# Compact coefficient table for the baseline probit: two-decimal estimates,
# significance stars, and rows arranged according to `Order`.
results_table = summary_col(
    [probit_simple],
    model_names=['employment_status'],
    regressor_order=Order,
    info_dict=info_dict,
    stars=True,
    float_format='%0.2f',
)
results_table.add_title('Probit model - simple')

print(results_table)

       Probit model - simple
                  employment_status
-----------------------------------
const             -0.49***         
                  (0.02)           
openness          0.09***          
                  (0.02)           
conscientiousness -0.01            
                  (0.03)           
extraversion      -0.07**          
                  (0.03)           
agreeableness     0.10***          
                  (0.02)           
neuroticism       0.17***          
                  (0.02)           
No. observations  5171             
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


In [8]:
# Probit with controls: employment_status regressed on the columns listed
# in `trait_control`.
probit_control = sm.Probit(
    endog=data['employment_status'],
    exog=data[trait_control],
).fit()

# Full statsmodels summary of the fitted model.
print(probit_control.summary())

Optimization terminated successfully.
         Current function value: 0.497266
         Iterations 6
                          Probit Regression Results                           
Dep. Variable:      employment_status   No. Observations:                 5171
Model:                         Probit   Df Residuals:                     5159
Method:                           MLE   Df Model:                           11
Date:                Wed, 20 Mar 2019   Pseudo R-squ.:                  0.1939
Time:                        22:33:11   Log-Likelihood:                -2571.4
converged:                       True   LL-Null:                       -3190.1
                                        LLR p-value:                1.395e-258
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      6.7023      0.306     21.891      0.000       6.102      

In [9]:
# Extra table row: the number of observations of each model, as an integer string.
info_dict = {'No. observations': lambda res: format(int(res.nobs), 'd')}

# Compact coefficient table for the probit with controls: two-decimal
# estimates, significance stars, and rows arranged according to `Order`.
results_table = summary_col(
    [probit_control],
    model_names=['employment_status'],
    regressor_order=Order,
    info_dict=info_dict,
    stars=True,
    float_format='%0.2f',
)
results_table.add_title('Probit model - control')

print(results_table)

         Probit model - control
                       employment_status
----------------------------------------
const                  6.70***          
                       (0.31)           
openness               0.06**           
                       (0.03)           
conscientiousness      -0.06            
                       (0.04)           
extraversion           0.00             
                       (0.04)           
agreeableness          0.04             
                       (0.03)           
neuroticism            0.14***          
                       (0.03)           
age                    -0.34***         
                       (0.01)           
age_sqr                0.00***          
                       (0.00)           
sex                    -0.40***         
                       (0.04)           
marital_status         -0.06            
                       (0.05)           
german_nationality     -0.28***         
                       (0