In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from scipy.stats import poisson, norm
from statsmodels.iolib.summary2 import summary_col
import scipy.stats
import scipy as sc
import matplotlib.pyplot as plt
import math
%matplotlib inline
import statsmodels.api as sm
from statsmodels.discrete.discrete_model import Probit
from patsy import dmatrices

In [2]:
# Load the cleaned regression sample; the first CSV column is used as the index.
data = pd.read_csv(
    "../clean_data/sample_regression.csv",
    index_col=[0],
)

In [3]:
# Add the intercept column to the full sample (referenced as 'const' in the
# regressor lists used by the models).
data = sm.add_constant(data)

# Split the sample on the 'sex' indicator: rows with 1.0 go to the male
# subsample, rows with 0.0 to the female subsample.
data_male = data[data['sex'].eq(1.0)]
data_female = data[data['sex'].eq(0.0)]

In [4]:
# Regressors of the simple specification: intercept plus the personality
# trait columns and the standardized locus-of-control column.
trait = [
    'const',
    'openness',
    'conscientiousness',
    'extraversion',
    'agreeableness',
    'neuroticism',
    'locus_of_control_std',
]

# Regressors of the controlled specification: everything in `trait`
# followed by the control columns (built as a new, independent list).
trait_control = trait + [
    'hh_income_satisfaction',
    'marital_status',
    'german_nationality',
    'health_status',
    'years_schooling',
    'children',
    'age',
    'age_sqr',
    'umemployed_last_year',
]

In [5]:
# Simple probit on the full sample: the 'umemployed' column regressed on
# the columns listed in `trait` (intercept included via 'const').
probit_simple = sm.Probit(
    data['umemployed'],
    data.loc[:, trait],
).fit()
print(probit_simple.summary())

Optimization terminated successfully.
         Current function value: 0.397626
         Iterations 6
                          Probit Regression Results                           
Dep. Variable:             umemployed   No. Observations:                34118
Model:                         Probit   Df Residuals:                    34111
Method:                           MLE   Df Model:                            6
Date:                Mon, 15 Apr 2019   Pseudo R-squ.:                 0.04096
Time:                        15:07:43   Log-Likelihood:                -13566.
converged:                       True   LL-Null:                       -14146.
                                        LLR p-value:                4.077e-247
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                   -1.0648      0.009   -123.364      0.000      -1.082      -1.048

In [6]:
# Row order passed as `regressor_order` to the summary_col tables below.
Order = [
    'const',
    'openness',
    'conscientiousness',
    'extraversion',
    'agreeableness',
    'neuroticism',
    'locus_of_control_std',
    'sex',
    'hh_income_satisfaction',
    'marital_status',
    'german_nationality',
    'health_status',
    'years_schooling',
    'children',
    'age',
    'age_sqr',
    'umemployed_last_year',
]

In [7]:
# Extra footer row for the table: the fitted model's observation count,
# rendered as an integer string.
info_dict = {
    'No. observations': lambda fitted: "{:d}".format(int(fitted.nobs)),
}

# Compact coefficient table for the simple probit.
# NOTE(review): the column label 'unemploymed' looks like a typo for
# 'unemployed'; it is kept as-is so the table matches the recorded output.
results_table = summary_col(
    results=[probit_simple],
    model_names=['unemploymed'],
    regressor_order=Order,
    info_dict=info_dict,
    stars=True,
    float_format='%0.2f',
)
results_table.add_title('Probit model - simple')

print(results_table)

     Probit model - simple
                     unemploymed
--------------------------------
const                -1.06***   
                     (0.01)     
openness             0.03***    
                     (0.01)     
conscientiousness    -0.09***   
                     (0.01)     
extraversion         -0.04***   
                     (0.01)     
agreeableness        0.10***    
                     (0.01)     
neuroticism          0.16***    
                     (0.01)     
locus_of_control_std -0.16***   
                     (0.01)     
No. observations     34118      
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


In [8]:
# Probit on the full sample with controls: the 'umemployed' column
# regressed on the columns listed in `trait_control`.
probit_control = sm.Probit(
    data['umemployed'],
    data.loc[:, trait_control],
).fit()
print(probit_control.summary())

Optimization terminated successfully.
         Current function value: 0.218512
         Iterations 7
                          Probit Regression Results                           
Dep. Variable:             umemployed   No. Observations:                34118
Model:                         Probit   Df Residuals:                    34102
Method:                           MLE   Df Model:                           15
Date:                Mon, 15 Apr 2019   Pseudo R-squ.:                  0.4730
Time:                        15:07:43   Log-Likelihood:                -7455.2
converged:                       True   LL-Null:                       -14146.
                                        LLR p-value:                     0.000
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      2.3654      0.302      7.839      0.000       1.774      

In [9]:
# Extra footer row for the table: the fitted model's observation count,
# rendered as an integer string.
info_dict = {
    'No. observations': lambda fitted: "{:d}".format(int(fitted.nobs)),
}

# Compact coefficient table for the probit with controls (full sample).
results_table = summary_col(
    results=[probit_control],
    model_names=['unemploymed'],
    regressor_order=Order,
    info_dict=info_dict,
    stars=True,
    float_format='%0.2f',
)
results_table.add_title('Probit model - control')

print(results_table)

      Probit model - control
                       unemploymed
----------------------------------
const                  2.37***    
                       (0.30)     
openness               0.04***    
                       (0.01)     
conscientiousness      -0.05***   
                       (0.02)     
extraversion           -0.03*     
                       (0.02)     
agreeableness          0.06***    
                       (0.01)     
neuroticism            0.06***    
                       (0.01)     
locus_of_control_std   -0.04***   
                       (0.01)     
hh_income_satisfaction -0.08***   
                       (0.01)     
marital_status         0.07***    
                       (0.03)     
german_nationality     -0.08      
                       (0.05)     
health_status          0.25***    
                       (0.03)     
years_schooling        -0.03***   
                       (0.00)     
children               0.11***    
                       (0.

In [10]:
# Probit with controls on the male subsample (`data_male`): the
# 'umemployed' column regressed on the columns listed in `trait_control`.
probit_male_control = sm.Probit(
    data_male['umemployed'],
    data_male.loc[:, trait_control],
).fit()
print(probit_male_control.summary())

Optimization terminated successfully.
         Current function value: 0.140078
         Iterations 8
                          Probit Regression Results                           
Dep. Variable:             umemployed   No. Observations:                15836
Model:                         Probit   Df Residuals:                    15820
Method:                           MLE   Df Model:                           15
Date:                Mon, 15 Apr 2019   Pseudo R-squ.:                  0.5218
Time:                        15:07:43   Log-Likelihood:                -2218.3
converged:                       True   LL-Null:                       -4638.4
                                        LLR p-value:                     0.000
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      3.2958      0.530      6.215      0.000       2.256      

In [11]:
# Extra footer row for the table: the fitted model's observation count,
# rendered as an integer string.
info_dict = {
    'No. observations': lambda fitted: "{:d}".format(int(fitted.nobs)),
}

# Compact coefficient table for the male-subsample probit with controls.
results_table = summary_col(
    results=[probit_male_control],
    model_names=['unemploymed'],
    regressor_order=Order,
    info_dict=info_dict,
    stars=True,
    float_format='%0.2f',
)
results_table.add_title('Probit model (male sample with control)')

print(results_table)

Probit model (male sample with control)
                       unemploymed
----------------------------------
const                  3.30***    
                       (0.53)     
openness               0.01       
                       (0.02)     
conscientiousness      -0.07**    
                       (0.03)     
extraversion           -0.01      
                       (0.03)     
agreeableness          0.02       
                       (0.02)     
neuroticism            0.05**     
                       (0.02)     
locus_of_control_std   -0.08***   
                       (0.02)     
hh_income_satisfaction -0.14***   
                       (0.01)     
marital_status         -0.17***   
                       (0.05)     
german_nationality     -0.01      
                       (0.10)     
health_status          0.40***    
                       (0.05)     
years_schooling        -0.05***   
                       (0.01)     
children               0.01       
               

In [12]:
# Probit with controls on the female subsample (`data_female`): the
# 'umemployed' column regressed on the columns listed in `trait_control`.
probit_female_control = sm.Probit(
    data_female['umemployed'],
    data_female.loc[:, trait_control],
).fit()
print(probit_female_control.summary())

Optimization terminated successfully.
         Current function value: 0.275214
         Iterations 7
                          Probit Regression Results                           
Dep. Variable:             umemployed   No. Observations:                18282
Model:                         Probit   Df Residuals:                    18266
Method:                           MLE   Df Model:                           15
Date:                Mon, 15 Apr 2019   Pseudo R-squ.:                  0.4452
Time:                        15:07:44   Log-Likelihood:                -5031.5
converged:                       True   LL-Null:                       -9069.6
                                        LLR p-value:                     0.000
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      2.2746      0.376      6.043      0.000       1.537      

In [13]:
# Extra footer row for the table: the fitted model's observation count,
# rendered as an integer string.
info_dict = {
    'No. observations': lambda fitted: "{:d}".format(int(fitted.nobs)),
}

# Compact coefficient table for the female-subsample probit with controls.
results_table = summary_col(
    results=[probit_female_control],
    model_names=['unemploymed'],
    regressor_order=Order,
    info_dict=info_dict,
    stars=True,
    float_format='%0.2f',
)
results_table.add_title('Probit model (female sample with control)')

print(results_table)

Probit model (female sample with control)
                       unemploymed
----------------------------------
const                  2.27***    
                       (0.38)     
openness               0.04***    
                       (0.01)     
conscientiousness      -0.05**    
                       (0.02)     
extraversion           -0.03*     
                       (0.02)     
agreeableness          0.04**     
                       (0.02)     
neuroticism            0.02       
                       (0.02)     
locus_of_control_std   -0.02      
                       (0.02)     
hh_income_satisfaction -0.06***   
                       (0.01)     
marital_status         0.17***    
                       (0.03)     
german_nationality     -0.13**    
                       (0.06)     
health_status          0.19***    
                       (0.04)     
years_schooling        -0.03***   
                       (0.01)     
children               0.15***    
             