In [10]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from scipy.stats import poisson, norm
from statsmodels.iolib.summary2 import summary_col
import scipy.stats
import scipy as sc
import matplotlib.pyplot as plt
import math
%matplotlib inline
import statsmodels.api as sm
from statsmodels.discrete.discrete_model import Probit
from patsy import dmatrices

In [11]:
# Load the sample from the cleaned-data folder (path is relative to the
# notebook's working directory).
data = pd.read_csv(filepath_or_buffer="../clean_data/employ_sample.csv")

In [12]:
# Regressors of the trait-only specification: the 'const' column followed by
# the five trait columns.
trait = [
    'const',
    'openness',
    'conscientiousness',
    'extraversion',
    'agreeableness',
    'neuroticism',
]

# Same regressors plus the additional control columns. Concatenation builds a
# new list, so `trait` itself is left untouched.
trait_control = trait + [
    'age',
    'age_sqr',
    'sex',
    'marital_status',
    'german_nationality',
    'hh_income_satisfaction',
]

In [13]:
# Add the 'const' column that the regressor lists refer to. The keyword values
# below are statsmodels' defaults, written out explicitly: the constant is
# prepended, and an already-present constant column is left alone.
data = sm.add_constant(data, prepend=True, has_constant='skip')

In [14]:
# Row order passed as `regressor_order` when building the summary tables.
Order = [
    'const',
    'openness',
    'conscientiousness',
    'extraversion',
    'agreeableness',
    'neuroticism',
    'age',
    'age_sqr',
    'sex',
    'marital_status',
    'german_nationality',
    'hh_income_satisfaction',
]

In [15]:
# Trait-only specification: OLS of total_unemployment_years on the columns
# listed in `trait` (which already contains 'const').
ols_simple = sm.OLS(
    endog=data["total_unemployment_years"],
    exog=data[trait],
).fit()
print(ols_simple.summary())

                               OLS Regression Results                               
Dep. Variable:     total_unemployment_years   R-squared:                       0.017
Model:                                  OLS   Adj. R-squared:                  0.017
Method:                       Least Squares   F-statistic:                     18.39
Date:                      Fri, 22 Mar 2019   Prob (F-statistic):           3.84e-18
Time:                              16:31:34   Log-Likelihood:                -10971.
No. Observations:                      5171   AIC:                         2.195e+04
Df Residuals:                          5165   BIC:                         2.199e+04
Df Model:                                 5                                         
Covariance Type:                  nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------

In [16]:
# Specification with controls: OLS of total_unemployment_years on the columns
# listed in `trait_control` (which already contains 'const').
ols_control = sm.OLS(
    endog=data["total_unemployment_years"],
    exog=data[trait_control],
).fit()
print(ols_control.summary())

                               OLS Regression Results                               
Dep. Variable:     total_unemployment_years   R-squared:                       0.301
Model:                                  OLS   Adj. R-squared:                  0.299
Method:                       Least Squares   F-statistic:                     201.5
Date:                      Fri, 22 Mar 2019   Prob (F-statistic):               0.00
Time:                              16:31:34   Log-Likelihood:                -10092.
No. Observations:                      5171   AIC:                         2.021e+04
Df Residuals:                          5159   BIC:                         2.029e+04
Df Model:                                11                                         
Covariance Type:                  nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------

In [17]:
# Extra rows handed to summary_col via `info_dict`: each value is a formatter
# that receives a fitted result and returns the text to display.
info_dict = {}
info_dict['R-squared'] = lambda x: f"{x.rsquared:.2f}"
info_dict['No. observations'] = lambda x: f"{int(x.nobs):d}"

# One-column table for the trait-only fit, with significance stars and rows
# arranged according to `Order`.
results_table = summary_col(
    results=[ols_simple],
    model_names=['duration of unemployment'],
    regressor_order=Order,
    info_dict=info_dict,
    stars=True,
    float_format='%0.2f',
)
results_table.add_title('OLS model - simple')

print(results_table)

            OLS model - simple
                  duration of unemployment
------------------------------------------
const             1.09***                 
                  (0.03)                  
openness          0.12***                 
                  (0.04)                  
conscientiousness 0.05                    
                  (0.05)                  
extraversion      -0.20***                
                  (0.05)                  
agreeableness     0.18***                 
                  (0.04)                  
neuroticism       0.20***                 
                  (0.03)                  
R-squared         0.02                    
No. observations  5171                    
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


In [18]:
# Extra rows handed to summary_col via `info_dict`: each value is a formatter
# that receives a fitted result and returns the text to display.
info_dict = {}
info_dict['R-squared'] = lambda x: f"{x.rsquared:.2f}"
info_dict['No. observations'] = lambda x: f"{int(x.nobs):d}"

# One-column table for the fit that includes the controls, with significance
# stars and rows arranged according to `Order`.
results_table = summary_col(
    results=[ols_control],
    model_names=['duration of unemployment'],
    regressor_order=Order,
    info_dict=info_dict,
    stars=True,
    float_format='%0.2f',
)
results_table.add_title('OLS model - control')

print(results_table)

              OLS model - control
                       duration of unemployment
-----------------------------------------------
const                  10.04***                
                       (0.35)                  
openness               0.02                    
                       (0.03)                  
conscientiousness      -0.10**                 
                       (0.04)                  
extraversion           0.00                    
                       (0.04)                  
agreeableness          0.10***                 
                       (0.03)                  
neuroticism            0.14***                 
                       (0.03)                  
age                    -0.46***                
                       (0.02)                  
age_sqr                0.01***                 
                       (0.00)                  
sex                    -0.40***                
                       (0.05)                  
marita