In [67]:
import pandas as pd
import numpy as np
import seaborn as sns
import io
import statsmodels.formula.api as smf
import statsmodels.api as sm 
import scipy
import scipy.stats as stats
import re
import matplotlib.pyplot as plt
from statsmodels.iolib.summary2 import summary_params # formatted output of test results
from statsmodels.iolib.summary2 import summary_col # formatted output of test results
from statsmodels.stats.outliers_influence import variance_inflation_factor # VIF

# 1 sleep equation #1

In [68]:
# Regression table pasted as text: each term's coefficient (with trailing asterisks)
# is followed on the next line by a value in parentheses — presumably its standard
# error; confirm against the table's source.
# NOTE(review): the name `date` looks like a typo for `data`; kept because later cells read it.
date='''
totwrk                       -0.169***         
                              (0.018)          
                                               
age                           2.689*           
                              (1.469)          
                                               
south                        101.568**         
                             (41.837)          
                                               
male                         87.669**          
                             (35.104)          
                                               
smsa                         -54.748*          
                             (33.123)          
                                               
yngkid                        -13.962          
                             (50.341)          
                                               
marr                          31.211           
                             (42.233)          
                                               
Constant                    3450.913***        
                             (80.726)          
'''

In [69]:
# Extract every decimal number from the pasted table in reading order
# (a coefficient, then the parenthesised value beneath it).
# The decimal point is escaped on purpose: a bare `.` matches any character, so a
# pair of integers such as "2 0" would otherwise be captured as one "number".
date_lst = re.findall(r'([-+]?\d+\.\d+)', date)
date_lst

['-0.169',
 '0.018',
 '2.689',
 '1.469',
 '101.568',
 '41.837',
 '87.669',
 '35.104',
 '-54.748',
 '33.123',
 '-13.962',
 '50.341',
 '31.211',
 '42.233',
 '3450.913',
 '80.726']

In [70]:
# Lay the flat token list out as one row per term (8 rows, 2 columns):
# column 0 is the coefficient, column 1 the parenthesised value.
beta = np.array(date_lst).reshape(8, 2)
beta

array([['-0.169', '0.018'],
       ['2.689', '1.469'],
       ['101.568', '41.837'],
       ['87.669', '35.104'],
       ['-54.748', '33.123'],
       ['-13.962', '50.341'],
       ['31.211', '42.233'],
       ['3450.913', '80.726']], dtype='<U8')

In [71]:
# Preview the coefficient column (still stored as strings at this point).
beta.T[0]

array(['-0.169', '2.689', '101.568', '87.669', '-54.748', '-13.962',
       '31.211', '3450.913'], dtype='<U8')

In [72]:
# Keep only the coefficient column and convert it from text to floats.
# Note: `beta` is rebound from 2-D to 1-D here, so this cell cannot be re-run
# without first re-running the reshape cell that builds the 2-D table.
beta = beta.T[0].astype(float)
beta

array([-1.690000e-01,  2.689000e+00,  1.015680e+02,  8.766900e+01,
       -5.474800e+01, -1.396200e+01,  3.121100e+01,  3.450913e+03])

In [73]:
# Three observations to predict for, pasted as text. The leading number on each row
# is a row label that has no header of its own; the remaining columns follow the
# header order, which matches the order of the coefficients in `date`.
empiric='''
 totwrk age south male smsa yngkid marr
1  2150  37    0    1    1     0     1  
2  1950  28    1    1    0     1     0  
3  2240  26    0    0    1     0     0  
'''

In [74]:
# Collect every integer token of the pasted sample in reading order
# (row label first, then that row's regressor values).
X = [token.group(1) for token in re.finditer(r'([-+]?\d+)', empiric)]
X

['1',
 '2150',
 '37',
 '0',
 '1',
 '1',
 '0',
 '1',
 '2',
 '1950',
 '28',
 '1',
 '1',
 '0',
 '1',
 '0',
 '3',
 '2240',
 '26',
 '0',
 '0',
 '1',
 '0',
 '0']

In [75]:
# One row per observation: 3 rows of 8 tokens (row label + 7 regressors).
X = np.array(X).reshape(3, 8)
X

array([['1', '2150', '37', '0', '1', '1', '0', '1'],
       ['2', '1950', '28', '1', '1', '0', '1', '0'],
       ['3', '2240', '26', '0', '0', '1', '0', '0']], dtype='<U4')

In [76]:
# Drop column 0 (the row labels) so that only regressor values remain.
# np.delete arguments: array, index to remove, axis (0 = row, 1 = column).
# Note: re-running this cell strips one more column each time.
X = np.delete(arr=X, obj=0, axis=1)
X

array([['2150', '37', '0', '1', '1', '0', '1'],
       ['1950', '28', '1', '1', '0', '1', '0'],
       ['2240', '26', '0', '0', '1', '0', '0']], dtype='<U4')

In [77]:
# Append a column of ones for the intercept; it goes last because "Constant"
# is the last coefficient in `beta`.
X = np.column_stack((X, np.ones(3)))
X

array([['2150', '37', '0', '1', '1', '0', '1', '1.0'],
       ['1950', '28', '1', '1', '0', '1', '0', '1.0'],
       ['2240', '26', '0', '0', '1', '0', '0', '1.0']], dtype='<U32')

In [78]:
# Cast both operands to float ahead of the matrix product. X is still an array of
# strings here; beta is already numeric, so its cast is just a copy.
beta, X = beta.astype(float), X.astype(float)
beta

array([-1.690000e-01,  2.689000e+00,  1.015680e+02,  8.766900e+01,
       -5.474800e+01, -1.396200e+01,  3.121100e+01,  3.450913e+03])

In [79]:
# Linear prediction X·beta: one fitted value per observation.
np.matmul(X, beta)

array([3251.188, 3371.93 , 3087.519])

# 2 sleep equation #2

In [211]:
# Regression table pasted as text: each term's coefficient (with trailing asterisks)
# is followed on the next line by a value in parentheses — presumably its standard
# error; confirm against the table's source.
# NOTE(review): the name `date` looks like a typo for `data`; kept because later cells read it.
date='''
totwrk                       -0.167***         
                              (0.018)          
                                               
age                           -6.255           
                             (11.191)          
                                               
I(age2)                        0.109           
                              (0.134)          
                                               
south                       114.547***         
                             (40.637)          
                                               
male                         90.457***         
                             (34.257)          
                                               
Constant                    3613.995***        
                             (218.433)                  
'''

In [212]:
# Extract every decimal number from the pasted table in reading order
# (a coefficient, then the parenthesised value beneath it).
# The decimal point is escaped on purpose: a bare `.` matches any character, so a
# pair of integers such as "2 0" would otherwise be captured as one "number".
date_lst = re.findall(r'([-+]?\d+\.\d+)', date)
date_lst

['-0.167',
 '0.018',
 '-6.255',
 '11.191',
 '0.109',
 '0.134',
 '114.547',
 '40.637',
 '90.457',
 '34.257',
 '3613.995',
 '218.433']

In [213]:
# Lay the flat token list out as one row per term (6 rows, 2 columns):
# column 0 is the coefficient, column 1 the parenthesised value.
beta = np.array(date_lst).reshape(6, 2)
beta

array([['-0.167', '0.018'],
       ['-6.255', '11.191'],
       ['0.109', '0.134'],
       ['114.547', '40.637'],
       ['90.457', '34.257'],
       ['3613.995', '218.433']], dtype='<U8')

In [214]:
# Keep only the coefficient column and convert it from text to floats.
# Note: `beta` is rebound from 2-D to 1-D here, so this cell cannot be re-run
# without first re-running the reshape cell that builds the 2-D table.
beta = beta.T[0].astype(float)
beta

array([-1.670000e-01, -6.255000e+00,  1.090000e-01,  1.145470e+02,
        9.045700e+01,  3.613995e+03])

In [227]:
# Three observations to predict for, pasted as text. The leading number on each row
# is a row label with no header of its own. The `age*age` column holds age squared
# (e.g. 32 -> 1024), matching the I(age2) term of the table in `date`.
empiric=''' 
totwrk age age*age south male
1  2160  32  1024  1    0  
2  1720  24  576    0    1  
3  2390  44  1936  0    1 
'''

In [228]:
# Collect every integer token of the pasted sample in reading order
# (row label first, then that row's regressor values).
X = [token.group(1) for token in re.finditer(r'([-+]?\d+)', empiric)]
X

['1',
 '2160',
 '32',
 '1024',
 '1',
 '0',
 '2',
 '1720',
 '24',
 '576',
 '0',
 '1',
 '3',
 '2390',
 '44',
 '1936',
 '0',
 '1']

In [229]:
# One row per observation: 3 rows of 6 tokens (row label + 5 regressors).
X = np.array(X).reshape(3, 6)
X

array([['1', '2160', '32', '1024', '1', '0'],
       ['2', '1720', '24', '576', '0', '1'],
       ['3', '2390', '44', '1936', '0', '1']], dtype='<U4')

In [230]:
# Drop column 0 (the row labels) so that only regressor values remain.
# np.delete arguments: array, index to remove, axis (0 = row, 1 = column).
# Note: re-running this cell strips one more column each time.
X = np.delete(arr=X, obj=0, axis=1)
X

array([['2160', '32', '1024', '1', '0'],
       ['1720', '24', '576', '0', '1'],
       ['2390', '44', '1936', '0', '1']], dtype='<U4')

In [231]:
# Append a column of ones for the intercept; it goes last because "Constant"
# is the last coefficient in `beta`.
X = np.column_stack((X, np.ones(3)))
X

array([['2160', '32', '1024', '1', '0', '1.0'],
       ['1720', '24', '576', '0', '1', '1.0'],
       ['2390', '44', '1936', '0', '1', '1.0']], dtype='<U32')

In [232]:
# Cast both operands to float ahead of the matrix product. X is still an array of
# strings here; beta is already numeric, so its cast is just a copy.
beta, X = beta.astype(float), X.astype(float)
beta

array([-1.670000e-01, -6.255000e+00,  1.090000e-01,  1.145470e+02,
        9.045700e+01,  3.613995e+03])

In [233]:
# Linear prediction X·beta: one fitted value per observation.
np.matmul(X, beta)

array([3279.278, 3329.876, 3241.126])

# 3 wage equation #1

In [135]:
# Regression table pasted as text: each term's coefficient (with trailing asterisks)
# is followed on the next line by a value in parentheses — presumably its standard
# error; confirm against the table's source.
# NOTE(review): the name `date` looks like a typo for `data`; kept because later cells read it.
date='''
age                          0.021***          
                              (0.004)          
                                               
IQ                           0.008***          
                              (0.001)          
                                               
south                        -0.099***         
                              (0.027)          
                                               
married                      0.201***          
                              (0.040)          
                                               
urban                        0.175***          
                              (0.028)          
                                               
Constant                     4.974***          
                              (0.165)    
'''

In [136]:
# Extract every decimal number from the pasted table in reading order
# (a coefficient, then the parenthesised value beneath it).
# The decimal point is escaped on purpose: a bare `.` matches any character, so a
# pair of integers such as "2 0" would otherwise be captured as one "number".
date_lst = re.findall(r'([-+]?\d+\.\d+)', date)
date_lst

['0.021',
 '0.004',
 '0.008',
 '0.001',
 '-0.099',
 '0.027',
 '0.201',
 '0.040',
 '0.175',
 '0.028',
 '4.974',
 '0.165']

In [137]:
# Lay the flat token list out as one row per term (6 rows, 2 columns):
# column 0 is the coefficient, column 1 the parenthesised value.
beta = np.array(date_lst).reshape(6, 2)
beta

array([['0.021', '0.004'],
       ['0.008', '0.001'],
       ['-0.099', '0.027'],
       ['0.201', '0.040'],
       ['0.175', '0.028'],
       ['4.974', '0.165']], dtype='<U6')

In [138]:
# Keep only the coefficient column and convert it from text to floats.
# Note: `beta` is rebound from 2-D to 1-D here, so this cell cannot be re-run
# without first re-running the reshape cell that builds the 2-D table.
beta = beta.T[0].astype(float)
beta

array([ 0.021,  0.008, -0.099,  0.201,  0.175,  4.974])

In [139]:
# Three observations to predict for, pasted as text. The leading number on each row
# is a row label that has no header of its own; the remaining columns follow the
# header order, which matches the order of the coefficients in `date`.
empiric='''
age IQ  south married urban
1 36  105   1      1      1  
2 29  123   0      1      0  
3 25  112   1      0      1  
'''

In [140]:
# Collect every integer token of the pasted sample in reading order
# (row label first, then that row's regressor values).
X = [token.group(1) for token in re.finditer(r'([-+]?\d+)', empiric)]
X

['1',
 '36',
 '105',
 '1',
 '1',
 '1',
 '2',
 '29',
 '123',
 '0',
 '1',
 '0',
 '3',
 '25',
 '112',
 '1',
 '0',
 '1']

In [141]:
# One row per observation: 3 rows of 6 tokens (row label + 5 regressors).
X = np.array(X).reshape(3, 6)
X

array([['1', '36', '105', '1', '1', '1'],
       ['2', '29', '123', '0', '1', '0'],
       ['3', '25', '112', '1', '0', '1']], dtype='<U3')

In [142]:
# Drop column 0 (the row labels) so that only regressor values remain.
# np.delete arguments: array, index to remove, axis (0 = row, 1 = column).
# Note: re-running this cell strips one more column each time.
X = np.delete(arr=X, obj=0, axis=1)
X

array([['36', '105', '1', '1', '1'],
       ['29', '123', '0', '1', '0'],
       ['25', '112', '1', '0', '1']], dtype='<U3')

In [143]:
# Append a column of ones for the intercept; it goes last because "Constant"
# is the last coefficient in `beta`.
X = np.column_stack((X, np.ones(3)))
X

array([['36', '105', '1', '1', '1', '1.0'],
       ['29', '123', '0', '1', '0', '1.0'],
       ['25', '112', '1', '0', '1', '1.0']], dtype='<U32')

In [144]:
# Cast both operands to float ahead of the matrix product. X is still an array of
# strings here; beta is already numeric, so its cast is just a copy.
beta, X = beta.astype(float), X.astype(float)
beta

array([ 0.021,  0.008, -0.099,  0.201,  0.175,  4.974])

In [145]:
# Linear prediction X·beta: one fitted value per observation.
# NOTE(review): the second wage equation further down also exponentiates its fitted
# values; if this model's dependent variable is a log as well, the same step may be
# wanted here — confirm.
np.matmul(X, beta)

array([6.847, 6.768, 6.471])

# 4 wage equation #2

In [242]:
# Regression table pasted as text: each term's coefficient (with trailing asterisks)
# is followed on the next line by a value in parentheses — presumably its standard
# error; confirm against the table's source.
# NOTE(review): the I(exper2) coefficient is only available rounded to -0.001; it is
# later multiplied by squared values as large as 1444, so the rounding may visibly
# affect the predictions — confirm whether more digits are available.
# NOTE(review): the name `date` looks like a typo for `data`; kept because later cells read it.
date='''
exper                        0.037***          
                              (0.006)          
                                               
I(exper2)                    -0.001***         
                             (0.0001)          
                                               
female                       -0.363***         
                              (0.040)          
                                               
married                      0.144***          
                              (0.045)          
                                               
smsa                         0.273***          
                              (0.044)          
                                               
Constant                     1.250***          
                              (0.061)          
'''

In [243]:
# Extract every decimal number from the pasted table in reading order
# (a coefficient, then the parenthesised value beneath it).
# The decimal point is escaped on purpose: a bare `.` matches any character, so a
# pair of integers such as "2 0" would otherwise be captured as one "number".
date_lst = re.findall(r'([-+]?\d+\.\d+)', date)
date_lst

['0.037',
 '0.006',
 '-0.001',
 '0.0001',
 '-0.363',
 '0.040',
 '0.144',
 '0.045',
 '0.273',
 '0.044',
 '1.250',
 '0.061']

In [244]:
# Lay the flat token list out as one row per term (6 rows, 2 columns):
# column 0 is the coefficient, column 1 the parenthesised value.
beta = np.array(date_lst).reshape(6, 2)
beta

array([['0.037', '0.006'],
       ['-0.001', '0.0001'],
       ['-0.363', '0.040'],
       ['0.144', '0.045'],
       ['0.273', '0.044'],
       ['1.250', '0.061']], dtype='<U6')

In [245]:
# Keep only the coefficient column and convert it from text to floats.
# Note: `beta` is rebound from 2-D to 1-D here, so this cell cannot be re-run
# without first re-running the reshape cell that builds the 2-D table.
beta = beta.T[0].astype(float)
beta

array([ 3.70e-02, -1.00e-03, -3.63e-01,  1.44e-01,  2.73e-01,  1.25e+00])

In [246]:
# Three observations to predict for, pasted as text. The leading number on each row
# is a row label with no header of its own. The `exper*exper` column holds exper
# squared (e.g. 26 -> 676), matching the I(exper2) term of the table in `date`.
empiric='''
exper exper*exper female married smsa
1   5  25   1       1     1  
2  26  676  0       0     1  
3  38  1444   1       1     0  
'''

In [247]:
# Collect every integer token of the pasted sample in reading order
# (row label first, then that row's regressor values).
X = [token.group(1) for token in re.finditer(r'([-+]?\d+)', empiric)]
X

['1',
 '5',
 '25',
 '1',
 '1',
 '1',
 '2',
 '26',
 '676',
 '0',
 '0',
 '1',
 '3',
 '38',
 '1444',
 '1',
 '1',
 '0']

In [248]:
# One row per observation: 3 rows of 6 tokens (row label + 5 regressors).
X = np.array(X).reshape(3, 6)
X

array([['1', '5', '25', '1', '1', '1'],
       ['2', '26', '676', '0', '0', '1'],
       ['3', '38', '1444', '1', '1', '0']], dtype='<U4')

In [249]:
# Drop column 0 (the row labels) so that only regressor values remain.
# np.delete arguments: array, index to remove, axis (0 = row, 1 = column).
# Note: re-running this cell strips one more column each time.
X = np.delete(arr=X, obj=0, axis=1)
X

array([['5', '25', '1', '1', '1'],
       ['26', '676', '0', '0', '1'],
       ['38', '1444', '1', '1', '0']], dtype='<U4')

In [250]:
# Append a column of ones for the intercept; it goes last because "Constant"
# is the last coefficient in `beta`.
X = np.column_stack((X, np.ones(3)))
X

array([['5', '25', '1', '1', '1', '1.0'],
       ['26', '676', '0', '0', '1', '1.0'],
       ['38', '1444', '1', '1', '0', '1.0']], dtype='<U32')

In [251]:
# Cast both operands to float ahead of the matrix product. X is still an array of
# strings here; beta is already numeric, so its cast is just a copy.
beta, X = beta.astype(float), X.astype(float)
beta

array([ 3.70e-02, -1.00e-03, -3.63e-01,  1.44e-01,  2.73e-01,  1.25e+00])

In [252]:
# Linear prediction X·beta: one fitted value per observation.
np.matmul(X, beta)

array([1.464, 1.809, 0.993])

In [254]:
# Exponentiate the linear prediction and round to 2 decimals.
np.round(np.exp(np.matmul(X, beta)), 2)

array([4.32, 6.1 , 2.7 ])

# 5 output equation #1

In [257]:
# Regression table pasted as text: each term's coefficient (with trailing asterisks)
# is followed on the next line by a value in parentheses — presumably its standard
# error; confirm against the table's source.
# NOTE(review): the name `date` looks like a typo for `data`; kept because later cells read it.
date='''
log(capital)                 0.208***          
                              (0.017)          
                                               
log(labour)                  0.715***          
                              (0.023)          
                                               
Constant                     -1.711***         
                              (0.097)      
'''

In [258]:
# Extract every decimal number from the pasted table in reading order
# (a coefficient, then the parenthesised value beneath it).
# The decimal point is escaped on purpose: a bare `.` matches any character, so a
# pair of integers such as "2 0" would otherwise be captured as one "number".
date_lst = re.findall(r'([-+]?\d+\.\d+)', date)
date_lst

['0.208', '0.017', '0.715', '0.023', '-1.711', '0.097']

In [259]:
# Lay the flat token list out as one row per term (3 rows, 2 columns):
# column 0 is the coefficient, column 1 the parenthesised value.
beta = np.array(date_lst).reshape(3, 2)
beta

array([['0.208', '0.017'],
       ['0.715', '0.023'],
       ['-1.711', '0.097']], dtype='<U6')

In [260]:
# Keep only the coefficient column and convert it from text to floats.
# Note: `beta` is rebound from 2-D to 1-D here, so this cell cannot be re-run
# without first re-running the reshape cell that builds the 2-D table.
beta = beta.T[0].astype(float)
beta

array([ 0.208,  0.715, -1.711])

In [261]:
# Three observations to predict for, pasted as text, in levels (not yet logged).
# The leading number on each row is a row label with no header of its own.
empiric='''
capital labour
1  2.970    85  
2  10.450    60  
3  3.850   105    
'''

In [262]:
# Parse the pasted whitespace-separated sample into a DataFrame.
# The separator is written as a raw string: '\s' inside a plain literal is an
# invalid escape sequence, which newer Python versions warn about.
df = pd.read_csv(io.StringIO(empiric), sep=r'\s+')
df

Unnamed: 0,capital,labour
1,2.97,85
2,10.45,60
3,3.85,105


In [263]:
# Overwrite `capital` with its natural logarithm.
# Note: the column keeps its old name, and re-running this cell would take the log
# of an already-logged column.
df['capital'] = np.log(df['capital'].to_numpy())
df

Unnamed: 0,capital,labour
1,1.088562,85
2,2.346602,60
3,1.348073,105


In [264]:
# Overwrite `labour` with its natural logarithm.
# Note: the column keeps its old name, and re-running this cell would take the log
# of an already-logged column.
df['labour'] = np.log(df['labour'].to_numpy())
df

Unnamed: 0,capital,labour
1,1.088562,4.442651
2,2.346602,4.094345
3,1.348073,4.65396


In [268]:
# Log-transformed sample as text. The values match the 6-decimal display of `df`
# above, so they appear to have been copied from it by hand.
# NOTE(review): building X from `df` directly would avoid this rounding and the
# manual copy step.
empiric_x2='''
capital labour
1  1.088562  4.442651
2  2.346602  4.094345
3  1.348073  4.653960
'''

In [269]:
# Tokenise the table body: decimals are tried first, then bare integers (the row labels).
# The decimal point is escaped on purpose: a bare `.` matches any character, so a
# row label followed by a single space and a digit would be merged into one token.
X = re.findall(r'([-+]?\d+\.\d+|[-+]?\d+)', empiric_x2)
X

['1',
 '1.088562',
 '4.442651',
 '2',
 '2.346602',
 '4.094345',
 '3',
 '1.348073',
 '4.653960']

In [270]:
# One row per observation: 3 rows of 3 tokens (row label + 2 regressors).
X = np.array(X).reshape(3, 3)
X

array([['1', '1.088562', '4.442651'],
       ['2', '2.346602', '4.094345'],
       ['3', '1.348073', '4.653960']], dtype='<U8')

In [271]:
# Drop column 0 (the row labels) so that only regressor values remain.
# np.delete arguments: array, index to remove, axis (0 = row, 1 = column).
# Note: re-running this cell strips one more column each time.
X = np.delete(arr=X, obj=0, axis=1)
X

array([['1.088562', '4.442651'],
       ['2.346602', '4.094345'],
       ['1.348073', '4.653960']], dtype='<U8')

In [275]:
# Append a column of ones for the intercept; it goes last because "Constant"
# is the last coefficient in `beta`.
# NOTE(review): the recorded output of this cell is already numeric, so X had
# presumably been cast to float before the cell was last run — confirm on a fresh run.
X = np.column_stack((X, np.ones(3)))
X

array([[1.088562, 4.442651, 1.      ],
       [2.346602, 4.094345, 1.      ],
       [1.348073, 4.65396 , 1.      ]])

In [276]:
# Cast both operands to float ahead of the matrix product; beta is already
# numeric, so its cast is just a copy.
beta, X = beta.astype(float), X.astype(float)
beta

array([ 0.208,  0.715, -1.711])

In [277]:
# Exponentiate the linear prediction and round to 2 decimals.
np.round(np.exp(np.matmul(X, beta)), 2)

array([5.43, 5.5 , 6.67])

# 6 output equation #2

In [307]:
# Regression table pasted as text: each term's coefficient (with trailing asterisks)
# is followed on the next line by a value in parentheses — presumably its standard
# error; confirm against the table's source.
# NOTE(review): the name `date` looks like a typo for `data`; kept because later cells read it.
date='''
log(capital)                 0.183***          
                              (0.017)          
                                               
log(labour)                  0.515***          
                              (0.083)          
                                               
I(log(capital)2)             0.023***          
                              (0.005)          
                                               
I(log(labour)2)               0.020**          
                              (0.010)          
                                               
Constant                     -1.304***         
                              (0.189)      
'''

In [308]:
# Extract every decimal number from the pasted table in reading order
# (a coefficient, then the parenthesised value beneath it).
# The decimal point is escaped on purpose: a bare `.` matches any character, so a
# pair of integers such as "2 0" would otherwise be captured as one "number".
date_lst = re.findall(r'([-+]?\d+\.\d+)', date)
date_lst

['0.183',
 '0.017',
 '0.515',
 '0.083',
 '0.023',
 '0.005',
 '0.020',
 '0.010',
 '-1.304',
 '0.189']

In [309]:
# Lay the flat token list out as one row per term (5 rows, 2 columns):
# column 0 is the coefficient, column 1 the parenthesised value.
beta = np.array(date_lst).reshape(5, 2)
beta

array([['0.183', '0.017'],
       ['0.515', '0.083'],
       ['0.023', '0.005'],
       ['0.020', '0.010'],
       ['-1.304', '0.189']], dtype='<U6')

In [310]:
# Keep only the coefficient column and convert it from text to floats.
# Note: `beta` is rebound from 2-D to 1-D here, so this cell cannot be re-run
# without first re-running the reshape cell that builds the 2-D table.
beta = beta.T[0].astype(float)
beta

array([ 0.183,  0.515,  0.023,  0.02 , -1.304])

In [311]:
# Three observations to predict for, pasted as text, in levels (not yet logged).
# The leading number on each row is a row label with no header of its own.
empiric='''
 capital labour
1  22.140   407  
2  7.320   197  
3  0.670    31   
'''

In [312]:
# Parse the pasted whitespace-separated sample into a DataFrame.
# The separator is written as a raw string: '\s' inside a plain literal is an
# invalid escape sequence, which newer Python versions warn about.
df = pd.read_csv(io.StringIO(empiric), sep=r'\s+')
df

Unnamed: 0,capital,labour
1,22.14,407
2,7.32,197
3,0.67,31


In [313]:
# Overwrite `capital` with its natural logarithm.
# Note: the column keeps its old name, and re-running this cell would take the log
# of an already-logged column.
df['capital'] = np.log(df['capital'].to_numpy())
df

Unnamed: 0,capital,labour
1,3.097386,407
2,1.99061,197
3,-0.400478,31


In [314]:
# Overwrite `labour` with its natural logarithm.
# Note: the column keeps its old name, and re-running this cell would take the log
# of an already-logged column.
df['labour'] = np.log(df['labour'].to_numpy())
df

Unnamed: 0,capital,labour
1,3.097386,6.008813
2,1.99061,5.283204
3,-0.400478,3.433987


In [315]:
# Squared log(capital); at this point the `capital` column already holds the log.
# A square is never negative, so no absolute value is needed around it.
df['log(capital)2'] = df['capital'] ** 2
df

Unnamed: 0,capital,labour,log(capital)2
1,3.097386,6.008813,9.5938
2,1.99061,5.283204,3.962529
3,-0.400478,3.433987,0.160382


In [316]:
# Squared log(labour); at this point the `labour` column already holds the log.
# A square is never negative, so no absolute value is needed around it.
df['log(labour)2'] = df['labour'] ** 2
df

Unnamed: 0,capital,labour,log(capital)2,log(labour)2
1,3.097386,6.008813,9.5938,36.105836
2,1.99061,5.283204,3.962529,27.912242
3,-0.400478,3.433987,0.160382,11.792268


In [320]:
# Log-transformed sample with squared terms, as text. The values match the
# 6-decimal display of `df` above, so they appear to have been copied from it by hand.
# NOTE(review): building X from `df` directly would avoid this rounding and the
# manual copy step.
empiric_x2='''
capital  labour  log(capital)*log(capital)  log(labour)*log(labour)
1  3.097386  6.008813  9.593800  36.105836
2  1.990610  5.283204  3.962529  27.912242
3  -0.400478  3.433987  0.160382  11.792268
'''

In [321]:
# Tokenise the table body: decimals are tried first, then bare integers (the row labels).
# The decimal point is escaped on purpose: a bare `.` matches any character, so a
# row label followed by a single space and a digit would be merged into one token.
X = re.findall(r'([-+]?\d+\.\d+|[-+]?\d+)', empiric_x2)
X

['1',
 '3.097386',
 '6.008813',
 '9.593800',
 '36.105836',
 '2',
 '1.990610',
 '5.283204',
 '3.962529',
 '27.912242',
 '3',
 '-0.400478',
 '3.433987',
 '0.160382',
 '11.792268']

In [322]:
# One row per observation: 3 rows of 5 tokens (row label + 4 regressors).
X = np.array(X).reshape(3, 5)
X

array([['1', '3.097386', '6.008813', '9.593800', '36.105836'],
       ['2', '1.990610', '5.283204', '3.962529', '27.912242'],
       ['3', '-0.400478', '3.433987', '0.160382', '11.792268']],
      dtype='<U9')

In [323]:
# Drop column 0 (the row labels) so that only regressor values remain.
# np.delete arguments: array, index to remove, axis (0 = row, 1 = column).
# Note: re-running this cell strips one more column each time.
X = np.delete(arr=X, obj=0, axis=1)
X

array([['3.097386', '6.008813', '9.593800', '36.105836'],
       ['1.990610', '5.283204', '3.962529', '27.912242'],
       ['-0.400478', '3.433987', '0.160382', '11.792268']], dtype='<U9')

In [324]:
# Append a column of ones for the intercept; it goes last because "Constant"
# is the last coefficient in `beta`.
X = np.column_stack((X, np.ones(3)))
X

array([['3.097386', '6.008813', '9.593800', '36.105836', '1.0'],
       ['1.990610', '5.283204', '3.962529', '27.912242', '1.0'],
       ['-0.400478', '3.433987', '0.160382', '11.792268', '1.0']],
      dtype='<U32')

In [325]:
# Cast both operands to float ahead of the matrix product. X is still an array of
# strings here; beta is already numeric, so its cast is just a copy.
beta, X = beta.astype(float), X.astype(float)
beta

array([ 0.183,  0.515,  0.023,  0.02 , -1.304])

In [326]:
# Exponentiate the linear prediction and round to 2 decimals.
np.round(np.exp(np.matmul(X, beta)), 2)

array([27.12, 11.36,  1.88])