In [67]:
import pandas as pd
import numpy as np
import seaborn as sns
import io
import statsmodels.formula.api as smf
import statsmodels.api as sm 
import scipy
import scipy.stats as stats
import re
import matplotlib.pyplot as plt
from statsmodels.iolib.summary2 import summary_params # вывод результатов тестирования
from statsmodels.iolib.summary2 import summary_col # вывод результатов тестирования
from statsmodels.stats.outliers_influence import variance_inflation_factor # VIF

# 1 sleep equation #1

In [68]:
# Regression table pasted as raw text: each term name is followed by a value
# (with significance stars) and, on the next line, a value in parentheses.
# NOTE(review): presumably coefficient / standard-error pairs — confirm against
# the table this was copied from. The name `date` looks like a typo for `data`;
# it is kept because the following cells reference it.
date='''
totwrk                       -0.169***         
                              (0.018)          
                                               
age                           2.689*           
                              (1.469)          
                                               
south                        101.568**         
                             (41.837)          
                                               
male                         87.669**          
                             (35.104)          
                                               
smsa                         -54.748*          
                             (33.123)          
                                               
yngkid                        -13.962          
                             (50.341)          
                                               
marr                          31.211           
                             (42.233)          
                                               
Constant                    3450.913***        
                             (80.726)          
'''

In [69]:
# Extract every signed decimal number from the pasted table, in reading order.
# The dot is escaped so that only a literal decimal point is accepted; an
# unescaped '.' matches any character and could fuse unrelated digits.
date_lst = re.findall(r'[-+]?\d+\.\d+', date)
date_lst

['-0.169',
 '0.018',
 '2.689',
 '1.469',
 '101.568',
 '41.837',
 '87.669',
 '35.104',
 '-54.748',
 '33.123',
 '-13.962',
 '50.341',
 '31.211',
 '42.233',
 '3450.913',
 '80.726']

In [70]:
# Arrange the flat list as 8 rows of 2 values (one row per term of the table).
beta = np.asarray(date_lst).reshape(8, 2)
beta

array([['-0.169', '0.018'],
       ['2.689', '1.469'],
       ['101.568', '41.837'],
       ['87.669', '35.104'],
       ['-54.748', '33.123'],
       ['-13.962', '50.341'],
       ['31.211', '42.233'],
       ['3450.913', '80.726']], dtype='<U8')

In [71]:
# Preview the first column (index 0) of the reshaped array; still strings here.
beta[:,0]

array(['-0.169', '2.689', '101.568', '87.669', '-54.748', '-13.962',
       '31.211', '3450.913'], dtype='<U8')

In [72]:
# Keep only the first value of each pair and convert it from text to float.
beta = beta.T[0].astype(float)
beta

array([-1.690000e-01,  2.689000e+00,  1.015680e+02,  8.766900e+01,
       -5.474800e+01, -1.396200e+01,  3.121100e+01,  3.450913e+03])

In [73]:
# Whitespace-separated text table: a header line of column names, then one line
# per row, each starting with a row number.
# NOTE(review): presumably the observations to predict for — confirm.
empiric='''
 totwrk age south male smsa yngkid marr
1  2150  37    0    1    1     0     1  
2  1950  28    1    1    0     1     0  
3  2240  26    0    0    1     0     0  
'''

In [74]:
# Collect every signed integer in the table, row numbers included.
X = re.compile(r'([-+]?\d+)').findall(empiric)
X

['1',
 '2150',
 '37',
 '0',
 '1',
 '1',
 '0',
 '1',
 '2',
 '1950',
 '28',
 '1',
 '1',
 '0',
 '1',
 '0',
 '3',
 '2240',
 '26',
 '0',
 '0',
 '1',
 '0',
 '0']

In [75]:
# 3 rows of 8 values: the row number followed by the 7 regressors.
X = np.asarray(X).reshape(3, 8)
X

array([['1', '2150', '37', '0', '1', '1', '0', '1'],
       ['2', '1950', '28', '1', '1', '0', '1', '0'],
       ['3', '2240', '26', '0', '0', '1', '0', '0']], dtype='<U4')

In [76]:
# Drop column 0, the row-number column (axis=1 removes a column, axis=0 a row).
X = np.delete(X, 0, axis=1)
X

array([['2150', '37', '0', '1', '1', '0', '1'],
       ['1950', '28', '1', '1', '0', '1', '0'],
       ['2240', '26', '0', '0', '1', '0', '0']], dtype='<U4')

In [77]:
# Append a column of ones on the right, lining up with the Constant term,
# which is the last entry of beta.
X = np.c_[X, np.full(3, 1.0)]
X

array([['2150', '37', '0', '1', '1', '0', '1', '1.0'],
       ['1950', '28', '1', '1', '0', '1', '0', '1.0'],
       ['2240', '26', '0', '0', '1', '0', '0', '1.0']], dtype='<U32')

In [78]:
# Make both operands numeric before the matrix product; X is still text here.
X = X.astype(np.float64)
beta = beta.astype(np.float64)
beta

array([-1.690000e-01,  2.689000e+00,  1.015680e+02,  8.766900e+01,
       -5.474800e+01, -1.396200e+01,  3.121100e+01,  3.450913e+03])

In [79]:
# One fitted value per row of X.
np.matmul(X, beta)

array([3251.188, 3371.93 , 3087.519])

# 2 sleep equation #2

In [80]:
# Regression table pasted as raw text: each term name is followed by a value
# (with significance stars) and, on the next line, a value in parentheses.
# NOTE(review): presumably coefficient / standard-error pairs, and I(age2)
# presumably denotes age squared — confirm against the model formula.
date='''
totwrk                       -0.167***         
                              (0.018)          
                                               
age                           -6.255           
                             (11.191)          
                                               
I(age2)                        0.109           
                              (0.134)          
                                               
south                       114.547***         
                             (40.637)          
                                               
male                         90.457***         
                             (34.257)          
                                               
Constant                    3613.995***        
                             (218.433)                  
'''

In [81]:
# Extract every signed decimal number from the pasted table, in reading order.
# The dot is escaped so that only a literal decimal point is accepted; an
# unescaped '.' matches any character and could fuse unrelated digits.
date_lst = re.findall(r'[-+]?\d+\.\d+', date)
date_lst

['-0.167',
 '0.018',
 '-6.255',
 '11.191',
 '0.109',
 '0.134',
 '114.547',
 '40.637',
 '90.457',
 '34.257',
 '3613.995',
 '218.433']

In [82]:
# 6 rows, 2 columns: one row per term of the table.
beta = np.asarray(date_lst).reshape(6, 2)
beta

array([['-0.167', '0.018'],
       ['-6.255', '11.191'],
       ['0.109', '0.134'],
       ['114.547', '40.637'],
       ['90.457', '34.257'],
       ['3613.995', '218.433']], dtype='<U8')

In [83]:
# Keep only the first value of each pair and convert it from text to float.
beta = beta.T[0].astype(float)
beta

array([-1.670000e-01, -6.255000e+00,  1.090000e-01,  1.145470e+02,
        9.045700e+01,  3.613995e+03])

In [128]:
# Whitespace-separated text table: a header line of column names, then one line
# per row, each starting with a row number. There is no age-squared column
# here even though the pasted regression table has an I(age2) term.
empiric=''' 
totwrk age south male
1  2160  32    1    0  
2  1720  24    0    1  
3  2390  44    0    1 
'''

In [129]:
# Collect every signed integer in the table, row numbers included.
X = re.compile(r'([-+]?\d+)').findall(empiric)
X

['1',
 '2160',
 '32',
 '1',
 '0',
 '2',
 '1720',
 '24',
 '0',
 '1',
 '3',
 '2390',
 '44',
 '0',
 '1']

In [130]:
# 3 rows of 5 values: the row number followed by the 4 regressors.
X = np.asarray(X).reshape(3, 5)
X

array([['1', '2160', '32', '1', '0'],
       ['2', '1720', '24', '0', '1'],
       ['3', '2390', '44', '0', '1']], dtype='<U4')

In [131]:
# Drop column 0, the row-number column (axis=1 removes a column, axis=0 a row).
X = np.delete(X, 0, axis=1)
X

array([['2160', '32', '1', '0'],
       ['1720', '24', '0', '1'],
       ['2390', '44', '0', '1']], dtype='<U4')

In [132]:
# Append a column of ones on the right, lining up with the Constant term,
# which is the last entry of beta.
X = np.c_[X, np.full(3, 1.0)]
X

array([['2160', '32', '1', '0', '1.0'],
       ['1720', '24', '0', '1', '1.0'],
       ['2390', '44', '0', '1', '1.0']], dtype='<U32')

In [133]:
# Make both operands numeric before the matrix product; X is still text here.
X = X.astype(np.float64)
beta = beta.astype(np.float64)
beta

array([-1.670000e-01, -6.255000e+00,  1.090000e-01,  1.145470e+02,
        9.045700e+01,  3.613995e+03])

In [134]:
# The coefficient vector has six entries (totwrk, age, I(age2), south, male,
# Constant) but X only holds totwrk, age, south, male and the ones column, so
# the product failed on a shape mismatch. Insert the squared-age column right
# after age so the columns line up with beta.
# NOTE(review): I(age2) is read as age squared — confirm against the formula.
# Assumes the cells above were run in order, so beta holds this section's
# six coefficients and X is already float.
X_design = np.insert(X, 2, X[:, 1] ** 2, axis=1)
X_design@beta

array([3279.278, 3329.876, 3241.126])

# 3 wage equation #1

In [135]:
# Regression table pasted as raw text: each term name is followed by a value
# (with significance stars) and, on the next line, a value in parentheses.
# NOTE(review): presumably coefficient / standard-error pairs — confirm against
# the table this was copied from.
date='''
age                          0.021***          
                              (0.004)          
                                               
IQ                           0.008***          
                              (0.001)          
                                               
south                        -0.099***         
                              (0.027)          
                                               
married                      0.201***          
                              (0.040)          
                                               
urban                        0.175***          
                              (0.028)          
                                               
Constant                     4.974***          
                              (0.165)    
'''

In [136]:
# Extract every signed decimal number from the pasted table, in reading order.
# The dot is escaped so that only a literal decimal point is accepted; an
# unescaped '.' matches any character and could fuse unrelated digits.
date_lst = re.findall(r'[-+]?\d+\.\d+', date)
date_lst

['0.021',
 '0.004',
 '0.008',
 '0.001',
 '-0.099',
 '0.027',
 '0.201',
 '0.040',
 '0.175',
 '0.028',
 '4.974',
 '0.165']

In [137]:
# 6 rows, 2 columns: one row per term of the table.
beta = np.asarray(date_lst).reshape(6, 2)
beta

array([['0.021', '0.004'],
       ['0.008', '0.001'],
       ['-0.099', '0.027'],
       ['0.201', '0.040'],
       ['0.175', '0.028'],
       ['4.974', '0.165']], dtype='<U6')

In [138]:
# Keep only the first value of each pair and convert it from text to float.
beta = beta.T[0].astype(float)
beta

array([ 0.021,  0.008, -0.099,  0.201,  0.175,  4.974])

In [139]:
# Whitespace-separated text table: a header line of column names, then one line
# per row, each starting with a row number.
# NOTE(review): presumably the observations to predict for — confirm.
empiric='''
age IQ  south married urban
1 36  105   1      1      1  
2 29  123   0      1      0  
3 25  112   1      0      1  
'''

In [140]:
# Collect every signed integer in the table, row numbers included.
X = re.compile(r'([-+]?\d+)').findall(empiric)
X

['1',
 '36',
 '105',
 '1',
 '1',
 '1',
 '2',
 '29',
 '123',
 '0',
 '1',
 '0',
 '3',
 '25',
 '112',
 '1',
 '0',
 '1']

In [141]:
# 3 rows of 6 values: the row number followed by the 5 regressors.
X = np.asarray(X).reshape(3, 6)
X

array([['1', '36', '105', '1', '1', '1'],
       ['2', '29', '123', '0', '1', '0'],
       ['3', '25', '112', '1', '0', '1']], dtype='<U3')

In [142]:
# Drop column 0, the row-number column (axis=1 removes a column, axis=0 a row).
X = np.delete(X, 0, axis=1)
X

array([['36', '105', '1', '1', '1'],
       ['29', '123', '0', '1', '0'],
       ['25', '112', '1', '0', '1']], dtype='<U3')

In [143]:
# Append a column of ones on the right, lining up with the Constant term,
# which is the last entry of beta.
X = np.c_[X, np.full(3, 1.0)]
X

array([['36', '105', '1', '1', '1', '1.0'],
       ['29', '123', '0', '1', '0', '1.0'],
       ['25', '112', '1', '0', '1', '1.0']], dtype='<U32')

In [144]:
# Make both operands numeric before the matrix product; X is still text here.
X = X.astype(np.float64)
beta = beta.astype(np.float64)
beta

array([ 0.021,  0.008, -0.099,  0.201,  0.175,  4.974])

In [145]:
# One fitted value per row of X.
np.matmul(X, beta)

array([6.847, 6.768, 6.471])

# 4 wage equation #2

In [146]:
# Regression table pasted as raw text: each term name is followed by a value
# (with significance stars) and, on the next line, a value in parentheses.
# NOTE(review): presumably coefficient / standard-error pairs, and I(exper2)
# presumably denotes exper squared — confirm against the model formula.
date='''
exper                        0.037***          
                              (0.006)          
                                               
I(exper2)                    -0.001***         
                             (0.0001)          
                                               
female                       -0.363***         
                              (0.040)          
                                               
married                      0.144***          
                              (0.045)          
                                               
smsa                         0.273***          
                              (0.044)          
                                               
Constant                     1.250***          
                              (0.061)          
'''

In [147]:
# Extract every signed decimal number from the pasted table, in reading order.
# The dot is escaped so that only a literal decimal point is accepted; an
# unescaped '.' matches any character and could fuse unrelated digits.
date_lst = re.findall(r'[-+]?\d+\.\d+', date)
date_lst

['0.037',
 '0.006',
 '-0.001',
 '0.0001',
 '-0.363',
 '0.040',
 '0.144',
 '0.045',
 '0.273',
 '0.044',
 '1.250',
 '0.061']

In [148]:
# 6 rows, 2 columns: one row per term of the table.
beta = np.asarray(date_lst).reshape(6, 2)
beta

array([['0.037', '0.006'],
       ['-0.001', '0.0001'],
       ['-0.363', '0.040'],
       ['0.144', '0.045'],
       ['0.273', '0.044'],
       ['1.250', '0.061']], dtype='<U6')

In [149]:
# Keep only the first value of each pair and convert it from text to float.
beta = beta.T[0].astype(float)
beta

array([ 3.70e-02, -1.00e-03, -3.63e-01,  1.44e-01,  2.73e-01,  1.25e+00])

In [150]:
# Whitespace-separated text table: a header line of column names, then one line
# per row, each starting with a row number. There is no exper-squared column
# here even though the pasted regression table has an I(exper2) term.
empiric='''
exper female married smsa
1   5     1       1     1  
2  26     0       0     1  
3  38     1       1     0  
'''

In [151]:
# Collect every signed integer in the table, row numbers included.
X = re.compile(r'([-+]?\d+)').findall(empiric)
X

['1', '5', '1', '1', '1', '2', '26', '0', '0', '1', '3', '38', '1', '1', '0']

In [152]:
# 3 rows of 5 values: the row number followed by the 4 regressors.
X = np.asarray(X).reshape(3, 5)
X

array([['1', '5', '1', '1', '1'],
       ['2', '26', '0', '0', '1'],
       ['3', '38', '1', '1', '0']], dtype='<U2')

In [153]:
# Drop column 0, the row-number column (axis=1 removes a column, axis=0 a row).
X = np.delete(X, 0, axis=1)
X

array([['5', '1', '1', '1'],
       ['26', '0', '0', '1'],
       ['38', '1', '1', '0']], dtype='<U2')

In [154]:
# Append a column of ones on the right, lining up with the Constant term,
# which is the last entry of beta.
X = np.c_[X, np.full(3, 1.0)]
X

array([['5', '1', '1', '1', '1.0'],
       ['26', '0', '0', '1', '1.0'],
       ['38', '1', '1', '0', '1.0']], dtype='<U32')

In [155]:
# Make both operands numeric before the matrix product; X is still text here.
X = X.astype(np.float64)
beta = beta.astype(np.float64)
beta

array([ 3.70e-02, -1.00e-03, -3.63e-01,  1.44e-01,  2.73e-01,  1.25e+00])

In [156]:
# beta has six entries (exper, I(exper2), female, married, smsa, Constant) but
# X only holds exper, female, married, smsa and the ones column, so the product
# failed on a shape mismatch. Insert the squared-experience column right after
# exper so the columns line up with beta.
# NOTE(review): I(exper2) is read as exper squared — confirm against the
# formula. Its coefficient is reported as -0.001 with a 0.0001 standard error,
# i.e. rounded to one significant digit, so these fitted values are coarse.
X_design = np.insert(X, 1, X[:, 0] ** 2, axis=1)
X_design@beta

array([1.464, 1.809, 0.993])

# 5 output equation #1

In [157]:
# Regression table pasted as raw text: each term name is followed by a value
# (with significance stars) and, on the next line, a value in parentheses.
# The regressors are named log(capital) and log(labour).
# NOTE(review): presumably coefficient / standard-error pairs — confirm.
date='''
log(capital)                 0.208***          
                              (0.017)          
                                               
log(labour)                  0.715***          
                              (0.023)          
                                               
Constant                     -1.711***         
                              (0.097)      
'''

In [158]:
# Extract every signed decimal number from the pasted table, in reading order.
# The dot is escaped so that only a literal decimal point is accepted; an
# unescaped '.' matches any character and could fuse unrelated digits.
date_lst = re.findall(r'[-+]?\d+\.\d+', date)
date_lst

['0.208', '0.017', '0.715', '0.023', '-1.711', '0.097']

In [159]:
# 3 rows, 2 columns: one row per term of the table.
beta = np.asarray(date_lst).reshape(3, 2)
beta

array([['0.208', '0.017'],
       ['0.715', '0.023'],
       ['-1.711', '0.097']], dtype='<U6')

In [160]:
# Keep only the first value of each pair and convert it from text to float.
beta = beta.T[0].astype(float)
beta

array([ 0.208,  0.715, -1.711])

In [161]:
# Whitespace-separated text table: a header line of column names, then one line
# per row, each starting with a row number. The values are given in levels
# (capital, labour), while the pasted regression table uses their logs.
empiric='''
capital labour
1  2.970    85  
2  10.450    60  
3  3.850   105    
'''

In [162]:
# Collect every number in the table: decimals first, then plain integers.
# The dot is escaped so the decimal branch only accepts a literal point; with
# an unescaped '.' a row number followed by a single space and a digit would be
# swallowed together with the next value as one token.
X = re.findall(r'[-+]?\d+\.\d+|[-+]?\d+', empiric)
X

['1', '2.970', '85', '2', '10.450', '60', '3', '3.850', '105']

In [163]:
# 3 rows of 3 values: the row number followed by capital and labour.
X = np.asarray(X).reshape(3, 3)
X

array([['1', '2.970', '85'],
       ['2', '10.450', '60'],
       ['3', '3.850', '105']], dtype='<U6')

In [164]:
# Drop column 0, the row-number column (axis=1 removes a column, axis=0 a row).
X = np.delete(X, 0, axis=1)
X

array([['2.970', '85'],
       ['10.450', '60'],
       ['3.850', '105']], dtype='<U6')

In [188]:
# Make both operands numeric before the matrix product; X is still text here.
X = X.astype(np.float64)
beta = beta.astype(np.float64)
beta

array([ 0.208,  0.715, -1.711])

In [189]:
# beta holds the coefficients of log(capital), log(labour) and the Constant,
# while X holds capital and labour in levels and has no ones column, so the
# product failed on a shape mismatch. Build the matching design matrix:
# natural logs of both inputs followed by a column of ones.
# NOTE(review): if the dependent variable is itself a log (not shown here),
# these are fitted values on the log scale — apply np.exp for levels.
X_design = np.c_[np.log(X), np.ones(len(X))]
X_design@beta

array([1.69191653, 1.70454957, 1.89698087])

# 6 output equation #2

In [190]:
# Regression table pasted as raw text: each term name is followed by a value
# (with significance stars) and, on the next line, a value in parentheses.
# NOTE(review): presumably coefficient / standard-error pairs, and the
# I(log(...)2) terms presumably denote squared logs — confirm against the
# model formula.
date='''
log(capital)                 0.183***          
                              (0.017)          
                                               
log(labour)                  0.515***          
                              (0.083)          
                                               
I(log(capital)2)             0.023***          
                              (0.005)          
                                               
I(log(labour)2)               0.020**          
                              (0.010)          
                                               
Constant                     -1.304***         
                              (0.189)      
'''

In [191]:
# Extract every signed decimal number from the pasted table, in reading order.
# The dot is escaped so that only a literal decimal point is accepted; an
# unescaped '.' matches any character and could fuse unrelated digits.
date_lst = re.findall(r'[-+]?\d+\.\d+', date)
date_lst

['0.183',
 '0.017',
 '0.515',
 '0.083',
 '0.023',
 '0.005',
 '0.020',
 '0.010',
 '-1.304',
 '0.189']

In [192]:
# 5 rows, 2 columns: one row per term of the table.
beta = np.asarray(date_lst).reshape(5, 2)
beta

array([['0.183', '0.017'],
       ['0.515', '0.083'],
       ['0.023', '0.005'],
       ['0.020', '0.010'],
       ['-1.304', '0.189']], dtype='<U6')

In [193]:
# Keep only the first value of each pair and convert it from text to float.
beta = beta.T[0].astype(float)
beta

array([ 0.183,  0.515,  0.023,  0.02 , -1.304])

In [194]:
# Whitespace-separated text table: a header line of column names, then one line
# per row, each starting with a row number. The values are given in levels
# (capital, labour), while the pasted regression table uses logs and I(...2)
# terms.
empiric='''
 capital labour
1  22.140   407  
2  7.320   197  
3  0.670    31   
'''

In [195]:
# Collect every number in the table: decimals first, then plain integers.
# The dot is escaped so the decimal branch only accepts a literal point; with
# an unescaped '.' a row number followed by a single space and a digit would be
# swallowed together with the next value as one token.
X = re.findall(r'[-+]?\d+\.\d+|[-+]?\d+', empiric)
X

['1', '22.140', '407', '2', '7.320', '197', '3', '0.670', '31']

In [196]:
# 3 rows of 3 values: the row number followed by capital and labour.
X = np.asarray(X).reshape(3, 3)
X

array([['1', '22.140', '407'],
       ['2', '7.320', '197'],
       ['3', '0.670', '31']], dtype='<U6')

In [197]:
# Drop column 0, the row-number column (axis=1 removes a column, axis=0 a row).
X = np.delete(X, 0, axis=1)
X

array([['22.140', '407'],
       ['7.320', '197'],
       ['0.670', '31']], dtype='<U6')

In [198]:
# Append a column of ones on the right, lining up with the Constant term,
# which is the last entry of beta.
X = np.c_[X, np.full(3, 1.0)]
X

array([['22.140', '407', '1.0'],
       ['7.320', '197', '1.0'],
       ['0.670', '31', '1.0']], dtype='<U32')

In [199]:
# Make both operands numeric before the matrix product; X is still text here.
X = X.astype(np.float64)
beta = beta.astype(np.float64)
beta

array([ 0.183,  0.515,  0.023,  0.02 , -1.304])

In [200]:
# beta has five entries (log(capital), log(labour), their two I(...2) terms,
# Constant) but X holds capital and labour in levels plus the ones column, so
# the product failed on a shape mismatch. Build the matching design matrix:
# logs, then squared logs, then the existing ones column, in beta's order.
# NOTE(review): the I(log(...)2) terms are read as squared logs — confirm
# against the formula. If the dependent variable is itself a log (not shown
# here), these are fitted values on the log scale.
log_inputs = np.log(X[:, :2])
X_design = np.c_[log_inputs, log_inputs ** 2, X[:, 2]]
X_design@beta

array([3.30013452, 2.43051462, 0.63075017])