In [156]:
import pandas as pd
import numpy as np
import io
import statsmodels.formula.api as smf
import scipy.stats as stats
import re

# 1 F-тест: значимость регрессии

## 1.1 sleep equation #1

In [157]:
table_11 = '''
Observations     706         706         706         706    
R2              0.132       0.131       0.128       0.007   
Adjusted_R2     0.121       0.123       0.121       0.002  
'''

In [158]:
df = pd.read_csv(io.StringIO(table_11), sep='\s+', header=None, index_col=0)
df.reset_index()

Unnamed: 0,0,1,2,3,4
0,Observations,706.0,706.0,706.0,706.0
1,R2,0.132,0.131,0.128,0.007
2,Adjusted_R2,0.121,0.123,0.121,0.002


In [159]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,706.0,0.132,0.121
2,706.0,0.131,0.123
3,706.0,0.128,0.121
4,706.0,0.007,0.002


In [160]:
df['m']=[10,8,6,5] #количество коэффициентов в регрессии
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,706.0,0.132,0.121,10
2,706.0,0.131,0.123,8
3,706.0,0.128,0.121,6
4,706.0,0.007,0.002,5


In [161]:
df['F_obs']=df['R2']/(1-df['R2'])*(df['Observations']-df['m'])/(df['m']-1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_obs
1,706.0,0.132,0.121,10,11.760369
2,706.0,0.131,0.123,8,15.031728
3,706.0,0.128,0.121,6,20.550459
4,706.0,0.007,0.002,5,1.235398


In [162]:
alpha=0.05
df['F_crit']=stats.f.ppf(1-alpha,df['m']-1,df['Observations']-df['m'])
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_obs,F_crit
1,706.0,0.132,0.121,10,11.760369,1.893317
2,706.0,0.131,0.123,8,15.031728,2.022681
3,706.0,0.128,0.121,6,20.550459,2.226901
4,706.0,0.007,0.002,5,1.235398,2.384638


In [163]:
df['Znachimost']=df.apply(lambda x: 'znachimo' if x['F_obs']>x['F_crit'] else 'ne znachimo', axis = 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_obs,F_crit,Znachimost
1,706.0,0.132,0.121,10,11.760369,1.893317,znachimo
2,706.0,0.131,0.123,8,15.031728,2.022681,znachimo
3,706.0,0.128,0.121,6,20.550459,2.226901,znachimo
4,706.0,0.007,0.002,5,1.235398,2.384638,ne znachimo


## 
$ H_0: \beta_{0} = \beta_{1} = ... = \beta_{k} = 0 $

$ H_1: \beta_{0}^2 + \beta_{1}^2 + ... + \beta_{k}^2 > 0 $

## 1.2 cost equation #1

In [164]:
table_12 = '''
Observations    158       158       158       158      158  
R2             0.982     0.965     0.982     0.053    0.026 
Adjusted_R2    0.982     0.965     0.982     0.035    0.020 
'''

In [165]:
df = pd.read_csv(io.StringIO(table_12), sep='\s+', header=None, index_col=0)
df.reset_index()

Unnamed: 0,0,1,2,3,4,5
0,Observations,158.0,158.0,158.0,158.0,158.0
1,R2,0.982,0.965,0.982,0.053,0.026
2,Adjusted_R2,0.982,0.965,0.982,0.035,0.02


In [166]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,158.0,0.982,0.982
2,158.0,0.965,0.965
3,158.0,0.982,0.982
4,158.0,0.053,0.035
5,158.0,0.026,0.02


In [167]:
df['m']=[5,2,4,4,2] #количество коэффициентов в регрессии
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,158.0,0.982,0.982,5
2,158.0,0.965,0.965,2
3,158.0,0.982,0.982,4
4,158.0,0.053,0.035,4
5,158.0,0.026,0.02,2


In [168]:
df['F_nabl']=df['R2']/(1-df['R2'])*(df['Observations']-df['m'])/(df['m']-1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl
1,158.0,0.982,0.982,5,2086.75
2,158.0,0.965,0.965,2,4301.142857
3,158.0,0.982,0.982,4,2800.518519
4,158.0,0.053,0.035,4,2.872932
5,158.0,0.026,0.02,2,4.164271


In [169]:
alpha=0.01
df['F_crit']=stats.f.ppf(1-alpha,df['m']-1,df['Observations']-df['m'])
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit
1,158.0,0.982,0.982,5,2086.75,3.444189
2,158.0,0.965,0.965,2,4301.142857,6.800161
3,158.0,0.982,0.982,4,2800.518519,3.911342
4,158.0,0.053,0.035,4,2.872932,3.911342
5,158.0,0.026,0.02,2,4.164271,6.800161


In [170]:
df['Znachimost']=df.apply(lambda x: 'znachimo' if x['F_nabl']>x['F_crit'] else 'ne znachimo', axis = 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nabl,F_crit,Znachimost
1,158.0,0.982,0.982,5,2086.75,3.444189,znachimo
2,158.0,0.965,0.965,2,4301.142857,6.800161,znachimo
3,158.0,0.982,0.982,4,2800.518519,3.911342,znachimo
4,158.0,0.053,0.035,4,2.872932,3.911342,ne znachimo
5,158.0,0.026,0.02,2,4.164271,6.800161,ne znachimo


## 1.3 sleep equation #2

In [171]:
table_13 = '''
Observations            706         706         706         706    
R2                     0.118       0.022       0.015       0.007   
Adjusted_R2            0.110       0.014       0.012       0.002   
Residual_Std_Error   419.371     441.301     441.775     444.018  
F_Statistic          13.387    2.663    5.224      1.314    
'''

In [172]:
df = pd.read_csv(io.StringIO(table_13), sep='\s+', header=None, index_col=0)
df.reset_index()

Unnamed: 0,0,1,2,3,4
0,Observations,706.0,706.0,706.0,706.0
1,R2,0.118,0.022,0.015,0.007
2,Adjusted_R2,0.11,0.014,0.012,0.002
3,Residual_Std_Error,419.371,441.301,441.775,444.018
4,F_Statistic,13.387,2.663,5.224,1.314


In [173]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic
1,706.0,0.118,0.11,419.371,13.387
2,706.0,0.022,0.014,441.301,2.663
3,706.0,0.015,0.012,441.775,5.224
4,706.0,0.007,0.002,444.018,1.314


In [174]:
df['m']=[8,7,3,5] #количество коэффициентов в регрессии
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m
1,706.0,0.118,0.11,419.371,13.387,8
2,706.0,0.022,0.014,441.301,2.663,7
3,706.0,0.015,0.012,441.775,5.224,3
4,706.0,0.007,0.002,444.018,1.314,5


In [175]:
df['F_nabl']=df['R2']/(1-df['R2'])*(df['Observations']-df['m'])/(df['m']-1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nabl
1,706.0,0.118,0.11,419.371,13.387,8,13.34046
2,706.0,0.022,0.014,441.301,2.663,7,2.620654
3,706.0,0.015,0.012,441.775,5.224,3,5.352792
4,706.0,0.007,0.002,444.018,1.314,5,1.235398


In [176]:
alpha=0.01
df['F_crit']=stats.f.ppf(1-alpha,df['m']-1,df['Observations']-df['m'])
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nabl,F_crit
1,706.0,0.118,0.11,419.371,13.387,8,13.34046,2.66493
2,706.0,0.022,0.014,441.301,2.663,7,2.620654,2.827785
3,706.0,0.015,0.012,441.775,5.224,3,5.352792,4.63547
4,706.0,0.007,0.002,444.018,1.314,5,1.235398,3.346


In [177]:
df['Znachimost']=df.apply(lambda x: 'znachimo' if x['F_nabl']>x['F_crit'] else 'ne znachimo', axis = 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nabl,F_crit,Znachimost
1,706.0,0.118,0.11,419.371,13.387,8,13.34046,2.66493,znachimo
2,706.0,0.022,0.014,441.301,2.663,7,2.620654,2.827785,ne znachimo
3,706.0,0.015,0.012,441.775,5.224,3,5.352792,4.63547,znachimo
4,706.0,0.007,0.002,444.018,1.314,5,1.235398,3.346,ne znachimo


## 1.4 cost equation #2

In [178]:
table_14 = '''
Observations            158         158         158        158      158  
R2                     0.992       0.976       0.982      0.053    0.026 
Adjusted_R2            0.992       0.976       0.982      0.035    0.020 
Residual_Std_Error    0.138       0.239       0.209      1.516    1.527 
F_Statistic         3880.407 3185.767 2106.934  2.881  4.158
'''

In [179]:
df = pd.read_csv(io.StringIO(table_14), sep='\s+', header=None, index_col=0)
df.reset_index()

Unnamed: 0,0,1,2,3,4,5
0,Observations,158.0,158.0,158.0,158.0,158.0
1,R2,0.992,0.976,0.982,0.053,0.026
2,Adjusted_R2,0.992,0.976,0.982,0.035,0.02
3,Residual_Std_Error,0.138,0.239,0.209,1.516,1.527
4,F_Statistic,3880.407,3185.767,2106.934,2.881,4.158


In [180]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic
1,158.0,0.992,0.992,0.138,3880.407
2,158.0,0.976,0.976,0.239,3185.767
3,158.0,0.982,0.982,0.209,2106.934
4,158.0,0.053,0.035,1.516,2.881
5,158.0,0.026,0.02,1.527,4.158


In [181]:
df['m']=[6,3,5,4,2] #количество коэффициентов в регрессии
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m
1,158.0,0.992,0.992,0.138,3880.407,6
2,158.0,0.976,0.976,0.239,3185.767,3
3,158.0,0.982,0.982,0.209,2106.934,5
4,158.0,0.053,0.035,1.516,2.881,4
5,158.0,0.026,0.02,1.527,4.158,2


In [182]:
df['F_nabl']=df['R2']/(1-df['R2'])*(df['Observations']-df['m'])/(df['m']-1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nabl
1,158.0,0.992,0.992,0.138,3880.407,6,3769.6
2,158.0,0.976,0.976,0.239,3185.767,3,3151.666667
3,158.0,0.982,0.982,0.209,2106.934,5,2086.75
4,158.0,0.053,0.035,1.516,2.881,4,2.872932
5,158.0,0.026,0.02,1.527,4.158,2,4.164271


In [183]:
alpha=0.01
df['F_crit']=stats.f.ppf(1-alpha,df['m']-1,df['Observations']-df['m'])
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nabl,F_crit
1,158.0,0.992,0.992,0.138,3880.407,6,3769.6,3.139909
2,158.0,0.976,0.976,0.239,3185.767,3,3151.666667,4.744744
3,158.0,0.982,0.982,0.209,2106.934,5,2086.75,3.444189
4,158.0,0.053,0.035,1.516,2.881,4,2.872932,3.911342
5,158.0,0.026,0.02,1.527,4.158,2,4.164271,6.800161


In [184]:
df['Znachimost']=df.apply(lambda x: 'znachimo' if x['F_nabl']>x['F_crit'] else 'ne znachimo', axis = 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nabl,F_crit,Znachimost
1,158.0,0.992,0.992,0.138,3880.407,6,3769.6,3.139909,znachimo
2,158.0,0.976,0.976,0.239,3185.767,3,3151.666667,4.744744,znachimo
3,158.0,0.982,0.982,0.209,2106.934,5,2086.75,3.444189,znachimo
4,158.0,0.053,0.035,1.516,2.881,4,2.872932,3.911342,ne znachimo
5,158.0,0.026,0.02,1.527,4.158,2,4.164271,6.800161,ne znachimo


# 2 F-тест: совместная значимость

## 2.1 sleep equation #1

In [185]:
table_21 = '''
Names               Estimate  Std_Error t_nabl    
Intercept 3446.830301   81.839915 42.1167  
totwrk        -0.169130    0.018074 -9.3577  
age            2.714483    1.472374  1.8436   
male          87.108150   35.173210  2.4765    
south        102.271833   41.925047  2.4394  
smsa         -54.187710   33.193402 -1.6325     
yngkid       -13.051272   50.459104 -0.2587      
marr          31.360412   42.263190  0.7420      
union         11.865664   38.185877  0.3107      
'''

In [191]:
df = pd.read_csv(io.StringIO(table_21), sep='\s+')
df.reset_index()

Unnamed: 0,index,Names,Estimate,Std_Error,t_nabl
0,0,Intercept,3446.830301,81.839915,42.1167
1,1,totwrk,-0.16913,0.018074,-9.3577
2,2,age,2.714483,1.472374,1.8436
3,3,male,87.10815,35.17321,2.4765
4,4,south,102.271833,41.925047,2.4394
5,5,smsa,-54.18771,33.193402,-1.6325
6,6,yngkid,-13.051272,50.459104,-0.2587
7,7,marr,31.360412,42.26319,0.742
8,8,union,11.865664,38.185877,0.3107


In [192]:
alpha=0.05
t_crit = stats.t.ppf(1-alpha/2,706-9-1) # t_crit = (1-alpha/2, n-k-1)
round(t_crit,3)

1.963

In [193]:
df['Znachimost']=df.apply(lambda x: 'znanchim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1)
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl,Znachimost
0,Intercept,3446.830301,81.839915,42.1167,znanchim
1,totwrk,-0.16913,0.018074,-9.3577,znanchim
2,age,2.714483,1.472374,1.8436,ne znachim
3,male,87.10815,35.17321,2.4765,znanchim
4,south,102.271833,41.925047,2.4394,znanchim
5,smsa,-54.18771,33.193402,-1.6325,ne znachim
6,yngkid,-13.051272,50.459104,-0.2587,ne znachim
7,marr,31.360412,42.26319,0.742,ne znachim
8,union,11.865664,38.185877,0.3107,ne znachim


## 
$ H_0: \beta_{marr} = \beta_{union} = \beta_{ynkid} = \beta_{smsa} = 0 $

$ H_1: \beta_{marr}^2 + \beta_{union}^2 + \beta_{ynkid}^2 + \beta_{smsa}^2 > 0 $

In [194]:
unrest, rest = np.float_(re.findall(r'([-+]?\d+.\d+)', 'R2                      0.131          0.127'))
print(unrest, rest)

0.131 0.127


In [197]:
F_nabl = (unrest - rest)/(1-unrest)*(706-9)/(4)
F_nabl

0.8020713463751445

In [198]:
alpha=0.05
F_crit=stats.f.ppf(1-alpha, 4, 706-9)
F_crit.round(3)

2.385

$F_{nabl}<F_{crit}$, следовательно, данные согласуются с гипотезой $H_{0}$. 

Коэффициенты совместно незначимы.

## 2.2 sleep equation #2

In [199]:
table_22 = '''
Names              Estimate  Std_Error t_nabl    
Intercept 3613.994663  218.432669 16.5451 
totwrk        -0.167320    0.018132 -9.2278 
age           -6.254665   11.191179 -0.5589     
I(age^2)       0.108862    0.133736  0.8140      
male          90.456708   34.257144  2.6405  
south        114.547494   40.637198  2.8188        
'''

In [200]:
df = pd.read_csv(io.StringIO(table_22), sep='\s+')
df.reset_index()

Unnamed: 0,index,Names,Estimate,Std_Error,t_nabl
0,0,Intercept,3613.994663,218.432669,16.5451
1,1,totwrk,-0.16732,0.018132,-9.2278
2,2,age,-6.254665,11.191179,-0.5589
3,3,I(age^2),0.108862,0.133736,0.814
4,4,male,90.456708,34.257144,2.6405
5,5,south,114.547494,40.637198,2.8188


In [201]:
alpha=0.01
t_crit = stats.t.ppf(1-alpha/2,706-9-1) # t_crit = (1-alpha/2, n-k-1)
round(t_crit,3)

2.583

In [211]:
df['Znachimost']=df.apply(lambda x: 'znanchim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1)
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl,Znachimost
0,Intercept,3587.178292,220.238404,16.2877,znanchim
1,totwrk,-0.149121,0.026286,-5.673,znanchim
2,age,-6.258663,11.191863,-0.5592,ne znachim
3,I(age^2),0.106517,0.133767,0.7963,ne znachim
4,male,161.890143,82.177481,1.97,ne znachim
5,south,112.143584,40.717344,2.7542,znanchim
6,totwrk:male,-0.034694,0.036279,-0.9563,ne znachim


## 
$ H_0: \beta_{age} = \beta_{age}^2 = 0 $

$ H_1: \beta_{age} + \beta_{age}^2 > 0 $

In [203]:
unrest, rest = np.float_(re.findall(r'([-+]?\d+.\d+)', 'R2                      0.128          0.122'))
print(unrest, rest)

0.128 0.122


In [204]:
F_nabl = (unrest - rest)/(1-unrest)*(706-6)/(2)
F_nabl

2.4082568807339473

In [206]:
alpha=0.01
F_crit=stats.f.ppf(1-alpha, 2, 706-6)
F_crit.round(3)

4.636

$F_{nabl}<F_{crit}$, следовательно, данные согласуются с гипотезой $H_{0}$. 

Коэффициенты совместно незначимы.

## 2.3 sleep equation #3

In [207]:
table_23 = '''
 Names             Estimate  Std_Error t_nabl     
Intercept 3587.178292  220.238404 16.2877 
totwrk        -0.149121    0.026286 -5.6730 
age           -6.258663   11.191863 -0.5592    
I(age^2)       0.106517    0.133767  0.7963    
male         161.890143   82.177481  1.9700    
south        112.143584   40.717344  2.7542  
totwrk:male   -0.034694    0.036279 -0.9563        
'''

In [208]:
df = pd.read_csv(io.StringIO(table_23), sep='\s+')
df.reset_index()

Unnamed: 0,index,Names,Estimate,Std_Error,t_nabl
0,0,Intercept,3587.178292,220.238404,16.2877
1,1,totwrk,-0.149121,0.026286,-5.673
2,2,age,-6.258663,11.191863,-0.5592
3,3,I(age^2),0.106517,0.133767,0.7963
4,4,male,161.890143,82.177481,1.97
5,5,south,112.143584,40.717344,2.7542
6,6,totwrk:male,-0.034694,0.036279,-0.9563


In [209]:
alpha=0.01
t_crit = stats.t.ppf(1-alpha/2,706-7-1) # t_crit = (1-alpha/2, n-k-1)
round(t_crit,3)

2.583

In [210]:
df['Znachimost']=df.apply(lambda x: 'znanchim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1)
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl,Znachimost
0,Intercept,3587.178292,220.238404,16.2877,znanchim
1,totwrk,-0.149121,0.026286,-5.673,znanchim
2,age,-6.258663,11.191863,-0.5592,ne znachim
3,I(age^2),0.106517,0.133767,0.7963,ne znachim
4,male,161.890143,82.177481,1.97,ne znachim
5,south,112.143584,40.717344,2.7542,znanchim
6,totwrk:male,-0.034694,0.036279,-0.9563,ne znachim


## 
$ H_0: \beta_{male} = \beta_{male*totwork} = 0 $

$ H_1: \beta_{male} + \beta_{male*totwork} > 0 $

In [212]:
unrest, rest = np.float_(re.findall(r'([-+]?\d+.\d+)', 'R2                      0.129          0.119'))
print(unrest, rest)

0.129 0.119


In [213]:
F_nabl = (unrest - rest)/(1-unrest)*(706-7)/(2)
F_nabl

4.012629161882897

In [214]:
alpha=0.01
F_crit=stats.f.ppf(1-alpha, 2, 706-7)
F_crit.round(3)

4.636

$F_{nabl}<F_{crit}$, следовательно, данные согласуются с гипотезой $H_{0}$. 

Коэффициенты совместно незначимы.

## 2.4 wage equation #1

In [215]:
table_24 = '''
 Names             Estimate  Std_Error t_nabl     
 Intercept  2.93780233  1.65623416  1.7738   
age          0.14475408  0.10003338  1.4471     
I(age^2)    -0.00185652  0.00150253 -1.2356     
IQ           0.00821604  0.00084095  9.7699 
south       -0.10027331  0.02683810 -3.7362 
urban        0.17319472  0.02766716  6.2599 
married      0.20153862  0.04023976  5.0084         
'''

In [216]:
df = pd.read_csv(io.StringIO(table_24), sep='\s+')
df.reset_index()

Unnamed: 0,index,Names,Estimate,Std_Error,t_nabl
0,0,Intercept,2.937802,1.656234,1.7738
1,1,age,0.144754,0.100033,1.4471
2,2,I(age^2),-0.001857,0.001503,-1.2356
3,3,IQ,0.008216,0.000841,9.7699
4,4,south,-0.100273,0.026838,-3.7362
5,5,urban,0.173195,0.027667,6.2599
6,6,married,0.201539,0.04024,5.0084


In [217]:
alpha=0.01
t_crit = stats.t.ppf(1-alpha/2,935-7-1) # t_crit = (1-alpha/2, n-k-1)
round(t_crit,3)

2.581

In [218]:
df['Znachimost']=df.apply(lambda x: 'znanchim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1)
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl,Znachimost
0,Intercept,2.937802,1.656234,1.7738,ne znachim
1,age,0.144754,0.100033,1.4471,ne znachim
2,I(age^2),-0.001857,0.001503,-1.2356,ne znachim
3,IQ,0.008216,0.000841,9.7699,znanchim
4,south,-0.100273,0.026838,-3.7362,znanchim
5,urban,0.173195,0.027667,6.2599,znanchim
6,married,0.201539,0.04024,5.0084,znanchim


## 
$ H_0: \beta_{age} = \beta_{age}^2 = 0 $

$ H_1: \beta_{age} + \beta_{age}^2 > 0 $

In [219]:
unrest, rest = np.float_(re.findall(r'([-+]?\d+.\d+)', 'R2                      0.201          0.175'))
print(unrest, rest)

0.201 0.175


In [220]:
F_nabl = (unrest - rest)/(1-unrest)*(935-7)/(2)
F_nabl

15.098873591990001

In [221]:
alpha=0.01
F_crit=stats.f.ppf(1-alpha, 2, 935-7)
F_crit.round(3)

4.628

$F_{nabl}>F_{crit}$, следовательно, данные согласуются с гипотезой $H_{1}$. 

Коэффициенты совместно значимы.

# 3 F-тест: структурные ограничения

## 3.1 cost equation

In [227]:
df_elec=pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Electricity.csv')
df_elec

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [249]:
models31=smf.ols(data=df_elec, formula='np.log(cost)~np.log(q)+I(np.log(q)**2)+np.log(pl)+np.log(pk)+np.log(pf)').fit()
models31.summary()

0,1,2,3
Dep. Variable:,np.log(cost),R-squared:,0.992
Model:,OLS,Adj. R-squared:,0.992
Method:,Least Squares,F-statistic:,3880.0
Date:,"Tue, 04 Apr 2023",Prob (F-statistic):,2.45e-158
Time:,23:46:56,Log-Likelihood:,91.515
No. Observations:,158,AIC:,-171.0
Df Residuals:,152,BIC:,-152.7
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-6.7387,0.706,-9.541,0.000,-8.134,-5.343
np.log(q),0.4030,0.032,12.734,0.000,0.340,0.466
I(np.log(q) ** 2),0.0304,0.002,14.024,0.000,0.026,0.035
np.log(pl),0.1461,0.070,2.073,0.040,0.007,0.285
np.log(pk),0.1571,0.058,2.721,0.007,0.043,0.271
np.log(pf),0.6847,0.043,16.043,0.000,0.600,0.769

0,1,2,3
Omnibus:,2.342,Durbin-Watson:,1.798
Prob(Omnibus):,0.31,Jarque-Bera (JB):,2.078
Skew:,0.109,Prob(JB):,0.354
Kurtosis:,3.518,Cond. No.,4990.0


In [250]:
table_31='''
Names        coef   std_err t_nabl
Intercept   -6.7387 0.706   -9.541 
np.log(q)    0.4030 0.032   12.734
np.log(q)^2 0.0304 0.002   14.024
np.log(pl)   0.1461 0.070   2.073 
np.log(pk)   0.1571 0.058   2.721
np.log(pf)   0.6847 0.043   16.043
'''

In [251]:
df = pd.read_csv(io.StringIO(table_31), sep='\s+')
df.reset_index()

Unnamed: 0,index,Names,coef,std_err,t_nabl
0,0,Intercept,-6.7387,0.706,-9.541
1,1,np.log(q),0.403,0.032,12.734
2,2,np.log(q)^2,0.0304,0.002,14.024
3,3,np.log(pl),0.1461,0.07,2.073
4,4,np.log(pk),0.1571,0.058,2.721
5,5,np.log(pf),0.6847,0.043,16.043


In [252]:
alpha=0.01
t_crit = stats.t.ppf(1-alpha/2,158-6-1) # t_crit = (1-alpha/2, n-k-1) # ???
round(t_crit,3)

2.609

In [253]:
df['Znachimost']=df.apply(lambda x: 'znanchim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1)
df

Unnamed: 0,Names,coef,std_err,t_nabl,Znachimost
0,Intercept,-6.7387,0.706,-9.541,znanchim
1,np.log(q),0.403,0.032,12.734,znanchim
2,np.log(q)^2,0.0304,0.002,14.024,znanchim
3,np.log(pl),0.1461,0.07,2.073,ne znachim
4,np.log(pk),0.1571,0.058,2.721,znanchim
5,np.log(pf),0.6847,0.043,16.043,znanchim


## 3.1.1 Гипотеза 1

In [254]:
unrestricted_model = smf.ols(data = df_elec, formula = 'np.log(cost) ~ np.log(q) + I(np.log(q)**2) + np.log(pl) + np.log(pk) + np.log(pf)').fit() # формируем неограниченную регрессионную модель

In [255]:
f_test = unrestricted_model.f_test("np.log(pl) + np.log(pk) + np.log(pf) = 1")
round(f_test.fvalue,3)

0.015

In [256]:
alpha=0.01
F_crit=stats.f.ppf(1-alpha, 3, 158-6-1) # ??
F_crit.round(3)

3.914

## 3.2 output equation

In [257]:
df_lab=pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')
df_lab

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


In [258]:
models32=smf.ols(data=df_lab, formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)').fit()
models32.summary()

0,1,2,3
Dep. Variable:,np.log(output),R-squared:,0.888
Model:,OLS,Adj. R-squared:,0.888
Method:,Least Squares,F-statistic:,1499.0
Date:,"Tue, 04 Apr 2023",Prob (F-statistic):,1.65e-268
Time:,23:47:34,Log-Likelihood:,-279.62
No. Observations:,569,AIC:,567.2
Df Residuals:,565,BIC:,584.6
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-5.0073,0.221,-22.649,0.000,-5.442,-4.573
np.log(capital),0.1493,0.015,10.141,0.000,0.120,0.178
np.log(labour),0.7204,0.019,37.487,0.000,0.683,0.758
np.log(wage),0.9214,0.058,16.001,0.000,0.808,1.034

0,1,2,3
Omnibus:,252.01,Durbin-Watson:,2.008
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3723.965
Skew:,1.547,Prob(JB):,0.0
Kurtosis:,15.145,Cond. No.,82.3


In [259]:
table_32='''
Names        coef   std_err t_nabl
Intercept -5.0073 0.221 -22.649 0.000 -5.442 -4.573
np.log(capital) 0.1493 0.015 10.141 0.000 0.120 0.178
np.log(labour) 0.7204 0.019 37.487 0.000 0.683 0.758
np.log(wage) 0.9214 0.058 16.001 0.000 0.808 1.034
'''

In [260]:
df = pd.read_csv(io.StringIO(table_32), sep='\s+')
df.reset_index()

Unnamed: 0,level_0,level_1,level_2,Names,coef,std_err,t_nabl
0,Intercept,-5.0073,0.221,-22.649,0.0,-5.442,-4.573
1,np.log(capital),0.1493,0.015,10.141,0.0,0.12,0.178
2,np.log(labour),0.7204,0.019,37.487,0.0,0.683,0.758
3,np.log(wage),0.9214,0.058,16.001,0.0,0.808,1.034


In [261]:
alpha=0.05
t_crit = stats.t.ppf(1-alpha/2,569-4-1) # t_crit = (1-alpha/2, n-k-1)
round(t_crit,3)

1.964