In [56]:
import numpy as np
import pandas as pd
import io
import statsmodels.formula.api as smf
import scipy.stats as stats
import re

# Задача 1.1

In [57]:
data_string='''Observations     706         706         706         706    
R2              0.132       0.131       0.128       0.007   
Adjusted_R2     0.121       0.123       0.121       0.002'''

In [58]:
df = pd.read_csv(io.StringIO(data_string), 
    delimiter='\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Observations,706.0,706.0,706.0,706.0
R2,0.132,0.131,0.128,0.007
Adjusted_R2,0.121,0.123,0.121,0.002


In [59]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,706.0,0.132,0.121
2,706.0,0.131,0.123
3,706.0,0.128,0.121
4,706.0,0.007,0.002


In [60]:
df['m'] = [10,8,6,5]
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,706.0,0.132,0.121,10
2,706.0,0.131,0.123,8
3,706.0,0.128,0.121,6
4,706.0,0.007,0.002,5


In [61]:
df['F_nab'] = (df['R2']/(1-df['R2']))*((df['Observations']-df['m'])/(df['m']-1))
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nab
1,706.0,0.132,0.121,10,11.760369
2,706.0,0.131,0.123,8,15.031728
3,706.0,0.128,0.121,6,20.550459
4,706.0,0.007,0.002,5,1.235398


In [62]:
alpha = 0.05
df['F_cr'] = stats.f.ppf(1-alpha,df['m']-1,df['Observations'] - df['m']) # F(1-alpha, k, n-k)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nab,F_cr
1,706.0,0.132,0.121,10,11.760369,1.893317
2,706.0,0.131,0.123,8,15.031728,2.022681
3,706.0,0.128,0.121,6,20.550459,2.226901
4,706.0,0.007,0.002,5,1.235398,2.384638


In [63]:
df['znachimost'] = df.apply(lambda x: 'znachimo' if x['F_nab'] > x['F_cr'] else 'ne znachimo', axis = 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nab,F_cr,znachimost
1,706.0,0.132,0.121,10,11.760369,1.893317,znachimo
2,706.0,0.131,0.123,8,15.031728,2.022681,znachimo
3,706.0,0.128,0.121,6,20.550459,2.226901,znachimo
4,706.0,0.007,0.002,5,1.235398,2.384638,ne znachimo


# Задача 1.2

In [64]:
data_string='''Observations    158       158       158       158      158  
R2             0.982     0.965     0.982     0.053    0.026 
Adjusted_R2    0.982     0.965     0.982     0.035    0.020'''

In [65]:
df = pd.read_csv(io.StringIO(data_string), 
    delimiter='\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4,5
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Observations,158.0,158.0,158.0,158.0,158.0
R2,0.982,0.965,0.982,0.053,0.026
Adjusted_R2,0.982,0.965,0.982,0.035,0.02


In [66]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,158.0,0.982,0.982
2,158.0,0.965,0.965
3,158.0,0.982,0.982
4,158.0,0.053,0.035
5,158.0,0.026,0.02


In [67]:
df['m'] = [5,2,4,4,2]
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,158.0,0.982,0.982,5
2,158.0,0.965,0.965,2
3,158.0,0.982,0.982,4
4,158.0,0.053,0.035,4
5,158.0,0.026,0.02,2


In [68]:
df['F_nab'] = (df['R2']/(1-df['R2']))*((df['Observations']-df['m'])/(df['m']-1))
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nab
1,158.0,0.982,0.982,5,2086.75
2,158.0,0.965,0.965,2,4301.142857
3,158.0,0.982,0.982,4,2800.518519
4,158.0,0.053,0.035,4,2.872932
5,158.0,0.026,0.02,2,4.164271


In [69]:
alpha = 0.01
df['F_cr'] = stats.f.ppf(1-alpha,df['m']-1,df['Observations'] - df['m']) # F(1-alpha, k, n-k)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nab,F_cr
1,158.0,0.982,0.982,5,2086.75,3.444189
2,158.0,0.965,0.965,2,4301.142857,6.800161
3,158.0,0.982,0.982,4,2800.518519,3.911342
4,158.0,0.053,0.035,4,2.872932,3.911342
5,158.0,0.026,0.02,2,4.164271,6.800161


In [70]:
df['znachimost'] = df.apply(lambda x: 'znachimo' if x['F_nab'] > x['F_cr'] else 'ne znachimo', axis = 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_nab,F_cr,znachimost
1,158.0,0.982,0.982,5,2086.75,3.444189,znachimo
2,158.0,0.965,0.965,2,4301.142857,6.800161,znachimo
3,158.0,0.982,0.982,4,2800.518519,3.911342,znachimo
4,158.0,0.053,0.035,4,2.872932,3.911342,ne znachimo
5,158.0,0.026,0.02,2,4.164271,6.800161,ne znachimo


# Задача 1.3

In [71]:
data_string='''Observations            706         706         706         706    
R2                     0.118       0.022       0.015       0.007   
Adjusted_R2            0.110       0.014       0.012       0.002   
Residual_Std_Error   419.371     441.301     441.775     444.018  
F_Statistic          13.387   2.663    5.224      1.314 '''

In [72]:
df = pd.read_csv(io.StringIO(data_string), 
    delimiter='\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Observations,706.0,706.0,706.0,706.0
R2,0.118,0.022,0.015,0.007
Adjusted_R2,0.11,0.014,0.012,0.002
Residual_Std_Error,419.371,441.301,441.775,444.018
F_Statistic,13.387,2.663,5.224,1.314


In [73]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic
1,706.0,0.118,0.11,419.371,13.387
2,706.0,0.022,0.014,441.301,2.663
3,706.0,0.015,0.012,441.775,5.224
4,706.0,0.007,0.002,444.018,1.314


In [74]:
df['m']=[8,7,3,5]
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m
1,706.0,0.118,0.11,419.371,13.387,8
2,706.0,0.022,0.014,441.301,2.663,7
3,706.0,0.015,0.012,441.775,5.224,3
4,706.0,0.007,0.002,444.018,1.314,5


In [75]:
df['F_nab'] = (df['R2']/(1-df['R2']))*((df['Observations']-df['m'])/(df['m']-1))
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nab
1,706.0,0.118,0.11,419.371,13.387,8,13.34046
2,706.0,0.022,0.014,441.301,2.663,7,2.620654
3,706.0,0.015,0.012,441.775,5.224,3,5.352792
4,706.0,0.007,0.002,444.018,1.314,5,1.235398


In [76]:
alpha = 0.01
df['F_cr'] = stats.f.ppf(1-alpha,df['m']-1,df['Observations'] - df['m']) # F(1-alpha, k, n-k)
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nab,F_cr
1,706.0,0.118,0.11,419.371,13.387,8,13.34046,2.66493
2,706.0,0.022,0.014,441.301,2.663,7,2.620654,2.827785
3,706.0,0.015,0.012,441.775,5.224,3,5.352792,4.63547
4,706.0,0.007,0.002,444.018,1.314,5,1.235398,3.346


In [77]:
df['znachimost'] = df.apply(lambda x: 'znachimo' if x['F_nab'] > x['F_cr'] else 'ne znachimo', axis = 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nab,F_cr,znachimost
1,706.0,0.118,0.11,419.371,13.387,8,13.34046,2.66493,znachimo
2,706.0,0.022,0.014,441.301,2.663,7,2.620654,2.827785,ne znachimo
3,706.0,0.015,0.012,441.775,5.224,3,5.352792,4.63547,znachimo
4,706.0,0.007,0.002,444.018,1.314,5,1.235398,3.346,ne znachimo


# Задача 1.4

In [78]:
data_string='''Observations            158         158         158        158      158  
R2                     0.992       0.976       0.982      0.053    0.026 
Adjusted_R2            0.992       0.976       0.982      0.035    0.020 
Residual_Std_Error    0.138       0.239       0.209      1.516    1.527 
F_Statistic         3880.407 3185.767 2106.934  2.881  4.158'''

In [79]:
df = pd.read_csv(io.StringIO(data_string), 
    delimiter='\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4,5
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Observations,158.0,158.0,158.0,158.0,158.0
R2,0.992,0.976,0.982,0.053,0.026
Adjusted_R2,0.992,0.976,0.982,0.035,0.02
Residual_Std_Error,0.138,0.239,0.209,1.516,1.527
F_Statistic,3880.407,3185.767,2106.934,2.881,4.158


In [80]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic
1,158.0,0.992,0.992,0.138,3880.407
2,158.0,0.976,0.976,0.239,3185.767
3,158.0,0.982,0.982,0.209,2106.934
4,158.0,0.053,0.035,1.516,2.881
5,158.0,0.026,0.02,1.527,4.158


In [81]:
df['m']=[6,3,5,4,2]
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m
1,158.0,0.992,0.992,0.138,3880.407,6
2,158.0,0.976,0.976,0.239,3185.767,3
3,158.0,0.982,0.982,0.209,2106.934,5
4,158.0,0.053,0.035,1.516,2.881,4
5,158.0,0.026,0.02,1.527,4.158,2


In [82]:
df['F_nab'] = (df['R2']/(1-df['R2']))*((df['Observations']-df['m'])/(df['m']-1))
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nab
1,158.0,0.992,0.992,0.138,3880.407,6,3769.6
2,158.0,0.976,0.976,0.239,3185.767,3,3151.666667
3,158.0,0.982,0.982,0.209,2106.934,5,2086.75
4,158.0,0.053,0.035,1.516,2.881,4,2.872932
5,158.0,0.026,0.02,1.527,4.158,2,4.164271


In [83]:
alpha = 0.01
df['F_cr'] = stats.f.ppf(1-alpha,df['m']-1,df['Observations'] - df['m']) # F(1-alpha, k, n-k)
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nab,F_cr
1,158.0,0.992,0.992,0.138,3880.407,6,3769.6,3.139909
2,158.0,0.976,0.976,0.239,3185.767,3,3151.666667,4.744744
3,158.0,0.982,0.982,0.209,2106.934,5,2086.75,3.444189
4,158.0,0.053,0.035,1.516,2.881,4,2.872932,3.911342
5,158.0,0.026,0.02,1.527,4.158,2,4.164271,6.800161


In [84]:
df['znachimost'] = df.apply(lambda x: 'znachimo' if x['F_nab'] > x['F_cr'] else 'ne znachimo', axis = 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,Residual_Std_Error,F_Statistic,m,F_nab,F_cr,znachimost
1,158.0,0.992,0.992,0.138,3880.407,6,3769.6,3.139909,znachimo
2,158.0,0.976,0.976,0.239,3185.767,3,3151.666667,4.744744,znachimo
3,158.0,0.982,0.982,0.209,2106.934,5,2086.75,3.444189,znachimo
4,158.0,0.053,0.035,1.516,2.881,4,2.872932,3.911342,ne znachimo
5,158.0,0.026,0.02,1.527,4.158,2,4.164271,6.800161,ne znachimo


# Задача 2.1

In [97]:
data_string='''
Names       Estimate  Std_Error t_nablud  
Intercept    3446.830301   81.839915 42.1167 
totwrk        -0.169130    0.018074 -9.3577 
age            2.714483    1.472374  1.8436  
male          87.108150   35.173210  2.4765  
south        102.271833   41.925047  2.4394  
smsa         -54.187710   33.193402 -1.6325  
yngkid       -13.051272   50.459104 -0.2587   
marr          31.360412   42.263190  0.7420     
union         11.865664   38.185877  0.3107  '''

In [98]:
df=pd.read_csv(io.StringIO(data_string),sep='\s+')
df

Unnamed: 0,Names,Estimate,Std_Error,t_nablud
0,Intercept,3446.830301,81.839915,42.1167
1,totwrk,-0.16913,0.018074,-9.3577
2,age,2.714483,1.472374,1.8436
3,male,87.10815,35.17321,2.4765
4,south,102.271833,41.925047,2.4394
5,smsa,-54.18771,33.193402,-1.6325
6,yngkid,-13.051272,50.459104,-0.2587
7,marr,31.360412,42.26319,0.742
8,union,11.865664,38.185877,0.3107


In [100]:
alpha=0.05
t_crit = stats.t.ppf(1-alpha/2,706-9-1) #(1-alpha/2, n-k-1)
round(t_crit,3)

1.963

In [111]:
df['Znachimosty'] = df.apply(lambda x: 'znachim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1) # прописываем условие
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl,Znachimosty
0,(Intercept),3613.994663,218.432669,16.5451,znachim
1,totwrk,-0.16732,0.018132,-9.2278,znachim
2,age,-6.254665,11.191179,-0.5589,ne znachim
3,I(age^2),0.108862,0.133736,0.814,ne znachim
4,male,90.456708,34.257144,2.6405,znachim
5,south,114.547494,40.637198,2.8188,znachim


$$ H_0: \beta_{marr} = \beta_{union} = \beta_{yngkid} = \beta_{smsa} = 0 $$
$$ H_1: \beta_{marr}^2 + \beta_{union}^2 + \beta_{yngkid}^2 + \beta_{smsa}^2 > 0 $$

In [85]:
unrest, rest = np.float_(re.findall(r'([-+]?\d+.\d+)', 'R2                      0.131          0.127 '))
print(unrest, rest)

0.131 0.127


In [86]:
F_obs=(unrest-rest)/(1-unrest)*(706-9)/(4)
round(F_obs,3)

0.802

$$
F_{nabl}=0.802
$$

In [87]:
alpha=0.05
F_cr=stats.f.ppf(1-alpha,4,706-9)
round(F_cr,3) 

2.385

$$
F_{crit}=2.385
$$

$ F_{nabl} < F_{critical} $ следовательно, данные согласуются с гипотезой $H_0$. Коэффициенты совместно незначимы.

# Задача 2.2

In [104]:
data_string='''Names               Estimate  Std_Error t_nabl  
(Intercept) 3613.994663  218.432669 16.5451 
totwrk        -0.167320    0.018132 -9.2278 
age           -6.254665   11.191179 -0.5589  
I(age^2)       0.108862    0.133736  0.8140    
male          90.456708   34.257144  2.6405 
south        114.547494   40.637198  2.8188  '''

In [105]:
df=pd.read_csv(io.StringIO(data_string),sep='\s+')
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl
0,(Intercept),3613.994663,218.432669,16.5451
1,totwrk,-0.16732,0.018132,-9.2278
2,age,-6.254665,11.191179,-0.5589
3,I(age^2),0.108862,0.133736,0.814
4,male,90.456708,34.257144,2.6405
5,south,114.547494,40.637198,2.8188


In [107]:
alpha=0.01
t_crit = stats.t.ppf(1-alpha/2,706-6-1) #(1-alpha/2, n-k-1)
round(t_crit,3)

2.583

In [110]:
df['Znachimosty'] = df.apply(lambda x: 'znachim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1) # прописываем условие
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl,Znachimosty
0,(Intercept),3613.994663,218.432669,16.5451,znachim
1,totwrk,-0.16732,0.018132,-9.2278,znachim
2,age,-6.254665,11.191179,-0.5589,ne znachim
3,I(age^2),0.108862,0.133736,0.814,ne znachim
4,male,90.456708,34.257144,2.6405,znachim
5,south,114.547494,40.637198,2.8188,znachim


$$ H_0: \beta_{age} = \beta_{age^2} = 0 $$
$$ H_1: \beta_{age} + \beta_{age^2} > 0 $$

In [117]:
unrest, rest = np.float_(re.findall(r'([-+]?\d+.\d+)', 'R2                      0.128          0.122'))
print(unrest, rest)

0.128 0.122


In [124]:
F_obs=(unrest-rest)/(1-unrest)*(706-6)/(2)
round(F_obs,3)

2.408

$$
F_{nabl}=2.408
$$

In [125]:
alpha=0.01
F_cr=stats.f.ppf(1-alpha,2,706-6)
round(F_cr,3) 

4.636

$$
F_{crit}=4.636
$$

$ F_{nabl} < F_{critical} $ следовательно, данные согласуются с гипотезой $H_0$. Коэффициенты совместно незначимы.

# Задача 2.3

In [126]:
data_string='''Names               Estimate  Std_Error t_nabl   
(Intercept) 3587.178292  220.238404 16.2877 
totwrk        -0.149121    0.026286 -5.6730 
age           -6.258663   11.191863 -0.5592    
I(age^2)       0.106517    0.133767  0.7963    
male         161.890143   82.177481  1.9700  
south        112.143584   40.717344  2.7542  
totwrk:male   -0.034694    0.036279 -0.9563      '''

In [127]:
df=pd.read_csv(io.StringIO(data_string),sep='\s+')
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl
0,(Intercept),3587.178292,220.238404,16.2877
1,totwrk,-0.149121,0.026286,-5.673
2,age,-6.258663,11.191863,-0.5592
3,I(age^2),0.106517,0.133767,0.7963
4,male,161.890143,82.177481,1.97
5,south,112.143584,40.717344,2.7542
6,totwrk:male,-0.034694,0.036279,-0.9563


In [128]:
alpha=0.01
t_crit = stats.t.ppf(1-alpha/2,706-7-1) #(1-alpha/2, n-k-1)
round(t_crit,3)

2.583

In [129]:
df['Znachimosty'] = df.apply(lambda x: 'znachim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1) # прописываем условие
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl,Znachimosty
0,(Intercept),3587.178292,220.238404,16.2877,znachim
1,totwrk,-0.149121,0.026286,-5.673,znachim
2,age,-6.258663,11.191863,-0.5592,ne znachim
3,I(age^2),0.106517,0.133767,0.7963,ne znachim
4,male,161.890143,82.177481,1.97,ne znachim
5,south,112.143584,40.717344,2.7542,znachim
6,totwrk:male,-0.034694,0.036279,-0.9563,ne znachim


$$ H_0: \beta_{male} = \beta_{male*totwrk} = 0 $$
$$ H_1: \beta_{male} + \beta_{male*totwrk} > 0 $$

In [130]:
unrest, rest = np.float_(re.findall(r'([-+]?\d+.\d+)', 'R2                      0.129          0.119   '))
print(unrest, rest)

0.129 0.119


In [133]:
F_obs=(unrest-rest)/(1-unrest)*(706-7)/(2)
round(F_obs,3)

4.013

$$
F_{nabl}=4.018
$$

In [136]:
alpha=0.01
F_cr=stats.f.ppf(1-alpha,2,706-7)
round(F_cr,3) 

4.636

$$
F_{crit}=4.636
$$

$ F_{nabl} < F_{critical} $ следовательно, данные согласуются с гипотезой $H_0$. Коэффициенты совместно незначимы.

# Задача 2.4

In [137]:
data_string=''' Names              Estimate  Std_Error t_nabl  
(Intercept)  2.93780233  1.65623416  1.7738   
age          0.14475408  0.10003338  1.4471 
I(age^2)    -0.00185652  0.00150253 -1.2356 
IQ           0.00821604  0.00084095  9.7699 
south       -0.10027331  0.02683810 -3.7362 
urban        0.17319472  0.02766716  6.2599 
married      0.20153862  0.04023976  5.0084     '''

In [138]:
df=pd.read_csv(io.StringIO(data_string),sep='\s+')
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl
0,(Intercept),2.937802,1.656234,1.7738
1,age,0.144754,0.100033,1.4471
2,I(age^2),-0.001857,0.001503,-1.2356
3,IQ,0.008216,0.000841,9.7699
4,south,-0.100273,0.026838,-3.7362
5,urban,0.173195,0.027667,6.2599
6,married,0.201539,0.04024,5.0084


In [139]:
alpha=0.01
t_crit = stats.t.ppf(1-alpha/2,935-7-1) #(1-alpha/2, n-k-1)
round(t_crit,3)

2.581

In [140]:
df['Znachimosty'] = df.apply(lambda x: 'znachim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1) # прописываем условие
df

Unnamed: 0,Names,Estimate,Std_Error,t_nabl,Znachimosty
0,(Intercept),2.937802,1.656234,1.7738,ne znachim
1,age,0.144754,0.100033,1.4471,ne znachim
2,I(age^2),-0.001857,0.001503,-1.2356,ne znachim
3,IQ,0.008216,0.000841,9.7699,znachim
4,south,-0.100273,0.026838,-3.7362,znachim
5,urban,0.173195,0.027667,6.2599,znachim
6,married,0.201539,0.04024,5.0084,znachim


$$ H_0: \beta_{age} = \beta_{age^2} = 0 $$
$$ H_1: \beta_{age} + \beta_{age^2} > 0 $$

In [141]:
unrest, rest = np.float_(re.findall(r'([-+]?\d+.\d+)', 'R2                      0.201          0.175   '))
print(unrest, rest)

0.201 0.175


In [142]:
F_obs=(unrest-rest)/(1-unrest)*(935-7)/(2)
round(F_obs,3)

15.099

$$
F_{nabl}=15.099
$$

In [144]:
alpha=0.01
F_cr=stats.f.ppf(1-alpha,2,935-7)
round(F_cr,3) 

4.628

$$
F_{crit}=4.628
$$

$ F_{nabl} > F_{critical} $ следовательно, данные согласуются с гипотезой $H_1$. Коэффициенты совместно значимы.

# Задача 3.1

In [149]:
df_elec=pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Electricity.csv')
df_elec

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [182]:
models = smf.ols(data=df_elec, formula='np.log(cost) ~ np.log(q) + I(np.log(q)**2) + np.log(pl) + np.log(pk) + np.log(pf)').fit()
models.summary()

0,1,2,3
Dep. Variable:,np.log(cost),R-squared:,0.992
Model:,OLS,Adj. R-squared:,0.992
Method:,Least Squares,F-statistic:,3880.0
Date:,"Sun, 02 Apr 2023",Prob (F-statistic):,2.45e-158
Time:,10:32:23,Log-Likelihood:,91.515
No. Observations:,158,AIC:,-171.0
Df Residuals:,152,BIC:,-152.7
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-6.7387,0.706,-9.541,0.000,-8.134,-5.343
np.log(q),0.4030,0.032,12.734,0.000,0.340,0.466
I(np.log(q) ** 2),0.0304,0.002,14.024,0.000,0.026,0.035
np.log(pl),0.1461,0.070,2.073,0.040,0.007,0.285
np.log(pk),0.1571,0.058,2.721,0.007,0.043,0.271
np.log(pf),0.6847,0.043,16.043,0.000,0.600,0.769

0,1,2,3
Omnibus:,2.342,Durbin-Watson:,1.798
Prob(Omnibus):,0.31,Jarque-Bera (JB):,2.078
Skew:,0.109,Prob(JB):,0.354
Kurtosis:,3.518,Cond. No.,4990.0


In [189]:
data_string='''
Names        coef   std_err t_nabl
Intercept   -6.7387 0.706   -9.541 
np.log(q)    0.4030 0.032   12.734
np.log(q)^2 0.0304 0.002   14.024
np.log(pl)   0.1461 0.070   2.073 
np.log(pk)   0.1571 0.058   2.721
np.log(pf)   0.6847 0.043   16.043'''

In [190]:
df=pd.read_csv(io.StringIO(data_string),sep='\s+')
df

Unnamed: 0,Names,coef,std_err,t_nabl
0,Intercept,-6.7387,0.706,-9.541
1,np.log(q),0.403,0.032,12.734
2,np.log(q)^2,0.0304,0.002,14.024
3,np.log(pl),0.1461,0.07,2.073
4,np.log(pk),0.1571,0.058,2.721
5,np.log(pf),0.6847,0.043,16.043


In [191]:
alpha=0.01
t_crit = stats.t.ppf(1-alpha/2,158-6-1) #(1-alpha/2, n-k-1)
round(t_crit,3)

2.609

In [192]:
df['Znachimosty'] = df.apply(lambda x: 'znachim' if np.abs(x['t_nabl'])>t_crit else 'ne znachim', axis=1) # прописываем условие
df

Unnamed: 0,Names,coef,std_err,t_nabl,Znachimosty
0,Intercept,-6.7387,0.706,-9.541,znachim
1,np.log(q),0.403,0.032,12.734,znachim
2,np.log(q)^2,0.0304,0.002,14.024,znachim
3,np.log(pl),0.1461,0.07,2.073,ne znachim
4,np.log(pk),0.1571,0.058,2.721,znachim
5,np.log(pf),0.6847,0.043,16.043,znachim


In [202]:
unrestricted_model = smf.ols(data = df_elec, formula = 'np.log(cost) ~ np.log(q) + I(np.log(q)**2) + np.log(pl) + np.log(pk) + np.log(pf)' ).fit()

In [215]:
f_test = unrestricted_model.f_test("np.log(pl) + np.log(pk) + np.log(pf) = 1")
round(f_test.fvalue,3)

0.015

In [219]:
alpha=0.01
F_cr=stats.f.ppf(1-alpha,3,158-6)
round(F_cr,3) 

3.913