In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import io
import statsmodels.formula.api as smf
import statsmodels.api as sm 
import scipy
import scipy.stats as stats
import re
import matplotlib.pyplot as plt
from statsmodels.iolib.summary2 import summary_params # вывод результатов тестирования
from statsmodels.iolib.summary2 import summary_col # вывод результатов тестирования
from statsmodels.stats.outliers_influence import variance_inflation_factor # VIF

# 1 Диагностические тесты (RESET-тест)

## 1.3 output equation #1

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')
df

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


In [3]:
model_1 = smf.ols(data=df, formula='output~capital+labour+wage').fit()
hat_y = model_1.predict()
hat_y

array([ 1.22906950e+01,  4.57101298e-01,  3.26713978e+01,  8.45249490e+00,
        1.15007554e+00,  4.95323301e+02,  4.96225925e-01,  2.69781742e+00,
        6.93083590e-01,  2.43757091e+00,  5.66564085e+00,  1.92791692e+00,
       -2.55367472e-01,  3.71340262e+00,  3.00062549e+00,  5.58104740e+00,
       -5.65128192e+00,  1.60377117e+01,  2.27977417e+00, -1.63642251e+00,
       -5.20310116e+00,  7.25038098e+00,  4.06867541e+00, -6.25486350e-01,
        3.32128894e+00,  4.22318186e+00,  5.97177164e+00,  6.60880530e+00,
        1.37823245e+01,  5.40857894e+00, -1.68422671e+00,  3.97020603e+00,
        1.11408840e+01, -9.36989184e-01,  9.15360522e+00, -6.27008499e-01,
        4.01437577e+00,  1.68675245e+01,  2.41409083e+00,  3.64642225e+00,
        3.49784717e+01,  5.31658996e+00,  3.26044590e+00,  1.96810268e+01,
        7.68688994e+00,  6.09158964e+00,  3.71223636e+01,  6.83356225e-01,
        5.77238852e+00,  2.03922830e+01,  3.94775369e+00,  8.78354170e+00,
        6.07147791e+01,  

In [4]:
df['out2'] = hat_y**2
df['out3'] = hat_y**3
df['out4'] = hat_y**4
df

Unnamed: 0,capital,labour,output,wage,out2,out3,out4
0,2.606563,184,9.250759,43.080307,151.061183,1856.646929,2.281948e+04
1,1.323237,91,3.664310,27.780016,0.208942,0.095507,4.365659e-02
2,22.093692,426,28.781516,44.467748,1067.420235,34874.111139,1.139386e+06
3,10.737851,72,4.124642,39.734710,71.444670,603.885709,5.104341e+03
4,1.161365,46,2.890150,34.650709,1.322674,1.521175,1.749466e+00
...,...,...,...,...,...,...,...
564,2.625403,20,1.424376,33.477545,0.006650,-0.000542,4.422515e-05
565,1.276386,61,2.109048,26.300732,2.906347,-4.954744,8.446854e+00
566,1.953869,117,6.241870,41.153979,61.588363,483.334718,3.793126e+03
567,1.318527,46,7.902237,66.720139,231.830599,3529.849074,5.374543e+04


In [5]:
model_2 = smf.ols(data=df, formula='output~capital+wage+labour+out2+out3+out4').fit()
model_2.params

Intercept   -1.061334e+01
capital     -9.941386e-02
wage         3.498226e-01
labour       3.636213e-02
out2         6.738526e-03
out3        -1.633088e-05
out4         9.095123e-09
dtype: float64

In [6]:
R2_unres = model_2.rsquared
R2_res = model_1.rsquared

In [8]:
F_nabl = (R2_unres-R2_res)/(1-R2_unres)*(len(df)-7)/3 # q (3) - количество регрессоров
F_nabl

69.53250929274195

In [9]:
alpha = 0.05
stats.f.ppf(1-alpha, 3, len(df)-7)

2.620761015118207

$$
H_{0}:\gamma_{1}=\gamma_{2}=\gamma_{3}=0
$$
$$
H_{1}:\gamma_{1}^2+\gamma_{2}^2+\gamma_{3}^2>0
$$

$$
F_{nabl}>F_{crit}=>H_{0} отвергаем
$$

## 1.2 sleep equation #2

In [10]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,1,2815,0,2815,0,21,0,0,20.529997,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,1,3786,0,3786,0,12,0,12,9.619998,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,1,2580,0,2580,0,44,0,33,2.750000,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,1,2026,0,2026,0,27,0,18,,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,0,675,1,465,210,18,0,4,,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,1,1961,1,1481,480,31,0,22,,2916


In [11]:
df['totwrk*south'] = df['totwrk']*df['south']
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq,totwrk*south
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,3438,0,3438,0,14,0,13,7.070004,1024,0
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,5020,0,5020,0,11,0,0,1.429999,961,5020
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,2815,0,2815,0,21,0,0,20.529997,1936,0
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,3786,0,3786,0,12,0,12,9.619998,900,0
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,2580,0,2580,0,44,0,33,2.750000,4096,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,2026,0,2026,0,27,0,18,,2025,0
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,675,1,465,210,18,0,4,,1156,0
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,1851,0,1851,0,19,0,17,,1369,0
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,1961,1,1481,480,31,0,22,,2916,1961


In [12]:
model_1 = smf.ols(data=df, formula='sleep~totwrk+south+totwrk*south+age+age**2+male+smsa+marr').fit()
hat_y = model_1.predict()
hat_y

array([3059.7878713 , 3027.2982769 , 3150.14824668, 2906.16253706,
       3304.38212744, 3480.86759964, 3302.85549107, 3018.58625027,
       3251.70153209, 3103.16655328, 2565.13027726, 3435.42675042,
       3158.81153771, 3237.97874758, 3169.35522302, 3119.99576601,
       3122.69701138, 2972.94125121, 3267.34993886, 3378.03927478,
       3009.57887624, 3057.00303608, 3285.56012952, 3105.656311  ,
       2794.44184501, 3094.17710558, 2997.0741813 , 3107.7213396 ,
       3212.89504595, 3184.98789179, 2986.31956339, 3010.04799954,
       3102.57008721, 3196.83500392, 3071.3642847 , 2942.67985756,
       3177.66019248, 3462.19865696, 3113.84188955, 3122.60074779,
       3121.63290454, 3146.17668375, 3142.31222092, 3307.64173116,
       3164.08023326, 3242.96541184, 3278.97618864, 3463.65838475,
       3332.20393894, 2749.78812358, 3027.73683892, 3064.62039459,
       3226.3359574 , 3133.51879223, 3144.99343399, 3210.05308127,
       3296.45292169, 3273.52388798, 3227.96381037, 3224.78187

In [13]:
df['slp2'] = hat_y**2
df['slp3'] = hat_y**3
df['slp4'] = hat_y**4
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,workscnd,exper,yngkid,yrsmarr,hrwage,agesq,totwrk*south,slp2,slp3,slp4
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,0,14,0,13,7.070004,1024,0,9.362302e+06,2.864666e+10,8.765270e+13
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,0,11,0,0,1.429999,961,5020,9.164535e+06,2.774378e+10,8.398870e+13
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,0,21,0,0,20.529997,1936,0,9.923434e+06,3.126029e+10,9.847454e+13
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,0,12,0,12,9.619998,900,0,8.445781e+06,2.454481e+10,7.133121e+13
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,0,44,0,33,2.750000,4096,0,1.091894e+07,3.608035e+10,1.192233e+14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,0,27,0,18,,2025,0,1.028060e+07,3.296307e+10,1.056907e+14
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,210,18,0,4,,1156,0,1.263309e+07,4.490189e+10,1.595951e+14
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,0,19,0,17,,1369,0,1.068460e+07,3.492507e+10,1.141606e+14
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,480,31,0,22,,2916,1961,1.144936e+07,3.874111e+10,1.310879e+14


In [14]:
model_2 = smf.ols(data=df, formula='sleep~totwrk+south+totwrk*south+age+age**2+male+smsa+marr+slp2+slp3+slp4').fit()
model_2.params

Intercept      -4.978117e-03
totwrk         -2.137072e-02
south           2.755881e+01
totwrk:south   -8.554904e-03
age             4.961400e-01
male            8.541274e+00
smsa           -6.216897e+00
marr            1.247640e+00
slp2            6.010367e-04
slp3           -7.265371e-08
slp4           -5.108139e-12
dtype: float64

In [15]:
R2_unres = model_2.rsquared
R2_res = model_1.rsquared

In [53]:
F_nabl = (R2_unres-R2_res)/(1-R2_unres)*(len(df)-12)/3 # q (3) - количество регрессоров
F_nabl

60.43333158283564

In [54]:
alpha = 0.05
stats.f.ppf(1-alpha, 3, len(df)-12)

2.620903840464402

$$
H_{0}:\gamma_{1}=\gamma_{2}=\gamma_{3}=0
$$
$$
H_{1}:\gamma_{1}^2+\gamma_{2}^2+\gamma_{3}^2>0
$$

$$ 
F_{crit}>F_{nabl}=>H_{0} принимаем 
$$

## 1.1 sleep equation #1

In [32]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,1,2815,0,2815,0,21,0,0,20.529997,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,1,3786,0,3786,0,12,0,12,9.619998,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,1,2580,0,2580,0,44,0,33,2.750000,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,1,2026,0,2026,0,27,0,18,,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,0,675,1,465,210,18,0,4,,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,1,1961,1,1481,480,31,0,22,,2916


In [33]:
model_1 = smf.ols(data=df, formula='sleep~totwrk+age+male+smsa+south').fit()
hat_y = model_1.predict()
hat_y

array([3067.10583631, 2894.49476254, 3150.37075855, 2910.94924531,
       3303.75993003, 3472.6900719 , 3301.14172725, 3023.8804497 ,
       3251.79465914, 3104.60304347, 2591.50610935, 3415.84208454,
       3157.91546836, 3239.22243895, 3168.41882119, 3124.69052876,
       3124.03220455, 2983.72677965, 3263.51683965, 3369.7221562 ,
       3044.02875134, 3060.57363178, 3285.21066833, 3135.81565247,
       2807.46105408, 3101.03250584, 3002.42541199, 3129.80523542,
       3214.60758145, 3188.66233468, 2991.99642153, 3018.92151018,
       3136.97517574, 3199.27100653, 3074.11687847, 2978.93503636,
       3180.58032354, 3450.79249364, 3119.08529912, 3152.00332795,
       3122.41382707, 3150.48016232, 3167.61968067, 3243.68584648,
       3168.42290594, 3243.74401963, 3220.49507817, 3498.46452305,
       3299.07444219, 2768.41983181, 3036.14254043, 3096.3245862 ,
       3227.14627219, 3133.77301111, 3173.5133905 , 3232.01790829,
       3266.75411605, 3224.56832679, 3228.83170859, 3226.16578

In [34]:
df['slp2'] = hat_y**2
df['slp3'] = hat_y**3
df['slp4'] = hat_y**4
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq,slp2,slp3,slp4
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,3438,0,14,0,13,7.070004,1024,9.407138e+06,2.885269e+10,8.849425e+13
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,5020,0,11,0,0,1.429999,961,8.378100e+06,2.425037e+10,7.019256e+13
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,2815,0,21,0,0,20.529997,1936,9.924836e+06,3.126691e+10,9.850237e+13
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,3786,0,12,0,12,9.619998,900,8.473626e+06,2.466629e+10,7.180233e+13
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,2580,0,44,0,33,2.750000,4096,1.091483e+07,3.605998e+10,1.191335e+14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,2026,0,27,0,18,,2025,1.021601e+07,3.265289e+10,1.043668e+14
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,465,210,18,0,4,,1156,1.255346e+07,4.447801e+10,1.575895e+14
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,1851,0,19,0,17,,1369,1.062855e+07,3.465060e+10,1.129660e+14
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,1481,480,31,0,22,,2916,1.148575e+07,3.892594e+10,1.319225e+14


In [35]:
model_2 = smf.ols(data=df, formula='sleep~totwrk+age+male+smsa+south+slp2+slp3+slp4').fit()
model_2.params

Intercept   -3.821000e-04
totwrk      -6.838269e-03
age          2.731615e-01
male         1.417044e+00
smsa        -2.967532e+00
south       -2.989739e+00
slp2         5.111848e-04
slp3        -2.871410e-08
slp4        -1.028501e-11
dtype: float64

In [36]:
R2_unres = model_2.rsquared
R2_res = model_1.rsquared

In [55]:
F_nabl = (R2_unres-R2_res)/(1-R2_unres)*(len(df)-9)/3 # q (3) - количество регрессоров
F_nabl

60.75882528974498

In [56]:
alpha = 0.05
stats.f.ppf(1-alpha, 3, len(df)-9)

2.6208178380700073

$$
H_{0}:\gamma_{1}=\gamma_{2}=\gamma_{3}=0
$$
$$
H_{1}:\gamma_{1}^2+\gamma_{2}^2+\gamma_{3}^2>0
$$

$$ 
F_{crit}>F_{nabl}=>H_{0} принимаем 
$$

## 1.4 output equation #2

In [86]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')
df

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


In [87]:
model_1 = smf.ols(data=df, formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)').fit()
hat_y = model_1.predict()
hat_y

array([ 2.35989958,  1.34716989,  3.3130779 ,  1.82090689,  1.03983458,
        5.8587148 ,  1.06531002,  1.55498428,  0.81379468,  1.4684784 ,
        1.42691402,  0.72993199,  1.2954135 ,  1.18047975,  1.14403561,
        1.97163239, -1.40792342,  2.64881065,  1.67272941,  0.85864447,
       -0.52432467,  1.62340063,  1.60175018,  0.34378623,  1.47904166,
        1.59523602,  1.82552042,  1.41043394,  2.53071436,  1.8703142 ,
       -0.19605089,  1.56305837,  1.86238377,  0.85801498,  2.17668439,
        1.32592943,  1.34163888,  2.68341674,  1.51511337,  1.10757717,
        3.28302213,  1.82326807,  1.33270466,  0.53995352,  1.56315707,
        1.6835631 ,  3.39917857,  1.1523541 ,  1.62574217,  1.03271288,
        1.65491718,  1.86661549,  3.60003683,  5.0342221 ,  1.73071522,
        5.76252664,  0.48941252,  0.95806217,  0.33583131,  1.03056803,
        2.1978038 ,  3.71648169,  2.87648876,  1.11821811,  0.80414367,
        1.64710272,  0.81197612,  1.56116845,  2.35593082,  3.63

In [88]:
df['out2'] = hat_y**2
df['out3'] = hat_y**3
df['out4'] = hat_y**4
df

Unnamed: 0,capital,labour,output,wage,out2,out3,out4
0,2.606563,184,9.250759,43.080307,5.569126,13.142578,31.015165
1,1.323237,91,3.664310,27.780016,1.814867,2.444934,3.293741
2,22.093692,426,28.781516,44.467748,10.976485,36.365950,120.483226
3,10.737851,72,4.124642,39.734710,3.315702,6.037584,10.993879
4,1.161365,46,2.890150,34.650709,1.081256,1.124327,1.169114
...,...,...,...,...,...,...,...
564,2.625403,20,1.424376,33.477545,0.280766,0.148770,0.078829
565,1.276386,61,2.109048,26.300732,1.006439,1.009674,1.012919
566,1.953869,117,6.241870,41.153979,3.796779,7.398146,14.415527
567,1.318527,46,7.902237,66.720139,2.763817,4.594771,7.638684


In [89]:
model_2 = smf.ols(data=df, formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)+np.log(out2)+np.log(out3)+np.log(out4)').fit()
model_2.params

  result = getattr(ufunc, method)(*inputs, **kwargs)


Intercept         -5.852117
np.log(capital)    0.148561
np.log(labour)     0.846376
np.log(wage)       1.010862
np.log(out2)      -0.011578
np.log(out3)      -0.017367
np.log(out4)      -0.023156
dtype: float64

In [90]:
R2_unres = model_2.rsquared
R2_res = model_1.rsquared

In [91]:
F_nabl = (R2_unres-R2_res)/(1-R2_unres)*(len(df)-7)/3 # q (3) - количество регрессоров
F_nabl

20.68261472922801

In [92]:
alpha = 0.05
stats.f.ppf(1-alpha, 3, len(df)-7)

2.620761015118207

$$
F_{nabl}>F_{crit}=>H_{0} отвергаем
$$

## 1.5 output equation #3

In [107]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')
df

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


In [108]:
model_1 = smf.ols(data=df, formula='np.log(output)~np.log(capital)+labour+labour*labour+np.log(wage)').fit()
hat_y = model_1.predict()
hat_y

array([ 1.8381479 ,  1.148544  ,  2.9138635 ,  2.34254438,  1.2459745 ,
        7.46996719,  1.35661241,  1.18764945,  1.12974183,  1.69008591,
        1.07678602,  1.0870282 ,  1.45004743,  0.94044655,  1.33081122,
        1.59377779, -0.1597961 ,  2.30854169,  1.35831001,  0.58215871,
       -0.05247648,  1.98503336,  1.46888352,  1.52797977,  1.19938528,
        1.80065473,  2.0063883 ,  2.16594978,  2.09773687,  1.45811185,
        0.37844699,  1.81584236,  2.13664056,  1.3945006 ,  2.05776207,
        0.82523707,  2.02433467,  2.37019293,  1.48836192,  1.17579181,
        2.29338815,  0.98771568,  1.49659526,  0.4972591 ,  1.8933043 ,
        1.64662397,  2.77307897,  1.7983176 ,  1.55930047,  1.02940031,
        1.94145181,  0.96411491,  3.46094438,  4.70541386,  2.22194623,
        5.41947735,  0.91866084,  1.47900039,  0.73803718,  0.95430253,
        2.36017539,  3.05401164,  2.26193333,  1.46371221,  1.1466689 ,
        2.03157218,  0.98603549,  2.2041357 ,  2.16054274,  3.32

In [109]:
df['out2'] = hat_y**2
df['out3'] = hat_y**3
df['out4'] = hat_y**4
df

Unnamed: 0,capital,labour,output,wage,out2,out3,out4
0,2.606563,184,9.250759,43.080307,3.378788,6.210711,11.416206
1,1.323237,91,3.664310,27.780016,1.319153,1.515106,1.740165
2,22.093692,426,28.781516,44.467748,8.490600,24.740451,72.090297
3,10.737851,72,4.124642,39.734710,5.487514,12.854746,30.112812
4,1.161365,46,2.890150,34.650709,1.552452,1.934316,2.410109
...,...,...,...,...,...,...,...
564,2.625403,20,1.424376,33.477545,2.446751,3.827230,5.986588
565,1.276386,61,2.109048,26.300732,1.156091,1.243048,1.336546
566,1.953869,117,6.241870,41.153979,2.702933,4.443784,7.305848
567,1.318527,46,7.902237,66.720139,3.321487,6.053392,11.032274


In [110]:
model_2 = smf.ols(data=df, formula='np.log(output)~np.log(capital)+labour+labour*labour+np.log(wage)+np.log(out2)+np.log(out3)+np.log(out4)').fit()
model_2.params

  result = getattr(ufunc, method)(*inputs, **kwargs)


Intercept         -1.359278
np.log(capital)    0.418797
labour             0.000435
np.log(wage)       0.695401
np.log(out2)       0.012404
np.log(out3)       0.018606
np.log(out4)       0.024808
dtype: float64

In [111]:
R2_unres = model_2.rsquared
R2_res = model_1.rsquared

In [112]:
F_nabl = (R2_unres-R2_res)/(1-R2_unres)*(len(df)-8)/3 # q (3) - количество регрессоров
F_nabl

9.011610475532292

In [113]:
alpha = 0.05
stats.f.ppf(1-alpha, 3, len(df)-8)

2.6207893757624725

$$
F_{nabl}>F_{crit}=>H_{0} отвергаем
$$