In [2]:
import pandas as pd
import numpy as np
import io
import statsmodels.formula.api as smf
import scipy.stats as stats
import re

# 1.4 cost equation #2

In [7]:
date_string = '''Observations            158         158         158        158      158  
R2                     0.992       0.976       0.982      0.053    0.026 
AdjustedR2            0.992       0.976       0.982      0.035    0.020 ''' 
df = pd.read_csv(io.StringIO(date_string), 
    delimiter='\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4,5
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Observations,158.0,158.0,158.0,158.0,158.0
R2,0.992,0.976,0.982,0.053,0.026
AdjustedR2,0.992,0.976,0.982,0.035,0.02


In [8]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,AdjustedR2
1,158.0,0.992,0.992
2,158.0,0.976,0.976
3,158.0,0.982,0.982
4,158.0,0.053,0.035
5,158.0,0.026,0.02


In [10]:
df['m'] = [6,3,5,4,2]
df

Unnamed: 0,Observations,R2,AdjustedR2,m
1,158.0,0.992,0.992,6
2,158.0,0.976,0.976,3
3,158.0,0.982,0.982,5
4,158.0,0.053,0.035,4
5,158.0,0.026,0.02,2


In [11]:
df['F_nab'] = round((df['R2']/(1-df['R2']))*((df['Observations']-df['m'])/(df['m']-1)),2)
df

Unnamed: 0,Observations,R2,AdjustedR2,m,F_nab
1,158.0,0.992,0.992,6,3769.6
2,158.0,0.976,0.976,3,3151.67
3,158.0,0.982,0.982,5,2086.75
4,158.0,0.053,0.035,4,2.87
5,158.0,0.026,0.02,2,4.16


In [12]:
alpha = 0.01
df['F_cr'] = stats.f.ppf(1-alpha,df['m']-1,df['Observations'] - df['m'])
df

Unnamed: 0,Observations,R2,AdjustedR2,m,F_nab,F_cr
1,158.0,0.992,0.992,6,3769.6,3.139909
2,158.0,0.976,0.976,3,3151.67,4.744744
3,158.0,0.982,0.982,5,2086.75,3.444189
4,158.0,0.053,0.035,4,2.87,3.911342
5,158.0,0.026,0.02,2,4.16,6.800161


In [13]:
df['znachimost'] = df.apply(lambda x : 'znachima' if x['F_nab'] > x['F_cr'] else 'neznachima', axis = 1)
df

Unnamed: 0,Observations,R2,AdjustedR2,m,F_nab,F_cr,znachimost
1,158.0,0.992,0.992,6,3769.6,3.139909,znachima
2,158.0,0.976,0.976,3,3151.67,4.744744,znachima
3,158.0,0.982,0.982,5,2086.75,3.444189,znachima
4,158.0,0.053,0.035,4,2.87,3.911342,neznachima
5,158.0,0.026,0.02,2,4.16,6.800161,neznachima


# 2.1 sleep equation #1

In [3]:
date_string = '''Naming Estimate  Std.Error t_value  Pr(>|t|)    
(Intercept) 3587.178292  220.238404 16.2877 <2.2e-16***
totwrk        -0.149121    0.026286 -5.6730 2.055e-08***
age           -6.258663   11.191863 -0.5592  0.576194    
I(age^2)       0.106517    0.133767  0.7963  0.426135    
male         161.890143   82.177481  1.9700  0.049232*  
south        112.143584   40.717344  2.7542  0.006037** 
totwrk:male   -0.034694    0.036279 -0.9563  0.339238   '''
df = pd.read_csv(io.StringIO(date_string), 
    delimiter='\s+', index_col=0)
df

Unnamed: 0_level_0,Estimate,Std.Error,t_value,Pr(>|t|)
Naming,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
(Intercept),3587.178292,220.238404,16.2877,<2.2e-16***
totwrk,-0.149121,0.026286,-5.673,2.055e-08***
age,-6.258663,11.191863,-0.5592,0.576194
I(age^2),0.106517,0.133767,0.7963,0.426135
male,161.890143,82.177481,1.97,0.049232*
south,112.143584,40.717344,2.7542,0.006037**
totwrk:male,-0.034694,0.036279,-0.9563,0.339238


In [4]:
alpha = 0.05
t_crit = stats.t.ppf(1-alpha/2, 706-7)
t_crit

1.963363575920925

In [5]:
df['znachimost'] = df.apply(lambda x: 'znachimo' if np.abs(x['t_value']) > t_crit else 'ne_znachimo', axis=1 )
df

Unnamed: 0_level_0,Estimate,Std.Error,t_value,Pr(>|t|),znachimost
Naming,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
(Intercept),3587.178292,220.238404,16.2877,<2.2e-16***,znachimo
totwrk,-0.149121,0.026286,-5.673,2.055e-08***,znachimo
age,-6.258663,11.191863,-0.5592,0.576194,ne_znachimo
I(age^2),0.106517,0.133767,0.7963,0.426135,ne_znachimo
male,161.890143,82.177481,1.97,0.049232*,znachimo
south,112.143584,40.717344,2.7542,0.006037**,znachimo
totwrk:male,-0.034694,0.036279,-0.9563,0.339238,ne_znachimo


In [6]:
dt = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
unrestricted_model = smf.ols(data = dt, formula = 'sleep~totwrk+age+I(age^2) + male+south + smsa+yngkid+marr+union' ).fit()
restricted_model = smf.ols(data = dt, formula = 'sleep~totwrk+age+I(age^2) + male+south' ).fit()

In [7]:

print(unrestricted_model.f_test("smsa = yngkid = marr = union = 0"))

<F test: F=0.909595410409731, p=0.4577812422153851, df_denom=696, df_num=4>


In [18]:
n=len(dt)
DF=unrestricted_model.df_resid
q = 4
R2_ur=unrestricted_model.rsquared 
R2_r = restricted_model.rsquared

In [20]:
F_nabl = ((R2_ur - R2_r)/(1-R2_ur))*(DF/q)
F_nabl

0.9095954104097206

In [21]:
f_test = unrestricted_model.f_test("smsa = yngkid = marr = union = 0")

In [22]:
f_test

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=0.909595410409731, p=0.4577812422153851, df_denom=696, df_num=4>

In [25]:
alpha = 0.05
F_cr = stats.f.ppf(1-0.05,q,DF)
F_cr

2.384729463756667

In [26]:
if (F_nabl > F_cr ):
    print('Отвергаем H0, коэффициенты совместно значимы')
else:
    print('Не отвергаем H0, коэффициенты совместно незначимы')

Не отвергаем H0, коэффициенты совместно незначимы


# 2.2 sleep equation #2

In [49]:
date_string = '''Name Estimate  Std.Error t_value  Pr(>|t|)    
(Intercept) 3613.994663  218.432669 16.5451 <2.2e-16***
totwrk        -0.167320    0.018132 -9.2278 <2.2e-16***
age           -6.254665   11.191179 -0.5589  0.576414    
I(age^2)       0.108862    0.133736  0.8140  0.415919    
male          90.456708   34.257144  2.6405  0.008462** 
south        114.547494   40.637198  2.8188  0.004957**'''
df = pd.read_csv(io.StringIO(date_string), 
    delimiter='\s+', index_col=0)
df

Unnamed: 0_level_0,Estimate,Std.Error,t_value,Pr(>|t|)
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
(Intercept),3613.994663,218.432669,16.5451,<2.2e-16***
totwrk,-0.16732,0.018132,-9.2278,<2.2e-16***
age,-6.254665,11.191179,-0.5589,0.576414
I(age^2),0.108862,0.133736,0.814,0.415919
male,90.456708,34.257144,2.6405,0.008462**
south,114.547494,40.637198,2.8188,0.004957**


In [50]:
alpha = 0.01
t_crit = stats.t.ppf(1-alpha/2, 706-7)
t_crit

2.582881108461278

In [51]:
df['znachimost'] = df.apply(lambda x: 'znachimo' if np.abs(x['t_value']) > t_crit else 'ne_znachimo', axis=1 )
df

Unnamed: 0_level_0,Estimate,Std.Error,t_value,Pr(>|t|),znachimost
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
(Intercept),3613.994663,218.432669,16.5451,<2.2e-16***,znachimo
totwrk,-0.16732,0.018132,-9.2278,<2.2e-16***,znachimo
age,-6.254665,11.191179,-0.5589,0.576414,ne_znachimo
I(age^2),0.108862,0.133736,0.814,0.415919,ne_znachimo
male,90.456708,34.257144,2.6405,0.008462**,znachimo
south,114.547494,40.637198,2.8188,0.004957**,znachimo


In [52]:
dt = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
unrestricted_model = smf.ols(data = dt, formula = 'sleep~totwrk+age+I(age^2) + male+south + smsa+yngkid+marr+union' ).fit()
restricted_model = smf.ols(data = dt, formula = 'sleep~totwrk+age+I(age^2) + male+south' ).fit()

In [67]:
dt['I(age**2)'] = dt['age']**2
dt

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq,I(age^2),I(age**2)
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,0,3438,0,14,0,13,7.070004,1024,1024,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,0,5020,0,11,0,0,1.429999,961,961,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,0,2815,0,21,0,0,20.529997,1936,1936,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,0,3786,0,12,0,12,9.619998,900,900,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,0,2580,0,44,0,33,2.750000,4096,4096,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,0,2026,0,27,0,18,,2025,2025,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,1,465,210,18,0,4,,1156,1156,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,0,1851,0,19,0,17,,1369,1369,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,1,1481,480,31,0,22,,2916,2916,2916


In [68]:
print(unrestricted_model.f_test("age = I(age**2) = 0"))

PatsyError: unrecognized token in constraint
    age = I(age**2) = 0
          ^