In [1]:
import pandas as pd
import numpy as np
import io
import statsmodels.formula.api as smf
import scipy.stats as stats
import re

# 1.1

In [2]:
data_string = '''Observations     706         706         706         706    
R2              0.132       0.131       0.128       0.007   
AdjustedR2     0.121       0.123       0.121       0.002 ''' 

In [3]:
df = pd.read_csv(io.StringIO(data_string), 
    delimiter='\s+', header=None, index_col=0)
df.reset_index()

Unnamed: 0,0,1,2,3,4
0,Observations,706.0,706.0,706.0,706.0
1,R2,0.132,0.131,0.128,0.007
2,AdjustedR2,0.121,0.123,0.121,0.002


In [4]:
df = df.transpose()
df

Unnamed: 0,Observations,R2,AdjustedR2
1,706.0,0.132,0.121
2,706.0,0.131,0.123
3,706.0,0.128,0.121
4,706.0,0.007,0.002


In [5]:
df['m'] = [10,8,6,5]
df

Unnamed: 0,Observations,R2,AdjustedR2,m
1,706.0,0.132,0.121,10
2,706.0,0.131,0.123,8
3,706.0,0.128,0.121,6
4,706.0,0.007,0.002,5


In [6]:
df['F_nab'] = (df['R2']/(1-df['R2']))*((df['Observations']-df['m'])/(df['m']-1))
df

Unnamed: 0,Observations,R2,AdjustedR2,m,F_nab
1,706.0,0.132,0.121,10,11.760369
2,706.0,0.131,0.123,8,15.031728
3,706.0,0.128,0.121,6,20.550459
4,706.0,0.007,0.002,5,1.235398


In [7]:
alpha = 0.05
#F критическое: (df['m']-1) - количество регрессоров без константы, (df['Observations']-df['m']) - количество степеней свободы
df['F_cr'] = stats.f.ppf(1-alpha,df['m']-1,df['Observations'] - df['m'])
df

Unnamed: 0,Observations,R2,AdjustedR2,m,F_nab,F_cr
1,706.0,0.132,0.121,10,11.760369,1.893317
2,706.0,0.131,0.123,8,15.031728,2.022681
3,706.0,0.128,0.121,6,20.550459,2.226901
4,706.0,0.007,0.002,5,1.235398,2.384638


In [8]:
df['result'] = df.apply(lambda x: 'регрессоры не значимы' if abs(x['F_nab'])<abs(x['F_cr']) else 'регрессоры значимы', axis=1)
df

Unnamed: 0,Observations,R2,AdjustedR2,m,F_nab,F_cr,result
1,706.0,0.132,0.121,10,11.760369,1.893317,регрессоры значимы
2,706.0,0.131,0.123,8,15.031728,2.022681,регрессоры значимы
3,706.0,0.128,0.121,6,20.550459,2.226901,регрессоры значимы
4,706.0,0.007,0.002,5,1.235398,2.384638,регрессоры не значимы


# 1.4

In [9]:
data = '''Observations            158         158         158        158      158  
R2                     0.992       0.976       0.982      0.053    0.026'''

In [10]:
df = pd.read_csv(io.StringIO(data), 
    delimiter='\s+', header=None, index_col=0)
df=df.transpose() 
df

Unnamed: 0,Observations,R2
1,158.0,0.992
2,158.0,0.976
3,158.0,0.982
4,158.0,0.053
5,158.0,0.026


In [11]:
df['m'] = [6,3,5,4,2]
df

Unnamed: 0,Observations,R2,m
1,158.0,0.992,6
2,158.0,0.976,3
3,158.0,0.982,5
4,158.0,0.053,4
5,158.0,0.026,2


In [12]:
df['F_nab'] = (df['R2']/(1-df['R2']))*((df['Observations']-df['m'])/(df['m']-1))
df

Unnamed: 0,Observations,R2,m,F_nab
1,158.0,0.992,6,3769.6
2,158.0,0.976,3,3151.666667
3,158.0,0.982,5,2086.75
4,158.0,0.053,4,2.872932
5,158.0,0.026,2,4.164271


In [13]:
alpha = 0.01
df['F_cr'] = stats.f.ppf(1-alpha,df['m']-1,df['Observations'] - df['m'])
df

Unnamed: 0,Observations,R2,m,F_nab,F_cr
1,158.0,0.992,6,3769.6,3.139909
2,158.0,0.976,3,3151.666667,4.744744
3,158.0,0.982,5,2086.75,3.444189
4,158.0,0.053,4,2.872932,3.911342
5,158.0,0.026,2,4.164271,6.800161


In [14]:
df['result'] = df.apply(lambda x: 'коэффициенты значимы' if abs(x['F_nab'])<abs(x['F_cr']) else 'коэффициенты не значимы', axis=1)
df

Unnamed: 0,Observations,R2,m,F_nab,F_cr,result
1,158.0,0.992,6,3769.6,3.139909,коэффициенты не значимы
2,158.0,0.976,3,3151.666667,4.744744,коэффициенты не значимы
3,158.0,0.982,5,2086.75,3.444189,коэффициенты не значимы
4,158.0,0.053,4,2.872932,3.911342,коэффициенты значимы
5,158.0,0.026,2,4.164271,6.800161,коэффициенты значимы


# 2.1

$$
 H_0:\beta_{smsa}=\beta_{yngkid}=\beta_{yngkid}=\beta_{marr}=0
 \\H_1: \beta_{smsa}^2=\beta_{yngkid}^2=\beta_{yngkid}^2=\beta_{marr}^2>0
$$

In [15]:
unrestricted, restricted = np.float_(re.findall(r'([-+]?\d+.\d+)','R2      0.131    0.127'))
print(unrestricted,restricted)

0.131 0.127


In [16]:
F_obs = (unrestricted - restricted)/(1-unrestricted)*(706-9)/4
F_obs



0.8020713463751445

In [17]:
F_cr = stats.f.ppf(1-alpha, 4, 706-9)
F_cr

3.346154627901364

$$
F_{obs}<F_{cr}
$$

# Согласование с гипотезой Н0

# 2.4

In [18]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/wage2.csv')

In [19]:
fitted = smf.ols(data=df, formula='np.log(wage)~age + I(age**2) + IQ + south + urban + married').fit()
fitted.params

Intercept      2.937802
age            0.144754
I(age ** 2)   -0.001857
IQ             0.008216
south         -0.100273
urban          0.173195
married        0.201539
dtype: float64

In [20]:
fitted.summary(alpha=0.01).tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.005,0.995]
Intercept,2.9378,1.656,1.774,0.076,-1.337,7.213
age,0.1448,0.100,1.447,0.148,-0.113,0.403
I(age ** 2),-0.0019,0.002,-1.236,0.217,-0.006,0.002
IQ,0.0082,0.001,9.770,0.000,0.006,0.010
south,-0.1003,0.027,-3.736,0.000,-0.170,-0.031
urban,0.1732,0.028,6.260,0.000,0.102,0.245
married,0.2015,0.040,5.008,0.000,0.098,0.305


In [21]:
alpha = 0.01
n=len(df)
k=6
t_cr = stats.t.ppf(1-alpha/2, n-k-1)
t_cr

2.5811375814942803

In [22]:
data = '''coef	std.err	t	P>|t|	[0.005	0.995]
Intercept	2.9378	1.656	1.774	0.076	-1.337	7.213
age	0.1448	0.100	1.447	0.148	-0.113	0.403
I(age**2)	-0.0019	0.002	-1.236	0.217	-0.006	0.002
IQ	0.0082	0.001	9.770	0.000	0.006	0.010
south	-0.1003	0.027	-3.736	0.000	-0.170	-0.031
urban	0.1732	0.028	6.260	0.000	0.102	0.245
married	0.2015	0.040	5.008	0.000	0.098	0.305'''

In [23]:
df = pd.read_csv(io.StringIO(data), sep='\s+')
df

Unnamed: 0,coef,std.err,t,P>|t|,[0.005,0.995]
Intercept,2.9378,1.656,1.774,0.076,-1.337,7.213
age,0.1448,0.1,1.447,0.148,-0.113,0.403
I(age**2),-0.0019,0.002,-1.236,0.217,-0.006,0.002
IQ,0.0082,0.001,9.77,0.0,0.006,0.01
south,-0.1003,0.027,-3.736,0.0,-0.17,-0.031
urban,0.1732,0.028,6.26,0.0,0.102,0.245
married,0.2015,0.04,5.008,0.0,0.098,0.305


In [24]:
df['result']=df.apply(lambda x: 'значим' if abs(x['t'])>t_cr else 'не значим', axis=1)
df

Unnamed: 0,coef,std.err,t,P>|t|,[0.005,0.995],result
Intercept,2.9378,1.656,1.774,0.076,-1.337,7.213,не значим
age,0.1448,0.1,1.447,0.148,-0.113,0.403,не значим
I(age**2),-0.0019,0.002,-1.236,0.217,-0.006,0.002,не значим
IQ,0.0082,0.001,9.77,0.0,0.006,0.01,значим
south,-0.1003,0.027,-3.736,0.0,-0.17,-0.031,значим
urban,0.1732,0.028,6.26,0.0,0.102,0.245,значим
married,0.2015,0.04,5.008,0.0,0.098,0.305,значим


In [25]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/wage2.csv')

In [26]:
fitted_unrestricted = smf.ols(data=df, formula='np.log(wage)~age + I(age**2) + IQ + south + urban + married').fit()
fitted_restricted = smf.ols(data=df, formula='np.log(wage)~ IQ + south + urban + married').fit()

$$
H_0:\beta_{age}=\beta_{age^2}=0
 \\H_1: \beta_{age}^2=\beta_{age^2}^2>0
$$

In [27]:
R2_ur = fitted_unrestricted.rsquared  # коэффициент детерминации "длинной регрессии" 
R2_ur

0.20095663871585157

In [28]:
R2_r = fitted_restricted.rsquared  # коэффициент детерминации "длинной регрессии" 
R2_r

0.1754137020049945

In [29]:
F_obs = (R2_ur - R2_r)/(1-R2_ur)*(706-9)/4
F_obs

5.570231776550696

In [30]:
F_cr = stats.f.ppf(1-0.01, 6, 935-7)
F_cr

2.821394800507497

# Ингтерпретация результатов 
$$
F_{obs}>F{cr}
$$

# Согласование с гипотезой Н1

# Регрессоры значимы, следовательно возраст влияет на уровень зараплаты с вероятностью 99%

# 3.2

In [31]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')

In [32]:
fitted = smf.ols(data=df, formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)').fit()
fitted.params

Intercept         -5.007316
np.log(capital)    0.149343
np.log(labour)     0.720410
np.log(wage)       0.921390
dtype: float64

In [33]:
fitted.summary(alpha=0.01).tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.005,0.995]
Intercept,-5.0073,0.221,-22.649,0.000,-5.579,-4.436
np.log(capital),0.1493,0.015,10.141,0.000,0.111,0.187
np.log(labour),0.7204,0.019,37.487,0.000,0.671,0.770
np.log(wage),0.9214,0.058,16.001,0.000,0.773,1.070


In [34]:
alpha = 0.05
n=len(df)
k=3
t_cr = stats.t.ppf(1-alpha/2, n-k-1)
t_cr

1.9641715505901485

In [35]:
data = '''coef	std.err	t	P>|t|	[0.005	0.995]
Intercept	-5.0073	0.221	-22.649	0.000	-5.579	-4.436
np.log(capital)	0.1493	0.015	10.141	0.000	0.111	0.187
np.log(labour)	0.7204	0.019	37.487	0.000	0.671	0.770
np.log(wage)	0.9214	0.058	16.001	0.000	0.773	1.070'''

In [36]:
df = pd.read_csv(io.StringIO(data), sep='\s+')
df

Unnamed: 0,coef,std.err,t,P>|t|,[0.005,0.995]
Intercept,-5.0073,0.221,-22.649,0.0,-5.579,-4.436
np.log(capital),0.1493,0.015,10.141,0.0,0.111,0.187
np.log(labour),0.7204,0.019,37.487,0.0,0.671,0.77
np.log(wage),0.9214,0.058,16.001,0.0,0.773,1.07


In [37]:
df['result']=df.apply(lambda x: 'значимо' if abs(x['t'])>t_cr else 'не значимо', axis =1)
df

Unnamed: 0,coef,std.err,t,P>|t|,[0.005,0.995],result
Intercept,-5.0073,0.221,-22.649,0.0,-5.579,-4.436,значимо
np.log(capital),0.1493,0.015,10.141,0.0,0.111,0.187,значимо
np.log(labour),0.7204,0.019,37.487,0.0,0.671,0.77,значимо
np.log(wage),0.9214,0.058,16.001,0.0,0.773,1.07,значимо


# 3.2.1

$$
H_0:\beta_{capital}+\beta_{labour}+\beta_{wage}=1
 \\H_1: \beta_{capital}+\beta_{labour}+\beta_{wage}\neq1
$$

In [39]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')

In [40]:
fitted_res = smf.ols(data=df, formula='np.log(output)~(1-np.log(labour)-np.log(wage))+np.log(labour)+np.log(wage)').fit()
fitted_res.params

Intercept        -5.973322
np.log(labour)    0.848246
np.log(wage)      1.065677
dtype: float64

In [41]:
fitted = smf.ols(data=df, formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)').fit()
fitted.params

Intercept         -5.007316
np.log(capital)    0.149343
np.log(labour)     0.720410
np.log(wage)       0.921390
dtype: float64

In [42]:
R2_r = fitted_res.rsquared  # коэффициент детерминации "короткой регрессии" 
R2_r

0.8680882235383158

In [43]:
R2_ur = fitted.rsquared  
R2_ur

0.8883995592136719

In [44]:
n = len(df) 
m = 4 
F_obs = (R2_ur - R2_r)/(1-R2_ur)*(569-m)/1
F_obs

102.8302807383001

In [45]:
F_cr = stats.f.ppf(1-0.05, 1, 569-4)
F_cr

3.8579698801476354

# Гипотеза H1 принимается

# 3.2.2

$$
H_0:\beta_{labour}=\beta_{wage}
 \\H_1: \beta_{labour}\neq\beta_{wage}
$$

In [47]:
fitted_res = smf.ols(data=df, formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(labour)').fit()
fitted_res.params

Intercept         -1.711459
np.log(capital)    0.207570
np.log(labour)     0.714847
dtype: float64

In [48]:
fitted = smf.ols(data=df, formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)').fit()
fitted.params

Intercept         -5.007316
np.log(capital)    0.149343
np.log(labour)     0.720410
np.log(wage)       0.921390
dtype: float64

In [49]:
R2_r = fitted_res.rsquared 
R2_r

0.8378296125960738

In [50]:
R2_ur = fitted.rsquared  
R2_ur

0.8883995592136719

In [51]:
n = len(df) 
m = 4
F_obs = (R2_ur - R2_r)/(1-R2_ur)*(569-4)/1
F_obs

256.02067194024187

In [52]:
F_cr = stats.f.ppf(1-0.05,1,569-4)
F_cr

3.8579698801476354

# Принимается гипотеза Н1

# 3.2.3

$$
H_0:\beta_{labour}=\beta_{wage}=\beta_{capital}
 \\H_1: \beta_{labour}\neq\beta_{wage}\neq\beta_{capital}
$$

In [54]:
fitted = smf.ols(data=df, formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)').fit()
fitted.params

Intercept         -5.007316
np.log(capital)    0.149343
np.log(labour)     0.720410
np.log(wage)       0.921390
dtype: float64

In [55]:
fitted_res = smf.ols(data=df, formula='np.log(output)~np.log(labour)+np.log(labour)+np.log(labour)').fit()
fitted_res.params

Intercept        -2.377408
np.log(labour)    0.902788
dtype: float64

In [56]:
R2_r = fitted_res.rsquared  
R2_r

0.7960410913148555

In [57]:
R2_ur = fitted.rsquared  
R2_ur

0.8883995592136719

In [59]:
n = len(df) 
m = 4 
F_obs = (R2_ur - R2_r)/(1-R2_ur)*(n-4)/2
F_obs

233.79179327230761

In [60]:
F_cr = stats.f.ppf(1-0.05,2,569-4)
F_cr

3.011672483767199

# Принимается гипотеза Н1