In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import io
import statsmodels.formula.api as smf
import scipy.stats as stats
import re

# 1 Построение доверительных интервалов

## 1.1 sleep equation

In [4]:
df_sleep = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
df_sleep

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,1,2815,0,2815,0,21,0,0,20.529997,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,1,3786,0,3786,0,12,0,12,9.619998,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,1,2580,0,2580,0,44,0,33,2.750000,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,1,2026,0,2026,0,27,0,18,,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,0,675,1,465,210,18,0,4,,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,1,1961,1,1481,480,31,0,22,,2916


In [5]:
table_11='''
totwrk                       -0.169***         
                              (0.018)          
                                               
age                           2.689*           
                              (1.469)          
                                               
south                        101.568**         
                             (41.837)          
                                               
male                         87.669**          
                             (35.104)          
                                               
smsa                         -54.748*          
                             (33.123)          
                                               
yngkid                        -13.962          
                             (50.341)          
                                               
marr                          31.211           
                             (42.233)          
                                               
Constant                    3450.913***        
                             (80.726)          
'''

In [6]:
table_11.strip().split()

['totwrk',
 '-0.169***',
 '(0.018)',
 'age',
 '2.689*',
 '(1.469)',
 'south',
 '101.568**',
 '(41.837)',
 'male',
 '87.669**',
 '(35.104)',
 'smsa',
 '-54.748*',
 '(33.123)',
 'yngkid',
 '-13.962',
 '(50.341)',
 'marr',
 '31.211',
 '(42.233)',
 'Constant',
 '3450.913***',
 '(80.726)']

In [7]:
np.reshape(table_11.strip().split(),(8,3)) # strip - удаление пробелов и переходов на новую строку; split - создание списка из непустых строк; 8 - колв-во коэффициентов

array([['totwrk', '-0.169***', '(0.018)'],
       ['age', '2.689*', '(1.469)'],
       ['south', '101.568**', '(41.837)'],
       ['male', '87.669**', '(35.104)'],
       ['smsa', '-54.748*', '(33.123)'],
       ['yngkid', '-13.962', '(50.341)'],
       ['marr', '31.211', '(42.233)'],
       ['Constant', '3450.913***', '(80.726)']], dtype='<U11')

In [8]:
df = pd.DataFrame(np.reshape(table_11.strip().split(),(8,3)),columns=['Name', 'Estimate', 'Std.error']) # запишем результаты оценивания + подпишем столбцы
df

Unnamed: 0,Name,Estimate,Std.error
0,totwrk,-0.169***,(0.018)
1,age,2.689*,(1.469)
2,south,101.568**,(41.837)
3,male,87.669**,(35.104)
4,smsa,-54.748*,(33.123)
5,yngkid,-13.962,(50.341)
6,marr,31.211,(42.233)
7,Constant,3450.913***,(80.726)


In [9]:
# убираем "*" в столбцах
df['Estimate_number']=df['Estimate'].str.extract(r'([-+]?\d+.\d+)').astype(float)
df['Std_err_number']=df['Std.error'].str.extract(r'([-+]?\d+.\d+)').astype(float) 
df

Unnamed: 0,Name,Estimate,Std.error,Estimate_number,Std_err_number
0,totwrk,-0.169***,(0.018),-0.169,0.018
1,age,2.689*,(1.469),2.689,1.469
2,south,101.568**,(41.837),101.568,41.837
3,male,87.669**,(35.104),87.669,35.104
4,smsa,-54.748*,(33.123),-54.748,33.123
5,yngkid,-13.962,(50.341),-13.962,50.341
6,marr,31.211,(42.233),31.211,42.233
7,Constant,3450.913***,(80.726),3450.913,80.726


In [10]:
alpha=0.1
t_crit=stats.t.ppf(1-alpha/2, 706-7)
t_crit.round(3)

1.647

In [11]:
df['left']=df['Estimate_number']-t_crit*df['Std_err_number']
df['right']=df['Estimate_number']+t_crit*df['Std_err_number']
df

Unnamed: 0,Name,Estimate,Std.error,Estimate_number,Std_err_number,left,right
0,totwrk,-0.169***,(0.018),-0.169,0.018,-0.198647,-0.139353
1,age,2.689*,(1.469),2.689,1.469,0.269503,5.108497
2,south,101.568**,(41.837),101.568,41.837,32.660935,170.475065
3,male,87.669**,(35.104),87.669,35.104,29.851432,145.486568
4,smsa,-54.748*,(33.123),-54.748,33.123,-109.302789,-0.193211
5,yngkid,-13.962,(50.341),-13.962,50.341,-96.875463,68.951463
6,marr,31.211,(42.233),31.211,42.233,-38.348291,100.770291
7,Constant,3450.913***,(80.726),3450.913,80.726,3317.954334,3583.871666


## 1.2 output equation

In [12]:
df_lab=pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')
df_lab

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


In [29]:
table_12='''
log(capital)                 0.208***          
                              (0.017)          
                                               
log(labour)                  0.715***          
                              (0.023)          
                                               
Constant                     -1.711***         
                              (0.097)              
'''

In [30]:
table_12.strip().split()

['log(capital)',
 '0.208***',
 '(0.017)',
 'log(labour)',
 '0.715***',
 '(0.023)',
 'Constant',
 '-1.711***',
 '(0.097)']

In [31]:
np.reshape(table_12.strip().split(),(3,3)) # strip - удаление пробелов и переходов на новую строку; split - создание списка из непустых строк; 3 - колв-во коэффициентов

array([['log(capital)', '0.208***', '(0.017)'],
       ['log(labour)', '0.715***', '(0.023)'],
       ['Constant', '-1.711***', '(0.097)']], dtype='<U12')

In [32]:
df = pd.DataFrame(np.reshape(table_12.strip().split(),(3,3)),columns=['Name', 'Estimate', 'Std.error']) # запишем результаты оценивания + подпишем столбцы
df

Unnamed: 0,Name,Estimate,Std.error
0,log(capital),0.208***,(0.017)
1,log(labour),0.715***,(0.023)
2,Constant,-1.711***,(0.097)


In [33]:
# убираем "*" в столбцах
df['Estimate_number']=df['Estimate'].str.extract(r'([-+]?\d+.\d+)').astype(float)
df['Std_err_number']=df['Std.error'].str.extract(r'([-+]?\d+.\d+)').astype(float) 
df

Unnamed: 0,Name,Estimate,Std.error,Estimate_number,Std_err_number
0,log(capital),0.208***,(0.017),0.208,0.017
1,log(labour),0.715***,(0.023),0.715,0.023
2,Constant,-1.711***,(0.097),-1.711,0.097


In [34]:
alpha=0.05
t_crit=stats.t.ppf(1-alpha/2, 569-3)
t_crit.round(2)

1.96

In [35]:
df['left']=df['Estimate_number']-t_crit*df['Std_err_number']
df['right']=df['Estimate_number']+t_crit*df['Std_err_number']
df

Unnamed: 0,Name,Estimate,Std.error,Estimate_number,Std_err_number,left,right
0,log(capital),0.208***,(0.017),0.208,0.017,0.174609,0.241391
1,log(labour),0.715***,(0.023),0.715,0.023,0.669824,0.760176
2,Constant,-1.711***,(0.097),-1.711,0.097,-1.901524,-1.520476


## 1.3 cost equation

In [36]:
df_elec=pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Electricity.csv')
df_elec

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [37]:
table_13='''
log(q)                       0.403***          
                              (0.032)          
                                               
I(log(q)2)                   0.030***          
                              (0.002)          
                                               
log(pl)                       0.146**          
                              (0.070)          
                                               
log(pk)                      0.157***          
                              (0.058)          
                                               
log(pf)                      0.685***          
                              (0.043)          
                                               
Constant                     -6.739***         
                              (0.706)                     
'''

In [38]:
table_13.strip().split()

['log(q)',
 '0.403***',
 '(0.032)',
 'I(log(q)2)',
 '0.030***',
 '(0.002)',
 'log(pl)',
 '0.146**',
 '(0.070)',
 'log(pk)',
 '0.157***',
 '(0.058)',
 'log(pf)',
 '0.685***',
 '(0.043)',
 'Constant',
 '-6.739***',
 '(0.706)']

In [39]:
np.reshape(table_13.strip().split(),(6,3)) # strip - удаление пробелов и переходов на новую строку; split - создание списка из непустых строк; 6 - колв-во коэффициентов

array([['log(q)', '0.403***', '(0.032)'],
       ['I(log(q)2)', '0.030***', '(0.002)'],
       ['log(pl)', '0.146**', '(0.070)'],
       ['log(pk)', '0.157***', '(0.058)'],
       ['log(pf)', '0.685***', '(0.043)'],
       ['Constant', '-6.739***', '(0.706)']], dtype='<U10')

In [40]:
df = pd.DataFrame(np.reshape(table_13.strip().split(),(6,3)),columns=['Name', 'Estimate', 'Std.error']) # запишем результаты оценивания + подпишем столбцы
df

Unnamed: 0,Name,Estimate,Std.error
0,log(q),0.403***,(0.032)
1,I(log(q)2),0.030***,(0.002)
2,log(pl),0.146**,(0.070)
3,log(pk),0.157***,(0.058)
4,log(pf),0.685***,(0.043)
5,Constant,-6.739***,(0.706)


In [41]:
# убираем "*" в столбцах
df['Estimate_number']=df['Estimate'].str.extract(r'([-+]?\d+.\d+)').astype(float)
df['Std_err_number']=df['Std.error'].str.extract(r'([-+]?\d+.\d+)').astype(float) 
df

Unnamed: 0,Name,Estimate,Std.error,Estimate_number,Std_err_number
0,log(q),0.403***,(0.032),0.403,0.032
1,I(log(q)2),0.030***,(0.002),0.03,0.002
2,log(pl),0.146**,(0.070),0.146,0.07
3,log(pk),0.157***,(0.058),0.157,0.058
4,log(pf),0.685***,(0.043),0.685,0.043
5,Constant,-6.739***,(0.706),-6.739,0.706


In [42]:
alpha=0.01
t_crit=stats.t.ppf(1-alpha/2, 158-5)
t_crit.round(2)

2.61

In [43]:
df['left']=df['Estimate_number']-t_crit*df['Std_err_number']
df['right']=df['Estimate_number']+t_crit*df['Std_err_number']
df

Unnamed: 0,Name,Estimate,Std.error,Estimate_number,Std_err_number,left,right
0,log(q),0.403***,(0.032),0.403,0.032,0.319533,0.486467
1,I(log(q)2),0.030***,(0.002),0.03,0.002,0.024783,0.035217
2,log(pl),0.146**,(0.070),0.146,0.07,-0.036584,0.328584
3,log(pk),0.157***,(0.058),0.157,0.058,0.005716,0.308284
4,log(pf),0.685***,(0.043),0.685,0.043,0.572841,0.797159
5,Constant,-6.739***,(0.706),-6.739,0.706,-8.580491,-4.897509
