# 1.1

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import io
import statsmodels.formula.api as smf
import scipy.stats as stats

In [4]:
# Regression table pasted as fixed-width text. Each coefficient name is
# followed by its estimate (some with trailing asterisks) and, on the next
# line, a value in parentheses that later cells treat as the standard error.
# Only whitespace separates the fields, so the text can be tokenized with
# str.split().
data_string = '''totwrk                       -0.169***         
                              (0.018)          
                                               
age                           2.689*           
                              (1.469)          
                                               
south                        101.568**         
                             (41.837)          
                                               
male                         87.669**          
                             (35.104)          
                                               
smsa                         -54.748*          
                             (33.123)          
                                               
yngkid                        -13.962          
                             (50.341)          
                                               
marr                          31.211           
                             (42.233)          
                                               
Constant                    3450.913***        
                             (80.726)  '''


In [5]:
# Preview: tokenize the pasted table on whitespace and arrange the tokens as
# 8 rows of (name, estimate text, standard-error text).
tokens = data_string.strip().split()
np.array(tokens).reshape(8, 3)

array([['totwrk', '-0.169***', '(0.018)'],
       ['age', '2.689*', '(1.469)'],
       ['south', '101.568**', '(41.837)'],
       ['male', '87.669**', '(35.104)'],
       ['smsa', '-54.748*', '(33.123)'],
       ['yngkid', '-13.962', '(50.341)'],
       ['marr', '31.211', '(42.233)'],
       ['Constant', '3450.913***', '(80.726)']], dtype='<U11')

In [6]:
# Build the coefficient table: one row per term, three text columns
# (term name, estimate as pasted, standard error as pasted).
table_cells = np.array(data_string.strip().split()).reshape(8, 3)
df = pd.DataFrame(table_cells, columns=['Coeff', 'Est str', 'std.err. str'])
df

Unnamed: 0,Coeff,Est str,std.err. str
0,totwrk,-0.169***,(0.018)
1,age,2.689*,(1.469)
2,south,101.568**,(41.837)
3,male,87.669**,(35.104)
4,smsa,-54.748*,(33.123)
5,yngkid,-13.962,(50.341)
6,marr,31.211,(42.233)
7,Constant,3450.913***,(80.726)


In [7]:
# Pull the numeric part out of each text cell and convert it to float.
# Pattern: optional sign, digits, then an optional fractional part.
# The decimal point is escaped on purpose: an unescaped '.' matches any
# character, not only the decimal separator. Making the fraction optional
# also lets integer-valued cells parse instead of becoming NaN.
number_pattern = r'([-+]?\d+(?:\.\d+)?)'

# Estimates: .str applies the regex element-wise, extract keeps the captured
# number (dropping the trailing asterisks), astype converts the text to float.
# expand=False returns a Series, since there is a single capture group.
df['Estimate_numb'] = df['Est str'].str.extract(number_pattern, expand=False).astype(float)
# Standard errors: same extraction, which drops the surrounding parentheses.
df['Std.error'] = df['std.err. str'].str.extract(number_pattern, expand=False).astype(float)
df

Unnamed: 0,Coeff,Est str,std.err. str,Estimate_numb,Std.error
0,totwrk,-0.169***,(0.018),-0.169,0.018
1,age,2.689*,(1.469),2.689,1.469
2,south,101.568**,(41.837),101.568,41.837
3,male,87.669**,(35.104),87.669,35.104
4,smsa,-54.748*,(33.123),-54.748,33.123
5,yngkid,-13.962,(50.341),-13.962,50.341
6,marr,31.211,(42.233),31.211,42.233
7,Constant,3450.913***,(80.726),3450.913,80.726


In [8]:
# Two-sided critical value of Student's t at significance level alpha.
# Degrees of freedom are 706 - 8: the table above has 8 coefficients
# (including the constant); 706 is presumably the sample size - TODO confirm.
alpha = 0.1
t_cr = stats.t(df=706 - 8).ppf(1 - alpha / 2)
t_cr

1.6470395952054095

In [9]:
# Confidence bounds for every coefficient: estimate -/+ t_cr * standard error.
half_width = t_cr * df['Std.error']
df['low'] = df['Estimate_numb'] - half_width
df['up'] = df['Estimate_numb'] + half_width
df

Unnamed: 0,Coeff,Est str,std.err. str,Estimate_numb,Std.error,low,up
0,totwrk,-0.169***,(0.018),-0.169,0.018,-0.198647,-0.139353
1,age,2.689*,(1.469),2.689,1.469,0.269499,5.108501
2,south,101.568**,(41.837),101.568,41.837,32.660804,170.475196
3,male,87.669**,(35.104),87.669,35.104,29.851322,145.486678
4,smsa,-54.748*,(33.123),-54.748,33.123,-109.302893,-0.193107
5,yngkid,-13.962,(50.341),-13.962,50.341,-96.87562,68.95162
6,marr,31.211,(42.233),31.211,42.233,-38.348423,100.770423
7,Constant,3450.913***,(80.726),3450.913,80.726,3317.954082,3583.871918


# 1.3

In [10]:
# Download the Electricity dataset as a DataFrame.
# Note: this rebinds `df`, so the coefficient table built earlier in the
# notebook is no longer reachable under that name.
electricity_csv_url = 'https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Electricity.csv'
df = pd.read_csv(electricity_csv_url)
df

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [11]:
# Fit log(cost) on log(q), its square, and the logs of pl, pk and pf by OLS,
# then show the coefficient table with intervals computed at alpha=0.01.
baseline_formula = 'np.log(cost)~np.log(q)+I(np.log(q)**2)+np.log(pl)+np.log(pk)+np.log(pf)'
fitted = smf.ols(formula=baseline_formula, data=df).fit()
fitted.summary(alpha=0.01).tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.005,0.995]
Intercept,-6.7387,0.706,-9.541,0.000,-8.581,-4.896
np.log(q),0.4030,0.032,12.734,0.000,0.320,0.486
I(np.log(q) ** 2),0.0304,0.002,14.024,0.000,0.025,0.036
np.log(pl),0.1461,0.070,2.073,0.040,-0.038,0.330
np.log(pk),0.1571,0.058,2.721,0.007,0.007,0.308
np.log(pf),0.6847,0.043,16.043,0.000,0.573,0.796


In [12]:
# Coefficient table transcribed from the summary output of the regression
# cell above, with tab-separated fields. The header row names six columns
# while every data row has seven fields (term name first). The squared term
# is written as I(np.log(q)^2), without spaces - presumably so that
# whitespace splitting keeps the label in a single field.
data =	'''coef	std.err	t	P>|t|	[0.005	0.995]
Intercept	-6.7387	0.706	-9.541	0.000	-8.581	-4.896
np.log(q)	0.4030	0.032	12.734	0.000	0.320	0.486
I(np.log(q)^2)	0.0304	0.002	14.024	0.000	0.025	0.036
np.log(pl)	0.1461	0.070	2.073	0.040	-0.038	0.330
np.log(pk)	0.1571	0.058	2.721	0.007	0.007	0.308
np.log(pf)	0.6847	0.043	16.043	0.000	0.573	0.796'''

In [13]:
# Parse the pasted coefficient table from the in-memory string.
# The header has one field fewer than the data rows, so pandas uses the first
# field of each row (the term name) as the index.
# The separator is a raw string: in a plain literal '\s' is an invalid escape
# sequence, which current Python versions warn about.
df_mini = pd.read_csv(io.StringIO(data), sep=r'\s+')
df_mini

Unnamed: 0,coef,std.err,t,P>|t|,[0.005,0.995]
Intercept,-6.7387,0.706,-9.541,0.0,-8.581,-4.896
np.log(q),0.403,0.032,12.734,0.0,0.32,0.486
I(np.log(q)^2),0.0304,0.002,14.024,0.0,0.025,0.036
np.log(pl),0.1461,0.07,2.073,0.04,-0.038,0.33
np.log(pk),0.1571,0.058,2.721,0.007,0.007,0.308
np.log(pf),0.6847,0.043,16.043,0.0,0.573,0.796


In [14]:
# Two-sided critical value of Student's t at significance level alpha.
# Degrees of freedom are 158 - 6: the fitted model has 6 coefficients
# (including the intercept); 158 is presumably the sample size - TODO confirm.
alpha = 0.01
t_cr = stats.t(df=158 - 6).ppf(1 - alpha / 2)
t_cr

2.608560883167519

In [15]:
# Recompute the confidence bounds by hand (coef -/+ t_cr * std.err) so they
# can be compared with the interval columns parsed from the summary table.
half_width = t_cr * df_mini['std.err']
df_mini['low'] = df_mini['coef'] - half_width
df_mini['up'] = df_mini['coef'] + half_width
df_mini

Unnamed: 0,coef,std.err,t,P>|t|,[0.005,0.995],low,up
Intercept,-6.7387,0.706,-9.541,0.0,-8.581,-4.896,-8.580344,-4.897056
np.log(q),0.403,0.032,12.734,0.0,0.32,0.486,0.319526,0.486474
I(np.log(q)^2),0.0304,0.002,14.024,0.0,0.025,0.036,0.025183,0.035617
np.log(pl),0.1461,0.07,2.073,0.04,-0.038,0.33,-0.036499,0.328699
np.log(pk),0.1571,0.058,2.721,0.007,0.007,0.308,0.005803,0.308397
np.log(pf),0.6847,0.043,16.043,0.0,0.573,0.796,0.572532,0.796868


# 2.2

In [16]:
# Extended specification: log(cost) on log(q), log(pl), log(pk), log(pf) and
# the square of each of those logs, fitted by OLS. The coefficient table is
# shown with intervals computed at alpha=0.01.
quadratic_formula = 'np.log(cost)~np.log(q)+I(np.log(q)**2)+np.log(pl)+I(np.log(pl)**2)+np.log(pk)+I(np.log(pk)**2)+np.log(pf)+I(np.log(pf)**2)'
fitted = smf.ols(formula=quadratic_formula, data=df).fit()
fitted.summary(alpha=0.01).tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.005,0.995]
Intercept,-44.4035,23.432,-1.895,0.060,-105.542,16.735
np.log(q),0.3963,0.032,12.333,0.000,0.312,0.480
I(np.log(q) ** 2),0.0309,0.002,14.020,0.000,0.025,0.037
np.log(pl),8.3334,5.330,1.564,0.120,-5.573,22.240
I(np.log(pl) ** 2),-0.4554,0.297,-1.535,0.127,-1.230,0.319
np.log(pk),0.4362,1.522,0.287,0.775,-3.536,4.408
I(np.log(pk) ** 2),-0.0360,0.187,-0.192,0.848,-0.525,0.453
np.log(pf),0.8988,0.616,1.459,0.147,-0.709,2.507
I(np.log(pf) ** 2),-0.0305,0.093,-0.329,0.743,-0.272,0.211


In [17]:
# Coefficient table transcribed from the summary output of the regression
# cell above, with tab-separated fields. The header row names six columns
# while every data row has seven fields (term name first). Squared terms are
# written as I(np.log(x)^2), without spaces - presumably so that whitespace
# splitting keeps each label in a single field.
data = '''coef	std.err	t	P>|t|	[0.005	0.995]
Intercept	-44.4035	23.432	-1.895	0.060	-105.542	16.735
np.log(q)	0.3963	0.032	12.333	0.000	0.312	0.480
I(np.log(q)^2)	0.0309	0.002	14.020	0.000	0.025	0.037
np.log(pl)	8.3334	5.330	1.564	0.120	-5.573	22.240
I(np.log(pl)^2)	-0.4554	0.297	-1.535	0.127	-1.230	0.319
np.log(pk)	0.4362	1.522	0.287	0.775	-3.536	4.408
I(np.log(pk)^2)	-0.0360	0.187	-0.192	0.848	-0.525	0.453
np.log(pf)	0.8988	0.616	1.459	0.147	-0.709	2.507
I(np.log(pf)^2)	-0.0305	0.093	-0.329	0.743	-0.272	0.211'''

In [18]:
# Parse the pasted coefficient table from the in-memory string.
# The header has one field fewer than the data rows, so pandas uses the first
# field of each row (the term name) as the index.
# The separator is a raw string: in a plain literal '\s' is an invalid escape
# sequence, which current Python versions warn about.
df_mini = pd.read_csv(io.StringIO(data), sep=r'\s+')
df_mini

Unnamed: 0,coef,std.err,t,P>|t|,[0.005,0.995]
Intercept,-44.4035,23.432,-1.895,0.06,-105.542,16.735
np.log(q),0.3963,0.032,12.333,0.0,0.312,0.48
I(np.log(q)^2),0.0309,0.002,14.02,0.0,0.025,0.037
np.log(pl),8.3334,5.33,1.564,0.12,-5.573,22.24
I(np.log(pl)^2),-0.4554,0.297,-1.535,0.127,-1.23,0.319
np.log(pk),0.4362,1.522,0.287,0.775,-3.536,4.408
I(np.log(pk)^2),-0.036,0.187,-0.192,0.848,-0.525,0.453
np.log(pf),0.8988,0.616,1.459,0.147,-0.709,2.507
I(np.log(pf)^2),-0.0305,0.093,-0.329,0.743,-0.272,0.211


In [19]:
# Two-sided critical value of Student's t at significance level alpha.
# Degrees of freedom are 158 - 9: the fitted model has 9 coefficients
# (including the intercept); 158 is presumably the sample size - TODO confirm.
alpha = 0.01
t_cr = stats.t(df=158 - 9).ppf(1 - alpha / 2)
t_cr

2.6092279073321927

In [20]:
# Label each coefficient by a two-sided t-test: the first label is used when
# |t| exceeds the critical value t_cr, the second otherwise. The labels are
# Russian for "significant" / "not significant".
# The comparison is vectorized over the 't' column instead of calling a
# Python lambda once per row.
df_mini['Significance'] = np.where(df_mini['t'].abs() > t_cr, 'значим', 'незначим')
df_mini

Unnamed: 0,coef,std.err,t,P>|t|,[0.005,0.995],Significance
Intercept,-44.4035,23.432,-1.895,0.06,-105.542,16.735,незначим
np.log(q),0.3963,0.032,12.333,0.0,0.312,0.48,значим
I(np.log(q)^2),0.0309,0.002,14.02,0.0,0.025,0.037,значим
np.log(pl),8.3334,5.33,1.564,0.12,-5.573,22.24,незначим
I(np.log(pl)^2),-0.4554,0.297,-1.535,0.127,-1.23,0.319,незначим
np.log(pk),0.4362,1.522,0.287,0.775,-3.536,4.408,незначим
I(np.log(pk)^2),-0.036,0.187,-0.192,0.848,-0.525,0.453,незначим
np.log(pf),0.8988,0.616,1.459,0.147,-0.709,2.507,незначим
I(np.log(pf)^2),-0.0305,0.093,-0.329,0.743,-0.272,0.211,незначим
