In [73]:
import numpy as np
import pandas as pd
import io
import statsmodels.formula.api as smf

# 1. Суммы квадратов, остатки, предсказанные значения, R2

## 1.1 Предсказанные и остатки

## 1.1.1 Задача

In [74]:
check_111 = '''sleep totwrk age male hrwage
3113   3438  32   1    7.07 
2920   5020  31   1    1.43 
2670   2815  44   1   20.53 
3083   3786  30   0    9.62 
3448   2580  64   1    2.75 
4063   1205  41   1   19.25'''

In [75]:
df = pd.read_csv(io.StringIO(check_111), sep='\s+')
df

Unnamed: 0,sleep,totwrk,age,male,hrwage
0,3113,3438,32,1,7.07
1,2920,5020,31,1,1.43
2,2670,2815,44,1,20.53
3,3083,3786,30,0,9.62
4,3448,2580,64,1,2.75
5,4063,1205,41,1,19.25


In [76]:
df['y_pred'] = 3525.14 + df['totwrk']*(-0.16) + df['age']*(1.62) + df['male']*(51.84) + np.log(df['hrwage'])*(-9.88) # создаём переменную y_predicted
df

Unnamed: 0,sleep,totwrk,age,male,hrwage,y_pred
0,3113,3438,32,1,7.07,3059.416098
1,2920,5020,31,1,1.43,2820.466176
2,2670,2815,44,1,20.53,3168.003754
3,3083,3786,30,0,9.62,2945.613219
4,3448,2580,64,1,2.75,3257.865383
5,4063,1205,41,1,19.25,3421.379791


In [77]:
df['residual'] = df['sleep'] - df['y_pred']
df

Unnamed: 0,sleep,totwrk,age,male,hrwage,y_pred,residual
0,3113,3438,32,1,7.07,3059.416098,53.583902
1,2920,5020,31,1,1.43,2820.466176,99.533824
2,2670,2815,44,1,20.53,3168.003754,-498.003754
3,3083,3786,30,0,9.62,2945.613219,137.386781
4,3448,2580,64,1,2.75,3257.865383,190.134617
5,4063,1205,41,1,19.25,3421.379791,641.620209


## 1.1.2 Задача

In [78]:
check_112 = '''output capital labour
9.25    2.61    184  
3.66    1.32     91  
28.78   22.09   426  
4.12    10.74    72  
2.89    1.16     46'''

In [79]:
df = pd.read_csv(io.StringIO(check_112), sep='\s+')
df

Unnamed: 0,output,capital,labour
0,9.25,2.61,184
1,3.66,1.32,91
2,28.78,22.09,426
3,4.12,10.74,72
4,2.89,1.16,46


In [80]:
df['y_pred'] = -1.71 + 0.21*np.log(df['capital']) + 0.71*np.log(df['labour']) # создаём переменную y_predicted
df

Unnamed: 0,output,capital,labour,y_pred
0,9.25,2.61,184,2.194068
1,3.66,1.32,91,1.551013
2,28.78,22.09,426,3.238628
3,4.12,10.74,72,1.824968
4,2.89,1.16,46,1.039504


In [81]:
df['residual'] = np.log(df['output']) - df['y_pred'] # остатки
df.round(2)

Unnamed: 0,output,capital,labour,y_pred,residual
0,9.25,2.61,184,2.19,0.03
1,3.66,1.32,91,1.55,-0.25
2,28.78,22.09,426,3.24,0.12
3,4.12,10.74,72,1.82,-0.41
4,2.89,1.16,46,1.04,0.02


## 1.2 Суммы квадратов и R^2

## 1.2.1 Задача

In [82]:
TSS = 100.35 # общая сумма квадратов
ESS = 47.56 # объяснённая сумма квадратов

In [83]:
RSS = TSS - ESS # остаточная сумма квадратов
RSS

52.78999999999999

In [84]:
R2=ESS/TSS # R^2
round(R2,2)

0.47

## 1.2.2 Задача

In [85]:
TSS = 240.82 # общая сумма квадратов
RSS = 93.52 # остаточная сумма квадратов

In [86]:
ESS = TSS - RSS # объединённая сумма квадратов
ESS

147.3

In [87]:
R2=ESS/TSS # R^2
round(R2,2)

0.61

## 1.2.3 Задача

In [88]:
RSS = 782.83 # остаточная сумма квадратов
ESS = 418.38 # объяснённая сумма квадратов

In [89]:
TSS = RSS + ESS # общая сумма квадратов
TSS

1201.21

In [90]:
R2=ESS/TSS # R^2
round(R2,2)

0.35

## 1.2.4 Задача

In [91]:
check124 = '''Residual
-0.24
0.41
1.22
-0.93
-0.37
-1.33
0.16
-0.27
0.06
1.29'''

In [92]:
df = pd.read_csv(io.StringIO(check124), sep='\s+')
df

Unnamed: 0,Residual
0,-0.24
1,0.41
2,1.22
3,-0.93
4,-0.37
5,-1.33
6,0.16
7,-0.27
8,0.06
9,1.29


In [93]:
df['Residual^2']=(pd.read_csv(io.StringIO(check124), sep='\s+'))**2
df

Unnamed: 0,Residual,Residual^2
0,-0.24,0.0576
1,0.41,0.1681
2,1.22,1.4884
3,-0.93,0.8649
4,-0.37,0.1369
5,-1.33,1.7689
6,0.16,0.0256
7,-0.27,0.0729
8,0.06,0.0036
9,1.29,1.6641


In [94]:
RSS=np.sum(df['Residual^2'])
round (RSS,2)

6.25

## Стандартная ошибка регрессии:
$$
SER=\sqrt{\frac{RSS}{(n-k-1)}}
$$

In [95]:
n=10
k=2
SER=np.sqrt(RSS/(n-k-1))
round(SER,2)

0.94

## 1.2.5 Задача

In [106]:
check125='''sleep/60 totwrk age y.hat
51.88     3438  32  51.00
48.67     5020  31  44.43
44.50     2815  44  53.72
51.38     3786  30  49.53
57.47     2580  64  54.92
67.72     1205  41  60.36
53        2113  35  56.53
48.80     3608  47  50.46
56.13     2353  32  55.50
50.30     2851  30  53.41
'''

In [114]:
df = pd.read_csv(io.StringIO(check125), sep='\s+')
df

Unnamed: 0,sleep/60,totwrk,age,y.hat
0,51.88,3438,32,51.0
1,48.67,5020,31,44.43
2,44.5,2815,44,53.72
3,51.38,3786,30,49.53
4,57.47,2580,64,54.92
5,67.72,1205,41,60.36
6,53.0,2113,35,56.53
7,48.8,3608,47,50.46
8,56.13,2353,32,55.5
9,50.3,2851,30,53.41


In [115]:
TSS = ((df['sleep/60'] - np.mean(df['sleep/60']))**2).sum() # общая сумма квадратов
TSS.round(2)

366.26

In [116]:
RSS = ((df['sleep/60'] - df['y.hat'])**2).sum() # остаточная сумма квадртов
RSS.round(2)

193.14

In [117]:
ESS=TSS-RSS # объяснённая сумма квадратов
round(ESS,2)

173.12

## 1.2.6 Задача

In [118]:
check126='''log(output) log(capital) log(labour) y.hat
2.22            0.96        5.21     2.19 
1.30            0.28        4.51     1.47 
3.36            3.10        6.05     3.23 
1.42            2.37        4.28     1.56 
1.06            0.15        3.83     0.85 
6.17            5.62        8.94     6.15 
0.77            0.52        3.87     0.94 
1.42            0.15        4.68     1.61 
0.90           -0.13        3.56     0.57 
1.35            1.25        4.28     1.40 
'''

In [149]:
df = pd.read_csv(io.StringIO(check126), sep='\s+')
df

Unnamed: 0,log(output),log(capital),log(labour),y.hat
0,2.22,0.96,5.21,2.19
1,1.3,0.28,4.51,1.47
2,3.36,3.1,6.05,3.23
3,1.42,2.37,4.28,1.56
4,1.06,0.15,3.83,0.85
5,6.17,5.62,8.94,6.15
6,0.77,0.52,3.87,0.94
7,1.42,0.15,4.68,1.61
8,0.9,-0.13,3.56,0.57
9,1.35,1.25,4.28,1.4


In [104]:
TSS = ((df['log(output)'] - np.mean(df['log(output)']))**2).sum() # общая сумма квадратов
TSS.round(2)

24.48

In [120]:
ESS = ((df['y.hat'] - np.mean(df['log(output)']))**2).sum() # объяснённая сумма квадратов
ESS.round(2)

24.25

In [121]:
RSS = ((df['log(output)'] - df['y.hat'])**2).sum() # остаточная сумма квадртов
RSS.round(2)

0.29

## 1.2.7 Задача

In [122]:
n=935 # observations
k=5 
SER=0.378

## Стандартная ошибка регрессии:
$$
SER=\sqrt{\frac{RSS}{(n-k-1)}}
$$

## => RSS:
$$
RSS=(n-k-1)*SER^2
$$

In [126]:
RSS=(n-k-1)*(SER**2) # RSS для 1-й регрессии
round(RSS,3)

132.739

## 1.2.8 Задача

In [129]:
n=935 # observations
k=5 
SER=0.383
R2=0.178

## R^2:
$$
R^2=1-\frac{RSS}{TSS}
$$

## => TSS:
$$
TSS = \frac{RSS}{1-R^2}=\frac{(n-k-1)*SER^2}{1-R^2}
$$

In [132]:
TSS=((n-k-1)*SER**2)/(1-R2) # TSS для 3-й регрессии
round(TSS,3)

165.784

## 1.2.9 Задача

In [136]:
n=935 # observations
k=3 
SER=0.389
R2=0.149

## Стандартная ошибка регрессии:
$$
SER=\sqrt{\frac{RSS}{(n-k-1)}}
$$

## => RSS:
$$
RSS=(n-k-1)*SER^2
$$

In [137]:
RSS=(n-k-1)*(SER**2) # RSS для 5-й регрессии
round(RSS,3)

140.88

## R^2:
$$
R^2=1-\frac{RSS}{TSS}
$$

## => TSS:
$$
TSS = \frac{RSS}{1-R^2}=\frac{(n-k-1)*SER^2}{1-R^2}
$$

In [138]:
TSS=((n-k-1)*SER**2)/(1-R2) # TSS для 3-й регрессии
round(TSS,3)

165.546

## ESS:
$$
ESS = TSS - RSS
$$

In [139]:
ESS=TSS-RSS
round(ESS,3)

24.666

# 2 R2adj

## 2.1 Задача

In [144]:
n=526 ## observations
k=5 ## exper, exper^2, female, smsa, south
R2=0.288

$$
R^2{adj}=1-(1-R^2)*\frac{(n-1)}{(n-k)}
$$

In [145]:
R2adj=1-(1-R2)*(n-1)/(n-k)
round(R2adj,3)

0.283

## 2.2 Задача

In [146]:
n=935 ## observations
k=6 ## age, IQ, urban, married, south, IQ*south
R2=0.201

$$
R^2{adj}=1-(1-R^2)*\frac{(n-1)}{(n-k)}
$$

In [143]:
R2adj=1-(1-R2)*(n-1)/(n-k)
round(R2adj,3)

0.197

## 2.3 Задача

In [148]:
check23='''sleep/60 totwrk age y.hat
51.88     3438  32  51.00
48.67     5020  31  44.43
44.50     2815  44  53.72
51.38     3786  30  49.53
57.47     2580  64  54.92
67.72     1205  41  60.36
53        2113  35  56.53
48.80     3608  47  50.46
56.13     2353  32  55.50
50.30     2851  30  53.41
'''

In [150]:
df = pd.read_csv(io.StringIO(check23), sep='\s+')
df

Unnamed: 0,sleep/60,totwrk,age,y.hat
0,51.88,3438,32,51.0
1,48.67,5020,31,44.43
2,44.5,2815,44,53.72
3,51.38,3786,30,49.53
4,57.47,2580,64,54.92
5,67.72,1205,41,60.36
6,53.0,2113,35,56.53
7,48.8,3608,47,50.46
8,56.13,2353,32,55.5
9,50.3,2851,30,53.41


In [154]:
ESS = ((df['y.hat'] - np.mean(df['sleep/60']))**2).sum() # объяснённая сумма квадратов
ESS.round(2)

173.19

In [155]:
TSS = ((df['sleep/60'] - np.mean(df['sleep/60']))**2).sum() # общая сумма квадратов
TSS.round(2)

366.26

In [156]:
R2=ESS/TSS # R^2
round(R2,2)

0.47

In [157]:
n=10
k=2

In [159]:
R2adj=1-(1-R2)*(n-1)/(n-k-1)
round(R2adj,2)

0.32

## 2.4 Задача

In [160]:
check24='''log(output) log(capital) log(labour) y.hat
2.22            0.96        5.21     2.19 
1.30            0.28        4.51     1.47 
3.36            3.10        6.05     3.23 
1.42            2.37        4.28     1.56 
1.06            0.15        3.83     0.85 
6.17            5.62        8.94     6.15 
0.77            0.52        3.87     0.94 
1.42            0.15        4.68     1.61 
0.90           -0.13        3.56     0.57 
1.35            1.25        4.28     1.40
'''

In [161]:
df = pd.read_csv(io.StringIO(check24), sep='\s+')
df

Unnamed: 0,log(output),log(capital),log(labour),y.hat
0,2.22,0.96,5.21,2.19
1,1.3,0.28,4.51,1.47
2,3.36,3.1,6.05,3.23
3,1.42,2.37,4.28,1.56
4,1.06,0.15,3.83,0.85
5,6.17,5.62,8.94,6.15
6,0.77,0.52,3.87,0.94
7,1.42,0.15,4.68,1.61
8,0.9,-0.13,3.56,0.57
9,1.35,1.25,4.28,1.4


In [162]:
ESS = ((df['y.hat'] - np.mean(df['log(output)']))**2).sum() # объяснённая сумма квадратов
ESS.round(2)

24.25

In [163]:
TSS = ((df['log(output)'] - np.mean(df['log(output)']))**2).sum() # общая сумма квадратов
TSS.round(2)

24.48

In [164]:
R2=ESS/TSS # R^2
round(R2,2)

0.99

In [165]:
n=10
k=2

In [166]:
R2adj=1-(1-R2)*(n-1)/(n-k-1)
round(R2adj,2)

0.99

# 3. Суммы квадратов, остатки, предсказанные значения, R2

## 3.1 Sleep equation

In [177]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,1,2815,0,2815,0,21,0,0,20.529997,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,1,3786,0,3786,0,12,0,12,9.619998,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,1,2580,0,2580,0,44,0,33,2.750000,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,1,2026,0,2026,0,27,0,18,,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,0,675,1,465,210,18,0,4,,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,1,1961,1,1481,480,31,0,22,,2916


In [178]:
sleep_eq31 = smf.ols(formula='sleep~totwrk+age+male+smsa', data=df).fit()
sleep_eq31.params

Intercept    3494.223143
totwrk         -0.167681
age             2.806498
male           86.908377
smsa          -75.285755
dtype: float64

In [207]:
check_31 = '''i   Факт  Fitted  Остаток 
1   3113 3094.454  18.546 
4   3083 2943.580 139.420 
6   4063 3494.143 568.857 
10  3018 3111.983 -93.983 
508 3630 3476.914 153.086 
700 2993 3295.398 -302.398'''

In [208]:
df = pd.read_csv(io.StringIO(check_31), sep='\s+')
df

Unnamed: 0,i,Факт,Fitted,Остаток
0,1,3113,3094.454,18.546
1,4,3083,2943.58,139.42
2,6,4063,3494.143,568.857
3,10,3018,3111.983,-93.983
4,508,3630,3476.914,153.086
5,700,2993,3295.398,-302.398


In [209]:
df['Факт']

0    3113
1    3083
2    4063
3    3018
4    3630
5    2993
Name: Факт, dtype: int64

In [182]:
df['Факт'].mean() ## средняя арифметическая

3316.6666666666665

In [183]:
difference = df['Факт'] - df['Факт'].mean()
difference

0   -203.666667
1   -233.666667
2    746.333333
3   -298.666667
4    313.333333
5   -323.666667
Name: Факт, dtype: float64

In [184]:
TSS = np.sum(difference**2) ## общая сумма квадратов
TSS

945233.3333333334

In [185]:
sleep_eq31.centered_tss.round(3) # TSS - общая сумма квадратов

139239835.763

In [186]:
difference = df['Fitted'] - df['Факт'].mean()
difference

0   -222.212667
1   -373.086667
2    177.476333
3   -204.683667
4    160.247333
5    -21.268667
Name: Fitted, dtype: float64

In [187]:
ESS = np.sum(difference**2) # объяснённая сумма квадратов
ESS

288096.9463873332

In [192]:
sleep_eq31.ess.round(3) # ESS - объяснённая сумма квадратов

17188382.234

In [193]:
difference = df['Fitted'] - df['Факт']
difference

0    -18.546
1   -139.420
2   -568.857
3     93.983
4   -153.086
5    302.398
dtype: float64

In [210]:
RSS = np.sum(difference**2) # остаточная сумма квадртов
RSS

467092.85505400004

In [211]:
RSS=sleep_eq31.ssr.round(3) # RSS - остаточная сумма квадртов
RSS

122051453.53

In [212]:
sleep_eq31.rsquared.round(7) # R^2

0.1234444

In [213]:
sleep_eq31.rsquared_adj.round(7) # R2adj

0.1184427

In [214]:
n=706
k=4
SER=np.sqrt(RSS/(n-k-1)) # стандартная ошибка регрессии
round(SER,4)

417.2655

## 3.2 Sleep equation (smsa only)

In [227]:
pd.set_option('display.max_columns', None) # отображаем все столбцы в таблице

In [228]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,3479,3479,0,1.955861,10.075380,1,1,1,3163,0,3113,3163,0,0,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,2140,2140,0,0.357674,0.000000,1,0,1,2920,1,2920,2920,1,0,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,4505,4227,1,3.021887,0.000000,1,1,0,3038,1,2670,2760,0,20000,1,2815,0,2815,0,21,0,0,20.529997,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,3211,3211,0,2.263844,0.000000,0,1,1,3083,1,3083,3083,0,5000,1,3786,0,3786,0,12,0,12,9.619998,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,4007,4007,0,1.011601,9.328213,1,1,1,3493,0,3448,3493,0,2400,1,2580,0,2580,0,44,0,33,2.750000,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,4669,4644,1,,9.664660,0,1,1,3410,0,2985,3385,0,16000,1,2026,0,2026,0,27,0,18,,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,5870,5870,0,,0.000000,1,1,0,3535,0,3520,3535,0,0,0,675,1,465,210,18,0,4,,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,4719,4584,0,,9.235130,0,1,1,3645,0,3510,3510,0,12000,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,5119,5031,0,,9.210441,0,1,1,3088,1,2970,3000,1,35000,1,1961,1,1481,480,31,0,22,,2916


In [229]:
df[df['smsa']==1]

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,4505,4227,1,3.021887,0.000000,1,1,0,3038,1,2670,2760,0,20000,1,2815,0,2815,0,21,0,0,20.529997,1936
7,47,0,8,0.000000,0.000000,13,0,1,1,3544,3469,3439,1,2.708717,10.657280,1,1,1,3033,1,2928,3003,0,0,0,3608,0,3608,0,28,0,24,15.010007,2209
9,30,0,10,0.000000,0.000000,15,11750,1,1,4211,4061,4061,1,1.085189,8.853808,1,1,0,3168,1,3018,3168,0,6000,1,2851,0,2851,0,9,0,7,2.959999,900
11,23,0,12,0.000000,0.000000,16,1000,1,1,6415,6355,6230,1,0.207014,7.313887,0,1,0,3480,0,3295,3355,0,3000,1,370,0,370,0,1,0,4,1.230000,529
12,24,0,13,0.000000,0.000000,16,1000,1,1,3844,3844,3794,1,0.207014,7.313887,1,1,0,3848,0,3798,3798,0,3000,1,2438,0,2438,0,2,0,4,1.230000,576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
689,31,0,690,0.182331,0.030075,12,10500,1,0,4639,4639,4489,1,,9.282754,0,0,0,3218,0,3068,3068,1,0,0,2373,0,2373,0,13,0,0,,961
693,25,1,694,0.182331,0.030075,12,3500,1,0,5530,4845,4845,1,,0.000000,0,0,1,3725,0,3040,3725,0,0,0,1510,1,1510,0,7,1,0,,625
697,47,0,698,0.182331,0.030075,10,9500,1,0,5882,5747,5747,1,,9.564583,0,1,0,3150,0,3015,3150,0,0,0,1183,1,1183,0,31,0,28,,2209
700,32,0,701,0.182331,0.030075,16,10500,1,0,4707,4587,4512,1,,9.282754,0,0,0,3180,0,2985,3105,1,0,0,2388,0,2388,0,10,0,0,,1024


In [230]:
df = df[df['smsa']==1]
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,4505,4227,1,3.021887,0.000000,1,1,0,3038,1,2670,2760,0,20000,1,2815,0,2815,0,21,0,0,20.529997,1936
7,47,0,8,0.000000,0.000000,13,0,1,1,3544,3469,3439,1,2.708717,10.657280,1,1,1,3033,1,2928,3003,0,0,0,3608,0,3608,0,28,0,24,15.010007,2209
9,30,0,10,0.000000,0.000000,15,11750,1,1,4211,4061,4061,1,1.085189,8.853808,1,1,0,3168,1,3018,3168,0,6000,1,2851,0,2851,0,9,0,7,2.959999,900
11,23,0,12,0.000000,0.000000,16,1000,1,1,6415,6355,6230,1,0.207014,7.313887,0,1,0,3480,0,3295,3355,0,3000,1,370,0,370,0,1,0,4,1.230000,529
12,24,0,13,0.000000,0.000000,16,1000,1,1,3844,3844,3794,1,0.207014,7.313887,1,1,0,3848,0,3798,3798,0,3000,1,2438,0,2438,0,2,0,4,1.230000,576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
689,31,0,690,0.182331,0.030075,12,10500,1,0,4639,4639,4489,1,,9.282754,0,0,0,3218,0,3068,3068,1,0,0,2373,0,2373,0,13,0,0,,961
693,25,1,694,0.182331,0.030075,12,3500,1,0,5530,4845,4845,1,,0.000000,0,0,1,3725,0,3040,3725,0,0,0,1510,1,1510,0,7,1,0,,625
697,47,0,698,0.182331,0.030075,10,9500,1,0,5882,5747,5747,1,,9.564583,0,1,0,3150,0,3015,3150,0,0,0,1183,1,1183,0,31,0,28,,2209
700,32,0,701,0.182331,0.030075,16,10500,1,0,4707,4587,4512,1,,9.282754,0,0,0,3180,0,2985,3105,1,0,0,2388,0,2388,0,10,0,0,,1024


In [231]:
sleep_eq32 = smf.ols(formula='sleep~totwrk+age+male+south', data=df).fit()
sleep_eq32.params

Intercept    3460.427182
totwrk         -0.184996
age             2.276770
male           92.649951
south         168.852598
dtype: float64

In [233]:
sleep_eq32.centered_tss.round(3) # TSS - общая сумма квадратов

50055043.404

In [240]:
sleep_eq32.ess.round(3) # ESS - объяснённая сумма квадратов

7396259.393

In [241]:
RSS=sleep_eq32.ssr.round(3) # RSS - остаточная сумма квадртов
RSS

42658784.011

In [242]:
sleep_eq32.rsquared.round(7) # R^2

0.1477625

In [243]:
sleep_eq32.rsquared_adj.round(7) # R2adj

0.1354558

In [244]:
n=282
k=4
SER=np.sqrt(RSS/(n-k-1)) # стандартная ошибка регрессии
round(SER,4)

392.4319

## 3.3 Sleep equation (men only)

In [245]:
pd.set_option('display.max_columns', None) # отображаем все столбцы в таблице

In [246]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,3479,3479,0,1.955861,10.075380,1,1,1,3163,0,3113,3163,0,0,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,2140,2140,0,0.357674,0.000000,1,0,1,2920,1,2920,2920,1,0,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,4505,4227,1,3.021887,0.000000,1,1,0,3038,1,2670,2760,0,20000,1,2815,0,2815,0,21,0,0,20.529997,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,3211,3211,0,2.263844,0.000000,0,1,1,3083,1,3083,3083,0,5000,1,3786,0,3786,0,12,0,12,9.619998,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,4007,4007,0,1.011601,9.328213,1,1,1,3493,0,3448,3493,0,2400,1,2580,0,2580,0,44,0,33,2.750000,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,4669,4644,1,,9.664660,0,1,1,3410,0,2985,3385,0,16000,1,2026,0,2026,0,27,0,18,,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,5870,5870,0,,0.000000,1,1,0,3535,0,3520,3535,0,0,0,675,1,465,210,18,0,4,,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,4719,4584,0,,9.235130,0,1,1,3645,0,3510,3510,0,12000,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,5119,5031,0,,9.210441,0,1,1,3088,1,2970,3000,1,35000,1,1961,1,1481,480,31,0,22,,2916


In [247]:
df[df['male']==1]

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,3479,3479,0,1.955861,10.075380,1,1,1,3163,0,3113,3163,0,0,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,2140,2140,0,0.357674,0.000000,1,0,1,2920,1,2920,2920,1,0,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,4505,4227,1,3.021887,0.000000,1,1,0,3038,1,2670,2760,0,20000,1,2815,0,2815,0,21,0,0,20.529997,1936
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,4007,4007,0,1.011601,9.328213,1,1,1,3493,0,3448,3493,0,2400,1,2580,0,2580,0,44,0,33,2.750000,4096
5,41,0,6,0.000000,0.000000,12,0,1,1,4812,4797,4797,0,2.957511,10.657280,1,1,1,4078,0,4063,4078,0,0,0,1205,0,0,1205,23,0,23,19.249998,1681
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
692,50,0,693,0.182331,0.030075,12,2500,1,0,4417,4147,4147,0,,9.328213,1,1,0,3330,0,3060,3330,0,2500,1,2603,0,2603,0,32,0,22,,2500
694,44,0,695,0.182331,0.030075,16,16250,1,0,4539,4451,4393,0,,7.824446,1,1,1,3174,0,3028,3116,0,0,0,2513,0,2513,0,22,0,19,,1936
695,29,0,696,0.182331,0.030075,16,16250,1,0,4639,4519,4519,0,,9.328213,1,1,1,3143,0,3023,3143,0,0,0,2418,0,2418,0,7,1,5,,841
699,61,0,700,0.182331,0.030075,17,27500,1,0,4362,4362,4237,0,,9.615872,1,1,0,3118,1,2993,2993,0,0,0,2725,0,2725,0,38,0,30,,3721


In [251]:
df = df[df['male']==1]
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,3479,3479,0,1.955861,10.075380,1,1,1,3163,0,3113,3163,0,0,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,2140,2140,0,0.357674,0.000000,1,0,1,2920,1,2920,2920,1,0,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,4505,4227,1,3.021887,0.000000,1,1,0,3038,1,2670,2760,0,20000,1,2815,0,2815,0,21,0,0,20.529997,1936
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,4007,4007,0,1.011601,9.328213,1,1,1,3493,0,3448,3493,0,2400,1,2580,0,2580,0,44,0,33,2.750000,4096
5,41,0,6,0.000000,0.000000,12,0,1,1,4812,4797,4797,0,2.957511,10.657280,1,1,1,4078,0,4063,4078,0,0,0,1205,0,0,1205,23,0,23,19.249998,1681
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
692,50,0,693,0.182331,0.030075,12,2500,1,0,4417,4147,4147,0,,9.328213,1,1,0,3330,0,3060,3330,0,2500,1,2603,0,2603,0,32,0,22,,2500
694,44,0,695,0.182331,0.030075,16,16250,1,0,4539,4451,4393,0,,7.824446,1,1,1,3174,0,3028,3116,0,0,0,2513,0,2513,0,22,0,19,,1936
695,29,0,696,0.182331,0.030075,16,16250,1,0,4639,4519,4519,0,,9.328213,1,1,1,3143,0,3023,3143,0,0,0,2418,0,2418,0,7,1,5,,841
699,61,0,700,0.182331,0.030075,17,27500,1,0,4362,4362,4237,0,,9.615872,1,1,0,3118,1,2993,2993,0,0,0,2725,0,2725,0,38,0,30,,3721


In [254]:
sleep_eq33 = smf.ols(formula='sleep~totwrk+age+smsa+south', data=df).fit()
sleep_eq33.params

Intercept    3600.958512
totwrk         -0.187650
age             3.301936
smsa          -77.686142
south          60.113665
dtype: float64

In [255]:
sleep_eq33.centered_tss.round(3) # TSS - общая сумма квадратов

75570140.578

In [256]:
sleep_eq33.ess.round(3) # ESS - объяснённая сумма квадратов

12125135.598

In [257]:
RSS=sleep_eq33.ssr.round(3) # RSS - остаточная сумма квадртов
RSS

63445004.98

In [258]:
sleep_eq33.rsquared.round(7) # R^2

0.1604488

In [259]:
sleep_eq33.rsquared_adj.round(7) # R2adj

0.151947

In [260]:
n=400
k=4
SER=np.sqrt(RSS/(n-k-1)) # стандартная ошибка регрессии
round(SER,4)

400.7746

## 3.4 Sleep equation (women only)

In [261]:
pd.set_option('display.max_columns', None) # отображаем все столбцы в таблице

In [262]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv')
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,3479,3479,0,1.955861,10.075380,1,1,1,3163,0,3113,3163,0,0,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,2140,2140,0,0.357674,0.000000,1,0,1,2920,1,2920,2920,1,0,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,4505,4227,1,3.021887,0.000000,1,1,0,3038,1,2670,2760,0,20000,1,2815,0,2815,0,21,0,0,20.529997,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,3211,3211,0,2.263844,0.000000,0,1,1,3083,1,3083,3083,0,5000,1,3786,0,3786,0,12,0,12,9.619998,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,4007,4007,0,1.011601,9.328213,1,1,1,3493,0,3448,3493,0,2400,1,2580,0,2580,0,44,0,33,2.750000,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,4669,4644,1,,9.664660,0,1,1,3410,0,2985,3385,0,16000,1,2026,0,2026,0,27,0,18,,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,5870,5870,0,,0.000000,1,1,0,3535,0,3520,3535,0,0,0,675,1,465,210,18,0,4,,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,4719,4584,0,,9.235130,0,1,1,3645,0,3510,3510,0,12000,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,5119,5031,0,,9.210441,0,1,1,3088,1,2970,3000,1,35000,1,1961,1,1481,480,31,0,22,,2916


In [263]:
df[df['male']==0]

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,3211,3211,0,2.263844,0.000000,0,1,1,3083,1,3083,3083,0,5000,1,3786,0,3786,0,12,0,12,9.619998,900
11,23,0,12,0.000000,0.000000,16,1000,1,1,6415,6355,6230,1,0.207014,7.313887,0,1,0,3480,0,3295,3355,0,3000,1,370,0,370,0,1,0,4,1.230000,529
27,35,0,28,1.000000,0.000000,12,6750,1,1,4904,4829,4829,1,0.989541,7.313887,0,0,1,3001,0,2926,3001,0,0,0,2250,0,2250,0,17,0,0,2.690000,1225
42,44,0,43,0.000000,0.000000,17,6750,1,1,4804,4591,4591,0,0.770108,7.313887,0,0,0,2978,1,2765,2978,0,0,0,2511,0,2511,0,21,0,0,2.159999,1936
55,24,0,56,0.000000,0.000000,14,1000,1,1,5490,5490,5490,0,0.048790,0.000000,0,0,0,2790,1,2790,2790,0,0,0,1800,1,1800,0,4,0,0,1.050000,576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,32,0,701,0.182331,0.030075,16,10500,1,0,4707,4587,4512,1,,9.282754,0,0,0,3180,0,2985,3105,1,0,0,2388,0,2388,0,10,0,0,,1024
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,4669,4644,1,,9.664660,0,1,1,3410,0,2985,3385,0,16000,1,2026,0,2026,0,27,0,18,,2025
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,4719,4584,0,,9.235130,0,1,1,3645,0,3510,3510,0,12000,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,5119,5031,0,,9.210441,0,1,1,3088,1,2970,3000,1,35000,1,1961,1,1481,480,31,0,22,,2916


In [264]:
df = df[df['male']==0]
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,leis2,leis3,smsa,lhrwage,lothinc,male,marr,prot,rlxall,selfe,sleep,slpnaps,south,spsepay,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,3211,3211,0,2.263844,0.000000,0,1,1,3083,1,3083,3083,0,5000,1,3786,0,3786,0,12,0,12,9.619998,900
11,23,0,12,0.000000,0.000000,16,1000,1,1,6415,6355,6230,1,0.207014,7.313887,0,1,0,3480,0,3295,3355,0,3000,1,370,0,370,0,1,0,4,1.230000,529
27,35,0,28,1.000000,0.000000,12,6750,1,1,4904,4829,4829,1,0.989541,7.313887,0,0,1,3001,0,2926,3001,0,0,0,2250,0,2250,0,17,0,0,2.690000,1225
42,44,0,43,0.000000,0.000000,17,6750,1,1,4804,4591,4591,0,0.770108,7.313887,0,0,0,2978,1,2765,2978,0,0,0,2511,0,2511,0,21,0,0,2.159999,1936
55,24,0,56,0.000000,0.000000,14,1000,1,1,5490,5490,5490,0,0.048790,0.000000,0,0,0,2790,1,2790,2790,0,0,0,1800,1,1800,0,4,0,0,1.050000,576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,32,0,701,0.182331,0.030075,16,10500,1,0,4707,4587,4512,1,,9.282754,0,0,0,3180,0,2985,3105,1,0,0,2388,0,2388,0,10,0,0,,1024
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,4669,4644,1,,9.664660,0,1,1,3410,0,2985,3385,0,16000,1,2026,0,2026,0,27,0,18,,2025
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,4719,4584,0,,9.235130,0,1,1,3645,0,3510,3510,0,12000,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,5119,5031,0,,9.210441,0,1,1,3088,1,2970,3000,1,35000,1,1961,1,1481,480,31,0,22,,2916


In [265]:
sleep_eq34 = smf.ols(formula='sleep~totwrk+age+smsa+south', data=df).fit()
sleep_eq34.params

Intercept    3468.202663
totwrk         -0.150993
age             1.645390
smsa          -37.687393
south         136.029203
dtype: float64

In [266]:
sleep_eq34.centered_tss.round(3) # TSS - общая сумма квадратов

63490152.118

In [267]:
sleep_eq34.ess.round(3) # ESS - объяснённая сумма квадратов

6300990.569

In [268]:
RSS=sleep_eq34.ssr.round(3) # RSS - остаточная сумма квадртов
RSS

57189161.549

In [269]:
sleep_eq34.rsquared.round(7) # R^2

0.0992436

In [270]:
sleep_eq34.rsquared_adj.round(7) # R2adj

0.0872734

In [271]:
n=306
k=4
SER=np.sqrt(RSS/(n-k-1)) # стандартная ошибка регрессии
round(SER,4)

435.8867

## 3.5 Labour equation (in levels)

In [272]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')
df

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


In [273]:
labour_eq35 = smf.ols(formula='output~capital+labour+wage', data=df).fit()
labour_eq35.params

Intercept   -16.745898
capital       0.395074
labour        0.049899
wage          0.436984
dtype: float64

In [274]:
labour_eq35.centered_tss.round(3) # TSS - общая сумма квадратов

2235073.295

In [280]:
labour_eq35.ess.round(3) # ESS - объяснённая сумма квадратов

2190376.077

In [281]:
RSS=labour_eq35.ssr.round(3) # RSS - остаточная сумма квадртов
RSS

44697.218

In [282]:
labour_eq35.rsquared.round(7) # R^2

0.9800019

In [283]:
labour_eq35.rsquared_adj.round(7) # R2adj

0.9798957

In [284]:
n=569
k=4
SER=np.sqrt(RSS/(n-k-1)) # стандартная ошибка регрессии
round(SER,4)

8.9023

## 3.6 Labour equation (in log)

In [285]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv')
df

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


In [286]:
labour_eq36 = smf.ols(formula='np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)', data=df).fit()
labour_eq36.params

Intercept         -5.007316
np.log(capital)    0.149343
np.log(labour)     0.720410
np.log(wage)       0.921390
dtype: float64

In [287]:
labour_eq36.centered_tss.round(3) # TSS - общая сумма квадратов

797.667

In [288]:
labour_eq36.ess.round(3) # ESS - объяснённая сумма квадратов

708.647

In [289]:
RSS=labour_eq36.ssr.round(3) # RSS - остаточная сумма квадртов
RSS

89.02

In [290]:
labour_eq36.rsquared.round(7) # R^2

0.8883996

In [291]:
labour_eq36.rsquared_adj.round(7) # R2adj

0.887807

In [292]:
n=569
k=4
SER=np.sqrt(RSS/(n-k-1)) # стандартная ошибка регрессии
round(SER,4)

0.3973

## 3.7 Cost equation (in levels)

In [293]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Electricity.csv')
df

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [307]:
cost_eq37 = smf.ols(formula='cost~q+(q**2)+pk+pf+pl', data=df).fit()
cost_eq37.params

Intercept   -84.555714
q             0.005471
pk            0.303479
pf            1.071896
pl            0.003239
dtype: float64

In [308]:
cost_eq37.centered_tss.round(3) # TSS - общая сумма квадратов

1189954.209

In [309]:
cost_eq37.ess.round(3) # ESS - объяснённая сумма квадратов

1131262.896

In [310]:
RSS=cost_eq37.ssr.round(3) # RSS - остаточная сумма квадртов
RSS

58691.313

In [311]:
cost_eq37.rsquared.round(7) # R^2

0.9506777

In [312]:
cost_eq37.rsquared_adj.round(7) # R2adj

0.9493882

In [313]:
n=158
k=4
SER=np.sqrt(RSS/(n-k-1)) # стандартная ошибка регрессии
round(SER,4)

19.5858

## 3.8 Cost equation (in logs)

In [305]:
df = pd.read_csv('https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Electricity.csv')
df

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [316]:
cost_eq38 = smf.ols(formula='np.log(cost)~np.log(q)+np.log(q)*np.log(q)+np.log(pk)+np.log(pf)+np.log(pl)', data=df).fit()
cost_eq38.params

Intercept    -7.472227
np.log(q)     0.838111
np.log(pk)    0.188449
np.log(pf)    0.713098
np.log(pl)    0.044258
dtype: float64

In [317]:
cost_eq38.centered_tss.round(3) # TSS - общая сумма квадратов

373.665

In [318]:
cost_eq38.ess.round(3) # ESS - объяснённая сумма квадратов

367.002

In [319]:
RSS=cost_eq38.ssr.round(3) # RSS - остаточная сумма квадртов
RSS

6.663

In [320]:
cost_eq38.rsquared.round(7) # R^2

0.9821694

In [321]:
cost_eq38.rsquared_adj.round(7) # R2adj

0.9817032

In [322]:
n=158
k=4
SER=np.sqrt(RSS/(n-k-1)) # стандартная ошибка регрессии
round(SER,4)

0.2087