# Расчет экономической эффективности решения
## команда fit_predict
## Цифровой Прорыв 2020

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#для простой линейной регрессии
from sklearn import linear_model

#для линейной регрессии со статистическим описанием
import statsmodels.api as sm
#тест Рамсея
from statsmodels.stats.diagnostic import linear_reset
#для множественной проверки гипотез
from statsmodels.sandbox.stats.multicomp import multipletests

from sklearn import model_selection

## Подготовка данных
данные с fedstat.ru

In [29]:
# Load the regional statistics (deflated GRP, in thousands of rubles).
group_keys = ['yr', 'reg_code', 'federal']
value_cols = ['act_vrp_acc', 'act_depr_acc', 'act_labor_acc']
raw_data = pd.read_csv(
    'total_regression.csv',
    sep=";",
    encoding='cp1251',
    decimal=',',
)

# Keep only the columns used below and sum the value columns within each
# (yr, reg_code, federal) group, then turn the group keys back into columns.
total_data = raw_data[group_keys + value_cols].groupby(group_keys).sum().reset_index()

# Natural logarithms of the aggregated series; these are the regression
# variables. Insertion order (vrp, labor, depr) fixes the column order.
log_columns = {
    'vrp_ln': 'act_vrp_acc',
    'labor_ln': 'act_labor_acc',
    'depr_ln': 'act_depr_acc',
}
for log_name, source_name in log_columns.items():
    total_data[log_name] = np.log(total_data[source_name])

# Add the `const` column so the OLS model gets an intercept term.
total_data = sm.tools.add_constant(total_data)
total_data

Unnamed: 0,const,yr,reg_code,federal,act_vrp_acc,act_depr_acc,act_labor_acc,vrp_ln,labor_ln,depr_ln
0,1.0,2005,1,ЦФО,3.581802e+08,1.077639e+07,8.447832e+07,19.696547,18.252005,16.192868
1,1.0,2005,2,ЦФО,1.999489e+08,6.811669e+06,7.014155e+07,19.113573,18.066026,15.734148
2,1.0,2005,3,ЦФО,3.101590e+08,1.422439e+07,9.028226e+07,19.552596,18.318452,16.470469
3,1.0,2005,4,ЦФО,4.929731e+08,3.080616e+07,1.188950e+08,20.015965,18.593751,17.243225
4,1.0,2005,5,ЦФО,1.930694e+08,5.782480e+06,5.610378e+07,19.078560,17.842714,15.570343
...,...,...,...,...,...,...,...,...,...,...
1009,1.0,2017,78,ДФО,2.655317e+08,4.225201e+07,1.086736e+08,19.397245,18.503859,17.559162
1010,1.0,2017,79,ДФО,1.574263e+08,1.424668e+07,5.363570e+07,18.874468,17.797725,16.472034
1011,1.0,2017,80,ДФО,7.703437e+08,1.643668e+08,1.111744e+08,20.462347,18.526611,18.917611
1012,1.0,2017,81,ДФО,5.255018e+07,9.616147e+06,1.620516e+07,17.777279,16.600840,16.078954


## Аппроксимация линейного тренда

In [34]:
# Fit an OLS regression of log GRP on a constant, log labor and log
# depreciation. Standard errors use the HAC covariance estimator with a
# maximum lag of 1.
regressors = total_data[['const', 'labor_ln', 'depr_ln']]
target = total_data['vrp_ln']
lm_fitted = sm.OLS(endog=target, exog=regressors).fit(
    cov_type='HAC',
    cov_kwds={'maxlags': 1},
)
lm_fitted.summary()

0,1,2,3
Dep. Variable:,vrp_ln,R-squared:,0.964
Model:,OLS,Adj. R-squared:,0.963
Method:,Least Squares,F-statistic:,9523.0
Date:,"Sat, 28 Nov 2020",Prob (F-statistic):,0.0
Time:,17:53:22,Log-Likelihood:,201.07
No. Observations:,1014,AIC:,-396.1
Df Residuals:,1011,BIC:,-381.4
Df Model:,2,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,1.9681,0.180,10.927,0.000,1.615,2.321
labor_ln,0.8143,0.020,39.733,0.000,0.774,0.855
depr_ln,0.1649,0.015,11.306,0.000,0.136,0.193

0,1,2,3
Omnibus:,172.43,Durbin-Watson:,1.607
Prob(Omnibus):,0.0,Jarque-Bera (JB):,413.962
Skew:,0.92,Prob(JB):,1.29e-90
Kurtosis:,5.533,Cond. No.,628.0


## Индуктивная проверка

In [66]:
# Country-level totals for 2017: sum every region's GRP, depreciation and
# labor values for that year.
rows_2017 = total_data[total_data.yr == 2017]
vvp_last = rows_2017['act_vrp_acc'].sum()
depr_last = rows_2017['act_depr_acc'].sum()
labor_last = rows_2017['act_labor_acc'].sum()

# Check how well the fitted model reproduces the aggregate: compare the
# actual log of total GRP with the model's prediction from the aggregate
# log labor and log depreciation.
vvp_last_ln = np.log(vvp_last)
labor_last_ln = np.log(labor_last)
depr_last_ln = np.log(depr_last)
print(vvp_last_ln, labor_last_ln, depr_last_ln)

# Regressor row in the same order as the fit: const, labor_ln, depr_ln.
X_pred = [[1, labor_last_ln, depr_last_ln]]
# Baseline predicted output in levels; later scenarios are measured against it.
start_vvp = np.exp(lm_fitted.predict(X_pred))
print(np.log(start_vvp))

24.79138252981371 23.388564057010292 22.56160744285708
[24.73382297]


## Расчет смертности от сердечно-сосудистых заболеваний (ССЗ)
данные с fedstat.ru

In [50]:
# Share of deaths attributed to cardiovascular disease (CVD): the ratio of
# the two figures below, roughly 0.47.
# NOTE(review): 583.1 and 1245.6 look like CVD and overall mortality rates
# from the same source table - confirm units against the source data.
cvd_rate = 583.1
overall_rate = 1245.6
main_perc = cvd_rate / overall_rate

# Death count for the last year that the CVD share is applied to.
# NOTE(review): presumably total deaths from all causes (the code scales it
# by the CVD share to obtain CVD deaths) - verify against the source data.
deaths = 1828910

# Estimated number of CVD deaths.
heart_deaths = main_perc * deaths
print(heart_deaths)

856163.6327874118

In [74]:
# Scenario analysis: for p = 1, 4, ..., 49 percent of CVD deaths added to
# the labor input, print the change in predicted output relative to the
# baseline prediction `start_vvp`.
# NOTE(review): this adds a head count (heart_deaths) to labor_last, which is
# the sum of act_labor_acc - confirm both are expressed in the same units.
depr_term = np.log(depr_last)  # unchanged across scenarios
for p in range(1, 50, 3):
    extra_labor = heart_deaths * p / 100
    X_pred = [[1, np.log(labor_last + extra_labor), depr_term]]
    delta_vvp = np.exp(lm_fitted.predict(X_pred)) - start_vvp
    print(p, delta_vvp)

1 [26767.00468445]
4 [107068.00099182]
7 [187368.97106934]
10 [267669.91411591]
13 [347970.83052826]
16 [428271.72031403]
19 [508572.58385468]
22 [588873.42037201]
25 [669174.23025513]
28 [749475.0134964]
31 [829775.77011108]
34 [910076.50009155]
37 [990377.20343018]
40 [1070677.88014221]
43 [1150978.5306015]
46 [1231279.15403748]
49 [1311579.75043488]
