# Odhad CDPF

## Cíl analýzy

Cílem analýzy je odhad neoklasické agregátní CDPF.

## Data a metoda

Pro odhad byly použity následující hodnoty:

- hrubá přidaná hodnota (Gross Value Added) (převedená na stálé ceny roku 2010 pomocí deflátoru HDP),
- čistý fixní nefinanční kapitál (převedený na stálé ceny roku 2010 pomocí deflátoru kapitálových zásob),
- celkový počet odpracovaných hodin.

In [37]:
import eurostat
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.tsa.stattools import adfuller
from scipy import stats
import statsmodels.tsa.stattools as ts
import numpy
import seaborn
from scipy.stats.mstats import gmean

LOCATIONS = ["CZ", "AT", "EU27_2020"]
BASE_LOCATION = "CZ"
START_YEAR = 1996
END_YEAR = 2020

### Hrubá přidaná hodnota (Gross Value Added)

Hrubá přidaná hodnota je načtena na základě kódu `nama_10_a10`.

In [3]:
data_1_es = eurostat.get_data_df('nama_10_a10')

In [41]:
data_1 = data_1_es
# Value added, gross
data_1 = data_1[data_1["na_item"].isin(["B1G", "D1"])]
# Total, all NACE activities
data_1 = data_1[data_1["nace_r2"] == "TOTAL"]
# Current prices, million euro
data_1 = data_1[data_1["unit"].isin(["CP_MNAC", "PD10_NAC"])]
data_1 = pd.melt(data_1, id_vars=["unit", "na_item", "geo\\time"], value_vars=list(range(START_YEAR, END_YEAR + 1)),
                     value_name="value", var_name="year")
data_1 = data_1.pivot(index=["geo\\time", "year"], columns=["unit", "na_item"], values="value")
data_1 = pd.DataFrame(data_1)
data_1[data_1.index == ("AT", 2018)]

Unnamed: 0_level_0,unit,CP_MNAC,CP_MNAC,PD10_NAC
Unnamed: 0_level_1,na_item,B1G,D1,B1G
geo\time,year,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AT,2018,344416.8,185126.8,115.526


In [42]:
data_1 = data_1.dropna()
data_1[("CP_MNAC", "B1G")] = data_1[("CP_MNAC", "B1G")] / data_1[("PD10_NAC", "B1G")] / 100
data_1[("CP_MNAC", "D1")] = data_1[("CP_MNAC", "D1")] / data_1[("PD10_NAC", "B1G")] / 100
data_1 = data_1.drop([("PD10_NAC", "B1G")], axis=1)
data_1.columns = ['_'.join(col) for col in data_1.columns.values]
data_1 = data_1.rename({"CP_MNAC_B1G": "gross_value_added", "CP_MNAC_D1": "compensation_of_employees"}, axis=1)
data_1 = data_1.reset_index()
data_1.head()

Unnamed: 0,geo\time,year,gross_value_added,compensation_of_employees
0,AT,1996,19.750439,11.034499
1,AT,1997,20.165157,11.131065
2,AT,1998,20.953635,11.522692
3,AT,1999,21.632659,11.920292
4,AT,2000,22.467417,12.181119


In [43]:
data_1_gmean = data_1
data_1_gmean["gross_value_added_t-1"] = data_1_gmean.groupby(["geo\\time"])["gross_value_added"].shift()
data_1_gmean = data_1_gmean.dropna()
data_1_gmean["gross_value_added_index"] = data_1_gmean["gross_value_added"] / data_1["gross_value_added_t-1"]
data_1_gmean = data_1_gmean.groupby(["geo\\time"]).agg({"gross_value_added_index": [gmean]}) - 1
data_1_gmean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_1_gmean["gross_value_added_index"] = data_1_gmean["gross_value_added"] / data_1["gross_value_added_t-1"]


Unnamed: 0_level_0,gross_value_added_index
Unnamed: 0_level_1,gmean
geo\time,Unnamed: 1_level_2
AT,0.01494
BA,0.010248
BE,0.015873
BG,0.016473
CH,0.018418
CY,0.025671
CZ,0.022327
DE,0.01226
DK,0.014025
EA,0.014163


In [5]:
data_1 = data_1.sort_values(["geo\\time", "year"])
data_1["ln_gross_value_added"] = numpy.log(data_1["gross_value_added"])
data_1["ln_gross_value_added_t_minus_1"] = data_1.groupby("geo\\time")["ln_gross_value_added"].shift(1)
data_1["ln_gross_value_added_diff"] = (data_1["ln_gross_value_added"] - data_1["ln_gross_value_added_t_minus_1"]) / data_1["ln_gross_value_added"]

data_1["ln_compensation_of_employees"] = numpy.log(data_1["compensation_of_employees"])
data_1["ln_compensation_of_employees_t_minus_1"] = data_1.groupby("geo\\time")["ln_compensation_of_employees"].shift(1)
data_1["ln_compensation_of_employees_diff"] = (data_1["ln_compensation_of_employees"] - data_1["ln_compensation_of_employees_t_minus_1"]) / data_1["ln_compensation_of_employees"]

data_1 = data_1[["geo\\time", "year", "ln_gross_value_added", "ln_gross_value_added_diff", "ln_compensation_of_employees", "ln_compensation_of_employees_diff"]]
data_1.tail()

Unnamed: 0,geo\time,year,ln_gross_value_added,ln_gross_value_added_diff,ln_compensation_of_employees,ln_compensation_of_employees_diff
966,UK,2015,14.419808,0.002668,13.809209,0.002659
967,UK,2016,14.478324,0.004042,13.86945,0.004343
968,UK,2017,14.534647,0.003875,13.92965,0.004322
969,UK,2018,14.591866,0.003921,13.99175,0.004438
970,UK,2019,14.648765,0.003884,14.059195,0.004797


In [6]:
data_3_es = eurostat.get_data_df('nama_10_nfa_bs')

In [7]:
data_3 = data_3_es
# Total fixed assets (net)
data_3 = data_3[data_3["asset10"] == "N11N"]
# Total, all activities
data_3 = data_3[data_3["sector"] == "S1"]
# Current fixed prices, millions of national currency
data_3 = data_3[data_3["unit"] == "CP_MNAC"]
data_3 = pandas.melt(data_3, id_vars=["unit", "asset10", "geo\\time"], value_vars=list(range(START_YEAR, END_YEAR + 1)),
                    value_name="total_fixed_assets", var_name="year")
data_3 = data_3.sort_values(["geo\\time", "year"])
data_3 = data_3.dropna()
data_3.head()

Unnamed: 0,unit,asset10,geo\time,year,total_fixed_assets
0,CP_MNAC,N11N,AT,1996,659084.4
29,CP_MNAC,N11N,AT,1997,686219.3
58,CP_MNAC,N11N,AT,1998,710697.9
87,CP_MNAC,N11N,AT,1999,737295.8
116,CP_MNAC,N11N,AT,2000,768843.5


In [8]:
data_4_es = eurostat.get_data_df('nama_10_nfa_fl')

In [9]:
data_4 = data_4_es
data_4 = data_4[data_4["asset10"] == "N11G"]
data_4 = data_4[data_4["nace_r2"] == "TOTAL"]
data_4 = data_4[data_4["unit"] == "PD10_NAC"]
data_4 = pd.melt(data_4, id_vars=["unit", "asset10", "geo\\time"], value_vars=list(range(START_YEAR, END_YEAR + 1)),
                    value_name="capital_stock_deflator", var_name="year")
data_4 = data_4.sort_values(["geo\\time", "year"])
data_4 = data_4.dropna()

In [10]:
data_3 = data_3.merge(data_4, on=["geo\\time", "year"])
data_3["total_fixed_assets"] = data_3["total_fixed_assets"] / data_3["capital_stock_deflator"]
data_3["ln_total_fixed_assets"] = numpy.log(data_3["total_fixed_assets"])
data_3["ln_total_fixed_assets_t_minus_1"] = data_3.groupby("geo\\time")["ln_total_fixed_assets"].shift(1)
data_3["ln_total_fixed_assets_diff"] = (data_3["ln_total_fixed_assets"] - data_3["ln_total_fixed_assets_t_minus_1"]) / data_3["ln_total_fixed_assets"]
data_3 = data_3[["geo\\time", "year", "ln_total_fixed_assets", "ln_total_fixed_assets_diff"]]
data_3.head()

Unnamed: 0,geo\time,year,ln_total_fixed_assets,ln_total_fixed_assets_diff
0,AT,1996,17.78963,
1,AT,1997,17.84137,0.0029
2,AT,1998,17.884289,0.0024
3,AT,1999,17.926085,0.002332
4,AT,2000,17.983888,0.003214


In [11]:
data = pandas.merge(data_1, data_3, on=["geo\\time", "year"])
data = data.dropna()
data = data[["ln_gross_value_added_diff", "ln_compensation_of_employees_diff", "ln_total_fixed_assets_diff", "geo\\time", "year"]]
data = data[data["geo\\time"] == "AT"]
data.head()

Unnamed: 0,ln_gross_value_added_diff,ln_compensation_of_employees_diff,ln_total_fixed_assets_diff,geo\time,year
1,0.003222,0.00232,0.0029,AT,1997
2,0.00361,0.003467,0.0024,AT,1998
3,0.003078,0.003419,0.002332,AT,1999
4,0.005311,0.004172,0.003214,AT,2000
5,0.004375,0.003575,0.002373,AT,2001


In [12]:
data["const"] = 1

import statsmodels.api as sm
mod = sm.OLS(data["ln_gross_value_added_diff"], data[["const", "ln_compensation_of_employees_diff", "ln_total_fixed_assets_diff"]])
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,ln_gross_value_added_diff,R-squared:,0.576
Model:,OLS,Adj. R-squared:,0.536
Method:,Least Squares,F-statistic:,14.27
Date:,"Fri, 21 Jan 2022",Prob (F-statistic):,0.000122
Time:,19:22:11,Log-Likelihood:,129.64
No. Observations:,24,AIC:,-253.3
Df Residuals:,21,BIC:,-249.7
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3.359e-05,0.001,-0.032,0.975,-0.002,0.002
ln_compensation_of_employees_diff,1.2147,0.273,4.452,0.000,0.647,1.782
ln_total_fixed_assets_diff,-0.3517,0.462,-0.762,0.455,-1.312,0.608

0,1,2,3
Omnibus:,1.888,Durbin-Watson:,1.086
Prob(Omnibus):,0.389,Jarque-Bera (JB):,1.506
Skew:,-0.592,Prob(JB):,0.471
Kurtosis:,2.674,Cond. No.,2110.0


In [13]:
data["resid"] = res.resid
data["TFP3"] = data["resid"] + res.params[0]
data["%"] =  data["TFP3"] / data["ln_gross_value_added_diff"]
# data = data[data["year"] != 2009]
data["%"].mean()


0.9103242402462827