In [2]:
import pandas as pd
from statsmodels.formula.api import ols

Aggregating inflation data and merging inflation and location data.

In [4]:
# Read the two interim feather files: the inflation observations and the
# table of country names with their latitude/longitude.
inflation_data = pd.read_feather("../data/interim/inflation-data-clean.ftr")
long_lat_data = pd.read_feather("../data/interim/countries-with-long-lat-data.ftr")

# Collapse the inflation observations to a single mean OBS_VALUE per
# reference area, with "Reference area" restored as an ordinary column.
inflation_data_aggregated = (
    inflation_data.groupby("Reference area")[["OBS_VALUE"]].mean().reset_index()
)

# Preview the first rows of both tables as plain text, one after the other.
print(inflation_data_aggregated.head(), long_lat_data.head(), sep="\n")

  Reference area  OBS_VALUE
0        Austria   2.634513
1        Belgium   2.473894
2       Bulgaria   3.686726
3        Croatia   2.717257
4        Czechia   3.402655
         name country   latitude  longitude
0    Slovenia      SI  46.151241  14.995463
1     Estonia      EE  58.595272  25.013607
2     Denmark      DK  56.263920   9.501785
3     Belgium      BE  50.503887   4.469936
4  Luxembourg      LU  49.815273   6.129583


In [5]:
# Give the aggregated table the same key column name as the location table.
# The result is rebound instead of using inplace=True, so the frame object
# built in the previous cell is not mutated behind the reader's back; renaming
# an already-renamed frame is a no-op, so the cell can be re-run safely.
inflation_data_aggregated = inflation_data_aggregated.rename(
    columns={"Reference area": "name"}
)

# An inner join silently discards every country whose name has no exact match
# in the location table, so report such names instead of losing them quietly.
countries_without_coords = sorted(
    set(inflation_data_aggregated["name"]) - set(long_lat_data["name"])
)
if countries_without_coords:
    print(f"No coordinates found, dropped from merge: {countries_without_coords}")

# One row per country present in both tables: mean OBS_VALUE plus coordinates.
merged_data = pd.merge(inflation_data_aggregated, long_lat_data, on="name", how="inner")

Linear regressions of the inflation rate on latitude, on longitude, and a combined regression on both.

In [6]:
# Simple OLS: per-country OBS_VALUE regressed on latitude only.
lm_lat = ols(
    formula="OBS_VALUE ~ latitude",
    data=merged_data,
).fit()
# Left as the cell's last expression so the summary table is rendered.
lm_lat.summary()

0,1,2,3
Dep. Variable:,OBS_VALUE,R-squared:,0.034
Model:,OLS,Adj. R-squared:,-0.002
Method:,Least Squares,F-statistic:,0.9382
Date:,"Wed, 13 Nov 2024",Prob (F-statistic):,0.341
Time:,14:35:52,Log-Likelihood:,-72.182
No. Observations:,29,AIC:,148.4
Df Residuals:,27,BIC:,151.1
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,7.1080,4.023,1.767,0.089,-1.146,15.362
latitude,-0.0767,0.079,-0.969,0.341,-0.239,0.086

0,1,2,3
Omnibus:,53.462,Durbin-Watson:,2.29
Prob(Omnibus):,0.0,Jarque-Bera (JB):,336.033
Skew:,3.676,Prob(JB):,1.07e-73
Kurtosis:,17.968,Cond. No.,364.0


In [7]:
# Simple OLS: per-country OBS_VALUE regressed on longitude only.
lm_long = ols(
    formula="OBS_VALUE ~ longitude",
    data=merged_data,
).fit()
# Left as the cell's last expression so the summary table is rendered.
lm_long.summary()

0,1,2,3
Dep. Variable:,OBS_VALUE,R-squared:,0.188
Model:,OLS,Adj. R-squared:,0.158
Method:,Least Squares,F-statistic:,6.255
Date:,"Wed, 13 Nov 2024",Prob (F-statistic):,0.0188
Time:,14:36:28,Log-Likelihood:,-69.656
No. Observations:,29,AIC:,143.3
Df Residuals:,27,BIC:,146.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.9919,0.719,2.769,0.010,0.516,3.468
longitude,0.1061,0.042,2.501,0.019,0.019,0.193

0,1,2,3
Omnibus:,49.469,Durbin-Watson:,2.001
Prob(Omnibus):,0.0,Jarque-Bera (JB):,255.581
Skew:,3.418,Prob(JB):,3.17e-56
Kurtosis:,15.837,Cond. No.,23.7


In [8]:
# Multiple OLS: per-country OBS_VALUE regressed on latitude and longitude jointly.
lm_comb = ols(
    formula="OBS_VALUE ~ latitude + longitude",
    data=merged_data,
).fit()
# Left as the cell's last expression so the summary table is rendered.
lm_comb.summary()

0,1,2,3
Dep. Variable:,OBS_VALUE,R-squared:,0.205
Model:,OLS,Adj. R-squared:,0.144
Method:,Least Squares,F-statistic:,3.362
Date:,"Wed, 13 Nov 2024",Prob (F-statistic):,0.0503
Time:,14:36:36,Log-Likelihood:,-69.342
No. Observations:,29,AIC:,144.7
Df Residuals:,26,BIC:,148.8
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.8369,3.838,1.260,0.219,-3.053,12.727
latitude,-0.0556,0.074,-0.755,0.457,-0.207,0.096
longitude,0.1022,0.043,2.372,0.025,0.014,0.191

0,1,2,3
Omnibus:,45.716,Durbin-Watson:,1.979
Prob(Omnibus):,0.0,Jarque-Bera (JB):,198.622
Skew:,3.175,Prob(JB):,7.41e-44
Kurtosis:,14.138,Cond. No.,386.0
