In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.neural_network import MLPRegressor

In [2]:
# Load the inflation data files and add a parsed `date` column to each.

In [2]:
# Load the cleaned PPI table; path is relative to the notebook's working directory.
ppi = pd.read_csv('cleaned/ppi.csv')

In [3]:
# Preview the loaded frame (pandas truncates the display to the first/last rows).
ppi

Unnamed: 0,time,country,sector,value_var,value_100
0,2015M01,EU,general,-0.7,99.7
1,2015M01,EU,intermediate goods,-0.3,100.0
2,2015M01,EU,energy,-4.3,99.5
3,2015M01,EU,capital goods,0.7,99.6
4,2015M01,EU,durable consumer goods,0.3,99.6
...,...,...,...,...,...
14107,2021M12,Sweden,intermediate goods,-1.2,139.1
14108,2021M12,Sweden,energy,19.3,186.4
14109,2021M12,Sweden,capital goods,0.8,114.0
14110,2021M12,Sweden,durable consumer goods,0.5,121.6


In [4]:
# Show the last 20 rows whose country is 'EU'.
ppi.loc[ppi['country'].eq('EU')].tail(20)

Unnamed: 0,time,country,sector,value_var,value_100
13444,2021M09,EU,durable consumer goods,0.4,107.6
13445,2021M09,EU,non-durable consumer goods,0.3,105.7
13608,2021M10,EU,general,3.7,119.2
13609,2021M10,EU,intermediate goods,1.5,120.4
13610,2021M10,EU,energy,14.7,157.5
13611,2021M10,EU,capital goods,0.6,106.0
13612,2021M10,EU,durable consumer goods,0.6,108.2
13613,2021M10,EU,non-durable consumer goods,0.6,106.3
13776,2021M11,EU,general,1.6,121.1
13777,2021M11,EU,intermediate goods,1.4,122.1


In [14]:
def to_date(x):
    """Convert a period label such as ``'2015M01'`` to a month-start Timestamp.

    Parameters
    ----------
    x : str
        Label in ``<year>M<month>`` form, e.g. ``'2021M12'``.

    Returns
    -------
    pandas.Timestamp or pandas.NaT
        The first day of the given month, or ``NaT`` when ``x`` is missing
        (not a string) or does not match the ``<year>M<month>`` pattern.
    """
    # Missing cells arrive from ``Series.apply`` as float NaN / None; calling
    # ``.split`` on them used to raise AttributeError, so map them to NaT.
    if not isinstance(x, str):
        return pd.NaT
    # Parse in one step with an explicit format instead of splitting the
    # string twice and letting pandas guess the layout. With
    # errors='coerce', malformed labels (including ones without an 'M',
    # which used to raise IndexError) become NaT.
    return pd.to_datetime(x.strip(), format='%YM%m', utc=False, errors='coerce')

In [7]:
# Parse each `time` label into a Timestamp with the notebook's to_date helper.
ppi['date'] = ppi['time'].apply(to_date)

In [8]:
# Preview again to check the newly added `date` column.
ppi

Unnamed: 0,time,country,sector,value_var,value_100,date
0,2015M01,EU,general,-0.7,99.7,2015-01-01
1,2015M01,EU,intermediate goods,-0.3,100.0,2015-01-01
2,2015M01,EU,energy,-4.3,99.5,2015-01-01
3,2015M01,EU,capital goods,0.7,99.6,2015-01-01
4,2015M01,EU,durable consumer goods,0.3,99.6,2015-01-01
...,...,...,...,...,...,...
14107,2021M12,Sweden,intermediate goods,-1.2,139.1,2021-12-01
14108,2021M12,Sweden,energy,19.3,186.4,2021-12-01
14109,2021M12,Sweden,capital goods,0.8,114.0,2021-12-01
14110,2021M12,Sweden,durable consumer goods,0.5,121.6,2021-12-01


In [10]:
# Export the PPI frame (without the index) as CSV and as an Excel workbook.
# NOTE(review): writing the legacy `.xls` format relies on the xlwt engine,
# which pandas deprecated in 1.2 and removed in 2.0 — confirm the pandas
# version in use; on pandas >= 2.0 the to_excel call below raises.
ppi.to_csv('graphs/ppi.csv', index=False)
ppi.to_excel('graphs/ppi.xls', index=False)

In [12]:
# Load the cleaned HICP table; path is relative to the notebook's working directory.
hicp = pd.read_csv('cleaned/hicp.csv')

In [13]:
# Preview the loaded frame (pandas truncates the display to the first/last rows).
hicp

Unnamed: 0,time,country,indicator,value_var,value_100
0,2015M01,EU,HICP,-1.3,98.46
1,2015M01,Belgium,HICP,-2.1,97.20
2,2015M01,Bulgaria,HICP,-0.7,99.80
3,2015M01,Czechia,HICP,0.0,99.50
4,2015M01,Denmark,HICP,-0.6,98.90
...,...,...,...,...,...
2347,2021M12,Romania,HICP,0.5,118.76
2348,2021M12,Slovenia,HICP,0.1,109.83
2349,2021M12,Slovakia,HICP,0.2,114.15
2350,2021M12,Finland,HICP,-0.1,107.60


In [14]:
# Parse each `time` label into a Timestamp with the notebook's to_date helper.
hicp['date'] = hicp['time'].apply(to_date)

In [15]:
# Preview again to check the newly added `date` column.
hicp

Unnamed: 0,time,country,indicator,value_var,value_100,date
0,2015M01,EU,HICP,-1.3,98.46,2015-01-01
1,2015M01,Belgium,HICP,-2.1,97.20,2015-01-01
2,2015M01,Bulgaria,HICP,-0.7,99.80,2015-01-01
3,2015M01,Czechia,HICP,0.0,99.50,2015-01-01
4,2015M01,Denmark,HICP,-0.6,98.90,2015-01-01
...,...,...,...,...,...,...
2347,2021M12,Romania,HICP,0.5,118.76,2021-12-01
2348,2021M12,Slovenia,HICP,0.1,109.83,2021-12-01
2349,2021M12,Slovakia,HICP,0.2,114.15,2021-12-01
2350,2021M12,Finland,HICP,-0.1,107.60,2021-12-01


In [16]:
# Export the HICP frame (without the index) as CSV and as an Excel workbook.
# NOTE(review): writing the legacy `.xls` format relies on the xlwt engine,
# which pandas deprecated in 1.2 and removed in 2.0 — confirm the pandas
# version in use; on pandas >= 2.0 the to_excel call below raises.
hicp.to_csv('graphs/hicp.csv', index=False)
hicp.to_excel('graphs/hicp.xls', index=False)

In [3]:
# Load the electricity table. Its `value`, `value_var` and `value_100`
# columns are copied into `indic` further down.
# NOTE(review): this input is read from `graphs/`, not `cleaned/` like the
# other inputs — confirm which step produces it.
elect = pd.read_csv('graphs/electricity_EU.csv')

In [5]:
# Load the commodity indicators table (price / var / 100 columns per commodity).
indic = pd.read_csv('cleaned/indicators_all.csv')

In [15]:
# Add the parsed `date` column, keep only the rows whose country is 'EU',
# and renumber the remaining rows from 0.
indic = (
    indic
    .assign(date=indic['time'].apply(to_date))
    .loc[lambda frame: frame['country'] == 'EU']
    .reset_index(drop=True)
)

In [8]:
# Attach the electricity series to the indicators frame. The columns are
# aligned on the row index, not on a date key.
# NOTE(review): this presumably assumes `elect` has one row per `indic` row
# in the same order — rows without a matching index label become NaN; verify.
indic = indic.assign(
    elec_price=elect['value'],
    elec_var=elect['value_var'],
    elec_100=elect['value_100'],
)

In [11]:
# Inspect the column dtypes, including the electricity columns just attached.
indic.dtypes

time                  object
country               object
crude_oil_price      float64
crude_oil_var        float64
crude_oil_100        float64
natural_gas_price    float64
natural_gas_var      float64
natural_gas_100      float64
copper_price         float64
copper_var           float64
copper_100           float64
aluminum_price       float64
aluminum_var         float64
aluminum_100         float64
steel_price            int64
steel_var            float64
steel_100            float64
coal_price           float64
coal_var             float64
coal_100             float64
elec_price           float64
elec_var             float64
elec_100             float64
dtype: object

In [None]:
# Export the wide indicators frame (without the index) to the working directory.
# NOTE(review): this cell has no execution count, so it was not run in the
# saved session — confirm whether these files are still needed.
# NOTE(review): writing the legacy `.xls` format relies on the xlwt engine,
# which pandas deprecated in 1.2 and removed in 2.0; on pandas >= 2.0 the
# to_excel call below raises.
indic.to_csv('commodities.csv', index=False)
indic.to_excel('commodities.xls', index=False)

In [16]:
def _commodity_long(frame, column, label):
    """Return ``date`` and one value column of ``frame`` as a labelled table.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source frame containing a ``date`` column and ``column``.
    column : str
        Name of the column to expose as ``value``.
    label : str
        Text stored in every row of the ``commoditie`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``date``, ``value``, ``commoditie``.
    """
    # rename/assign build a new frame instead of assigning a column on a
    # slice of `frame`, which triggered SettingWithCopyWarning (silenced by
    # the notebook-wide warnings filter).
    return (
        frame[['date', column]]
        .rename(columns={column: 'value'})
        # Column name kept exactly as spelled: it ends up in the exported
        # graphs/commodities files.
        .assign(commoditie=label)
    )


# One labelled frame per commodity, built from the `*_100` columns of `indic`.
# The single-letter names are kept because the concat cell refers to them.
a = _commodity_long(indic, 'crude_oil_100', 'crude oil')
b = _commodity_long(indic, 'natural_gas_100', 'natural gas')
c = _commodity_long(indic, 'coal_100', 'coal')
d = _commodity_long(indic, 'elec_100', 'electricity')
e = _commodity_long(indic, 'steel_100', 'steel')
f = _commodity_long(indic, 'copper_100', 'copper')
g = _commodity_long(indic, 'aluminum_100', 'aluminum')

In [23]:
# Stack the per-commodity frames into one long table; ignore_index=True
# renumbers the rows 0..n-1 in the same step.
commod = pd.concat([a, b, c, d, e, f, g], axis=0, ignore_index=True)

In [25]:
# Preview the stacked long-format table (date, value, commoditie).
commod

Unnamed: 0,date,value,commoditie
0,2015-01-01,100.00,crude oil
1,2015-02-01,88.52,crude oil
2,2015-03-01,91.98,crude oil
3,2015-04-01,88.45,crude oil
4,2015-05-01,111.22,crude oil
...,...,...,...
583,2021-08-01,116.96,aluminum
584,2021-09-01,125.39,aluminum
585,2021-10-01,133.92,aluminum
586,2021-11-01,128.11,aluminum


In [26]:
# Export the long-format commodities table (without the index) as CSV and Excel.
# NOTE(review): writing the legacy `.xls` format relies on the xlwt engine,
# which pandas deprecated in 1.2 and removed in 2.0 — confirm the pandas
# version in use; on pandas >= 2.0 the to_excel call below raises.
commod.to_csv('graphs/commodities.csv', index=False)
commod.to_excel('graphs/commodities.xls', index=False)