# Data Extraction

In [1]:
# import relevant libraries
import numpy as np
import pandas as pd
import yfinance as yf
import datetime as dt
import warnings
warnings.filterwarnings('ignore')

# Create Consolidated DataFrame

In [2]:
# function to create dates
from datetime import timedelta, date

def get_dates_between(start_date, end_date):
    return [start_date + timedelta(days=i)
            for i in range((end_date - start_date).days + 1)]

start_date = date(2010, 1, 1)
end_date = date(2023, 9, 30)

dates_between = get_dates_between(start_date, end_date)

# Create consolidated dataframe
eur_usd_df = pd.DataFrame({'Date': dates_between})

# convert date to datetime object
eur_usd_df['Date'] = pd.to_datetime(eur_usd_df['Date'])

eur_usd_df

Unnamed: 0,Date
0,2010-01-01
1,2010-01-02
2,2010-01-03
3,2010-01-04
4,2010-01-05
...,...
5016,2023-09-26
5017,2023-09-27
5018,2023-09-28
5019,2023-09-29


# EUR/USD Exchange Rates

retrieved EUR/USD exchange rates from yahoo finance

In [3]:
# daily EUR/USD exchange rates
x_rates_df = pd.read_csv('EUR_USD Historical Data.csv')
x_rates_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,0.0
1,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,0.0
2,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,0.0
3,2010-01-06,1.436596,1.443460,1.429123,1.440403,1.440403,0.0
4,2010-01-07,1.440300,1.444481,1.430206,1.431803,1.431803,0.0
...,...,...,...,...,...,...,...
3581,2023-09-25,1.064849,1.065542,1.057731,1.064849,1.064849,0.0
3582,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,0.0
3583,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,0.0
3584,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,0.0


In [4]:
# check for null values and data type
x_rates_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3586 entries, 0 to 3585
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       3586 non-null   object 
 1   Open       3581 non-null   float64
 2   High       3581 non-null   float64
 3   Low        3581 non-null   float64
 4   Close      3581 non-null   float64
 5   Adj Close  3581 non-null   float64
 6   Volume     3581 non-null   float64
dtypes: float64(6), object(1)
memory usage: 196.2+ KB


In [5]:
# convert date to datetime object
x_rates_df['Date'] = pd.to_datetime(x_rates_df['Date'])
x_rates_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3586 entries, 0 to 3585
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       3586 non-null   datetime64[ns]
 1   Open       3581 non-null   float64       
 2   High       3581 non-null   float64       
 3   Low        3581 non-null   float64       
 4   Close      3581 non-null   float64       
 5   Adj Close  3581 non-null   float64       
 6   Volume     3581 non-null   float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 196.2 KB


In [6]:
# check null values
# x_rates_df[x_rates_df.isna().any(axis=1)]
x_rates_df.isnull().sum()

Date         0
Open         5
High         5
Low          5
Close        5
Adj Close    5
Volume       5
dtype: int64

In [7]:
# remove rows with at least one missing value
x_rates_df = x_rates_df.dropna()
x_rates_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3581 entries, 0 to 3585
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       3581 non-null   datetime64[ns]
 1   Open       3581 non-null   float64       
 2   High       3581 non-null   float64       
 3   Low        3581 non-null   float64       
 4   Close      3581 non-null   float64       
 5   Adj Close  3581 non-null   float64       
 6   Volume     3581 non-null   float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 223.8 KB


In [8]:
# check volume column
x_rates_df['Volume'].value_counts()

0.0    3581
Name: Volume, dtype: int64

In [9]:
# remove volume column
x_rates_df = x_rates_df[['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close']]

# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(x_rates_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994
1,2010-01-02,,,,,
2,2010-01-03,,,,,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596
...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245


# Interest Rates

## European Central Bank

retrieved daily ECV interest rates from ECB Data Portal

In [10]:
# read csv file
eur_int_rates_df = pd.read_csv('ECB Interest Rates.csv')
eur_int_rates_df

Unnamed: 0,DATE,TIME PERIOD,ECB Marginal lending facility - date of changes (raw data) - Level (FM.D.U2.EUR.4F.KR.MLFR.LEV)
0,1999-01-01,01 Jan 1999,4.50
1,1999-01-02,02 Jan 1999,4.50
2,1999-01-03,03 Jan 1999,4.50
3,1999-01-04,04 Jan 1999,3.25
4,1999-01-05,05 Jan 1999,3.25
...,...,...,...
9043,2023-10-05,05 Oct 2023,4.75
9044,2023-10-06,06 Oct 2023,4.75
9045,2023-10-07,07 Oct 2023,4.75
9046,2023-10-08,08 Oct 2023,4.75


In [11]:
# check for null values and datatype
# eur_int_rates_df.info()
eur_int_rates_df.isnull().sum()

DATE                                                                                               0
TIME PERIOD                                                                                        0
ECB Marginal lending facility - date of changes (raw data) - Level (FM.D.U2.EUR.4F.KR.MLFR.LEV)    0
dtype: int64

In [12]:
# drop time period column
eur_int_rates_df = eur_int_rates_df.drop('TIME PERIOD', axis=1)
eur_int_rates_df.head()

Unnamed: 0,DATE,ECB Marginal lending facility - date of changes (raw data) - Level (FM.D.U2.EUR.4F.KR.MLFR.LEV)
0,1999-01-01,4.5
1,1999-01-02,4.5
2,1999-01-03,4.5
3,1999-01-04,3.25
4,1999-01-05,3.25


In [13]:
# rename columns
eur_int_rates_df = eur_int_rates_df.rename(columns={"DATE": "Date", "ECB Marginal lending facility - date of changes (raw data) - Level (FM.D.U2.EUR.4F.KR.MLFR.LEV)": "EUR I/R"})
eur_int_rates_df.head()

Unnamed: 0,Date,EUR I/R
0,1999-01-01,4.5
1,1999-01-02,4.5
2,1999-01-03,4.5
3,1999-01-04,3.25
4,1999-01-05,3.25


In [14]:
# convert date to datetime object
eur_int_rates_df['Date'] = pd.to_datetime(eur_int_rates_df['Date'])
eur_int_rates_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9048 entries, 0 to 9047
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     9048 non-null   datetime64[ns]
 1   EUR I/R  9048 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 141.5 KB


In [15]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(eur_int_rates_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75
1,2010-01-02,,,,,,1.75
2,2010-01-03,,,,,,1.75
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75
...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75


In [16]:
# check info of updated dataframe
eur_usd_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5021 entries, 0 to 5020
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       5021 non-null   datetime64[ns]
 1   Open       3581 non-null   float64       
 2   High       3581 non-null   float64       
 3   Low        3581 non-null   float64       
 4   Close      3581 non-null   float64       
 5   Adj Close  3581 non-null   float64       
 6   EUR I/R    5021 non-null   float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 313.8 KB


## US Federal Reserve

retrieved csv file on weekly US Fed Rates (mean, ending Friday) from FRED economic data

In [17]:
# read csv file
us_fed_rates_df = pd.read_csv('US Fed Rates.csv')
us_fed_rates_df

Unnamed: 0,DATE,DFF
0,2010-01-01,0.095714
1,2010-01-08,0.095714
2,2010-01-15,0.111429
3,2010-01-22,0.121429
4,2010-01-29,0.117143
...,...,...
714,2023-09-08,5.330000
715,2023-09-15,5.330000
716,2023-09-22,5.330000
717,2023-09-29,5.330000


In [18]:
# check for null values and datatype
us_fed_rates_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 719 entries, 0 to 718
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   DATE    719 non-null    object 
 1   DFF     719 non-null    float64
dtypes: float64(1), object(1)
memory usage: 11.4+ KB


In [19]:
# change column names
us_fed_rates_df = us_fed_rates_df.rename(columns={"DATE": "Date", 'DFF': 'US Fed Rate'})
us_fed_rates_df.head()

Unnamed: 0,Date,US Fed Rate
0,2010-01-01,0.095714
1,2010-01-08,0.095714
2,2010-01-15,0.111429
3,2010-01-22,0.121429
4,2010-01-29,0.117143


In [20]:
# change date to datetime object
us_fed_rates_df['Date'] = pd.to_datetime(us_fed_rates_df['Date'])
us_fed_rates_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 719 entries, 0 to 718
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         719 non-null    datetime64[ns]
 1   US Fed Rate  719 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 11.4 KB


In [21]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(us_fed_rates_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714
1,2010-01-02,,,,,,1.75,
2,2010-01-03,,,,,,1.75,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,
...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000


In [22]:
# backward fill fed rates
eur_usd_df['US Fed Rate'] = eur_usd_df['US Fed Rate'].bfill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714
1,2010-01-02,,,,,,1.75,0.095714
2,2010-01-03,,,,,,1.75,0.095714
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714
...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000


In [23]:
# check success of backfill
eur_usd_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5021 entries, 0 to 5020
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         5021 non-null   datetime64[ns]
 1   Open         3581 non-null   float64       
 2   High         3581 non-null   float64       
 3   Low          3581 non-null   float64       
 4   Close        3581 non-null   float64       
 5   Adj Close    3581 non-null   float64       
 6   EUR I/R      5021 non-null   float64       
 7   US Fed Rate  5020 non-null   float64       
dtypes: datetime64[ns](1), float64(7)
memory usage: 353.0 KB


# Inflation Rates

Headline: All goods and services <br/>
Core: All goods and services minus food and energy prices

## US Headline CPI

retrieved csv file on monthly headline CPI values (start of month) from FRED Economic Data

In [24]:
# read csv file
us_head_cpi_df = pd.read_csv('US Headline Inflation Data.csv')
us_head_cpi_df

Unnamed: 0,DATE,CPIAUCSL
0,2010-01-01,217.488
1,2010-02-01,217.281
2,2010-03-01,217.353
3,2010-04-01,217.403
4,2010-05-01,217.290
...,...,...
159,2023-04-01,302.918
160,2023-05-01,303.294
161,2023-06-01,303.841
162,2023-07-01,304.348


In [25]:
# understanding dataframe
us_head_cpi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 164 entries, 0 to 163
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   DATE      164 non-null    object 
 1   CPIAUCSL  164 non-null    float64
dtypes: float64(1), object(1)
memory usage: 2.7+ KB


In [26]:
# rename columns
us_head_cpi_df = us_head_cpi_df.rename(columns={"DATE": "Date", 'CPIAUCSL': 'US Headline CPI'})
us_head_cpi_df

Unnamed: 0,Date,US Headline CPI
0,2010-01-01,217.488
1,2010-02-01,217.281
2,2010-03-01,217.353
3,2010-04-01,217.403
4,2010-05-01,217.290
...,...,...
159,2023-04-01,302.918
160,2023-05-01,303.294
161,2023-06-01,303.841
162,2023-07-01,304.348


In [27]:
# change date to datetime object
us_head_cpi_df['Date'] = pd.to_datetime(us_head_cpi_df['Date'])
us_head_cpi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 164 entries, 0 to 163
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Date             164 non-null    datetime64[ns]
 1   US Headline CPI  164 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 2.7 KB


In [28]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(us_head_cpi_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488
1,2010-01-02,,,,,,1.75,0.095714,
2,2010-01-03,,,,,,1.75,0.095714,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,
...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,


In [29]:
# forward fill US Headline CPI
eur_usd_df['US Headline CPI'] = eur_usd_df['US Headline CPI'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488
1,2010-01-02,,,,,,1.75,0.095714,217.488
2,2010-01-03,,,,,,1.75,0.095714,217.488
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488
...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269


## US Core CPI

retrieved csv file on monthly core CPI values (start of month) from FRED Economic Data

In [30]:
# read csv file
us_core_cpi_df = pd.read_csv('US Core Inflation Data.csv')
us_core_cpi_df

Unnamed: 0,DATE,CPILFESL
0,2010-01-01,220.633
1,2010-02-01,220.731
2,2010-03-01,220.783
3,2010-04-01,220.822
4,2010-05-01,220.962
...,...,...
159,2023-04-01,306.489
160,2023-05-01,307.824
161,2023-06-01,308.309
162,2023-07-01,308.801


In [31]:
# understanding dataframe
us_core_cpi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 164 entries, 0 to 163
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   DATE      164 non-null    object 
 1   CPILFESL  164 non-null    float64
dtypes: float64(1), object(1)
memory usage: 2.7+ KB


In [32]:
# rename columns
us_core_cpi_df = us_core_cpi_df.rename(columns={"DATE": "Date", 'CPILFESL': 'US Core CPI'})
us_core_cpi_df.head()

Unnamed: 0,Date,US Core CPI
0,2010-01-01,220.633
1,2010-02-01,220.731
2,2010-03-01,220.783
3,2010-04-01,220.822
4,2010-05-01,220.962


In [33]:
# change date to datetime object
us_core_cpi_df['Date'] = pd.to_datetime(us_core_cpi_df['Date'])
us_core_cpi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 164 entries, 0 to 163
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         164 non-null    datetime64[ns]
 1   US Core CPI  164 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 2.7 KB


In [34]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(us_core_cpi_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633
1,2010-01-02,,,,,,1.75,0.095714,217.488,
2,2010-01-03,,,,,,1.75,0.095714,217.488,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,
...,...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269,
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269,
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269,
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269,


In [35]:
# forward fill CPI
eur_usd_df['US Core CPI'] = eur_usd_df['US Core CPI'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633
...,...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269,309.661
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269,309.661
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269,309.661
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269,309.661


## EUR Headline Inflation

retrieved csv file on monthly headline CPI values (end of month) from ECB Economic Data

In [36]:
# read csv file
eu_headline_cpi_df = pd.read_csv('EUR Headline Inflation Data.csv')
eu_headline_cpi_df

Unnamed: 0,DATE,TIME PERIOD,HICP - Overall index (ICP.M.U2.N.000000.4.INX)
0,1996-01-31,1996Jan,70.97
1,1996-02-29,1996Feb,71.29
2,1996-03-31,1996Mar,71.54
3,1996-04-30,1996Apr,71.66
4,1996-05-31,1996May,71.83
...,...,...,...
328,2023-05-31,2023May,123.15
329,2023-06-30,2023Jun,123.47
330,2023-07-31,2023Jul,123.36
331,2023-08-31,2023Aug,124.03


In [37]:
# understanding dataframe
eu_headline_cpi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 3 columns):
 #   Column                                          Non-Null Count  Dtype  
---  ------                                          --------------  -----  
 0   DATE                                            333 non-null    object 
 1   TIME PERIOD                                     333 non-null    object 
 2   HICP - Overall index (ICP.M.U2.N.000000.4.INX)  333 non-null    float64
dtypes: float64(1), object(2)
memory usage: 7.9+ KB


In [38]:
# drop TIME PERIOD column
eu_headline_cpi_df = eu_headline_cpi_df.drop('TIME PERIOD', axis=1)
eu_headline_cpi_df.head()

Unnamed: 0,DATE,HICP - Overall index (ICP.M.U2.N.000000.4.INX)
0,1996-01-31,70.97
1,1996-02-29,71.29
2,1996-03-31,71.54
3,1996-04-30,71.66
4,1996-05-31,71.83


In [39]:
# rename columns
eu_headline_cpi_df = eu_headline_cpi_df.rename(columns={"DATE": "Date", 'HICP - Overall index (ICP.M.U2.N.000000.4.INX)': 'EUR Headline CPI'})
eu_headline_cpi_df.head()

Unnamed: 0,Date,EUR Headline CPI
0,1996-01-31,70.97
1,1996-02-29,71.29
2,1996-03-31,71.54
3,1996-04-30,71.66
4,1996-05-31,71.83


In [40]:
# change date to datetime object
eu_headline_cpi_df['Date'] = pd.to_datetime(eu_headline_cpi_df['Date'])
eu_headline_cpi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Date              333 non-null    datetime64[ns]
 1   EUR Headline CPI  333 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.3 KB


In [41]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(eu_headline_cpi_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,
...,...,...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269,309.661,
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269,309.661,
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269,309.661,
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269,309.661,


In [42]:
# retrieve EUR Headline CPI in Dec 2009
eu_dec2019_headline_cpi = eu_headline_cpi_df.loc[(eu_headline_cpi_df['Date'].dt.month == 12) & (eu_headline_cpi_df['Date'].dt.year == 2009)]['EUR Headline CPI']

# set 2010-01-01 EUR Headline CPI data with EUR Headline CPI in Dec 2009
eur_usd_df['EUR Headline CPI'].iloc[0] = eu_dec2019_headline_cpi

# forward fill CPI
eur_usd_df['EUR Headline CPI'] = eur_usd_df['EUR Headline CPI'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32
...,...,...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269,309.661,124.03
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269,309.661,124.03
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269,309.661,124.03
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269,309.661,124.03


## EUR Core Inflation

retrieved csv file on monthly headline CPI values (end of month) from ECB Economic Data

In [43]:
# read csv file
eu_core_cpi_df = pd.read_csv('EUR Core Inflation Data.csv')
eu_core_cpi_df

Unnamed: 0,DATE,TIME PERIOD,HICP - All-items excluding energy and seasonal food (ICP.M.U2.N.XESEAS.4.INX)
0,1996-01-31,1996Jan,73.77
1,1996-02-29,1996Feb,74.08
2,1996-03-31,1996Mar,74.27
3,1996-04-30,1996Apr,74.32
4,1996-05-31,1996May,74.48
...,...,...,...
327,2023-04-30,2023Apr,119.15
328,2023-05-31,2023May,119.48
329,2023-06-30,2023Jun,119.91
330,2023-07-31,2023Jul,119.86


In [44]:
# understanding dataframe
eu_core_cpi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 332 entries, 0 to 331
Data columns (total 3 columns):
 #   Column                                                                         Non-Null Count  Dtype  
---  ------                                                                         --------------  -----  
 0   DATE                                                                           332 non-null    object 
 1   TIME PERIOD                                                                    332 non-null    object 
 2   HICP - All-items excluding energy and seasonal food (ICP.M.U2.N.XESEAS.4.INX)  332 non-null    float64
dtypes: float64(1), object(2)
memory usage: 7.9+ KB


In [45]:
# drop TIME PERIOD column
eu_core_cpi_df = eu_core_cpi_df.drop('TIME PERIOD', axis=1)
eu_core_cpi_df.head()

Unnamed: 0,DATE,HICP - All-items excluding energy and seasonal food (ICP.M.U2.N.XESEAS.4.INX)
0,1996-01-31,73.77
1,1996-02-29,74.08
2,1996-03-31,74.27
3,1996-04-30,74.32
4,1996-05-31,74.48


In [46]:
# rename columns
eu_core_cpi_df = eu_core_cpi_df.rename(columns={"DATE": "Date", 'HICP - All-items excluding energy and seasonal food (ICP.M.U2.N.XESEAS.4.INX)': 'EUR Core CPI'})
eu_core_cpi_df.head()

Unnamed: 0,Date,EUR Core CPI
0,1996-01-31,73.77
1,1996-02-29,74.08
2,1996-03-31,74.27
3,1996-04-30,74.32
4,1996-05-31,74.48


In [47]:
# change date to datetime object
eu_core_cpi_df['Date'] = pd.to_datetime(eu_core_cpi_df['Date'])
eu_core_cpi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 332 entries, 0 to 331
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          332 non-null    datetime64[ns]
 1   EUR Core CPI  332 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.3 KB


In [48]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(eu_core_cpi_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,
...,...,...,...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269,309.661,124.03,
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269,309.661,124.03,
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269,309.661,124.03,
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269,309.661,124.03,


In [49]:
# retrieve EUR Headline CPI in Dec 2009
eu_dec2019_core_cpi = eu_core_cpi_df.loc[(eu_core_cpi_df['Date'].dt.month == 12) & (eu_core_cpi_df['Date'].dt.year == 2009)]['EUR Core CPI']

# set 2010-01-01 EUR Headline CPI data with EUR Headline CPI in Dec 2009
eur_usd_df['EUR Core CPI'].iloc[0] = eu_dec2019_core_cpi

# forward fill CPI
eur_usd_df['EUR Core CPI'] = eur_usd_df['EUR Core CPI'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52
...,...,...,...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269,309.661,124.03,120.23
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269,309.661,124.03,120.23
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269,309.661,124.03,120.23
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269,309.661,124.03,120.23


# Gross Domestic Product (GDP)

## US GDP Per Capita

retrieved quarterly csv file from FRED Economic Data <br>

In [50]:
# read csv file
us_gdp_per_capita = pd.read_csv('US GDP Per Capita Data.csv')
us_gdp_per_capita.head()

Unnamed: 0,DATE,A939RC0Q052SBEA
0,2010-01-01,47797.0
1,2010-04-01,48403.0
2,2010-07-01,48821.0
3,2010-10-01,49256.0
4,2011-01-01,49302.0


In [51]:
# understand dataframe
us_gdp_per_capita.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   DATE             54 non-null     object 
 1   A939RC0Q052SBEA  54 non-null     float64
dtypes: float64(1), object(1)
memory usage: 992.0+ bytes


In [52]:
# rename columns
us_gdp_per_capita = us_gdp_per_capita.rename(columns={"DATE": "Date", 'A939RC0Q052SBEA': 'US GDP Per Capita'})
us_gdp_per_capita.head()

Unnamed: 0,Date,US GDP Per Capita
0,2010-01-01,47797.0
1,2010-04-01,48403.0
2,2010-07-01,48821.0
3,2010-10-01,49256.0
4,2011-01-01,49302.0


In [53]:
# change date to datetime object
us_gdp_per_capita['Date'] = pd.to_datetime(us_gdp_per_capita['Date'])
us_gdp_per_capita.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Date               54 non-null     datetime64[ns]
 1   US GDP Per Capita  54 non-null     float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 992.0 bytes


In [54]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(us_gdp_per_capita, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269,309.661,124.03,120.23,
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269,309.661,124.03,120.23,
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269,309.661,124.03,120.23,
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269,309.661,124.03,120.23,


In [55]:
# forward fill US GDP Per Capita values
eur_usd_df['US GDP Per Capita'] = eur_usd_df['US GDP Per Capita'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5016,2023-09-26,1.059165,1.060895,1.056814,1.059165,1.059165,4.75,5.330000,306.269,309.661,124.03,120.23,80781.0
5017,2023-09-27,1.056948,1.057373,1.050906,1.056948,1.056948,4.75,5.330000,306.269,309.661,124.03,120.23,80781.0
5018,2023-09-28,1.050531,1.057865,1.049186,1.050531,1.050531,4.75,5.330000,306.269,309.661,124.03,120.23,80781.0
5019,2023-09-29,1.056245,1.061797,1.055855,1.056245,1.056245,4.75,5.330000,306.269,309.661,124.03,120.23,80781.0


In [56]:
# remove July 2023 and Aug 2023 as data is quarterly
eur_usd_df = eur_usd_df.loc[(eur_usd_df['Date'].dt.date < pd.to_datetime('2023-07-01'))]
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0


## EUR GDP Per Capita

In [57]:
# read csv file
eu_gdp_per_capita = pd.read_csv('EUR GDP data.csv')
eu_gdp_per_capita.head()

Unnamed: 0,DATE,Gross domestic product at market prices
0,3/31/1995,1340390.05
1,6/30/1995,1385541.37
2,9/30/1995,1386333.58
3,12/31/1995,1471601.75
4,3/31/1996,1402186.87


In [58]:
# understand dataframe
eu_gdp_per_capita.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114 entries, 0 to 113
Data columns (total 2 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   DATE                                     114 non-null    object 
 1   Gross domestic product at market prices  114 non-null    float64
dtypes: float64(1), object(1)
memory usage: 1.9+ KB


# Current-account Debt

Current-account debt: Measures import and exports of goods <br>
Formula = (X-M) + NI + NT <br>

X - M = Exports - Imports <br>
NI = Net Income from foreign countries <br>
NT = Net transfers (government transfers)

## US Current Account Balance

In [59]:
us_current_acc_balance_df = pd.read_csv('IEABCN.csv')
us_current_acc_balance_df

Unnamed: 0,DATE,IEABCN
0,1999-01-01,-52413
1,1999-04-01,-67691
2,1999-07-01,-86705
3,1999-10-01,-79803
4,2000-01-01,-84582
...,...,...
93,2022-04-01,-258277
94,2022-07-01,-254574
95,2022-10-01,-204311
96,2023-01-01,-189744


In [60]:
us_current_acc_balance_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98 entries, 0 to 97
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   DATE    98 non-null     object
 1   IEABCN  98 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 1.7+ KB


In [61]:
us_current_acc_balance_df = us_current_acc_balance_df.rename(columns = {"DATE" : "Date", "IEABCN": "US Current Acc Bal"})
us_current_acc_balance_df

Unnamed: 0,Date,US Current Acc Bal
0,1999-01-01,-52413
1,1999-04-01,-67691
2,1999-07-01,-86705
3,1999-10-01,-79803
4,2000-01-01,-84582
...,...,...
93,2022-04-01,-258277
94,2022-07-01,-254574
95,2022-10-01,-204311
96,2023-01-01,-189744


In [62]:
us_current_acc_balance_df['Date'] = pd.to_datetime(us_current_acc_balance_df['Date'])
us_current_acc_balance_df

Unnamed: 0,Date,US Current Acc Bal
0,1999-01-01,-52413
1,1999-04-01,-67691
2,1999-07-01,-86705
3,1999-10-01,-79803
4,2000-01-01,-84582
...,...,...
93,2022-04-01,-258277
94,2022-07-01,-254574
95,2022-10-01,-204311
96,2023-01-01,-189744


In [63]:
eur_usd_df = eur_usd_df.merge(us_current_acc_balance_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,


In [64]:
eur_usd_df['US Current Acc Bal'] = eur_usd_df['US Current Acc Bal'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0


## EUR Current Account Balance

In [65]:
eur_current_acc_balance_df = pd.read_csv('BOP EU27 excl UK EU27E Current Account CA.csv')
eur_current_acc_balance_df

Unnamed: 0,Date,BOP: EU27 excl UK (EU27E): Current Account (CA)
0,1999-03-01,-8190
1,1999-06-01,24730
2,1999-09-01,4060
3,1999-12-01,-6320
4,2000-03-01,-18560
...,...,...
93,2022-06-01,-26060
94,2022-09-01,-34380
95,2022-12-01,-4770
96,2023-03-01,43720


In [66]:
eur_current_acc_balance_df = eur_current_acc_balance_df.rename(columns = {"Date" : "Date", "BOP: EU27 excl UK (EU27E): Current Account (CA)": "EUR Current Acc Bal"})
eur_current_acc_balance_df

Unnamed: 0,Date,EUR Current Acc Bal
0,1999-03-01,-8190
1,1999-06-01,24730
2,1999-09-01,4060
3,1999-12-01,-6320
4,2000-03-01,-18560
...,...,...
93,2022-06-01,-26060
94,2022-09-01,-34380
95,2022-12-01,-4770
96,2023-03-01,43720


In [67]:
eur_current_acc_balance_df['Date'] = pd.to_datetime(eur_current_acc_balance_df['Date'])
eur_current_acc_balance_df

Unnamed: 0,Date,EUR Current Acc Bal
0,1999-03-01,-8190
1,1999-06-01,24730
2,1999-09-01,4060
3,1999-12-01,-6320
4,2000-03-01,-18560
...,...,...
93,2022-06-01,-26060
94,2022-09-01,-34380
95,2022-12-01,-4770
96,2023-03-01,43720


In [68]:
rows_with_nan = eur_current_acc_balance_df[eur_current_acc_balance_df['EUR Current Acc Bal'].isna()]
print(rows_with_nan)

Empty DataFrame
Columns: [Date, EUR Current Acc Bal]
Index: []


In [69]:
eur_usd_df = eur_usd_df.merge(eur_current_acc_balance_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,


In [70]:
eur_usd_df['EUR Current Acc Bal'] = eur_usd_df['EUR Current Acc Bal'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0


In [71]:
eur_usd_df['EUR Current Acc Bal'].fillna(10150.0, inplace=True)
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0


# Terms of Trade

Terms of Trade: ratio of an index of a country's export prices to an index of its import prices

### US Terms of Trade

In [72]:
us_tot_df = pd.read_csv('US Terms of Trade (1947 to 04-2023).csv')
us_tot_df

Unnamed: 0,DATE,W369RG3Q066SBEA
0,1947-01-01,140.193
1,1947-04-01,140.667
2,1947-07-01,140.318
3,1947-10-01,139.037
4,1948-01-01,136.725
...,...,...
301,2022-04-01,109.384
302,2022-07-01,107.941
303,2022-10-01,107.570
304,2023-01-01,107.952


In [73]:
us_tot_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 306 entries, 0 to 305
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   DATE             306 non-null    object 
 1   W369RG3Q066SBEA  306 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.9+ KB


In [74]:
# rename columns
us_tot_df = us_tot_df.rename(columns={"DATE": "Date", 'W369RG3Q066SBEA': 'US Terms of Trade'})
us_tot_df.head()

Unnamed: 0,Date,US Terms of Trade
0,1947-01-01,140.193
1,1947-04-01,140.667
2,1947-07-01,140.318
3,1947-10-01,139.037
4,1948-01-01,136.725


In [75]:
# change date to datetime object
us_tot_df['Date'] = pd.to_datetime(us_tot_df['Date'])
us_tot_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 306 entries, 0 to 305
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Date               306 non-null    datetime64[ns]
 1   US Terms of Trade  306 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 4.9 KB


In [76]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(us_tot_df, how='left', on='Date')

In [77]:
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.01
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,


In [78]:
# forward fill CPI
eur_usd_df['US Terms of Trade'] = eur_usd_df['US Terms of Trade'].ffill()

In [79]:
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216


### EUR Terms of Trade

In [80]:
eu_tot_df = pd.read_csv('EU Terms of Trade (1995 to 2022).csv')
eu_tot_df

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,EU,TERMTRADE,TOT,RT,A,1995,100.310319,
1,EU,TERMTRADE,TOT,RT,A,1996,100.282949,
2,EU,TERMTRADE,TOT,RT,A,1997,100.00194,
3,EU,TERMTRADE,TOT,RT,A,1998,101.530077,
4,EU,TERMTRADE,TOT,RT,A,1999,101.377711,
5,EU,TERMTRADE,TOT,RT,A,2000,98.553093,
6,EU,TERMTRADE,TOT,RT,A,2001,99.03948,
7,EU,TERMTRADE,TOT,RT,A,2002,100.337486,
8,EU,TERMTRADE,TOT,RT,A,2003,100.972951,
9,EU,TERMTRADE,TOT,RT,A,2004,100.470648,


In [81]:
eu_tot_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   LOCATION    28 non-null     object 
 1   INDICATOR   28 non-null     object 
 2   SUBJECT     28 non-null     object 
 3   MEASURE     28 non-null     object 
 4   FREQUENCY   28 non-null     object 
 5   TIME        28 non-null     int64  
 6   Value       28 non-null     float64
 7   Flag Codes  0 non-null      float64
dtypes: float64(2), int64(1), object(5)
memory usage: 1.9+ KB


In [82]:
# drop all column except Value column
eu_tot_df = eu_tot_df.drop(['LOCATION', 'INDICATOR', 'SUBJECT', 'MEASURE', 'FREQUENCY', 'Flag Codes'], axis=1)
eu_tot_df.head()

Unnamed: 0,TIME,Value
0,1995,100.310319
1,1996,100.282949
2,1997,100.00194
3,1998,101.530077
4,1999,101.377711


In [83]:
# rename columns
eu_tot_df = eu_tot_df.rename(columns={"TIME": "Date", 'Value': 'EU Terms of Trade'})
eu_tot_df.head()

Unnamed: 0,Date,EU Terms of Trade
0,1995,100.310319
1,1996,100.282949
2,1997,100.00194
3,1998,101.530077
4,1999,101.377711


In [84]:
# change date column from 'year' to 'yyyy-mm-dd'
eu_tot_df['Date'] = pd.to_datetime(eu_tot_df['Date'], format='%Y')
eu_tot_df['Date'] = eu_tot_df['Date'].dt.strftime('%Y-%m-%d') # '%Y-12-31'
eu_tot_df.head()

Unnamed: 0,Date,EU Terms of Trade
0,1995-01-01,100.310319
1,1996-01-01,100.282949
2,1997-01-01,100.00194
3,1998-01-01,101.530077
4,1999-01-01,101.377711


In [85]:
# change date to datetime object
eu_tot_df['Date'] = pd.to_datetime(eu_tot_df['Date'])
eu_tot_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Date               28 non-null     datetime64[ns]
 1   EU Terms of Trade  28 non-null     float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 576.0 bytes


In [86]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(eu_tot_df, how='left', on='Date')

In [87]:
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade,EU Terms of Trade
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,


In [88]:
# forward fill CPI
eur_usd_df['EU Terms of Trade'] = eur_usd_df['EU Terms of Trade'].ffill()

In [89]:
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade,EU Terms of Trade
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069


# Government Debt

# Unemployment rates

## EU UN rates

In [90]:
eu_un_df = pd.read_csv('EU UN.csv')
eu_un_df

Unnamed: 0,DATE,TIME PERIOD,(LFSI.M.U2.S.UNEHRT.TOTAL0.15_74.T)
0,1995-01-31,1995Jan,10.885272
1,1995-02-28,1995Feb,10.843466
2,1995-03-31,1995Mar,10.791312
3,1995-04-30,1995Apr,10.733899
4,1995-05-31,1995May,10.742418
...,...,...,...
339,2023-04-30,2023Apr,6.501191
340,2023-05-31,2023May,6.453621
341,2023-06-30,2023Jun,6.400128
342,2023-07-31,2023Jul,6.451526


In [91]:
eu_un_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 3 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   DATE                                 344 non-null    object 
 1   TIME PERIOD                          344 non-null    object 
 2   (LFSI.M.U2.S.UNEHRT.TOTAL0.15_74.T)  344 non-null    float64
dtypes: float64(1), object(2)
memory usage: 8.2+ KB


In [92]:
# drop TIME PERIOD column
eu_un_df = eu_un_df.drop('TIME PERIOD', axis=1)
eu_un_df.head()

Unnamed: 0,DATE,(LFSI.M.U2.S.UNEHRT.TOTAL0.15_74.T)
0,1995-01-31,10.885272
1,1995-02-28,10.843466
2,1995-03-31,10.791312
3,1995-04-30,10.733899
4,1995-05-31,10.742418


In [93]:
# rename columns
eu_un_df = eu_un_df.rename(columns={"DATE": "Date", '(LFSI.M.U2.S.UNEHRT.TOTAL0.15_74.T)': 'EUR Unemployment Rate'})
eu_un_df.head()

Unnamed: 0,Date,EUR Unemployment Rate
0,1995-01-31,10.885272
1,1995-02-28,10.843466
2,1995-03-31,10.791312
3,1995-04-30,10.733899
4,1995-05-31,10.742418


In [94]:
# change date to datetime object
eu_un_df['Date'] = pd.to_datetime(eu_un_df['Date'])
eu_un_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 2 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Date                   344 non-null    datetime64[ns]
 1   EUR Unemployment Rate  344 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.5 KB


In [95]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(eu_un_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade,EU Terms of Trade,EUR Unemployment Rate
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,


In [96]:
# retrieve EUR UN in Dec 2009
eu_dec2019_un = eu_un_df.loc[(eu_un_df['Date'].dt.month == 12) & (eu_un_df['Date'].dt.year == 2009)]['EUR Unemployment Rate']

# set 2010-01-01 EUR Headline CPI data with EUR Headline CPI in Dec 2009
eur_usd_df['EUR Unemployment Rate'].iloc[0] = eu_dec2019_un

# forward fill CPI
eur_usd_df['EUR Unemployment Rate'] = eur_usd_df['EUR Unemployment Rate'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade,EU Terms of Trade,EUR Unemployment Rate
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621


## US UN rate

In [97]:
us_un_df = pd.read_csv('US UN.csv')
us_un_df

Unnamed: 0,DATE,UNRATE
0,1948-01-01,3.4
1,1948-02-01,3.8
2,1948-03-01,4.0
3,1948-04-01,3.9
4,1948-05-01,3.5
...,...,...
904,2023-05-01,3.7
905,2023-06-01,3.6
906,2023-07-01,3.5
907,2023-08-01,3.8


In [98]:
us_un_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 909 entries, 0 to 908
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   DATE    909 non-null    object 
 1   UNRATE  909 non-null    float64
dtypes: float64(1), object(1)
memory usage: 14.3+ KB


In [99]:
# rename columns
us_un_df = us_un_df.rename(columns={"DATE": "Date", 'UNRATE': 'US Unemployment Rate'})
us_un_df.head()

Unnamed: 0,Date,US Unemployment Rate
0,1948-01-01,3.4
1,1948-02-01,3.8
2,1948-03-01,4.0
3,1948-04-01,3.9
4,1948-05-01,3.5


In [100]:
# change date to datetime object
us_un_df['Date'] = pd.to_datetime(us_un_df['Date'])
us_un_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 909 entries, 0 to 908
Data columns (total 2 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Date                  909 non-null    datetime64[ns]
 1   US Unemployment Rate  909 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 14.3 KB


In [101]:
# add to consolidated dataframe
eur_usd_df = eur_usd_df.merge(us_un_df, how='left', on='Date')
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade,EU Terms of Trade,EUR Unemployment Rate,US Unemployment Rate
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,


In [102]:
# forward fill CPI
eur_usd_df['US Unemployment Rate'] = eur_usd_df['US Unemployment Rate'].ffill()
eur_usd_df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade,EU Terms of Trade,EUR Unemployment Rate,US Unemployment Rate
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
1,2010-01-02,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
2,2010-01-03,,,,,,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
3,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
4,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4924,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,3.6
4925,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,3.6
4926,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,3.6
4927,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,3.6


# Commodity Prices

- Oil

# Create CSV File

In [103]:
# drop null values
eur_usd_df.dropna().reset_index(drop=True)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,EUR I/R,US Fed Rate,US Headline CPI,US Core CPI,EUR Headline CPI,EUR Core CPI,US GDP Per Capita,US Current Acc Bal,EUR Current Acc Bal,US Terms of Trade,EU Terms of Trade,EUR Unemployment Rate,US Unemployment Rate
0,2010-01-01,1.432706,1.440196,1.432706,1.438994,1.438994,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
1,2010-01-04,1.431004,1.445191,1.426208,1.442398,1.442398,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
2,2010-01-05,1.442710,1.448310,1.435194,1.436596,1.436596,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
3,2010-01-06,1.436596,1.443460,1.429123,1.440403,1.440403,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
4,2010-01-07,1.440300,1.444481,1.430206,1.431803,1.431803,1.75,0.095714,217.488,220.633,92.32,93.52,47797.0,-86832.0,10150.0,92.010,98.649861,9.997619,9.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3511,2023-06-26,1.090667,1.092180,1.088815,1.090667,1.090667,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,3.6
3512,2023-06-27,1.091048,1.097610,1.090322,1.091048,1.091048,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,3.6
3513,2023-06-28,1.095722,1.096251,1.089966,1.095722,1.095722,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,3.6
3514,2023-06-29,1.091584,1.094068,1.086236,1.091584,1.091584,4.25,5.071429,303.841,308.309,123.15,119.48,80781.0,-218801.0,77030.0,108.216,96.093069,6.453621,3.6


In [104]:
# check for missing values
eur_usd_df.dropna().reset_index(drop=True).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3516 entries, 0 to 3515
Data columns (total 19 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Date                   3516 non-null   datetime64[ns]
 1   Open                   3516 non-null   float64       
 2   High                   3516 non-null   float64       
 3   Low                    3516 non-null   float64       
 4   Close                  3516 non-null   float64       
 5   Adj Close              3516 non-null   float64       
 6   EUR I/R                3516 non-null   float64       
 7   US Fed Rate            3516 non-null   float64       
 8   US Headline CPI        3516 non-null   float64       
 9   US Core CPI            3516 non-null   float64       
 10  EUR Headline CPI       3516 non-null   float64       
 11  EUR Core CPI           3516 non-null   float64       
 12  US GDP Per Capita      3516 non-null   float64       
 13  US 

In [105]:
# create consolidated csv file
eur_usd_df.dropna().reset_index(drop=True).to_csv('EUR_USD Consolidated Data.csv', index=False)