In [2]:
# run this only if you dont't have these libraries installed
!pip install pyjstat
!pip install ecbdata

Collecting pyjstat
  Downloading pyjstat-2.4.0.tar.gz (798 kB)
Building wheels for collected packages: pyjstat
  Building wheel for pyjstat (setup.py): started
  Building wheel for pyjstat (setup.py): finished with status 'done'
  Created wheel for pyjstat: filename=pyjstat-2.4.0-py3-none-any.whl size=22766 sha256=0c31bcd0a07a10cf69a30f590976b463ab5a59a20ebdd1a38393d1b7c3ba2574
  Stored in directory: c:\users\b28658\appdata\local\pip\cache\wheels\b3\3c\60\09f5b3335b52ca134ca6fab28189ccde645313cfa029de108f
Successfully built pyjstat
Installing collected packages: pyjstat
Successfully installed pyjstat-2.4.0


# Importing Libraries

In [10]:
import pandas as pd
import numpy as np
import requests

import warnings
warnings.filterwarnings("ignore")

from pyjstat import pyjstat
from ecbdata import ecbdata

import matplotlib.pyplot as plt

%matplotlib inline


# How to extract data from Banco de Portugal

### Function to extraction data

In [11]:
def extract_data_from_bank_pt(series_id, variable_name):
    """ 
    Function to extract data from BPSTAT API.

    Arguments: series_id int
             variable_name str.
             If variable_name is None, variable_name is set to urls label.

    Returns:   pandas dataframe with Date and variable_name columns
    """
    
    BPSTAT_API_URL="https://bpstat.bportugal.pt/data/v1"

    url = f"{BPSTAT_API_URL}/series/?lang=EN&series_ids={series_id}"
    series_info = requests.get(url).json()[0]

    domain_id = series_info["domain_ids"][0]
    dataset_id = series_info["dataset_id"]

    dataset_url = f"{BPSTAT_API_URL}/domains/{domain_id}/datasets/{dataset_id}/?lang=EN&series_ids={series_id}"
    dataset = pyjstat.Dataset.read(dataset_url)
    df = dataset.write('dataframe')

    df['Date'] = pd.to_datetime(df['Date'])
    if variable_name is None:
        variable_name = series_info['label']

    df = df.rename(columns={'value': variable_name})
    df = df[['Date', variable_name]]

    return df

In [12]:
help(extract_data_from_bank_pt)

Help on function extract_data_from_bank_pt in module __main__:

extract_data_from_bank_pt(series_id, variable_name)
    Function to extract data from BPSTAT API.
    
    Arguments: series_id int
             variable_name str.
             If variable_name is None, variable_name is set to urls label.
    
    Returns:   pandas dataframe with Date and variable_name columns



### Example: 

https://bpstat.bportugal.pt/serie/12504591

You can find the Key ECB interest rates - Deposit facility data on the specified URL. To extract this data, you will need the series key, which is 12504591. With this information, you can proceed to use the function as follows:

In [13]:
# extracting data with original column names
df = extract_data_from_bank_pt(12504591, None)
df.head()

Unnamed: 0,Date,Key ECB interest rates - Deposit facility
0,1999-01-01,2.0
1,1999-01-04,2.75
2,1999-01-05,2.75
3,1999-01-06,2.75
4,1999-01-07,2.75


In [14]:
# extracting data but changing the values column name
df = extract_data_from_bank_pt(12504591, 'Deposit facility')
df.head()

Unnamed: 0,Date,Deposit facility
0,1999-01-01,2.0
1,1999-01-04,2.75
2,1999-01-05,2.75
3,1999-01-06,2.75
4,1999-01-07,2.75


In [15]:
df.dtypes

Date                datetime64[ns]
Deposit facility           float64
dtype: object

# How to extract data from ECB

The ECB URLs follow the below strcuture:
https://data.ecb.europa.eu/data/datasets/XXXX/XXXX.AAA.BBB.CCC.....

In order to extract the data from the ECB website, we need to grab 'XXXX.AAA.BBB.CCC.....'. and then run:

In [17]:
extract_data_from_ecb('XXXX.AAA.BBB.CCC.....',
                        start=start_date)

start and end: It is possible to define a start and end date for which observations are to be returned. The format will vary depending on the frequency: YYYY for annual data (e.g. 2020); YYYY-S[1–2] for semi-annual data (e.g. 2020-S1); YYYY-Q[1–4] for quarterly data (e.g. 2020-Q1); YYYY-MM for monthly data (e.g. 2020–01); YYYY-W[01–53] for weekly data (e.g. 2020-W01); YYYY-MM-DD for daily data (e.g. 2020–01–01).

With this information, you can proceed to extract data from the following URL:

https://data.ecb.europa.eu/data/datasets/PFBR/PFBR.Q.U2.S.S129.L43C.T.4.W0.S1._T.EUR

start and end: It is possible to define a start and end date for which observations are to be returned. The format will vary depending on the frequency: YYYY for annual data (e.g. 2020); YYYY-S[1–2] for semi-annual data (e.g. 2020-S1); YYYY-Q[1–4] for quarterly data (e.g. 2020-Q1); YYYY-MM for monthly data (e.g. 2020–01); YYYY-W[01–53] for weekly data (e.g. 2020-W01); YYYY-MM-DD for daily data (e.g. 2020–01–01).

### Function to extract data

In [18]:
def extract_data_from_ecb(key, start_date='2020-01'):
    """ 
    Function to extract data from ECB.

    Arguments: key str: URL key
               start_date str:  start date

    Returns:   pandas dataframe with TIME_PERIOD and OBS_VALUE columns
    """
    
    df = ecbdata.get_series(key,
                        start=start_date, detail='dataonly')
    
    df.TIME_PERIOD = pd.to_datetime(df.TIME_PERIOD)
    df = df[['TIME_PERIOD', 'OBS_VALUE']]
    
    return df

In [19]:
help(extract_data_from_ecb)

Help on function extract_data_from_ecb in module __main__:

extract_data_from_ecb(key, start)
    Function to extract data from ECB.
    
    Arguments: key str: URL key
         start str:  start date
    
    Returns:   pandas dataframe with TIME_PERIOD and OBS_VALUE columns



In [20]:
df = extract_data_from_ecb('PFBR.Q.U2.S.S129.L43C.T.4.W0.S1._T.EUR',
                        start='2020-01')

df.head()

Unnamed: 0,TIME_PERIOD,OBS_VALUE
0,2020-01-01,-2766.262
1,2020-04-01,5378.6638
2,2020-07-01,12776.5149
3,2020-10-01,12280.0377
4,2021-01-01,6808.37


In [21]:
df.dtypes

TIME_PERIOD    datetime64[ns]
OBS_VALUE             float64
dtype: object

In [57]:
# plt.figure(figsize=(12,8))
# plt.title('Pension entitlements - DC reported by Pension Funds in the Euro Area (transactions)')
# dfx = df.set_index('TIME_PERIOD')
# dfx.OBS_VALUE.plot()
# plt.show()

# Function to extract data from ALPHA VANTAGE

In [22]:
# first you need an api_key for alphavantage, it's free
api_key = 'EW4A338V8YGLZI3G' # insert your api key

In [23]:
def extract_data_from_alphavantage(key):
    
    """
    Function to extract data (US related) from alphavantage website
    
    Parameters:
        key: str
        choose one string of the list ['UNEMPLOYMENT', 'CPI', REAL_GDP_PER_CAPITA', 'INFLATION']  
    
    Returns: pandas DataFrame with Date column e key column
    """
    
    url = f'https://www.alphavantage.co/query?function={key}&apikey={api_key}'
    r = requests.get(url)
    data = r.json()
    
    df = pd.DataFrame(data['data'])
    df['date'] = pd.to_datetime(df['date'])
    df['value'] = df['value'].astype(float)
    df.columns = ['Date', key]
    #df = df[df['Date'] >= '2020-01-01']
    #df.set_index('Date', inplace=True)
    
    return df

In [24]:
help(extract_data_from_alphavantage)

Help on function extract_data_from_alphavantage in module __main__:

extract_data_from_alphavantage(key)
    Function to extract data (US related) from alphavantage website
    
    Parameters:
        key: str
        choose one string of the list ['UNEMPLOYMENT', 'CPI', REAL_GDP_PER_CAPITA', 'INFLATION']  
    
    Returns: pandas DataFrame with Date column e key column



# US Inflation

In [25]:
df = extract_data_from_alphavantage('INFLATION')
df.head()

Unnamed: 0,Date,INFLATION
0,2023-01-01,4.116338
1,2022-01-01,8.0028
2,2021-01-01,4.697859
3,2020-01-01,1.233584
4,2019-01-01,1.81221


# FX rates 

In [26]:
def get_fx_data(api_key, from_currency, to_currency, start_date='1900-01-01'):
    """
    Function to extract FX rates
    
    Parameters: api_key: str alpha vantage key
                from_currency: str
                to_currency: str
                start_date: str YY-MM-DD, if not defined it will retrieve since the beginning
    
    Returns a pandas DataFrame with Date and FX rates columns
    """
    
    url = f'https://www.alphavantage.co/query?function=FX_DAILY&from_symbol={from_currency}&to_symbol={to_currency}&outputsize=full&apikey={api_key}'
    response = requests.get(url)
    data = response.json()
    
    time_series = data['Time Series FX (Daily)']
    df = pd.DataFrame.from_dict(time_series, orient='index')
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()
    df = df.reset_index()
    
    df[df.columns[1:]] = df[df.columns[1:]].astype(float)
    df = df.rename(columns={'index' : 'Date'})
    
    df = df[df['Date'] >= start_date]
    
    return df

In [27]:
help(get_fx_data)

Help on function get_fx_data in module __main__:

get_fx_data(api_key, from_currency, to_currency, start_date='1900-01-01')
    Function to extract FX rates
    
    Parameters: api_key: str alpha vantage key
                from_currency: str
                to_currency: str
                start_date: str YY-MM-DD, if not defined it will retrieve since the beginning
    
    Returns a pandas DataFrame with Date and FX rates columns



In [28]:
# fx rates extraction - EUR to USD
fx_data = get_fx_data(api_key, 'EUR', 'USD','2020-01-01')
fx_data.head()

Unnamed: 0,Date,1. open,2. high,3. low,4. close
1416,2020-01-01,1.12189,1.12271,1.11668,1.12208
1417,2020-01-02,1.11708,1.11807,1.11257,1.11714
1418,2020-01-05,1.11625,1.12082,1.11581,1.1162
1419,2020-01-06,1.11958,1.11995,1.11349,1.1198
1420,2020-01-07,1.11557,1.11652,1.11109,1.11547


In [29]:
fx_data.dtypes

Date        datetime64[ns]
1. open            float64
2. high            float64
3. low             float64
4. close           float64
dtype: object