In [None]:
#%pip install kagglehub

Note: you may need to restart the kernel to use updated packages.Collecting kagglehub
  Downloading kagglehub-0.3.11-py3-none-any.whl.metadata (32 kB)
Downloading kagglehub-0.3.11-py3-none-any.whl (63 kB)
Installing collected packages: kagglehub
Successfully installed kagglehub-0.3.11




[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import plotly.express as px
import plotly.graph_objects as go
import kagglehub
from kagglehub import KaggleDatasetAdapter
from statsmodels.tsa.seasonal import STL


  from .autonotebook import tqdm as notebook_tqdm


## 1. Data

In [2]:
# Load the dolarbo quotes; the first spreadsheet column becomes the index and
# is then converted to a DatetimeIndex.
dolarbo = pd.read_excel('./DATA/dolarbo_time_series.xlsx', index_col=0)
dolarbo = dolarbo.set_axis(pd.to_datetime(dolarbo.index), axis=0)

In [3]:
usdt_luis = pd.read_excel('./DATA/usdt_lucho.xlsx')
# Header cells may carry stray spaces; normalise them before selecting columns
usdt_luis.columns = usdt_luis.columns.str.strip()
# Merge the separate 'Fecha' and 'hora' columns into a single datetime index
# named 'timestamp', then discard the two source columns.
usdt_luis = (
    usdt_luis
    .assign(timestamp=lambda df: pd.to_datetime(df['Fecha'].astype(str) + " " + df['hora'].astype(str)))
    .set_index('timestamp')
    .drop(columns=['Fecha', 'hora'])
)
usdt_luis

Unnamed: 0_level_0,Compra,Venta
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-09-21 18:00:00,7.40,7.55
2023-09-22 18:00:00,7.40,7.55
2023-11-18 10:00:00,7.41,7.56
2023-12-12 17:00:00,7.55,7.65
2024-02-22 09:47:00,8.12,8.40
...,...,...
2025-03-11 16:56:00,11.10,11.60
2025-03-12 12:34:00,11.20,11.70
2025-03-12 18:26:00,11.50,12.00
2025-03-12 23:11:00,11.60,12.20


In [4]:
usdtbol = pd.read_excel('./DATA/usdtbol_time_series.xlsx')
# 'category' is parsed with '%a %b %d %Y' (weekday, month abbreviation, day,
# year), so the resulting 'timestamp' index carries dates only and several
# quotes can share the same index value.
usdtbol = (
    usdtbol
    .assign(timestamp=lambda df: pd.to_datetime(df['category'], format='%a %b %d %Y'))
    .set_index('timestamp')
    .drop(columns=['category'])
)
usdtbol

Unnamed: 0_level_0,Bs
timestamp,Unnamed: 1_level_1
2024-08-06,12.89
2024-08-06,12.86
2024-08-06,12.82
2024-08-06,12.72
2024-08-06,12.44
...,...
2025-04-14,13.43
2025-04-14,13.42
2025-04-14,13.41
2025-04-14,13.43


In [5]:
# Collapse the repeated same-day quotes into one mean value per calendar day
usdtbol_day = usdtbol.resample('D').mean()

In [6]:
# dolarbo buy/sell quotes against the usdtbol.com daily mean.
# The usdt_luis series is left out of this chart.
fig = go.Figure(data=[
    go.Scatter(x=dolarbo.index, y=dolarbo['compra'], mode='lines', name='Buy', line={'color': 'blue'}),
    go.Scatter(x=dolarbo.index, y=dolarbo['venta'], mode='lines', name='Sell', line={'color': 'red'}),
    go.Scatter(x=usdtbol_day.index, y=usdtbol_day['Bs'], mode='lines', name='usdtbol.com', line={'color': 'green'}),
])
fig.update_layout(
    title='BOB/USDT',
    xaxis_title='Timestamp',
    yaxis_title='Price',
    legend_title='Type',
    template='plotly_white',
)

fig.show()

## 2. Ultra-High Frequency Data

Historical USDT/BOB exchange rate data at hourly frequency [(More info)](https://www.kaggle.com/datasets/darlynbravo/history-exchange-rate-usdtbob).

- **price:** exchange rate
- **available:** amount available
- **advertisers_qty:** quantity of advertisers
- **type:** bid (the highest price a buyer will pay) or ask (the lowest price a seller will accept)
- **timestamp:** captured datetime
- **curr_from:** Currency from
- **curr_to:** Currency to
- **source:** source of data

|                              | Bid                                     | Ask                                      |
|------------------------------|-----------------------------------------|------------------------------------------|
| Definition                   | Price to sell an asset                  | Price to buy an asset                    |
| Relationship to market price | Always slightly lower than market price | Always slightly higher than market price |

Factors that affect bid and ask prices: 
- Market liquidity
- Trading volume
- Supply and demand of currencies
- Overall market volatility
- Economic releases
- Bank holidays   


Measuring the bid-ask spread: The smallest whole unit measurement of the bid-ask spread is called a pip. One pip equals 1/100 of 1%, or 0.0001. 

In [7]:
# Load the Kaggle USDT/BOB history and index it by the capture time
kaggle_df = pd.read_excel('./DATA/kaggle_currency_exchange_rates.xlsx')
kaggle_df.index = pd.to_datetime(kaggle_df['timestamp'])

# Split the quotes by side of the book
kaggle_bid = kaggle_df.loc[kaggle_df['type'].eq("bid")]
kaggle_ask = kaggle_df.loc[kaggle_df['type'].eq("ask")]

In [8]:
# Per-minute mean price for each side of the book; minutes without any quote
# are dropped. 'min' replaces the single-letter 'T' alias, which is deprecated
# since pandas 2.2, and is also understood by older releases.
hourly_minute_bid = kaggle_bid['price'].resample('min').mean().dropna()

hourly_minute_ask = kaggle_ask['price'].resample('min').mean().dropna()

In [9]:
# Per-second mean price for each side of the book; seconds without any quote
# are dropped. Lowercase 's' replaces the 'S' alias, which is deprecated since
# pandas 2.2, and is also understood by older releases.
seconds_bid = kaggle_bid['price'].resample('s').mean().dropna()
seconds_ask = kaggle_ask['price'].resample('s').mean().dropna()

In [10]:
# Quick check of the per-second bid series: index span, entry count and dtype
seconds_bid.info()

<class 'pandas.core.series.Series'>
DatetimeIndex: 634 entries, 2024-07-09 17:43:37 to 2024-09-02 13:30:03
Series name: price
Non-Null Count  Dtype  
--------------  -----  
634 non-null    float64
dtypes: float64(1)
memory usage: 9.9 KB


In [11]:
# Bid vs. ask using the per-second means
fig = go.Figure(data=[
    go.Scatter(x=seconds_bid.index, y=seconds_bid.values, mode='lines', name='Bid Price', line={'color': 'blue'}),
    go.Scatter(x=seconds_ask.index, y=seconds_ask.values, mode='lines', name='Ask Price', line={'color': 'red'}),
])
fig.update_layout(
    title='Bid and Ask Prices Over Time',
    xaxis_title='Timestamp',
    yaxis_title='Price',
    legend_title='Type',
    template='plotly_white',
)

fig.show()

In [12]:
# Bid vs. ask using the per-minute means
fig = go.Figure(data=[
    go.Scatter(x=series.index, y=series.values, mode='lines', name=label, line=dict(color=colour))
    for series, label, colour in (
        (hourly_minute_bid, 'Bid Price', 'blue'),
        (hourly_minute_ask, 'Ask Price', 'red'),
    )
])
fig.update_layout(
    title='Bid and Ask Prices Over Time',
    xaxis_title='Timestamp',
    yaxis_title='Price',
    legend_title='Type',
    template='plotly_white',
)

fig.show()

In [13]:
# Overlay the dolarbo buy/sell quotes on the Kaggle per-minute bid/ask means
fig = go.Figure(data=[
    go.Scatter(x=dolarbo.index, y=dolarbo['compra'], mode='lines', name='Buy', line={'color': 'blue'}),
    go.Scatter(x=dolarbo.index, y=dolarbo['venta'], mode='lines', name='Sell', line={'color': 'red'}),
    go.Scatter(x=hourly_minute_bid.index, y=hourly_minute_bid.values, mode='lines', name='Bid Price', line={'color': 'pink'}),
    go.Scatter(x=hourly_minute_ask.index, y=hourly_minute_ask.values, mode='lines', name='Ask Price', line={'color': 'orange'}),
])
fig.update_layout(
    title='BOB/USDT',
    xaxis_title='Timestamp',
    yaxis_title='Price',
    legend_title='Type',
    template='plotly_white',
)

fig.show()

In [14]:
# Month-end mean of the 'price' column over all rows of kaggle_df (bid and ask
# quotes pooled together). Note the result is a Series despite the name.
# A MonthEnd offset object is passed instead of the 'M' alias: pandas 2.2
# deprecated 'M' in favour of 'ME', an alias older releases do not recognise,
# whereas the offset object works on both.
resampled_df = kaggle_df['price'].resample(pd.offsets.MonthEnd()).mean()
resampled_df

timestamp
2024-07-31     9.783415
2024-08-31    10.668275
2024-09-30    10.453263
Freq: M, Name: price, dtype: float64

## 3. EPU Index

In [100]:
# Load the Bolivia EPU index; the first CSV column becomes a DatetimeIndex
epu_day = pd.read_csv('./DATA/EPU_index_bolivia.csv', index_col=0)
epu_day = epu_day.set_axis(pd.to_datetime(epu_day.index), axis=0)
epu_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1695 entries, 2019-04-03 to 2025-04-11
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   EPU_index       1695 non-null   float64
 1   EPU_index_norm  1695 non-null   float64
dtypes: float64(2)
memory usage: 39.7 KB


## 4. DSGE Data

In [101]:
# Month-end mean of the daily EPU index. The MonthEnd offset object replaces
# the 'M' alias, which pandas 2.2 deprecated in favour of 'ME' (an alias that
# older releases reject); the offset object works on both.
epu_month = epu_day.resample(pd.offsets.MonthEnd()).mean()
epu_month

Unnamed: 0_level_0,EPU_index,EPU_index_norm
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-30,0.000000,0.000000
2019-05-31,0.000889,21.973180
2019-06-30,0.003551,87.769447
2019-07-31,0.002829,69.908573
2019-08-31,0.000000,0.000000
...,...,...
2024-12-31,0.005594,138.246011
2025-01-31,0.009096,224.817445
2025-02-28,0.005862,144.874381
2025-03-31,0.012485,308.569580


In [77]:
# Month-end means of both parallel-market sources. The MonthEnd offset object
# replaces the 'M' alias, which pandas 2.2 deprecated in favour of 'ME' (an
# alias that older releases reject); the offset object works on both.
usdtbol_month = usdtbol.resample(pd.offsets.MonthEnd()).mean()
dolarbo_month = dolarbo.resample(pd.offsets.MonthEnd()).mean()

# Outer join on the month-end index keeps every month present in either source
monthly_usdt = pd.merge(usdtbol_month, dolarbo_month, how='outer', left_index=True, right_index=True)
# Positional rename: usdtbol's single column first, then dolarbo's two columns
monthly_usdt.columns = ['usdtbol', 'dolarbo_compra', 'dolarbo_venta']
# Linear fill in the forward direction only, so months that precede a series'
# first observation remain NaN.
monthly_usdt = monthly_usdt.interpolate(method='linear', limit_direction='forward')

monthly_usdt

Unnamed: 0_level_0,usdtbol,dolarbo_compra,dolarbo_venta
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-09-30,,7.4,7.55
2023-10-31,,7.405,7.555
2023-11-30,,7.41,7.56
2023-12-31,,7.55,7.65
2024-01-31,,7.835,8.0275
2024-02-29,,8.12,8.405
2024-03-31,,8.146667,8.246667
2024-04-30,,8.321667,8.471667
2024-05-31,,8.496667,8.696667
2024-06-30,,9.037778,9.11


In [102]:
igae_ipc = pd.read_excel('./DATA/igae_ipc.xlsx', index_col=0)
# Round-trip through monthly periods to re-stamp the index
igae_ipc.index = igae_ipc.index.to_period('M').to_timestamp('M')

# STL decompositions (seasonal=13) of 'igae' and 'ipc'. Each seasonally
# adjusted series is the observed series minus its estimated seasonal component.
igae_decomposition = STL(igae_ipc['igae'], seasonal=13).fit()
ipc_decomposition = STL(igae_ipc['ipc'], seasonal=13).fit()
igae_ipc = igae_ipc.assign(
    igae_sa=igae_ipc['igae'] - igae_decomposition.seasonal,
    ipc_sa=igae_ipc['ipc'] - ipc_decomposition.seasonal,
)

# Overwrite the exchange rate from 2023-09 through 2024-12 with the monthly
# dolarbo sell quote (aligned on the index), then fill what is still missing
# by forward linear interpolation.
igae_ipc.loc['2023-09-30':'2024-12-31', 'exchange'] = monthly_usdt.loc['2023-09-30':'2024-12-31', 'dolarbo_venta']
igae_ipc['exchange'] = igae_ipc['exchange'].interpolate(method='linear', limit_direction='forward')

# Normalised EPU for 2019-04 through 2024-12; every other month is NaN
igae_ipc['epu'] = (
    epu_month
    .loc['2019-04-30':'2024-12-31', 'EPU_index_norm']
    .reindex(igae_ipc.index)
)

igae_ipc.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 420 entries, 1990-01-31 to 2024-12-31
Freq: M
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   igae      420 non-null    float64
 1   ipc       420 non-null    float64
 2   exchange  420 non-null    float64
 3   igae_sa   420 non-null    float64
 4   ipc_sa    420 non-null    float64
 5   epu       63 non-null     float64
dtypes: float64(6)
memory usage: 39.1 KB


In [103]:
# IGAE: observed vs. seasonally adjusted
fig = go.Figure(data=[
    go.Scatter(x=igae_ipc.index, y=igae_ipc[col], mode='lines', name=label, line=dict(color=colour))
    for col, label, colour in (
        ('igae', 'Observed', 'blue'),
        ('igae_sa', 'SA Adjusted', 'red'),
    )
])
fig.update_layout(
    title='IGAE',
    xaxis_title='',
    yaxis_title='Index Units',
    legend_title='',
    template='plotly_white',
)
fig.show()

In [104]:
# IPC: observed vs. seasonally adjusted
fig = go.Figure(data=[
    go.Scatter(x=igae_ipc.index, y=igae_ipc[col], mode='lines', name=label, line=dict(color=colour))
    for col, label, colour in (
        ('ipc', 'Observed', 'blue'),
        ('ipc_sa', 'SA Adjusted', 'red'),
    )
])
fig.update_layout(
    title='IPC',
    xaxis_title='',
    yaxis_title='Index Units',
    legend_title='',
    template='plotly_white',
)
fig.show()

In [105]:
# Exchange-rate series after the fill performed when igae_ipc was built
fig = go.Figure(data=[
    go.Scatter(x=igae_ipc.index, y=igae_ipc['exchange'], mode='lines', name='', line={'color': 'blue'}),
])
fig.update_layout(
    title='Exchange Rate',
    xaxis_title='',
    yaxis_title='BOB/USD',
    legend_title='',
    template='plotly_white',
)
fig.show()

In [106]:
# Derived observables, all scaled by 100:
#   inf        one-period percentage change of the seasonally adjusted IPC
#   g12_y      12-period percentage change of the seasonally adjusted IGAE
#   igae_trend STL trend component of IGAE
#   g_gap      observed IGAE relative to trend
#   g_gap_sa   seasonally adjusted IGAE relative to trend
#   e_dev      log deviation of the exchange rate from the official Bs6.96
igae_ipc = igae_ipc.assign(
    inf=igae_ipc['ipc_sa'].pct_change(1) * 100,
    g12_y=igae_ipc['igae_sa'].pct_change(12) * 100,
    igae_trend=igae_decomposition.trend,
    g_gap=lambda df: ((df['igae'] / df['igae_trend']) - 1) * 100,
    g_gap_sa=lambda df: ((df['igae_sa'] / df['igae_trend']) - 1) * 100,
    e_dev=(np.log(igae_ipc['exchange']) - np.log(6.96)) * 100,
)

In [107]:
# IGAE in levels: observed, seasonally adjusted, and STL trend.
# These are index levels, so the y-axis is labelled 'Index Units' to match
# the other IGAE chart (the previous 'Percentage Points' label was wrong).
fig = go.Figure()
fig.add_trace(go.Scatter(x=igae_ipc.index, y=igae_ipc['igae'], mode='lines', name='Observed', line=dict(color='gray')))
fig.add_trace(go.Scatter(x=igae_ipc.index, y=igae_ipc['igae_sa'], mode='lines', name='SA Adjusted', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=igae_ipc.index, y=igae_ipc['igae_trend'], mode='lines', name='Trend', line=dict(color='red')))
fig.update_layout(
    xaxis_title='',
    yaxis_title='Index Units',
    legend_title='',
    template='plotly_white'
)
fig.show()

In [108]:
# Inflation, exchange-rate deviation and both output-gap measures on one axis
fig = go.Figure(data=[
    go.Scatter(x=igae_ipc.index, y=igae_ipc[col], mode='lines', name=label, line=dict(color=colour))
    for col, label, colour in (
        ('inf', 'SA Inflation', 'blue'),
        ('e_dev', 'Exchange Dev.', 'pink'),
        ('g_gap', 'Output Gap', 'red'),
        ('g_gap_sa', 'SA Output Gap', 'green'),
    )
])
fig.update_layout(
    xaxis_title='',
    yaxis_title='Percentage Points',
    legend_title='',
    template='plotly_white',
)
fig.show()

In [112]:
# List the columns now available in igae_ipc
igae_ipc.columns

Index(['igae', 'ipc', 'exchange', 'igae_sa', 'ipc_sa', 'epu', 'inf', 'g12_y',
       'igae_trend', 'g_gap', 'g_gap_sa', 'e_dev'],
      dtype='object')

In [119]:
# Observables for 2023-06 through 2024-12: inflation, seasonally adjusted
# output gap, exchange-rate deviation, and the EPU index demeaned over this
# same window (stored as 's_obs'; the raw 'epu' column is not kept).
bolivia_data = (
    igae_ipc
    .loc['2023-06-01':'2024-12-31', ['inf', 'g_gap_sa', 'e_dev', 'epu']]
    .rename(columns={'inf': 'pi_obs', 'g_gap_sa': 'y_obs', 'e_dev': 'e_obs'})
    .assign(s_obs=lambda df: df['epu'] - df['epu'].mean())
    .drop(columns=['epu'])
)

bolivia_data

Unnamed: 0,pi_obs,y_obs,e_obs,s_obs
2023-06-30,0.177483,-0.080972,0.0,-47.009831
2023-07-31,0.337401,-0.520343,2.786485,-70.159793
2023-08-31,-0.046644,0.114465,5.497425,-42.958091
2023-09-30,0.135911,-0.226984,8.136809,-96.61002
2023-10-31,-0.36165,1.195786,8.203012,-75.069437
2023-11-30,-0.265699,0.433262,8.269172,-24.667377
2023-12-31,0.507429,-0.665096,9.452617,6.836899
2024-01-31,0.754448,-0.855065,14.269367,96.608
2024-02-29,0.530736,-0.982797,18.864729,97.246344
2024-03-31,0.619282,-3.252619,16.96296,-34.954014


In [120]:
# Summary statistics of the four observables
bolivia_data.describe()

Unnamed: 0,pi_obs,y_obs,e_obs,s_obs
count,19.0,19.0,19.0,19.0
mean,0.523221,0.071964,22.815895,9.723216e-15
std,0.461309,1.697885,17.19667,58.21333
min,-0.36165,-3.252619,0.0,-96.61002
25%,0.257442,-0.76008,8.236092,-44.98396
50%,0.530736,-0.226984,18.864729,7.535131
75%,0.823507,0.958832,41.010862,32.16109
max,1.241081,3.570509,49.331533,97.24634


In [123]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on every observable (maxlag=1); a series is
# reported as stationary when the p-value falls below 0.05.
for column in bolivia_data.columns:
    adf_stat, p_value = adfuller(bolivia_data[column].dropna(), maxlag=1)[:2]
    verdict = "Stationary" if p_value < 0.05 else "Non-stationary"
    print(f"ADF Statistic for {column}: {adf_stat}")
    print(f"p-value for {column}: {p_value}")
    print(verdict)
    print("-" * 50)

ADF Statistic for pi_obs: -1.4334698292173318
p-value for pi_obs: 0.5661292924706354
Non-stationary
--------------------------------------------------
ADF Statistic for y_obs: -2.885161098565763
p-value for y_obs: 0.04710097084974228
Stationary
--------------------------------------------------
ADF Statistic for e_obs: -0.19363341276611243
p-value for e_obs: 0.9392762550316617
Non-stationary
--------------------------------------------------
ADF Statistic for s_obs: -2.2405323442425273
p-value for s_obs: 0.19186468189007927
Non-stationary
--------------------------------------------------


In [126]:
# Write the observables to an Excel file in the DSGE folder
bolivia_data.to_excel('./DSGE/bolivia_data.xlsx')