In [33]:
#%pip install kagglehub

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import plotly.express as px
import plotly.graph_objects as go
import kagglehub
from kagglehub import KaggleDatasetAdapter
from statsmodels.tsa.seasonal import STL


  from .autonotebook import tqdm as notebook_tqdm


# <font color="orange">**Cryptocurrency Market Sentiment and Inflation Dynamics: Evidence from Bolivia** - *Data Preprocessing*</font>

**Author:** Osmar Bolivar

## 1. Data for Daily Inflation Forecast

In [None]:
# Load the daily feature panel and close its gaps: carry the last observation
# forward first, then back-fill whatever is still missing at the start.
daily_feats = (
    pd.read_excel('./IPC_forecast/daily_features.xlsx', index_col=0)
    .ffill()
    .bfill()
)
daily_feats.info()

In [None]:
# Weekly means of the daily features, computed on a copy of the daily panel.
weekly_source = daily_feats.copy()
weekly_feats = weekly_source.resample('W').mean()
weekly_feats.info()

In [None]:
# Monthly means of the daily features, computed on a copy of the daily panel.
monthly_source = daily_feats.copy()
monthly_feats = monthly_source.resample('M').mean()
monthly_feats.info()

In [None]:
# Monthly CPI target plus its own lags as autoregressive features.
cpi_raw = pd.read_excel('./IPC_forecast/ipc.xlsx', index_col=0)
# Roll every date forward to its month end (dates already at month end are unchanged).
cpi_raw.index = cpi_raw.index + pd.offsets.MonthEnd(0)

# Lag orders (in rows, i.e. months) of 'ipc_all' used as features.
CPI_LAGS = (1, 2, 3, 6, 9, 12)
for lag in CPI_LAGS:
    cpi_raw[f"lag_{lag}"] = cpi_raw['ipc_all'].shift(lag)

# Drop every row with a missing value (this removes at least the first 12 rows,
# whose lag_12 is undefined), then discard the 'ipc' column.
cpi_raw = cpi_raw.dropna(axis=0).drop(columns=['ipc'])

# Lags for the month following the last observation: its k-th lag is the k-th
# most recent observed value. `.iloc` makes the positional lookup explicit;
# integer keys on a date-indexed Series (`series[-1]`) are deprecated.
# NOTE(review): assumes the last observed month is 2025-03 — confirm.
apr2025_lags = [cpi_raw['ipc_all'].iloc[-lag] for lag in CPI_LAGS]
# Append the 2025-04 row holding only the lag features ('ipc_all' stays NaN).
cpi_raw.loc[pd.Timestamp('2025-04-30'), 'lag_1':'lag_12'] = apr2025_lags

#cpi_raw.tail(14)

In [None]:
#monthly_lags = cpi_raw.iloc[:,1:].copy()

# Attach the monthly CPI columns to every daily row (right join keeps the daily
# index), then back-fill so each day carries the next available monthly values.
daily_feats_adj = cpi_raw.merge(
    daily_feats, how='right', left_index=True, right_index=True
).bfill()
# Days from 2025-04-01 onward get the lag values prepared for April 2025.
april_onward = slice('2025-04-01', None)
daily_feats_adj.loc[april_onward, 'lag_1':'lag_12'] = apr2025_lags

daily_feats_adj.info()

In [None]:
# Monthly modelling table: CPI target and lags joined onto the monthly feature means
# (right join keeps every month present in monthly_feats).
monthly_target_feats = cpi_raw.merge(
    monthly_feats, how='right', left_index=True, right_index=True
)
monthly_target_feats.info()

In [None]:
#daily_feats_adj.to_excel('./IPC_forecast/daily_feats_adj.xlsx')
#monthly_target_feats.to_excel('./IPC_forecast/monthly_target_feats.xlsx')

#daily_feats_adj.to_csv('./IPC_forecast/daily_test_set.csv')
#monthly_target_feats.to_csv('./IPC_forecast/monthly_train_val_sets.csv')

## 2. Data

### 2.1. BOB/USDT

In [4]:
# The code below allows a manual adjustment of "usdtbol" prior to 2024-08-06 using the "dolarbo_usdt" trajectory

#h_dolarbo = pd.read_excel('./DATA/DATA_USDT.xlsx', sheet_name='dolarbo.com', index_col=0)
#d_dolarbo = h_dolarbo.resample('D').mean()
#h_usdtbol = pd.read_excel('./DATA/DATA_USDT.xlsx', sheet_name='usdtbol.com', index_col=0)
#d_usdtbol = h_usdtbol.resample('D').mean()
#h_to_d_usdt = d_dolarbo.merge(d_usdtbol, how='outer', left_index=True, right_index=True)
#h_to_d_usdt.to_excel('./DATA/h_to_d_usdt.xlsx')

In [5]:
# Daily BOB/USDT quote plus its changes over several horizons.
usdt_day = pd.read_excel('./DATA/DAILY_USDT.xlsx', index_col=0)
# Interpolate USDT missing values (time-weighted). The result is assigned back
# to the column: calling interpolate(inplace=True) on `usdt_day['usdt']` is a
# chained in-place update that may leave the DataFrame itself unchanged.
usdt_day['usdt'] = usdt_day['usdt'].interpolate(method='time')

# Horizons are counted in rows.
# NOTE(review): they equal calendar days only if the index has one row per day — confirm.
USDT_HORIZONS = (1, 7, 30, 365)

# Absolute change of the quote over each horizon.
for horizon in USDT_HORIZONS:
    usdt_day[f'depre_{horizon}'] = usdt_day['usdt'].diff(horizon)

# Percentage change of the quote over each horizon.
for horizon in USDT_HORIZONS:
    usdt_day[f'usdt_g{horizon}'] = usdt_day['usdt'].pct_change(horizon) * 100

usdt_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1200 entries, 2022-01-01 to 2025-04-14
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   usdt       1200 non-null   float64
 1   depre_1    1199 non-null   float64
 2   depre_7    1193 non-null   float64
 3   depre_30   1170 non-null   float64
 4   depre_365  835 non-null    float64
 5   usdt_g1    1199 non-null   float64
 6   usdt_g7    1193 non-null   float64
 7   usdt_g30   1170 non-null   float64
 8   usdt_g365  835 non-null    float64
dtypes: float64(9)
memory usage: 93.8 KB


In [6]:
# Line chart of the daily BOB/USDT quote.
usdt_trace = go.Scatter(
    x=usdt_day.index,
    y=usdt_day['usdt'],
    mode='lines',
    name='BOB/USDT',
    line=dict(color='blue'),
)
usdt_layout = {
    'title': 'BOB/USDT',
    'xaxis_title': 'Timestamp',
    'yaxis_title': 'Price',
    'legend_title': 'Type',
    'template': 'plotly_white',
}

fig = go.Figure()
fig.add_trace(usdt_trace)
fig.update_layout(**usdt_layout)

fig.show()

### 2.2. Uncertainty Index

In [7]:
# Daily uncertainty index (EPU), restricted to dates after 2020-05-03.
epu_day = pd.read_excel('./DATA/DAILY_EPU.xlsx', index_col=0)
# Take an explicit copy of the filtered rows so the fills below act on an
# independent frame rather than on a slice of the loaded data.
epu_day = epu_day.loc[epu_day.index > '2020-05-03'].copy()

# Fill value for missing 'epu_norm'.
# NOTE(review): presumably the normalized value that corresponds to epu == 0 — confirm.
EPU_NORM_FILL = -1.41162186910918
# Per-column fill, assigned back instead of `epu_day[col].fillna(..., inplace=True)`,
# which is a chained in-place update that may not reach the DataFrame.
epu_day = epu_day.fillna({'epu': 0, 'epu_norm': EPU_NORM_FILL})
epu_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1808 entries, 2020-05-04 to 2025-04-15
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   epu       1808 non-null   float64
 1   epu_norm  1808 non-null   float64
dtypes: float64(2)
memory usage: 42.4 KB


In [8]:
# Rolling mean / median / max of the EPU index over 7-, 15- and 30-row windows.
# Columns are created statistic by statistic, e.g. '7mean_epu', '15mean_epu', ...
for stat_name in ('mean', 'median', 'max'):
    for window in (7, 15, 30):
        rolling_epu = epu_day['epu'].rolling(window=window)
        epu_day[f'{window}{stat_name}_epu'] = getattr(rolling_epu, stat_name)()

In [9]:
# STL decompositions of the EPU index with period 365, differing only in the
# `seasonal` smoother setting (7, 15, 31); the trend component of each is kept.
epu_stl_7, epu_stl_15, epu_stl_31 = (
    STL(epu_day['epu'], period=365, seasonal=seasonal_span)
    for seasonal_span in (7, 15, 31)
)

epu_stl_fit_7 = epu_stl_7.fit()
epu_stl_fit_15 = epu_stl_15.fit()
epu_stl_fit_31 = epu_stl_31.fit()

epu_day['epu_trend_7'] = epu_stl_fit_7.trend
epu_day['epu_trend_15'] = epu_stl_fit_15.trend
epu_day['epu_trend_31'] = epu_stl_fit_31.trend

In [10]:
# Percentage change of the 'epu_trend_7' series over 1, 7, 30 and 365 rows.
for horizon in (1, 7, 30, 365):
    trend_growth = epu_day['epu_trend_7'].pct_change(horizon)
    epu_day[f'epu_trend_7_g{horizon}'] = trend_growth * 100

In [11]:
# Summary of the EPU frame after adding the rolling, trend and growth columns.
epu_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1808 entries, 2020-05-04 to 2025-04-15
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   epu               1808 non-null   float64
 1   epu_norm          1808 non-null   float64
 2   7mean_epu         1802 non-null   float64
 3   15mean_epu        1794 non-null   float64
 4   30mean_epu        1779 non-null   float64
 5   7median_epu       1802 non-null   float64
 6   15median_epu      1794 non-null   float64
 7   30median_epu      1779 non-null   float64
 8   7max_epu          1802 non-null   float64
 9   15max_epu         1794 non-null   float64
 10  30max_epu         1779 non-null   float64
 11  epu_trend_7       1808 non-null   float64
 12  epu_trend_15      1808 non-null   float64
 13  epu_trend_31      1808 non-null   float64
 14  epu_trend_7_g1    1807 non-null   float64
 15  epu_trend_7_g7    1801 non-null   float64
 16  epu_trend_7_g30   1778 n

In [12]:
# EPU index and its 7-row summaries on the left axis, BOB/USDT quote on the right axis.
# Titles and axis labels name the series that are actually drawn on each axis.
epu_trace_specs = [
    dict(y=epu_day['epu'], name='EPU', line=dict(color='blue')),
    dict(y=epu_day['7mean_epu'], name='EPU 7-day mean.', line=dict(color='pink')),
    dict(y=epu_day['7median_epu'], name='EPU 7-day median.'),
    dict(y=epu_day['7max_epu'], name='EPU 7-day max.'),
    dict(y=epu_day['epu_trend_7'], name='EPU Trend', line=dict(color='black')),
]

fig = go.Figure()
for trace_spec in epu_trace_specs:
    fig.add_trace(go.Scatter(x=epu_day.index, mode='lines', **trace_spec))
# The USDT quote is the only series bound to the secondary axis (y2).
fig.add_trace(go.Scatter(x=usdt_day.index, y=usdt_day['usdt'], mode='lines', name='USDT Sell', line=dict(color='red'), yaxis='y2'))
fig.update_layout(
    yaxis2=dict(
        title="BOB/USDT",
        overlaying='y',
        side='right'),
    title='Uncertainty Index (EPU) and BOB/USDT',
    xaxis_title='Timestamp',
    yaxis_title='EPU index',
    legend_title='Type',
    template='plotly_white'
)

fig.show()

### 2.3. Inflation

In [13]:
# Daily inflation forecasts; the first CSV column becomes the index and is
# converted to datetimes.
inf_day = pd.read_csv('./IPC_forecast/daily_forecast.csv', index_col=0)
inf_day = inf_day.set_axis(pd.to_datetime(inf_day.index), axis=0)
inf_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5580 entries, 2010-01-01 to 2025-04-11
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   cpi         183 non-null    float64
 1   ridge       5580 non-null   float64
 2   lasso       5580 non-null   float64
 3   ada         5580 non-null   float64
 4   gbr         5580 non-null   float64
 5   rf          5580 non-null   float64
 6   et          5580 non-null   float64
 7   set         5580 non-null   object 
 8   min         5580 non-null   float64
 9   max         5580 non-null   float64
 10  w_avg       5580 non-null   float64
 11  w_avg_best  5580 non-null   float64
 12  forecast    5580 non-null   float64
dtypes: float64(12), object(1)
memory usage: 610.3+ KB


In [14]:
# EPU index and its 7-row summaries on the left axis; BOB/USDT quote and the
# inflation forecast share the right axis (y2).
# Titles and axis labels name the series that are actually drawn on each axis.
epu_trace_specs = [
    dict(y=epu_day['epu'], name='EPU', line=dict(color='#335b9c')),
    dict(y=epu_day['7mean_epu'], name='EPU 7-day mean.', line=dict(color='#4472c4')),
    dict(y=epu_day['7median_epu'], name='EPU 7-day median.', line=dict(color='#698ecf')),
    dict(y=epu_day['7max_epu'], name='EPU 7-day max.', line=dict(color='#8eaadb')),
    dict(y=epu_day['epu_trend_7'], name='EPU Trend', line=dict(color='#7e82f7')),
]

fig = go.Figure()
for trace_spec in epu_trace_specs:
    fig.add_trace(go.Scatter(x=epu_day.index, mode='lines', **trace_spec))
fig.add_trace(go.Scatter(x=usdt_day.index, y=usdt_day['usdt'], mode='lines', name='USDT Sell', line=dict(color='#ff106a'), yaxis='y2'))
fig.add_trace(go.Scatter(x=inf_day.index, y=inf_day['forecast'], mode='lines', name='y-o-y Inflation', line=dict(color='#55ff00'), yaxis='y2'))
fig.update_layout(
    yaxis2=dict(
        title="BOB/USDT and y-o-y Inflation",
        overlaying='y',
        side='right'),
    title='Uncertainty Index (EPU), BOB/USDT and Inflation',
    xaxis_title='Timestamp',
    yaxis_title='EPU index',
    legend_title='Type',
    template='plotly_white'
)

fig.show()

### 2.4. Interest Rate

In [25]:
# Daily interbank rate and its change relative to the value 365 rows earlier.
interbank_day = pd.read_excel('./DATA/Tasa_Interbancaria.xlsx', index_col=0)
interbank_day['interbank_365'] = interbank_day['interbank'].diff(365)
interbank_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1201 entries, 2022-01-01 to 2025-04-15
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   interbank      1201 non-null   float64
 1   interbank_365  836 non-null    float64
dtypes: float64(2)
memory usage: 28.1 KB


### 2.5. Daily Dataframe

In [26]:
# Daily analysis panel for 2023-01-01 .. 2025-04-11: USDT features, the
# inflation forecast and the EPU features, all left-joined on the date index.
daily_df = (
    usdt_day.loc['2023-01-01':'2025-04-11']
    .copy()
    .merge(inf_day[['forecast']], how='left', left_index=True, right_index=True)
    .merge(epu_day, how='left', left_index=True, right_index=True)
)

# Sentiment = 30-row mean EPU standardized with the mean and standard
# deviation of this sample window.
epu_30mean = daily_df['30mean_epu']
daily_df['sentiment'] = (epu_30mean - epu_30mean.mean()) / epu_30mean.std()

# Interbank-rate columns are appended last, after 'sentiment'.
daily_df = daily_df.merge(interbank_day, how='left', left_index=True, right_index=True)

daily_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 832 entries, 2023-01-01 to 2025-04-11
Data columns (total 31 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   usdt              832 non-null    float64
 1   depre_1           832 non-null    float64
 2   depre_7           832 non-null    float64
 3   depre_30          832 non-null    float64
 4   depre_365         832 non-null    float64
 5   usdt_g1           832 non-null    float64
 6   usdt_g7           832 non-null    float64
 7   usdt_g30          832 non-null    float64
 8   usdt_g365         832 non-null    float64
 9   forecast          832 non-null    float64
 10  epu               832 non-null    float64
 11  epu_norm          832 non-null    float64
 12  7mean_epu         832 non-null    float64
 13  15mean_epu        832 non-null    float64
 14  30mean_epu        832 non-null    float64
 15  7median_epu       832 non-null    float64
 16  15median_epu      832 non

In [27]:
# Pairwise Pearson correlations between all columns of the daily panel.
daily_correlation_matrix = daily_df.corr(method='pearson')
daily_correlation_matrix

Unnamed: 0,usdt,depre_1,depre_7,depre_30,depre_365,usdt_g1,usdt_g7,usdt_g30,usdt_g365,forecast,...,epu_trend_7,epu_trend_15,epu_trend_31,epu_trend_7_g1,epu_trend_7_g7,epu_trend_7_g30,epu_trend_7_g365,sentiment,interbank,interbank_365
usdt,1.0,0.094693,0.215762,0.375627,0.992413,0.087204,0.209384,0.353359,0.986227,0.869449,...,0.916084,0.902131,0.898481,-0.238917,-0.228333,-0.191214,0.636125,0.659808,-0.056544,-0.243362
depre_1,0.094693,1.0,0.430383,0.235108,0.093512,0.989689,0.436151,0.242797,0.090419,0.051287,...,0.061645,0.061494,0.061478,-0.022032,-0.021624,-0.01784,0.036044,0.038555,0.027189,0.022235
depre_7,0.215762,0.430383,1.0,0.518132,0.217472,0.421156,0.988518,0.535226,0.212872,0.085151,...,0.125868,0.127158,0.127686,-0.041636,-0.041114,-0.03418,0.083259,0.090988,-0.002096,-0.029196
depre_30,0.375627,0.235108,0.518132,1.0,0.37536,0.241065,0.543811,0.988857,0.368204,0.145227,...,0.249305,0.259001,0.262509,-0.059101,-0.058941,-0.053461,0.211046,0.272192,-0.091698,0.018988
depre_365,0.992413,0.093512,0.217472,0.37536,1.0,0.086592,0.212582,0.356097,0.998573,0.841478,...,0.886409,0.872659,0.869405,-0.254151,-0.245939,-0.218583,0.614869,0.642756,-0.059153,-0.23566
usdt_g1,0.087204,0.989689,0.421156,0.241065,0.086592,1.0,0.435289,0.253183,0.084399,0.035159,...,0.056062,0.057582,0.05813,-0.019463,-0.019213,-0.015303,0.041113,0.039037,0.028066,0.035663
usdt_g7,0.209384,0.436151,0.988518,0.543811,0.212582,0.435289,1.0,0.569771,0.209825,0.055555,...,0.119721,0.124638,0.126414,-0.037724,-0.037626,-0.030682,0.098275,0.100307,0.004285,-0.004725
usdt_g30,0.353359,0.242797,0.535226,0.988857,0.356097,0.253183,0.569771,1.0,0.352021,0.091033,...,0.230918,0.246564,0.252095,-0.044289,-0.045123,-0.042054,0.229897,0.291591,-0.071437,0.0588
usdt_g365,0.986227,0.090419,0.212872,0.368204,0.998573,0.084399,0.209825,0.352021,1.0,0.820743,...,0.88139,0.869954,0.867482,-0.244545,-0.236579,-0.210337,0.629073,0.640519,-0.050117,-0.220747
forecast,0.869449,0.051287,0.085151,0.145227,0.841478,0.035159,0.055555,0.091033,0.820743,1.0,...,0.802036,0.763698,0.752018,-0.361038,-0.351137,-0.316612,0.366961,0.505927,-0.082831,-0.367243


In [28]:
# Daily panel chart: depreciation and inflation on the left axis, sentiment and
# the interest-rate change on the right axis (y2).
# NOTE(review): the left axis is labelled '%', but 'depre_365' is built as an
# absolute difference of the quote ('usdt_g365' is the percentage version) — confirm
# which series is intended.
panel_trace_specs = [
    dict(y=daily_df['sentiment'], name='EPU Index', line=dict(color='#4472c4'), yaxis='y2'),
    dict(y=daily_df['depre_365'], name='y-o-y BOB/USDT Depreciation', line=dict(color='#ff106a')),
    dict(y=daily_df['forecast'], name='y-o-y Inflation', line=dict(color='#55ff00')),
    dict(y=daily_df['interbank_365'], name='y-o-y Change Interest Rate', line=dict(color='gray'), yaxis='y2'),
]
panel_layout = dict(
    yaxis2=dict(
        title="EPU Index (std. deviations)",
        overlaying='y',
        side='right'),
    xaxis_title='Days',
    yaxis_title='% (BOB/USDT Depreciation and Inflation)',
    #legend_title='Type',
    template='plotly_white',
    # Horizontal legend centred below the plot area.
    legend=dict(
        orientation="h",
        yanchor="top",
        y=-0.2,
        xanchor="center",
        x=0.5
        ),
    width=1000,
    height=600
)

fig = go.Figure()
for trace_spec in panel_trace_specs:
    fig.add_trace(go.Scatter(x=daily_df.index, mode='lines', **trace_spec))
fig.update_layout(**panel_layout)

fig.show()

In [19]:
#daily_df.to_excel('./daily_df.xlsx')
#daily_correlation_matrix.to_excel('./daily_correlation_matrix.xlsx')

## 3. BSVAR Data

In [124]:
#data_bsvar = pd.read_excel('./daily_df.xlsx', index_col=0)
# Endogenous variables taken from the daily panel, renamed to short labels:
# forecast -> inf, depre_365 -> e, sentiment -> s.
endog_vars = ['forecast', 'depre_365', 'sentiment']
short_names = dict(zip(endog_vars, ['inf', 'e', 's']))
data_bsvar = daily_df[endog_vars].rename(columns=short_names)
data_bsvar.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 832 entries, 2023-01-01 to 2025-04-11
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   inf     832 non-null    float64
 1   e       832 non-null    float64
 2   s       832 non-null    float64
dtypes: float64(3)
memory usage: 26.0 KB


In [125]:
# Pairwise correlations among the three endogenous series (inf, e, s).
data_bsvar.corr()

Unnamed: 0,inf,e,s
inf,1.0,0.841478,0.505927
e,0.841478,1.0,0.642756
s,0.505927,0.642756,1.0


In [126]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on every column of data_bsvar (maxlag=7).
# A series is reported as stationary when its p-value is below 0.05.
for column in data_bsvar.columns:
    adf_stat, p_value = adfuller(data_bsvar[column].dropna(), maxlag=7)[:2]
    verdict = "Stationary" if p_value < 0.05 else "Non-stationary"
    print(
        f"ADF Statistic for {column}: {adf_stat}",
        f"p-value for {column}: {p_value}",
        verdict,
        "-" * 50,
        sep="\n",
    )

ADF Statistic for inf: 1.7609695223181854
p-value for inf: 0.9982683821504925
Non-stationary
--------------------------------------------------
ADF Statistic for e: 0.022957935740926888
p-value for e: 0.9603900089785768
Non-stationary
--------------------------------------------------
ADF Statistic for s: -2.960320270018778
p-value for s: 0.03876983908204059
Stationary
--------------------------------------------------


In [127]:
# Write the BSVAR input series to a CSV file in the working directory.
data_bsvar.to_csv('./data_bsvar.csv')

## DSGE Data

In [156]:
# Monthly activity index (igae) and CPI (ipc); the index is converted to
# monthly periods and back to month-end timestamps.
igae_ipc = pd.read_excel('./DATA/igae_ipc.xlsx', index_col=0)
igae_ipc.index = igae_ipc.index.to_period('M').to_timestamp('M')

# STL decomposition (seasonal=13) of each series.
igae_stl, ipc_stl = (
    STL(igae_ipc[series_name], seasonal=13) for series_name in ('igae', 'ipc')
)
igae_decomposition = igae_stl.fit()
ipc_decomposition = ipc_stl.fit()

# Seasonally adjusted series = original minus the estimated seasonal component.
igae_seasonally_adjusted = igae_ipc['igae'] - igae_decomposition.seasonal
ipc_seasonally_adjusted = igae_ipc['ipc'] - ipc_decomposition.seasonal
igae_ipc['igae_sa'] = igae_seasonally_adjusted
igae_ipc['ipc_sa'] = ipc_seasonally_adjusted

# Inflation: 1-row % change of the adjusted CPI and 12-row % change of the raw CPI.
igae_ipc['inf'] = igae_ipc['ipc_sa'].pct_change(1) * 100
igae_ipc['inf_12'] = igae_ipc['ipc'].pct_change(12) * 100
# 12-row % change of the seasonally adjusted activity index.
igae_ipc['g12_y'] = igae_ipc['igae_sa'].pct_change(12) * 100

# Gaps: % deviation of the raw and the adjusted activity index from the STL trend.
igae_ipc['igae_trend'] = igae_decomposition.trend
for gap_column, level_column in (('g_gap', 'igae'), ('g_gap_sa', 'igae_sa')):
    igae_ipc[gap_column] = ((igae_ipc[level_column] / igae_ipc['igae_trend']) - 1) * 100

igae_ipc.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 420 entries, 1990-01-31 to 2024-12-31
Freq: M
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   igae        420 non-null    float64
 1   ipc         420 non-null    float64
 2   exchange    402 non-null    float64
 3   igae_sa     420 non-null    float64
 4   ipc_sa      420 non-null    float64
 5   inf         419 non-null    float64
 6   inf_12      408 non-null    float64
 7   g12_y       408 non-null    float64
 8   igae_trend  420 non-null    float64
 9   g_gap       420 non-null    float64
 10  g_gap_sa    420 non-null    float64
dtypes: float64(11)
memory usage: 39.4 KB


In [None]:
#month_raw.to_excel('./DSGE/bolivia_data.xlsx')