In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import plotly.express as px
import plotly.graph_objects as go

## **1. Google Trends**

In [27]:
gt_m_2011_2024 = pd.read_excel('./Data/Google trends/ALL_queries.xlsx', sheet_name='monthly', index_col=0)
gt_w_2019_2023 = pd.read_excel('./Data/Google trends/ALL_queries.xlsx', sheet_name='W 2019-2023', index_col=0)
gt_w_2019_2024 = pd.read_excel('./Data/Google trends/ALL_queries.xlsx', sheet_name='W 2019-2024feb', index_col=0)
gt_w_2023_2024 = pd.read_excel('./Data/Google trends/ALL_queries.xlsx', sheet_name='W 2023-2024', index_col=0)

gt_m_2011_2024 = pd.pivot_table(gt_m_2011_2024, index=gt_m_2011_2024.index, columns='query', values='value')
gt_m_2011_2024.index = gt_m_2011_2024.index + pd.offsets.MonthEnd(0)

gt_w_2019_2023 = pd.pivot_table(gt_w_2019_2023, index=gt_w_2019_2023.index, columns='query', values='value')
gt_w_2019_2024 = pd.pivot_table(gt_w_2019_2024, index=gt_w_2019_2024.index, columns='query', values='value')
gt_w_2023_2024 = pd.pivot_table(gt_w_2023_2024, index=gt_w_2023_2024.index, columns='query', values='value')

In [28]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=gt_m_2011_2024.index, y=gt_m_2011_2024['Inflación'], mode='lines', name='Monthly 2011-2024'))
fig.add_trace(go.Scatter(x=gt_w_2019_2023.index, y=gt_w_2019_2023['Inflación'], mode='lines', name='Weekly 2019-2023'))
fig.add_trace(go.Scatter(x=gt_w_2019_2024.index, y=gt_w_2019_2024['Inflación'], mode='lines', name='Weekly 2019-2024'))
fig.add_trace(go.Scatter(x=gt_w_2023_2024.index, y=gt_w_2023_2024['Inflación'], mode='lines', name='Weekly 2023-2024'))

fig.update_layout(
    title='Inflation Trends',
    xaxis_title='Date',
    yaxis_title='Inflation',
    legend_title='Data Source'
)
fig.show()

Weekly-2019-2023 series are not the same scale as weekly-2023-2024 series. "aux" dataframe provides a rescale-factor to adjust weekly-2023-2024 series. 

In [29]:
aux = ((gt_w_2019_2023.loc['2023-01-01':'2023-12-31', ]/gt_w_2023_2024.loc['2023-01-01':'2023-12-31', ])-1) + 1
aux = aux.mean(axis=0, skipna=True)
aux

query
Coste - Tema                                   NaN
Cuenta - Tema                             0.450000
Deflación - Tema                               NaN
Demanda - Economía                        0.953191
Dinero - Tema                                  NaN
Economía - Ciencia económica                   NaN
Gasto - Tema                                   NaN
Inflación                                 1.144476
Inflación - Tema                          0.550000
Interés - Tema                                 NaN
Macroeconomía - Campo de estudio               NaN
Mercado - Tema                                 NaN
Política - Tema                                NaN
Precio - Tema                                  NaN
Producto interno bruto - Tema             0.780000
Tasa - Matemáticas                        0.600000
canasta familiar                               inf
causas de la inflación                         NaN
como se calcula el ipc                         NaN
cpi                      

Using "aux", I adjust weekly-2023-2024 series as follows:

In [30]:
gt_w_2023_2024_adj = gt_w_2023_2024.copy()
for query in gt_w_2023_2024.columns:
    gt_w_2023_2024_adj[query] = gt_w_2023_2024[query] * aux[query]

In [31]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=gt_m_2011_2024.index, y=gt_m_2011_2024['Inflación'], mode='lines', name='Monthly 2011-2024'))
fig.add_trace(go.Scatter(x=gt_w_2019_2023.index, y=gt_w_2019_2023['Inflación'], mode='lines', name='Weekly 2019-2023'))
fig.add_trace(go.Scatter(x=gt_w_2019_2024.index, y=gt_w_2019_2024['Inflación'], mode='lines', name='Weekly 2019-2024'))
fig.add_trace(go.Scatter(x=gt_w_2023_2024_adj.index, y=gt_w_2023_2024_adj['Inflación'], mode='lines', name='Weekly 2023-2024'))

fig.update_layout(
    title='Inflation Trends',
    xaxis_title='Date',
    yaxis_title='Inflation',
    legend_title='Data Source'
)
fig.show()

Some GT series are mainly NaN (or Inf) values. "gt_words" is used to filter GT series with stable information.

In [32]:
gt_words = aux.copy()
gt_words = gt_words[~gt_words.isin([np.nan, np.inf, -np.inf])]
gt_words

query
Cuenta - Tema                             0.450000
Demanda - Economía                        0.953191
Inflación                                 1.144476
Inflación - Tema                          0.550000
Producto interno bruto - Tema             0.780000
Tasa - Matemáticas                        0.600000
deflactor                                 0.580000
demanda                                   1.151617
desempleo                                 0.641902
dinero                                    1.000000
economia                                  0.712863
el ipc                                    0.480000
indice de precios del consumidor          1.000000
ine                                       1.932609
ine bolivia                               0.847663
inflacion                                 0.840548
inflacion en bolivia                      1.020991
inflación argentina                       0.470000
inflación bolivia                         1.000000
ipc                      

### **1.1. Weekly Series**

"GT_weekly" dataframe comprises GT time series. However, some adjustments were applied.

In [33]:
all_weekly = pd.concat([gt_w_2019_2023, gt_w_2023_2024_adj], axis=0).filter(gt_words.index, axis=1)
all_weekly = all_weekly[~all_weekly.index.duplicated(keep='first')]
all_weekly = all_weekly.sort_index()
all_weekly

query,Cuenta - Tema,Demanda - Economía,Inflación,Inflación - Tema,Producto interno bruto - Tema,Tasa - Matemáticas,deflactor,demanda,desempleo,dinero,...,que es el ipc,que es inflacion,que es inflación,que es la inflacion,que es la inflación,que es pib,qué es inflación,qué es la inflación,tipo de inflación,Índice de precios al consumidor - Tema
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-12-30,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,18.000000,0.000000,74.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-06,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,35.000000,0.000000,77.0,...,63.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-13,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,51.000000,45.000000,72.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-20,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,43.000000,0.000000,73.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2019-01-27,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,43.000000,0.000000,73.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-01,0.0,0.0,30.900847,0.0,0.0,0.0,0.0,42.609844,26.959889,76.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2024-12-08,0.0,0.0,50.356935,0.0,0.0,0.0,0.0,40.306610,0.000000,81.0,...,0.0,0.0,0.0,68.575714,0.0,0.0,0.0,0.0,0.0,0.0
2024-12-15,0.0,0.0,25.178468,0.0,0.0,0.0,0.0,33.396905,0.000000,88.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2024-12-22,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,26.487201,0.000000,89.0,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0


Yet, some 0 values are showing up. 

In [34]:
zero_counts = (all_weekly == 0).sum()
print(zero_counts)

query
Cuenta - Tema                             306
Demanda - Economía                        303
Inflación                                  63
Inflación - Tema                          310
Producto interno bruto - Tema             312
Tasa - Matemáticas                        312
deflactor                                 310
demanda                                     0
desempleo                                  54
dinero                                      0
economia                                    0
el ipc                                    307
indice de precios del consumidor          313
ine                                         0
ine bolivia                                 3
inflacion                                   9
inflacion en bolivia                      229
inflación argentina                       310
inflación bolivia                         240
ipc                                       211
ipc bolivia                               299
ipc que es                  

"gt_words_2" is used to filter GT series with lower 0-values count.

In [35]:
gt_words_2 = zero_counts.copy()
gt_words_2 = gt_words_2[gt_words_2 < 250]
gt_words_2

query
Inflación                63
demanda                   0
desempleo                54
dinero                    0
economia                  0
ine                       0
ine bolivia               3
inflacion                 9
inflacion en bolivia    229
inflación bolivia       240
ipc                     211
la inflacion            130
la inflación            171
pib                       0
pib bolivia              12
que es inflacion        232
que es pib              138
dtype: int64

<font color="cyan">"GT_weekly" dataframe comprises final GT weekly time-series. </font>

In [40]:
GT_weekly = all_weekly.copy().filter(gt_words_2.index, axis=1)
GT_weekly.replace(0, np.nan, inplace=True)
GT_weekly.interpolate(method='linear', inplace=True)
GT_weekly = GT_weekly.round(0)
GT_weekly

query,Inflación,demanda,desempleo,dinero,economia,ine,ine bolivia,inflacion,inflacion en bolivia,inflación bolivia,ipc,la inflacion,la inflación,pib,pib bolivia,que es inflacion,que es pib
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-12-30,,18.0,,74.0,30.0,5.0,,,,,,,,27.0,41.0,,
2019-01-06,,35.0,,77.0,49.0,22.0,28.0,21.0,,,,,,40.0,44.0,,
2019-01-13,,51.0,45.0,72.0,48.0,17.0,31.0,56.0,,,,,,29.0,47.0,,
2019-01-20,,43.0,41.0,73.0,49.0,13.0,28.0,48.0,,,,,,38.0,29.0,,
2019-01-27,,43.0,37.0,73.0,53.0,13.0,24.0,40.0,,,,,,21.0,34.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-01,31.0,43.0,27.0,76.0,25.0,21.0,16.0,29.0,43.0,59.0,71.0,41.0,56.0,37.0,23.0,56.0,44.0
2024-12-08,50.0,40.0,27.0,81.0,23.0,21.0,24.0,32.0,43.0,59.0,71.0,41.0,45.0,30.0,23.0,56.0,44.0
2024-12-15,25.0,33.0,27.0,88.0,21.0,17.0,17.0,18.0,43.0,59.0,71.0,41.0,45.0,34.0,22.0,56.0,44.0
2024-12-22,23.0,26.0,27.0,89.0,12.0,10.0,15.0,25.0,43.0,59.0,71.0,41.0,45.0,20.0,22.0,56.0,44.0


In [41]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=gt_m_2011_2024.index, y=gt_m_2011_2024['Inflación'], mode='lines', name='Monthly 2011-2024'))
fig.add_trace(go.Scatter(x=all_weekly.index, y=all_weekly['Inflación'], mode='lines', name='Weekly'))
fig.add_trace(go.Scatter(x=GT_weekly.index, y=GT_weekly['Inflación'], mode='lines', name='Weekly Adjusted'))

fig.update_layout(
    title='Inflation Trends',
    xaxis_title='Date',
    yaxis_title='Inflation',
    legend_title='Data Source'
)
fig.show()

### **1.2. Monthly Series**

In [51]:
gt_month_1 = gt_m_2011_2024.loc[gt_m_2011_2024.index < '2019-03-31'].copy().filter(gt_words_2.index, axis=1).resample('M').mean()
gt_month_1 = gt_month_1.replace(0, np.nan).interpolate(method='linear')
gt_month_1

query,Inflación,demanda,desempleo,dinero,economia,ine,ine bolivia,inflacion,inflacion en bolivia,inflación bolivia,ipc,la inflacion,la inflación,pib,pib bolivia,que es inflacion,que es pib
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2011-01-31,,22.0,42.0,41.0,60.0,43.0,54.0,58.0,59.0,,,44.0,,50.0,29.0,,
2011-02-28,42.0,28.0,53.0,38.0,82.0,50.0,63.0,63.0,44.0,,79.0,45.0,,59.0,51.0,,
2011-03-31,44.5,41.0,44.0,46.0,99.0,59.0,79.0,82.0,78.0,,54.0,57.0,,92.0,77.0,,54.0
2011-04-30,47.0,54.0,76.0,43.0,85.0,54.0,72.0,100.0,90.0,,93.0,100.0,,98.0,77.0,100.0,59.0
2011-05-31,29.0,55.0,62.0,47.0,83.0,57.0,75.0,75.0,68.0,100.0,88.0,73.0,,78.0,69.0,89.0,56.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-10-31,27.0,89.0,41.0,75.0,47.0,16.0,16.0,48.0,29.0,28.0,49.0,39.0,29.0,73.0,60.0,35.0,47.0
2018-11-30,37.0,46.0,54.0,74.0,40.0,15.0,15.0,43.0,27.0,29.0,34.0,35.0,33.0,58.0,41.0,43.0,33.0
2018-12-31,18.0,26.0,27.0,75.0,31.0,10.0,9.0,21.0,11.0,29.0,22.0,17.0,25.0,47.0,36.0,36.0,32.0
2019-01-31,14.0,26.0,22.0,85.0,29.0,10.0,10.0,24.0,11.0,29.0,25.0,15.0,17.0,32.0,29.0,29.0,15.0


In [53]:
gt_month_2 = GT_weekly.loc[GT_weekly.index >= '2019-03-31'].copy().resample('M').mean()
gt_month_2

query,Inflación,demanda,desempleo,dinero,economia,ine,ine bolivia,inflacion,inflacion en bolivia,inflación bolivia,ipc,la inflacion,la inflación,pib,pib bolivia,que es inflacion,que es pib
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-03-31,30.00,78.00,66.00,70.00,90.00,23.00,36.00,53.00,,83.0,90.00,52.00,62.00,62.00,54.00,68.00,67.00
2019-04-30,28.25,76.00,44.75,64.50,76.75,22.25,34.25,51.75,,80.5,82.25,54.50,61.50,63.00,53.25,69.00,61.25
2019-05-31,28.00,76.75,59.00,65.25,82.75,23.75,33.25,50.00,81.25,76.5,86.00,81.25,60.50,67.75,50.75,69.50,69.50
2019-06-30,26.60,68.80,56.60,67.00,68.20,21.80,36.00,47.00,93.00,73.0,92.00,72.00,59.40,66.00,60.20,70.20,60.20
2019-07-31,22.25,48.50,44.00,69.25,54.75,14.75,25.00,32.75,92.25,72.0,98.75,74.00,58.25,43.25,34.75,71.00,54.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,68.75,56.25,40.00,74.25,36.25,32.75,35.50,42.00,39.75,70.0,55.00,69.50,111.25,51.75,39.25,68.50,42.50
2024-09-30,32.20,61.20,40.80,71.40,31.80,33.20,31.20,28.80,53.40,49.2,58.60,59.60,59.20,49.60,39.00,64.20,38.20
2024-10-31,39.75,58.25,31.00,69.25,27.25,29.00,23.75,27.75,93.25,51.5,55.00,35.75,57.00,42.75,29.50,60.50,39.00
2024-11-30,44.25,81.00,33.25,73.00,26.75,24.50,23.50,36.75,58.25,59.0,70.00,43.00,50.25,42.25,35.75,56.75,44.00


"GT_monthly" combines original monthly GT trends from 2011 to 2019(Feb) and the monthly averages computed on (final) weekly GT trends.  

In [55]:
GT_monthly = pd.concat([gt_month_1, gt_month_2], axis=0)
GT_monthly = GT_monthly.interpolate(method='linear').bfill().astype(int)
GT_monthly

query,Inflación,demanda,desempleo,dinero,economia,ine,ine bolivia,inflacion,inflacion en bolivia,inflación bolivia,ipc,la inflacion,la inflación,pib,pib bolivia,que es inflacion,que es pib
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2011-01-31,42,22,42,41,60,43,54,58,59,100,79,44,33,50,29,100,54
2011-02-28,42,28,53,38,82,50,63,63,44,100,79,45,33,59,51,100,54
2011-03-31,44,41,44,46,99,59,79,82,78,100,54,57,33,92,77,100,54
2011-04-30,47,54,76,43,85,54,72,100,90,100,93,100,33,98,77,100,59
2011-05-31,29,55,62,47,83,57,75,75,68,100,88,73,33,78,69,89,56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,68,56,40,74,36,32,35,42,39,70,55,69,111,51,39,68,42
2024-09-30,32,61,40,71,31,33,31,28,53,49,58,59,59,49,39,64,38
2024-10-31,39,58,31,69,27,29,23,27,93,51,55,35,57,42,29,60,39
2024-11-30,44,81,33,73,26,24,23,36,58,59,70,43,50,42,35,56,44


In [56]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=gt_m_2011_2024.index, y=gt_m_2011_2024['Inflación'], mode='lines', name='Monthly 2011-2024'))
fig.add_trace(go.Scatter(x=GT_monthly.index, y=GT_monthly['Inflación'], mode='lines', name='Monthly (Avg. 2019-2024)'))
fig.add_trace(go.Scatter(x=GT_weekly.index, y=GT_weekly['Inflación'], mode='lines', name='Weekly Adjusted'))

fig.update_layout(
    title='Inflation Trends',
    xaxis_title='Date',
    yaxis_title='Inflation',
    legend_title='Data Source'
)
fig.show()

In [57]:
GT_monthly.to_csv('./Data/Google trends/Inflation.csv')
GT_weekly.to_csv('./Data/Google trends/Inflation_weekly.csv')

## **2. Wholesale Prices**

The Agro-Environmental and Productive Observatory (OAP) is an agency under the Ministry of Rural Development and Lands in Bolivia. The OAP publishes daily bulletins of wholesale prices of selected agricultural products. It also publishes weekly bulletins on the behavior of wholesale prices and the supply of agricultural products. Time-series for domestic wholesale prices are sourced from teh OAP.

In [92]:
ws_products = pd.read_excel('./Data/Wholesale prices/DAILY_WHOLESALE_PRICES(adjusted).xlsx', sheet_name='dict')
ws_products["Description"].unique()

array(['Carne de res en gancho',
       'Carne de pollo entero evicerado NACIONAL',
       'Papa Desiree (Mediana)', 'Papa Huaycha o Imilla (Mediana)',
       'Yuca (Mediano - Grande)', 'Banana Cavendish (Mediana)',
       'Limón Sutil (Mediano) NACIONAL', 'Naranja criolla',
       'Papaya Salvietti (Mediana) NACIONAL', 'Piña', 'Platano', 'Sandia',
       'Manzana Red Delicious Mapleado (Mediana) IMPORTADO', 'Toronja',
       'Naranja injerto', 'Tomate', 'Aji en Vaina ', 'Arveja Verde ',
       'Cebolla Cabeza Roja ', 'Cebolla Cabeza IMPORTADA', 'Haba Verde',
       'Locoto', 'Maiz Choclo ', 'Pimenton ', 'Vainita', 'Zapallo ',
       'Zanahoria ', 'Arroz de primera', 'Arroz económico',
       'Arroz de segunda', 'Arroz de primera IMPORTADO',
       'Aceite vegetal envasado NACIONAL',
       'Aceite vegetal envasado IMPORTADO', 'Azúcar blanco NACIONAL',
       'Fideo NACIONAL', 'Harina blanca NACIONAL',
       'Harina blanca IMPORTADA', ' Leche fluida ', 'Leche en polvo ',
       'Mante

In [79]:
ws_raw = pd.read_excel('./Data/Wholesale prices/DAILY_WHOLESALE_PRICES(adjusted).xlsx', sheet_name='prices', index_col=0)
ws_raw = ws_raw.loc[ws_raw.index <= '2024-10-24'].bfill()
ws_raw

Unnamed: 0,beef_lp,beef_cb,beef_sc,beef_or,beef_po,beef_su,beef_tj,beef_tr,beef_co,beef_bol,...,wheat_lp,wheat_cb,wheat_sc,wheat_or,wheat_po,wheat_su,wheat_tj,wheat_tr,wheat_co,wheat_bol
2011-01-01,19.00,21.50,18.65,22.0,24.0,19.0,20.0,15.5,17.0,21.75,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,114.93
2011-01-03,19.00,21.50,18.65,21.5,24.0,19.0,20.0,15.5,17.0,20.00,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,114.93
2011-01-04,19.50,20.00,18.65,21.5,24.0,19.0,20.0,15.5,17.0,20.00,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,114.93
2011-01-05,19.00,20.50,18.25,21.5,24.0,19.0,20.0,15.5,17.0,20.00,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,114.93
2011-01-06,19.00,20.50,18.25,21.5,22.0,19.0,21.0,15.5,17.0,20.50,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,114.93
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-19,28.52,32.75,24.00,,,28.5,,,,28.50,...,,,,,,,,,,
2024-10-21,28.52,32.75,24.75,,,28.5,,,,28.50,...,,,,,,,,,,
2024-10-22,30.00,32.75,24.75,,,29.5,,,,29.50,...,,,,,,,,,,
2024-10-23,30.00,32.75,24.00,,,29.5,,,,29.50,...,,,,,,,,,,


Recalculate national median prices.

In [84]:
ws_bol_vars = ["beef_bol", "chicken_bol", "papa1_bol", "papa2_bol", "yuca_bol", "banana_bol", "lemon_bol", "orange_bol_x", "papaya_bol", "pineapple_bol", "platano_bol", "watermelon_bol", 
                "apple_bol", "grapefruit_bol", "orange2_bol", "tomato_bol", "redpepper_bol", "orange_bol_y", "peas_bol", "onion_bol", "onion2_bol", "bean_bol", "chili_bol", "corn_bol", 
                "paprika_bol", "greenbean_bol", "squash_bol", "carrot_bol", "rice_bol", "rice2_bol", "rice3_bol_x", "rice3_bol_y", "rice4_bol", "oil_bol", "oil2_bol", "sugar_bol",
                "noodle_bol", "flour_bol", "flour2_bol", "milk_bol", "milk2_bol", "lard_bol", "veglard_bol", "ycorn_bol", "quinoa_bol", "sorghum_bol", "soy_bol", "wheat_bol"]

# Recalculate the national median prices
ws_adj = ws_raw.copy()

for column in ws_bol_vars:
    ws_adj[column] = ws_adj.iloc[:, (ws_adj.columns.get_loc(column) - 9):(ws_adj.columns.get_loc(column) ) ].median(axis=1)

ws_adj

Unnamed: 0,beef_lp,beef_cb,beef_sc,beef_or,beef_po,beef_su,beef_tj,beef_tr,beef_co,beef_bol,...,wheat_lp,wheat_cb,wheat_sc,wheat_or,wheat_po,wheat_su,wheat_tj,wheat_tr,wheat_co,wheat_bol
2011-01-01,19.00,21.50,18.65,22.0,24.0,19.0,20.0,15.5,17.0,19.00,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,168.0
2011-01-03,19.00,21.50,18.65,21.5,24.0,19.0,20.0,15.5,17.0,19.00,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,168.0
2011-01-04,19.50,20.00,18.65,21.5,24.0,19.0,20.0,15.5,17.0,19.50,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,168.0
2011-01-05,19.00,20.50,18.25,21.5,24.0,19.0,20.0,15.5,17.0,19.00,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,168.0
2011-01-06,19.00,20.50,18.25,21.5,22.0,19.0,21.0,15.5,17.0,19.00,...,280.0,168.0,114.93,250.0,150.0,145.0,345.0,,,168.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-19,28.52,32.75,24.00,,,28.5,,,,28.51,...,,,,,,,,,,
2024-10-21,28.52,32.75,24.75,,,28.5,,,,28.51,...,,,,,,,,,,
2024-10-22,30.00,32.75,24.75,,,29.5,,,,29.75,...,,,,,,,,,,
2024-10-23,30.00,32.75,24.00,,,29.5,,,,29.75,...,,,,,,,,,,


In [85]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=ws_raw.index, y=ws_raw['beef_bol'], mode='lines', name='Original'))
fig.add_trace(go.Scatter(x=ws_adj.index, y=ws_adj['beef_bol'], mode='lines', name='Adjusted'))

In [86]:
nan_counts = ws_adj.isna().sum()
nan_counts

beef_lp         0
beef_cb         0
beef_sc         0
beef_or        18
beef_po        91
             ... 
wheat_su      221
wheat_tj      221
wheat_tr     4070
wheat_co     4070
wheat_bol     221
Length: 480, dtype: int64

In [87]:
ws_adj.to_excel('./Data/Wholesale prices/ws_adj.xlsx')

In [88]:
WS_weekly = ws_adj.resample('W').mean()
WS_monthly = ws_adj.resample('M').mean()

In [89]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=ws_adj.index, y=ws_adj['tomato_bol'], mode='lines', name='Daily'))
fig.add_trace(go.Scatter(x=WS_weekly.index, y=WS_weekly['tomato_bol'], mode='lines', name='Weekly'))
fig.add_trace(go.Scatter(x=WS_monthly.index, y=WS_monthly['tomato_bol'], mode='lines', name='Monthly'))

### **2.4. Monthly-Weekly**

In [None]:
WP_monthly_weekly = pd.concat([WP_monthly, aux3], axis=0)
WP_monthly_weekly = WP_monthly_weekly[~WP_monthly_weekly.index.duplicated(keep='first')]
WP_monthly_weekly = WP_monthly_weekly.sort_index()
WP_monthly_weekly

Unnamed: 0,beef_ea,beef_lp,beef_cb,beef_sc,beef_or,beef_po,beef_su,beef_tj,beef_tr,beef_co,...,wheat_cb,wheat_sc,wheat_or,wheat_po,wheat_su,wheat_tj,wheat_tr,wheat_co,wheat_dlp,wheat_bol
2011-01-31,22.500000,18.491667,20.980769,18.442000,21.565217,23.117647,19.593077,21.560000,15.50,17.0,...,169.000000,109.683158,255.0,165.652174,145.098462,345.000000,,,270.000000,109.683158
2011-02-28,22.500000,18.720833,20.500000,18.385000,22.000000,21.444444,19.593077,22.000000,15.50,17.0,...,169.000000,109.683158,255.0,165.652174,145.098462,345.000000,,,270.000000,109.683158
2011-03-31,22.500000,19.160000,20.461538,18.136923,22.807692,21.185185,19.593077,20.769231,15.50,17.0,...,169.000000,109.683158,255.0,165.652174,145.098462,345.000000,,,270.000000,109.683158
2011-04-30,22.500000,19.080000,19.153846,17.840000,22.538462,21.115385,19.593077,19.307692,15.50,17.0,...,169.000000,116.945455,255.0,165.652174,145.098462,345.000000,,,270.000000,116.945455
2011-05-31,22.500000,19.000000,18.860000,17.865000,22.000000,19.346154,19.593077,20.000000,15.50,17.0,...,169.000000,113.558750,255.0,165.652174,145.098462,345.000000,,,270.000000,130.487844
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-31,23.673125,23.673125,23.350000,21.000000,23.470588,24.000000,24.500000,20.000000,18.83,23.0,...,270.666667,280.000000,240.0,240.000000,220.000000,277.058824,,,241.764706,248.236565
2024-02-04,24.416667,24.416667,23.750000,21.000000,24.333333,24.000000,24.500000,20.000000,18.83,23.0,...,273.333333,280.000000,240.0,240.000000,220.000000,290.000000,,,242.500000,250.939889
2024-02-11,24.416667,24.416667,23.750000,21.000000,24.333333,24.000000,24.500000,20.000000,18.83,23.0,...,273.333333,280.000000,240.0,240.000000,220.000000,290.000000,,,242.500000,250.939889
2024-02-18,24.416667,24.416667,23.750000,21.000000,24.333333,24.000000,24.500000,20.000000,18.83,23.0,...,273.333333,280.000000,240.0,240.000000,220.000000,290.000000,,,242.500000,250.939889


In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=WP_monthly_weekly.index, y=WP_monthly_weekly['tomato_ea'], name='El Alto'))
fig.add_trace(go.Scatter(x=WP_monthly_weekly.index, y=WP_monthly_weekly['tomato_lp'], name='La Paz'))
fig.add_trace(go.Scatter(x=WP_monthly_weekly.index, y=WP_monthly_weekly['tomato_sc'], name='Santa Cruz'))
fig.add_trace(go.Scatter(x=WP_monthly_weekly.index, y=WP_monthly_weekly['tomato_bol'], name='Bolivia'))
fig.update_layout(
    title='Tomato Wholesale Prices',
    xaxis_title='Date',
    yaxis_title='Price'
)
fig.show()

In [None]:
WP_monthly_weekly.to_excel('/content/drive/MyDrive/Research/CEMLA 2024/Precios al por mayor 2008 2023/WP_MONTHLY_WEEKLY.xlsx')

## **3. Commodity Prices**

International commodity prices are sourced from different international organizations amd plataforms that provide daily reports of these prices.

### **3.1. Daily**

In [None]:
COM_daily = pd.read_excel('/content/drive/MyDrive/Research/CEMLA 2024/Serie diaria de precios internacionales.xlsx', index_col=0, sheet_name='data')
COM_daily.fillna(method='bfill', inplace=True)
COM_daily.fillna(method='ffill', inplace=True)
COM_daily

Unnamed: 0_level_0,wti,gas_ny,gold,silver,zinc,tin,soybean,soy_flour,soy_oil,lead,copper,libor
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2009-01-02,46.34,5.40,875.40,12.370000,1280.0,11625.0,328.823000,274.398000,835.033500,1090.0,3231.0,1.75250
2009-01-05,48.81,5.82,859.48,12.511136,1300.0,11500.0,336.826475,284.145691,828.799364,1120.0,3190.0,1.79375
2009-01-06,48.58,6.11,863.90,12.520000,1320.0,11950.0,335.895450,282.663000,862.596000,1178.0,3390.0,1.77000
2009-01-07,42.63,5.89,842.98,13.040000,1290.0,11600.0,331.762200,277.704000,872.298000,1140.0,3340.0,1.75000
2009-01-08,41.70,5.98,857.40,13.335000,1235.0,11400.0,331.578500,273.296000,872.298000,1149.0,3195.0,1.68625
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-28,83.17,1.54,2229.87,24.648800,2439.0,27451.0,438.124500,373.578000,1051.123500,2055.0,8867.0,5.64607
2024-03-29,83.71,1.54,2251.44,24.963100,2439.0,27451.0,437.757100,372.145400,1057.297500,2055.0,8867.0,5.64607
2024-04-01,83.71,1.63,2251.44,24.963100,2439.0,27451.0,437.757100,372.145400,1057.297500,2055.0,8867.0,5.64607
2024-04-02,85.15,1.63,2280.67,24.560786,2479.5,27897.0,435.460850,371.237562,1047.196500,2020.5,8991.0,5.64607


### **3.2. Monthly**

In [None]:
COM_monthly = COM_daily.resample('M').mean()
COM_monthly

Unnamed: 0_level_0,wti,gas_ny,gold,silver,zinc,tin,soybean,soy_flour,soy_oil,lead,copper,libor
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2009-01-31,41.595714,5.225714,861.889524,13.862673,1224.404762,11470.000000,343.755782,290.416119,872.988136,1145.071429,3294.952381,1.622113
2009-02-28,38.939500,4.510500,941.716000,14.868940,1142.625000,10898.000000,360.317272,310.637008,878.855250,1109.125000,3363.350000,1.756939
2009-03-31,47.975455,3.957727,926.955455,13.457863,1250.363636,10467.272727,338.145179,286.929791,802.444364,1256.454545,3803.136364,1.827274
2009-04-30,49.919545,3.499091,890.775909,14.138542,1414.477273,11569.318182,347.944318,300.795256,832.509080,1395.636364,4457.681818,1.654107
2009-05-31,59.215238,3.811905,930.398095,15.657639,1446.642857,12950.238095,344.983810,296.271576,807.503000,1452.428571,4602.666667,1.357679
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-31,72.352857,2.529048,2036.407619,24.050643,2512.690476,24727.571429,485.129831,440.159790,1105.135500,2067.166667,8486.333333,5.688384
2024-01-31,73.646087,3.588348,2033.357391,22.912308,2536.847826,25382.478261,453.759437,403.105681,1050.660506,2105.021739,8448.043478,5.590543
2024-02-29,76.684762,1.710071,2025.791905,22.666519,2387.547619,26332.571429,431.331974,375.210010,1014.825000,2074.261905,8400.619048,5.650853
2024-03-31,80.562381,1.505095,2167.808571,24.452187,2499.904762,27578.666667,435.158849,371.395490,1045.177000,2078.071429,8793.333333,5.671097


### **3.3. Weekly**

In [None]:
COM_daily_week = COM_daily.merge(aux2, left_index=True, right_index=True, how='outer')
COM_daily_week = COM_daily_week['2019-01-01':]
COM_daily_week['ymw'].fillna(method='bfill', inplace=True)
COM_daily_week.drop(['ym', 'count', 'num'], axis=1, inplace=True)
#COM_daily_week

COM_weekly = COM_daily_week.groupby('ymw').mean()
COM_weekly

Unnamed: 0_level_0,wti,gas_ny,gold,silver,zinc,tin,soybean,soy_flour,soy_oil,lead,copper,libor
ymw,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-01-01,46.750,2.7023,1286.880,15.618000,2423.5,19518.75,356.883175,362.144750,667.122750,1964.75,5865.25,2.866050
2019-01-02,50.968,2.6760,1289.012,15.645360,2485.6,20039.00,360.309180,367.847600,671.246100,1974.00,5931.80,2.859226
2019-01-03,52.160,3.3660,1289.812,15.537700,2512.7,20586.00,356.524960,360.949080,672.833700,1977.40,5966.20,2.853052
2019-01-04,53.052,3.1180,1286.974,15.413300,2619.4,20675.00,359.390680,361.874760,691.267500,2048.80,5969.10,2.848878
2019-02-01,53.716,4.4640,1314.858,15.924020,2703.6,20783.00,359.078390,359.097720,707.408100,2096.40,6099.20,2.810852
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-04,76.004,2.3760,2020.846,22.770260,2543.6,26335.40,450.138480,396.543680,1039.304700,2153.80,8485.40,5.604506
2024-02-01,75.310,2.1980,2040.902,22.949536,2514.6,25984.00,446.411531,400.682014,1015.962829,2160.80,8559.60,5.557954
2024-02-02,74.602,1.9243,2031.078,22.439540,2376.2,25554.40,438.547010,387.617480,1030.705200,2082.80,8287.20,5.618660
2024-02-03,77.730,1.5940,2004.704,22.707120,2337.3,27319.60,434.119840,372.652320,1035.203400,2028.80,8299.00,5.652020


In [None]:
aux4 = aux2.merge(COM_weekly, left_on='ymw', right_on=COM_weekly.index, how='inner')
aux4.set_index(aux2.index, inplace=True)
aux4.drop(['ym', 'count', 'num', 'ymw'], axis=1, inplace=True)
aux4

Unnamed: 0_level_0,wti,gas_ny,gold,silver,zinc,tin,soybean,soy_flour,soy_oil,lead,copper,libor
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-01-06,46.750,2.7023,1286.880,15.618000,2423.5,19518.75,356.883175,362.144750,667.122750,1964.75,5865.25,2.866050
2019-01-13,50.968,2.6760,1289.012,15.645360,2485.6,20039.00,360.309180,367.847600,671.246100,1974.00,5931.80,2.859226
2019-01-20,52.160,3.3660,1289.812,15.537700,2512.7,20586.00,356.524960,360.949080,672.833700,1977.40,5966.20,2.853052
2019-01-27,53.052,3.1180,1286.974,15.413300,2619.4,20675.00,359.390680,361.874760,691.267500,2048.80,5969.10,2.848878
2019-02-03,53.716,4.4640,1314.858,15.924020,2703.6,20783.00,359.078390,359.097720,707.408100,2096.40,6099.20,2.810852
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-28,76.004,2.3760,2020.846,22.770260,2543.6,26335.40,450.138480,396.543680,1039.304700,2153.80,8485.40,5.604506
2024-02-04,75.310,2.1980,2040.902,22.949536,2514.6,25984.00,446.411531,400.682014,1015.962829,2160.80,8559.60,5.557954
2024-02-11,74.602,1.9243,2031.078,22.439540,2376.2,25554.40,438.547010,387.617480,1030.705200,2082.80,8287.20,5.618660
2024-02-18,77.730,1.5940,2004.704,22.707120,2337.3,27319.60,434.119840,372.652320,1035.203400,2028.80,8299.00,5.652020


In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=COM_monthly.index, y=COM_monthly['wti'], name='Monthly'))
fig.add_trace(go.Scatter(x=COM_weekly.index, y=COM_weekly['wti'], name='Weekly'))
fig.add_trace(go.Scatter(x=COM_daily.index, y=COM_daily['wti'], name='Daily'))
fig.update_layout(
    title='WTI Price',
    xaxis_title='Date',
    yaxis_title='Price'
)
fig.show()

### **3.4. Monthly-Weekly**

In [None]:
COM_monthly_weekly = pd.concat([COM_monthly, aux4], axis=0)
COM_monthly_weekly = COM_monthly_weekly[~COM_monthly_weekly.index.duplicated(keep='first')]
COM_monthly_weekly = COM_monthly_weekly.sort_index()
COM_monthly_weekly

Unnamed: 0_level_0,wti,gas_ny,gold,silver,zinc,tin,soybean,soy_flour,soy_oil,lead,copper,libor
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2009-01-31,41.595714,5.225714,861.889524,13.862673,1224.404762,11470.000000,343.755782,290.416119,872.988136,1145.071429,3294.952381,1.622113
2009-02-28,38.939500,4.510500,941.716000,14.868940,1142.625000,10898.000000,360.317272,310.637008,878.855250,1109.125000,3363.350000,1.756939
2009-03-31,47.975455,3.957727,926.955455,13.457863,1250.363636,10467.272727,338.145179,286.929791,802.444364,1256.454545,3803.136364,1.827274
2009-04-30,49.919545,3.499091,890.775909,14.138542,1414.477273,11569.318182,347.944318,300.795256,832.509080,1395.636364,4457.681818,1.654107
2009-05-31,59.215238,3.811905,930.398095,15.657639,1446.642857,12950.238095,344.983810,296.271576,807.503000,1452.428571,4602.666667,1.357679
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-18,77.730000,1.594000,2004.704000,22.707120,2337.300000,27319.600000,434.119840,372.652320,1035.203400,2028.800000,8299.000000,5.652020
2024-02-25,77.874000,1.550000,2025.480000,22.922900,2393.300000,26340.600000,427.580120,369.544680,1000.276200,2070.300000,8525.700000,5.681720
2024-02-29,76.684762,1.710071,2025.791905,22.666519,2387.547619,26332.571429,431.331974,375.210010,1014.825000,2074.261905,8400.619048,5.650853
2024-03-31,80.562381,1.505095,2167.808571,24.452187,2499.904762,27578.666667,435.158849,371.395490,1045.177000,2078.071429,8793.333333,5.671097


In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=COM_monthly_weekly.index, y=COM_monthly_weekly['wti'], name='WTI'))
fig.update_layout(
    title='WTI Price',
    xaxis_title='Date',
    yaxis_title='Price'
)
fig.show()

In [None]:
COM_monthly_weekly.to_excel('/content/drive/MyDrive/Research/CEMLA 2024/COM_MONTHLY_WEEKLY.xlsx')

## **4. Financial variables**

The Housing Development Unit (UFV) is a daily index, calculated based on inflation. It serves as a reference for financial transactions, contracts and all types of legal acts in national currency with the maintenance of value with respect to the evolution of domestic prices. This information is publicly available and published by the Central Bank of Bolivia.

The variable "exchange" is a daily time-series of the exchage rate USD/BOB that generates Google Finance, which is not neccesarily the as the exchange rate time-series published by the Central Bank of Bolivia; the latter shows a fixed echange rate since 2011. Conversely, Google Finance's USD/BOB exchage rate shows more variation that is better sutted for forecasting.

In [None]:
UFVFX_daily = pd.read_excel('/content/drive/MyDrive/Research/CEMLA 2024/UFV_FX_diario.xlsx', index_col=0)
UFVFX_daily = UFVFX_daily['2011-01-01':]
UFVFX_daily.fillna(method='bfill', inplace=True)
UFVFX_monthly = UFVFX_daily.resample('M').mean()
UFVFX_monthly

Unnamed: 0_level_0,compra,venta,spread,ufv,exchange
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-01-31,7.040000,6.940000,0.1,1.568637,6.985806
2011-02-28,7.026429,6.926429,0.1,1.577936,7.011429
2011-03-31,7.007097,6.907097,0.1,1.588942,6.979355
2011-04-30,6.992667,6.892667,0.1,1.601850,6.954833
2011-05-31,6.990000,6.890000,0.1,1.615897,6.973871
...,...,...,...,...,...
2023-11-30,6.960000,6.860000,0.1,2.468625,6.899293
2023-12-31,6.960000,6.860000,0.1,2.472746,6.904152
2024-01-31,6.960000,6.860000,0.1,2.476424,6.906287
2024-02-29,6.960000,6.860000,0.1,2.480599,6.895669


In [None]:
UFVFX_daily_week = UFVFX_daily.merge(aux2, left_index=True, right_index=True, how='outer')
UFVFX_daily_week = UFVFX_daily_week['2019-01-01':]
UFVFX_daily_week['ymw'].fillna(method='bfill', inplace=True)
UFVFX_daily_week.drop(['ym', 'count', 'num'], axis=1, inplace=True)

UFVFX_weekly = UFVFX_daily_week.groupby('ymw').mean()
UFVFX_weekly

Unnamed: 0_level_0,compra,venta,spread,ufv,exchange
ymw,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-01,6.96,6.86,0.1,2.291075,6.910000
2019-01-02,6.96,6.86,0.1,2.291660,6.910000
2019-01-03,6.96,6.86,0.1,2.292290,6.910000
2019-01-04,6.96,6.86,0.1,2.292920,6.899286
2019-02-01,6.96,6.86,0.1,2.293559,6.885000
...,...,...,...,...,...
2024-01-04,6.96,6.86,0.1,2.477640,6.905500
2024-02-01,6.96,6.86,0.1,2.478634,6.894929
2024-02-02,6.96,6.86,0.1,2.479677,6.902357
2024-02-03,6.96,6.86,0.1,2.480630,6.904000


In [None]:
aux5 = aux2.merge(UFVFX_weekly, left_on='ymw', right_on=UFVFX_weekly.index, how='inner')
aux5.set_index(aux2.index, inplace=True)
aux5.drop(['ym', 'count', 'num', 'ymw'], axis=1, inplace=True)
aux5.fillna(method='bfill')
aux5

Unnamed: 0_level_0,compra,venta,spread,ufv,exchange
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-06,6.96,6.86,0.1,2.291075,6.910000
2019-01-13,6.96,6.86,0.1,2.291660,6.910000
2019-01-20,6.96,6.86,0.1,2.292290,6.910000
2019-01-27,6.96,6.86,0.1,2.292920,6.899286
2019-02-03,6.96,6.86,0.1,2.293559,6.885000
...,...,...,...,...,...
2024-01-28,6.96,6.86,0.1,2.477640,6.905500
2024-02-04,6.96,6.86,0.1,2.478634,6.894929
2024-02-11,6.96,6.86,0.1,2.479677,6.902357
2024-02-18,6.96,6.86,0.1,2.480630,6.904000


In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=UFVFX_monthly.index, y=UFVFX_monthly['exchange'], name='Monthly'))
fig.add_trace(go.Scatter(x=UFVFX_weekly.index, y=UFVFX_weekly['exchange'], name='Weekly'))
fig.add_trace(go.Scatter(x=UFVFX_daily.index, y=UFVFX_daily['exchange'], name='Daily'))
fig.update_layout(
    title='USD/BOB',
    xaxis_title='Date',
    yaxis_title='Price'
)
fig.show()

In [None]:
UFVFX_monthly_weekly = pd.concat([UFVFX_monthly, aux5], axis=0)
UFVFX_monthly_weekly = UFVFX_monthly_weekly[~UFVFX_monthly_weekly.index.duplicated(keep='first')]
UFVFX_monthly_weekly = UFVFX_monthly_weekly.sort_index()
UFVFX_monthly_weekly

Unnamed: 0,compra,venta,spread,ufv,exchange
2011-01-31,7.040000,6.940000,0.1,1.568637,6.985806
2011-02-28,7.026429,6.926429,0.1,1.577936,7.011429
2011-03-31,7.007097,6.907097,0.1,1.588942,6.979355
2011-04-30,6.992667,6.892667,0.1,1.601850,6.954833
2011-05-31,6.990000,6.890000,0.1,1.615897,6.973871
...,...,...,...,...,...
2024-02-11,6.960000,6.860000,0.1,2.479677,6.902357
2024-02-18,6.960000,6.860000,0.1,2.480630,6.904000
2024-02-25,6.960000,6.860000,0.1,2.481540,6.891700
2024-02-29,6.960000,6.860000,0.1,2.480599,6.895669


In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=UFVFX_monthly_weekly.index, y=UFVFX_monthly_weekly['exchange'], name='USD/BOB'))
fig.update_layout(
    title='USD/BOB',
    xaxis_title='Date',
    yaxis_title='Price'
)
fig.show()

In [None]:
UFVFX_monthly_weekly.to_excel('/content/drive/MyDrive/Research/CEMLA 2024/UFVFX_MONTHLY_WEEKLY.xlsx')

## **5. CPI**

In [None]:
CPI_monthly = pd.read_excel('/content/drive/MyDrive/Research/CEMLA 2024/IPC_monthly.xlsx', index_col=0)
CPI_monthly.index = CPI_monthly.index + pd.offsets.MonthEnd(0)
CPI_monthly

Unnamed: 0_level_0,ipc_nal,ipc_food,ipc_nofood,ipc_ali
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-31,68.467691,62.240777,74.122883,60.414889
2010-02-28,68.581371,62.377226,74.216052,60.558520
2010-03-31,68.499278,62.212894,74.208192,60.218233
2010-04-30,68.561311,62.292877,74.254043,60.271464
2010-05-31,68.549203,62.120346,74.386811,59.935518
...,...,...,...,...
2023-10-31,110.429431,113.606645,108.137529,113.582224
2023-11-30,110.425657,113.596885,108.138074,113.601992
2023-12-31,111.123491,114.456695,108.719066,114.779862
2024-01-31,111.211057,114.196976,109.057148,114.324837


## **Aggregated Dataset**

In [None]:
w_adj.index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2024-01-07', '2024-01-14', '2024-01-21', '2024-01-28',
               '2024-01-31', '2024-02-04', '2024-02-11', '2024-02-18',
               '2024-02-25', '2024-02-29'],
              dtype='datetime64[ns]', name='date', length=419, freq=None)

In [None]:
WP_monthly_weekly.index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2023-12-31', '2024-01-07', '2024-01-14', '2024-01-21',
               '2024-01-28', '2024-01-31', '2024-02-04', '2024-02-11',
               '2024-02-18', '2024-02-25'],
              dtype='datetime64[ns]', length=417, freq=None)

In [None]:
dataset = w_adj.merge(WP_monthly_weekly, left_index=True, right_index=True, how='left')
dataset = dataset.merge(COM_monthly_weekly, left_index=True, right_index=True, how='left')
dataset = dataset.merge(UFVFX_monthly_weekly, left_index=True, right_index=True, how='left')
dataset = dataset.loc[:'2023-12-31', :]
dataset['week_set'] = np.where(dataset.index.isin(aux2.index), 1, 0)
dataset = dataset.drop('2018-12-30', axis=0)
dataset

Unnamed: 0_level_0,Bien económico - Tema,Contabilidad - Campo de estudio,Coste - Tema,Cuenta - Tema,Deflación - Tema,Deflactor - Tema,Demanda - Economía,Desempleo - Tema,Dinero - Tema,Economía - Campo de estudio,...,soy_oil,lead,copper,libor,compra,venta,spread,ufv,exchange,week_set
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-31,0,0,0,43,0,0,21,0,38,0,...,1293.334761,2544.142857,9520.571429,0.455426,7.040000,6.940000,0.1,1.568637,6.985806,0
2011-02-28,0,0,0,43,0,0,27,62,36,0,...,1282.258425,2573.125000,9872.050000,0.464045,7.026429,6.926429,0.1,1.577936,7.011429,0
2011-03-31,0,0,100,43,0,0,44,48,45,0,...,1308.026045,2603.108696,9514.391304,0.460783,7.007097,6.907097,0.1,1.588942,6.979355,0
2011-04-30,0,0,0,48,100,0,58,87,44,0,...,1277.810864,2655.333333,9514.333333,0.440875,6.992667,6.892667,0.1,1.601850,6.954833,0
2011-05-31,0,0,0,44,0,0,55,56,48,0,...,1257.229909,2411.863636,8979.000000,0.414302,6.990000,6.890000,0.1,1.615897,6.973871,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-03,0,0,0,57,0,0,39,30,75,0,...,1145.894400,2156.700000,8429.100000,5.796372,6.960000,6.860000,0.1,2.470730,6.908529,1
2023-12-10,0,0,0,57,0,0,36,0,83,0,...,1121.066100,2062.200000,8403.200000,5.732856,6.960000,6.860000,0.1,2.471710,6.904143,1
2023-12-17,0,0,0,57,0,0,23,0,85,0,...,1111.187700,2048.800000,8381.000000,5.732612,6.960000,6.860000,0.1,2.472570,6.902214,1
2023-12-24,0,0,0,66,0,0,12,0,87,0,...,1109.379600,2074.100000,8558.400000,5.657090,6.960000,6.860000,0.1,2.473340,6.899500,1


In [None]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 408 entries, 2011-01-31 to 2023-12-31
Columns: 666 entries, Bien económico - Tema to week_set
dtypes: float64(593), int64(73)
memory usage: 2.1 MB


In [None]:
dataset = dataset.dropna(axis=1)
dataset.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 408 entries, 2011-01-31 to 2023-12-31
Columns: 650 entries, Bien económico - Tema to week_set
dtypes: float64(577), int64(73)
memory usage: 2.0 MB


In [None]:
dataset.isna().sum().sum()

0

In [None]:
dataset = dataset.merge(CPI_monthly, left_index=True, right_index=True, how='left')
#dataset.fillna(method='ffill', inplace=True)
dataset

Unnamed: 0_level_0,Bien económico - Tema,Contabilidad - Campo de estudio,Coste - Tema,Cuenta - Tema,Deflación - Tema,Deflactor - Tema,Demanda - Economía,Desempleo - Tema,Dinero - Tema,Economía - Campo de estudio,...,compra,venta,spread,ufv,exchange,week_set,ipc_nal,ipc_food,ipc_nofood,ipc_ali
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-31,0,0,0,43,0,0,21,0,38,0,...,7.040000,6.940000,0.1,1.568637,6.985806,0,74.207255,70.885954,77.240962,68.870036
2011-02-28,0,0,0,43,0,0,27,62,36,0,...,7.026429,6.926429,0.1,1.577936,7.011429,0,75.439060,72.663719,77.980264,70.869435
2011-03-31,0,0,100,43,0,0,44,48,45,0,...,7.007097,6.907097,0.1,1.588942,6.979355,0,76.108818,73.107625,78.854304,71.367307
2011-04-30,0,0,0,48,100,0,58,87,44,0,...,6.992667,6.892667,0.1,1.601850,6.954833,0,76.125495,72.727340,79.229494,70.686865
2011-05-31,0,0,0,44,0,0,55,56,48,0,...,6.990000,6.890000,0.1,1.615897,6.973871,0,76.277495,72.728528,79.517765,70.585534
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-03,0,0,0,57,0,0,39,30,75,0,...,6.960000,6.860000,0.1,2.470730,6.908529,1,,,,
2023-12-10,0,0,0,57,0,0,36,0,83,0,...,6.960000,6.860000,0.1,2.471710,6.904143,1,,,,
2023-12-17,0,0,0,57,0,0,23,0,85,0,...,6.960000,6.860000,0.1,2.472570,6.902214,1,,,,
2023-12-24,0,0,0,66,0,0,12,0,87,0,...,6.960000,6.860000,0.1,2.473340,6.899500,1,,,,


In [None]:
dataset.isna().sum().sum()

1008

In [None]:
dataset.to_excel('/content/drive/MyDrive/Research/CEMLA 2024/DATASET.xlsx')

In [None]:
#df = pd.DataFrame(index=pd.date_range('2011-01-01', '2024-02-29', freq='D'))
#df['year'] = df.index.year
#df['month'] = df.index.month
#df['week'] = df.index.isocalendar().week
#df['weekday'] = df.index.isocalendar().day
#df['day'] = df.index.day