# Banxico Analyser

Banking on an earlier developed project, we aim to use natural language processing techniques to analyze every monetary policy statement that the Bank of Mexico (Banxico) has issued with regards to its monetary poliy stance (whether it raised, lowered or mantain unchanged its key policy rate). By doing so we expect to extract the frequency with which key words or groups of words (binominal, trinomial, etc) are used and extract a correlation between these counts and market fluctuations in response to them as well as between the counts and future policy decisions.

We will be using python to transform the statements into processable text files, matplotlib and numpy to create the relevant indicators, correlations and graphs, and, as said, NLP to derive word ferquencies. Besides the statements, we have data on market interest rates and Banxico's policy rate readily available in Banxico's API.

In [1]:
#Import Dependencies
%matplotlib inline
import os
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import json
from pprint import pprint
from datetime import datetime
from datetime import timedelta 
import time
import matplotlib.pyplot as plt
import numpy as np

In [2]:
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
#Variables
start_date = "2008-01-01"
initial_date = datetime(2008, 1, 1)
end_date = (datetime.today() - timedelta(days = 4)).date()
print(str(initial_date.date()))

2008-01-01


In [4]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path)

In [5]:
#Banxico URL
url = 'https://www.banxico.org.mx/publicaciones-y-prensa/anuncios-de-las-decisiones-de-politica-monetaria/anuncios-politica-monetaria-t.html'
browser.visit(url)
api_url = "https://www.banxico.org.mx/SieAPIRest/service/v1/series/"
banxico_key = "53a846791d89d8a66caece1024f75a984dda9317cbd533c085d1e1dac0f3779c"

In [6]:
html = browser.html
soup = bs(html, 'html.parser')

In [7]:
links = []
for link in soup.find_all('a'):
    if ('/publicaciones-y-prensa/anuncios-de-las-decisiones-de-politica-monetaria') in link.get('href'):
        partial_link = link.get('href')
        links.append('http://www.banxico.org.mx' + partial_link)
links = links[1:]
#links

In [8]:
dates = []
for date in soup.find_all("td"):
    data = date.get_text()
    dates.append(data)
del dates[1::2]

In [9]:
import re
dates2 = []
for index in range(len(dates)):
    dates2.append(re.sub("\s", "", dates[index])) 


In [10]:
dates3 = []
for index in range(len(dates)):
    dates3.append(re.sub("[\/]", "", dates2[index])) 
#print(dates3)

In [11]:
name_counter = 0
for i in links:
    r = requests.get(i)
    out_file = open(f'{dates3[name_counter]}.pdf', 'wb')
    out_file.write(r.content)
    name_counter += 1

In [12]:
#Guillermo 
#pdf to text

In [13]:
final_dates = []
for date in dates2:
    dates = datetime.strptime(date, "%d/%m/%y")
    if dates > initial_date:
        final_dates.append(datetime.strftime(dates, "%Y-%m-%d"))
final_dates.append(datetime.strftime(end_date, "%Y-%m-%d"))

In [20]:
#MXN Lvl
Series = "SF43788"

fxt = []
for fecha in final_dates:
    url_consulta = [f"{api_url}{Series}/datos/{fecha}/{fecha}?token={banxico_key}"]
    for query in url_consulta: 
        data = requests.get(query).json()
        fxt.append(data["bmx"]["series"][0]["datos"][0]["dato"])
time.sleep(180) #tiempo del API

KeyError: 'bmx'

In [None]:
#Tasa Obejtivo
Series = "SF43878"

tasa = []
for fecha in final_dates:
    try:
        url_consulta = [f"{api_url}{Series}/datos/{fecha}/{fecha}?token={banxico_key}"]
        for query in url_consulta: 
            data = requests.get(query).json()
            tasa.append(data["bmx"]["series"][0]["datos"][0]["dato"])
    except:
        tasa.append(0)
time.sleep(180) #tiempo del API

In [None]:
#Inflacion
Series = "SP30578"

#Total Data
total_query_url = f"{api_url}{Series}/datos/{start_date}/{end_date}?token={banxico_key}"
total_data = requests.get(total_query_url).json()
print(total_data)
data_total = total_data['bmx']['series'][0]['datos']
data_total

In [None]:
#Match fechas
data_total_df = pd.DataFrame(data_total)
data_total_df['fecha'] = pd.to_datetime(data_total_df['fecha'], format='%d/%m/%Y')
data_total_df['key'] = pd.to_datetime(data_total_df['fecha']).dt.to_period('M')
data_total_df

In [None]:
pre_banxico_dict = {"MXN": fxt,  "Tasa": tasa}
pre_banxico_df = pd.DataFrame(pre_banxico_dict)
pre_banxico_df['Fecha'] = final_dates
pre_banxico_df['key'] = pd.to_datetime(pre_banxico_df['Fecha']).dt.to_period('M')
pre_banxico_df = pre_banxico_df.sort_values('Fecha')
pre_banxico_df = pre_banxico_df[pre_banxico_df.Tasa != 0]
pre_banxico_df = pre_banxico_df.reset_index(drop = True)
pre_banxico_df

In [None]:
banxico_merge = data_total_df.merge(pre_banxico_df, on='key')
banxico_merge['Tasa'] = banxico_merge['Tasa'].astype(float)
banxico_merge['Dif'] = 0
banxico_merge.tail()


In [None]:
#Fix Array
banxico_merge.rename(columns = {'dato':'Inflacion'}, inplace = True) 
banxico_merge = banxico_merge.reset_index(drop = True)
banxico_merge = banxico_merge.drop(['fecha', 'key'], axis = 1) 
banxico_merge['Tasa'] = banxico_merge['Tasa'].astype(float)
banxico_merge = banxico_merge[['Fecha', 'MXN', 'Inflacion', 'Tasa', 'Dif']]
banxico_merge

In [None]:
for i in range(1, len(banxico_merge) -1 ):
    banxico_merge.loc[i, 'Dif'] = 25 * round((100 * (banxico_merge.loc[i, 'Tasa'] - banxico_merge.loc[i-1, 'Tasa']))/25)

In [None]:
banxico_merge['key'] = pd.to_datetime(banxico_merge['Fecha']).dt.to_period('M')
banxico_merge

In [None]:
#import frecuencias_banxico.csv
frecuencias_df = pd.read_csv('frecuencias_banxico.csv')
frecuencias_df.rename(columns={'Unnamed: 0':'name'}, inplace=True)
frecuencias_df['key'] = frecuencias_df['name'].str.replace('count',"")
frecuencias_df['key'] = pd.to_datetime(frecuencias_df['key'].astype(str), format='%d%m%y')
frecuencias_df['key'] = pd.to_datetime(frecuencias_df['key']).dt.to_period('M')
frecuencias_df

In [None]:
banxico_merge = banxico_merge.merge(frecuencias_df, on='key')
banxico_merge = banxico_merge.drop(['name', 'key'], axis = 1)
banxico_merge

In [None]:
title = f"FX vs. Interest Rate"
plt.scatter(banxico_merge["MXN"], banxico_merge["Tasa"], marker = "o", color="blue")
plt.title(title, size=13)
plt.xlabel("FX (Pesos per dollar)")
plt.ylabel("Interest Rate (%)")
plt.grid(True)
plt.savefig(f"{title}.png")

In [None]:
from scipy.stats import linregress
title = "Inflation counts vs. FX Regression"
x = banxico_merge["inflacion"]
y = banxico_merge["MXN"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x,y)
plt.plot(x,regress_values,"r-")
plt.annotate(line_eq,(17,18.8),fontsize=15,color="red")
plt.title(title)
plt.xlabel('Inflation counts')
plt.ylabel("FX (pesos per dollar)")
plt.savefig(f"{title}.png")
print(f"The r-squared is: {rvalue}")
plt.show()

In [None]:
#Aqui va como x "inflacion" (la del conteo, no la de los números) y la tasa de interés (no el diferencial)
title = "Inflation counts vs. Interest Rate Regression"
x = banxico_merge["inflacion"]
y = banxico_merge["Tasa"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x,y)
plt.plot(x,regress_values,"r-")
plt.annotate(line_eq,(20,7.7),fontsize=15,color="red")
plt.title(title)
plt.xlabel('Inflation counts')
plt.ylabel("Interest Rate (%)")
plt.savefig(f"{title}.png")
print(f"The r-squared is: {rvalue}")
plt.show()

In [None]:
title = "Economic slack counts vs. Interest Rate Regression"
x = banxico_merge["holgura"]*-1
y = banxico_merge["Tasa"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x,y)
plt.plot(x,regress_values,"r-")
plt.annotate(line_eq,(2,7.7),fontsize=15,color="red")
plt.title(title)
plt.xlabel('Economic slack counts')
plt.ylabel("Interest Rate (%)")
plt.savefig(f"{title}.png")
print(f"The r-squared is: {rvalue}")
plt.show()

In [None]:
title = "Aggregate Concern Index vs. Interest Rate"
x = banxico_merge["index"]
y = banxico_merge["Tasa"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
regress_values = x * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x,y)
plt.plot(x,regress_values,"r-")
plt.annotate(line_eq,(15,7.7),fontsize=15,color="red")
plt.title(title)
plt.xlabel('Index (higher = more concern)')
plt.ylabel("Interest Rate (%)")
plt.savefig(f"{title}.png")
print(f"The r-squared is: {rvalue}")
plt.show()

In [None]:
#Identify data anomalities
banxico_merge.describe()

In [None]:
#One Hot Encoding
#banxico_clean = pd.get_dummies(banxico_merge)
#banxico_clean.tail(30)

In [None]:
#Assign target and variables
target = np.array(banxico_merge['Dif'])
variables = banxico_merge.drop(["Dif", "Fecha"], axis = 1)
variable_list = list(variables.columns)
variables = np.array(variables)

In [None]:
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
train_variables, test_variables, train_target, test_target = train_test_split(variables, target, test_size = 0.25, random_state = 42)

In [None]:
print('Training Variables Shape:', train_variables.shape)
print('Training TArget Shape:', train_target.shape)
print('Testing Variables Shape:', test_variables.shape)
print('Testing TArget Shape:', test_target.shape)

In [None]:
# Import the model we are using
from sklearn.ensemble import RandomForestRegressor
classifier  = RandomForestRegressor(n_estimators = 1000, random_state = 42)
classifier.fit(train_variables, train_target);
classifier.fit(variables, target)

In [None]:
# Use the forest's predict method on the test data
predictions = classifier .predict(test_variables)
# Calculate the absolute errors
errors = abs(predictions - test_target)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'Basicos.')


In [None]:
# Get VAriable importances
importances = list(classifier .feature_importances_)
feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(variable_list, importances)]
feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True)
[print('Variable: {:20} Importance: {}'.format(*pair)) for pair in feature_importances];

In [None]:
# Import matplotlib for plotting and use magic command for Jupyter Notebooks
import matplotlib.pyplot as plt
%matplotlib inline
# Set the style
plt.style.use('fivethirtyeight')
# list of x locations for plotting
x_values = list(range(len(importances)))
# Make a bar chart
plt.bar(x_values, importances, orientation = 'vertical')
# Tick labels for x axis
plt.xticks(x_values, variable_list, rotation='vertical')
# Axis labels and title
plt.ylabel('Importance'); plt.xlabel('Variable'); plt.title('Variable Importances');

In [None]:
predictions = classifier.predict(variables)
print(f"First 10 Predictions:   {predictions[:10]}")
print(f"First 10 Actual labels: {test_target[:10].tolist()}")