# ELT Banxico & INEGI data to build Athena's database

Using the `fx`, `tiie` and `inpc` series, build a database in Athena to analyze the behavior of the Mexican economy.

@roman avj

18 mar 24

---
# Settings

In [6]:
import os
import boto3
import awswrangler as wr
import pandas as pd
import yaml
from dotenv import load_dotenv
from datetime import datetime
from tqdm import tqdm

from INEGIpy import Indicadores
from sie_banxico import SIEBanxico

In [3]:
# pull environment variables in through python-dotenv
load_dotenv()

# parse the project configuration (YAML) into `config`
with open('../config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# current date formatted as YYYY-MM-DD
TODAY = f"{datetime.today():%Y-%m-%d}"

In [4]:
# Storage location, taken from the project config
BUCKET_NAME = config['aws']['bucket']        # S3 bucket
SUB_BUCKET = config['aws']['sub-bucket']     # key prefix placed right after the bucket in S3 paths

# AWS session bound to the named credentials profile, plus an S3 client from it
session = boto3.Session(profile_name="arquitectura")
s3 = session.client(service_name='s3')

---
# Extract

In [13]:
# read a single CSV file from s3
def read_from_s3(folder, filename):
    """Read one CSV from S3 and return it indexed by its `date` column.

    The object is read from
    ``s3://{BUCKET_NAME}/{SUB_BUCKET}/{folder}/{filename}`` using the
    module-level boto3 ``session``.

    Parameters
    ----------
    folder : str
        Path segment placed between ``SUB_BUCKET`` and the file name.
    filename : str
        Name of the CSV object to read.

    Returns
    -------
    pandas.DataFrame
        The file contents with `date` parsed by ``pd.to_datetime`` and set
        as the index.

    Raises
    ------
    KeyError
        If the file has no `date` column.
    """
    s3_uri = f"s3://{BUCKET_NAME}/{SUB_BUCKET}/{folder}/{filename}"
    raw = wr.s3.read_csv(path=s3_uri, boto3_session=session)

    # parse `date` to datetime, then promote it to the index
    return raw.assign(date=pd.to_datetime(raw['date'])).set_index('date')

# load every file listed under config['aws']['filenames'] from the 'raw' folder,
# keeping each DataFrame under the same key it has in the config
dict_df = {
    key: read_from_s3('raw', value)
    for key, value in tqdm(config['aws']['filenames'].items())
}

100%|██████████| 3/3 [00:01<00:00,  1.72it/s]


---
# Load

(WIP — this step is not implemented yet.)

---
# Transform

## Monthly data

In [None]:
# Monthly data for FX and TIIE
# Resample into month-start ('MS') bins and keep, per column, the first
# non-null value in each bin; rows are labelled with the first day of the month.
# NOTE(review): `df_banxico` is not defined in any cell shown above — presumably
# it is built from the frames in `dict_df`; confirm this cell runs on a fresh kernel.
df_banxico_monthly = df_banxico.resample('MS').first()
df_banxico_monthly

Unnamed: 0_level_0,dollar_fx,tiie
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01,19.4899,7.6311
2018-02-01,18.4004,7.6600
2018-03-01,18.8610,7.8294
2018-04-01,18.2967,7.8503
2018-05-01,19.0943,7.8508
...,...,...
2023-11-01,17.9305,11.5035
2023-12-01,17.2143,11.5033
2024-01-01,17.0297,11.5025
2024-02-01,17.1335,11.5012


In [None]:
# Join INPC onto the monthly Banxico frame, matching on the index.
# how='left' keeps every row of `df_banxico_monthly`; columns coming from
# `df_inpc` are NaN wherever it has no matching index value.
# NOTE(review): `df_inpc` is not defined in any cell shown above — confirm it
# exists (and is date-indexed) on a fresh kernel.
df_monthly = df_banxico_monthly.join(df_inpc, how='left')
df_monthly

Unnamed: 0_level_0,dollar_fx,tiie,inpc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,19.4899,7.6311,98.795000
2018-02-01,18.4004,7.6600,99.171374
2018-03-01,18.8610,7.8294,99.492157
2018-04-01,18.2967,7.8503,99.154847
2018-05-01,19.0943,7.8508,98.994080
...,...,...,...
2023-11-01,17.9305,11.5035,131.445000
2023-12-01,17.2143,11.5033,132.373000
2024-01-01,17.0297,11.5025,133.555000
2024-02-01,17.1335,11.5012,133.681000


In [None]:
# Count missing values (NaN) per column of the joined monthly frame
df_monthly.isna().sum()

dollar_fx    0
tiie         0
inpc         1
dtype: int64