# ITAEE y Tipo de Cambio: Exploración de Datos

Exploración inicial del dataset ITAEE (Indicador Trimestral de Actividad Económica Estatal) de INEGI para identificar el rango temporal disponible.

In [None]:
import pandas as pd
import openpyxl
import os

## 1. Cargar y explorar ITAEE_3.xlsx (Total de la economía por entidad federativa)

In [None]:
# Locate the ITAEE workbook: the data folder is resolved relative to the
# parent of the current working directory (<parent>/data/tabulados_ITAEE).
parent_dir = os.path.dirname(os.getcwd())
DATA_DIR = os.path.join(parent_dir, "data", "tabulados_ITAEE")
filepath = os.path.join(DATA_DIR, "ITAEE_3.xlsx")

# Open the workbook in read-only mode and select its first sheet.
wb = openpyxl.load_workbook(filepath, read_only=True)
sheet_names = wb.sheetnames
ws = wb[sheet_names[0]]

# Report the sheet names and the sheet size as reported by openpyxl.
print(f"Hojas: {sheet_names}")
n_rows, n_cols = ws.max_row, ws.max_column
print(f"Dimensiones: {n_rows} filas x {n_cols} columnas")

In [None]:
from itertools import zip_longest

# Header layout: row 5 carries the year labels and row 6 the quarter labels.
# `next(..., ())` yields an empty header instead of failing if a row is absent.
row5 = list(next(ws.iter_rows(min_row=5, max_row=5, values_only=True), ()))
row6 = list(next(ws.iter_rows(min_row=6, max_row=6, values_only=True), ()))

QUARTER_LABELS = ("T1", "T2", "T3", "T4")

# Identify the quarterly columns. Cells of row 5 that are empty inherit the
# last year label seen to their left.
current_year = None
quarter_cols = []  # (col_index, year, quarter)
# zip_longest instead of zip: plain zip() stops at the shorter row and would
# silently drop trailing quarter columns if the two header rows differ in
# length. NOTE(review): rows read in read-only mode may not be padded to a
# common width when the file stores no dimension info; confirm with openpyxl.
for i, (yr, q) in enumerate(zip_longest(row5, row6)):
    if yr is not None and yr != "Concepto":
        # Drop the "R"/"P" markers attached to the year, plus stray blanks.
        label = str(yr).replace("R", "").replace("P", "").strip()
        if label:  # a blank cell does not reset the running year
            current_year = label
    if isinstance(q, str):
        q = q.strip()  # tolerate padded labels such as "T1 "
    if q in QUARTER_LABELS and current_year:
        quarter_cols.append((i, current_year, q))

# Fail with an explicit message rather than an IndexError on years[0] below.
if not quarter_cols:
    raise ValueError(
        "No se encontraron columnas trimestrales (T1-T4) en las filas 5-6 del archivo"
    )

years = sorted({yr for _, yr, _ in quarter_cols})
print(f"Años disponibles: {years[0]} - {years[-1]}")
print(f"Total de años: {len(years)}")
print(f"Total de observaciones trimestrales: {len(quarter_cols)}")
print(f"\nDetalle trimestral del último año ({years[-1]}):")
for col_idx, yr, q in quarter_cols:
    if yr == years[-1]:
        print(f"  {yr}-{q} (col {col_idx})")

In [None]:
# States of interest; they are matched against the first column of each row.
TARGET_STATES = ["Aguascalientes", "Durango", "Querétaro", "San Luis Potosí", "Zacatecas"]

# Scan rows 8-40 (physical volume index) and keep, for every target state,
# the values located under the quarterly columns found in the header.
state_rows = {}
for row in ws.iter_rows(min_row=8, max_row=40, values_only=True):
    name = row[0] if row else None
    # Skip blanks and non-text cells: .strip() on a number would raise.
    if not isinstance(name, str):
        continue
    name = name.strip()
    if name in TARGET_STATES:
        # A row may be shorter than the header; missing cells count as empty.
        state_rows[name] = [
            row[col_idx] if col_idx < len(row) else None
            for col_idx, _, _ in quarter_cols
        ]

print("Estados encontrados:", list(state_rows))
for state, vals in state_rows.items():
    non_null = sum(1 for v in vals if v is not None and v != "")
    print(f"  {state}: {non_null} observaciones trimestrales")

# Make a name mismatch visible instead of silently dropping the state.
missing_states = [s for s in TARGET_STATES if s not in state_rows]
if missing_states:
    print("Estados no encontrados:", missing_states)

In [None]:
# Build the ITAEE DataFrame: one row per quarter, labelled "<year>-<quarter>",
# and one column per state collected in `state_rows`.
dates = [f"{yr}-{q}" for _, yr, q in quarter_cols]
df_itaee = pd.DataFrame(state_rows, index=dates)
df_itaee.index.name = "periodo"

# Coerce every column to numeric; values that cannot be parsed become NaN.
df_itaee = df_itaee.apply(pd.to_numeric, errors="coerce")

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly: wrapping it in print() would emit a stray "None" line.
df_itaee.info()
print("\nPrimeras filas:")
df_itaee.head(10)

In [None]:
# Show the last available rows, ignoring quarters where every state is empty.
print("Últimas observaciones:")
non_empty_rows = df_itaee.dropna(how="all")
non_empty_rows.tail(10)

In [None]:
# The sheet is not read past this point: close the workbook opened earlier.
wb.close()

# Summary of the extracted series.
valid = df_itaee.dropna(how="all")  # quarters with data for at least one state
print("\n=== RESUMEN ===")
if valid.empty:
    # Without this guard valid.index[0] would raise IndexError.
    print("Rango temporal ITAEE: sin observaciones válidas")
else:
    print(f"Rango temporal ITAEE: {valid.index[0]} a {valid.index[-1]}")
print(f"Observaciones válidas: {len(valid)}")
print(f"Estados: {list(df_itaee.columns)}")
print("\nNulos por estado:")
print(df_itaee.isnull().sum())