Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
319 changes: 319 additions & 0 deletions PipeLain1/ETLmsTc.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "98cabcd0",
"metadata": {},
"source": [
"## Saldo y cantidad de tarjetas en el sistema financiero.\n",
"Fuente: https://www.bcp.gov.py/web/institucional/boletines-estad%C3%ADstico-financieros-"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "1aa669a4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Se inicializa parametros\n"
]
}
],
"source": [
"## Proceso para la carga de Market share de Tarjetas BCP\n",
"# Configuración logging\n",
"# Primero, configuramos el logger al inicio del script:\n",
"import logging\n",
"import os\n",
"from datetime import datetime\n",
"from urllib.parse import quote_plus\n",
"\n",
"import pandas as pd\n",
"import pyodbc\n",
"from sqlalchemy import create_engine\n",
"\n",
"# Directory the notebook is executed from; the log file is created there\n",
"ruta_actual = os.getcwd()\n",
"# Configure the root logger to write INFO-and-above records to a file\n",
"logging.basicConfig(\n",
"    filename=f'{ruta_actual}/etl_proceso.log',  # log file\n",
"    level=logging.INFO,  # one of DEBUG, INFO, WARNING, ERROR, CRITICAL\n",
"    format='%(asctime)s - %(levelname)s - %(message)s',\n",
"    filemode='w',  # 'w' truncates the file, 'a' appends\n",
"    # basicConfig is a no-op when the root logger already has handlers;\n",
"    # force=True replaces them so re-running this cell really reconfigures logging.\n",
"    force=True,\n",
")\n",
"\n",
"logger = logging.getLogger()\n",
"print('Se inicializa parametros')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09a5c4ed",
"metadata": {},
"outputs": [],
"source": [
"## Location of the source workbook\n",
"excel_file = 'C:/Users/rfigu/Documents/Python Scripts/BoletinBCP/Tablas_Boletin_Bancos_Jul25.xlsx'\n",
"\n",
"def extraer_excel(excel_file):\n",
"    \"\"\"Read every sheet of an Excel workbook into DataFrames.\n",
"\n",
"    Logs the start of the extraction and prints the row count of each sheet.\n",
"\n",
"    Args:\n",
"        excel_file: Path of the workbook passed to ``pd.read_excel``.\n",
"\n",
"    Returns:\n",
"        The mapping produced by ``pd.read_excel(..., sheet_name=None)``\n",
"        (sheet name -> DataFrame).\n",
"\n",
"    Raises:\n",
"        Exception: any error raised while reading is logged and re-raised.\n",
"    \"\"\"\n",
"    try:\n",
"        logger.info(f\"Iniciando extracción desde: {excel_file}\")\n",
"        libro = pd.read_excel(excel_file, sheet_name=None)\n",
"        # Report the size of each sheet\n",
"        for nombre_hoja, hoja in libro.items():\n",
"            print(f\"Hoja: {nombre_hoja}, Filas: {len(hoja)}\")\n",
"    except Exception as exc:\n",
"        logger.error(f\"Error en la extracción: {exc}\")\n",
"        raise\n",
"    else:\n",
"        return libro"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8155844",
"metadata": {},
"outputs": [],
"source": [
"# Current working directory, logged for traceability\n",
"ruta_actual = os.getcwd()\n",
"logger.info(f\"Ruta actual de ejecución: {ruta_actual}\")\n",
"\n",
"# Path of the Excel workbook\n",
"excel_file = 'C:/Users/rfigu/Documents/Python Scripts/BoletinBCP/Tablas_Boletin_Bancos_Jul25.xlsx'\n",
"\n",
"# Read all sheets into a dict of DataFrames (sheet name -> DataFrame)\n",
"try:\n",
"    logger.info(f\"Iniciando lectura del archivo Excel: {excel_file}\")\n",
"    sheets_dict = pd.read_excel(excel_file, sheet_name=None)\n",
"    logger.info(f\"Lectura exitosa. Hojas encontradas: {list(sheets_dict.keys())}\")\n",
"\n",
"    # Log and print the row count of each sheet\n",
"    for sheet_name, df in sheets_dict.items():\n",
"        logger.info(f\"Hoja: {sheet_name}, Filas: {len(df)}\")\n",
"        print(f\"Hoja: {sheet_name}, Filas: {len(df)}\")\n",
"\n",
"except Exception as e:\n",
"    logger.error(f\"Error al leer el archivo Excel: {e}\")\n",
"    print(f\"Error: {e}\")\n",
"    # Re-raise: without sheets_dict the following cells would only fail later\n",
"    # with an unrelated NameError.\n",
"    raise"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "755bc612",
"metadata": {},
"outputs": [],
"source": [
"## Preview of the 'Carteras' sheet\n",
"# Index directly so a missing sheet raises a KeyError naming it, instead of\n",
"# .get() returning None and failing with an opaque AttributeError.\n",
"sheets_dict['Carteras'].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa84a367",
"metadata": {},
"outputs": [],
"source": [
"## Version without logger\n",
"def transforBol(df):\n",
"    \"\"\"Clean one sheet: drop empty rows, normalise column names, parse dates.\n",
"\n",
"    Args:\n",
"        df: DataFrame read from one sheet of the workbook. It is not modified.\n",
"\n",
"    Returns:\n",
"        A new DataFrame without all-NaN rows, whose column labels are converted\n",
"        to stripped, lower-case strings with spaces replaced by underscores, and\n",
"        whose 'fecha' column (if present) is parsed to datetime; values that\n",
"        cannot be parsed become NaT.\n",
"    \"\"\"\n",
"    # Drop fully empty rows; copy so the assignments below act on an independent frame\n",
"    df = df.dropna(how='all').copy()\n",
"\n",
"    # Normalise column labels; str() guards against non-string headers\n",
"    # (numeric or datetime labels), which have no .strip()\n",
"    df.columns = [str(col).strip().lower().replace(' ', '_') for col in df.columns]\n",
"\n",
"    # Parse dates when the sheet has a 'fecha' column\n",
"    if 'fecha' in df.columns:\n",
"        df['fecha'] = pd.to_datetime(df['fecha'], errors='coerce')\n",
"\n",
"    return df\n",
"\n",
"# Run the transformation over every sheet, keeping the sheet name as key\n",
"datos_transformados = {\n",
"    nombre_hoja: transforBol(hoja)\n",
"    for nombre_hoja, hoja in sheets_dict.items()\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08e6c4c9",
"metadata": {},
"outputs": [],
"source": [
"# Index directly so a missing sheet raises a KeyError naming it, instead of\n",
"# .get() returning None and failing with an opaque AttributeError.\n",
"datos_transformados['Credito Sector'].head(3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5106e525",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Conexión exitosa\n"
]
}
],
"source": [
"# Connection parameters\n",
"server = r'ASUSTUF\\SQL22'  # raw string so the instance-name backslash is kept literally\n",
"database = 'BolBcp'\n",
"\n",
"# Open a pyodbc connection to SQL Server using Windows authentication\n",
"try:\n",
"    logger.info(f\"Intentando conectar a SQL Server: {server}, Base de datos: {database}\")\n",
"\n",
"    connP = pyodbc.connect(\n",
"        'DRIVER={ODBC Driver 17 for SQL Server};'\n",
"        f'SERVER={server};'\n",
"        f'DATABASE={database};'\n",
"        'Trusted_Connection=yes;'\n",
"    )\n",
"\n",
"    logger.info(\"Conexión a SQL Server establecida exitosamente.\")\n",
"    print('Conexión exitosa')\n",
"\n",
"except pyodbc.Error as e:\n",
"    logger.error(f\"Error al conectar a la base de datos: {str(e)}\")\n",
"    print(f'Error al conectar a la base de datos: {str(e)}')\n",
"    # Re-raise so a failed connection stops the run instead of leaving connP undefined\n",
"    raise"
]
},
{
"cell_type": "markdown",
"id": "adddb229",
"metadata": {},
"source": [
"Ejemplo de consulta (query) a demanda"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "78aed9b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" fecha codigo_entidad codigo_cuenta codigo_moneda \\\n",
"0 2025-11-30 1044 CDA 6900 \n",
"1 2025-11-30 1006 Intereses Devengados 6900 \n",
"2 2025-11-30 1044 Intereses Devengados 6200 \n",
"3 2025-11-30 1030 CDA 6900 \n",
"4 2025-11-30 1044 Intereses Devengados 6900 \n",
"5 2025-11-30 1002 Cartera Vigente 6200 \n",
"6 2025-11-30 1044 Cartera Vencida 6200 \n",
"7 2025-11-30 1030 Intereses Devengados 6200 \n",
"8 2025-11-30 1044 Cartera Vencida 6900 \n",
"9 2025-11-30 1008 Cartera Vigente 6200 \n",
"\n",
" importe fecha_carga \n",
"0 1.068517e+06 2025-12-25 11:05:44.963 \n",
"1 1.965212e+02 2025-12-25 11:05:44.963 \n",
"2 1.847820e+04 2025-12-25 11:05:44.963 \n",
"3 2.018903e+06 2025-12-25 11:05:44.963 \n",
"4 3.741408e+04 2025-12-25 11:05:44.963 \n",
"5 3.566519e+06 2025-12-25 11:05:44.963 \n",
"6 3.579872e+03 2025-12-25 11:05:44.963 \n",
"7 4.102243e+04 2025-12-25 11:05:44.963 \n",
"8 5.766352e+04 2025-12-25 11:05:44.963 \n",
"9 2.045478e+07 2025-12-25 11:05:44.963 \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\rfigu\\AppData\\Local\\Temp\\ipykernel_20820\\2923281015.py:9: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
" df = pd.read_sql(query, connP)\n"
]
}
],
"source": [
"# import pandas as pd\n",
"\n",
"# query = \"\"\"\n",
"# SELECT TOP 10 *\n",
"# FROM dbo.Carteras\n",
"# ORDER BY 1 DESC\n",
"# \"\"\"\n",
"\n",
"# df = pd.read_sql(query, connP)\n",
"# print(df)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4314e09",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): the query cell that assigned `df` is commented out, so this shows\n",
"# whatever `df` was last bound to in the kernel (e.g. the loop variable of the\n",
"# Excel-reading cell) - TODO confirm the intent or remove this cell.\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a3266a72",
"metadata": {},
"outputs": [],
"source": [
"# Build the SQLAlchemy engine that DataFrame.to_sql requires. It uses the same\n",
"# ODBC settings (driver, server, database, Windows authentication) as the\n",
"# pyodbc connection, passed through verbatim via the odbc_connect parameter.\n",
"odbc_str = (\n",
"    'DRIVER={ODBC Driver 17 for SQL Server};'\n",
"    f'SERVER={server};'\n",
"    f'DATABASE={database};'\n",
"    'Trusted_Connection=yes;'\n",
")\n",
"engine = create_engine(f'mssql+pyodbc:///?odbc_connect={quote_plus(odbc_str)}')\n",
"\n",
"# Load every transformed sheet into its own table (the table is replaced if it exists)\n",
"for nombre_tabla, df in datos_transformados.items():\n",
"    try:\n",
"        logger.info(f\"Iniciando carga de hoja: {nombre_tabla}\")\n",
"\n",
"        # Drop fully empty rows and stamp the load time; assign() returns a new\n",
"        # frame, so the transformed data kept in datos_transformados is untouched\n",
"        tabla_df = df.dropna(how='all').assign(fecha_carga=datetime.now())\n",
"\n",
"        # Insert with pandas\n",
"        tabla_df.to_sql(name=nombre_tabla, con=engine, if_exists='replace', index=False)\n",
"\n",
"        logger.info(f\"Hoja '{nombre_tabla}' cargada exitosamente con {len(tabla_df)} filas.\")\n",
"        print(f\"✅ Hoja '{nombre_tabla}' cargada exitosamente con {len(tabla_df)} filas.\")\n",
"\n",
"    except Exception as e:\n",
"        # Best effort: report the failure and continue with the next sheet\n",
"        logger.error(f\"Error al cargar hoja '{nombre_tabla}': {e}\")\n",
"        print(f\"❌ Error al cargar hoja '{nombre_tabla}': {e}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading