# Capa Silver

In this notebook, data from the Bronze layer is extracted and transformed so it can be uploaded to the Silver layer.

In [1]:
# Importar librerias python
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Make the project's own modules importable from this notebook:
# the parent of the current working directory is added to the import path.
project_root = Path.cwd().parent
sys.path.append(str(project_root))

In [3]:
from db.connection import engine_setting, engine_connection
import dependencies as dp


### Conexión con capa bronze

In [4]:
# = Connection to the bronze database schema =

# Database type and port passed to the engine factory
db_type = 'postgresql' 
port = 5432

# Schema name for this layer
db_schema_bronze = 'bronze'

# Create the engine for this schema (engine_setting is a project helper from db.connection)
engine_bronze = engine_setting(db_type=db_type, db_port=port, db_schema=db_schema_bronze)

# Establish the connection; the helper logs the outcome (see the cell output)
conn_bronze = engine_connection(engine_bronze, db_schema_bronze)

2024-12-04 11:01:45,771 - INFO - Successful connection to schema bronze


### Conexión con capa Silver 

In [5]:
# = Connection to the silver database schema =

# Database type and port passed to the engine factory
# (same values as in the bronze connection cell; re-declared here)
db_type = 'postgresql' 
port = 5432

# Schema name for this layer
db_schema_silver = 'silver'

# Create the engine for this schema (engine_setting is a project helper from db.connection)
engine_silver = engine_setting(db_type=db_type, db_port=port, db_schema=db_schema_silver)

# Establish the connection; the helper logs the outcome (see the cell output)
conn_silver = engine_connection(engine_silver, db_schema_silver)

2024-12-04 11:01:48,491 - INFO - Successful connection to schema silver


Extract Data

In [6]:
from sqlalchemy import MetaData, Table
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import sessionmaker

# Metadata container that will hold the reflected table definitions
metadata = MetaData()

# Reflect every table of the "bronze" schema
metadata.reflect(bind=engine_bronze, schema='bronze')

# Build an automap base on top of the reflected metadata
Base = automap_base(metadata=metadata)

# Prepare the automapping
Base.prepare()

# Collect the reflected tables in a dict keyed by the metadata key.
# The keys are schema-qualified ("bronze.<table>"), as the printed output shows.
# NOTE(review): Table() is called with that qualified key as the table name;
# presumably it just returns the already-reflected Table object — confirm.
tables = {}
for table_name in metadata.tables:
    print(f"Tabla encontrada: {table_name}")
    tables[table_name] = Table(table_name, metadata, autoload_with=engine_bronze)

# The reflected tables can now be accessed as Table objects
# for table_name, table_obj in tables.items():
#     print(f"Columnas en la tabla {table_name}: {[col.name for col in table_obj.columns]}")


Tabla encontrada: bronze.U6321303_Almacenes
Tabla encontrada: bronze.U6301303_Articulos
Tabla encontrada: bronze.U551_Presencia
Tabla encontrada: bronze.U532_Trabajadas
Tabla encontrada: bronze.U6301303_Clientes
Tabla encontrada: bronze.U553_Compras
Tabla encontrada: bronze.U6311303_Empresas
Tabla encontrada: bronze.U555_Invertidas
Tabla encontrada: bronze.U6331303_Operarios
Tabla encontrada: bronze.U552_Stock
Tabla encontrada: bronze.U533_OrdenesReparacion
Tabla encontrada: bronze.U554_VentasMostrador
Tabla encontrada: bronze.U560_VentasTaller
Tabla encontrada: bronze.U6311303_Talleres
Tabla encontrada: bronze.ULSTTHPT_TipoHoras
Tabla encontrada: bronze.TipoOrdenesReparacion
Tabla encontrada: bronze.ULSTTVPT_TiposVentas
Tabla encontrada: bronze.U6341303_Vehiculos
Tabla encontrada: bronze.ULSTTHPT_TiposHoras


In [7]:
import pandas as pd

# Load every reflected bronze table into a DataFrame, keyed by its bare table name
dataframes = {}

for table_name, table_obj in tables.items():
    # Reflected keys look like "<schema>.<table>"; keep only the part after the last dot
    clean_table_name = table_name.rsplit('.', 1)[-1]
    print(f"Cargando datos de la tabla: {clean_table_name}")

    # NOTE(review): the query is not schema-qualified, so it presumably relies on
    # engine_bronze resolving names inside the bronze schema — confirm.
    query = f'SELECT * FROM "{clean_table_name}"'
    bronze_frame = pd.read_sql(query, con=engine_bronze)

    # Keep everything except the last two columns
    # (presumably load/audit fields added in bronze — TODO confirm)
    dataframes[clean_table_name] = bronze_frame.iloc[:, :-2]

# # Show the contents of the DataFrame dictionary
# for clean_table_name, df in dataframes.items():
#     print(f"Tabla: {clean_table_name}, Filas: {len(df)}, Columnas: {list(df.columns)}")



Cargando datos de la tabla: U6321303_Almacenes
Cargando datos de la tabla: U6301303_Articulos
Cargando datos de la tabla: U551_Presencia
Cargando datos de la tabla: U532_Trabajadas
Cargando datos de la tabla: U6301303_Clientes
Cargando datos de la tabla: U553_Compras
Cargando datos de la tabla: U6311303_Empresas
Cargando datos de la tabla: U555_Invertidas
Cargando datos de la tabla: U6331303_Operarios
Cargando datos de la tabla: U552_Stock
Cargando datos de la tabla: U533_OrdenesReparacion
Cargando datos de la tabla: U554_VentasMostrador
Cargando datos de la tabla: U560_VentasTaller
Cargando datos de la tabla: U6311303_Talleres
Cargando datos de la tabla: ULSTTHPT_TipoHoras
Cargando datos de la tabla: TipoOrdenesReparacion
Cargando datos de la tabla: ULSTTVPT_TiposVentas
Cargando datos de la tabla: U6341303_Vehiculos
Cargando datos de la tabla: ULSTTHPT_TiposHoras


In [8]:
# Row count of the bronze 'U532_Trabajadas' frame
len(dataframes['U532_Trabajadas'])

595460

In [9]:
# Rows of 'U551_Presencia' whose 'Operario' value is null (raw bronze column names)
dataframes['U551_Presencia'][dataframes['U551_Presencia']['Operario'].isnull()]

Unnamed: 0,Tall,Fecha,Secc,Operario,NOMBRE OPERARIO,ENTRADA PRES,SALIDA PRESE,DIF.PRES,Tipo h


In [10]:
# Inspect 'U551_Presencia' ordered by 'Operario'; the output shows test rows ('PRUEBA QUITER') at the end
dataframes['U551_Presencia'].sort_values('Operario')

Unnamed: 0,Tall,Fecha,Secc,Operario,NOMBRE OPERARIO,ENTRADA PRES,SALIDA PRESE,DIF.PRES,Tipo h
0,2,2012-01-02,CR,104,MANZANO HERRERA LUIS,09:00:00,13:02:00,4.03,HN
52637,2,2015-01-13,CR,104,MANZANO HERRERA LUIS,15:30:00,19:00:00,3.5,HN
6613,2,2012-04-25,CR,104,MANZANO HERRERA LUIS,09:00:00,13:00:00,4.0,HN
6612,2,2012-04-25,CR,104,MANZANO HERRERA LUIS,19:00:00,21:00:00,2.0,HE
52638,2,2015-01-13,CR,104,MANZANO HERRERA LUIS,19:00:00,20:00:00,1.0,HE
...,...,...,...,...,...,...,...,...,...
265305,8,2023-05-31,MR,XY,PRUEBA QUITER,,,0.0,
264996,8,2023-05-30,MR,XY,PRUEBA QUITER,,,0.0,
264790,8,2023-05-29,MR,XY,PRUEBA QUITER,,,0.0,
261669,8,2023-05-11,MR,XY,PRUEBA QUITER,,,0.0,


Transform data 

En este proceso de transformación se llevan a cabo las siguientes tareas:

* Asignar nombres correspondientes a tablas y campos.
* Gestión de valores nulos.
* Asignación de los tipos de datos correspondientes. 

In [11]:
from model.model_info import almacenes_info, articulos_info, bonos_presencia_info
from model.model_info import bonos_trabajadas_info, clientes_info
from model.model_info import compras_info, empresas_info, invertidas_info
from model.model_info import operarios_info, stock_info, ordenes_reparacion_info
from model.model_info import ordenes_venta_mostrador_info, ordenes_venta_taller_info
from model.model_info import talleres_info, tipos_horas_info, tipos_ordenes_reparacion_info
from model.model_info import tipos_ventas_almacen_info, vehiculos_info

In [12]:
# Give the master (dimension) tables their silver field names.
# Each *_info dict is read in insertion order, so its keys must be declared in the
# same order as the columns of the matching bronze frame.
master_column_specs = {
    'U6321303_Almacenes': almacenes_info,
    'U6301303_Articulos': articulos_info,
    'U6301303_Clientes': clientes_info,
    'U6311303_Empresas': empresas_info,
    'U6331303_Operarios': operarios_info,
    'U6311303_Talleres': talleres_info,
    'ULSTTHPT_TiposHoras': tipos_horas_info,
    'ULSTTVPT_TiposVentas': tipos_ventas_almacen_info,
    'U6341303_Vehiculos': vehiculos_info,
}

for bronze_name, column_info in master_column_specs.items():
    dataframes[bronze_name].columns = column_info.keys()


In [13]:
# Give the fact tables their silver field names.
# Each *_info dict is read in insertion order, so its keys must be declared in the
# same order as the columns of the matching bronze frame.
fact_column_specs = {
    'U551_Presencia': bonos_presencia_info,
    'U532_Trabajadas': bonos_trabajadas_info,
    'U553_Compras': compras_info,
    'U555_Invertidas': invertidas_info,
    'U552_Stock': stock_info,
    'U533_OrdenesReparacion': ordenes_reparacion_info,
    'U554_VentasMostrador': ordenes_venta_mostrador_info,
    'U560_VentasTaller': ordenes_venta_taller_info,
}

for bronze_name, column_info in fact_column_specs.items():
    dataframes[bronze_name].columns = column_info.keys()

Gestión de valores nulos 


In [14]:
def check_nulls(df, dict_info):
    '''
    Split a DataFrame into rows that respect the not-null rules and rows that violate them.

        Input:
            - df (DataFrame): data to validate. It is not modified.
            - dict_info (Dictionary): maps column name -> rules dict; only the boolean
              'nullable' entry of each rules dict is read here.

        Output:
            - df (DataFrame): copy of the rows that have no null in any non-nullable column.
            - df_invalid_rows (DataFrame): copy of the offending rows plus a 'Reason' column
              naming every non-nullable column that is null in that row.

        Raises:
            - KeyError: if a non-nullable column listed in dict_info is missing from df.
    '''

    # Columns that must never be null, in dict_info order (this order drives the message)
    required_columns = [column for column, rules in dict_info.items() if not rules['nullable']]

    # Boolean frame: True where a required value is missing. The mask is positional,
    # so rows that share a duplicated index label are judged independently.
    null_flags = df[required_columns].isnull()
    invalid_mask = null_flags.any(axis=1)

    # One consolidated message per offending row
    reasons = []
    for row_flags in null_flags[invalid_mask].to_numpy():
        quoted = ', '.join(
            "'{}'".format(column)
            for column, is_null in zip(required_columns, row_flags)
            if is_null
        )
        reasons.append(f"The columns {quoted} do not allow null values.")

    # Copies keep the caller's frame untouched and avoid chained-assignment warnings downstream
    df_invalid_rows = df[invalid_mask].copy()
    # Object dtype is forced so an empty result still has a string-like 'Reason' column
    df_invalid_rows['Reason'] = pd.Series(reasons, dtype=object).to_numpy()
    df_clean = df[~invalid_mask].copy()

    # Log the outcome
    if not df_invalid_rows.empty:
        dp.logger.error('There is constraint violation in null values for table.')
    else:
        dp.logger.info('There is no constraint violation in null values for table.')

    return df_clean, df_invalid_rows



In [16]:
# Debug aid: print the rules dict stored for each Almacenes column
for column, rules in almacenes_info.items():
    print(rules)

{'data_type': <class 'int'>, 'nullable': False}
{'data_type': <class 'str'>, 'nullable': False}
{'data_type': <class 'int'>, 'nullable': False}


In [15]:
# Master Tables
# For each table: replace the frame with its null-free rows and keep the rejected
# rows (with their 'Reason') in a df_null_* frame used by the data-type step below.
dataframes['U6321303_Almacenes'], df_null_almacenes = check_nulls(dataframes['U6321303_Almacenes'], almacenes_info)
dataframes['U6301303_Articulos'], df_null_articulos = check_nulls(dataframes['U6301303_Articulos'], articulos_info)
dataframes['U6301303_Clientes'], df_null_clientes = check_nulls(dataframes['U6301303_Clientes'], clientes_info)
dataframes['U6311303_Empresas'], df_null_empresas = check_nulls(dataframes['U6311303_Empresas'], empresas_info)
dataframes['U6331303_Operarios'], df_null_operarios = check_nulls(dataframes['U6331303_Operarios'], operarios_info)
dataframes['U6311303_Talleres'], df_null_talleres = check_nulls(dataframes['U6311303_Talleres'], talleres_info)
dataframes['ULSTTHPT_TiposHoras'], df_null_tipos_horas = check_nulls(dataframes['ULSTTHPT_TiposHoras'], tipos_horas_info)
dataframes['ULSTTVPT_TiposVentas'], df_null_tipo_ventas_almacen = check_nulls(dataframes['ULSTTVPT_TiposVentas'], tipos_ventas_almacen_info)
dataframes['U6341303_Vehiculos'], df_null_vehiculos = check_nulls(dataframes['U6341303_Vehiculos'], vehiculos_info)

2024-12-04 11:25:44,555 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:25:45,020 - ERROR - There is constraint violation in null values for table.
2024-12-04 11:25:45,447 - ERROR - There is constraint violation in null values for table.
2024-12-04 11:25:45,458 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:25:45,467 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:25:45,471 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:25:45,495 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:25:45,508 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:25:51,179 - ERROR - There is constraint violation in null values for table.


In [16]:
# Fact Tables
# Same null check as for the master tables; rejected rows are kept in df_null_* frames.
dataframes['U551_Presencia'], df_null_presencia = check_nulls(dataframes['U551_Presencia'], bonos_presencia_info)
dataframes['U532_Trabajadas'], df_null_trabajadas = check_nulls(dataframes['U532_Trabajadas'], bonos_trabajadas_info)
dataframes['U553_Compras'], df_null_compras = check_nulls(dataframes['U553_Compras'], compras_info)
dataframes['U555_Invertidas'], df_null_invertidas = check_nulls(dataframes['U555_Invertidas'], invertidas_info)
dataframes['U552_Stock'], df_null_stock = check_nulls(dataframes['U552_Stock'], stock_info)
dataframes['U533_OrdenesReparacion'], df_null_orden = check_nulls(dataframes['U533_OrdenesReparacion'], ordenes_reparacion_info)
dataframes['U554_VentasMostrador'], df_null_ventas_mostrador = check_nulls(dataframes['U554_VentasMostrador'], ordenes_venta_mostrador_info)
dataframes['U560_VentasTaller'], df_null_ventas_taller = check_nulls(dataframes['U560_VentasTaller'], ordenes_venta_taller_info)

2024-12-04 11:25:58,671 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:26:00,919 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:26:03,660 - ERROR - There is constraint violation in null values for table.
2024-12-04 11:26:15,691 - ERROR - There is constraint violation in null values for table.
2024-12-04 11:26:15,759 - INFO - There is no constraint violation in null values for table.
2024-12-04 11:26:17,670 - ERROR - There is constraint violation in null values for table.
2024-12-04 11:26:18,926 - ERROR - There is constraint violation in null values for table.
2024-12-04 11:26:28,831 - ERROR - There is constraint violation in null values for table.


In [21]:
# After the null check: rows of 'U551_Presencia' with a null 'Operario' (the output shows none)
dataframes['U551_Presencia'][dataframes['U551_Presencia']['Operario'].isnull()]

Unnamed: 0,Taller,Fecha,Seccion,Operario,NombreOperario,EntradaPresencia,SalidaPresencia,DiferenciaPresencia,TipoHora


In [17]:
# Rows of 'U532_Trabajadas' that belong to the 'PRUEBA QUITER' operator
dataframes['U532_Trabajadas'][dataframes['U532_Trabajadas']['NombreOperario'] == 'PRUEBA QUITER']

Unnamed: 0,Taller,Fecha,Seccion,Operario,NombreOperario,ReferenciaOR,TipoOR,IDV,Matricula,Entrada,Salida,DiferenciaTrabajadas
491391,8,2023-05-01,MR,XY,PRUEBA QUITER,,,,,,,0.0
491788,8,2023-05-02,MR,XY,PRUEBA QUITER,,,,,,,0.0
492022,8,2023-05-03,MR,XY,PRUEBA QUITER,,,,,,,0.0
492173,8,2023-05-04,MR,XY,PRUEBA QUITER,,,,,,,0.0
492589,8,2023-05-05,MR,XY,PRUEBA QUITER,,,,,,,0.0
492772,8,2023-05-06,MR,XY,PRUEBA QUITER,,,,,,,0.0
492834,8,2023-05-07,MR,XY,PRUEBA QUITER,,,,,,,0.0
493082,8,2023-05-08,MR,XY,PRUEBA QUITER,,,,,,,0.0
493243,8,2023-05-09,MR,XY,PRUEBA QUITER,,,,,,,0.0
493790,8,2023-05-10,MR,XY,PRUEBA QUITER,,,,,,,0.0


: 

In [23]:
# Distinct rejection reasons recorded for 'U551_Presencia' (empty when no row was rejected)
df_null_presencia.Reason.unique()

array([], dtype=object)

In [25]:
# Re-check: rows of 'U551_Presencia' with a null 'Operario'
dataframes['U551_Presencia'][dataframes['U551_Presencia']['Operario'].isnull()]

Unnamed: 0,Taller,Fecha,Seccion,Operario,NombreOperario,EntradaPresencia,SalidaPresencia,DiferenciaPresencia,TipoHora


Gestión de tipos de datos

In [28]:
def check_data_types(df, dict_info, df_invalid=None):
    '''
    Convert the columns of a DataFrame to the types declared in dict_info and report
    the rows whose values could not be converted.

    Input:
        - df (DataFrame): The dataframe containing the data. It is not modified; a
          converted copy is returned.
        - dict_info (Dictionary): maps column name -> rules dict; only the 'data_type'
          entry is read here. Handled values: 'datetime64[ns]', 'timedelta64[ns]',
          str, and anything accepted by Series.astype (e.g. int, float).
        - df_invalid (DataFrame, optional): previously rejected rows; newly rejected
          rows are appended to it.

    Output:
        - df (DataFrame): copy of the input with converted columns. Values that could
          not be converted are coerced to NaT/NaN; their rows are NOT removed.
        - df_invalid (DataFrame): df_invalid plus one row per failed conversion, with
          a 'Reason' column naming the column and the expected type.

    Raises:
        - KeyError: if a column listed in dict_info is missing from df.
    '''

    if df_invalid is None:
        df_invalid = pd.DataFrame()  # Start with no rejected rows

    # Work on a copy: the caller's frame may be a slice of another frame, and
    # assigning into it would mutate shared data or raise chained-assignment warnings.
    df = df.copy()
    rejected = []

    for column, rules in dict_info.items():
        expected_type = rules['data_type']
        original = df[column]

        if expected_type == 'timedelta64[ns]':
            # Passed through unchanged. The previous code built a string version of
            # the column here but discarded it, so this has always been the effective
            # behavior. NOTE(review): decide whether a real conversion is wanted.
            continue

        if expected_type == str:
            # Stringify real values only; astype(str) alone would turn missing values
            # into the literal texts 'None' / 'nan'.
            df[column] = original.astype(str).where(original.notnull(), None)
            continue

        if expected_type == 'datetime64[ns]':
            converted = pd.to_datetime(original, errors='coerce')
        else:
            try:
                df[column] = original.astype(expected_type)
                continue
            except Exception:
                # Best-effort fallback kept from the original: coerce to numeric so a
                # few bad values (or nulls in an int column) do not abort the load.
                converted = pd.to_numeric(original, errors='coerce')

        # A value failed if it was present before the conversion and is missing after it
        failed = converted.isna() & original.notna()
        if failed.any():
            bad_rows = df[failed].copy()
            bad_rows['Reason'] = f"The column '{column}' has values that cannot be converted to {expected_type}."
            rejected.append(bad_rows)

        df[column] = converted

    if rejected:
        # Skip empty frames so concat does not have to infer dtypes from them
        frames = [frame for frame in [df_invalid, *rejected] if not frame.empty]
        df_invalid = pd.concat(frames)

    return df, df_invalid


In [29]:
# Master Tables
# Convert each frame to its declared types; rows rejected here are appended to the
# rows already rejected by the null check (df_null_*), giving the df_invalid_* frames.
dataframes['U6321303_Almacenes'], df_invalid_almacenes = check_data_types(dataframes['U6321303_Almacenes'], almacenes_info, df_null_almacenes)
dataframes['U6301303_Articulos'], df_invalid_articulos = check_data_types(dataframes['U6301303_Articulos'], articulos_info, df_null_articulos)
dataframes['U6301303_Clientes'], df_invalid_clientes = check_data_types(dataframes['U6301303_Clientes'], clientes_info, df_null_clientes)
dataframes['U6311303_Empresas'], df_invalid_empresas = check_data_types(dataframes['U6311303_Empresas'], empresas_info, df_null_empresas)
dataframes['U6331303_Operarios'], df_invalid_operarios = check_data_types(dataframes['U6331303_Operarios'], operarios_info, df_null_operarios)
dataframes['U6311303_Talleres'], df_invalid_talleres = check_data_types(dataframes['U6311303_Talleres'], talleres_info, df_null_talleres)
dataframes['ULSTTHPT_TiposHoras'], df_invalid_tipos_horas = check_data_types(dataframes['ULSTTHPT_TiposHoras'], tipos_horas_info, df_null_tipos_horas)
dataframes['ULSTTVPT_TiposVentas'], df_invalid_tipo_ventas_almacen = check_data_types(dataframes['ULSTTVPT_TiposVentas'], tipos_ventas_almacen_info, df_null_tipo_ventas_almacen)
dataframes['U6341303_Vehiculos'], df_invalid_vehiculos = check_data_types(dataframes['U6341303_Vehiculos'], vehiculos_info, df_null_vehiculos)

In [30]:
# Fact Tables
# Same type conversion as for the master tables; df_invalid_* accumulates the rows
# rejected by the null check and by the type check.
dataframes['U551_Presencia'], df_invalid_presencia = check_data_types(dataframes['U551_Presencia'], bonos_presencia_info, df_null_presencia)
dataframes['U532_Trabajadas'], df_invalid_trabajadas = check_data_types(dataframes['U532_Trabajadas'], bonos_trabajadas_info, df_null_trabajadas)
dataframes['U553_Compras'], df_invalid_compras = check_data_types(dataframes['U553_Compras'], compras_info, df_null_compras)
dataframes['U555_Invertidas'], df_invalid_invertidas = check_data_types(dataframes['U555_Invertidas'], invertidas_info, df_null_invertidas)
dataframes['U552_Stock'], df_invalid_stock = check_data_types(dataframes['U552_Stock'], stock_info, df_null_stock)
dataframes['U533_OrdenesReparacion'], df_invalid_orden = check_data_types(dataframes['U533_OrdenesReparacion'], ordenes_reparacion_info, df_null_orden)
dataframes['U554_VentasMostrador'], df_invalid_ventas_mostrador = check_data_types(dataframes['U554_VentasMostrador'], ordenes_venta_mostrador_info, df_null_ventas_mostrador)
dataframes['U560_VentasTaller'], df_invalid_ventas_taller = check_data_types(dataframes['U560_VentasTaller'], ordenes_venta_taller_info, df_null_ventas_taller)

Load Data

In [None]:
# Load the master tables into silver under their final table names.
# NOTE(review): if_exists='append' adds rows on every run, so re-running this cell
# will presumably duplicate data unless the target tables reject duplicates — confirm.
# NOTE(review): no schema= is passed; the target schema presumably comes from
# engine_silver — confirm.
dataframes['U6321303_Almacenes'].to_sql('Almacenes', con=engine_silver, if_exists='append', index=False)
dataframes['U6301303_Articulos'].to_sql('Articulos', con=engine_silver, if_exists='append', index=False)
dataframes['U6301303_Clientes'].to_sql('Clientes', con=engine_silver, if_exists='append', index=False)
dataframes['U6311303_Empresas'].to_sql('Empresas', con=engine_silver, if_exists='append', index=False)
dataframes['U6331303_Operarios'].to_sql('Operarios', con=engine_silver, if_exists='append', index=False)
dataframes['U6311303_Talleres'].to_sql('Talleres', con=engine_silver, if_exists='append', index=False)
dataframes['ULSTTHPT_TiposHoras'].to_sql('TiposHoras', con=engine_silver, if_exists='append', index=False)
dataframes['ULSTTVPT_TiposVentas'].to_sql('TiposVentasAlmacen', con=engine_silver, if_exists='append', index=False)
dataframes['U6341303_Vehiculos'].to_sql('Vehiculos', con=engine_silver, if_exists='append', index=False)


In [None]:
# Load the fact tables into silver under their final table names.
# NOTE(review): if_exists='append' adds rows on every run, so re-running this cell
# will presumably duplicate data unless the target tables reject duplicates — confirm.
dataframes['U551_Presencia'].to_sql('BonosPresencia', con=engine_silver, if_exists='append', index=False)
dataframes['U532_Trabajadas'].to_sql('BonosTrabajadas', con=engine_silver, if_exists='append', index=False)
dataframes['U553_Compras'].to_sql('Compras', con=engine_silver, if_exists='append', index=False)
dataframes['U555_Invertidas'].to_sql('Invertidas', con=engine_silver, if_exists='append', index=False)
dataframes['U552_Stock'].to_sql('Stock', con=engine_silver, if_exists='append', index=False)
dataframes['U533_OrdenesReparacion'].to_sql('OrdenesReparacion', con=engine_silver, if_exists='append', index=False)
dataframes['U554_VentasMostrador'].to_sql('OrdenesVentaMostrador', con=engine_silver, if_exists='append', index=False)
dataframes['U560_VentasTaller'].to_sql('OrdenesVentaTaller', con=engine_silver, if_exists='append', index=False)

Load new data

In [17]:
from processes.bronze.load import upload_new_data_master, upload_new_data_fact, upload_new_data_fact_mult


Extract silver data

In [None]:
# Fresh metadata container for the silver schema (replaces the bronze one)
metadata = MetaData()

# Reflect every table of the "silver" schema
metadata.reflect(bind=engine_silver, schema='silver')

# Build an automap base on top of the reflected metadata
Base = automap_base(metadata=metadata)

# Prepare the automapping
Base.prepare()

# Collect the reflected tables in a dict keyed by the metadata key.
# This overwrites the bronze `tables` dict built earlier in the notebook.
tables = {}
for table_name in metadata.tables:
    print(f"Tabla encontrada: {table_name}")
    tables[table_name] = Table(table_name, metadata, autoload_with=engine_silver)

In [None]:
# Store the silver DataFrames in a dictionary keyed by bare table name
dataframes_s = {}

for table_name, table_obj in tables.items():
    # Keep only the table name (drop the schema prefix)
    clean_table_name = table_name.split('.')[-1]
    print(f"Cargando datos de la tabla: {clean_table_name}")
    
    # Read the whole table into a DataFrame.
    # NOTE(review): the query is not schema-qualified; it presumably relies on
    # engine_silver resolving names inside the silver schema — confirm.
    query = f'SELECT * FROM "{clean_table_name}"'  # table name only, no schema prefix
    dataframes_s[clean_table_name] = pd.read_sql(query, con=engine_silver)

Validación carga de datos Bonos Presencia

In [31]:
# Two overlapping slices of the most recent attendance rows (sorted by 'Fecha', newest first),
# used below to validate the incremental upload.
#dataframes['U551_Presencia'].iloc[-15:, :]
# Positions 14-23 of the newest-first ordering
df_presencia_sample = dataframes['U551_Presencia'].sort_values('Fecha', ascending=False).iloc[14:24, :]
# Positions 0-18 of the newest-first ordering (shares positions 14-18 with the sample)
df_presencia_new_data = dataframes['U551_Presencia'].sort_values('Fecha', ascending=False).iloc[0:19, :]

In [32]:
# Spot check of one 'TipoHora' value.
# NOTE(review): 7681 is a hard-coded index label; this raises KeyError whenever that
# row is not part of the sample.
df_presencia_sample.loc[7681, 'TipoHora']

In [34]:
# Upload the sample slice to 'BonosPresencia' through the project helper.
# NOTE(review): the list and 'Fecha' are presumably the key columns and the date
# column used to detect new rows — confirm against processes.bronze.load.
upload_new_data_fact_mult(df_presencia_sample, 'BonosPresencia',  ['Operario', 'EntradaPresencia'], 'Fecha', engine_silver)

In [35]:
# Upload the second slice, which overlaps the sample, to 'BonosPresencia'.
# NOTE(review): presumably only the rows not already loaded should be inserted — confirm
# against processes.bronze.load.
upload_new_data_fact_mult(df_presencia_new_data, 'BonosPresencia',  ['Operario', 'EntradaPresencia'], 'Fecha', engine_silver)

Análisis de cuentas de vehículos

In [None]:
# Cast the three account columns to str.
# NOTE(review): df_vehiculos is not defined in the visible part of this notebook;
# this cell fails on a fresh kernel unless it is created elsewhere.
df_vehiculos['CuentaTitular'] = df_vehiculos['CuentaTitular'].astype(str)
df_vehiculos['CuentaCliente'] = df_vehiculos['CuentaCliente'].astype(str)
df_vehiculos['CuentaConductor'] = df_vehiculos['CuentaConductor'].astype(str)

In [None]:
# Join vehicles to clients on the driver account (default inner join).
# NOTE(review): df_vehiculos and df_cliente are not defined in the visible part of this notebook.
pd.merge(df_vehiculos, df_cliente, left_on='CuentaConductor', right_on='Codigo')

In [None]:
# Total number of vehicle rows, for comparison with the merge result
len(df_vehiculos)

Análisis de cuentas de órdenes

In [None]:
# Preview of the repair-order frame.
# NOTE(review): df_taller_orden_rep is not defined in the visible part of this notebook.
df_taller_orden_rep.head()

In [None]:
# Join repair orders to clients on the charge account (default inner join)
pd.merge(df_taller_orden_rep, df_cliente, left_on='CuentaCargo', right_on='Codigo')

Análisis de cuentas de mostrador

In [None]:
# Join counter sales to clients on the account code (default inner join).
# NOTE(review): df_mostrador and df_cliente are not defined in the visible part of this notebook.
pd.merge(df_mostrador, df_cliente, left_on='Cuenta', right_on='Codigo')

In [None]:
# Preview of the counter-sales frame
df_mostrador.head()

In [None]:
# Counter-sales rows for one specific reference (compared as a string)
df_mostrador[df_mostrador['Referencia'] == '29128095']

In [None]:
# Display the repair-order frame.
# NOTE(review): this renders the whole frame; a .head() preview would keep the output small.
df_taller_orden_rep