In [17]:
import pandas as pd
import numpy as np
import boto3
import logging
from io import StringIO
from dotenv import load_dotenv
from pathlib import Path
import os
import polars as pl
from logging import Logger
import json
from chalice import Response
from datetime import datetime, timedelta
from functions_mo import (
final_result, 
calculate_volume_distribution, 
get_item_from_dynamodb, 
get_item_from_dynamodb_global, 
get_ssm_parameter,
object_to_dataframe
) 

In [11]:
#set up logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [12]:
env_path = Path('C:/Users/Nata_/Documents/Etapa_1_proyecto/Simulador_mano_de_obra/mano_de_obra_agrovid/var.env')

load_dotenv(dotenv_path=env_path)
AWS_PROFILE = os.environ["aws_profile"]
SSM_PARAMETER_BUCKET_INPUT_FILES_NAME = os.environ['SSM_PARAMETER_BUCKET_INPUT_FILES_NAME']
SSM_PARAMETER_BUCKET_OUTPUT_FILES_NAME = os.environ['SSM_PARAMETER_BUCKET_OUTPUT_FILES_NAME']
REGION = os.environ['REGION']
SSM_PARAMETER_DYNAMODB_MODULE_CONFIG_NAME = os.environ['SSM_PARAMETER_DYNAMODB_MODULE_CONFIG_NAME']
SSM_PARAMETER_DYNAMODB_GLOBAL_MODULE_CONFIG_NAME = os.environ['SSM_PARAMETER_DYNAMODB_GLOBAL_MODULE_CONFIG_NAME']


## Función para despliegue

In [13]:
def materiales(event, context):
    modulo: str = "modulo_mano_de_obra"
    modulo_revenue: str = "modulo_ingresos"
    PROCESS_TYPE = "er_simulado"
    modulo_global_materiales = "nombre_finca"
    file_revenue: str = "volumen.csv"
    body = event
    # # 4. Body (usually JSON, but can be other formats)
    # if event.get("body"):
    #     body = event["body"]
    #     # Assuming the body is JSON
    #     body = json.loads(body)
    #     print("body_json", body)
    #     print("type body_json", type(body))
    
    session = boto3.Session(profile_name=AWS_PROFILE)

    s3_client = session.client('s3', region_name=REGION)
    ssm_client = session.client("ssm", region_name=REGION)
    dynamodb_client = boto3.resource("dynamodb", region_name=REGION)

    bucket_input_files = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_BUCKET_INPUT_FILES_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )
    bucket_output_files = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_BUCKET_OUTPUT_FILES_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )
    dynamodb_table_name_module_config = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_DYNAMODB_MODULE_CONFIG_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )

    dynamodb_table_name_global_module_config = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_DYNAMODB_GLOBAL_MODULE_CONFIG_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )
    logger.info(f"{dynamodb_table_name_module_config}")

    module_config = get_item_from_dynamodb(
        table_name=dynamodb_table_name_module_config,
        key={"module": modulo, "process_type": PROCESS_TYPE},
        logger=logger,
        dynamodb_client=dynamodb_client,
    )

    global_config_materiales = get_item_from_dynamodb_global(
        table_name=dynamodb_table_name_global_module_config,
        partition_key_value=modulo_global_materiales,
        logger=logger,
        dynamodb_client=dynamodb_client,
   )
    
    
    input_files_names: dict = module_config["input_file_names_mano_de_obra"]
    farm_order: dict = global_config_materiales["fincas"]

    # Descarga el archivo CSV desde S3
    
    map_with_dfs = {}
    current_date = datetime.now().strftime("%Y-%m-%d")
    partition = body.get("partition", current_date)

    volum_file = object_to_dataframe(
        s3_client=s3_client,
        bucket_name=bucket_input_files,
        folder_name=f"{PROCESS_TYPE}/{modulo_revenue}",
        partition=partition,
        file_name=file_revenue,
    )

    try:
        for key, file in input_files_names.items():
            df = object_to_dataframe(
                s3_client=s3_client,
                bucket_name=bucket_input_files,
                folder_name=f"{PROCESS_TYPE}/{modulo}",
                partition=partition,
                file_name=file,
            )
            logger.info(f"file_key: {file} read successfully")
            map_with_dfs[key] = df
        volum_distribution = map_with_dfs["volum_distribution"]
    except Exception as e:
        logger.error(
            f"Error reding file {file} from {bucket_input_files}: error detail:{e}"
        )
        return Response(
            status_code=500,
            body=json.dumps(
                {
                    "message": f"Error reding file {file} from {bucket_input_files}: error detail:{e}"
                }
            ),
        )

### Importar los archivos

In [26]:
modulo: str = "modulo_mano_de_obra"
PROCESS_TYPE = "er_simulado"
modulo_global_materiales = "nombre_finca"
modulo_revenue: str = "modulo_ingresos"
modulo_materiales: str = "modulo_materiales"
file_revenue: str = "volumen.csv"
file_material:str = "distribucion_volumen.csv"
   
session = boto3.Session(profile_name=AWS_PROFILE)

s3_client = session.client("s3", region_name=REGION)
ssm_client = session.client("ssm", region_name=REGION)
dynamodb_client = session.resource("dynamodb", region_name=REGION)

bucket_input_files = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_BUCKET_INPUT_FILES_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )
dynamodb_table_name_module_config = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_DYNAMODB_MODULE_CONFIG_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )

dynamodb_table_name_global_module_config = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_DYNAMODB_GLOBAL_MODULE_CONFIG_NAME,
        ssm_client=ssm_client,
        logger=logger,
        with_decryption=True
    ) 
    
    
logger.info(f"{dynamodb_table_name_module_config}")

module_config = get_item_from_dynamodb(
        table_name=dynamodb_table_name_module_config,
        key={"module": modulo, "process_type": PROCESS_TYPE},
        logger=logger,
        dynamodb_client=dynamodb_client,
    )


global_config_materiales = get_item_from_dynamodb_global(
        table_name=dynamodb_table_name_global_module_config,
        partition_key_value=modulo_global_materiales,
        logger=logger,
        dynamodb_client=dynamodb_client,
   )
 

input_files_names: dict = module_config["input_file_names_mo"]
farm_order: dict = global_config_materiales["fincas"]

    # Descarga el archivo CSV desde S3
map_with_dfs = {}
current_date = datetime.now().strftime("%Y-%m-%d")
partition = "2025-03-13"
partition_materiales = "2025-02-19"
partition_revenue = "2025-02-06"

volum_file = object_to_dataframe(
        s3_client=s3_client,
        bucket_name=bucket_input_files,
        folder_name=f"{PROCESS_TYPE}/{modulo_revenue}",
        partition=partition_revenue,
        file_name=file_revenue,
    )

volum_distribution = object_to_dataframe(
        s3_client=s3_client,
        bucket_name=bucket_input_files,
        folder_name=f"{PROCESS_TYPE}/{modulo_materiales}",
        partition=partition_materiales,
        file_name=file_material,
    )

try:
    for key, file in input_files_names.items():
        df = object_to_dataframe(
            s3_client=s3_client,
            bucket_name=bucket_input_files,
            folder_name=f"{PROCESS_TYPE}/{modulo}",
            partition=partition,
            file_name=file,
        )

    

      
        logger.info(f"file_key: {file} read successfully")
        map_with_dfs[key] = df
    cut_emp = map_with_dfs["cut_emp"]
    
except Exception as e:
    logger.error(
            f"Error reding file {file} from {bucket_input_files}: error detail:{e}"
    )
     

2025-03-13 18:10:12,130 - INFO - Found credentials in shared credentials file: ~/.aws/credentials


2025-03-13 18:10:13,115 - INFO - Parameter dev-bucket-upload-files-name retrieved successfully.
2025-03-13 18:10:13,198 - INFO - Parameter dev-dynamodb-table-name-module-config retrieved successfully.
2025-03-13 18:10:13,313 - INFO - Parameter dev-dynamodb-table-name-global-config retrieved successfully.
2025-03-13 18:10:13,313 - INFO - dev-module-config
2025-03-13 18:10:14,504 - INFO - file_key: corte_y_empaque.csv read successfully


In [27]:
cut_emp.head(3)

Unnamed: 0,ID,FINCA,GRUPO,LABOR,MAESTRA,TARIFA,REF,PROMEDIADO,PRESTACIONES,FACTOR
0,100001,ENANO,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0
1,100001,EVA,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0
2,100001,SAMI,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0


In [28]:
volum_distribution.head()

Unnamed: 0,SEMANA,6/01/2024,13/01/2024,20/01/2024,27/01/2024,3/02/2024,10/02/2024,17/02/2024,24/02/2024,2/03/2024,...,26/10/2024,2/11/2024,9/11/2024,16/11/2024,23/11/2024,30/11/2024,7/12/2024,14/12/2024,21/12/2024,28/12/2024
0,ENERO,1,1,1,1,0.6,0,0,0,0.0,...,0,0.0,0,0,0,0,0,0,0,0
1,FEBRERO,0,0,0,0,0.4,1,1,1,0.8,...,0,0.0,0,0,0,0,0,0,0,0
2,MARZO,0,0,0,0,0.0,0,0,0,0.2,...,0,0.0,0,0,0,0,0,0,0,0
3,ABRIL,0,0,0,0,0.0,0,0,0,0.0,...,0,0.0,0,0,0,0,0,0,0,0
4,MAYO,0,0,0,0,0.0,0,0,0,0.0,...,0,0.0,0,0,0,0,0,0,0,0


## Corte y empaque

In [29]:
volum_distribution.columns = list(volum_distribution.columns[:1]) + [pd.to_datetime(col, dayfirst=True, errors='coerce') for col in volum_distribution.columns[1:]]
volum_distribution.head(2)

Unnamed: 0,SEMANA,2024-01-06 00:00:00,2024-01-13 00:00:00,2024-01-20 00:00:00,2024-01-27 00:00:00,2024-02-03 00:00:00,2024-02-10 00:00:00,2024-02-17 00:00:00,2024-02-24 00:00:00,2024-03-02 00:00:00,...,2024-10-26 00:00:00,2024-11-02 00:00:00,2024-11-09 00:00:00,2024-11-16 00:00:00,2024-11-23 00:00:00,2024-11-30 00:00:00,2024-12-07 00:00:00,2024-12-14 00:00:00,2024-12-21 00:00:00,2024-12-28 00:00:00
0,ENERO,1,1,1,1,0.6,0,0,0,0.0,...,0,0.0,0,0,0,0,0,0,0,0
1,FEBRERO,0,0,0,0,0.4,1,1,1,0.8,...,0,0.0,0,0,0,0,0,0,0,0


In [30]:
volum_file.columns = list(volum_file.columns[:3]) + [pd.to_datetime(col, dayfirst=True, errors='coerce') for col in volum_file.columns[3:]]
volum_file.head(3)

Unnamed: 0.1,Unnamed: 0,CONCEPTO,FINCA,2024-01-06 00:00:00,2024-01-13 00:00:00,2024-01-20 00:00:00,2024-01-27 00:00:00,2024-02-03 00:00:00,2024-02-10 00:00:00,2024-02-17 00:00:00,...,2024-10-26 00:00:00,2024-11-02 00:00:00,2024-11-09 00:00:00,2024-11-16 00:00:00,2024-11-23 00:00:00,2024-11-30 00:00:00,2024-12-07 00:00:00,2024-12-14 00:00:00,2024-12-21 00:00:00,2024-12-28 00:00:00
0,0,CAJAS,ENANO,7969.0,8319.0,7813.0,7359.0,7989.0,5479.0,6037.0,...,9164.0,9406.0,10244.0,9851.0,9751.0,9302.0,10343.0,10606.0,10349.0,9936.0
1,1,CAJAS,EVA,4098.0,6617.0,7238.0,7600.0,6414.0,4585.0,4992.0,...,7407.0,7045.0,7287.0,6578.0,6586.0,6073.0,6218.0,6091.0,5903.0,5791.0
2,2,CAJAS,SAMI,4560.0,8771.0,9549.0,9418.0,8004.0,6011.0,5031.0,...,8233.0,8250.0,8530.0,7911.0,7875.0,7546.0,8216.0,7893.0,7937.0,7645.0


In [31]:
volum_distribution_matrix = volum_distribution.iloc[:, 1:]
volum_file_100001 = volum_file[volum_file['CONCEPTO'] == 'CAJAS'].iloc[:, 3:]
volum_file_subset = volum_file[volum_file['CONCEPTO'] == 'CAJAS']


In [33]:
volum_file_100001

Unnamed: 0,2024-01-06,2024-01-13,2024-01-20,2024-01-27,2024-02-03,2024-02-10,2024-02-17,2024-02-24,2024-03-02,2024-03-09,...,2024-10-26,2024-11-02,2024-11-09,2024-11-16,2024-11-23,2024-11-30,2024-12-07,2024-12-14,2024-12-21,2024-12-28
0,7969.0,8319.0,7813.0,7359.0,7989.0,5479.0,6037.0,6918.0,8895.0,8528.0,...,9164.0,9406.0,10244.0,9851.0,9751.0,9302.0,10343.0,10606.0,10349.0,9936.0
1,4098.0,6617.0,7238.0,7600.0,6414.0,4585.0,4992.0,5411.0,6998.0,5030.0,...,7407.0,7045.0,7287.0,6578.0,6586.0,6073.0,6218.0,6091.0,5903.0,5791.0
2,4560.0,8771.0,9549.0,9418.0,8004.0,6011.0,5031.0,6875.0,8082.0,9268.0,...,8233.0,8250.0,8530.0,7911.0,7875.0,7546.0,8216.0,7893.0,7937.0,7645.0
3,3625.0,3448.0,4037.0,2996.0,3455.0,2949.0,3097.0,3034.0,3528.0,4040.0,...,4869.0,4584.0,4547.0,3897.0,3637.0,3361.0,3460.0,3426.0,3303.0,3147.0
4,5265.0,5243.0,3467.0,3098.0,3646.0,4033.0,4654.0,4113.0,5740.0,5822.0,...,10303.0,9370.0,9802.0,8779.0,8725.0,8484.0,9021.0,8505.0,7837.0,7814.0
5,3000.0,5787.0,5936.0,4399.0,3398.0,3414.0,3938.0,3912.0,3929.0,4360.0,...,7604.0,7292.0,7647.0,7244.0,7242.0,7030.0,7626.0,7687.0,7127.0,6831.0
6,4680.0,5584.0,8506.0,6538.0,4917.0,3309.0,6143.0,5854.0,5890.0,6808.0,...,8010.0,7607.0,7946.0,7365.0,7206.0,6547.0,6818.0,6831.0,6339.0,6299.0
7,2254.0,3631.0,3980.0,2133.0,2343.0,1970.0,2104.0,1945.0,2398.0,1446.0,...,2659.0,2595.0,2765.0,2509.0,2461.0,2323.0,2409.0,2317.0,2214.0,2246.0
8,6749.0,8353.0,6299.0,5817.0,7720.0,7329.0,4817.0,4965.0,7601.0,6891.0,...,11200.0,10572.0,10791.0,9457.0,9097.0,8538.0,9069.0,8758.0,8302.0,8041.0
9,9138.0,12311.0,13580.0,12180.0,11745.0,8499.0,9751.0,9419.0,12271.0,11988.0,...,15790.0,15241.0,15896.0,14806.0,15092.0,14462.0,15374.0,15398.0,15053.0,14977.0


In [22]:
labor_cic = calculate_volume_distribution(box_id_100001, volum_distribution_matrix, box_farm, new_columns)

2025-01-22 19:25:15,921 - INFO - Iniciando el cálculo de la matriz de distribución de volumen.
2025-01-22 19:25:15,922 - INFO - Procesando el mes: 0
2025-01-22 19:25:15,927 - INFO - Procesando el mes: 1
2025-01-22 19:25:15,950 - INFO - Procesando el mes: 2
2025-01-22 19:25:15,963 - INFO - Procesando el mes: 3
2025-01-22 19:25:15,977 - INFO - Procesando el mes: 4
2025-01-22 19:25:15,977 - INFO - Procesando el mes: 5
2025-01-22 19:25:15,996 - INFO - Procesando el mes: 6
2025-01-22 19:25:15,996 - INFO - Procesando el mes: 7
2025-01-22 19:25:16,016 - INFO - Procesando el mes: 8
2025-01-22 19:25:16,028 - INFO - Procesando el mes: 9
2025-01-22 19:25:16,031 - INFO - Procesando el mes: 10
2025-01-22 19:25:16,046 - INFO - Procesando el mes: 11
2025-01-22 19:25:16,061 - INFO - Matriz de resultados generada.
2025-01-22 19:25:16,061 - INFO - Matriz de resultados transpuesta y columnas renombradas.
2025-01-22 19:25:16,061 - INFO - DataFrame final concatenado con los datos de volumen transformados.


In [27]:
labor_cic


Unnamed: 0,FINCA,CONCEPTO,2024-01,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,2024-12
0,ENANO,CAJAS,36253.4,28745.6,35116.0,34368.6,35978.4,37744.0,44853.2,41569.8,35998.4,40047.4,41029.2,41234.0
1,EVA,CAJAS,29401.4,23152.0,27527.6,29610.8,24468.2,22901.0,29528.6,31522.4,32153.6,33810.4,27933.0,24003.0
2,SAMI,CAJAS,37100.4,27584.2,38828.4,33531.4,29984.6,30364.0,49405.8,46196.2,42365.4,40141.6,33512.0,31691.0
3,VEGA,CAJAS,16179.0,13284.4,18257.6,17892.6,15608.4,13915.0,17035.0,18262.0,19695.8,21760.4,16358.8,13336.0
4,FEDERICA,CAJAS,19260.6,18850.4,23099.0,23482.4,32951.6,31992.0,40472.4,40286.6,40250.4,45201.6,37664.0,33177.0
5,NEERLANDIA,CAJAS,21160.8,15766.4,22825.8,29697.8,25612.2,22451.0,28206.4,30207.6,29904.2,34381.4,30621.4,29271.0
6,SAN ANTONIO,CAJAS,28258.2,21984.8,28497.0,28030.2,27632.8,25564.0,33204.6,33724.4,33004.6,36158.0,30585.4,26287.0
7,MRS,CAJAS,13403.8,8874.6,10766.6,7980.2,8827.8,8977.0,11642.6,11140.4,10971.6,12075.4,10577.0,9186.0
8,DON FUAD,CAJAS,31850.0,26279.8,38492.2,44479.2,32925.8,30264.0,38753.4,44285.6,46008.4,50712.2,39997.4,34170.0
9,EUFEMIA,CAJAS,54256.0,42183.8,54314.2,51532.0,59857.0,61195.0,69885.2,67362.8,66722.6,71515.2,63304.2,60802.0


In [40]:
def calcular_resultado(matriz_a: pd.DataFrame, matriz_b: pd.DataFrame) -> pd.DataFrame:

    # Convertir las columnas en formato datetime

    # Eliminar columnas no relacionadas (si existen)
    matriz_a_numeric = matriz_a
    matriz_b_numeric = matriz_b

    # Crear un DataFrame para almacenar los resultados
    resultados = pd.DataFrame()

    # Iterar por las filas de la matriz A (cada mes)
    for i, fila_a in matriz_a_numeric.iterrows():
        # Multiplicar cada fila de B por la fila actual de A
        producto = matriz_b_numeric.multiply(fila_a.values, axis=1)
        # Sumar los resultados para cada fila de B
        suma_producto = producto.sum(axis=1)
        # Agregar el resultado como una nueva columna en el DataFrame
        resultados[fila_a.name] = suma_producto

    # Renombrar las columnas con los meses


    return suma_producto

In [41]:
calcular_resultado(volum_distribution_matrix, box_id_100001)

0     41234.0
1     24003.0
2     31691.0
3     13336.0
4     33177.0
5     29271.0
6     26287.0
7      9186.0
8     34170.0
9     60802.0
10     9559.0
11    81285.0
12    37114.0
13    79172.0
14     9527.0
15    10153.0
16    50302.0
17    37539.0
18    32680.0
19        0.0
20    17529.0
21    12269.0
22        0.0
23        0.0
24    11545.0
25    18521.0
26     9126.0
27    14348.0
28     8290.0
29    35319.0
30    18620.0
31     8823.0
32    10033.0
33    48376.0
34    28063.0
35    39696.0
36    35658.0
37    23194.0
38     2361.0
39    12227.0
40    53028.0
41    26200.0
42    11616.0
43    15404.0
44     5070.0
45    43955.0
dtype: float64

In [18]:
cut

Unnamed: 0,ID,FINCA,GRUPO,LABOR,MAESTRA,TARIFA,REF,PROMEDIADO,PRESTACIONES,FACTOR
0,100001,ENANO,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0
1,100001,EVA,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0
2,100001,SAMI,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0
3,100001,VEGA,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0
4,100001,FEDERICA,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0
...,...,...,...,...,...,...,...,...,...,...
1053,100022,DON PEDRO,EMPAQUE,AJUSTE DE EMPAQUE,ORG NORTE,1.0,AJUSTE,G04,0.3887,0.0
1054,100022,LA VELA,EMPAQUE,AJUSTE DE EMPAQUE,ORG NORTE,1.0,AJUSTE,G04,0.3887,0.0
1055,100022,ESPERANZA,EMPAQUE,AJUSTE DE EMPAQUE,ORG NORTE,1.0,AJUSTE,G04,0.3887,0.0
1056,100022,LINA MARGARITA,EMPAQUE,AJUSTE DE EMPAQUE,PLAT,0.0,AJUSTE,G04,0.3887,0.0
