In [1]:
import pandas as pd
import numpy as np
import boto3
import logging
from io import StringIO
from dotenv import load_dotenv
from pathlib import Path
import os
import polars as pl
from logging import Logger
import json
from chalice import Response
from datetime import datetime, timedelta
from functions_mo import (
calculate_volume_distribution, 
get_item_from_dynamodb, 
get_item_from_dynamodb_global, 
get_ssm_parameter,
object_to_dataframe,
group_by_month,
multiply_price,
multiply_by_month_promediado,
multiply_p_social,
total_cost
) 

In [2]:
# Root logger at INFO level; this same object is handed explicitly to the
# functions_mo helpers further down (logger=logger).
logger = logging.getLogger()
logger.setLevel(level=logging.INFO)

In [3]:
# Location of the dotenv file holding the AWS settings read below.
# The default is the original developer-specific absolute path; set the
# MO_ENV_FILE environment variable to point at another file without having to
# edit the notebook.
_DEFAULT_ENV_FILE = 'C:/Users/Nata_/Documents/Etapa_1_proyecto/Simulador_mano_de_obra/mano_de_obra_agrovid/var.env'
env_path = Path(os.environ.get('MO_ENV_FILE', _DEFAULT_ENV_FILE))

# load_dotenv returns False when it did not set any variable (for example when
# the file does not exist). Only warn: the variables may already be present in
# the process environment, in which case the lookups below still succeed.
if not load_dotenv(dotenv_path=env_path):
    logger.warning(f"No variables were loaded from dotenv file: {env_path}")

# Required settings; a missing one raises KeyError right here.
AWS_PROFILE = os.environ["aws_profile"]
SSM_PARAMETER_BUCKET_INPUT_FILES_NAME = os.environ['SSM_PARAMETER_BUCKET_INPUT_FILES_NAME']
SSM_PARAMETER_BUCKET_OUTPUT_FILES_NAME = os.environ['SSM_PARAMETER_BUCKET_OUTPUT_FILES_NAME']
REGION = os.environ['REGION']
SSM_PARAMETER_DYNAMODB_MODULE_CONFIG_NAME = os.environ['SSM_PARAMETER_DYNAMODB_MODULE_CONFIG_NAME']
SSM_PARAMETER_DYNAMODB_GLOBAL_MODULE_CONFIG_NAME = os.environ['SSM_PARAMETER_DYNAMODB_GLOBAL_MODULE_CONFIG_NAME']


## Función para despliegue

In [4]:
def materiales(event, context):
    """Resolve configuration and read the input CSV files for the labor module.

    Work in progress: the function only resolves the SSM/DynamoDB
    configuration and loads the input files; nothing is computed or returned
    on the success path yet.

    Args:
        event: Mapping used directly as the request body. Its optional
            "partition" key selects the partition to read; when absent,
            today's date formatted as YYYY-MM-DD is used.
        context: Not used.

    Returns:
        A ``Response`` with status 500 and a JSON body containing a "message"
        key when reading the module input files fails; ``None`` otherwise.
    """
    modulo: str = "modulo_mano_de_obra"
    modulo_revenue: str = "modulo_ingresos"
    PROCESS_TYPE = "er_simulado"
    modulo_global_materiales = "nombre_finca"
    file_revenue: str = "volumen.csv"

    # The event itself is treated as the body.
    # TODO: when the payload arrives as a JSON string under event["body"],
    # decode it with json.loads(event["body"]) before use.
    body = event

    session = boto3.Session(profile_name=AWS_PROFILE)

    s3_client = session.client('s3', region_name=REGION)
    ssm_client = session.client("ssm", region_name=REGION)
    # Build the DynamoDB resource from the same profile-bound session as the
    # S3 and SSM clients so every AWS call uses the same credentials.
    dynamodb_client = session.resource("dynamodb", region_name=REGION)

    bucket_input_files = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_BUCKET_INPUT_FILES_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )
    # Not used yet; kept for the (pending) step that writes results.
    bucket_output_files = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_BUCKET_OUTPUT_FILES_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )
    dynamodb_table_name_module_config = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_DYNAMODB_MODULE_CONFIG_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )
    dynamodb_table_name_global_module_config = get_ssm_parameter(
        parameter_name=SSM_PARAMETER_DYNAMODB_GLOBAL_MODULE_CONFIG_NAME,
        logger=logger,
        ssm_client=ssm_client,
        with_decryption=True,
    )
    logger.info(f"{dynamodb_table_name_module_config}")

    module_config = get_item_from_dynamodb(
        table_name=dynamodb_table_name_module_config,
        key={"module": modulo, "process_type": PROCESS_TYPE},
        logger=logger,
        dynamodb_client=dynamodb_client,
    )

    global_config_materiales = get_item_from_dynamodb_global(
        table_name=dynamodb_table_name_global_module_config,
        partition_key_value=modulo_global_materiales,
        logger=logger,
        dynamodb_client=dynamodb_client,
    )

    # NOTE(review): the exploratory cell further down reads the key
    # "input_file_names_mo" from the same item and then uses the "cut_emp" and
    # "promediado" entries; confirm which key/entries this function should use.
    input_files_names: dict = module_config["input_file_names_mano_de_obra"]
    farm_order: dict = global_config_materiales["fincas"]

    # Download the CSV files from S3.
    map_with_dfs = {}
    current_date = datetime.now().strftime("%Y-%m-%d")
    partition = body.get("partition", current_date)

    volum_file = object_to_dataframe(
        s3_client=s3_client,
        bucket_name=bucket_input_files,
        folder_name=f"{PROCESS_TYPE}/{modulo_revenue}",
        partition=partition,
        file_name=file_revenue,
    )

    # Pre-bind the loop variable: the handler below formats it, and it would
    # otherwise be undefined if the failure happens before the first iteration
    # (e.g. an empty file mapping followed by the key lookup).
    file = None
    try:
        for key, file in input_files_names.items():
            df = object_to_dataframe(
                s3_client=s3_client,
                bucket_name=bucket_input_files,
                folder_name=f"{PROCESS_TYPE}/{modulo}",
                partition=partition,
                file_name=file,
            )
            logger.info(f"file_key: {file} read successfully")
            map_with_dfs[key] = df
        # A missing entry raises KeyError and is reported through the handler.
        volum_distribution = map_with_dfs["volum_distribution"]
    except Exception as e:
        error_message = (
            f"Error reding file {file} from {bucket_input_files}: error detail:{e}"
        )
        logger.error(error_message)
        return Response(
            status_code=500,
            body=json.dumps({"message": error_message}),
        )

### Importar los archivos

In [5]:
# Exploratory version of the loading step: resolves the configuration through
# SSM/DynamoDB and reads every input CSV into module-level variables used by
# the cells below.
modulo: str = "modulo_mano_de_obra"
PROCESS_TYPE = "er_simulado"
modulo_global_materiales = "nombre_finca"
modulo_revenue: str = "modulo_ingresos"
modulo_materiales: str = "modulo_materiales"
file_revenue: str = "volumen.csv"
file_material: str = "distribucion_volumen.csv"

session = boto3.Session(profile_name=AWS_PROFILE)

s3_client = session.client("s3", region_name=REGION)
ssm_client = session.client("ssm", region_name=REGION)
dynamodb_client = session.resource("dynamodb", region_name=REGION)

bucket_input_files = get_ssm_parameter(
    parameter_name=SSM_PARAMETER_BUCKET_INPUT_FILES_NAME,
    logger=logger,
    ssm_client=ssm_client,
    with_decryption=True,
)
dynamodb_table_name_module_config = get_ssm_parameter(
    parameter_name=SSM_PARAMETER_DYNAMODB_MODULE_CONFIG_NAME,
    logger=logger,
    ssm_client=ssm_client,
    with_decryption=True,
)
dynamodb_table_name_global_module_config = get_ssm_parameter(
    parameter_name=SSM_PARAMETER_DYNAMODB_GLOBAL_MODULE_CONFIG_NAME,
    ssm_client=ssm_client,
    logger=logger,
    with_decryption=True,
)

logger.info(f"{dynamodb_table_name_module_config}")

module_config = get_item_from_dynamodb(
    table_name=dynamodb_table_name_module_config,
    key={"module": modulo, "process_type": PROCESS_TYPE},
    logger=logger,
    dynamodb_client=dynamodb_client,
)

global_config_materiales = get_item_from_dynamodb_global(
    table_name=dynamodb_table_name_global_module_config,
    partition_key_value=modulo_global_materiales,
    logger=logger,
    dynamodb_client=dynamodb_client,
)

input_files_names: dict = module_config["input_file_names_mo"]
farm_order: dict = global_config_materiales["fincas"]

# Download the CSV files from S3.
map_with_dfs = {}
current_date = datetime.now().strftime("%Y-%m-%d")
# Fixed partitions used for this exploratory run; each module's files are read
# from its own partition.
partition = "2025-03-13"
partition_materiales = "2025-02-19"
partition_revenue = "2025-02-06"

volum_file = object_to_dataframe(
    s3_client=s3_client,
    bucket_name=bucket_input_files,
    folder_name=f"{PROCESS_TYPE}/{modulo_revenue}",
    partition=partition_revenue,
    file_name=file_revenue,
)

volum_distribution = object_to_dataframe(
    s3_client=s3_client,
    bucket_name=bucket_input_files,
    folder_name=f"{PROCESS_TYPE}/{modulo_materiales}",
    partition=partition_materiales,
    file_name=file_material,
)

# Pre-bind the loop variable so the error message below can always be built,
# even if the failure happens before the first iteration.
file = None
try:
    for key, file in input_files_names.items():
        df = object_to_dataframe(
            s3_client=s3_client,
            bucket_name=bucket_input_files,
            folder_name=f"{PROCESS_TYPE}/{modulo}",
            partition=partition,
            file_name=file,
        )
        logger.info(f"file_key: {file} read successfully")
        map_with_dfs[key] = df
    cut_emp = map_with_dfs["cut_emp"]
    promediado = map_with_dfs["promediado"]
except Exception as e:
    logger.error(
        f"Error reding file {file} from {bucket_input_files}: error detail:{e}"
    )
    # Stop here: the following cells need cut_emp and promediado, and letting
    # the notebook continue would only surface as an unrelated NameError later.
    raise
     

2025-03-20 18:36:51,336 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2025-03-20 18:36:52,553 - INFO - Parameter dev-bucket-upload-files-name retrieved successfully.
2025-03-20 18:36:52,669 - INFO - Parameter dev-dynamodb-table-name-module-config retrieved successfully.
2025-03-20 18:36:52,786 - INFO - Parameter dev-dynamodb-table-name-global-config retrieved successfully.
2025-03-20 18:36:52,786 - INFO - dev-module-config
2025-03-20 18:36:54,653 - INFO - file_key: corte_y_empaque.csv read successfully
2025-03-20 18:36:54,770 - INFO - file_key: curvas_g.csv read successfully


In [6]:
# Preview the first row of the table loaded from the "cut_emp" input file.
cut_emp.head(1)

Unnamed: 0,ID,FINCA,GRUPO,LABOR,MAESTRA,TARIFA,REF,PROMEDIADO,PRESTACIONES,FACTOR
0,100001,ENANO,COSECHA,CAJA INTEGRAL CAMPO,REG SUR,820.0,CAJA,G04,0.3887,1.0


In [7]:
# Preview the first row of the volume-distribution table (read from distribucion_volumen.csv).
volum_distribution.head(1)

Unnamed: 0,SEMANA,6/01/2024,13/01/2024,20/01/2024,27/01/2024,3/02/2024,10/02/2024,17/02/2024,24/02/2024,2/03/2024,...,26/10/2024,2/11/2024,9/11/2024,16/11/2024,23/11/2024,30/11/2024,7/12/2024,14/12/2024,21/12/2024,28/12/2024
0,ENERO,1,1,1,1,0.6,0,0,0,0.0,...,0,0.0,0,0,0,0,0,0,0,0


In [8]:
# Preview the first row of the volume table (read from volumen.csv).
volum_file.head(1)

Unnamed: 0.1,Unnamed: 0,CONCEPTO,FINCA,6/01/2024,13/01/2024,20/01/2024,27/01/2024,3/02/2024,10/02/2024,17/02/2024,...,26/10/2024,2/11/2024,9/11/2024,16/11/2024,23/11/2024,30/11/2024,7/12/2024,14/12/2024,21/12/2024,28/12/2024
0,0,CAJAS,ENANO,7969.0,8319.0,7813.0,7359.0,7989.0,5479.0,6037.0,...,9164.0,9406.0,10244.0,9851.0,9751.0,9302.0,10343.0,10606.0,10349.0,9936.0


In [9]:
# Preview the first row of the table loaded from the "promediado" input file.
promediado.head(1)

Unnamed: 0,DETALLE,PROMEDIADO,2024-01,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,2024-12
0,LABORES ORDINARIAS,G01,0.224581,0.224581,0.224581,0.224581,0.224581,0.224581,0.224581,0.224581,0.224581,0.224581,0.224581,0.224581


## Corte y empaque

In [10]:
# Keep the first three column labels as they are and parse every remaining
# label as a day-first date. Because of errors='coerce', a label that cannot be
# parsed becomes NaT instead of raising.
volum_file.columns = [
    *volum_file.columns[:3],
    *(
        pd.to_datetime(label, dayfirst=True, errors='coerce')
        for label in volum_file.columns[3:]
    ),
]
volum_file.head(3)

Unnamed: 0.1,Unnamed: 0,CONCEPTO,FINCA,2024-01-06 00:00:00,2024-01-13 00:00:00,2024-01-20 00:00:00,2024-01-27 00:00:00,2024-02-03 00:00:00,2024-02-10 00:00:00,2024-02-17 00:00:00,...,2024-10-26 00:00:00,2024-11-02 00:00:00,2024-11-09 00:00:00,2024-11-16 00:00:00,2024-11-23 00:00:00,2024-11-30 00:00:00,2024-12-07 00:00:00,2024-12-14 00:00:00,2024-12-21 00:00:00,2024-12-28 00:00:00
0,0,CAJAS,ENANO,7969.0,8319.0,7813.0,7359.0,7989.0,5479.0,6037.0,...,9164.0,9406.0,10244.0,9851.0,9751.0,9302.0,10343.0,10606.0,10349.0,9936.0
1,1,CAJAS,EVA,4098.0,6617.0,7238.0,7600.0,6414.0,4585.0,4992.0,...,7407.0,7045.0,7287.0,6578.0,6586.0,6073.0,6218.0,6091.0,5903.0,5791.0
2,2,CAJAS,SAMI,4560.0,8771.0,9549.0,9418.0,8004.0,6011.0,5031.0,...,8233.0,8250.0,8530.0,7911.0,7875.0,7546.0,8216.0,7893.0,7937.0,7645.0


In [11]:
# Keep the first column label as it is and parse every remaining label as a
# day-first date. Because of errors='coerce', a label that cannot be parsed
# becomes NaT instead of raising.
volum_distribution.columns = [
    *volum_distribution.columns[:1],
    *(
        pd.to_datetime(label, dayfirst=True, errors='coerce')
        for label in volum_distribution.columns[1:]
    ),
]
volum_distribution.head(2)

Unnamed: 0,SEMANA,2024-01-06 00:00:00,2024-01-13 00:00:00,2024-01-20 00:00:00,2024-01-27 00:00:00,2024-02-03 00:00:00,2024-02-10 00:00:00,2024-02-17 00:00:00,2024-02-24 00:00:00,2024-03-02 00:00:00,...,2024-10-26 00:00:00,2024-11-02 00:00:00,2024-11-09 00:00:00,2024-11-16 00:00:00,2024-11-23 00:00:00,2024-11-30 00:00:00,2024-12-07 00:00:00,2024-12-14 00:00:00,2024-12-21 00:00:00,2024-12-28 00:00:00
0,ENERO,1,1,1,1,0.6,0,0,0,0.0,...,0,0.0,0,0,0,0,0,0,0,0
1,FEBRERO,0,0,0,0,0.4,1,1,1,0.8,...,0,0.0,0,0,0,0,0,0,0,0


In [12]:
# Positional slice: drop the first three columns and keep the date-labelled ones.
# NOTE: this name is reassigned to a different selection (the CAJAS rows) in a later cell.
volum_file_subset = volum_file.iloc[:,3:]
volum_file_subset.head()

Unnamed: 0,2024-01-06,2024-01-13,2024-01-20,2024-01-27,2024-02-03,2024-02-10,2024-02-17,2024-02-24,2024-03-02,2024-03-09,...,2024-10-26,2024-11-02,2024-11-09,2024-11-16,2024-11-23,2024-11-30,2024-12-07,2024-12-14,2024-12-21,2024-12-28
0,7969.0,8319.0,7813.0,7359.0,7989.0,5479.0,6037.0,6918.0,8895.0,8528.0,...,9164.0,9406.0,10244.0,9851.0,9751.0,9302.0,10343.0,10606.0,10349.0,9936.0
1,4098.0,6617.0,7238.0,7600.0,6414.0,4585.0,4992.0,5411.0,6998.0,5030.0,...,7407.0,7045.0,7287.0,6578.0,6586.0,6073.0,6218.0,6091.0,5903.0,5791.0
2,4560.0,8771.0,9549.0,9418.0,8004.0,6011.0,5031.0,6875.0,8082.0,9268.0,...,8233.0,8250.0,8530.0,7911.0,7875.0,7546.0,8216.0,7893.0,7937.0,7645.0
3,3625.0,3448.0,4037.0,2996.0,3455.0,2949.0,3097.0,3034.0,3528.0,4040.0,...,4869.0,4584.0,4547.0,3897.0,3637.0,3361.0,3460.0,3426.0,3303.0,3147.0
4,5265.0,5243.0,3467.0,3098.0,3646.0,4033.0,4654.0,4113.0,5740.0,5822.0,...,10303.0,9370.0,9802.0,8779.0,8725.0,8484.0,9021.0,8505.0,7837.0,7814.0


In [13]:
# Aggregate the date columns through the project helper; according to its log
# output it groups the columns by month and sums them.
group_data = group_by_month(volum_file_subset, volum_file)
# Labels of every column after the first two of the grouped frame
# (presumably the monthly periods - confirm against group_by_month).
month_colums = group_data.columns[2:]

2025-03-20 18:36:55,388 - INFO - Inicia la función agrupando por mes.
2025-03-20 18:36:55,388 - INFO - agrupando las columnas por mes y sumandolas.
  grouped_df = df.groupby(df.columns.to_period('M'), axis=1).sum()
2025-03-20 18:36:55,437 - INFO - Se realizó la agrupación con éxito.


In [14]:
# Distribution weights without the leading label column.
volum_distribution_matrix = volum_distribution.iloc[:, 1:]
# Rows whose CONCEPTO is 'CAJAS': once as the date-only matrix (first three
# columns dropped) and once with every column kept.
volum_file_matrix = volum_file.loc[volum_file['CONCEPTO'].eq('CAJAS')].iloc[:, 3:]
volum_file_subset = volum_file.loc[volum_file['CONCEPTO'].eq('CAJAS')]


In [15]:
# Inner-join the CAJAS volume rows with the cut-and-pack table on FINCA, then
# keep only the rows whose ID is 100001.
full_matrix = pd.merge(volum_file_subset, cut_emp, how='inner', on=['FINCA'])
full_matrix_100001 = full_matrix.loc[full_matrix['ID'].eq(100001)]

In [16]:
# Combine the CAJAS volumes with the distribution matrix through the project helper.
# Judging by the logged output it processes months 0-11 and returns the transformed
# volume frame; the exact computation lives in functions_mo - confirm there.
quantity_100001 = calculate_volume_distribution(volum_file_matrix, volum_distribution_matrix, volum_file_subset, month_colums)

2025-03-20 18:36:55,510 - INFO - Iniciando el cálculo de la matriz de distribución de volumen.
2025-03-20 18:36:55,510 - INFO - Procesando el mes: 0
2025-03-20 18:36:55,522 - INFO - Procesando el mes: 1
2025-03-20 18:36:55,537 - INFO - Procesando el mes: 2
2025-03-20 18:36:55,552 - INFO - Procesando el mes: 3
2025-03-20 18:36:55,552 - INFO - Procesando el mes: 4
2025-03-20 18:36:55,568 - INFO - Procesando el mes: 5
2025-03-20 18:36:55,568 - INFO - Procesando el mes: 6
2025-03-20 18:36:55,586 - INFO - Procesando el mes: 7
2025-03-20 18:36:55,599 - INFO - Procesando el mes: 8
2025-03-20 18:36:55,615 - INFO - Procesando el mes: 9
2025-03-20 18:36:55,615 - INFO - Procesando el mes: 10
2025-03-20 18:36:55,631 - INFO - Procesando el mes: 11
2025-03-20 18:36:55,646 - INFO - Matriz de resultados generada.
2025-03-20 18:36:55,646 - INFO - Matriz de resultados transpuesta y columnas renombradas.
2025-03-20 18:36:55,646 - INFO - DataFrame final concatenado con los datos de volumen transformados.


In [17]:
# Price the quantities for ID 100001. Per the helper's log it merges on the FINCA
# column before multiplying; 'TARIFA' and 'PROMEDIADO' are passed as column names
# of full_matrix_100001 (their exact role is defined in functions_mo - confirm there).
labor_100001 = multiply_price(quantity_100001, full_matrix_100001, 'FINCA', 'TARIFA', 'PROMEDIADO', month_colums)

2025-03-20 18:36:55,671 - INFO - Inicia la función para multiplicar los DataFrames.
2025-03-20 18:36:55,671 - INFO - Realizando el merge sobre la columna FINCA
2025-03-20 18:36:55,678 - INFO - Columnas posteriores a la multiplicación: ['FINCA', 'CONCEPTO', '2024-01', '2024-02', '2024-03', '2024-04', '2024-05', '2024-06', '2024-07', '2024-08', '2024-09', '2024-10', '2024-11', '2024-12', 'TARIFA', 'PROMEDIADO']
2025-03-20 18:36:55,693 - INFO - Proceso completado exitosamente.


In [18]:
# Apply the "promediado" table to the labor frame. Per the helper's log it merges
# both frames on the PROMEDIADO column and works on the month columns of each side.
promediado_100001 = multiply_by_month_promediado(labor_100001, promediado, month_colums)
promediado_100001.head(5)


2025-03-20 18:36:55,719 - INFO - Inicia la función para multiplicar ambos dataframes.
2025-03-20 18:36:55,719 - INFO - Se realiza el merge sobre la columna PROMEDIADO.
2025-03-20 18:36:55,739 - INFO - Columnas después del merge: ['FINCA', 'PROMEDIADO', '2024-01_df1', '2024-02_df1', '2024-03_df1', '2024-04_df1', '2024-05_df1', '2024-06_df1', '2024-07_df1', '2024-08_df1', '2024-09_df1', '2024-10_df1', '2024-11_df1', '2024-12_df1', 'DETALLE', '2024-01_df2', '2024-02_df2', '2024-03_df2', '2024-04_df2', '2024-05_df2', '2024-06_df2', '2024-07_df2', '2024-08_df2', '2024-09_df2', '2024-10_df2', '2024-11_df2', '2024-12_df2']
2025-03-20 18:36:55,746 - INFO - Proceso completado exitosamente


Unnamed: 0,FINCA,2024-01,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,2024-12
0,ENANO,7466254.0,3744228.0,9668523.0,4476646.0,7409619.0,8963626.0,9237351.0,8561147.0,5824269.0,6731973.0,8449812.0,8231899.0
1,EVA,6055110.0,3015639.0,7579201.0,3856923.0,5039135.0,5438639.0,6081306.0,6491922.0,5202209.0,5683533.0,5752698.0,4791926.0
2,SAMI,7640690.0,3592951.0,10690660.0,4367597.0,6175218.0,7210988.0,10174940.0,9513937.0,6854401.0,6747808.0,6901673.0,6326747.0
3,VEGA,3332005.0,1730345.0,5026883.0,2330582.0,3214492.0,3304601.0,3508295.0,3760992.0,3186631.0,3657926.0,3369034.0,2662381.0
4,FEDERICA,3966649.0,2455339.0,6359870.0,3058675.0,6786260.0,7597614.0,8335141.0,8296877.0,6512210.0,7598395.0,7756762.0,6623410.0


In [19]:
# Social-benefits ("prestaciones") component. Per the helper's log it first sums the
# labor and promediado frames and then merges on FINCA / PRESTACIONES; the result
# shown below is keyed by FINCA and LABOR.
social_p_100001 = multiply_p_social(labor_100001, promediado_100001,full_matrix_100001, 'FINCA', 'PRESTACIONES', 'LABOR',month_colums)
social_p_100001.head(4)


2025-03-20 18:36:55,772 - INFO - Inicia la función sumar los dataframes de labor y promediados.
2025-03-20 18:36:55,791 - INFO - Suma realizada con éxito.
2025-03-20 18:36:55,792 - INFO - Realizando el merge sobre la columna FINCA y PRESTACIONES
2025-03-20 18:36:55,798 - INFO - Columnas posteriores a la multiplicación: ['FINCA', '2024-01', '2024-02', '2024-03', '2024-04', '2024-05', '2024-06', '2024-07', '2024-08', '2024-09', '2024-10', '2024-11', '2024-12', 'PRESTACIONES', 'LABOR']
2025-03-20 18:36:55,800 - INFO - Proceso completado exitosamente.


Unnamed: 0,FINCA,LABOR,2024-01,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,2024-12
0,ENANO,CAJA INTEGRAL CAMPO,14457320.0,10617580.0,14950820.0,12694510.0,14347660.0,15514460.0,17886800.0,16577430.0,13737810.0,15381190.0,16361840.0,16342420.0
1,EVA,CAJA INTEGRAL CAMPO,11724850.0,8551509.0,11720020.0,10937150.0,9757559.0,9413326.0,11775570.0,12570670.0,12270540.0,12985710.0,11139270.0,9513194.0
2,SAMI,CAJA INTEGRAL CAMPO,14795100.0,10188600.0,16531390.0,12385280.0,11957420.0,12480950.0,19702310.0,18422370.0,16167600.0,15417370.0,13364090.0,12560210.0
3,VEGA,CAJA INTEGRAL CAMPO,6451948.0,4906775.0,7773267.0,6608877.0,6224401.0,5719682.0,6793308.0,7282618.0,7516365.0,8357615.0,6523649.0,5285504.0


In [23]:
# Total cost for ID 100001 built from the labor, promediado and social-benefits
# frames; the result shown below is keyed by FINCA and LABOR.
# NOTE(review): the exact formula lives in functions_mo.total_cost - confirm there.
cost_100001 = total_cost(labor_100001, promediado_100001, social_p_100001, 'FINCA', 'LABOR', month_colums)
cost_100001.head(3)

2025-03-20 18:37:39,376 - INFO - Inicia la función sumar los dataframes de labor y promediados.
2025-03-20 18:37:39,387 - INFO - Suma realizada con éxito.
2025-03-20 18:37:39,390 - INFO - Realizando el merge sobre la columna FINCA y PRESTACIONES
2025-03-20 18:37:39,394 - INFO - Proceso completado exitosamente.


Unnamed: 0,FINCA,LABOR,2024-01,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,2024-12
0,ENANO,CAJA INTEGRAL CAMPO,51651370.0,37933200.0,53414460.0,45353410.0,51259560.0,55428160.0,63903770.0,59225810.0,49080760.0,54952030.0,58455600.0,58386200.0
1,EVA,CAJA INTEGRAL CAMPO,41889100.0,30551790.0,41871850.0,39074930.0,34860620.0,33630790.0,42070330.0,44910960.0,43838710.0,46393770.0,39797030.0,33987580.0
2,SAMI,CAJA INTEGRAL CAMPO,52858110.0,36400600.0,59061340.0,44248630.0,42720010.0,44590420.0,70390010.0,65817190.0,57761630.0,55081290.0,47745610.0,44873570.0


In [21]:
## TODO (pending): labor and promediado must be summed FIRST, and that sum is then multiplied by PRESTACIONES.