### Working Structure - Categorizer Layer

**Author**: Ricardo Pérez Castillo

**Latest update**: 2024-12-30

**Version**: 6.0

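**Purpose**: Categorize the spend by left-joining the uncategorized spend cube with the categorization book on `Entidad` and `Descripcion Movimiento`, then saving the result to Excel.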

In [None]:
# Import essential libraries for data processing, numerical operations, and file handling

import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical computations
from datetime import datetime  # For handling date and time operations
import os  # For interacting with the operating system (e.g., file paths, directories)

# Import custom configuration
from config import current_month, current_month_text, current_year  # Custom configurations for date handling


In [None]:

def load_data(file_path):
    """
    Read an Excel workbook from disk and return its contents as a DataFrame.

    Parameters:
    - file_path (str): Location of the Excel file to read.

    Returns:
    - pd.DataFrame: The result of `pd.read_excel(file_path)` with default options.

    Raises:
    - FileNotFoundError: When nothing exists at `file_path`.
    """
    # Guard clause: fail with a descriptive message before handing the path to pandas.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Error: {file_path} does not exist.")

    print(f"Loading data from {file_path}...")
    workbook = pd.read_excel(file_path)
    return workbook

def merge_data(df_nocat, df_categorizer):
    """
    Attach categorizer columns to the uncategorized spend rows via a left join.

    Every row of `df_nocat` is kept; rows without a matching
    ('Entidad', 'Descripcion Movimiento') pair in `df_categorizer` receive
    missing values in the categorizer's columns.

    Parameters:
    - df_nocat (pd.DataFrame): The uncategorized spend cube (left side of the join).
    - df_categorizer (pd.DataFrame): The categorizer table (right side of the join).

    Returns:
    - pd.DataFrame: The joined DataFrame.
    """
    print("Merging data...")

    # Both frames share the same key column names, so a single `on` list
    # is equivalent to passing identical left_on/right_on lists.
    join_keys = ['Entidad', 'Descripcion Movimiento']
    categorized = df_nocat.merge(df_categorizer, how='left', on=join_keys)
    return categorized

def save_data(df, output_file):
    """
    Save a pandas DataFrame to an Excel file, creating the parent folder if needed.

    Parameters:
    - df (pd.DataFrame): The DataFrame to be saved.
    - output_file (str): The path where the Excel file will be saved. A bare
      filename (no directory component) is written to the current working
      directory.
    """
    output_dir = os.path.dirname(output_file)

    # dirname() is '' for a bare filename; os.makedirs('') would raise
    # FileNotFoundError, so only create a directory when one is actually named.
    if output_dir and not os.path.exists(output_dir):
        # exist_ok=True tolerates the directory appearing between the check
        # above and this call (e.g. another process creating it).
        os.makedirs(output_dir, exist_ok=True)
        print(f"Created output directory: {output_dir}")

    print(f"Saving data to {output_file}...")
    df.to_excel(output_file, index=False)
    print("Data saved successfully.")

def main(spendcube_file, categorizer_file, taxonomy_file, output_file):
    """
    Run the categorization pipeline: load the inputs, join them, write the result.

    Parameters:
    - spendcube_file (str): Path to the uncategorized spend cube workbook.
    - categorizer_file (str): Path to the categorizer workbook.
    - taxonomy_file (str): Path to the taxonomy workbook. It is loaded but its
      contents are not used afterwards.
    - output_file (str): Destination path for the categorized spend cube.
    """
    # Load the three workbooks in order. The taxonomy frame is read (so a
    # missing file still fails the run) but is not consumed by later steps.
    uncategorized, categorizer, _taxonomy = (
        load_data(path)
        for path in (spendcube_file, categorizer_file, taxonomy_file)
    )

    # Join the spend rows with their categories and persist the outcome.
    categorized = merge_data(uncategorized, categorizer)
    save_data(categorized, output_file)


In [None]:
if __name__ == "__main__":
    # Period-dependent fragments shared by the input and output paths.
    current_month_formatted = f"{current_month_text}{current_year}"  # e.g. "Noviembre2024"
    current_month_path = f"{current_month} {current_month_text}"     # e.g. "11 Noviembre"

    # Root folder that every workbook in this pipeline lives under.
    base_dir = "/Users/Documents/Finanzas/Personal Spend"
    period_dir = f"{current_year}/{current_month_path}"

    # Input: uncategorized spend cube for the configured period.
    spendcube_file = (
        f"{base_dir}/03 Consolidated Dataset/{period_dir}"
        f"/df-nocat-spendcube-{current_month_formatted}.xlsx"
    )

    # Inputs: categorizer and taxonomy workbooks (file names are fixed, not period-derived).
    categorizer_file = f"{base_dir}/04 Taxonomia/Libro de Categorizacion 2024.xlsx"
    taxonomy_file = f"{base_dir}/04 Taxonomia/Personal Spend Taxonomy 2024.xlsx"

    # Output: categorized spend cube for the same period.
    output_file = (
        f"{base_dir}/05 Consolidated Categorized Dataset/{period_dir}"
        f"/df-cat-spendcube-{current_month_formatted}.xlsx"
    )

    # Execute the load -> merge -> save pipeline.
    main(
        spendcube_file=spendcube_file,
        categorizer_file=categorizer_file,
        taxonomy_file=taxonomy_file,
        output_file=output_file,
    )
