In [None]:
# gastos_report_global.py
import re
import unicodedata
from pathlib import Path
import pandas as pd
import numpy as np
from openpyxl.chart import BarChart, Reference
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.styles import Alignment, Font, PatternFill
from openpyxl.utils import get_column_letter
import uuid

# ===== CONFIG =====
ONLY_PAID = False  # True = keep only paid invoices (only applied when an 'Estado' column exists)
# Regex a file name must fully match to be picked up as a monthly input workbook.
INPUT_PATTERN = r"Maria Vert Carbó - Compras \d{2}_\d{2}_\d{4}-\d{2}_\d{2}_\d{4}\.xlsx"
# File name of the report workbook written by main().
OUTPUT_FILENAME = "gastos_report_global.xlsx"

# Expense labels plotted as series on the "Tendencias" sheet
TREND_VARIABLES = ["Flores", "Materiales (cristales etc)", "Packaging"]
TREND_FIXED     = ["Otros", "Asesorías", "Publicidad y marketing"]


# Column names tried, in this order, when locating the amount column.
AMOUNT_CANDIDATES = ["Total", "Importe", "Amount", "Gasto", "Coste", "Base imponible"]

# ---- Classification by 'Cuenta': (key, final label) ----
# Keys are compared against the accent-stripped, lower-cased 'Cuenta' text.
# The list is scanned top to bottom and the first matching entry wins, so the
# order of the entries matters.
# NOTE(review): accented/unaccented key pairs (e.g. "nómina"/"nomina") look
# redundant because keys are accent-stripped before comparison — confirm
# before removing either.
# FIXED / OPERATING EXPENSES
EXPENSE_RULES = [
    ("alquiler", "Alquiler del local"),
    ("salario", "Salarios y sueldos del personal fijo (independ de las ventas)"),
    ("nómina", "Salarios y sueldos del personal fijo (independ de las ventas)"),
    ("nomina", "Salarios y sueldos del personal fijo (independ de las ventas)"),
    ("electricidad", "Servicios: Electricidad, agua, gas, y otros servicios públicos."),
    ("luz", "Servicios: Electricidad, agua, gas, y otros servicios públicos."),
    ("agua", "Servicios: Electricidad, agua, gas, y otros servicios públicos."),
    ("gas", "Servicios: Electricidad, agua, gas, y otros servicios públicos."),
    ("seguro", "Seguro del local"),
    ("licencia", "Costos de licencias y permisos"),
    ("permiso", "Costos de licencias y permisos"),
    ("limpieza", "Materiales de limpieza"),
    ("marketing", "Publicidad y marketing"),
    ("publicidad", "Publicidad y marketing"),
    ("ads", "Publicidad y marketing"),
    ("oficina", "Productos de oficina"),
    ("papeler", "Productos de oficina"),
    ("mantenimiento", "Mantenimiento y reparaciones y muebles"),
    ("reparaci", "Mantenimiento y reparaciones y muebles"),
    ("mueble", "Mantenimiento y reparaciones y muebles"),
    ("gestor", "Servicios de gestoría"),
    ("autónomo", "Cuota de autónomos"),
    ("autonomo", "Cuota de autónomos"),
    ("formaci", "Formación"),
    ("banca", "Gastos bancarios"),
    ("comisi", "Gastos bancarios"),
    ("web", "Web, comunicación, sistemas"),
    ("hosting", "Web, comunicación, sistemas"),
    ("dominio", "Web, comunicación, sistemas"),
    ("software", "Web, comunicación, sistemas"),
    ("sistem", "Web, comunicación, sistemas"),
    ("it", "Web, comunicación, sistemas"),
    ("automat", "Web, comunicación, sistemas"),
    ("telef", "Telefonia"),
    ("asesor", "Asesorías"),
    ("asessor", "Asesorías"),

    

    # VARIABLE EXPENSES
    ("flor", "Flores"),
    ("cristal", "Materiales (cristales etc)"),
    ("material", "Materiales (cristales etc)"),
    ("packaging", "Packaging"),
    ("embalaje", "Packaging"),
    ("mensaj", "Mensajería"),
    ("envío", "Mensajería"),
    ("envio", "Mensajería"),
    ("courier", "Mensajería"),
    ("inmoviliz", "Inmovilizados: maderas prensa"),
    ("madera", "Inmovilizados: maderas prensa"),
    ("contrataci", "Contrataciones temporales - impuestos"),
    ("temporal", "Contrataciones temporales - impuestos"),
    ("irpf", "Impuestos sobre la renta IRPF"),
    ("pago trimestral", "Pagos trimestrales impuestos"),
    ("trimestral", "Pagos trimestrales impuestos"),
]

EXPENSE_DEFAULT = "Otros"  # label used when no rule matches


# ---- Label groups for the two separate tables ----
# Labels (in display order) shown in the fixed / operating expenses table.
FIXED_TYPES = [
    "Alquiler del local",
    "Salarios y sueldos del personal fijo (independ de las ventas)",
    "Servicios: Electricidad, agua, gas, y otros servicios públicos.",
    "Seguro del local",
    "Costos de licencias y permisos",
    "Materiales de limpieza",
    "Publicidad y marketing",
    "Productos de oficina",
    "Mantenimiento y reparaciones y muebles",
    "Servicios de gestoría",
    "Cuota de autónomos",
    "Formación",
    "Gastos bancarios",
    "Web, comunicación, sistemas",
    "Telefonia",
    "Asesorías",
    "Otros",  # "Otros" is kept in both groups so anything unclassified stays visible
]

# Labels (in display order) shown in the variable expenses table.
VARIABLE_TYPES = [
    "Flores",
    "Materiales (cristales etc)",
    "Packaging",
    "Mensajería",
    "Inmovilizados: maderas prensa",
    "Contrataciones temporales - impuestos",
    "Impuestos sobre la renta IRPF",
    "Pagos trimestrales impuestos",
    "Otros",
]

# ===== Helpers =====
def _strip_accents(s: str) -> str:
    return "".join(c for c in unicodedata.normalize("NFKD", s) if not unicodedata.combining(c))

def normalize_text(x: object) -> str:
    """Return *x* as accent-free, lower-cased, trimmed text.

    Missing values (as detected by ``pd.isna``) become the empty string.
    """
    raw = str(x) if not pd.isna(x) else ""
    return _strip_accents(raw).lower().strip()

def _first_sheet_name(xls_path: Path) -> str:
    """Return the name of the first sheet in the workbook at *xls_path*.

    Raises:
        ValueError: if the workbook contains no sheets.
    """
    # The context manager closes the underlying file handle; a bare
    # ``pd.ExcelFile(...)`` would keep the workbook open until collected.
    with pd.ExcelFile(xls_path) as xfile:
        sheet_names = list(xfile.sheet_names)
    if not sheet_names:
        raise ValueError("This workbook has no sheets.")
    return sheet_names[0]

def find_header_row(xls_path: Path, sheet_name: str | None = None) -> int:
    """Return the 0-based index of the header row in a sheet.

    Only the first 40 rows are inspected. A row counts as the header when one
    of its cells (trimmed, lower-cased) equals "cuenta", "fecha emisión" or
    "fecha emision". Falls back to 0 when no such row is found. When
    *sheet_name* is None the first sheet of the workbook is used.
    """
    if sheet_name is None:
        sheet_name = _first_sheet_name(xls_path)
    raw = pd.read_excel(xls_path, sheet_name=sheet_name, header=None, dtype=str)

    header_markers = {"cuenta", "fecha emisión", "fecha emision"}
    for position in range(min(len(raw), 40)):
        cells = set(raw.iloc[position].astype(str).str.strip().str.lower())
        if cells & header_markers:
            return position
    return 0

def load_table(xls_path: Path, sheet_name: str | None = None) -> pd.DataFrame:
    """Load a sheet as a DataFrame using the detected header row.

    Column names are converted to trimmed strings and rows that are entirely
    empty are dropped. When *sheet_name* is None the first sheet is used.
    """
    target = _first_sheet_name(xls_path) if sheet_name is None else sheet_name
    header_at = find_header_row(xls_path, target)
    table = pd.read_excel(xls_path, sheet_name=target, header=header_at)
    table.columns = [str(name).strip() for name in table.columns]
    table = table.dropna(how="all")
    return table

def pick_amount_column(df: pd.DataFrame) -> str:
    """Return the name of the column that holds the expense amount.

    The names in ``AMOUNT_CANDIDATES`` are tried first, in order. If none is
    present, the first column whose values all parse as numbers is returned.

    Raises:
        ValueError: if no candidate exists and no column is numeric.
    """
    for cand in AMOUNT_CANDIDATES:
        if cand in df.columns:
            return cand

    # Last resort: find the first numeric-ish column.
    dtype_checks = pd.api.types
    for c in df.columns:
        column = df[c]
        # Dates, durations and flags would pass the numeric check below
        # (to_numeric turns them into integers / keeps booleans), but they
        # are never an amount, so skip them.
        if (
            dtype_checks.is_datetime64_any_dtype(column)
            or dtype_checks.is_timedelta64_dtype(column)
            or dtype_checks.is_bool_dtype(column)
        ):
            continue
        try:
            # Strict parsing replaces the deprecated errors="ignore": a
            # column with any unparseable value is simply not numeric.
            coerced = pd.to_numeric(column)
        except (ValueError, TypeError):
            continue
        if dtype_checks.is_numeric_dtype(coerced):
            return c
    raise ValueError("No numeric amount column found.")

def _safe_table_name(base: str) -> str:
    s = re.sub(r'[^A-Za-z0-9_]', '_', base)
    if not s or not s[0].isalpha():
        s = "T_" + s
    suffix = uuid.uuid4().hex[:6]
    s = s[:20] + "_" + suffix
    return s

def _add_styled_table(ws, df, start_row, start_col=1, name_prefix="T"):
    """Write *df* onto *ws* and register it as a styled worksheet table.

    The header goes on *start_row* (bold, centered, light fill) and the data
    rows follow directly below, starting at column *start_col*. When *df* has
    no rows or no columns, the cells are still written but no table object is
    added.

    Returns:
        ``(end_row, end_col)``: the last row and last column of the written
        range (header included).
    """
    columns = list(df.columns)

    # Header row
    for offset, title in enumerate(columns):
        header_cell = ws.cell(row=start_row, column=start_col + offset, value=title)
        header_cell.font = Font(bold=True)
        header_cell.alignment = Alignment(horizontal="center")
        header_cell.fill = PatternFill("solid", fgColor="F3F6FA")

    # Data rows
    sheet_row = start_row
    for _, record in df.iterrows():
        sheet_row += 1
        for offset, value in enumerate(record):
            ws.cell(row=sheet_row, column=start_col + offset, value=value)

    end_row = start_row + len(df)
    end_col = start_col + len(columns) - 1
    if df.empty:
        # Nothing to turn into a table: either no data rows or no columns
        return end_row, end_col

    top_left = f"{get_column_letter(start_col)}{start_row}"
    bottom_right = f"{get_column_letter(end_col)}{end_row}"
    display_name = _safe_table_name(f"{name_prefix}_{ws.title}_{start_row}_{start_col}")
    styled = Table(displayName=display_name, ref=f"{top_left}:{bottom_right}")
    styled.tableStyleInfo = TableStyleInfo(
        name="TableStyleMedium9",
        showRowStripes=True,
        showColumnStripes=False,
    )
    ws.add_table(styled)
    return end_row, end_col

def _number_format_columns(ws, start_row, end_row, start_col, headers):
    for j, h in enumerate(headers, start=start_col):
        hlow = str(h).lower()
        if hlow.startswith("total"):
            nf = '#,##0'
        elif "porcentaje" in hlow:
            nf = '0.00'
        else:
            nf = None
        if nf:
            for r in range(start_row+1, end_row+1):
                ws.cell(row=r, column=j).number_format = nf

def _autofit_columns(ws, first_row=1, last_row=None, margin=2):
    """Set each column's width from the longest cell text in the row range.

    Cells on *first_row* count 2 extra characters. The final width is the
    longest length plus *margin*, capped at 60 and never below 8. When
    *last_row* is None the sheet's ``max_row`` is used.
    """
    final_row = ws.max_row if last_row is None else last_row
    for col_idx in range(1, ws.max_column + 1):
        widest = 0
        for row_idx in range(first_row, final_row + 1):
            content = ws.cell(row=row_idx, column=col_idx).value
            if content is None:
                continue
            extra = 2 if row_idx == first_row else 0
            widest = max(widest, len(str(content)) + extra)
        capped = min(widest + margin, 60)
        ws.column_dimensions[get_column_letter(col_idx)].width = max(8, capped)

def unique_labels_in_order(rules, *extras):
    """Return the distinct labels of *rules*, then *extras*, in first-seen order.

    *rules* is an iterable of ``(key, label)`` pairs; only the label is used.
    """
    candidates = [label for _, label in rules]
    candidates.extend(extras)
    # dict keys keep insertion order, which de-duplicates without reordering
    return list(dict.fromkeys(candidates))

def month_label_from_filename(filename: str) -> str:
    """Return a "YYYY-MM" label taken from the start date in *filename*.

    The name is searched for "Compras DD_MM_YYYY-DD_MM_YYYY"; the month and
    year of the first date form the label. Without a match, the file stem
    cut to 31 characters is returned instead.
    """
    found = re.search(r"Compras (\d{2})_(\d{2})_(\d{4})-(\d{2})_(\d{2})_(\d{4})", filename)
    if found is None:
        return Path(filename).stem[:31]
    month, year = found.group(2, 3)
    return f"{year}-{month}"

# ===== Clasificación por CUENTA =====
# Rule keys of this length or shorter must match a whole word of the account
# text; longer keys may match anywhere inside it.
_WHOLE_WORD_MAX_LEN = 3


def map_expense(cuenta_val) -> str:
    """Classify a 'Cuenta' value into an expense label.

    The value and every rule key are normalized with ``normalize_text``
    before comparison. ``EXPENSE_RULES`` is scanned in order and the label of
    the first matching rule is returned.

    Very short keys ("it", "gas", "ads", "luz", "web") are only accepted as
    whole words. As plain substrings they also hit unrelated text — "gas" is
    inside "gastos", "it" inside "credito" — and, being early in the list,
    would shadow the rules meant for those accounts. Accounts that no longer
    match fall back to ``EXPENSE_DEFAULT``.

    Returns:
        The matching label, or ``EXPENSE_DEFAULT`` for missing values and
        for text that matches no rule.
    """
    if pd.isna(cuenta_val):
        return EXPENSE_DEFAULT
    txt = normalize_text(cuenta_val)
    # Words = runs of letters/digits; everything else acts as a separator.
    words = set("".join(ch if ch.isalnum() else " " for ch in txt).split())
    for sub, label in EXPENSE_RULES:
        key = normalize_text(sub)
        if not key:
            # An empty key is a substring of everything; never let it match.
            continue
        if len(key) <= _WHOLE_WORD_MAX_LEN:
            matched = key in words
        else:
            matched = key in txt
        if matched:
            return label
    return EXPENSE_DEFAULT

# ===== CORE aggregation for one DataFrame =====
def _totals_for_labels(tot_types: pd.DataFrame, labels, grand_total: float) -> pd.DataFrame:
    """Return the rows of *tot_types* for *labels*, in that order.

    Labels missing from *tot_types* get a zero row. "Porcentaje" is
    recomputed against *grand_total* (the global total, not the subset's).
    """
    subset = (
        tot_types.set_index("Tipo gasto")
                 .reindex(labels)
                 .fillna(0)
                 .reset_index()
    )
    subset["Porcentaje"] = (subset["Total"] / grand_total * 100).round(2).fillna(0)
    return subset


def analyze_df(df: pd.DataFrame, amount_col: str):
    """Classify the rows of *df* and aggregate amounts per expense type.

    The caller's DataFrame is left untouched; all work happens on a copy.
    When ``ONLY_PAID`` is set and an 'Estado' column exists, only rows whose
    state is "pagado" are kept.

    Args:
        df: expense rows; must contain a 'Cuenta' column.
        amount_col: name of the column holding the amount. Values that do
            not parse as numbers are treated as missing.

    Returns:
        A dict with:
        - "tot_types": total and percentage per expense type, sorted by
          total descending (every known type is present, zero if unused);
        - "tot_vars" / "tot_fixed": the same figures restricted to
          ``VARIABLE_TYPES`` / ``FIXED_TYPES``, in list order;
        - "unknown_accounts": per-'Cuenta' total and row count for rows that
          fell into ``EXPENSE_DEFAULT``;
        - "df_clean": the processed copy with the 'Tipo gasto' column added.

    Raises:
        ValueError: if *df* has no 'Cuenta' column.
    """
    # Keep only paid rows when requested
    if ONLY_PAID and "Estado" in df.columns:
        df = df[df["Estado"].astype(str).str.lower().str.strip() == "pagado"]

    if "Cuenta" not in df.columns:
        raise ValueError("El Excel no contiene la columna 'Cuenta'.")

    # Private copy: the assignments below must not leak into the caller's frame
    df = df.copy()
    df[amount_col] = pd.to_numeric(df[amount_col], errors="coerce")
    df["Tipo gasto"] = df["Cuenta"].apply(map_expense)

    # Fixed list of types (unique rule labels + the default)
    all_types = unique_labels_in_order(EXPENSE_RULES, EXPENSE_DEFAULT)

    # A zero grand total becomes NaN so the percentages below end up as 0
    # (via fillna) instead of dividing by zero.
    grand_total = float(df[amount_col].sum())
    if grand_total == 0:
        grand_total = np.nan

    # Totals per type (combined table); fillna(0) must come after reindex so
    # types without any row show up as 0.
    tot_types = (
        df.groupby("Tipo gasto", dropna=False)[amount_col]
          .sum(min_count=1)
          .reindex(all_types)
          .fillna(0)
          .reset_index()
          .rename(columns={amount_col: "Total"})
    )
    tot_types["Porcentaje"] = (tot_types["Total"] / grand_total * 100).round(2).fillna(0)

    # Separate tables, both with percentages over the global total
    tot_vars = _totals_for_labels(tot_types, VARIABLE_TYPES, grand_total)
    tot_fixed = _totals_for_labels(tot_types, FIXED_TYPES, grand_total)

    # Unclassified accounts, listed for manual review
    unknown_accounts = (
        df.loc[df["Tipo gasto"] == EXPENSE_DEFAULT, ["Cuenta", amount_col]]
          .assign(Count=1)
          .groupby("Cuenta", dropna=False)
          .agg(Total=(amount_col, "sum"), Aparece=("Count", "sum"))
          .reset_index()
          .sort_values("Total", ascending=False)
    )

    return {
        "tot_types": tot_types.sort_values("Total", ascending=False),
        "tot_vars": tot_vars,
        "tot_fixed": tot_fixed,
        "unknown_accounts": unknown_accounts,
        "df_clean": df,
    }


# ===== Escribir una hoja por mes (tabla + gráfico) =====
def write_month_sheet(writer, month_name: str, results: dict):
    """Create one report sheet from an ``analyze_df`` result.

    The sheet (named after *month_name*, cut to 31 characters) holds, top to
    bottom: the totals-per-type table with a bar chart next to it, the
    variable-expenses table, the fixed/operating-expenses table and the
    table of unclassified accounts. Column widths are fitted at the end.
    """
    sheet_title = month_name[:31]
    ws = writer.book.create_sheet(sheet_title)
    writer.sheets[sheet_title] = ws
    ws.freeze_panes = "A2"

    def place_table(key, top_row, prefix):
        # Write results[key] at top_row, format its numbers, return last row
        frame = results[key]
        last_row, _ = _add_styled_table(ws, frame, start_row=top_row, start_col=1, name_prefix=prefix)
        _number_format_columns(ws, start_row=top_row, end_row=last_row, start_col=1, headers=list(frame.columns))
        return last_row

    # Totals per type
    cursor = 1
    bottom = place_table("tot_types", cursor, "GASTOS")

    # Bar chart: column 2 holds the totals (header row gives the series
    # title), column 1 the category names
    bars = BarChart()
    bars.title = "Gastos por tipo"
    bars.y_axis.title = "€"
    bars.x_axis.title = "Tipo de gasto"
    values = Reference(ws, min_col=2, min_row=cursor, max_col=2, max_row=bottom)
    categories = Reference(ws, min_col=1, min_row=cursor + 1, max_row=bottom)
    bars.add_data(values, titles_from_data=True)
    bars.set_categories(categories)
    bars.width = 18
    bars.height = 10
    ws.add_chart(bars, "H2")
    cursor = bottom + 3

    # Variable and fixed sections: bold heading, then the table
    sections = (
        ("GASTOS VARIABLES", "tot_vars", "GASTOS_VAR"),
        ("GASTOS FIJOS / OPERATIVOS", "tot_fixed", "GASTOS_FIJ"),
    )
    for heading, key, prefix in sections:
        ws.cell(row=cursor, column=1, value=heading).font = Font(bold=True)
        cursor += 1
        bottom = place_table(key, cursor, prefix)
        cursor = bottom + 3

    # Unclassified accounts
    place_table("unknown_accounts", cursor, "REVISAR")

    _autofit_columns(ws, first_row=1, last_row=ws.max_row)

# ===== MAIN =====
def _write_trend_section(ws, monthly_results, labels, start_row, title, name_prefix, anchor):
    """Write one month-by-label trend table plus its line chart onto *ws*.

    One row per entry of *monthly_results* (in the given order), one column
    per label, and a final "TOTAL" column with the row sum. Returns the last
    row used by the table.
    """
    from openpyxl.chart import LineChart

    rows = []
    for month_name, res in monthly_results:
        # analyze_df already aggregated the amount per label for this month
        totals = res["tot_types"].set_index("Tipo gasto")["Total"]
        row = {"Mes": month_name}
        for label in labels:
            row[label] = float(totals.get(label, 0.0))
        rows.append(row)
    trend = pd.DataFrame(rows, columns=["Mes", *labels])
    trend["TOTAL"] = trend[list(labels)].sum(axis=1)

    end_row, _ = _add_styled_table(ws, trend, start_row=start_row, start_col=1, name_prefix=name_prefix)

    # Series live in columns 2..N (header row supplies the titles);
    # column 1 holds the month categories.
    data_ref = Reference(ws, min_col=2, min_row=start_row, max_col=len(trend.columns), max_row=end_row)
    cats_ref = Reference(ws, min_col=1, min_row=start_row + 1, max_row=end_row)
    chart = LineChart()
    chart.title = title
    chart.y_axis.title = "€"
    chart.x_axis.title = "Mes"
    chart.add_data(data_ref, titles_from_data=True)
    chart.set_categories(cats_ref)

    # Highlight TOTAL with a thick black line. Titles taken from the sheet
    # are stored as cell references, so the series cannot be found by
    # comparing its title text; TOTAL is the last column, hence the last
    # series.
    if chart.series:
        total_series = chart.series[-1]
        total_series.graphicalProperties.line.solidFill = "000000"
        total_series.graphicalProperties.line.width = 30000

    chart.width = 20
    chart.height = 12
    ws.add_chart(chart, anchor)
    return end_row


def main():
    """Build the global expense report from the monthly workbooks.

    Reads every ``*.xlsx`` in the current directory whose name fully matches
    ``INPUT_PATTERN``, analyzes each one, and writes ``OUTPUT_FILENAME``
    with one sheet per month, a "TOTAL" sheet over all rows and a
    "Tendencias" sheet with the trend tables and charts.

    Raises:
        FileNotFoundError: if no input workbook is found.
    """
    # NFC-normalize the name before matching so an accented name stored in
    # decomposed form still matches the pattern (assumes INPUT_PATTERN is
    # written in composed form — TODO confirm).
    candidates = (
        p for p in Path(".").glob("*.xlsx")
        if re.fullmatch(INPUT_PATTERN, unicodedata.normalize("NFC", p.name))
    )
    # File names start with DD_MM_YYYY, so sorting by name is not
    # chronological across years; sort by the "YYYY-MM" label instead.
    files = sorted(candidates, key=lambda p: (month_label_from_filename(p.name), p.name))
    if not files:
        raise FileNotFoundError("No se encontraron ficheros mensuales 'Compras ...xlsx' en esta carpeta.")

    monthly_results = []
    all_rows = []
    for f in files:
        df = load_table(f)
        amount_col = pick_amount_column(df)
        res = analyze_df(df, amount_col)
        monthly_results.append((month_label_from_filename(f.name), res))
        all_rows.append(res["df_clean"])

    # Overall totals across every month
    df_all = pd.concat(all_rows, ignore_index=True)
    amount_col_all = pick_amount_column(df_all)
    total_res = analyze_df(df_all, amount_col_all)

    # Write everything into a single workbook
    output_path = Path(OUTPUT_FILENAME)
    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
        for month_name, res in monthly_results:
            write_month_sheet(writer, month_name, res)
        write_month_sheet(writer, "TOTAL", total_res)

        # ===== "Tendencias" sheet =====
        ws = writer.book.create_sheet("Tendencias")
        writer.sheets["Tendencias"] = ws
        ws.freeze_panes = "A2"

        end_row = _write_trend_section(
            ws,
            monthly_results,
            TREND_VARIABLES,
            start_row=1,
            title="Tendencia mensual (Gastos Variables)",
            name_prefix="TREND_VAR",
            anchor="H2",
        )
        _write_trend_section(
            ws,
            monthly_results,
            TREND_FIXED,
            start_row=end_row + 3,
            title="Tendencia mensual (Gastos Fijos/Operativos)",
            name_prefix="TREND_FIJ",
            anchor="H27",
        )

    print(f"✅ Informe de GASTOS guardado en: {output_path.resolve()}")

if __name__ == "__main__":
    main()


: 