In [31]:
import pandas as pd
import functools
file_name = "ventas-enero-febrero"
dir_path = "../data/excel-ventas/"
file_path = f"{dir_path}{file_name}.csv"

@functools.lru_cache(maxsize=1)
def load_data_2(url):
  try:
    df = pd.read_csv(url, encoding='utf-8')
  except UnicodeDecodeError:
    df = pd.read_csv(url, encoding='latin-1')
  return df

In [32]:
consumer_complaint_data = load_data_2(file_path)
consumer_complaint_data.head()

Unnamed: 0,Fecha Inicial,Fecha Final,Código producto,Nombre producto,Referencia fábrica,Grupo inventario,Cantidad vendida,Valor bruto,Descuento,Subtotal,Impuesto cargo,Impuesto retención,Total
0,01/01/2025,31/01/2025,SVCH,Miguita (Sencillo) Vaso Chocolate,,Productos,63,"384.152,50",0,"384.152,50","88.361,31",0.0,"472.513,81"
1,01/01/2025,31/01/2025,SVMA,Miguita (Sencillo) Vaso Maracuyá,,Productos,91,"554.885,03",0,"554.885,03","127.629,71",0.0,"682.514,74"
2,01/01/2025,31/01/2025,SVVA,Miguita (Sencillo) Vaso Vainilla,,Productos,61,"371.954,66",0,"371.954,66","85.553,24",0.0,"457.507,90"
3,01/01/2025,31/01/2025,SVPC,Miguita (Sencillo) Vaso Panelita de Coco,,Productos,37,"225.609,89",0,"225.609,89","51.890,01",0.0,"277.499,90"
4,01/01/2025,31/01/2025,SVZA,Miguita (Sencillo) Vaso Zapote,,Productos,33,"202.066,51",0,"202.066,51","45.433,36",0.0,"247.499,87"


In [33]:
def transform_sales_data(df):
    def parse_product_name(name):
        mapping = {
            "Miguita (Sencillo) Vaso": ("Vaso", "Miguita"),
            "Miguita (Sencillo) Cono": ("Cono", "Miguita"),
            "Con Ñapa (Doble) Vaso": ("Vaso", "Ñapa"),
            "Con Ñapa (Doble) Cono": ("Cono", "Ñapa"),
            "Pote (Medio Litro)": ("Pote", "Medio Litro"),
            "Tambuco (Litro)": ("Tambuco", "Litro"),
        }

        for key, (presentacion, tamano) in mapping.items():
            if name.startswith(key):
                sabor = name[len(key):].strip()
                return presentacion, sabor, tamano

        return None, name, None

    df[['presentacion', 'sabor', 'tamaño']] = df['Nombre producto'].apply(
        lambda x: pd.Series(parse_product_name(x))
    )

    return df

df = pd.DataFrame(consumer_complaint_data)
df_transformed = transform_sales_data(df)
print(df_transformed)
df_transformed.to_csv(f"transformed_{file_name}.csv", index=False)
print(df_transformed)


    Fecha Inicial Fecha Final Código producto  \
0      01/01/2025  31/01/2025            SVCH   
1      01/01/2025  31/01/2025            SVMA   
2      01/01/2025  31/01/2025            SVVA   
3      01/01/2025  31/01/2025            SVPC   
4      01/01/2025  31/01/2025            SVZA   
..            ...         ...             ...   
246    01/02/2025  28/02/2025            SCVA   
247    01/02/2025  28/02/2025            SVAJ   
248    01/02/2025  28/02/2025            SVLB   
249    01/02/2025  28/02/2025             PPC   
250    01/02/2025  28/02/2025            SVKO   

                               Nombre producto  Referencia fábrica  \
0            Miguita (Sencillo) Vaso Chocolate                 NaN   
1             Miguita (Sencillo) Vaso Maracuyá                 NaN   
2             Miguita (Sencillo) Vaso Vainilla                 NaN   
3     Miguita (Sencillo) Vaso Panelita de Coco                 NaN   
4               Miguita (Sencillo) Vaso Zapote               