In [1]:
## App para filtros interactivos

In [1]:
## Importamos librerías
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
import os
import datetime

In [6]:
# Columns of the processed purchases CSV that are parsed as datetimes when the
# file is loaded (passed to `pd.read_csv(..., parse_dates=...)`).
date_cols_to_parse = [
    'Signup Date',
    'Last Login Date',
    'First Order Date',
    'Last Order Date',
]

In [7]:


# ==============================================================================
# --- 1. Load the data ---
# Read the processed purchases CSV exactly once. `parse_dates` converts the
# date columns during the read, so the names in `date_cols_to_parse` must match
# the CSV headers.
#
# To reuse a DataFrame that is already in memory instead of reading the CSV,
# replace the line below with `df_para_filtrar = <your_dataframe>`.
df_para_filtrar = pd.read_csv('purchases_df_processed.csv', parse_dates=date_cols_to_parse)
# ==============================================================================

# --- 2. Preprocessing ---
# If a date column still shows up as `object` in the dtype report below, coerce
# it explicitly, e.g.:
#   df_para_filtrar[col] = pd.to_datetime(df_para_filtrar[col], errors='coerce')

# The signup-year slider filters on 'Signup Year'. It is derived here from
# 'Signup Date', so it exists even when the CSV does not ship the column (and
# is overwritten with the derived value when it does).
df_para_filtrar['Signup Year'] = df_para_filtrar['Signup Date'].dt.year

# --- Optional: verify that the column dtypes were loaded as expected ---
print("Tipos de datos después de la carga inicial:")
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() would emit a stray "None" line after the report.
df_para_filtrar.info()
print("\n")


# --- 3. Define the interactive widgets ---
# Slider and picker bounds are taken from the loaded DataFrame, whose date
# columns are already datetime-typed at this point.


def _build_range_slider(lower, upper, label):
    """Return an integer range slider over [lower, upper] with the full span selected."""
    return widgets.IntRangeSlider(
        value=[lower, upper],
        min=lower,
        max=upper,
        step=1,
        description=label,
        continuous_update=True,
        readout=True,
        readout_format='d',
        # A fresh Layout per slider: the two sliders must not share one layout model.
        layout=widgets.Layout(width='500px'),
    )


def _build_date_picker(label, selected, earliest, latest):
    """Return an enabled date picker bounded to [earliest, latest] with `selected` preset."""
    return widgets.DatePicker(
        description=label,
        value=selected,
        min=earliest,
        max=latest,
        disabled=False,
    )


# Signup-year range.
signup_years = df_para_filtrar['Signup Year']
min_year = signup_years.min()
max_year = signup_years.max()
year_slider = _build_range_slider(min_year, max_year, 'Signup Year Range:')

# Free-text Member ID filter (comma-separated integers).
member_id_input = widgets.Text(
    value='',
    placeholder='Enter Member ID (e.g., 123 or 123,456)',
    description='Filter by Member ID:',
    layout=widgets.Layout(width='300px'),
)

# Purchases-count range.
purchase_counts = df_para_filtrar['Purchases']
min_purchases = purchase_counts.min()
max_purchases = purchase_counts.max()
purchases_slider = _build_range_slider(min_purchases, max_purchases, 'Purchases Range:')

# First-order date window; the pickers work with datetime.date values.
first_order_dates = df_para_filtrar['First Order Date']
min_first_order_date = first_order_dates.min().date()
max_first_order_date = first_order_dates.max().date()
first_order_date_start = _build_date_picker(
    'First Order (Start):', min_first_order_date,
    min_first_order_date, max_first_order_date,
)
first_order_date_end = _build_date_picker(
    'First Order (End):', max_first_order_date,
    min_first_order_date, max_first_order_date,
)

# Last-order date window.
last_order_dates = df_para_filtrar['Last Order Date']
min_last_order_date = last_order_dates.min().date()
max_last_order_date = last_order_dates.max().date()
last_order_date_start = _build_date_picker(
    'Last Order (Start):', min_last_order_date,
    min_last_order_date, max_last_order_date,
)
last_order_date_end = _build_date_picker(
    'Last Order (End):', max_last_order_date,
    min_last_order_date, max_last_order_date,
)

# Sorting controls; the 'None' entry means "leave the row order untouched".
sort_column_dropdown = widgets.Dropdown(
    options=['None', *df_para_filtrar.columns],
    value='None',
    description='Sort by Column:',
    layout=widgets.Layout(width='200px'),
)
sort_ascending_checkbox = widgets.Checkbox(
    value=True,
    description='Sort Ascending',
    disabled=False,
    indent=False,
)

# Save button plus a dedicated output area for its status message.
save_button = widgets.Button(
    description='Guardar DataFrame Filtrado',
    button_style='success',
    tooltip='Click to save the current filtered DataFrame to a CSV file.',
)
save_output_message = widgets.Output()

# Area where the filtered preview is rendered.
output_area = widgets.Output()

# Most recent filter result; written by the filter callback and read by the
# save handler. Starts out empty.
current_filtered_df = pd.DataFrame()

def apply_filters_and_sort(year_range, member_id_str, purchases_range,
                           fo_start, fo_end, lo_start, lo_end,
                           sort_column, sort_ascending):
    """Filter and sort `df_para_filtrar` and render a preview in `output_area`.

    The result is stored in the module-level `current_filtered_df` so the save
    handler can export it.

    Args:
        year_range: (start, end) inclusive bounds applied to 'Signup Year'.
        member_id_str: Comma-separated Member IDs. Empty, blank or
            separator-only text means "no Member ID filter".
        purchases_range: (min, max) inclusive bounds applied to 'Purchases'.
        fo_start, fo_end: Inclusive date bounds for 'First Order Date'. Each
            bound is applied on its own; a falsy bound (e.g. a cleared picker)
            is ignored.
        lo_start, lo_end: Same as above, for 'Last Order Date'.
        sort_column: Column to sort by, or the string 'None' to keep the order.
        sort_ascending: Sort direction passed to `DataFrame.sort_values`.

    Returns:
        None. When `member_id_str` contains a non-integer token, an error
        message and the head of the rows kept by the year/purchases filters are
        shown, and `current_filtered_df` is left unchanged.
    """
    global current_filtered_df

    with output_area:
        clear_output(wait=True)

        # Build a single boolean mask over the source frame instead of copying
        # and re-slicing it once per filter (this runs on every slider tick).
        start_year, end_year = year_range
        min_p, max_p = purchases_range
        keep = (
            df_para_filtrar['Signup Year'].between(start_year, end_year)
            & df_para_filtrar['Purchases'].between(min_p, max_p)
        )

        if member_id_str:
            try:
                member_ids_to_filter = [
                    int(token.strip())
                    for token in member_id_str.split(',')
                    if token.strip()
                ]
            except ValueError:
                print("Invalid Member ID(s). Please enter comma-separated numbers.")
                display(df_para_filtrar[keep].head())
                return
            # Text such as " " or "," parses to no IDs at all; treat that like
            # an empty box rather than filtering every row out via isin([]).
            if member_ids_to_filter:
                keep &= df_para_filtrar['Member ID'].isin(member_ids_to_filter)

        keep &= _date_window_mask(df_para_filtrar['First Order Date'], fo_start, fo_end)
        keep &= _date_window_mask(df_para_filtrar['Last Order Date'], lo_start, lo_end)

        filtered_df = df_para_filtrar[keep]

        if sort_column != 'None':
            if sort_column in filtered_df.columns:
                filtered_df = filtered_df.sort_values(by=sort_column, ascending=sort_ascending)
            else:
                print(f"Warning: Column '{sort_column}' not found for sorting.")

        # Detach the stored result from the source frame so later edits to it
        # cannot trigger chained-assignment warnings.
        current_filtered_df = filtered_df.copy()

        print(f"Mostrando {len(current_filtered_df)} de {len(df_para_filtrar)} filas.")
        display(current_filtered_df.head(10))


def _date_window_mask(dates, start, end):
    """Return a boolean mask selecting `dates` that fall within [start, end] by calendar day.

    `start` and `end` are date-like values or falsy; a falsy bound is not
    applied. Comparisons are done on the datetime column directly rather than
    through `.dt.date`, which would build one Python object per row.
    NOTE(review): assumes `dates` is a timezone-naive datetime64 column; rows
    holding NaT are excluded whenever at least one bound is applied.
    """
    mask = pd.Series(True, index=dates.index)
    if start:
        mask &= dates >= pd.Timestamp(start)
    if end:
        # Exclusive bound at the following midnight keeps every timestamp that
        # falls on the `end` day itself.
        mask &= dates < pd.Timestamp(end) + pd.Timedelta(days=1)
    return mask

def save_filtered_df(button):
    """Export the latest filtered DataFrame to a timestamped CSV in the working directory.

    Used as the click handler of `save_button`. Status messages are written to
    `save_output_message`, replacing whatever was shown there before.

    Args:
        button: The widget passed by `on_click`; not used.
    """
    with save_output_message:
        clear_output(wait=True)

        # Nothing to export when the stored result has no rows.
        if current_filtered_df.empty:
            print("No hay DataFrame filtrado para guardar. Aplica algún filtro primero.")
            return

        # The timestamp in the file name keeps successive exports apart.
        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        save_path = os.path.join(os.getcwd(), f"filtered_df_output_{stamp}.csv")

        try:
            current_filtered_df.to_csv(save_path, index=False)
            print(f"DataFrame guardado exitosamente en: {save_path}")
        except Exception as e:
            # Report the failure in the UI instead of raising out of the handler.
            print(f"Error al guardar el DataFrame: {e}")

# Export the current filtered result whenever the save button is clicked.
save_button.on_click(save_filtered_df)

# Bind every control to the matching argument of the filter callback. The
# Output widget returned by interactive_output is not displayed: the callback
# renders into `output_area` itself.
filter_controls = {
    'year_range': year_slider,
    'member_id_str': member_id_input,
    'purchases_range': purchases_slider,
    'fo_start': first_order_date_start,
    'fo_end': first_order_date_end,
    'lo_start': last_order_date_start,
    'lo_end': last_order_date_end,
    'sort_column': sort_column_dropdown,
    'sort_ascending': sort_ascending_checkbox,
}
widgets.interactive_output(apply_filters_and_sort, filter_controls)

# Filter controls, one horizontal row per group.
print("--- Filtros Interactivos ---")
filter_rows = [
    [year_slider, purchases_slider],
    [member_id_input],
    [first_order_date_start, first_order_date_end],
    [last_order_date_start, last_order_date_end],
]
display(widgets.VBox([widgets.HBox(row) for row in filter_rows]))

# Sorting controls.
print("\n--- Opciones de Ordenamiento ---")
display(widgets.HBox([sort_column_dropdown, sort_ascending_checkbox]))

# Save button followed by its status message area.
print("\n--- Guardar DataFrame ---")
display(save_button, save_output_message)

# Preview of the filtered rows.
display(output_area)

Tipos de datos después de la carga inicial:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197639 entries, 0 to 197638
Data columns (total 9 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   Member ID         197639 non-null  int64         
 1   Signup Date       197639 non-null  datetime64[ns]
 2   Last Login Date   195622 non-null  datetime64[ns]
 3   Purchases         197639 non-null  int64         
 4   Total Amount      197639 non-null  float64       
 5   Credit Amount     74379 non-null   float64       
 6   First Order Date  197639 non-null  datetime64[ns]
 7   Last Order Date   197639 non-null  datetime64[ns]
 8   Signup Year       197639 non-null  int32         
dtypes: datetime64[ns](4), float64(2), int32(1), int64(2)
memory usage: 12.8 MB
None


--- Filtros Interactivos ---


VBox(children=(HBox(children=(IntRangeSlider(value=(2015, 2023), description='Signup Year Range:', layout=Layo…


--- Opciones de Ordenamiento ---


HBox(children=(Dropdown(description='Sort by Column:', layout=Layout(width='200px'), options=('None', 'Member …


--- Guardar DataFrame ---


Button(button_style='success', description='Guardar DataFrame Filtrado', style=ButtonStyle(), tooltip='Click t…

Output()

Output()