## initial code ##

## version currently being worked on

In [1]:
import pandas as pd
import ipywidgets as widgets

from IPython.display import display, clear_output, HTML


csv_file = '~/Downloads/nuvemshparids_clustersfull_1010.csv'

# Load the full dataset
df_full = pd.read_csv(csv_file)

# Work on a reproducible random sample while testing. The requested size is
# capped at the number of available rows because DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without replacement).
SAMPLE_SIZE = 5000
df = df_full.sample(n=min(SAMPLE_SIZE, len(df_full)), random_state=42).reset_index(drop=True)


# Results table: same columns df has at this point, plus the cluster name the
# user finally settled on for each question
adjusted_clusters_df = pd.DataFrame(columns=[*df.columns, 'selected_user_cluster'])

# Every question starts out as "not adjusted yet"
df['flag_adjusted'] = False

# Widen the notebook container
display(HTML("<style>.container { width:95% !important; }</style>"))


# Take the trailing "<level>" of every hierarchical_ward_<level> column
# (the *_name companion columns are skipped) and order the levels numerically
levels = sorted(
    (
        column.rsplit('_', 1)[-1]
        for column in df.columns
        if column.startswith('hierarchical_ward_') and not column.endswith('_name')
    ),
    key=float,
)

# State of the current adjustment session
selected_clusters = []
adjusting_clusters = False
questions_to_adjust = pd.DataFrame()

# Which list the main view shows: adjusted clusters (True) or clusters to adjust (False)
show_adjusted = False

def _make_button(description, button_style):
    """Returns a Button with the given caption and button_style."""
    return widgets.Button(description=description, button_style=button_style)


# Dropdown for the level the clusters start from
initial_level_widget = widgets.Dropdown(options=levels, value='20.0', description='Initial Level:')

# Dropdown for the level applied during adjustments
level_widget = widgets.Dropdown(options=levels, value='1.0', description='New Level:')

# Toggle between the adjusted and the not-yet-adjusted cluster lists
view_adjusted_button = _make_button('Adjusted Clusters', 'info')
view_clusters_to_adjust_button = _make_button('Clusters to Adjust', 'info')

# Actions available while adjusting
aggregate_button = _make_button('Aggregate', 'primary')
split_button = _make_button('Split', 'warning')
set_level_button = _make_button('Set Level', 'success')
done_button = _make_button('Done', 'info')

# Starts an adjustment session for the checked clusters
start_adjust_button = _make_button('Adjust Selected Clusters', 'info')

# 'danger' style keeps this button visually distinct
set_as_undone_button = _make_button('Set as Undone', 'danger')

# Output areas: the adjustment section and the main cluster list
selected_clusters_output = widgets.Output()
clusters_output = widgets.Output()

# Initialize DataFrame with current cluster assignments
def initialize_df(initial_level):
    """
    Points every row of the global DataFrame at its cluster for the given level.

    Copies ``hierarchical_ward_<initial_level>`` and its ``_name`` companion
    into ``current_cluster`` / ``current_cluster_name`` and records the level
    in ``current_level``. Returns the (mutated) global DataFrame.
    """
    global df
    level_column = f'hierarchical_ward_{initial_level}'
    df['current_cluster'] = df[level_column]
    df['current_cluster_name'] = df[f'{level_column}_name']
    df['current_level'] = initial_level
    return df

def toggle_cluster_view(b):
    """
    Switches the main list according to the caption of the clicked button.

    'Adjusted Clusters' selects the adjusted view, 'Clusters to Adjust' the
    pending view; any other caption leaves the current view as it is. The list
    is re-rendered in every case.
    """
    global show_adjusted
    view_for_caption = {
        'Adjusted Clusters': True,
        'Clusters to Adjust': False,
    }
    show_adjusted = view_for_caption.get(b.description, show_adjusted)
    display_clusters()


# Display selected clusters for adjustment
def display_selected_clusters():
    """
    Displays the selected clusters in a separate section for adjustment.

    Questions in ``questions_to_adjust`` are grouped by
    (current_cluster, current_cluster_name, current_level). Each group is
    rendered as a checkbox, a summary label and a collapsed accordion listing
    its questions, followed by the adjustment buttons. The global
    ``adjust_checkboxes`` list is rebuilt with one ``(checkbox, cluster_id)``
    pair per rendered group. Nothing is rendered when no adjustment is in
    progress or there are no questions to adjust.
    """
    global adjust_checkboxes
    from html import escape  # function-scope import keeps this block self-contained

    adjust_checkboxes = []

    selected_clusters_output.clear_output()
    with selected_clusters_output:
        if not adjusting_clusters or questions_to_adjust.empty:
            return
        display(HTML("<h3>Adjusting Selected Clusters</h3>"))

        # Group questions by their cluster
        grouped = questions_to_adjust.groupby(['current_cluster', 'current_cluster_name', 'current_level'])

        for (cluster_id, cluster_name, current_level), group in grouped:
            questions = group['question'].tolist()

            # One checkbox per cluster, remembered together with its id
            checkbox = widgets.Checkbox(value=False)
            adjust_checkboxes.append((checkbox, cluster_id))

            # Values come from the CSV, so they are escaped before being
            # embedded in markup; otherwise a '<' or '&' in a question breaks
            # the rendering (or injects markup into the notebook).
            cluster_info_html = (
                f"<b>{escape(str(cluster_name))}</b> "
                f"(ID: {escape(str(cluster_id))}, Level: {escape(str(current_level))}, "
                f"{len(questions)} questions)"
            )
            cluster_label = widgets.HTML(value=cluster_info_html)

            # Collapsed accordion holding the question list
            items_html = "".join(f"<li>{escape(str(question))}</li>" for question in questions)
            accordion = widgets.Accordion(children=[widgets.HTML(f"<ul>{items_html}</ul>")])
            accordion.set_title(0, "View Questions")
            accordion.selected_index = None  # Ensure the accordion is collapsed by default

            # Arrange the checkbox, label, and accordion
            hbox = widgets.HBox([checkbox, cluster_label])
            display(widgets.VBox([hbox, accordion]))
            display(HTML("<hr>"))

        # Display adjustment buttons
        adjustment_buttons = widgets.HBox(
            [aggregate_button, split_button, set_level_button, done_button],
            layout=widgets.Layout(margin='10px 0'),
        )
        display(adjustment_buttons)

# Display main clusters
def display_clusters():
    """
    Displays the main list of clusters based on toggle state (adjusted or to adjust).

    In the adjusted view the rows come from ``adjusted_clusters_df`` and the
    'Set as Undone' button is shown; otherwise the rows are the entries of
    ``df`` whose ``flag_adjusted`` is False. Rows are grouped by
    (current_cluster, current_cluster_name, current_level) and listed from the
    largest group to the smallest. The global ``checkboxes`` list is rebuilt
    with one ``(checkbox, cluster_id)`` pair per rendered cluster.
    """
    global checkboxes
    from html import escape  # function-scope import keeps this block self-contained

    group_keys = ['current_cluster', 'current_cluster_name', 'current_level']

    clusters_output.clear_output()
    with clusters_output:
        checkboxes = []

        # Choose the appropriate dataframe to display based on toggle
        if show_adjusted:
            clusters_df = adjusted_clusters_df.copy()
            display(set_as_undone_button)
        else:
            # Use only clusters that haven't been adjusted
            clusters_df = df[~df['flag_adjusted'].astype(bool)].copy()

        header_text = "Adjusted Clusters" if show_adjusted else "Clusters to Adjust"
        display(HTML(f"<h3>{header_text}</h3>"))

        # adjusted_clusters_df is created before the current_* columns exist,
        # so until the first "Done" it has no grouping columns and groupby
        # would raise KeyError. An empty selection is rendered as a note.
        has_columns = set(group_keys + ['question']).issubset(clusters_df.columns)
        if clusters_df.empty or not has_columns:
            display(HTML("<p>No clusters to display.</p>"))
        else:
            clusters = clusters_df.groupby(group_keys)['question'].apply(list).reset_index()
            clusters['question_count'] = clusters['question'].apply(len)
            clusters = clusters.sort_values(by='question_count', ascending=False).reset_index(drop=True)

            for _, row in clusters.iterrows():
                cluster_id = row['current_cluster']
                checkbox = widgets.Checkbox(value=False)
                checkboxes.append((checkbox, cluster_id))

                # Values come from the CSV, so they are escaped before being
                # embedded in markup.
                cluster_info_html = (
                    f"<b>{escape(str(row['current_cluster_name']))}</b> "
                    f"(ID: {escape(str(cluster_id))}, Level: {escape(str(row['current_level']))}, "
                    f"{row['question_count']} questions)"
                )
                hbox = widgets.HBox([checkbox, widgets.HTML(value=cluster_info_html)])

                items_html = "".join(f"<li>{escape(str(question))}</li>" for question in row['question'])
                accordion = widgets.Accordion(children=[widgets.HTML(f"<ul>{items_html}</ul>")])
                accordion.set_title(0, "View Questions")
                accordion.selected_index = None  # Ensure the accordion is collapsed by default

                display(widgets.VBox([hbox, accordion]))
                display(HTML("<hr>"))

            # TODO: optionally render each question as its own row with a
            # nested "View Details" accordion next to it.

        # Display appropriate button based on state
        if not adjusting_clusters and not show_adjusted:
            display(start_adjust_button)

# Function to start adjusting selected clusters
def start_adjustment(b):
    """
    Initiates the adjustment process by moving selected clusters to the adjustment section.

    The checked entries of the main list become ``selected_clusters`` and the
    not-yet-adjusted questions belonging to them become
    ``questions_to_adjust``. If that selection is empty, an error message is
    shown and no adjustment session is started.

    Args:
        b: The clicked button (unused).
    """
    global selected_clusters, adjusting_clusters, questions_to_adjust
    selected_clusters = [cluster_id for checkbox, cluster_id in checkboxes if checkbox.value]

    # Cluster ids are only matched by value, so rows that were already adjusted
    # (possibly at another level) can share an id with a checked cluster. They
    # are excluded here; otherwise they would re-enter the adjustment section
    # and be saved a second time on "Done".
    pending = ~df['flag_adjusted'].astype(bool)
    candidates = df[pending & df['current_cluster'].isin(selected_clusters)]

    # Also covers "nothing checked": isin([]) matches no row
    if candidates.empty:
        with selected_clusters_output:
            display(HTML("<p style='color:red;'>Please select at least one cluster to adjust.</p>"))
        return

    adjusting_clusters = True
    questions_to_adjust = candidates.copy()
    display_selected_clusters()
    display_clusters()

# Function to aggregate clusters
def aggregate_clusters_func(b):
    """
    Aggregates the questions under adjustment by moving each one up one level.

    Every row of ``questions_to_adjust`` is re-assigned, in ``df``, to the
    cluster id and name stored in its ``hierarchical_ward_<level>`` columns
    for the entry that follows its current level in ``levels``. Rows whose
    current level is not in ``levels``, or is already the last entry, are left
    untouched. ``questions_to_adjust`` is refreshed from ``df`` and both views
    are re-rendered.

    Args:
        b: The clicked button (unused).
    """
    global df, questions_to_adjust
    if questions_to_adjust.empty:
        with selected_clusters_output:
            display(HTML("<p style='color:red;'>No clusters selected for aggregation.</p>"))
        return

    # Position of each level, computed once instead of list.index() per row
    level_position = {level: position for position, level in enumerate(levels)}

    # All rows sharing a current level move to the same new level, so each
    # level is handled with one vectorised assignment (aligned on the index).
    for current_level, group in questions_to_adjust.groupby('current_level', sort=False):
        position = level_position.get(current_level)
        if position is None or position + 1 >= len(levels):
            continue  # unknown level, or already at the last level
        new_level = levels[position + 1]
        rows = group.index
        df.loc[rows, 'current_cluster'] = group[f'hierarchical_ward_{new_level}']
        df.loc[rows, 'current_cluster_name'] = group[f'hierarchical_ward_{new_level}_name']
        df.loc[rows, 'current_level'] = new_level

    # Update the questions_to_adjust DataFrame
    questions_to_adjust = df[df.index.isin(questions_to_adjust.index)].copy()
    display_selected_clusters()
    display_clusters()

# Function to split clusters
def split_clusters_func(b):
    """
    Splits the questions under adjustment by moving each one down one level.

    Every row of ``questions_to_adjust`` is re-assigned, in ``df``, to the
    cluster id and name stored in its ``hierarchical_ward_<level>`` columns
    for the entry that precedes its current level in ``levels``. Rows whose
    current level is not in ``levels``, or is already the first entry, are
    left untouched. ``questions_to_adjust`` is refreshed from ``df`` and both
    views are re-rendered.

    Args:
        b: The clicked button (unused).
    """
    global df, questions_to_adjust
    if questions_to_adjust.empty:
        with selected_clusters_output:
            display(HTML("<p style='color:red;'>No clusters selected for splitting.</p>"))
        return

    # Position of each level, computed once instead of list.index() per row
    level_position = {level: position for position, level in enumerate(levels)}

    # All rows sharing a current level move to the same new level, so each
    # level is handled with one vectorised assignment (aligned on the index).
    for current_level, group in questions_to_adjust.groupby('current_level', sort=False):
        position = level_position.get(current_level)
        if position is None or position - 1 < 0:
            continue  # unknown level, or already at the first level
        new_level = levels[position - 1]
        rows = group.index
        df.loc[rows, 'current_cluster'] = group[f'hierarchical_ward_{new_level}']
        df.loc[rows, 'current_cluster_name'] = group[f'hierarchical_ward_{new_level}_name']
        df.loc[rows, 'current_level'] = new_level

    # Update the questions_to_adjust DataFrame
    questions_to_adjust = df[df.index.isin(questions_to_adjust.index)].copy()
    display_selected_clusters()
    display_clusters()

# Function to set clusters to a specific level
def set_level_clusters_func(b):
    """
    Sets the questions under adjustment to the level chosen in ``level_widget``.

    Every row of ``questions_to_adjust`` is re-assigned, in ``df``, to the
    cluster id and name stored in its ``hierarchical_ward_<level>`` columns
    for the chosen level. If the chosen level is not in ``levels`` nothing is
    changed. ``questions_to_adjust`` is refreshed from ``df`` and both views
    are re-rendered.

    Args:
        b: The clicked button (unused).
    """
    global df, questions_to_adjust
    # Same guard as the aggregate/split actions
    if questions_to_adjust.empty:
        with selected_clusters_output:
            display(HTML("<p style='color:red;'>No clusters selected for setting a level.</p>"))
        return

    new_level = level_widget.value
    # The target level is the same for every row: validate it once and update
    # all rows with one vectorised assignment (aligned on the index).
    if new_level in levels:
        rows = questions_to_adjust.index
        df.loc[rows, 'current_cluster'] = questions_to_adjust[f'hierarchical_ward_{new_level}']
        df.loc[rows, 'current_cluster_name'] = questions_to_adjust[f'hierarchical_ward_{new_level}_name']
        df.loc[rows, 'current_level'] = new_level

    # Update the questions_to_adjust DataFrame
    questions_to_adjust = df[df.index.isin(questions_to_adjust.index)].copy()
    display_selected_clusters()
    display_clusters()


# Function to finalize adjustments, save, and flag adjusted questions
def done_adjusting_func(b):
    """
    Finalizes the adjustment process, saves the currently adjusted clusters,
    flags adjusted questions in the original DataFrame, and clears the UI.

    Only the clusters whose checkbox is ticked in the adjustment section are
    saved to ``adjusted_clusters_df`` (with ``selected_user_cluster`` set to
    their current cluster name) and flagged in ``df``. The adjustment session
    is closed in every case.

    Args:
        b: The clicked button (unused).
    """
    global adjusting_clusters, selected_clusters, questions_to_adjust, adjusted_clusters_df

    if not questions_to_adjust.empty:
        # Filter questions_to_adjust to only include checked clusters
        checked_ids = {cluster_id for checkbox, cluster_id in adjust_checkboxes if checkbox.value}
        to_adjust_df = questions_to_adjust[questions_to_adjust['current_cluster'].isin(checked_ids)].copy()

        if not to_adjust_df.empty:
            # Flag these questions in the original DataFrame while the original
            # index is still available, and keep the saved copy consistent.
            df.loc[to_adjust_df.index, 'flag_adjusted'] = True
            to_adjust_df['flag_adjusted'] = True
            to_adjust_df['selected_user_cluster'] = to_adjust_df['current_cluster_name']

            if adjusted_clusters_df.empty:
                # Concatenating with an empty frame is deprecated in recent
                # pandas versions, so the first batch simply becomes the
                # results table.
                adjusted_clusters_df = to_adjust_df.reset_index(drop=True)
            else:
                adjusted_clusters_df = pd.concat([adjusted_clusters_df, to_adjust_df], ignore_index=True)

    # Reset adjustment state variables and clear the UI
    adjusting_clusters = False
    selected_clusters = []
    questions_to_adjust = pd.DataFrame()
    selected_clusters_output.clear_output()
    display_clusters()
    
# Function to update clusters when initial level changes
def update_initial_level(change):
    """
    Reacts to a new value of the initial-level dropdown.

    Re-initializes the cluster assignments for ``change['new']``, mirrors the
    value into the adjustment-level dropdown, discards any adjustment in
    progress and re-renders the cluster list.
    """
    global current_level, df, adjusting_clusters, selected_clusters, questions_to_adjust
    current_level = change['new']
    initialize_df(current_level)
    level_widget.value = current_level  # keep the adjustment level in sync

    # Drop any adjustment session that was in progress
    adjusting_clusters, selected_clusters, questions_to_adjust = False, [], pd.DataFrame()
    selected_clusters_output.clear_output()
    display_clusters()

def set_clusters_undone(b):
    """
    Marks selected adjusted clusters as undone, changing their flag and removing them from adjusted_clusters_df.

    The selection is read from ``checkboxes``, the list rebuilt by
    ``display_clusters`` for the view in which the 'Set as Undone' button is
    shown. Shows an error message when nothing is checked.

    Args:
        b: The clicked button (unused).
    """
    global df, adjusted_clusters_df
    # The button lives in the main cluster list, whose checkboxes are stored in
    # `checkboxes`; `adjust_checkboxes` belongs to the adjustment section and
    # may be stale or not defined at all at this point.
    checked_ids = {cluster_id for checkbox, cluster_id in checkboxes if checkbox.value}

    if not checked_ids:
        with selected_clusters_output:
            display(HTML("<p style='color:red;'>Please select at least one cluster to set as undone.</p>"))
        return

    # Update 'flag_adjusted' to False for selected clusters
    # NOTE(review): rows are matched on df's current_cluster value; confirm this
    # still identifies the right rows if the initial level was changed after
    # the clusters were marked as done.
    df.loc[df['current_cluster'].isin(checked_ids), 'flag_adjusted'] = False

    # Remove from adjusted_clusters_df
    adjusted_clusters_df = adjusted_clusters_df[~adjusted_clusters_df['current_cluster'].isin(checked_ids)]

    # Refresh cluster display
    display_clusters()


# React to changes of the initial level dropdown
initial_level_widget.observe(update_initial_level, names='value')

# Connect every button to its click handler
for _button, _handler in (
    (aggregate_button, aggregate_clusters_func),
    (split_button, split_clusters_func),
    (set_level_button, set_level_clusters_func),
    (done_button, done_adjusting_func),
    (start_adjust_button, start_adjustment),
    (view_adjusted_button, toggle_cluster_view),
    (view_clusters_to_adjust_button, toggle_cluster_view),
    (set_as_undone_button, set_clusters_undone),
):
    _button.on_click(_handler)


# Start from the level currently selected in the dropdown
current_level = initial_level_widget.value
initialize_df(current_level)


def _controls_row(*children):
    """Returns a left-aligned HBox of the given widgets with its own Layout."""
    return widgets.HBox(
        list(children),
        layout=widgets.Layout(margin='10px 0', justify_content='flex-start'),
    )


# First row: level selection and the start button; second row: view toggles
first_row_controls = _controls_row(initial_level_widget, start_adjust_button, level_widget)
second_row_controls = _controls_row(view_adjusted_button, view_clusters_to_adjust_button)

# Both rows stacked vertically
control_widgets = widgets.VBox([first_row_controls, second_row_controls])


# Show the controls followed by the two output areas
for _view in (control_widgets, selected_clusters_output, clusters_output):
    display(_view)

# Render the initial cluster list
display_clusters()

VBox(children=(HBox(children=(Dropdown(description='Initial Level:', index=74, options=('0.1', '0.2', '0.3', '…

Output()

Output()

In [24]:
# Choose the appropriate dataframe to display based on toggle

# cclusters_df = df[df['flag_adjusted'] == False].copy()

# # Group and sort as usual
# cclusters = cclusters_df.groupby(['current_cluster', 'current_cluster_name', 'current_level'])['question'].apply(list).reset_index()
# cclusters['question_count'] = cclusters['question'].apply(len)
# cclusters = cclusters.sort_values(by='question_count', ascending=False).reset_index(drop=True)



# for index, row in clusters.iterrows():
#     cluster_id = row['current_cluster']

In [25]:
#cclusters


In [23]:
#df.head(2)

In [22]:

#len(adjusted_clusters_df)  
#df[df['flag_adjusted'] == True]
#add logic to if click on done remove from original dataframe, or flag so we know we adjusted these already - OK
#now adjust to have both view, one for showing the DF original and one for DF of adjusted - OK
#conseguir dar done em só um ou mais dos clusters que estão na seção de ajustes - OK
#conseguir dar um desfazer nos clusters que estão no done - OK

#GOAL: Deixar tudo pronto para nuvemshop fazer o dever de casa deles
#output 1: Perguntas feitas pelos clientes agrupadas por formas diferentes de fazer a mesma pergunta
#output 2: output1 + linkada ao conteúdo correto de resposta (gabarito).
#will be input for: Criar sessões com base em como clientes perguntam e na resposta que deve ser dada

#colocar em algum lugar que nuvemshop possa ter acesso para fazer o trabalho necessário de validação / ajuste
#adicionar os steps posteriores de conseguir selecionar entre as sessões já existentes qual responde

#add extra infos sem separar mesmo.
#add the extracted_questions para ser visto se clicar no question, para ver mais no detalhe. Adicionar o ticket id também 


#how should we do if we want to iterate on the adjusted DF? - create a new version
#Also allow the user to group clusters manually, and select

#df.head(3)
