In [16]:
import pickle
import numpy as np
import os
import plotly.graph_objects as go

def read_and_average_test_f1(root_folder, run_folders, filename_template, file_indices):
    """
    Load one pickle file per run and average the test macro-F1 scores across runs.

    The file for each run is located at
    ``root_folder/<run_folder>/<filename_template.format(file_index)>``.
    Run folders and file indices are paired positionally.

    Args:
        root_folder: Directory that contains the per-run sub-folders.
        run_folders: Names of the per-run sub-folders, one per run.
        filename_template: ``str.format`` template whose placeholder receives
            the run's file index.
        file_indices: Values substituted into ``filename_template``, one per run.

    Returns:
        ``np.mean(..., axis=0)`` over the ``"test_f1_scores_macro"`` entries of
        all loaded result objects, i.e. the element-wise mean across runs.

    Raises:
        ValueError: If ``run_folders`` and ``file_indices`` differ in length,
            if no runs are given, or if the score entries of the runs do not
            all have the same shape.
        OSError: If a pickle file cannot be opened (e.g. it does not exist).
        KeyError: If a loaded result has no ``"test_f1_scores_macro"`` entry.
    """
    # Materialize both inputs so their lengths can be compared; a bare zip()
    # would silently drop the surplus runs and average over fewer files.
    run_folders = list(run_folders)
    file_indices = list(file_indices)
    if len(run_folders) != len(file_indices):
        raise ValueError(
            f"run_folders has {len(run_folders)} entries but file_indices has "
            f"{len(file_indices)}; they must be paired one-to-one"
        )
    if not run_folders:
        raise ValueError("at least one run is required to compute an average")

    performance_test_data_all = []

    for run_folder, file_index in zip(run_folders, file_indices):
        pickle_filename = filename_template.format(file_index)  # Dynamically generate filenames
        pickle_path = os.path.join(root_folder, run_folder, pickle_filename)

        print(f"Loading pickle file: {pickle_path}")
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserializing -- only point this at result files you trust.
        with open(pickle_path, 'rb') as pickle_file:
            results = pickle.load(pickle_file)
        # The file is already closed here; only the scores are kept.
        performance_test_data_all.append(results["test_f1_scores_macro"])

    # An element-wise mean is only defined when every run has the same shape;
    # fail with a readable message instead of an error from inside NumPy.
    shapes = {np.shape(scores) for scores in performance_test_data_all}
    if len(shapes) > 1:
        raise ValueError(
            f"runs have differently shaped test F1 scores: {sorted(shapes)}"
        )

    # Calculate the average test F1 score across runs
    average_test_f1 = np.mean(performance_test_data_all, axis=0)
    return average_test_f1

def plot_all_test_performance(no_of_samples, performances, strategies, title, x_axes, y_axes):
    """
    Plots the average test F1 scores of multiple strategies on a single graph.

    For every entry of ``no_of_samples`` a dashed vertical guide line with a
    small text label above it is drawn, then one curve per strategy is added
    and the figure is displayed with ``fig.show()``.

    Args:
        no_of_samples: X positions (sample counts) shared by all curves; also
            used for the vertical guide lines and their labels.
        performances: One sequence of scores per strategy, paired positionally
            with ``strategies``. Each sequence must be non-empty because its
            last value is shown in the legend.
        strategies: Display names of the strategies.
        title: Figure title (converted with ``str``).
        x_axes: X-axis title (converted with ``str``).
        y_axes: Y-axis title (converted with ``str``).

    Returns:
        None. The figure is shown, not returned.
    """
    fig = go.Figure()
    fig.update_layout(plot_bgcolor='rgb(209, 217, 222)')

    # Add dashed lines for sample numbers and text labels on top of the graph
    samples_text = [str(f) for f in no_of_samples]
    for line, label in zip(no_of_samples, samples_text):
        # Vertical guide line at this sample count.
        fig.add_trace(go.Scatter(
            x=[line, line],
            y=[0, 1.12],
            mode='lines',
            line=dict(dash='dash', color='rgb(149, 162, 171)', width=1),
            showlegend=False
        ))

        # Text-only trace placed just above the guide line's upper end.
        fig.add_trace(go.Scatter(
            x=[line],
            y=[1.13],
            mode='text',
            marker=dict(size=0),
            text=[label],
            textposition='top center',
            showlegend=False,
            textfont=dict(family='Arial', color='black', size=8.5)
        ))

    # Colour palette, assigned by position in `strategies`: three shades of
    # green followed by red. Red is meant for random sampling, so callers
    # list random sampling as the fourth strategy.
    colors = [
              'rgb(97, 192, 134)',
              'rgb(45, 140, 58)',
              'rgb(151, 193, 57)',
              'rgb(255, 0, 0)'# Red for random sampling
              ]   

    # Add traces for each strategy
    for idx, (performance, strategy) in enumerate(zip(performances, strategies)):
        # Index by position and wrap around the palette: this avoids an
        # IndexError with more than four strategies and gives strategies
        # that share a name their own colour slot.
        color = colors[idx % len(colors)]
        fig.add_trace(go.Scatter(
            x=no_of_samples,
            y=performance,
            name=f"{strategy} (final: {performance[-1]:.3f})",
            marker=dict(color=color),
            textfont=dict(family="Arial", size=11),
            line=dict(width=2)
        ))

    fig.update_layout(
        title_text=str(title),
        legend=dict(font=dict(color='black')),
        title_font_color='black'
    )

    # Set the x-axis to be on a log scale
    fig.update_xaxes(
        title_text=str(x_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        type="log"
    )

    # Update y-axis for test F1 score
    fig.update_yaxes(
        title_text=str(y_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black'
    )

    fig.show()

# --- Experiment configuration -------------------------------------------
# Number of training samples at each evaluation point (x-axis of the plot).
no_of_samples = [8, 24, 56, 112, 212, 388, 704, 1264, 2264, 4048, 7200, 12824, 22824, 26880]

# Strategies to compare, in plotting order.
strategies = ['avg_score', 'avg_confidence', 'max_score', 'random_sampling']

# Result directory of each strategy.
root_folders = dict(
    avg_score="/home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/avg_score",
    avg_confidence="/home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/avg_confidence",
    max_score="/home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/max_score",
    random_sampling="/home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/RS",
)

# Result filename pattern of each strategy; the placeholder takes the run's file index.
filename_templates = dict(
    avg_score="AL_average_score_results_for_multilabel_classification_s4{}.pickle",
    avg_confidence="AL_average_confidence_results_for_multilabel_classification_s4{}.pickle",
    max_score="AL_max_score_results_for_multilabel_classification_s4{}.pickle",
    random_sampling="random_sampling_results_for_multilabel_classification_s4{}.pickle",
)

# Five runs (run1 .. run5), paired positionally with file indices 2 .. 6.
run_folders = ["run%d" % run_number for run_number in (1, 2, 3, 4, 5)]
file_indices = list(range(2, 7))

# --- Average the runs of every strategy ---------------------------------
performances = []
for strategy in strategies:
    average_f1 = read_and_average_test_f1(
        root_folders[strategy],
        run_folders,
        filename_templates[strategy],
        file_indices,
    )
    performances.append(average_f1)

# --- Draw all averaged curves on one figure -----------------------------
plot_all_test_performance(
    no_of_samples,
    performances,
    strategies,
    "Average Test F1 Scores for Different Strategies",
    "Amount of Training Samples",
    "Test F1 Score",
)


Loading pickle file: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/avg_score/run1/AL_average_score_results_for_multilabel_classification_s42.pickle
Loading pickle file: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/avg_score/run2/AL_average_score_results_for_multilabel_classification_s43.pickle
Loading pickle file: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/avg_score/run3/AL_average_score_results_for_multilabel_classification_s44.pickle
Loading pickle file: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/avg_score/run4/AL_average_score_results_for_multilabel_classification_s45.pickle
Loading pickle file: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/avg_score/run5/AL_average_score_results_for_multilabel_classification_s46.pickle
Loading pickle file: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DInoS/AL/a