In [2]:
import pickle
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.subplots as subplots
import os

# Load the random-sampling results of every run and average them across runs.
# Per run, one pickle (read for its "test_f1_scores_micro" entry) and one CSV
# training history (columns used: "epoch", "valid_loss", "valid_f1") are loaded.

# Define the root folder
root_folder = "/home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS"

# Folder names for all runs
run_folders = [f"run{i}" for i in range(1, 3)]

# Define corresponding filenames: exactly one pickle/CSV pair per run folder,
# with the suffixes s42, s43, ... in run order.
pickle_filenames = [
    f"random_sampling_results_for_multiclass_classification_s4{i+2}.pickle"
    for i in range(len(run_folders))
]
csv_filenames = [f"training_history_s4{i+2}.csv" for i in range(len(run_folders))]

# Initialize accumulators for averaging
performance_test_data_all = []
performance_all = []

# Number of samples for each iteration
no_of_samples = [8, 24, 56, 112, 208, 384, 704, 1264, 2264, 4040, 7200, 12824, 22824, 26880]

# Loop through each run folder and collect the data
for run_number, (run_folder, pickle_filename, csv_filename) in enumerate(
    zip(run_folders, pickle_filenames, csv_filenames), start=1
):
    # Construct file paths
    pickle_path = os.path.join(root_folder, run_folder, pickle_filename)
    csv_path = os.path.join(root_folder, run_folder, csv_filename)

    # Print which pickle file is being loaded
    print(f"Loading pickle file for run {run_number}: {pickle_path}")

    # Load test F1 scores from pickle file.
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # result files you produced yourself.
    with open(pickle_path, 'rb') as pickle_file:
        random_sampling_results = pickle.load(pickle_file)
    performance_test_data_all.append(random_sampling_results["test_f1_scores_micro"])

    # Print which CSV file is being loaded
    print(f"Loading CSV file for run {run_number}: {csv_path}")

    # Load CSV for validation loss and F1 scores
    df = pd.read_csv(csv_path)
    # Every row with epoch == 1 starts a new iteration, so the running count of
    # such rows numbers the iterations 1, 2, 3, ...
    df['iteration'] = (df['epoch'] == 1).cumsum()

    performance = []
    # Iterations 1..len(no_of_samples): one per entry of no_of_samples.
    for j in range(1, len(no_of_samples) + 1):
        iteration_data = df[df['iteration'] == j]
        if iteration_data.empty:
            # Iteration numbers are contiguous, so the first missing one means
            # this run recorded no further iterations. Stopping here lets
            # min_length below truncate to the shortest run instead of failing
            # later on an empty list.
            break
        performance.append({
            'val_loss': iteration_data['valid_loss'].tolist(),
            'val_f1_score': iteration_data['valid_f1'].tolist()
        })

    performance_all.append(performance)

# Aggregate results: keep only as many iterations as every run provides.
# (Building a single list keeps min() valid even when no run was loaded.)
min_length = min([len(no_of_samples), *(len(perf) for perf in performance_all)])
no_of_samples = no_of_samples[:min_length]

# For each iteration, average the last recorded validation loss / F1 over runs.
average_performance = []
for i in range(min_length):
    avg_val_loss = np.mean([run[i]['val_loss'][-1] for run in performance_all])
    avg_val_f1 = np.mean([run[i]['val_f1_score'][-1] for run in performance_all])
    average_performance.append({'val_loss': avg_val_loss, 'val_f1_score': avg_val_f1})

# Mean over the run axis; presumably each entry is an equally long sequence of
# per-iteration test F1 scores -- TODO confirm against the pickle contents.
average_test_f1 = np.mean(performance_test_data_all, axis=0)

def plot_validation_performance(performance, no_of_samples, title, x_axes, y_axes, y_axes_secondary):
    """Show validation loss and validation F1 score against the sample counts.

    The loss is drawn on the primary y-axis and the F1 score on the secondary
    y-axis of one figure with a log-scaled x-axis. A dashed vertical guide line
    with a text label is added for every value in ``no_of_samples``.

    Args:
        performance: Sequence of dicts, each holding scalar ``"val_loss"`` and
            ``"val_f1_score"`` values. Entries that are not dicts or lack one
            of the keys are reported via ``print`` and left out of the curves.
        no_of_samples: x-values, paired position by position with
            ``performance``.
        title: Figure title.
        x_axes: x-axis title.
        y_axes: Title of the primary (loss) y-axis.
        y_axes_secondary: Title of the secondary (F1) y-axis.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fig = subplots.make_subplots(specs=[[{"secondary_y": True}]])
    fig.update_layout(plot_bgcolor='rgb(209, 217, 222)')

    # Add dashed lines for sample numbers and text labels on top of the graph.
    # Both guide traces request yaxis='y2', i.e. the secondary (F1) axis.
    samples_text = [str(f) for f in no_of_samples]
    for line, label in zip(no_of_samples, samples_text):
        fig.add_trace(go.Scatter(
            x=[line, line],
            y=[0, 1.12],
            mode='lines',
            line=dict(dash='dash', color='rgb(149, 162, 171)', width=1),
            showlegend=False,
            yaxis='y2'
        ))

        fig.add_trace(go.Scatter(
            x=[line],
            y=[1.13],
            mode='text',
            marker=dict(size=0),
            text=[label],
            textposition='top center',
            showlegend=False,
            yaxis='y2',
            textfont=dict(family='Arial', color='black', size=8.5)
        ))

    # Collect the curves together with their own x-values, so that skipping an
    # invalid entry cannot shift the remaining points onto wrong sample counts.
    curve_x = []
    loss = []
    f1 = []
    for i, (sample_count, entry) in enumerate(zip(no_of_samples, performance)):
        if not isinstance(entry, dict):
            print(f"Invalid performance entry at index {i}, expected dictionary, got {type(entry)}")
            continue
        if "val_loss" not in entry or "val_f1_score" not in entry:
            print(f"Missing 'val_loss' or 'val_f1_score' in performance entry {i}")
            continue
        curve_x.append(sample_count)
        loss.append(entry["val_loss"])
        f1.append(entry["val_f1_score"])

    # The legend shows the last plotted value; fall back to a plain label when
    # there is nothing to plot instead of failing on an empty list.
    loss_name = f"Val loss ({loss[-1]:.3f})" if loss else "Val loss"
    f1_name = f"Val F1 score ({f1[-1]:.3f})" if f1 else "Val F1 score"

    # Add traces for validation loss and validation F1 score
    fig.add_trace(go.Scatter(
        x=curve_x,
        y=loss,
        name=loss_name,
        marker=dict(color='rgb(97,192,134)'),
        textfont=dict(family="Arial", size=11),
        line=dict(width=2)
    ), secondary_y=False)

    fig.add_trace(go.Scatter(
        x=curve_x,
        y=f1,
        name=f1_name,
        marker=dict(color='rgb(245,130,31)'),
        textfont=dict(family="Arial", size=11),
        line=dict(width=2)
    ), secondary_y=True)

    fig.update_layout(
        title_text=str(title),
        legend=dict(font=dict(color='black', family='Arial'), orientation='h', x=0.5, y=-0.2),
        title_font_color='black'
    )

    # Set the x-axis to be on a log scale
    fig.update_xaxes(
        title_text=str(x_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        type="log"
    )

    # Update y-axes for primary and secondary y-axes
    fig.update_yaxes(
        title_text=str(y_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        secondary_y=False
    )

    fig.update_yaxes(
        title_text=str(y_axes_secondary),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        secondary_y=True
    )

    fig.show()

def plot_test_performance(no_of_samples, performance_test_data, title, x_axes, y_axes):
    """Show the test F1 score against the sample counts on a log-scaled x-axis.

    Args:
        no_of_samples: x-values; each one also gets a dashed vertical guide
            line and a text label above it.
        performance_test_data: y-values of the test F1 curve; the last value is
            shown in the legend entry with three decimals.
        title: Figure title.
        x_axes: x-axis title.
        y_axes: y-axis title.

    Returns:
        None. The figure is displayed with ``show()``.
    """
    guide_line_style = dict(dash='dash', color='rgb(149, 162, 171)', width=1)
    guide_label_font = dict(family='Arial', color='black', size=8.5)

    figure = go.Figure()
    figure.update_layout(plot_bgcolor='rgb(209, 217, 222)')

    # One dashed vertical marker per sample count, captioned with that count.
    captions = list(map(str, no_of_samples))
    for position, caption in zip(no_of_samples, captions):
        vertical_marker = go.Scatter(
            x=[position, position],
            y=[0, 1.12],
            mode='lines',
            line=guide_line_style,
            showlegend=False,
        )
        figure.add_trace(vertical_marker)

        marker_caption = go.Scatter(
            x=[position],
            y=[1.13],
            mode='text',
            marker=dict(size=0),
            text=[caption],
            textposition='top center',
            showlegend=False,
            textfont=guide_label_font,
        )
        figure.add_trace(marker_caption)

    # The actual test F1 curve; its legend entry carries the final score.
    final_score = performance_test_data[-1]
    score_curve = go.Scatter(
        x=no_of_samples,
        y=performance_test_data,
        name=f"Test F1 score ({final_score:.3f})",
        marker=dict(color='rgb(151,193,57)'),
        textfont=dict(family="Arial", size=11),
        line=dict(width=2),
    )
    figure.add_trace(score_curve)

    figure.update_layout(
        title_text=str(title),
        legend=dict(font=dict(color='black', family='Arial')),
        title_font_color='black',
    )

    # Logarithmic x-axis, black Arial axis titles and black tick labels.
    figure.update_xaxes(
        title_text=str(x_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        type="log",
    )
    figure.update_yaxes(
        title_text=str(y_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
    )

    figure.show()

# Render the run-averaged validation curves (loss and F1 share one figure with
# a log-scaled x-axis).
plot_validation_performance(
    performance=average_performance,
    no_of_samples=no_of_samples,
    title="Average Validation Loss and F1 Score for random sampling",
    x_axes="Amount of Training Samples",
    y_axes="Validation Loss",
    y_axes_secondary="Validation F1 Score",
)

# Render the run-averaged test F1 curve (log-scaled x-axis).
plot_test_performance(
    no_of_samples=no_of_samples,
    performance_test_data=average_test_f1,
    title="Average Test F1 Score for random sampling",
    x_axes="Amount of Training Samples",
    y_axes="Test F1 Score",
)


Loading pickle file for run 1: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS/run1/random_sampling_results_for_multiclass_classification_s42.pickle
Loading CSV file for run 1: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS/run1/training_history_s42.csv
Loading pickle file for run 2: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS/run2/random_sampling_results_for_multiclass_classification_s43.pickle
Loading CSV file for run 2: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS/run2/training_history_s43.csv


In [4]:
import pickle
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.subplots as subplots
import os

# Load the random-sampling results of every run and average them across runs.
# Per run, one pickle (read for its "test_f1_scores_micro" entry) and one CSV
# training history (columns used: "epoch", "valid_loss", "valid_f1") are loaded.

# Define the root folder
root_folder = "/home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS"

# Folder names for all runs
run_folders = [f"run{i}" for i in range(1, 3)]

# Define corresponding filenames: exactly one pickle/CSV pair per run folder,
# with the suffixes s42, s43, ... in run order.
pickle_filenames = [
    f"random_sampling_results_for_multiclass_classification_s4{i+2}.pickle"
    for i in range(len(run_folders))
]
csv_filenames = [f"training_history_s4{i+2}.csv" for i in range(len(run_folders))]

# Initialize accumulators for averaging
performance_test_data_all = []
performance_all = []

# Number of samples for each iteration
no_of_samples = [8, 24, 56, 112, 208, 384, 704, 1264, 2264, 4040, 7200, 12824, 22824, 26880]

# Loop through each run folder and collect the data
for run_number, (run_folder, pickle_filename, csv_filename) in enumerate(
    zip(run_folders, pickle_filenames, csv_filenames), start=1
):
    # Construct file paths
    pickle_path = os.path.join(root_folder, run_folder, pickle_filename)
    csv_path = os.path.join(root_folder, run_folder, csv_filename)

    # Print which pickle file is being loaded
    print(f"Loading pickle file for run {run_number}: {pickle_path}")

    # Load test F1 scores from pickle file.
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # result files you produced yourself.
    with open(pickle_path, 'rb') as pickle_file:
        random_sampling_results = pickle.load(pickle_file)
    performance_test_data_all.append(random_sampling_results["test_f1_scores_micro"])

    # Print which CSV file is being loaded
    print(f"Loading CSV file for run {run_number}: {csv_path}")

    # Load CSV for validation loss and F1 scores
    df = pd.read_csv(csv_path)
    # Every row with epoch == 1 starts a new iteration, so the running count of
    # such rows numbers the iterations 1, 2, 3, ...
    df['iteration'] = (df['epoch'] == 1).cumsum()

    performance = []
    # Iterations 1..len(no_of_samples): one per entry of no_of_samples.
    for j in range(1, len(no_of_samples) + 1):
        iteration_data = df[df['iteration'] == j]
        if iteration_data.empty:
            # Iteration numbers are contiguous, so the first missing one means
            # this run recorded no further iterations. Stopping here lets
            # min_length below truncate to the shortest run instead of failing
            # later on an empty list.
            break
        performance.append({
            'val_loss': iteration_data['valid_loss'].tolist(),
            'val_f1_score': iteration_data['valid_f1'].tolist()
        })

    performance_all.append(performance)

# Aggregate results: keep only as many iterations as every run provides.
# (Building a single list keeps min() valid even when no run was loaded.)
min_length = min([len(no_of_samples), *(len(perf) for perf in performance_all)])
no_of_samples = no_of_samples[:min_length]

# For each iteration, average the last recorded validation loss / F1 over runs.
average_performance = []
for i in range(min_length):
    avg_val_loss = np.mean([run[i]['val_loss'][-1] for run in performance_all])
    avg_val_f1 = np.mean([run[i]['val_f1_score'][-1] for run in performance_all])
    average_performance.append({'val_loss': avg_val_loss, 'val_f1_score': avg_val_f1})

# Mean over the run axis; presumably each entry is an equally long sequence of
# per-iteration test F1 scores -- TODO confirm against the pickle contents.
average_test_f1 = np.mean(performance_test_data_all, axis=0)

def plot_validation_performance(performance, no_of_samples, min_max_values, title, x_axes, y_axes, y_axes_secondary):
    """Show validation loss and F1 score with shaded min/max bands.

    The loss is drawn on the primary y-axis (range fixed to 0.6..2.4) and the
    F1 score on the secondary y-axis (range fixed to 0..1) of one figure with a
    log-scaled x-axis. A dashed vertical guide line with a text label is added
    for every value in ``no_of_samples``, and a semi-transparent band between
    the given minimum and maximum values is drawn behind each curve.

    Args:
        performance: Sequence of dicts, each holding scalar ``"val_loss"`` and
            ``"val_f1_score"`` values. Entries that are not dicts or lack one
            of the keys are reported via ``print`` and left out of the curves.
        no_of_samples: x-values, paired position by position with
            ``performance``; any sequence (list, tuple, NumPy array) works.
        min_max_values: 4-tuple ``(min_val_loss, max_val_loss, min_val_f1,
            max_val_f1)`` of sequences giving the band limits per x-value.
        title: Figure title.
        x_axes: x-axis title.
        y_axes: Title of the primary (loss) y-axis.
        y_axes_secondary: Title of the secondary (F1) y-axis.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fig = subplots.make_subplots(specs=[[{"secondary_y": True}]])
    fig.update_layout(plot_bgcolor='rgb(209, 217, 222)')

    # Add dashed lines for sample numbers and text labels on top of the graph
    samples_text = [str(f) for f in no_of_samples]
    for i, (line, label) in enumerate(zip(no_of_samples, samples_text)):
        fig.add_trace(go.Scatter(
            x=[line, line],
            y=[0, 2.3],
            mode='lines',
            line=dict(dash='dash', color='rgb(149, 162, 171)', width=1),
            showlegend=False
        ))

        # Adjust the y-position for the last value to avoid collision
        y_position = 2.31 if i < len(no_of_samples) - 1 else 2.26

        fig.add_trace(go.Scatter(
            x=[line],
            y=[y_position],
            mode='text',
            marker=dict(size=0),
            text=[label],
            textposition='top center',
            showlegend=False,
            textfont=dict(family='Arial', color='black', size=8.5)
        ))

    # Adjust the y-axis range to avoid extra space
    fig.update_yaxes(range=[0.6, 2.4], secondary_y=False)
    fig.update_yaxes(range=[0, 1], secondary_y=True)

    # Collect the curves together with their own x-values, so that skipping an
    # invalid entry cannot shift the remaining points onto wrong sample counts.
    curve_x = []
    loss = []
    f1 = []
    for i, (sample_count, entry) in enumerate(zip(no_of_samples, performance)):
        if not isinstance(entry, dict):
            print(f"Invalid performance entry at index {i}, expected dictionary, got {type(entry)}")
            continue
        if "val_loss" not in entry or "val_f1_score" not in entry:
            print(f"Missing 'val_loss' or 'val_f1_score' in performance entry {i}")
            continue
        curve_x.append(sample_count)
        loss.append(entry["val_loss"])
        f1.append(entry["val_f1_score"])

    # Closed outline of each band: forward along the maxima, back along the
    # minima. Converting to a list first guarantees concatenation; "+" on a
    # NumPy array would add the values element-wise instead.
    forward_x = list(no_of_samples)
    band_x = forward_x + forward_x[::-1]

    # Add the shaded region (fluctuation range) for validation loss
    min_val_loss, max_val_loss, min_val_f1, max_val_f1 = min_max_values
    fig.add_trace(go.Scatter(
        x=band_x,
        y=list(max_val_loss) + list(min_val_loss)[::-1],
        fill='toself',
        fillcolor='rgba(97, 192, 134, 0.2)',  # Add transparency
        line=dict(width=0),  # No border for shaded region
        showlegend=False  # Don't show a separate legend entry for the shaded region
    ), secondary_y=False)

    # Add the shaded region (fluctuation range) for validation F1 score
    fig.add_trace(go.Scatter(
        x=band_x,
        y=list(max_val_f1) + list(min_val_f1)[::-1],
        fill='toself',
        fillcolor='rgba(245, 130, 31, 0.2)',  # Add transparency
        line=dict(width=0),  # No border for shaded region
        showlegend=False  # Don't show a separate legend entry for the shaded region
    ), secondary_y=True)

    # The legend shows the last plotted value; fall back to a plain label when
    # there is nothing to plot instead of failing on an empty list.
    loss_name = f"Val loss ({loss[-1]:.3f})" if loss else "Val loss"
    f1_name = f"Val F1 score ({f1[-1]:.3f})" if f1 else "Val F1 score"

    # Add traces for validation loss and validation F1 score
    fig.add_trace(go.Scatter(
        x=curve_x,
        y=loss,
        name=loss_name,
        marker=dict(color='rgb(97,192,134)'),
        textfont=dict(family="Arial", size=11),
        line=dict(width=2)
    ), secondary_y=False)

    fig.add_trace(go.Scatter(
        x=curve_x,
        y=f1,
        name=f1_name,
        marker=dict(color='rgb(245,130,31)'),
        textfont=dict(family="Arial", size=11),
        line=dict(width=2)
    ), secondary_y=True)

    fig.update_layout(
        title_text=str(title),
        legend=dict(font=dict(color='black', family="Arial")),
        title_font_color='black'
    )

    # Set the x-axis to be on a log scale
    fig.update_xaxes(
        title_text=str(x_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        type="log"
    )

    # Update y-axes for primary and secondary y-axes
    fig.update_yaxes(
        title_text=str(y_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        secondary_y=False
    )

    fig.update_yaxes(
        title_text=str(y_axes_secondary),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        secondary_y=True
    )

    fig.show()

def plot_test_performance(no_of_samples, performance_test_data, title, x_axes, y_axes):
    """Show the test F1 score against the sample counts on a log-scaled x-axis.

    Args:
        no_of_samples: x-values; each one also gets a dashed vertical guide
            line and a text label above it.
        performance_test_data: y-values of the test F1 curve; the last value is
            shown in the legend entry with three decimals.
        title: Figure title.
        x_axes: x-axis title.
        y_axes: y-axis title.

    Returns:
        None. The figure is displayed with ``show()``.
    """
    guide_line_style = dict(dash='dash', color='rgb(149, 162, 171)', width=1)
    guide_label_font = dict(family='Arial', color='black', size=8.5)

    figure = go.Figure()
    figure.update_layout(plot_bgcolor='rgb(209, 217, 222)')

    # One dashed vertical marker per sample count, captioned with that count.
    captions = list(map(str, no_of_samples))
    for position, caption in zip(no_of_samples, captions):
        vertical_marker = go.Scatter(
            x=[position, position],
            y=[0, 1.12],
            mode='lines',
            line=guide_line_style,
            showlegend=False,
        )
        figure.add_trace(vertical_marker)

        marker_caption = go.Scatter(
            x=[position],
            y=[1.13],
            mode='text',
            marker=dict(size=0),
            text=[caption],
            textposition='top center',
            showlegend=False,
            textfont=guide_label_font,
        )
        figure.add_trace(marker_caption)

    # The actual test F1 curve; its legend entry carries the final score.
    final_score = performance_test_data[-1]
    score_curve = go.Scatter(
        x=no_of_samples,
        y=performance_test_data,
        name=f"Test F1 score ({final_score:.3f})",
        marker=dict(color='rgb(151,193,57)'),
        textfont=dict(family="Arial", size=11),
        line=dict(width=2),
    )
    figure.add_trace(score_curve)

    figure.update_layout(
        title_text=str(title),
        legend=dict(font=dict(color='black', family='Arial')),
        title_font_color='black',
    )

    # Logarithmic x-axis, black Arial axis titles and black tick labels.
    figure.update_xaxes(
        title_text=str(x_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
        type="log",
    )
    figure.update_yaxes(
        title_text=str(y_axes),
        title_font=dict(color="black", family="Arial"),
        tickfont_color='black',
    )

    figure.show()

# Build one row per run holding, for every iteration, the last recorded
# validation loss / F1 score of that iteration.
val_loss_all = np.array([
    [iteration['val_loss'][-1] for iteration in run]
    for run in performance_all
])
val_f1_all = np.array([
    [iteration['val_f1_score'][-1] for iteration in run]
    for run in performance_all
])

# Per-iteration minimum and maximum across runs (axis 0 is the run axis).
min_val_loss = val_loss_all.min(axis=0)
max_val_loss = val_loss_all.max(axis=0)
min_val_f1 = val_f1_all.min(axis=0)
max_val_f1 = val_f1_all.max(axis=0)

# Band limits in the order plot_validation_performance unpacks them.
min_max_values_val = (min_val_loss, max_val_loss, min_val_f1, max_val_f1)

# Render the run-averaged validation curves with their min/max bands
# (log-scaled x-axis).
plot_validation_performance(
    performance=average_performance,
    no_of_samples=no_of_samples,
    min_max_values=min_max_values_val,
    title="Average validation loss and micro F1 score for random_sampling",
    x_axes="Number of training samples (log-scaled)",
    y_axes="Validation loss",
    y_axes_secondary="Validation set micro F1 score",
)

# Render the run-averaged test F1 curve (log-scaled x-axis).
plot_test_performance(
    no_of_samples=no_of_samples,
    performance_test_data=average_test_f1,
    title="Average Test F1 Score for random sampling",
    x_axes="Amount of Training Samples",
    y_axes="Test F1 Score",
)


Loading pickle file for run 1: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS/run1/random_sampling_results_for_multiclass_classification_s42.pickle
Loading CSV file for run 1: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS/run1/training_history_s42.csv
Loading pickle file for run 2: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS/run2/random_sampling_results_for_multiclass_classification_s43.pickle
Loading CSV file for run 2: /home/woody/iwfa/iwfa044h/CleanLab_Test/ActiveLearningApproaches/EOD/DinoL/multiclass/RS/run2/training_history_s43.csv
