In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Function to create individual plot for each dataset
def create_plot(df_filtered, file_name, lr):
    """Plot accuracy and runtime against sample size for one learning rate.

    Rows of ``df_filtered`` whose ``learning_rate`` equals ``lr`` are drawn
    as three accuracy curves (test, train, validation) on the left axis and
    a dashed runtime curve on a twin right axis. The figure is saved as a
    PNG under ``plots/metrics_new_s1_s2/lr_{lr}/s1=s2``.

    Args:
        df_filtered: DataFrame providing the columns ``learning_rate``,
            ``sample_size_layer_1``, ``test_accuracy``, ``train_accuracy``,
            ``valid_accuracy`` and ``runtime_seconds``.
        file_name: Name of the metrics file the data came from. Its stem
            (the name without its extension) appears in the plot title and
            in the output file name.
        lr: Learning rate value to select.
    """
    folder_path = f'plots/metrics_new_s1_s2/lr_{lr}/s1=s2'
    df_lr = df_filtered[df_filtered['learning_rate'] == lr]

    # Strip the extension properly rather than assuming it is 5 characters
    # long, so names such as 'data.xls' or 'data.csv' are not mangled.
    dataset_name = os.path.splitext(file_name)[0]

    # Set up figure and axis
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Line plots for accuracy
    sns.lineplot(data=df_lr, x='sample_size_layer_1', y='test_accuracy', label='Test Accuracy', ax=ax1)
    sns.lineplot(data=df_lr, x='sample_size_layer_1', y='train_accuracy', label='Train Accuracy', ax=ax1)
    sns.lineplot(data=df_lr, x='sample_size_layer_1', y='valid_accuracy', label='Validation Accuracy', ax=ax1)

    ax1.set_xlabel('Neighbourhood Sample Size')
    ax1.set_ylabel('Accuracy')
    ax1.set_title(f'Accuracy vs Neighbourhood Sample Size ({dataset_name})')

    # Create a twin axis for runtime
    ax2 = ax1.twinx()
    sns.lineplot(data=df_lr, x='sample_size_layer_1', y='runtime_seconds', color='red', label='Runtime', ax=ax2,
                 linestyle='--')

    ax2.set_ylabel('Runtime (s)')

    # Each axis keeps its own legend: accuracy on the left, runtime on the right
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='upper left')
    ax2.legend(lines2, labels2, loc='upper right')

    # Save plot to folder; exist_ok avoids the check-then-create race
    os.makedirs(folder_path, exist_ok=True)
    plot_file_name = os.path.join(folder_path, f'Accuracy_Runtime_vs_Neighbourhood_Sample_Size_{dataset_name}.png')
    fig.tight_layout()
    fig.savefig(plot_file_name)
    # Close this specific figure instead of relying on the implicit current one
    plt.close(fig)
    


# Where the Excel metric files live, and the learning rates to plot
metrics_folder = 'metrics/metrics_new_s1_s2'
lr_list = [0.01, 0.001, 0.0001]

# Produce one plot per (Excel file, learning rate) pair
for file_name in os.listdir(metrics_folder):
    # Anything that is not an Excel workbook is ignored
    if not file_name.endswith('.xlsx'):
        continue

    file_path = os.path.join(metrics_folder, file_name)
    df = pd.read_excel(file_path)

    # Keep only the rows where both layers use the same sample size
    equal_sizes = df['sample_size_layer_1'] == df['sample_size_layer_2']
    df_filtered = df[equal_sizes]

    for lr in lr_list:
        create_plot(df_filtered, file_name, lr)
        


In [ ]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Function to create individual plot for each s1 or s2 value
def create_plot(df, column_name, s_value, folder_path):
    """Plot accuracy and runtime against one layer's sample size.

    The layer named by ``column_name`` is held fixed at ``s_value``; the
    other layer's sample size becomes the x-axis. Accuracy curves (test,
    train, validation) go on the left axis and a dashed runtime curve on a
    twin right axis. The figure is saved as
    ``{folder_path}/{column_name}_{s_value}.png``.

    Args:
        df: DataFrame providing the columns ``sample_size_layer_1``,
            ``sample_size_layer_2``, ``test_accuracy``, ``train_accuracy``,
            ``valid_accuracy`` and ``runtime_seconds``.
        column_name: Column held fixed. ``'sample_size_layer_1'`` puts
            layer 2 on the x-axis; any other value puts layer 1 on the
            x-axis.
        s_value: Value of ``column_name`` to select.
        folder_path: Directory the PNG is written to; created if missing.
    """
    # Filter data for the specified s_value
    df_filtered = df[df[column_name] == s_value]

    # The x-axis is the layer that is NOT held fixed; the title tag names
    # the layer that IS held fixed.
    if column_name == 'sample_size_layer_1':
        label = 'Sample Size Layer 2'
        x_col = 'sample_size_layer_2'
        fixed_tag = 's1'
    else:
        label = 'Sample Size Layer 1'
        x_col = 'sample_size_layer_1'
        fixed_tag = 's2'

    # Set up figure and axis
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Line plots for accuracy
    sns.lineplot(data=df_filtered, x=x_col, y='test_accuracy', label='Test Accuracy', ax=ax1)
    sns.lineplot(data=df_filtered, x=x_col, y='train_accuracy', label='Train Accuracy', ax=ax1)
    sns.lineplot(data=df_filtered, x=x_col, y='valid_accuracy', label='Validation Accuracy', ax=ax1)

    ax1.set_xlabel(label)
    ax1.set_ylabel('Accuracy')
    # Previously the title always said "s1=", mislabelling plots where
    # layer 2 was the fixed one.
    ax1.set_title(f'Accuracy vs {label} ({fixed_tag}={s_value})')

    # Create a twin axis for runtime
    ax2 = ax1.twinx()
    sns.lineplot(data=df_filtered, x=x_col, y='runtime_seconds', color='red', label='Runtime', ax=ax2, linestyle='--')

    ax2.set_ylabel('Runtime (s)')

    # Each axis keeps its own legend: accuracy on the left, runtime on the right
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1, labels1, loc='upper left')
    ax2.legend(lines2, labels2, loc='upper right')

    # Save plot to folder; exist_ok avoids the check-then-create race
    os.makedirs(folder_path, exist_ok=True)
    plot_file_name = os.path.join(folder_path, f'{column_name}_{s_value}.png')
    fig.tight_layout()
    fig.savefig(plot_file_name)
    # Close this specific figure instead of relying on the implicit current one
    plt.close(fig)

# Input folder with the Excel metric files, output root for the plots,
# and the learning rates to produce plots for
metrics_folder = 'metrics/metrics_new_s1_s2'
plots_folder = 'plots/metrics_new_s1_s2'
lr_list = [0.01, 0.001, 0.0001]

# One sub-folder of plots per (learning rate, fixed layer, Excel file)
for file_name in os.listdir(metrics_folder):
    # Anything that is not an Excel workbook is ignored
    if not file_name.endswith('.xlsx'):
        continue

    file_path = os.path.join(metrics_folder, file_name)
    df = pd.read_excel(file_path)
    dataset_stem = os.path.splitext(file_name)[0]

    for lr in lr_list:
        df_filtered = df[df['learning_rate'] == lr]

        # Hold layer 2 fixed at 1, 3, ..., 19; layer 1 is the x-axis
        folder_path = os.path.join(plots_folder, f'lr_{lr}/s2', dataset_stem)
        for s_value in range(1, 21, 2):
            create_plot(df_filtered, 'sample_size_layer_2', s_value, folder_path)

        # Hold layer 1 fixed at 1, 3, ..., 15; layer 2 is the x-axis
        folder_path = os.path.join(plots_folder, f'lr_{lr}/s1', dataset_stem)
        for s_value in range(1, 16, 2):
            create_plot(df_filtered, 'sample_size_layer_1', s_value, folder_path)

        
