In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
def plot_from_csv(csv_file1, csv_file2,
                  output_path="compare_knn_rf_model_f5.pdf", offset=0.02):
    """Plot KNN and Random Forest validation scores from two result CSVs.

    Both CSVs must contain the columns ``Train Fraction``,
    ``KNN Validation Score``, ``KNN Val Error``, ``RF Validation Score`` and
    ``RF Val Error``.  Series from ``csv_file1`` are labelled "Balanced" and
    drawn with ``>`` markers shifted left by ``offset``; series from
    ``csv_file2`` are labelled "Original" and drawn with ``o`` markers at
    their unshifted x positions, which also define the x-axis ticks.

    Parameters
    ----------
    csv_file1 : str or path-like
        CSV with the results plotted as the "Balanced" series.
    csv_file2 : str or path-like
        CSV with the results plotted as the "Original" series.
    output_path : str or path-like, optional
        Where the figure is saved (300 dpi, tight bounding box).
    offset : float, optional
        Horizontal shift subtracted from the ``csv_file1`` x positions so the
        two sets of markers do not sit on top of each other.

    Returns
    -------
    None
        The figure is written to ``output_path`` and shown.

    Raises
    ------
    KeyError
        If either CSV lacks one of the required columns.
    """
    required_columns = (
        'Train Fraction',
        'KNN Validation Score', 'KNN Val Error',
        'RF Validation Score', 'RF Val Error',
    )

    balanced = pd.read_csv(csv_file1)
    original = pd.read_csv(csv_file2)

    # Fail early with a message naming the offending file, instead of a bare
    # KeyError raised half-way through the extraction.
    for path, frame in ((csv_file1, balanced), (csv_file2, original)):
        missing = [col for col in required_columns if col not in frame.columns]
        if missing:
            raise KeyError(f"{path!r} is missing required column(s): {missing}")

    # Only the first file's points are shifted; the second file's points stay
    # on the tick positions.
    x_balanced = balanced['Train Fraction'].values - offset
    x_original = original['Train Fraction'].values

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_xlabel('Fraction of Train Size', fontsize=18)
    ax.set_ylabel('F1 Score', fontsize=18)
    ax.set_xticks(x_original)
    ax.set_xticklabels([f"{x:.2f}" for x in x_original])
    ax.tick_params(axis='both', labelsize=18)
    ax.grid(True, linestyle='--', alpha=0.4)

    def draw_series(frame, xs, model, label, marker, color, **extra):
        # One error-bar series: "<model> Validation Score" +/- "<model> Val Error".
        ax.errorbar(
            xs,
            frame[f'{model} Validation Score'].values,
            yerr=frame[f'{model} Val Error'].values,
            fmt=marker, label=label, color=color,
            markersize=12, capsize=5,
            **extra,
        )

    # Call order determines the legend order.
    draw_series(balanced, x_balanced, 'KNN', r'$k$NN-Balanced', '>', 'green')
    draw_series(original, x_original, 'KNN', r'$k$NN-Original', 'o', 'green')
    draw_series(balanced, x_balanced, 'RF', 'RF-Balanced', '>', 'r',
                markeredgecolor='r')
    draw_series(original, x_original, 'RF', 'RF-Original', 'o', 'r',
                markeredgecolor='r')

    ax.set_ylim(0.8, 1.0)
    ax.legend(fontsize=18, loc="lower right")
    fig.tight_layout()
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Render the comparison figure for the currently selected pair of result files.
# Alternative input pairs, kept for reference:
#   'model_comparison_balanced.csv' with 'model_comparison_original.csv'
#   'model_comparison_bal_f5.csv'   with 'model_comparison_original_f5.csv'
plot_from_csv(
    csv_file1='model_comparison_bal_f5.csv',
    csv_file2="model_comparison_original_f5_final_20-08-2025.csv",
)
