In [5]:
import pandas as pd
import os
import glob
from sklearn.metrics import accuracy_score

# List of directories containing the output CSV files
directories = ['./PR/','./NR/','./SR/', './WS/', './SFST/', './PR_Benefit/','./NR_Benefit/','./SR_Benefit/', './WS_Benefit/', './SFST_Benefit/']

# Dictionary to store accuracies for each model in each directory
accuracies_dict = {}

# Read each directory and compute the accuracy by model
for directory in directories:
    csv_files = glob.glob(os.path.join(directory, '*.csv'))
    directory_name = os.path.basename(os.path.normpath(directory))
    directory_accuracies = {}
    for csv_file in csv_files:
        data = pd.read_csv(csv_file)
        for model in data['Model'].unique():
            model_data = data[data['Model'] == model]
            actual = model_data['Actual']
            predicted = model_data['Predicted']
            accuracy = accuracy_score(actual, predicted)
            if model not in directory_accuracies:
                directory_accuracies[model] = []
            directory_accuracies[model].append(accuracy)
    accuracies_dict[directory_name] = directory_accuracies

# LaTeX table string
latex_table = r"""
\begin{table}[H]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
\textbf{Function} & \textbf{Model} & \textbf{Data} & \textbf{Accuracy} \\
\hline
"""

# Find and print the best model(s) for each directory based on highest accuracy
for directory_name, model_accuracies in accuracies_dict.items():
    # Calculate the average accuracy for each model
    average_accuracies = {model: sum(accuracies) / len(accuracies) for model, accuracies in model_accuracies.items()}
    
    # Find the highest average accuracy value
    max_accuracy = max(average_accuracies.values())
    # Find all models that have this max average accuracy
    best_models = [model for model, accuracy in average_accuracies.items() if accuracy == max_accuracy]
    
    # Format the best models for the LaTeX table
    if len(best_models) > 1:
        latex_table += f"\\multirow{{{len(best_models)}}}{{*}}{{{directory_name}}} "
    else:
        latex_table += f"{directory_name} "
    
    for i, model in enumerate(best_models):
        if i > 0:
            latex_table += " & "
        model_accuracies_str = ", ".join(["All Data"])  # Placeholder, replace with actual data methods if available
        accuracy_str = f"{max_accuracy:.1%}"
        latex_table += f"& {model} & {model_accuracies_str} & {accuracy_str} \\\\\n"
        if i < len(best_models) - 1:
            latex_table += " \\hline\n"
    latex_table += " \\hline\n"

latex_table += r"""
\end{tabular}
\caption{Best Model Accuracies}
\label{tab_class:spec_model_accuracies_best}
\end{table}
"""

print(latex_table)



\begin{table}[H]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
\textbf{Function} & \textbf{Model} & \textbf{Data} & \textbf{Accuracy} \\
\hline
PR & LogisticRegression & All Data & 95.2% \\
 \hline
NR & LinearDiscriminantAnalysis & All Data & 85.7% \\
 \hline
SR & TensorFlow & All Data & 89.9% \\
 \hline
WS & GradientBoostingClassifier & All Data & 100.0% \\
 \hline
\multirow{7}{*}{SFST} & DecisionTreeClassifier & All Data & 95.2% \\
 \hline
 & & GradientBoostingClassifier & All Data & 95.2% \\
 \hline
 & & RandomForestClassifier & All Data & 95.2% \\
 \hline
 & & AdaBoostClassifier & All Data & 95.2% \\
 \hline
 & & LogisticRegression & All Data & 95.2% \\
 \hline
 & & SVC & All Data & 95.2% \\
 \hline
 & & LinearDiscriminantAnalysis & All Data & 95.2% \\
 \hline
PR_Benefit & SGDClassifier & All Data & 96.4% \\
 \hline
NR_Benefit & SVC & All Data & 100.0% \\
 \hline
\multirow{8}{*}{SR_Benefit} & RidgeClassifier & All Data & 100.0% \\
 \hline
 & & DecisionTreeClassifier & All Data & 10