In [19]:
import pandas as pd
import os
import glob
from sklearn.metrics import accuracy_score

# Directory containing the output CSV files
output_csv_directory = './NR/'

# Collect all the CSV files in the directory.
# glob.glob() returns matches in arbitrary (filesystem-dependent) order, so the
# list is sorted to keep the order of the generated table reproducible.
csv_files = sorted(glob.glob(os.path.join(output_csv_directory, '*.csv')))

# Maps dataset name -> {model name -> accuracy}, one entry per CSV file
accuracies_dict = {}

# Function to extract the relevant part of the filename for the title
def extract_relevant_part(filename):
    """Return the third underscore-separated field of ``filename``.

    The name is split on '_' and the field at index 2 is returned, but only
    when the split yields more than three fields. For any shorter name the
    literal string "Unknown" is returned instead.
    """
    fields = filename.split('_')
    # Names with three or fewer fields do not match the expected format.
    return "Unknown" if len(fields) <= 3 else fields[2]

# Read each CSV file and compute the accuracy of every model it contains.
for csv_file in csv_files:
    data = pd.read_csv(csv_file)

    # One pass over the frame: groupby(sort=False) yields the models in order of
    # first appearance, matching the row order produced by iterating .unique().
    file_accuracies = {
        model: accuracy_score(model_data['Actual'], model_data['Predicted'])
        for model, model_data in data.groupby('Model', sort=False)
    }

    dataset_name = extract_relevant_part(os.path.basename(csv_file))

    # Several files can map to the same name (e.g. every file that falls back
    # to "Unknown"). Append a counter instead of silently overwriting the
    # results of an earlier file.
    if dataset_name in accuracies_dict:
        suffix = 2
        while f"{dataset_name} ({suffix})" in accuracies_dict:
            suffix += 1
        dataset_name = f"{dataset_name} ({suffix})"

    accuracies_dict[dataset_name] = file_accuracies

# Generate LaTeX code using longtable so the table can span multiple pages.
# The label is defined once so that the \ref in the continuation header always
# points at the \label emitted at the end of the table (previously the header
# referenced a label that was never defined).
TABLE_LABEL = "tab:class_acc_nr_benefit"

# Header shown on the first page of the table
print("\\begin{longtable}{|c|c|c|c|c|}")
print("\\hline")
print("\\textbf{Data.} & \\textbf{Model} & \\textbf{Acc.} & \\textbf{Model} & \\textbf{Acc.} \\\\ \\hline")
print("\\endfirsthead")

# Header repeated on every continuation page
print("\\multicolumn{5}{c}{{Continuation of Table \\ref{" + TABLE_LABEL + "}}} \\\\")
print("\\hline")
print("\\textbf{Dataset} & \\textbf{Model} & \\textbf{Accuracy} & \\textbf{Model} & \\textbf{Accuracy} \\\\ \\hline")
print("\\endhead")

for dataset_name, model_accuracies in accuracies_dict.items():
    models = list(model_accuracies.items())
    # Two (model, accuracy) entries per table row for the dual-column layout
    for row_index, start in enumerate(range(0, len(models), 2)):
        pair = models[start:start + 2]
        # Only the first row of each dataset carries the dataset name
        row_label = dataset_name if row_index == 0 else ""
        left_model, left_accuracy = pair[0]
        if len(pair) == 2:
            right_model, right_accuracy = pair[1]
            print(f"{row_label} & {left_model} & {left_accuracy:.2f} & {right_model} & {right_accuracy:.2f} \\\\")
        else:  # Odd number of models: leave the right-hand columns empty
            print(f"{row_label} & {left_model} & {left_accuracy:.2f} & & \\\\")
    print("\\hline")


print("\\caption{Classification Accuracy for NR Benefit}")
print("\\label{" + TABLE_LABEL + "}")
print("\\end{longtable}")

\begin{longtable}{|c|c|c|c|c|}
\hline
\textbf{Data.} & \textbf{Model} & \textbf{Acc.} & \textbf{Model} & \textbf{Acc.} \\ \hline
\endfirsthead
\multicolumn{5}{c}{{Continuation of Table \ref{tab:model_accuracies}}} \\
\hline
\textbf{Dataset} & \textbf{Model} & \textbf{Accuracy} & \textbf{Model} & \textbf{Accuracy} \\ \hline
\endhead
bfill & RidgeClassifier & 0.76 & DecisionTreeClassifier & 0.57 \\
 & GradientBoostingClassifier & 0.71 & RandomForestClassifier & 0.76 \\
 & AdaBoostClassifier & 0.76 & KNeighborsClassifier & 0.67 \\
 & MLPClassifier & 0.62 & LogisticRegression & 0.81 \\
 & SGDClassifier & 0.71 & SVC & 0.67 \\
 & GaussianNB & 0.62 & LinearDiscriminantAnalysis & 0.71 \\
\hline
custom & RidgeClassifier & 0.67 & DecisionTreeClassifier & 0.62 \\
 & GradientBoostingClassifier & 0.71 & RandomForestClassifier & 0.76 \\
 & AdaBoostClassifier & 0.76 & KNeighborsClassifier & 0.67 \\
 & MLPClassifier & 0.62 & LogisticRegression & 0.81 \\
 & SGDClassifier & 0.62 & SVC & 0.67 \\
 & Gauss

In [None]:
t