In [10]:
import pandas as pd
import numpy as np
import os

In [11]:
# Each immediate sub-directory of 'data' is treated as one dataset.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the processing order stable from run to run.
datasets = sorted(
    name for name in os.listdir('data')
    if os.path.isdir(os.path.join('data', name))
)

In [12]:
def get_err(evaluation_df, seqID, llda):
    """Return ``(n_labels, n_errs)`` for one sequence at a given log-lambda.

    The rows of ``evaluation_df`` whose ``sequenceID`` equals ``seqID`` are
    searched for the one whose half-open interval
    ``[min.log.lambda, max.log.lambda)`` contains ``llda``; that row's
    ``labels`` and ``errors`` values are returned.

    Args:
        evaluation_df: DataFrame with the columns ``sequenceID``,
            ``min.log.lambda``, ``max.log.lambda``, ``labels`` and ``errors``.
        seqID: Sequence identifier to look up.
        llda: Log-lambda value to locate among the sequence's intervals.

    Returns:
        ``(n_labels, n_errs)`` from the matching row, or ``(0, 0)`` when
        ``evaluation_df`` has no row for ``seqID``.

    Raises:
        ValueError: If the sequence has rows but not exactly one of them
            contains ``llda`` (for example a NaN ``llda``, a gap between
            intervals, or overlapping intervals).
    """
    seq_rows = evaluation_df[evaluation_df['sequenceID'] == seqID]
    if seq_rows.empty:
        # Sequence is not part of the evaluation table: it contributes nothing.
        return 0, 0

    in_interval = (seq_rows['min.log.lambda'] <= llda) & (llda < seq_rows['max.log.lambda'])
    matched = seq_rows[in_interval]

    # .item() needs exactly one element; check first so the failure names the
    # offending sequence instead of surfacing as an opaque size error.
    if len(matched) != 1:
        raise ValueError(
            f"Expected exactly one evaluation row for sequenceID {seqID!r} "
            f"containing log-lambda {llda}, found {len(matched)}."
        )

    return matched['labels'].item(), matched['errors'].item()

def get_acc(eval_df, lldas_df):
    """Return the label accuracy, in percent, of the predicted log-lambdas.

    For every row of ``lldas_df`` the predicted value in ``pred`` is passed to
    ``get_err`` together with the row's ``sequenceID``; the label and error
    counts are summed over all sequences and the accuracy is
    ``(total_labels - total_errors) / total_labels * 100``.

    Args:
        eval_df: Evaluation table forwarded unchanged to ``get_err``.
        lldas_df: DataFrame with the columns ``sequenceID`` and ``pred``,
            holding one prediction per sequence.

    Returns:
        Accuracy as a percentage, or ``nan`` when no labels were found at all
        (the ratio is undefined in that case).

    Raises:
        ValueError: If ``lldas_df`` holds more than one row for a sequenceID.
    """
    # One prediction per sequence is required; fail early with a clear message
    # rather than silently counting a sequence twice.
    if not lldas_df['sequenceID'].is_unique:
        raise ValueError(
            "lldas_df contains duplicated sequenceID values; "
            "expected exactly one prediction per sequence."
        )

    total_labels = 0
    total_err = 0
    # Walk both columns in lockstep instead of re-filtering the frame for each
    # sequenceID, which made the loop quadratic in the number of sequences.
    for seqID, llda in zip(lldas_df['sequenceID'], lldas_df['pred']):
        n_labels, n_errs = get_err(eval_df, seqID, llda)
        total_labels += n_labels
        total_err += n_errs

    if total_labels == 0:
        # No labelled sequence was evaluated: avoid dividing by zero.
        return float('nan')

    return (total_labels - total_err) / total_labels * 100

def add_row_to_csv(file_path, columns, row):
    """Append ``row`` to a CSV file unless its (method, fold) pair is present.

    The file at ``file_path`` is loaded, or an empty table with ``columns`` is
    started when the file is missing or contains no data. If a row with the
    same ``method`` and ``fold`` already exists, a warning is printed and the
    file is left untouched; otherwise the row is appended and the whole table
    is written back without the index.

    Args:
        file_path: Path of the CSV file to update. Missing parent directories
            are created before writing.
        columns: Column names used when a new table has to be created.
        row: Values for one row; ``row[0]`` is the method and ``row[1]`` the
            fold used for the duplicate check.

    Returns:
        None.
    """
    try:
        existing_df = pd.read_csv(file_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No file yet, or a zero-byte file: start from an empty table.
        existing_df = pd.DataFrame(columns=columns)

    method, fold = row[0], row[1]

    if not existing_df.empty:
        already_recorded = (existing_df['method'] == method) & (existing_df['fold'] == fold)
        if already_recorded.any():
            print(f"Warning: Entry for method '{method}' and fold '{fold}' already exists in '{file_path}'. Row not added.")
            return

    # Label-based assignment at the next integer position appends the row
    # in place without building a second DataFrame.
    existing_df.loc[len(existing_df)] = row

    # to_csv does not create directories, so make sure the target one exists.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    existing_df.to_csv(file_path, index=False)



# For every dataset and every test fold, score the stored predictions and
# record the accuracy in that dataset's results CSV.
for dataset in datasets:
    # Fold assignment (sequenceID -> fold) and the per-sequence evaluation table.
    fold_df = pd.read_csv(f'data/{dataset}/folds.csv')
    # '#NAME?' cells are read as missing values; a missing lower bound is then
    # replaced by -inf so the first interval is open on the left.
    # NOTE(review): '#NAME?' presumably comes from a spreadsheet mangling
    # '-Inf' -- confirm against how evaluation.csv is produced.
    evaluation_df = pd.read_csv(f'data/{dataset}/evaluation.csv', na_values=['#NAME?'])
    evaluation_df['min.log.lambda'] = evaluation_df['min.log.lambda'].fillna(-np.inf)

    for test_fold in sorted(fold_df['fold'].unique()):
        # Predictions stored for this dataset / test fold.
        pred_df = pd.read_csv(f'model/final_predictions/{dataset}.{test_fold}.csv')

        # Keep only the evaluation rows of sequences assigned to the test fold.
        test_ids = fold_df.loc[fold_df['fold'] == test_fold, 'sequenceID']
        eval_df = evaluation_df[evaluation_df['sequenceID'].isin(test_ids)]

        # Accuracy (in percent) of the predictions on the test-fold sequences.
        acc = get_acc(eval_df, pred_df)

        # Record the result under the 'cnn' method for this fold.
        add_row_to_csv(
            f'csvs/csvs_proposed/{dataset}.csv',
            ['method', 'fold', 'acc'],
            ['cnn', test_fold, acc],
        )