In [68]:
import pandas as pd
import numpy as np
import os

In [78]:
# define the filtes to be used
data_filter = ['uwave', 'har'] # 
cycles_filter = ['5'] # 5, 10

In [99]:
filtered_files = []
for foldername in data_filter:
    files = os.listdir(foldername)
    for file in files:
        if file.split('_')[-2] in cycles_filter:
            filtered_files.append(os.path.join(foldername, file))
print(filtered_files)

['uwave/Uncertainty_margin_cycle_5_uwave.csv', 'uwave/RandomIter_cycle_5_uwave.csv', 'uwave/Random_cycle_5_uwave.csv', 'uwave/Uncertainty_least_confidence_cycle_5_uwave.csv', 'uwave/Uncertainty_entropy_cycle_5_uwave.csv', 'har/Uncertainty_least_confidence_cycle_5_har.csv', 'har/Uncertainty_margin_cycle_5_har.csv', 'har/Uncertainty_entropy_cycle_5_har.csv', 'har/RandomIter_cycle_5_har.csv', 'har/Random_cycle_5_har.csv']


In [100]:
def calculate_average_forgetting(df):
    """
    Calculate the average forgetting for each cycle in the DataFrame.

    :param df: DataFrame containing task accuracies and cycles.
    :return: List of average forgetting values for each cycle.
    """
    # Identify task accuracy columns
    task_columns = [col for col in df.columns if col.startswith("task_")]
    num_tasks = len(task_columns)

    # Initialize a list to store forgetting values
    forgetting_values = []

    # Iterate through each cycle (row in the DataFrame)
    for i in range(len(df)):
        forgetting_sum = 0
        count = 0

        # Iterate through all previous tasks (j < i)
        for j in range(num_tasks):
            # Only consider tasks that have been learned before (non-zero values in previous rows)
            if df.iloc[:i, df.columns.get_loc(task_columns[j])].max() > 0:
                max_akj = df.iloc[:i, df.columns.get_loc(task_columns[j])].max()  # max accuracy for task j before task i
                aij = df.iloc[i, df.columns.get_loc(task_columns[j])]  # accuracy for task j after learning task i
                forgetting_sum += max_akj - aij
                count += 1

        # Calculate average forgetting for the current cycle
        avg_forgetting = forgetting_sum / count if count > 0 else 0
        forgetting_values.append(avg_forgetting)

    return forgetting_values

In [102]:
for path in filtered_files:
    print(path)
    df = pd.read_csv(path)
    df = df.groupby(['task', 'cycle',]).mean() # mean over all (default=5) runs
    df.drop(columns=['run'], inplace=True)
    # unpack task column
    df['task'] = df.index.get_level_values(0)
    df['cycle'] = df.index.get_level_values(1)
    df = df.reset_index(drop=True)
    task_cols = [c for c in df.columns if c.startswith('task_')]

    # calculate the average learning accuracy for each cycle
    df['average_learning_accuracy'] = df.apply(
    lambda row: np.mean(row[task_cols]), axis=1)

    df['average_accuracy'] = df.apply(
    lambda row: np.mean([v for v in row[task_cols] if v > 1e-5]), axis=1)

    # calculate the average forgetting for each cycle
    forgetting_values = calculate_average_forgetting(df)
    df['average_forgetting'] = forgetting_values

    dir = os.path.dirname(path)
    file = path[len(dir)+1:]
    df.to_csv(os.path.join(dir+'_scores', 'score_'+ file), index=False)
    
    

uwave/Uncertainty_margin_cycle_5_uwave.csv
uwave/RandomIter_cycle_5_uwave.csv
uwave/Random_cycle_5_uwave.csv
uwave/Uncertainty_least_confidence_cycle_5_uwave.csv
uwave/Uncertainty_entropy_cycle_5_uwave.csv
har/Uncertainty_least_confidence_cycle_5_har.csv
har/Uncertainty_margin_cycle_5_har.csv
har/Uncertainty_entropy_cycle_5_har.csv
har/RandomIter_cycle_5_har.csv
har/Random_cycle_5_har.csv
