In [34]:
import os
import pandas as pd

# Define data reading function.
def combine_csv(folder_path, test_condition):
    """
    Combine all CSV files in a folder into a single DataFrame.
    :param folder_path: Path to the folder containing the CSV files
    :param seq_idx: Sequence index
    :param label: Label of the sequence (Normal - 0, Abnormal - 1)
    :return: A single DataFrame containing all the data from the CSV files
    """

    # Get a list of all CSV files in the folder
    csv_files = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

    # Create an empty DataFrame to store the combined data
    combined_df = pd.DataFrame()

    # Iterate over the CSV files in the folder
    for file in csv_files:
        # Construct the full path to each CSV file
        file_path = os.path.join(folder_path, file)

        # Read each CSV file into a DataFrame
        df = pd.read_csv(file_path)
        # Drop the time. Will add later.
        df = df.drop(labels=df.columns[0], axis=1)

        # Extract the file name (excluding the extension) to use as a prefix
        file_name = os.path.splitext(file)[0]

        # Add a prefix to each column based on the file name
        df = df.add_prefix(f'{file_name}_')

        # Concatenate the current DataFrame with the combined DataFrame
        combined_df = pd.concat([combined_df, df], axis=1)

    df = pd.read_csv(file_path)
    combined_df = pd.concat([df['time'], combined_df], axis=1)
    combined_df.loc[:, 'test_condition'] = test_condition

    return combined_df       

In [35]:
# Read the true labels.

path_header_1 = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\Prepared data\collected_data_group_1\true\\'
df_true_1 = combine_csv(path_header_1, 'unknown')

path_header_2 = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\Prepared data\collected_data_group_2\true\\'
df_true_2 = combine_csv(path_header_2, 'unknown')

path_header_3 = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\Prepared data\collected_data_group_3\true\\'
df_true_3 = combine_csv(path_header_3, 'unknown')
df_true_3.columns = df_true_1.columns

In [36]:
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score

def evaluate_prediction(df_true, df_pred):
    perf = pd.DataFrame(columns=['Motor', 'accuracy', 'precision','recall', 'f1'])
    for i in range(1, 7):
        y_true = df_true.iloc[:, 4*i]
        y_pred = df_pred.iloc[:, 4*i]
        acc = accuracy_score(y_true, y_pred)
        pre = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred)
        tmp_result = pd.DataFrame([['Motor_{}'.format(i), acc, pre, recall, f1]], 
                              columns=['Motor', 'accuracy', 'precision','recall', 'f1'])
        perf = pd.concat([perf, tmp_result], ignore_index=True)

    return perf


def evaluate_prediction_all(df_true, df_pred):
    y_true = []
    y_pred = []
    for i in range(1, 7):
        y_true.extend(df_true.iloc[:, 4*i].to_list())
        y_pred.extend(df_pred.iloc[:, 4*i].to_list())
        
    acc = accuracy_score(y_true, y_pred)
    pre = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    perf = pd.DataFrame([[acc, pre, recall, f1]], 
                            columns=['accuracy', 'precision','recall', 'f1'])

    return perf


def print_results(results, dictionary_name):
    print(dictionary_name)
    for file_name, performance in results.items():
        print(file_name)
        print(performance)
        print()

In [37]:
# Evaluate group 1

# Read the predictions
path_header = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\results\group_1\results_group_1\results_group_1\testing_data_from_group_2'
df_pred_2 = combine_csv(path_header, 'unknown')
perf_2 = evaluate_prediction(df_true_2, df_pred_2)

path_header = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\results\group_1\results_group_1\results_group_1\testing_data_from_group_3'
df_pred_3 = combine_csv(path_header, 'unknown')
df_pred_3.drop(columns=['data_motor_1_ label', 'data_motor_2_ label', 'data_motor_3_ label',
                        'data_motor_4_ label', 'data_motor_5_ label', 'data_motor_6_ label'], inplace=True)
df_pred_3.columns = df_pred_2.columns
perf_3 = evaluate_prediction(df_true_3, df_pred_3)

perf_combined = evaluate_prediction_all(pd.concat([df_true_2, df_true_3], ignore_index=True), pd.concat([df_pred_2, df_pred_3], ignore_index=True))

perf_group_1 = {'Dataset_2': perf_2, 'Dataset_3': perf_3, 'Overall': perf_combined}
print_results(perf_group_1, 'Group 1:')

Group 1:
Dataset_2
     Motor  accuracy  precision  recall   f1
0  Motor_1  0.842352        0.0     0.0  0.0
1  Motor_2  0.850156        0.0     0.0  0.0
2  Motor_3  0.777315        0.0     0.0  0.0
3  Motor_4  0.823621        0.0     0.0  0.0
4  Motor_5  0.805931        0.0     0.0  0.0
5  Motor_6  0.827784        0.0     0.0  0.0

Dataset_3
     Motor  accuracy  precision    recall        f1
0  Motor_1  0.946942   0.000000  0.000000  0.000000
1  Motor_2  0.870803   0.000000  0.000000  0.000000
2  Motor_3  0.824640   0.000000  0.000000  0.000000
3  Motor_4  0.827038   0.000000  0.000000  0.000000
4  Motor_5  0.953837   0.000000  0.000000  0.000000
5  Motor_6  0.667866   0.343284  0.256125  0.293367

Overall
   accuracy  precision    recall        f1
0  0.838532   0.282209  0.048533  0.082823



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [38]:
# Evaluate group 2

# Read the predictions
path_header = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\results\group_2\results_group_2\testing_data_from_group_1'
df_pred_1 = combine_csv(path_header, 'unknown')
perf_1 = evaluate_prediction(df_true_1, df_pred_1)

path_header = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\results\group_2\results_group_2\testing_data_from_group_3'
df_pred_3 = combine_csv(path_header, 'unknown')
perf_3 = evaluate_prediction(df_true_3, df_pred_3)

perf_combined = evaluate_prediction_all(pd.concat([df_true_1, df_true_3], ignore_index=True), pd.concat([df_pred_1, df_pred_3], ignore_index=True))

perf_group_2 = {'Dataset_1': perf_1, 'Dataset_3': perf_3, 'Overall': perf_combined}
print_results(perf_group_2, 'Group 2:')

Group 2:
Dataset_1
     Motor  accuracy  precision    recall        f1
0  Motor_1  0.873803   0.000000  0.000000  0.000000
1  Motor_2  0.828982   0.217890  0.646259  0.325901
2  Motor_3  0.364665   0.102066  0.862434  0.182531
3  Motor_4  0.477372   0.002938  0.016129  0.004971
4  Motor_5  0.876849   0.300505  0.952000  0.456814
5  Motor_6  0.794169   0.248796  0.968750  0.395913

Dataset_3
     Motor  accuracy  precision    recall        f1
0  Motor_1  0.946942   0.000000  0.000000  0.000000
1  Motor_2  0.721223   0.264486  0.664319  0.378342
2  Motor_3  0.806954   0.441815  0.382906  0.410256
3  Motor_4  0.769484   0.000000  0.000000  0.000000
4  Motor_5  0.624700   0.029966  0.227273  0.052950
5  Motor_6  0.269484   0.269265  1.000000  0.424285

Overall
   accuracy  precision    recall        f1
0  0.695036   0.190784  0.505503  0.277018



  _warn_prf(average, modifier, msg_start, len(result))


In [39]:
# Evaluate group 3

# Read the predictions
path_header = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\results\group_3\results_group_3\testing_data_from_group_1\prediction'
df_pred_1 = combine_csv(path_header, 'unknown')
perf_1 = evaluate_prediction(df_true_1, df_pred_1)

path_header = r'C:\Users\Zhiguo\OneDrive - CentraleSupelec\Code\Python\digital_twin_robot\course_project_DARR\Evaluation\Data challenge\results\group_3\results_group_3\testing_data_from_group_2\prediction'
df_pred_2 = combine_csv(path_header, 'unknown')
perf_2 = evaluate_prediction(df_true_2, df_pred_2)

perf_combined = evaluate_prediction_all(pd.concat([df_true_1, df_true_2], ignore_index=True), pd.concat([df_pred_1, df_pred_2], ignore_index=True))

perf_group_3 = {'Dataset_1': perf_1, 'Dataset_2': perf_2, 'Overall': perf_combined}
print_results(perf_group_3, 'Group 3:')

Group 3:
Dataset_1
     Motor  accuracy  precision    recall        f1
0  Motor_1  0.683203   0.206077  0.551237  0.300000
1  Motor_2  0.554830   0.125641  1.000000  0.223235
2  Motor_3  0.745866   0.000000  0.000000  0.000000
3  Motor_4  0.412097   0.032879  0.220430  0.057223
4  Motor_5  0.637946   0.130617  1.000000  0.231054
5  Motor_6  0.561358   0.135112  0.981250  0.237519

Dataset_2
     Motor  accuracy  precision    recall        f1
0  Motor_1  0.791363   0.304762  0.345324  0.323777
1  Motor_2  0.478668   0.170664  0.642361  0.269679
2  Motor_3  0.830905   0.490501  0.904459  0.636058
3  Motor_4  0.637357   0.242075  0.495575  0.325266
4  Motor_5  0.400104   0.140884  0.410188  0.209733
5  Motor_6  0.511967   0.236568  0.827273  0.367925

Overall
   accuracy  precision    recall        f1
0  0.603397   0.168396  0.592629  0.262269



In [40]:
# Here we evaluate the performance of dataset.

import numpy as np

def eval_perf_dataset(perfs, dataset_name, perf_dataset):
    tmp_f1 = []
    for perf in perfs:
        tmp_f1.extend(perf[dataset_name]['f1'].to_list())
    
    avr_f1 = np.mean(tmp_f1)
    perf_dataset = pd.concat([perf_dataset, pd.DataFrame({'dataset': [dataset_name], 'Averaged f1': [avr_f1]})])

    return perf_dataset



perf_dataset = pd.DataFrame(columns=['dataset', 'Averaged f1'])
# Dataset 1
perfs = [perf_group_2, perf_group_3]
dataset_name = 'Dataset_1'
perf_dataset = eval_perf_dataset(perfs, dataset_name, perf_dataset)
# Dataset 2
perfs = [perf_group_1, perf_group_3]
dataset_name = 'Dataset_2'
perf_dataset = eval_perf_dataset(perfs, dataset_name, perf_dataset)
# Dataset 3
perfs = [perf_group_1, perf_group_2]
dataset_name = 'Dataset_3'
perf_dataset = eval_perf_dataset(perfs, dataset_name, perf_dataset)

print(perf_dataset)


     dataset  Averaged f1
0  Dataset_1     0.201263
0  Dataset_2     0.177703
0  Dataset_3     0.129933
