In [9]:
"""
Average F1 Score for the six motors.
"""

import pandas as pd
import pandas.api.types
from sklearn.metrics import f1_score
import numpy as np


class ParticipantVisibleError(Exception):
    '''Error whose message is intended to be shown to competition participants.

    Raise this type whenever participants should see the error message.
    All other errors are only shown to the competition host, which helps
    prevent unintentional leakage of solution data.
    '''


def cal_classification_perf(y_true, y_pred):
    ''' ### Description
    Compute the F1 score of the predicted labels against the true labels using `f1_score`.
    The `zero_division` argument handed to `f1_score` is chosen from the inputs:
    it is 1 when both `y_true` and `y_pred` sum to zero, and 0 in every other case.

    ### Parameters:
    - y_true: The true labels.
    - y_pred: The predicted labels.

    ### Return:
    - f1: The F1 score.
    '''
    # Both sums being zero means (for 0/1 labels) that there is nothing to detect and
    # nothing was predicted; only in that case is a division by zero resolved to 1.
    both_sum_to_zero = sum(y_true) == 0 and sum(y_pred) == 0
    zero_division_value = 1 if both_sum_to_zero else 0

    return f1_score(y_true, y_pred, zero_division=zero_division_value)


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    '''
    Average the per-motor F1 scores over the six `data_motor_<k>_label` columns (k = 1..6).

    The id column is dropped from local copies, so the caller's DataFrames are left untouched.
    If the submission has fewer rows than the solution, each prediction column is padded
    with zeros at the beginning until the lengths match.

    Side effects: prints the per-motor F1 table and writes it to 'f1_per_motor.csv'
    in the current working directory.

    ### Parameters:
    - solution: Ground-truth frame holding the id column and the six label columns.
    - submission: Predicted frame with the same columns.
    - row_id_column_name: Name of the id column to discard before scoring.

    ### Return:
    - The arithmetic mean of the six per-motor F1 scores.

    ### Raises:
    - ParticipantVisibleError: if the submission lacks a label column or has more rows than the solution.
    - KeyError: if the id column is absent from either frame, or the solution lacks a label column.

    >>> import pandas as pd
    >>> num_rows = 5
    >>> data = {'idx': list(range(num_rows))}
    >>> for i in range(1, 7):
    ...     column_name = f'data_motor_{i}_label'
    ...     data[column_name] = [1] * num_rows
    >>> solution = pd.DataFrame(data)
    >>> submission = solution
    >>> score(solution.copy(), submission.copy(), 'idx')  # doctest: +NORMALIZE_WHITESPACE
    F1 score for each motor:
       motor_1  motor_2  motor_3  motor_4  motor_5  motor_6
    0      1.0      1.0      1.0      1.0      1.0      1.0
    1.0
    '''
    motor_ids = range(1, 7)
    label_columns = [f'data_motor_{k}_label' for k in motor_ids]

    # drop() returns new frames, so the DataFrames owned by the caller are not mutated.
    solution = solution.drop(columns=row_id_column_name)
    submission = submission.drop(columns=row_id_column_name)

    # Problems caused by the submission itself are reported with an error participants can see.
    missing_columns = [name for name in label_columns if name not in submission.columns]
    if missing_columns:
        raise ParticipantVisibleError(
            f'Submission is missing required column(s): {missing_columns}'
        )

    # Every column of a DataFrame shares the same length, so the check is done once.
    n_true, n_pred = len(solution), len(submission)
    if n_pred > n_true:
        raise ParticipantVisibleError(
            f'Submission has {n_pred} rows but at most {n_true} are expected.'
        )
    padding_length = n_true - n_pred

    results = []
    for col_name in label_columns:
        y_true = solution[col_name].values
        y_pred = submission[col_name].values

        # A shorter prediction is padded with zeros at the beginning.
        if padding_length:
            padding = np.zeros(padding_length, dtype=y_pred.dtype)
            y_pred = np.concatenate((padding, y_pred))

        results.append(cal_classification_perf(y_true, y_pred))

    df_results = pd.DataFrame(data=[results], columns=[f'motor_{k}' for k in motor_ids])
    print('F1 score for each motor:')
    print(df_results)

    df_results.to_csv('f1_per_motor.csv')

    score_result = sum(results)/len(results)

    return score_result

In [8]:
# Unit test: a submission identical to the solution must reach the perfect score of 1.0.

import pandas as pd

num_rows = 5
data = {'idx': list(range(num_rows))}
for i in range(1, 7):
    column_name = f'data_motor_{i}_label'
    data[column_name] = [1] * num_rows

solution = pd.DataFrame(data)
submission = solution
perfect_score = score(solution.copy(), submission.copy(), 'idx')

# Fail loudly on a regression instead of relying on someone reading the displayed value.
assert perfect_score == 1.0, f'Expected a perfect score of 1.0, got {perfect_score}'
perfect_score

F1 score for each motor:
   motor_1  motor_2  motor_3  motor_4  motor_5  motor_6
0      1.0      1.0      1.0      1.0      1.0      1.0


1.0