In [3]:
"""
Average F1 Score for the six motors.
"""

import pandas as pd
import pandas.api.types
from sklearn.metrics import f1_score


class ParticipantVisibleError(Exception):
    """Error whose message is shown to competition participants.

    Raise this type when participants should see the message. All other
    errors will only be shown to the competition host, which helps prevent
    unintentional leakage of solution data.
    """


def cal_classification_perf(y_true, y_pred):
    ''' ### Description
    Compute the F1 score of `y_pred` against `y_true` using `f1_score`.

    The value handed to `f1_score` as `zero_division` depends on the inputs:
    when both `sum(y_true)` and `sum(y_pred)` equal zero (for 0/1 labels this
    means neither contains a positive) it is 1, in every other case it is 0.

    ### Parameters:
    - y_true: The true labels.
    - y_pred: The predicted labels.

    ### Return:
    - f1: The F1 score.
    '''
    # `and` short-circuits: y_pred is only summed when y_true already sums to zero.
    nothing_flagged = sum(y_true) == 0 and sum(y_pred) == 0
    fallback = 1 if nothing_flagged else 0
    return f1_score(y_true, y_pred, zero_division=fallback)


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str='idx') -> float:
    '''
    Average the per-motor F1 scores over the six `data_motor_<k>_label` columns.

    ### Parameters:
    - solution: Frame with the true labels; must contain the row ID column and
      the columns `data_motor_1_label` ... `data_motor_6_label`.
    - submission: Frame with the predicted labels, same columns as `solution`.
    - row_id_column_name: Name of the row ID column. It is dropped from local
      copies before scoring; the frames passed in by the caller are not modified.

    ### Return:
    - The unweighted mean of the six per-motor F1 scores, as a plain Python float.

    ### Raises:
    - KeyError: if the row ID column or any of the six label columns is missing
      from either frame.

    >>> import pandas as pd
    >>> num_rows = 5
    >>> data = {'idx': list(range(num_rows))}
    >>> for i in range(1, 7):
    ...     column_name = f'data_motor_{i}_label'
    ...     data[column_name] = [1] * num_rows
    >>> solution = pd.DataFrame(data)
    >>> submission = solution.copy()
    >>> score(solution, submission)
    1.0
    >>> 'idx' in solution.columns and 'idx' in submission.columns
    True
    '''

    # Kaggle's system uses the row ID column to align the solution and submission
    # before passing these dataframes to score(); it carries no label information.
    # DataFrame.drop returns a new frame, so the caller's objects stay intact, and
    # it still raises KeyError when the column is absent (as `del` did).
    solution = solution.drop(columns=[row_id_column_name])
    submission = submission.drop(columns=[row_id_column_name])

    label_columns = [f'data_motor_{motor}_label' for motor in range(1, 7)]
    per_motor_f1 = [
        cal_classification_perf(solution[col_name], submission[col_name])
        for col_name in label_columns
    ]

    # float() turns a possible NumPy scalar into the built-in float promised by
    # the signature, which also keeps the doctest output stable across versions.
    return float(sum(per_motor_f1) / len(per_motor_f1))


In [4]:
import pandas as pd

# Toy example: 5 rows in which every one of the six motor label columns is all 1s.
num_rows = 5
data = {'idx': list(range(num_rows))}
for i in range(1, 7):
    column_name = f'data_motor_{i}_label'
    data[column_name] = [1] * num_rows
    
solution = pd.DataFrame(data)
# `submission` is the very same object as `solution`, i.e. a perfect prediction.
submission = solution
# Pass copies so the frames defined here keep their 'idx' column regardless of
# what score() does to its arguments.
score(solution.copy(), submission.copy())

1.0

In [5]:
# Display the example frame; score() was given a copy, so 'idx' is still present here.
solution

Unnamed: 0,idx,data_motor_1_label,data_motor_2_label,data_motor_3_label,data_motor_4_label,data_motor_5_label,data_motor_6_label
0,0,1,1,1,1,1,1
1,1,1,1,1,1,1,1
2,2,1,1,1,1,1,1
3,3,1,1,1,1,1,1
4,4,1,1,1,1,1,1


In [6]:
# `column_name` is the loop variable left over from the cell that built `data`,
# so this shows the last label column created there ('data_motor_6_label').
data[column_name]

[1, 1, 1, 1, 1]