# Overview

Compute F1-score for each subtask

In [2]:
import pandas as pd
from sklearn.metrics import f1_score

def compute_score(hazards_true, products_true, hazards_pred, products_pred):
    """Return the combined hazard/product score for one subtask.

    The score is the mean of two macro-averaged F1 values:

    * the hazard F1, computed over all samples, and
    * the product F1, computed only over the samples whose hazard was
      predicted correctly.

    Parameters
    ----------
    hazards_true, products_true : array-like of labels
        Ground-truth hazard and product labels, one entry per sample.
    hazards_pred, products_pred : array-like of labels
        Predicted hazard and product labels, in the same sample order as
        the ground-truth inputs.

    Returns
    -------
    float
        ``(f1_hazards + f1_products) / 2``.

    Raises
    ------
    ValueError
        If the four inputs do not all have the same length.
    """
    # Work on plain positional arrays so that lists, arrays and Series
    # (whatever their index) are all compared sample-by-sample.
    hazards_true, products_true, hazards_pred, products_pred = (
        pd.Series(labels).to_numpy()
        for labels in (hazards_true, products_true, hazards_pred, products_pred)
    )

    sizes = (len(hazards_true), len(products_true), len(hazards_pred), len(products_pred))
    if len(set(sizes)) != 1:
        raise ValueError(
            "All label inputs must have the same length, got "
            f"hazards_true={sizes[0]}, products_true={sizes[1]}, "
            f"hazards_pred={sizes[2]}, products_pred={sizes[3]}"
        )

    f1_hazards = f1_score(hazards_true, hazards_pred, average='macro')

    # Products are only scored where the hazard prediction was right.
    correct_hazards = hazards_pred == hazards_true
    if correct_hazards.any():
        f1_products = f1_score(
            products_true[correct_hazards],
            products_pred[correct_hazards],
            average='macro',
        )
    else:
        # F1 over an empty selection is not well-defined; with no correct
        # hazard there is nothing to credit, so the product part scores 0.
        f1_products = 0.0

    return (f1_hazards + f1_products) / 2.0

def load_labels(file_path):
    """Read a predictions CSV and return its label columns.

    Parameters
    ----------
    file_path : str or path-like
        CSV file containing ``true_label`` and ``predicted_label`` columns.

    Returns
    -------
    tuple of pandas.Series
        ``(true_label column, predicted_label column)``, in that order.
    """
    predictions = pd.read_csv(file_path)
    true_labels = predictions["true_label"]
    predicted_labels = predictions["predicted_label"]
    return true_labels, predicted_labels

# --- Subtask 1: load true/predicted labels for hazards and products, then score ---
hazards_true_st1, hazards_pred_st1 = load_labels("predictions_st1_hazard.csv")
products_true_st1, products_pred_st1 = load_labels("predictions_st1_product.csv")

f1_score_st1 = compute_score(
    hazards_true=hazards_true_st1,
    products_true=products_true_st1,
    hazards_pred=hazards_pred_st1,
    products_pred=products_pred_st1,
)
print(f"F1 Score for first pair: {f1_score_st1}")

# --- Subtask 2: same procedure on the second pair of prediction files ---
hazards_true_st2, hazards_pred_st2 = load_labels("predictions_st2_hazard.csv")
products_true_st2, products_pred_st2 = load_labels("predictions_st2_product.csv")

f1_score_st2 = compute_score(
    hazards_true=hazards_true_st2,
    products_true=products_true_st2,
    hazards_pred=hazards_pred_st2,
    products_pred=products_pred_st2,
)
print(f"F1 Score for second pair: {f1_score_st2}")

F1 Score for first pair: 0.693020223006763
F1 Score for second pair: 0.29459511331956634


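The combined score for the first subtask (__0.69__) is significantly higher than that for the second subtask (__0.29__). Each score is the average of the hazard macro-F1 and the product macro-F1, where the product F1 is computed only on samples whose hazard was predicted correctly. The gap is primarily because the models in the first subtask were trained more effectively and faced less class imbalance, resulting in better overall performance.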

Pass the predictions to the submission CSVs

In [3]:
# Load the per-label prediction tables for both subtasks.
st1_hazard = pd.read_csv("predictions_st1_hazard.csv")
st1_product = pd.read_csv("predictions_st1_product.csv")
st2_hazard = pd.read_csv("predictions_st2_hazard.csv")
st2_product = pd.read_csv("predictions_st2_product.csv")

# A submission must carry the model's predictions, so use the
# "predicted_label" column rather than the ground-truth "true_label".
# NOTE(review): confirm the output column names match the required
# submission format.
submission1 = pd.DataFrame({
    "hazard_category": st1_hazard["predicted_label"],
    "product_category": st1_product["predicted_label"]
})
submission1.to_csv("submission1.csv", index=False)

submission2 = pd.DataFrame({
    "hazard": st2_hazard["predicted_label"],
    "product": st2_product["predicted_label"]
})
submission2.to_csv("submission2.csv", index=False)