In [None]:
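# Load the validation and test datasets the same way as for the other transformer models
# (the next cell reads the validation labels from `df_val["label"]`).
# Then upload the .npy probability files that each model saved to the drive.
# This notebook builds a weighted ensemble of the three transformer models.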

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

# Load the per-model probability arrays saved by CodeBERT, GraphCodeBERT, and UniXcoder.
val_probs_codebert       = np.load("val_probs_codebert.npy")
val_probs_graphcodebert  = np.load("val_probs_graphcodebert.npy")
val_probs_unixcoder      = np.load("val_probs_unixcoder.npy")

test_probs_codebert      = np.load("test_probs_codebert.npy")
test_probs_graphcodebert = np.load("test_probs_graphcodebert.npy")
test_probs_unixcoder     = np.load("test_probs_unixcoder.npy")

print("val_probs_codebert shape      :", val_probs_codebert.shape)
print("val_probs_graphcodebert shape :", val_probs_graphcodebert.shape)
print("val_probs_unixcoder shape     :", val_probs_unixcoder.shape)

print("test_probs_codebert shape      :", test_probs_codebert.shape)
print("test_probs_graphcodebert shape :", test_probs_graphcodebert.shape)
print("test_probs_unixcoder shape     :", test_probs_unixcoder.shape)

# Validation labels.
# NOTE: `df_val` is not created in this cell; it must already exist in the kernel
# (loaded by an earlier cell) and contain a "label" column.
y_val = df_val["label"].astype(int).values
print("y_val shape:", y_val.shape)

# Every model must provide exactly one row per validation example.
assert val_probs_codebert.shape[0] == len(y_val), "CodeBERT val rows != number of labels"
assert val_probs_graphcodebert.shape[0] == len(y_val), "GraphCodeBERT val rows != number of labels"
assert val_probs_unixcoder.shape[0] == len(y_val), "UniXcoder val rows != number of labels"

# The ensemble adds these arrays element-wise. NumPy silently broadcasts shapes that are
# different but compatible, so insist on identical shapes instead of only equal row counts.
assert (
    val_probs_codebert.shape == val_probs_graphcodebert.shape == val_probs_unixcoder.shape
), "validation probability arrays have different shapes"
assert (
    test_probs_codebert.shape == test_probs_graphcodebert.shape == test_probs_unixcoder.shape
), "test probability arrays have different shapes"

# Weights are tuned on validation and reused on test, so the non-row dimensions must agree.
assert (
    test_probs_codebert.shape[1:] == val_probs_codebert.shape[1:]
), "validation and test probability arrays disagree on the class dimension"

# Grid search over ensemble weights: keep the combination with the highest
# macro F1 on the validation set.
best_f1 = -1.0
best_w  = None
best_val_probs_ens = None

# Number of equal steps between 0 and 1 (10 -> step size 0.1, which is usually enough).
n_steps = 10
# Candidate weight values k / n_steps for k = 0..n_steps.
grid = np.arange(n_steps + 1) / n_steps

# Iterate over integer step counts (i, j, k) with i + j + k == n_steps rather than over
# float weights: the constraint "weights sum to 1" is then decided with exact integer
# arithmetic, and the third weight can never become negative through float rounding.
for i in range(n_steps + 1):            # CodeBERT step count
    for j in range(n_steps + 1 - i):    # GraphCodeBERT step count
        k = n_steps - i - j             # UniXcoder takes the remaining steps
        w1, w2, w3 = grid[i], grid[j], grid[k]

        # Ensemble probabilities on validation.
        val_probs_ens = (
            w1 * val_probs_codebert +
            w2 * val_probs_graphcodebert +
            w3 * val_probs_unixcoder
        )

        val_preds_ens = val_probs_ens.argmax(axis=-1)
        f1 = f1_score(y_val, val_preds_ens, average="macro")

        # Strict ">" keeps the first combination found when several tie.
        if f1 > best_f1:
            best_f1 = f1
            best_w  = (w1, w2, w3)
            best_val_probs_ens = val_probs_ens

print("\nbest ensemble weights(CodeBERT, GraphCodeBERT, UniXcoder):", best_w)
print(f"best validation macro F1: {best_f1:.4f}")

# Report validation metrics for the best-scoring weighted ensemble, then persist its probabilities.
val_preds_best = np.argmax(best_val_probs_ens, axis=-1)
val_acc_best = accuracy_score(y_val, val_preds_best)

print(
    f"\nensemble Validation Accuracy : {val_acc_best:.4f}\n"
    f"ensemble Validation Macro F1 : {best_f1:.4f}"
)

# Confusion matrix: header and matrix on separate lines.
cm = confusion_matrix(y_val, val_preds_best)
print("\nensemble confusion matrix (rows=true, cols=pred):", cm, sep="\n")

print("\nEnsemble Classification Report:")
report_text = classification_report(y_val, val_preds_best, digits=4)
print(report_text)


# Save the ensembled validation probabilities.
val_ens_path = "val_probs_ensemble.npy"
np.save(val_ens_path, best_val_probs_ens)
print(f"\nSaved: {val_ens_path}")

# Apply the weights selected on validation to the test-set probabilities.
w1, w2, w3 = best_w

# Accumulate the weighted sum one model at a time (CodeBERT, GraphCodeBERT, UniXcoder).
test_probs_ensemble = w1 * test_probs_codebert
test_probs_ensemble = test_probs_ensemble + w2 * test_probs_graphcodebert
test_probs_ensemble = test_probs_ensemble + w3 * test_probs_unixcoder

# Predicted class = index of the largest ensembled probability along the last axis.
test_preds_ensemble = np.argmax(test_probs_ensemble, axis=-1).astype(int)

print(f"\nEnsemble Test predictions shape: {test_preds_ensemble.shape}")
print(f"First 10 ensemble test preds: {test_preds_ensemble[:10]}")

# Persist the ensembled test probabilities.
test_ens_path = "test_probs_ensemble.npy"
np.save(test_ens_path, test_probs_ensemble)
print(f"Saved: {test_ens_path}")

# Build the ensemble submission CSV from the sample submission template.
sample_filename = "sample_submission_a.csv"
sample_sub_ens = pd.read_csv(sample_filename)

n_template_rows = len(sample_sub_ens)
n_test_preds = len(test_preds_ensemble)

print(f"\nSample submission shape: {sample_sub_ens.shape}")
print(f"Number of ensemble test predictions: {n_test_preds}")

if n_template_rows == n_test_preds:
    # Use the column named "label" when present; otherwise fall back to the second column.
    label_col = "label" if "label" in sample_sub_ens.columns else sample_sub_ens.columns[1]
    sample_sub_ens[label_col] = test_preds_ensemble

    sub_path_ens = "subtask_a_ensemble.csv"
    sample_sub_ens.to_csv(sub_path_ens, index=False)
    print(f"\nsaved ensemble submission file: {sub_path_ens}")
    print(sample_sub_ens.head())
else:
    # Row counts disagree: report the mismatch; no submission file is written.
    print(f"Length mismatch: sample_sub rows: {n_template_rows} | test_preds: {n_test_preds}")
