In [1]:
import numpy as np
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam




In [2]:
# Generate synthetic multi-label data
X, y = make_multilabel_classification(n_samples=1000, n_features=20, n_classes=5, n_labels=2, random_state=42) #This will generate a dataset with 1000 samples, 20 features, and 5 classes, where each sample can belong to multiple classes.

In [3]:
y

array([[0, 0, 0, 1, 0],
       [1, 1, 1, 0, 0],
       [0, 0, 1, 1, 0],
       ...,
       [0, 1, 1, 1, 0],
       [1, 1, 0, 1, 1],
       [0, 1, 0, 1, 0]])

In [4]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42) # Split the data into training and validation sets

# Define a simple DNN model
model = Sequential([
    Dense(64, input_dim=20, activation='relu'),
    Dense(5, activation='sigmoid')  # 5 labels
]) # The output layer uses sigmoid activation for multi-label classification
model.compile(optimizer=Adam(), loss='binary_crossentropy')

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)





<keras.src.callbacks.History at 0x26b22597910>

In [5]:
# Predict probabilities
y_pred_prob = model.predict(X_val)
y_pred_prob



array([[0.02578093, 0.67672664, 0.9823122 , 0.24908255, 0.06403375],
       [0.01045672, 0.73873997, 0.69372004, 0.36369133, 0.53608286],
       [0.41757178, 0.57487947, 0.7272643 , 0.44635072, 0.1062218 ],
       [0.06357594, 0.3663895 , 0.5534002 , 0.5492387 , 0.02769893],
       [0.28916037, 0.855873  , 0.3108535 , 0.4189864 , 0.01361427],
       [0.9034678 , 0.66960853, 0.31976542, 0.20428815, 0.04613649],
       [0.36716416, 0.06180689, 0.79312295, 0.6763623 , 0.85165864],
       [0.2651025 , 0.48760405, 0.9933653 , 0.21358591, 0.18682338],
       [0.31379995, 0.65185076, 0.22457738, 0.64474684, 0.03173888],
       [0.13847218, 0.23831162, 0.34334904, 0.2926379 , 0.83392835],
       [0.06403661, 0.5269497 , 0.04978414, 0.03504786, 0.12480785],
       [0.63865334, 0.36747596, 0.7717239 , 0.03766093, 0.04781377],
       [0.8628452 , 0.35699904, 0.24490432, 0.8402396 , 0.19876693],
       [0.60192764, 0.17455739, 0.82435226, 0.26866016, 0.02616302],
       [0.11801277, 0.98574054, 0.

In [6]:
thresholds = []
for i in range(y.shape[1]):  # for each label
    best_thresh = 0.5 # this is the default threshold
    best_f1 = 0.0 # initialize best F1 score
    for t in np.arange(0.1, 0.9, 0.01): #this is the range of thresholds to test
        y_pred_bin = (y_pred_prob[:, i] > t).astype(int) # this will convert the probabilities to binary predictions based on the threshold
        f1 = f1_score(y_val[:, i], y_pred_bin) #this will calculate the F1 score for the current threshold
        if f1 > best_f1: # this will check if the current F1 score is better than the best one
            best_f1 = f1 # this will update the best F1 score
            best_thresh = t # this will update the best threshold
    thresholds.append(best_thresh) # this will store the best threshold for the current label
    print(f"Label {i}: Best Threshold = {best_thresh:.2f}, F1-score = {best_f1:.4f}") # this will print the best threshold and F1 score for each label
thresholds

Label 0: Best Threshold = 0.54, F1-score = 0.7563
Label 1: Best Threshold = 0.37, F1-score = 0.8108
Label 2: Best Threshold = 0.35, F1-score = 0.8113
Label 3: Best Threshold = 0.39, F1-score = 0.7467
Label 4: Best Threshold = 0.41, F1-score = 0.6970


[0.5399999999999998,
 0.3699999999999999,
 0.34999999999999987,
 0.3899999999999999,
 0.4099999999999998]

In [None]:
y_pred_final = np.zeros_like(y_pred_prob) # this will create an array of zeros with the same shape as y_pred_prob
for i, t in enumerate(thresholds): # this will loop through each label and its corresponding threshold
    y_pred_final[:, i] = (y_pred_prob[:, i] > t).astype(int) # this will convert the probabilities to binary predictions based on the threshold

# Overall micro and macro F1
print("Micro F1-score:", f1_score(y_val, y_pred_final, average='micro')) # this will calculate the micro F1 score
print("Macro F1-score:", f1_score(y_val, y_pred_final, average='macro')) # this will calculate the macro F1 score

Micro F1-score: 0.7802340702210663
Macro F1-score: 0.7644140900345118


In [8]:
y_pred_final

array([[0., 1., 1., 0., 0.],
       [0., 1., 1., 0., 1.],
       [0., 1., 1., 1., 0.],
       [0., 0., 1., 1., 0.],
       [0., 1., 0., 1., 0.],
       [1., 1., 0., 0., 0.],
       [0., 0., 1., 1., 1.],
       [0., 1., 1., 0., 0.],
       [0., 1., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0.],
       [1., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0.],
       [1., 0., 1., 0., 0.],
       [0., 1., 1., 0., 0.],
       [0., 1., 1., 0., 0.],
       [0., 1., 1., 1., 0.],
       [0., 1., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 1., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 1.],
       [0., 0., 1., 1., 1.],
       [0., 1., 1., 0., 0.],
       [0., 1., 0., 1., 0.],
       [0., 1., 1., 1., 0.],
       [0., 1., 0., 1., 0.],
       [1., 1., 1., 0., 0.],
       [0., 1., 1., 1., 0.],
       [1., 1., 0., 1., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 1., 1.],
       [0., 1., 0., 0., 0.],
       [0., 0.

In [9]:
##thresholds = [0.27, 0.49, 0.41, 0.28, 0.31]
def multilabel_predict(X, model, thresholds):
    prob = model.predict(X)
    pred = np.zeros_like(prob)
    for i, t in enumerate(thresholds):
        pred[:, i] = (prob[:, i] > t).astype(int)
    return pred
y_pred_final = multilabel_predict(X_val, model, thresholds)
y_pred_final




array([[0., 1., 1., 0., 0.],
       [0., 1., 1., 0., 1.],
       [0., 1., 1., 1., 0.],
       [0., 0., 1., 1., 0.],
       [0., 1., 0., 1., 0.],
       [1., 1., 0., 0., 0.],
       [0., 0., 1., 1., 1.],
       [0., 1., 1., 0., 0.],
       [0., 1., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0.],
       [1., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0.],
       [1., 0., 1., 0., 0.],
       [0., 1., 1., 0., 0.],
       [0., 1., 1., 0., 0.],
       [0., 1., 1., 1., 0.],
       [0., 1., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 1., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 1.],
       [0., 0., 1., 1., 1.],
       [0., 1., 1., 0., 0.],
       [0., 1., 0., 1., 0.],
       [0., 1., 1., 1., 0.],
       [0., 1., 0., 1., 0.],
       [1., 1., 1., 0., 0.],
       [0., 1., 1., 1., 0.],
       [1., 1., 0., 1., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 1., 1.],
       [0., 1., 0., 0., 0.],
       [0., 0.

In [10]:
label_names = ["Label_0", "Label_1", "Label_2", "Label_3", "Label_4"]
readable_preds = []

for row in y_pred_final:
    labels = [label_names[i] for i, val in enumerate(row) if val == 1]
    readable_preds.append(labels)

# Display
for i, labels in enumerate(readable_preds):
    print(f"Sample {i}: {labels}")

Sample 0: ['Label_1', 'Label_2']
Sample 1: ['Label_1', 'Label_2', 'Label_4']
Sample 2: ['Label_1', 'Label_2', 'Label_3']
Sample 3: ['Label_2', 'Label_3']
Sample 4: ['Label_1', 'Label_3']
Sample 5: ['Label_0', 'Label_1']
Sample 6: ['Label_2', 'Label_3', 'Label_4']
Sample 7: ['Label_1', 'Label_2']
Sample 8: ['Label_1', 'Label_3']
Sample 9: ['Label_4']
Sample 10: ['Label_1']
Sample 11: ['Label_0', 'Label_2']
Sample 12: ['Label_0', 'Label_3']
Sample 13: ['Label_0', 'Label_2']
Sample 14: ['Label_1', 'Label_2']
Sample 15: ['Label_1', 'Label_2']
Sample 16: ['Label_1', 'Label_2', 'Label_3']
Sample 17: ['Label_1', 'Label_2']
Sample 18: ['Label_2']
Sample 19: ['Label_2', 'Label_3']
Sample 20: ['Label_2']
Sample 21: ['Label_2', 'Label_4']
Sample 22: ['Label_2', 'Label_3', 'Label_4']
Sample 23: ['Label_1', 'Label_2']
Sample 24: ['Label_1', 'Label_3']
Sample 25: ['Label_1', 'Label_2', 'Label_3']
Sample 26: ['Label_1', 'Label_3']
Sample 27: ['Label_0', 'Label_1', 'Label_2']
Sample 28: ['Label_1', 'L