### **Train a deep neural network model in PyTorch for multi-label text classification** 

In [25]:
import pandas as pd
tweet = pd.read_csv('mLabel_tweets.csv', usecols=[1,2], names=['tweet', 'labels'],skiprows=1 )

### **Preprocess text before loding to model**


In [26]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer

nltk.download('stopwords')
nltk.download('wordnet')

stopwords = set(stopwords.words('english'))
lematizer = WordNetLemmatizer()

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    words = text.split()

    clean_words = [ lematizer.lemmatize(w) for w in words if w not in stopwords ]

    return " ".join(clean_words)

tweet['tweet'] = tweet['tweet'].astype(str).apply(preprocess_text)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\NCS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\NCS\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [27]:
from sklearn.model_selection import train_test_split

X = tweet['tweet']
y = tweet['labels']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((6944,), (2977,), (6944,), (2977,))

### **Multi Label Binarizer**


In [28]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()

y_train = mlb.fit_transform(y_train)
y_test  = mlb.transform(y_test)

num_classes = len(mlb.classes_)
print("Number of classes:", num_classes)

Number of classes: 21


### **Tokenize**

In [30]:
from nltk.tokenize import word_tokenize

def tokenize_text(docs):
    return [ [w.lower() for w in word_tokenize(text) if w.isalpha()] for text in docs ]

X_train_tokens = tokenize_text(X_train)
X_test_tokens  = tokenize_text(X_test)


### **Glove Embeddings**

In [None]:
import gensim.downloader as api
glove_model = api.load('glove-wiki-gigaword-50')
EMB_DIM = glove_model.vector_size

import numpy as np

# Convert documents â†’ vector embeddings
def document_vector(tokens):
    tokens = [w for w in tokens if w in glove_model]
    if len(tokens) == 0:
        return np.zeros(EMB_DIM)
    return np.mean(glove_model[tokens], axis=0)

X_train_vec = np.array([document_vector(t) for t in X_train_tokens], dtype=np.float32)
X_test_vec  = np.array([document_vector(t) for t in X_test_tokens], dtype=np.float32)

### **Creating DataLoaders to feed into MLP**

In [32]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

train_dataset = TensorDataset(torch.tensor(X_train_vec), torch.tensor(y_train, dtype=torch.float32))
test_dataset  = TensorDataset(torch.tensor(X_test_vec), torch.tensor(y_test,  dtype=torch.float32))

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)


### **Text Classifier MLP**

In [33]:
class TextClassifier(nn.Module):
    def __init__(self, in_feature, out_feature):
        super().__init__()
        self.fc1 = nn.Linear(in_feature, 128)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, out_feature)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

mlp_model = TextClassifier(EMB_DIM, num_classes).to(device)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(mlp_model.parameters(), lr=1e-4)

Using device: cpu


### **Training Loop**

In [None]:
epochs = 50
for epoch in range(epochs):
    total_loss = 0
    correct = 0
    total = 0

    mlp_model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        preds = mlp_model(xb)
        loss = loss_fn(preds, yb)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        prob = torch.sigmoid(preds)
        pred_label = (prob > 0.5).float()
        correct += (pred_label == yb).sum().item()
        total += yb.numel()

    acc = correct / total
    loss = total_loss/len(train_loader)
    print(f"Epoch {epoch+1}/{epochs} Train Loss: {loss:.2f} | Train Accuracy: {acc:.4f}")

print("Training Completed ðŸŽ‰")

Epoch [1/50] Train Loss: 0.45 | Train Accuracy: 0.7807
Epoch [2/50] Train Loss: 0.45 | Train Accuracy: 0.7804
Epoch [3/50] Train Loss: 0.45 | Train Accuracy: 0.7812
Epoch [4/50] Train Loss: 0.45 | Train Accuracy: 0.7816
Epoch [5/50] Train Loss: 0.45 | Train Accuracy: 0.7816
Epoch [6/50] Train Loss: 0.45 | Train Accuracy: 0.7815
Epoch [7/50] Train Loss: 0.45 | Train Accuracy: 0.7804
Epoch [8/50] Train Loss: 0.45 | Train Accuracy: 0.7824
Epoch [9/50] Train Loss: 0.45 | Train Accuracy: 0.7821
Epoch [10/50] Train Loss: 0.45 | Train Accuracy: 0.7821
Epoch [11/50] Train Loss: 0.45 | Train Accuracy: 0.7823
Epoch [12/50] Train Loss: 0.45 | Train Accuracy: 0.7825
Epoch [13/50] Train Loss: 0.45 | Train Accuracy: 0.7830
Epoch [14/50] Train Loss: 0.45 | Train Accuracy: 0.7812
Epoch [15/50] Train Loss: 0.45 | Train Accuracy: 0.7824
Epoch [16/50] Train Loss: 0.45 | Train Accuracy: 0.7826
Epoch [17/50] Train Loss: 0.45 | Train Accuracy: 0.7817
Epoch [18/50] Train Loss: 0.45 | Train Accuracy: 0.7825
E

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (classification_report, confusion_matrix, f1_score, roc_auc_score, roc_curve, auc)

device = "cuda" if torch.cuda.is_available() else "cpu"

def evaluate_model(model, loader, model_name):
    model.eval()
    all_preds, all_labels, all_probas = [], [], []

    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device)
            out = torch.sigmoid(model(xb))

            prob = out.cpu().numpy()
            preds = (prob > 0.5).astype(int)

            all_probas.extend(prob)
            all_preds.extend(preds)
            all_labels.extend(yb.numpy())

    print(f"\n================= {model_name} =================")
    
    ## Classification Report
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, target_names=mlb.classes_))

    ## Micro/Macro F1
    print("Micro F1 Score:", f1_score(all_labels, all_preds, average="micro"))
    print("Macro F1 Score:", f1_score(all_labels, all_preds, average="macro"))

    ## ROC-AUC Scores
    print("\nROC-AUC per class:")
    for i, label in enumerate(mlb.classes_):
        score = roc_auc_score([lbl[i] for lbl in all_labels],
                              [prob[i] for prob in all_probas])
        print(f"{label}: {score:.3f}")

    macro_auc = roc_auc_score(all_labels, all_probas, average="macro")
    micro_auc = roc_auc_score(all_labels, all_probas, average="micro")
    print(f"\nMacro-Average ROC-AUC: {macro_auc:.3f}")
    print(f"Micro-Average ROC-AUC: {micro_auc:.3f}")

    ## Plot ROC Curves
    plt.figure(figsize=(8,6))
    for i, label in enumerate(mlb.classes_):
        fpr, tpr, _ = roc_curve([lbl[i] for lbl in all_labels],
                                [prob[i] for prob in all_probas])
        auc_score = auc(fpr, tpr)
        plt.plot(fpr, tpr, label=f"{label} AUC={auc_score:.3f}")
    
    plt.plot([0,1], [0,1], 'k--')
    plt.title(f"ROC Curve â€” {model_name}")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend()
    plt.show()

    ## Confusion Matrix per Label
    for i, label in enumerate(mlb.classes_):
        cm = confusion_matrix([lbl[i] for lbl in all_labels],
                              [pred[i] for pred in all_preds])

        plt.figure(figsize=(3,3))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
        plt.title(f"Confusion Matrix â€” {model_name}: {label}")
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.show()


In [None]:
w2v_test_dataset = TensorDataset(
    torch.tensor(X_test_vec), torch.tensor(y_test, dtype=torch.float32)
)
w2v_test_loader = DataLoader(w2v_test_dataset, batch_size=32, shuffle=False)

evaluate_model(mlp_model, w2v_test_loader, "GloVe Multi-Label DNN")


: 