# **Library Installation and Loading**

In [1]:
# Colab environment setup. Only numpy is pinned (1.26.4); pennylane,
# pennylane-lightning, gensim and pytorch-crf are installed unpinned, so the
# versions that get resolved can differ from one run to the next.
# NOTE(review): consider pinning the remaining packages for reproducibility.
!pip uninstall -y numpy
!pip install numpy==1.26.4 --upgrade --force-reinstall --quiet
!pip install --upgrade pennylane pennylane-lightning
!pip install gensim
!pip install pytorch-crf --quiet

Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is inc

In [None]:
import os
# Sends signal 9 to this kernel's own process, so all in-memory state is lost.
# Presumably used to force a runtime restart so the numpy version installed
# above is the one that gets imported below -- confirm before reusing this
# outside Colab.
os.kill(os.getpid(), 9)

In [1]:
import random
import os
import time
import numpy as onp
from collections import defaultdict

import torch
from torchcrf import CRF
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import pennylane as qml
from pennylane import numpy as np

import gensim
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, classification_report
from gensim.models import KeyedVectors

# One seed for every RNG source imported above: the stdlib `random` module,
# plain numpy (`onp`), PennyLane's numpy wrapper (`np`) and torch (CPU + CUDA).
SEED = 42
random.seed(SEED)
onp.random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Request deterministic cuDNN behaviour and turn off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Mount Google Drive; every data and checkpoint path used below lives under
# /content/drive.
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


# **Data Loading and Preprocessing**

In [2]:
def load_data(path, embeddings=None):
    """Read a colon-separated tagged corpus and embed every token.

    The file holds one token per line and a blank line terminates the current
    sentence. Each non-blank line is split on ":"; the first field is used as
    the word and the last field as the tag. Lines that contain no ":" are
    skipped.

    NOTE(review): a token whose text itself contains ":" is looked up by the
    text before its first ":" only -- confirm the corpus format rules this out.

    Args:
        path: Path of the UTF-8 encoded corpus file.
        embeddings: Optional word-vector lookup that supports
            ``word in embeddings`` and ``embeddings[word]`` (for example a
            gensim ``KeyedVectors``). When omitted, the notebook-level
            ``fasttext`` object is used, which keeps the original
            single-argument call working.

    Returns:
        ``(sentences, tags)``: two parallel lists. ``sentences[i]`` is the
        list of per-token vectors of sentence ``i`` and ``tags[i]`` is the
        list of tag strings for the same tokens.
    """
    if embeddings is None:
        # Resolved at call time, so the cell defining `fasttext` may run
        # after the cell defining this function.
        embeddings = fasttext

    # Width of the all-zero vector used for out-of-vocabulary words. It is
    # read from the embedding model when available; 100 is the previously
    # hard-coded value and remains the fallback.
    oov_dim = getattr(embeddings, "vector_size", 100)

    sentences, tags = [], []
    with open(path, "r", encoding="utf-8") as f:
        sent, tag_seq = [], []
        for line in f:
            line = line.strip()
            if line:
                parts = line.split(":")
                if len(parts) >= 2:
                    word = parts[0].strip()
                    tag = parts[-1].strip()
                    if word in embeddings:
                        sent.append(embeddings[word])
                    else:
                        sent.append(np.zeros(oov_dim))
                    tag_seq.append(tag)
            else:
                # Blank line: close the sentence collected so far (if any).
                if sent and tag_seq:
                    sentences.append(sent)
                    tags.append(tag_seq)
                sent, tag_seq = [], []
        # The file may end without a trailing blank line.
        if sent and tag_seq:
            sentences.append(sent)
            tags.append(tag_seq)
    return sentences, tags

In [3]:
# Pre-trained word vectors in word2vec text format (binary=False). The file
# name and the zero vectors in load_data suggest 100 dimensions -- confirm.
fasttext = KeyedVectors.load_word2vec_format(
    "/content/drive/MyDrive/QML-Research/Data/hi_Fasttext_vectors/wordVectors100.txt",
    binary=False
)
# Despite the names, these hold the FULL corpus; the split happens below.
train_sentences, train_tags = load_data("/content/drive/MyDrive/QML-Research/Data/Data-POS_T/fb/fb_hi_cg_train2.txt")

# First split: 80% train+validation, 20% test.
train_val_sents, test_sents, train_val_tags, test_tags = train_test_split(
    train_sentences, train_tags, test_size=0.2, random_state=SEED
)

# Second split: 25% of the remaining 80% becomes validation (60/20/20 overall).
# `train_tags` is rebound here from the full corpus to the training split only.
train_sents, val_sents, train_tags, val_tags = train_test_split(
    train_val_sents, train_val_tags, test_size=0.25, random_state=SEED
)

# Tag vocabulary, built from the training split only and sorted so the ids are
# stable across runs.
# NOTE(review): a tag that occurs only in the validation or test split would
# raise KeyError in PosDataset.__getitem__ -- confirm every tag is in train.
all_tags = sorted(set(tag for seq in train_tags for tag in seq))
tag2id = {tag: idx for idx, tag in enumerate(all_tags)}
id2tag = {idx: tag for tag, idx in tag2id.items()}

In [4]:
# Sentence-length statistics. `train_sentences` is the unsplit corpus, so the
# validation and test sentences are included in these numbers.
lengths = [len(s) for s in train_sentences]
print("Min:", min(lengths), "Max:", max(lengths), "Mean:", onp.mean(lengths))
print("95th percentile:", int(onp.percentile(lengths, 95)))
# Fixed sequence length used by pad_collate: the 95th-percentile sentence length.
MAX_LEN = int(onp.percentile(lengths, 95))

Min: 1 Max: 245 Mean: 18.376052385406922
95th percentile: 62


In [5]:
def pad_collate(batch, pad_dim=100, max_len=MAX_LEN):
    """Collate ``(features, labels)`` pairs into two fixed-length batch tensors.

    Every sequence is cut to at most ``max_len`` steps and, if shorter, padded
    at the end up to exactly ``max_len``: features with zero rows of width
    ``pad_dim`` and labels with ``-100``.

    Note that the default of ``max_len`` is captured from ``MAX_LEN`` when this
    cell is executed; re-run the cell after changing ``MAX_LEN``.

    Args:
        batch: Iterable of ``(x, y)`` tensor pairs as produced by the dataset.
        pad_dim: Width of the zero rows appended to the features.
        max_len: Sequence length every item is truncated or padded to.

    Returns:
        ``(X, Y)``: the stacked feature tensor and the stacked label tensor.
    """
    def _fit_to_length(features, labels):
        # Slicing is a no-op for sequences that are already short enough.
        features, labels = features[:max_len], labels[:max_len]
        missing = max_len - features.shape[0]
        if missing > 0:
            features = torch.cat([features, torch.zeros(missing, pad_dim)], dim=0)
            labels = torch.cat([labels, torch.full((missing,), -100)], dim=0)
        return features, labels

    fitted = [_fit_to_length(x, y) for x, y in batch]
    feature_batch = torch.stack([features for features, _ in fitted])
    label_batch = torch.stack([labels for _, labels in fitted])
    return feature_batch, label_batch

In [6]:
class PosDataset(Dataset):
    """Map-style dataset pairing embedded sentences with their tag ids.

    Args:
        sentences: List of sentences, each a list of per-token vectors.
        tags: List of tag-string sequences, parallel to ``sentences``.
        tag2id: Mapping from tag string to integer id.
    """

    def __init__(self, sentences, tags, tag2id):
        self.sentences, self.tags, self.tag2id = sentences, tags, tag2id

    def __len__(self):
        """Number of sentences in the dataset."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sentence ``idx``.

        ``x`` is a float32 tensor with one row per token and ``y`` is a long
        tensor holding the tag id of each token.
        """
        token_matrix = np.stack(self.sentences[idx])
        features = torch.tensor(token_matrix, dtype=torch.float32)
        label_ids = [self.tag2id[label] for label in self.tags[idx]]
        targets = torch.tensor(label_ids, dtype=torch.long)
        return features, targets

In [7]:
# One dataset per split; all three share the tag vocabulary built from train.
train_dataset = PosDataset(train_sents, train_tags, tag2id)
val_dataset   = PosDataset(val_sents, val_tags, tag2id)
test_dataset  = PosDataset(test_sents, test_tags, tag2id)

# Only the training loader shuffles. pad_collate brings every sequence to
# MAX_LEN steps, so all batches have the same sequence length.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=pad_collate)
val_loader   = DataLoader(val_dataset, batch_size=32, collate_fn=pad_collate)
test_loader  = DataLoader(test_dataset, batch_size=32, collate_fn=pad_collate)

In [8]:
# Sanity check: sizes of the train / validation / test splits, followed by the
# size of the full (unsplit) corpus.
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))
print(len(train_sentences))

641
214
214
1069


# **Model & QNode Definition**

In [32]:
# Number of wires of the simulated device; also the width of the vector fed
# into and returned by the quantum layer.
n_qubits = 8
dev = qml.device("default.qubit", wires=n_qubits)

# Disabled earlier variant with two consecutive entangler blocks, kept for
# reference only.
# @qml.qnode(dev, interface="torch")
# def quantum_circuit(inputs, weights1, weights2):
#     qml.AngleEmbedding(inputs, wires=range(n_qubits), rotation='Y')
#     qml.BasicEntanglerLayers(weights1, wires=range(n_qubits))
#     qml.BasicEntanglerLayers(weights2, wires=range(n_qubits))
#     return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

# weight_shapes = {
#     "weights1": (6, n_qubits),
#     "weights2": (6, n_qubits)
# }
# quantum_layer = qml.qnn.TorchLayer(quantum_circuit, weight_shapes)

@qml.qnode(dev, interface="torch")
def quantum_circuit(inputs, weights):
    """Variational circuit: embed `inputs`, entangle, measure each wire.

    `inputs` is angle-embedded with Y rotations on all `n_qubits` wires,
    `weights` parameterises the BasicEntanglerLayers template, and the return
    value is the list of PauliZ expectation values, one per wire.
    """
    qml.AngleEmbedding(inputs, wires=range(n_qubits), rotation='Y')
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

# Trainable weights of shape (6, n_qubits) -- presumably 6 entangler layers;
# confirm against the BasicEntanglerLayers documentation.
weight_shapes = {"weights": (6, n_qubits),}
# Single module-level layer instance; HybridGRU stores a reference to this
# object, so every HybridGRU instance shares the same quantum weights.
quantum_layer = qml.qnn.TorchLayer(quantum_circuit, weight_shapes)

In [11]:
# Pure Classical Model

class GRU_Classical(nn.Module):
    """Bidirectional GRU tagger with a small feed-forward head and a CRF layer.

    Args:
        input_dim: Size of each input token vector.
        hidden_dim: Hidden size of the GRU per direction.
        num_classes: Number of tags scored by the head and the CRF.
        num_layers: Number of stacked GRU layers.

    Note:
        ``self.dropout`` is registered but ``forward`` never applies it.
    """

    def __init__(self, input_dim, hidden_dim, num_classes, num_layers=1):
        super().__init__()
        # Sub-modules are created in the same order as before so that seeded
        # parameter initialisation is unaffected.
        self.gru = nn.GRU(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(0.3)
        # Forward and backward GRU states are concatenated, hence 2 * hidden_dim.
        encoder_width = hidden_dim * 2
        self.fc_layers = nn.Sequential(
            nn.Linear(encoder_width, 32),
            nn.ReLU(),
            nn.Linear(32, num_classes),
        )
        self.crf = CRF(num_tags=num_classes, batch_first=True)

    def forward(self, x, tags=None, mask=None):
        """Return the CRF loss when ``tags`` is given, otherwise decoded paths.

        Args:
            x: Batch-first input tensor passed to the GRU.
            tags: Optional gold tag ids; when present the negated mean CRF
                log-likelihood is returned.
            mask: Optional mask forwarded to the CRF.

        Returns:
            A scalar loss tensor (training) or the result of ``crf.decode``.
        """
        encoded, _ = self.gru(x)
        emissions = self.fc_layers(encoded)

        if tags is None:
            return self.crf.decode(emissions, mask=mask)
        return -self.crf(emissions, tags, mask=mask, reduction="mean")

In [34]:
# Quantum classical Hybrid Model

class HybridGRU(nn.Module):
    """Bidirectional GRU tagger whose head goes through the quantum layer.

    Pipeline: GRU -> linear reduction to ``n_qubits`` features -> shared
    ``quantum_layer`` (applied per token) -> linear projection to tag scores
    -> CRF.

    Args:
        input_dim: Size of each input token vector.
        hidden_dim: Hidden size of the GRU per direction.
        num_classes: Number of tags scored by the head and the CRF.
        num_layers: Number of stacked GRU layers.

    Note:
        ``self.dropout`` is registered but ``forward`` never applies it.
        NOTE(review): decide whether dropout should be applied; doing so would
        change training behaviour, so it is left untouched here.
    """

    def __init__(self, input_dim, hidden_dim, num_classes, num_layers=1):
        super().__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers=num_layers,
                          batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(0.3)
        # The bidirectional GRU emits 2 * hidden_dim features per token. This
        # was hard-coded as 32, which only matched hidden_dim == 16; deriving
        # it keeps the same shape for that setting and works for any other.
        self.fc_reduce = nn.Linear(hidden_dim * 2, n_qubits)
        # Reference to the module-level TorchLayer (shared, not copied).
        self.quantum = quantum_layer
        self.fc = nn.Linear(n_qubits, num_classes)
        self.crf = CRF(num_classes, batch_first=True)

    def forward(self, x, tags=None, mask=None):
        """Return the CRF loss when ``tags`` is given, otherwise decoded paths.

        Args:
            x: Batch-first input tensor passed to the GRU.
            tags: Optional gold tag ids; when present the negated mean CRF
                log-likelihood is returned.
            mask: Optional mask forwarded to the CRF.

        Returns:
            A scalar loss tensor (training) or the result of ``crf.decode``.
        """
        out, _ = self.gru(x)
        out = self.fc_reduce(out)
        batch, seq_len, _ = out.shape
        # The quantum layer is fed one n_qubits-wide row per token, so the
        # batch and time axes are flattened first and restored afterwards.
        out = out.reshape(-1, n_qubits)
        out = self.quantum(out)
        out = self.fc(out)
        out = out.view(batch, seq_len, -1)

        # Same contract as GRU_Classical: the presence of `tags` alone selects
        # the loss branch. Previously a call with tags but without a mask fell
        # through to decode and returned paths instead of a loss.
        if tags is not None:
            log_likelihood = self.crf(out, tags, mask=mask, reduction="mean")
            return -log_likelihood
        return self.crf.decode(out, mask=mask)

# **Parameter Initialization**

In [35]:
# Both models use the same GRU configuration (100-d input, 16 hidden units per
# direction, 2 layers) so that only the head differs between them.
model_hybrid = HybridGRU(input_dim=100, hidden_dim=16, num_classes=len(tag2id), num_layers=2)

model_classical = GRU_Classical(
    input_dim=100,
    hidden_dim=16,
    num_classes=len(tag2id),
    num_layers=2
)

optimizer_hybrid = optim.Adam(model_hybrid.parameters(), lr=0.001)
optimizer_classical = optim.Adam(model_classical.parameters(), lr=0.001)
# NOTE(review): `criterion` is not referenced by any of the training or
# evaluation cells shown here; both models return their CRF loss directly.
criterion = nn.CrossEntropyLoss(ignore_index=-100)
# Upper bound on epochs; the training loops stop earlier via early stopping.
num_epochs = 300

# **Parameter Count**

In [13]:
def print_named_parameter_breakdown(model):
    """Print one table row per named parameter and return the total count.

    Args:
        model: Object exposing ``named_parameters()`` (e.g. an ``nn.Module``).

    Returns:
        The summed number of elements over all named parameters.
    """
    rule = "-" * 100
    print(f"{'Name':60} {'Shape':20} {'#params':>12}")
    print(rule)

    counts = []
    for name, p in model.named_parameters():
        cnt = p.numel()
        counts.append(cnt)
        print(f"{name:60} {tuple(p.shape)!s:20} {cnt:12,}")

    total = sum(counts)
    print(rule)
    print(f"{'Total parameters':60} {total:12,}")
    return total

def print_module_parameter_summary(model):
    """Print parameter counts grouped by top-level sub-module, largest first.

    The group of a parameter is the part of its name before the first ".".

    Args:
        model: Object exposing ``named_parameters()`` (e.g. an ``nn.Module``).

    Returns:
        The summed number of elements over all groups.
    """
    groups = {}
    for name, p in model.named_parameters():
        top = name.partition('.')[0]
        groups[top] = groups.get(top, 0) + p.numel()

    rule = "-" * 60
    print(f"{'Module':40} {'#params':>15}")
    print(rule)

    # Stable descending sort: groups of equal size keep their first-seen order.
    ranked = sorted(groups.items(), key=lambda item: item[1], reverse=True)
    for module_name, cnt in ranked:
        print(f"{module_name:40} {cnt:15,}")

    total = sum(cnt for _, cnt in ranked)
    print(rule)
    print(f"{'Total':40} {total:15,}")
    return total

# CUDA when available, otherwise CPU.
# NOTE(review): none of the cells shown here move the models or batches to
# `device`; confirm whether running everything on the CPU is intended.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [36]:
# Classical Model Parameter Count

# Per-parameter table first, then totals grouped by top-level sub-module.
print("Classical named param breakdown")
print_named_parameter_breakdown(model_classical)
print("\nClassical module summary")
print_module_parameter_summary(model_classical)

Classical named param breakdown
Name                                                         Shape                     #params
----------------------------------------------------------------------------------------------------
gru.weight_ih_l0                                             (48, 100)                   4,800
gru.weight_hh_l0                                             (48, 16)                      768
gru.bias_ih_l0                                               (48,)                          48
gru.bias_hh_l0                                               (48,)                          48
gru.weight_ih_l0_reverse                                     (48, 100)                   4,800
gru.weight_hh_l0_reverse                                     (48, 16)                      768
gru.bias_ih_l0_reverse                                       (48,)                          48
gru.bias_hh_l0_reverse                                       (48,)                          48
gru.weight_i

17690

In [37]:
# Quantum-Classical Hybrid Model Parameter Count

# Per-parameter table first, then totals grouped by top-level sub-module.
print("Hybrid named param breakdown")
print_named_parameter_breakdown(model_hybrid)
print("\nHybrid module summary")
print_module_parameter_summary(model_hybrid)

Hybrid named param breakdown
Name                                                         Shape                     #params
----------------------------------------------------------------------------------------------------
gru.weight_ih_l0                                             (48, 100)                   4,800
gru.weight_hh_l0                                             (48, 16)                      768
gru.bias_ih_l0                                               (48,)                          48
gru.bias_hh_l0                                               (48,)                          48
gru.weight_ih_l0_reverse                                     (48, 100)                   4,800
gru.weight_hh_l0_reverse                                     (48, 16)                      768
gru.bias_ih_l0_reverse                                       (48,)                          48
gru.bias_hh_l0_reverse                                       (48,)                          48
gru.weight_ih_l

16682

# **Training Loop**

In [17]:
# Classical Model Training Loop

# Early stopping: training ends once the validation loss has failed to improve
# for `patience` consecutive epochs.
patience = 5
best_val_loss = float("inf")
epochs_no_improve = 0

total_start = time.time()

for epoch in range(num_epochs):
    model_classical.train()
    epoch_start = time.time()
    correct, total, epoch_loss = 0, 0, 0

    for X, y in train_loader:
        # Padded positions carry the label -100 (written by pad_collate).
        mask = (y != -100)
        optimizer_classical.zero_grad()

        # Replace the -100 placeholders by a valid tag id before handing the
        # labels to the model (presumably the CRF cannot take -100 as a tag);
        # `mask` marks those positions as padding.
        y_clamped = y.clone()
        y_clamped[y_clamped == -100] = 0
        loss = model_classical(X, tags=y_clamped, mask=mask)
        loss.backward()
        optimizer_classical.step()
        epoch_loss += loss.item()

        # Metrics-only decode with the just-updated weights. Nothing is
        # back-propagated through this second forward pass, so autograd
        # tracking is switched off to avoid building an unused graph.
        with torch.no_grad():
            preds = model_classical(X, mask=mask)

        for p_seq, y_seq, m_seq in zip(preds, y, mask):
            for p, gold, m in zip(p_seq, y_seq, m_seq):
                if m:
                    correct += (p == gold.item())
                    total += 1

    acc = correct / total if total > 0 else 0
    elapsed = time.time() - epoch_start
    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {epoch_loss/len(train_loader):.4f} | "
          f"Acc: {acc:.4f} | Time: {elapsed:.2f}s")

    # ---------------- Validation ----------------
    model_classical.eval()
    val_correct, val_total, val_loss = 0, 0, 0
    with torch.no_grad():
        for X, y in val_loader:
            mask = (y != -100)
            y_clamped = y.clone()
            y_clamped[y_clamped == -100] = 0

            loss = model_classical(X, tags=y_clamped, mask=mask)
            val_loss += loss.item()

            preds = model_classical(X, mask=mask)
            for p_seq, y_seq, m_seq in zip(preds, y, mask):
                for p, gold, m in zip(p_seq, y_seq, m_seq):
                    if m:
                        val_correct += (p == gold.item())
                        val_total += 1

    val_acc = val_correct / val_total if val_total > 0 else 0
    avg_val_loss = val_loss / len(val_loader)
    print(f"   >> Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")
    model_classical.train()

    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0
        # Checkpointing of the best model is currently disabled, so after
        # early stopping the model holds the weights of the LAST epoch.
        # torch.save(model_classical.state_dict(),
        #            "/content/drive/MyDrive/QML-Research/Model Saves/Classical/POS_T/final_check_fb_hi_cg.pt")
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

print(f"\nTotal Training Time: {time.time() - total_start:.2f}s")

Epoch 1/300 | Loss: 36.5567 | Acc: 0.2193 | Time: 5.73s
   >> Val Loss: 36.3103 | Val Acc: 0.2529
Epoch 2/300 | Loss: 35.8941 | Acc: 0.3186 | Time: 5.05s
   >> Val Loss: 34.3356 | Val Acc: 0.3695
Epoch 3/300 | Loss: 31.2194 | Acc: 0.3924 | Time: 6.22s
   >> Val Loss: 31.5890 | Val Acc: 0.3851
Epoch 4/300 | Loss: 29.7737 | Acc: 0.4064 | Time: 4.62s
   >> Val Loss: 28.8968 | Val Acc: 0.4062
Epoch 5/300 | Loss: 25.8654 | Acc: 0.4388 | Time: 3.82s
   >> Val Loss: 26.6613 | Val Acc: 0.4585
Epoch 6/300 | Loss: 24.5569 | Acc: 0.5017 | Time: 4.76s
   >> Val Loss: 24.5159 | Val Acc: 0.5367
Epoch 7/300 | Loss: 21.7687 | Acc: 0.5834 | Time: 3.62s
   >> Val Loss: 22.3107 | Val Acc: 0.5860
Epoch 8/300 | Loss: 21.0025 | Acc: 0.6221 | Time: 3.60s
   >> Val Loss: 20.2833 | Val Acc: 0.6244
Epoch 9/300 | Loss: 20.0921 | Acc: 0.6562 | Time: 5.00s
   >> Val Loss: 18.7701 | Val Acc: 0.6444
Epoch 10/300 | Loss: 16.6188 | Acc: 0.6765 | Time: 3.67s
   >> Val Loss: 17.5447 | Val Acc: 0.6639
Epoch 11/300 | Loss

In [18]:
# Quantum-Classical Model Training Loop

# Early stopping: training ends once the validation loss has failed to improve
# for `patience` consecutive epochs.
patience = 5
best_val_loss = float("inf")
epochs_no_improve = 0

total_start = time.time()

for epoch in range(num_epochs):
    model_hybrid.train()
    epoch_start = time.time()
    correct, total, epoch_loss = 0, 0, 0

    for X, y in train_loader:
        # Padded positions carry the label -100 (written by pad_collate).
        mask = (y != -100)
        optimizer_hybrid.zero_grad()

        # Replace the -100 placeholders by a valid tag id before handing the
        # labels to the model (presumably the CRF cannot take -100 as a tag);
        # `mask` marks those positions as padding.
        y_clamped = y.clone()
        y_clamped[y_clamped == -100] = 0
        loss = model_hybrid(X, tags=y_clamped, mask=mask)
        loss.backward()
        optimizer_hybrid.step()
        epoch_loss += loss.item()

        # Metrics-only decode with the just-updated weights. Nothing is
        # back-propagated through this second forward pass, so autograd
        # tracking is switched off; this matters most here because the pass
        # runs through the simulated quantum layer.
        with torch.no_grad():
            preds = model_hybrid(X, mask=mask)

        for p_seq, y_seq, m_seq in zip(preds, y, mask):
            for p, gold, m in zip(p_seq, y_seq, m_seq):
                if m:
                    correct += (p == gold.item())
                    total += 1

    acc = correct / total if total > 0 else 0
    elapsed = time.time() - epoch_start
    print(f"Epoch {epoch+1}/{num_epochs} | Loss: {epoch_loss/len(train_loader):.4f} | "
          f"Acc: {acc:.4f} | Time: {elapsed:.2f}s")

    # ---------------- Validation ----------------
    model_hybrid.eval()
    val_correct, val_total, val_loss = 0, 0, 0
    with torch.no_grad():
        for X, y in val_loader:
            mask = (y != -100)
            y_clamped = y.clone()
            y_clamped[y_clamped == -100] = 0

            loss = model_hybrid(X, tags=y_clamped, mask=mask)
            val_loss += loss.item()

            preds = model_hybrid(X, mask=mask)
            for p_seq, y_seq, m_seq in zip(preds, y, mask):
                for p, gold, m in zip(p_seq, y_seq, m_seq):
                    if m:
                        val_correct += (p == gold.item())
                        val_total += 1

    val_acc = val_correct / val_total if val_total > 0 else 0
    avg_val_loss = val_loss / len(val_loader)
    print(f"   >> Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")
    model_hybrid.train()

    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0
        # Keep the weights of the best epoch so far on Drive.
        torch.save(model_hybrid.state_dict(),
                   "/content/drive/MyDrive/QML-Research/Model Saves/Quantum/POS_T/basicx2.pt")
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

print(f"\nTotal Training Time: {time.time() - total_start:.2f}s")

Epoch 1/300 | Loss: 39.4240 | Acc: 0.0605 | Time: 170.93s
   >> Val Loss: 40.6804 | Val Acc: 0.0673
Epoch 2/300 | Loss: 43.5487 | Acc: 0.0710 | Time: 172.06s
   >> Val Loss: 39.5911 | Val Acc: 0.1169
Epoch 3/300 | Loss: 37.2005 | Acc: 0.2807 | Time: 169.10s
   >> Val Loss: 38.4639 | Val Acc: 0.3186
Epoch 4/300 | Loss: 40.5042 | Acc: 0.3446 | Time: 174.40s
   >> Val Loss: 37.4772 | Val Acc: 0.3152
Epoch 5/300 | Loss: 35.6136 | Acc: 0.3337 | Time: 175.35s
   >> Val Loss: 36.5986 | Val Acc: 0.3180
Epoch 6/300 | Loss: 34.2893 | Acc: 0.3346 | Time: 175.44s
   >> Val Loss: 35.8151 | Val Acc: 0.3203
Epoch 7/300 | Loss: 34.2768 | Acc: 0.3371 | Time: 174.26s
   >> Val Loss: 35.0815 | Val Acc: 0.3261
Epoch 8/300 | Loss: 33.1255 | Acc: 0.3397 | Time: 173.47s
   >> Val Loss: 34.3196 | Val Acc: 0.3322
Epoch 9/300 | Loss: 32.2349 | Acc: 0.3531 | Time: 174.77s
   >> Val Loss: 33.5885 | Val Acc: 0.3395
Epoch 10/300 | Loss: 33.3452 | Acc: 0.3730 | Time: 173.35s
   >> Val Loss: 32.7975 | Val Acc: 0.3673

# **Testing**

In [38]:
# Restore previously saved weights for both models.
# NOTE(review): these file names (fb-hi.pt) differ from the path the hybrid
# training cell writes to (basicx2.pt), and the classical training cell has
# its save call commented out -- so these checkpoints are not produced by the
# training cells as shown. Confirm which runs they come from.
# torch.load unpickles the file; only load checkpoints from a trusted source.
model_hybrid.load_state_dict(torch.load("/content/drive/MyDrive/QML-Research/Model Saves/Quantum/POS_T/fb-hi.pt"))
model_classical.load_state_dict(torch.load("/content/drive/MyDrive/QML-Research/Model Saves/Classical/POS_T/fb-hi.pt"))

<All keys matched successfully>

In [39]:
# Test-set loss and token accuracy for the classical model.
model_classical.eval()
correct, total, test_loss = 0, 0, 0
with torch.no_grad():
    for X, y in test_loader:
        # Padded positions carry the label -100 (written by pad_collate).
        mask = (y != -100)
        # Swap the -100 placeholders for a valid tag id before the loss call;
        # `mask` marks those positions as padding.
        y_clamped = y.clone()
        y_clamped[y_clamped == -100] = 0
        loss = model_classical(X, tags=y_clamped, mask=mask)
        test_loss += loss.item()
        # Second forward pass without tags returns the decoded tag paths.
        preds = model_classical(X, mask=mask)
        for p_seq, y_seq, m_seq in zip(preds, y, mask):
            for p, gold, m in zip(p_seq, y_seq, m_seq):
                if m:
                    correct += (p == gold.item())
                    total += 1

# Loss is averaged over batches, accuracy over non-padded tokens.
print(f"Test Loss: {test_loss/len(test_loader):.4f} | Test Acc: {correct/total:.4f}")

Test Loss: 9.9528 | Test Acc: 0.8029


In [40]:
# Test-set loss and token accuracy for the hybrid model.
model_hybrid.eval()
correct, total, test_loss = 0, 0, 0
with torch.no_grad():
    for X, y in test_loader:
        # Padded positions carry the label -100 (written by pad_collate).
        mask = (y != -100)
        # Swap the -100 placeholders for a valid tag id before the loss call;
        # `mask` marks those positions as padding.
        y_clamped = y.clone()
        y_clamped[y_clamped == -100] = 0
        loss = model_hybrid(X, tags=y_clamped, mask=mask)
        test_loss += loss.item()
        # Second forward pass without tags returns the decoded tag paths.
        preds = model_hybrid(X, mask=mask)
        for p_seq, y_seq, m_seq in zip(preds, y, mask):
            for p, gold, m in zip(p_seq, y_seq, m_seq):
                if m:
                    correct += (p == gold.item())
                    total += 1

# Loss is averaged over batches, accuracy over non-padded tokens.
print(f"Test Loss: {test_loss/len(test_loader):.4f} | Test Acc: {correct/total:.4f}")

Test Loss: 12.2256 | Test Acc: 0.7813


In [42]:
def evaluate_model_on_loader(model, loader, id2tag, description="Model"):
    """Decode every batch of `loader` and print per-tag evaluation tables.

    Only positions whose label is not -100 (i.e. non-padded tokens) are
    evaluated. Two tables are printed: scikit-learn's classification report
    and a per-tag table with support, precision, recall, F1 and accuracy.

    Args:
        model: Tagger that returns decoded tag-id paths when called as
            ``model(X, mask=mask)``.
        loader: Iterable of ``(X, y)`` batches with -100 marking padding in y.
        id2tag: Mapping from tag id (0 .. len-1) to tag string.
        description: Label used in the printed heading.

    Returns:
        None. All results are printed.
    """
    model.eval()
    all_true = []
    all_pred = []

    with torch.no_grad():
        for X, y in loader:
            mask = (y != -100)
            preds = model(X, mask=mask)

            for p_seq, y_seq, m_seq in zip(preds, y, mask):
                for p, gold, m in zip(p_seq, y_seq, m_seq):
                    if m:
                        all_true.append(gold.item())
                        all_pred.append(int(p))

    # Report on the full tag inventory, in id order, even for tags that do not
    # occur in this loader.
    target_names = [id2tag[i] for i in range(len(id2tag))]
    label_ids = list(range(len(target_names)))

    print(f"\n=== {description} Evaluation ===")
    print(f"Total tokens evaluated: {len(all_true)}\n")
    print("Classification report (per-tag precision / recall / f1):\n")
    print(classification_report(all_true, all_pred, labels=label_ids, target_names=target_names, zero_division=0))

    precision, recall, f1, support = precision_recall_fscore_support(all_true, all_pred, labels=label_ids, zero_division=0)

    # Per-tag accuracy = correctly predicted tokens of a gold tag / tokens with
    # that gold tag (numerically the same quantity as the tag's recall).
    tag_total = {i: 0 for i in label_ids}
    tag_correct = {i: 0 for i in label_ids}
    for t_true, t_pred in zip(all_true, all_pred):
        tag_total[t_true] += 1
        if t_true == t_pred:
            tag_correct[t_true] += 1

    print("Per-tag counts and accuracy:")
    print(f"{'Tag':20} {'Support':>8} {'Precision':>9} {'Recall':>8} {'F1':>8} {'Accuracy':>10}")
    print("-" * 70)
    for i, tag in enumerate(target_names):
        sup = int(support[i])
        prec = precision[i]
        rec = recall[i]
        f = f1[i]
        acc = (tag_correct[i] / tag_total[i]) if tag_total[i] > 0 else 0.0
        print(f"{tag:20} {sup:8d} {prec:9.3f} {rec:8.3f} {f:8.3f} {acc:10.3f}")
    print("-" * 70)
    print()
    print()


# Same test loader for both models so the two reports are directly comparable.
evaluate_model_on_loader(model_classical, test_loader, id2tag, description="Classical")
evaluate_model_on_loader(model_hybrid, test_loader, id2tag, description="Hybrid (Quantum-Classical)")


=== Classical Evaluation ===
Total tokens evaluated: 3521

Classification report (per-tag precision / recall / f1):

              precision    recall  f1-score   support

          CC       0.59      0.47      0.52       118
          DT       0.92      0.87      0.89       238
         G_J       0.62      0.62      0.62       199
         G_N       0.79      0.84      0.82       755
       G_PRP       0.79      0.88      0.83       336
       G_PRT       0.51      0.51      0.51       142
         G_R       0.64      0.62      0.63       188
       G_SYM       0.67      0.32      0.43        31
         G_V       0.87      0.82      0.85       697
         G_X       0.96      0.96      0.96       478
         PSP       0.80      0.83      0.81       339

    accuracy                           0.80      3521
   macro avg       0.74      0.70      0.72      3521
weighted avg       0.80      0.80      0.80      3521

Per-tag counts and accuracy:
Tag                   Support Precision 

# **Test Inference (Synthetically Generated)**

In [None]:
def test_random_samples(model, train_dataset, val_dataset, test_dataset, idx2tag, num_samples=10):
    model.eval()
    sets = [("Train", train_dataset), ("Validation", val_dataset), ("Test", test_dataset)]

    for set_name, dataset in sets:
        print(f"\n=== Random Samples from {set_name} Set ===")
        samples = random.sample(range(len(dataset)), min(num_samples, len(dataset)))

        for i, idx in enumerate(samples):
            X, y = dataset[idx]
            X = X.unsqueeze(0)
            y = y.unsqueeze(0)

            mask = (y != -100)
            y_clamped = y.clone()
            y_clamped[y_clamped == -100] = 0

            with torch.no_grad():
                preds = model(X, mask=mask)
                preds = preds[0]

            gold_tags = [idx2tag[t.item()] for t in y.squeeze(0) if t.item() != -100]
            pred_tags = [idx2tag[p] for p in preds[:len(gold_tags)]]
            words = [f"w{i}" for i in range(len(gold_tags))]

            print("\nSentence prediction:")
            print(f"{'Word':15}{'Gold':10}{'Pred':10}")
            print("-" * 40)
            correct = 0
            for w, g, p in zip(words, gold_tags, pred_tags):
                print(f"{w:15}{g:10}{p:10}")
                if g == p:
                    correct += 1
            acc = correct / len(gold_tags)
            print(f"Sentence Accuracy: {acc:.2f}")

In [None]:
# Spot-check the classical model on 5 random sentences per split.
test_random_samples(model_classical, train_dataset, val_dataset, test_dataset, id2tag, num_samples=5)


=== Random Samples from Train Set ===

Sentence prediction:
Word           Gold      Pred      
----------------------------------------
w0             G_X       G_X       
Sentence Accuracy: 1.00

Sentence prediction:
Word           Gold      Pred      
----------------------------------------
w0             G_X       G_X       
w1             G_V       G_V       
w2             G_PRP     G_PRP     
w3             G_V       G_V       
w4             DT        DT        
w5             G_N       G_N       
w6             G_N       G_N       
w7             PSP       PSP       
w8             DT        DT        
w9             G_N       G_N       
w10            G_X       G_X       
w11            G_PRP     G_PRP     
w12            G_V       G_V       
w13            G_V       G_V       
w14            G_PRT     G_PRT     
w15            G_V       G_V       
w16            G_N       G_N       
w17            G_R       G_R       
w18            G_X       G_X       
w19            G_PR

In [None]:
# Spot-check the hybrid model on 5 random sentences per split. The draw uses
# the global `random` state, so these are generally not the same sentences as
# in the classical spot-check.
test_random_samples(model_hybrid, train_dataset, val_dataset, test_dataset, id2tag, num_samples=5)


=== Random Samples from Train Set ===

Sentence prediction:
Word           Gold      Pred      
----------------------------------------
w0             G_N       G_N       
w1             G_V       G_V       
w2             PSP       PSP       
w3             G_N       G_N       
w4             G_R       G_R       
w5             G_X       G_X       
Sentence Accuracy: 1.00

Sentence prediction:
Word           Gold      Pred      
----------------------------------------
w0             G_PRP     G_PRP     
w1             G_N       G_N       
w2             G_V       G_V       
w3             G_V       G_V       
w4             G_N       G_N       
w5             G_N       G_N       
w6             PSP       PSP       
w7             G_N       G_N       
Sentence Accuracy: 1.00

Sentence prediction:
Word           Gold      Pred      
----------------------------------------
w0             G_N       G_N       
w1             G_X       G_X       
w2             G_X       G_X       
w3  