# **Imports and Downloads**

In [67]:
!pip uninstall -y numpy
!pip install numpy==1.26.4 --upgrade --force-reinstall --quiet
!pip install pennylane pennylane-lightning

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[0m[31m


In [None]:
import os
os.kill(os.getpid(), 9)

In [1]:
from google.colab import drive
drive.mount("/content/drive", force_remount = True)

Mounted at /content/drive


In [2]:
# Core libraries
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset

import numpy as np
import pandas as pd
import math
import re
import os
import random
import time
import matplotlib.pyplot as plt
from collections import Counter

# Quantum computing
import pennylane as qml
from pennylane import numpy as pnp
from pennylane.optimize import AdamOptimizer

# NLP and preprocessing
import nltk
from nltk.corpus import stopwords
from nltk.util import ngrams

# Sklearn tools
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import normalize

# Reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
pnp.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False



# **Data Loading and Preprocessing**

In [3]:
with open("/content/drive/MyDrive/QML-Research/Data/sentiment labelled sentences/amazon_cells_labelled.txt", "r") as f:
    lines = f.readlines()

sentences = [line.split("\t")[0] for line in lines]
labels = [int(line.split("\t")[1]) for line in lines]

In [4]:
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))
domain_neutral_words = {
    "phone", "product", "battery", "headset", "quality", "one", "use"
}
stop_words.update(domain_neutral_words)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
def clean_and_tokenize(text):
    text = text.lower()
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"[^a-z0-9\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    tokens = text.split()
    tokens = [word for word in tokens if word not in stop_words]
    return tokens

cleaned_sents = [clean_and_tokenize(sentence) for sentence in sentences]

In [6]:
max_len = 10
for i in range(len(cleaned_sents)):
  if (len(cleaned_sents[i]) < max_len):
    cleaned_sents[i] += ["<PAD>"] * (max_len - len(cleaned_sents[i]))
  else:
    cleaned_sents[i] = cleaned_sents[i][:max_len]

# **GloVE Word Embeddings**

In [7]:
def load_glove_embeddings(file_path):
    embeddings = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.strip().split()
            word = values[0]
            vector = np.array(values[1:], dtype='float32')
            embeddings[word] = vector
    return embeddings

In [8]:
glove_path = '/content/drive/MyDrive/QML-Research/Data/glove.6B.100d.txt'
glove = load_glove_embeddings(glove_path)

# **AutoEncoder**

In [9]:
class GloVeAutoencoder(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(GloVeAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, input_dim)
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [10]:
all_words = list(glove.keys())
all_vectors = np.array([glove[word] for word in all_words])
all_vectors = normalize(all_vectors)
word_tensor = torch.tensor(all_vectors).float()

In [11]:
latent_dim = 8
epochs = 100
save_path = '/content/drive/MyDrive/QML-Research/Autoencoder/glove_autoencoder_normalized_8.pth'
# save_path = '/content/drive/MyDrive/QML-Research/Autoencoder/glove_autoencoder_normalized_32.pth'

In [12]:
if os.path.exists(save_path):
    print(f"Loading Autoencoder from {save_path}")
    autoencoder = GloVeAutoencoder(input_dim=100, latent_dim=latent_dim)
    autoencoder.load_state_dict(torch.load(save_path, map_location=torch.device('cpu')))
else:
    print("Training Autoencoder")
    autoencoder = GloVeAutoencoder(input_dim=100, latent_dim=latent_dim)
    optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        optimizer.zero_grad()
        reconstructed = autoencoder(word_tensor)
        loss = criterion(reconstructed, word_tensor)
        loss.backward()
        optimizer.step()

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item():.6f}")

    torch.save(autoencoder.state_dict(), save_path)
    print(f"Saved Autoencoder serialized model in drive @ {save_path}")

Loading Autoencoder from /content/drive/MyDrive/QML-Research/Autoencoder/glove_autoencoder_normalized_8.pth


In [13]:
autoencoder.eval()
with torch.no_grad():
    compressed_vectors = autoencoder.encoder(word_tensor).numpy()

reduced_embeddings = {
    word: compressed_vectors[i]
    for i, word in enumerate(all_words)
}

# **Embedding**

In [14]:
def sentence_to_vec(sentence, embeddings, dim):
    vectors = []
    for word in sentence:
        if word in embeddings:
            vectors.append(embeddings[word])
        else:
            vectors.append(np.zeros(dim))
    return vectors

def embed_sentences(cleaned_sents, embeddings, dim):
    return np.array([sentence_to_vec(tokens, embeddings, dim) for tokens in cleaned_sents])

In [15]:
X_embed_np = embed_sentences(cleaned_sents, reduced_embeddings, dim=8)
X_embed_np = (X_embed_np - X_embed_np.min()) * (np.pi / (X_embed_np.max() - X_embed_np.min()))

X_embed = torch.tensor(X_embed_np).long()
y_embed = torch.tensor(labels).long()

# **Dataset and DataLoader**

In [16]:
class AmazonDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [17]:
X_train, X_test, y_train, y_test = train_test_split(
    X_embed, y_embed, test_size=0.2, stratify=y_embed.numpy(), random_state=42
)

train_dataset = AmazonDataset(X_train, y_train)
test_dataset = AmazonDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1)

# **Utility Functions**

In [18]:
def softmax(logits):
    e_x = np.exp(logits - np.max(logits))
    return e_x / e_x.sum()

In [19]:
def cross_entropy(true_label, predictions):
    j = np.clip(predictions[true_label], 1e-10, 1.0)
    return -np.log(j)

In [20]:
def dense_angle_embedding(x, wires):
    # For 32 dim:
    # chunk_size = 8
    chunk_size = 2

    for i, wire in enumerate(wires):
        for j in range(chunk_size):
            idx = i * chunk_size + j
            qml.RX(x[idx], wires=wire)
            qml.RY(x[idx], wires=wire)
            qml.RZ(x[idx], wires=wire)

# **Ansatz and QNode**

In [21]:
n_qubits = 4
dev = qml.device("default.qubit", wires=n_qubits)

In [22]:
@qml.qnode(dev, interface="torch")
def quantum_step(inputs, hidden, weights_in, weights_h):
    #Encoding
    dense_angle_embedding(inputs, wires=[0, 1, 2, 3])
    qml.RY(hidden, wires=3)

    # Interaction Layer/Entanglement
    qml.CNOT(wires=[0, 1])
    qml.CNOT(wires=[1, 2])
    qml.CNOT(wires=[2, 3])
    qml.CNOT(wires=[3, 0])

    # Parametrized trainable unitaries on input wires
    for i in range(3):
        qml.RY(weights_in[i], wires=i)
        qml.RZ(weights_in[i + 3], wires=i)

    # Parametrized unitaries on hidden wire
    qml.RY(weights_h[0], wires=3)
    qml.RZ(weights_h[1], wires=3)

    return qml.expval(qml.PauliZ(2)), qml.expval(qml.PauliZ(3))

# **Model Class**

In [23]:
def forward(input_sequence, weights_in, weights_h, hidden_init=None):
    """
    input_sequence: (10, 32) numpy array
    weights_in: (6,) numpy array
    weights_h: (2,) numpy array
    hidden_init: optional scalar float, default is small random value
    """
    if hidden_init is None:
        hidden = np.random.uniform(-0.1, 0.1)
    else:
        hidden = hidden_init

    for word_vec in input_sequence:
        logits = quantum_step(word_vec, hidden, weights_in, weights_h)
        hidden = np.tanh(logits[0] + logits[1])

    return logits

In [24]:
weights_in = np.random.uniform(0, 2 * np.pi, size=6)
weights_h  = np.random.uniform(0, 2 * np.pi, size=2)
optimizer = AdamOptimizer(stepsize=0.01)
epochs = 20

# **Directory Creation**

In [60]:
base_dir = "/content/drive/MyDrive/QML-Research/Analysis"

folders_to_create = [
    "logs/qrnn_outputs",
    "plots/loss_trend",
    "plots/accuracy_trend",
    "plots/test_boxplots",
]

for folder in folders_to_create:
    path = os.path.join(base_dir, folder)
    os.makedirs(path, exist_ok=True)
    print(f"Created (or already exists): {path}")

Created (or already exists): /content/drive/MyDrive/QML-Research/Analysis/logs/qrnn_outputs
Created (or already exists): /content/drive/MyDrive/QML-Research/Analysis/plots/loss_trend
Created (or already exists): /content/drive/MyDrive/QML-Research/Analysis/plots/accuracy_trend
Created (or already exists): /content/drive/MyDrive/QML-Research/Analysis/plots/test_boxplots


# **Training**

In [None]:
train_losses = []
train_accuracies = []

print("Training Loop\n")
start_time = time.time()

for epoch in range(1, epochs + 1):
    epoch_loss = 0
    correct = 0
    total = 0
    epoch_start = time.time()

    for xb, yb in train_loader:
        x_np = xb.squeeze(0).numpy()
        y_np = yb.item()

        def cost(params):
            w_in, w_h = params
            logits = forward(x_np, w_in, w_h)
            probs = softmax(np.array(logits))
            return cross_entropy(y_np, probs)

        weights_in, weights_h = optimizer.step(cost, (weights_in, weights_h))
        logits = forward(x_np, weights_in, weights_h)
        probs = softmax(np.array(logits))
        loss = cross_entropy(y_np, probs)

        pred_label = np.argmax(probs)

        if pred_label == y_np:
            correct += 1
        epoch_loss += loss
        total += 1

    avg_loss = epoch_loss / total
    acc = correct / total

    train_losses.append(avg_loss)
    train_accuracies.append(acc)

    epoch_time = time.time() - epoch_start
    print(f"Epoch {epoch:02d} | Loss: {avg_loss:.4f} | Acc: {acc:.4f} | Time: {epoch_time:.2f}s")

total_time = time.time() - start_time
print(f"\nTraining complete in {total_time:.2f} seconds.")

Training Loop





Epoch 01 | Loss: 0.7626 | Acc: 0.5012 | Time: 161.84s
Epoch 02 | Loss: 0.7626 | Acc: 0.5012 | Time: 159.10s
Epoch 03 | Loss: 0.7626 | Acc: 0.5012 | Time: 159.77s
Epoch 04 | Loss: 0.7626 | Acc: 0.5012 | Time: 170.14s
Epoch 05 | Loss: 0.7626 | Acc: 0.5012 | Time: 161.22s
Epoch 06 | Loss: 0.7626 | Acc: 0.5012 | Time: 158.76s
Epoch 07 | Loss: 0.7626 | Acc: 0.5012 | Time: 158.39s
Epoch 08 | Loss: 0.7626 | Acc: 0.5012 | Time: 157.65s
Epoch 09 | Loss: 0.7626 | Acc: 0.5012 | Time: 158.04s
Epoch 10 | Loss: 0.7626 | Acc: 0.5012 | Time: 157.93s
Epoch 11 | Loss: 0.7626 | Acc: 0.5012 | Time: 157.85s
Epoch 12 | Loss: 0.7626 | Acc: 0.5012 | Time: 160.06s
Epoch 13 | Loss: 0.7626 | Acc: 0.5012 | Time: 157.08s
Epoch 14 | Loss: 0.7626 | Acc: 0.5012 | Time: 157.37s
Epoch 15 | Loss: 0.7626 | Acc: 0.5012 | Time: 156.76s
Epoch 16 | Loss: 0.7626 | Acc: 0.5012 | Time: 157.20s
Epoch 17 | Loss: 0.7626 | Acc: 0.5012 | Time: 156.77s
Epoch 18 | Loss: 0.7626 | Acc: 0.5012 | Time: 155.87s
Epoch 19 | Loss: 0.7626 | Ac

# **Testing**

In [1]:
def evaluate_test(weights_in, weights_h):
    correct = 0
    total = 0
    test_loss = 0

    for xb, yb in test_loader:
        x_np = xb.squeeze(0).numpy()
        y_np = yb.item()  # correct scalar label

        logits = forward(x_np, weights_in, weights_h)
        probs = softmax(np.array(logits))
        loss = cross_entropy(y_np, probs)

        pred = np.argmax(probs)
        true = y_np

        if pred == true:
            correct += 1
        test_loss += loss
        total += 1

    return test_loss / total, correct / total

# **Logging**

In [2]:
def log_test_results(model_name, test_acc, test_loss):
    log_path = "/content/drive/MyDrive/QML-Research/Analysis/logs/qrnn_outputs/accuracy_logs.txt"

    with open(log_path, "a") as f:
        f.write(f"[Model: {model_name}]\n")
        f.write(f"Test Accuracy: {test_acc:.4f}\n")
        f.write(f"Test Loss: {test_loss:.4f}\n\n")

    print(f"Logged test results to: {log_path}")

In [3]:
def save_training_plots(model_name, train_losses, train_accuracies):
    base_path = "/content/drive/MyDrive/QML-Research/Analysis/plots"

    # Loss Plot
    plt.figure(figsize=(8, 5))
    plt.plot(train_losses, label="Training Loss")
    plt.title(f"{model_name} - Loss Trend")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.savefig(f"{base_path}/loss_trend/loss_plot_{model_name}.png")
    plt.close()

    # Accuracy Plot
    plt.figure(figsize=(8, 5))
    plt.plot(train_accuracies, label="Training Accuracy", color='green')
    plt.title(f"{model_name} - Accuracy Trend")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.savefig(f"{base_path}/accuracy_trend/acc_plot_{model_name}.png")
    plt.close()

    print(f"Plots saved to: {base_path}/loss_trend/ and /accuracy_trend/")

In [None]:
test_loss, test_acc = evaluate_test(weights_in, weights_h)
log_test_results("QRNN_v2", test_acc, test_loss)
save_training_plots("QRNN_v2", train_losses, train_accuracies)