In [1]:
!pip install pennylane
import torch
import torch.nn as nn
import pennylane as qml
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import random
import pandas as pd
import numpy as np
!pip install spacy
!python -m spacy download en_core_web_sm
!python -m spacy download fr_core_news_sm

Collecting pennylane
  Downloading PennyLane-0.39.0-py3-none-any.whl.metadata (9.2 kB)
Collecting rustworkx>=0.14.0 (from pennylane)
  Downloading rustworkx-0.15.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray>=0.6.11 (from pennylane)
  Downloading autoray-0.7.0-py3-none-any.whl.metadata (5.8 kB)
Collecting pennylane-lightning>=0.39 (from pennylane)
  Downloading PennyLane_Lightning-0.39.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (26 kB)
Downloading PennyLane-0.39.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m61.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading autoray-0.7.0-py3-none-any.whl (930 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m930.0/930.0 kB[0m [31m55.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PennyLane_Lightning-0.39.0-cp310

In [None]:
import torch
import torch.nn as nn
import pennylane as qml
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
import random
import pandas as pd
import numpy as np
import spacy

# Load spaCy models
nlp_en = spacy.load("en_core_web_sm")
nlp_fr = spacy.load("fr_core_news_sm")

def spacy_tokenizer_en(sentence):
    return [token.text.lower() for token in nlp_en(sentence)]

def spacy_tokenizer_fr(sentence):
    return [token.text.lower() for token in nlp_fr(sentence)]

src_tokenizer = spacy_tokenizer_en
tgt_tokenizer = spacy_tokenizer_fr

class QRNN(nn.Module):
    def __init__(
        self,
        input_size,
        hidden_size,
        n_qubits=8,  # Increased number of qubits
        n_qlayers=2,  # Increased number of quantum layers
        batch_first=True,
        backend="default.qubit"
    ):
        super(QRNN, self).__init__()
        self.n_inputs = input_size
        self.hidden_size = hidden_size
        self.concat_size = self.n_inputs + self.hidden_size
        self.n_qubits = n_qubits
        self.n_qlayers = n_qlayers
        self.backend = backend
        self.batch_first = batch_first

        self.wires = [f"wire_{i}" for i in range(self.n_qubits)]
        self.dev = qml.device(self.backend, wires=self.wires)

        # Enhanced quantum layer block
        def _layer_qrnn_block(W):
            def layer(W):
                # More complex quantum gates and entanglement
                for i in range(self.n_qubits):
                    qml.RX(W[i, 0], wires=i)
                    qml.RZ(W[i, 1], wires=i)
                    qml.RY(W[i, 2], wires=i)  # Added RY gate

                # More entanglement patterns
                for i in range(self.n_qubits - 1):
                    qml.CNOT(wires=[i, i + 1])
                    qml.RZ(W[i + 1, 0], wires=i + 1)
                    qml.CZ(wires=[i, i + 1])  # Added CZ gate

                # Additional entanglement
                qml.CNOT(wires=[self.n_qubits - 1, 0])
                qml.RZ(W[0, 0], wires=0)
                qml.SWAP(wires=[self.n_qubits - 1, 0])

        def _circuit_qrnn_block(inputs, weights):
            qml.AngleEmbedding(inputs, self.wires)
            for W in weights:
                _layer_qrnn_block(W)
            return [qml.expval(qml.PauliZ(wires=w)) for w in self.wires]

        self.qlayer_circuit = qml.QNode(_circuit_qrnn_block, self.dev, interface="torch")

        weights_shapes = {"weights": (n_qlayers, n_qubits, 3)}

        self.clayer_in = nn.Linear(self.concat_size, n_qubits)
        self.VQC = {
            'circuit': qml.qnn.TorchLayer(self.qlayer_circuit, weights_shapes)
        }
        self.clayer_out = nn.Linear(self.n_qubits, self.hidden_size)

    def forward(self, x):
        batch_size = x.size(0) if self.batch_first else x.size(1)
        h_t = torch.zeros(batch_size, self.hidden_size, device=x.device)

        outputs = []
        for t in range(x.size(1 if self.batch_first else 0)):
            x_t = x[:, t, :] if self.batch_first else x[t, :, :]
            h_t = h_t.to(x.device)

            concat_input = torch.cat((x_t, h_t), dim=-1)

            q_input = self.clayer_in(concat_input)
            q_output = self.VQC['circuit'](q_input)
            h_t = self.clayer_out(q_output)

            outputs.append(h_t)

        outputs = torch.stack(outputs, dim=1 if self.batch_first else 0)
        return outputs, h_t

class Seq2Seq(nn.Module):
    def __init__(self, qrnn, hidden_size, tgt_vocab_size, device, dropout=0.3):
        super(Seq2Seq, self).__init__()
        self.qrnn = qrnn
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(tgt_vocab_size, hidden_size)

        # Added dropout for regularization
        self.dropout = nn.Dropout(dropout)

        # Attention mechanism
        self.attention = nn.Linear(hidden_size * 2, hidden_size)

        # Decoder with attention
        self.decoder = nn.GRUCell(hidden_size * 2, hidden_size)
        self.fc = nn.Linear(hidden_size, tgt_vocab_size)
        self.device = device

    def forward(self, src, tgt):
        # Encode source sequence
        encoded_seq, hidden = self.qrnn(src)

        batch_size = tgt.size(0)
        max_len = tgt.size(1)
        tgt_vocab_size = self.fc.out_features

        # Initialize outputs tensor
        outputs = torch.zeros(batch_size, max_len-1, tgt_vocab_size).to(self.device)

        # Embedding for target
        tgt_emb = self.embedding(tgt[:, :-1])

        # Initial decoder hidden state
        decoder_hidden = hidden

        for t in range(max_len - 1):
            # Current target embedding
            current_emb = tgt_emb[:, t, :]

            # Compute attention
            attention_weights = torch.softmax(
                torch.bmm(encoded_seq, current_emb.unsqueeze(-1)).squeeze(-1),
                dim=1
            )
            context_vector = torch.bmm(
                attention_weights.unsqueeze(1),
                encoded_seq
            ).squeeze(1)

            # Combine current embedding with context
            decoder_input = torch.cat([current_emb, context_vector], dim=1)
            decoder_input = self.dropout(decoder_input)

            # Update hidden state
            decoder_hidden = self.decoder(decoder_input, decoder_hidden)

            # Generate output
            output = self.fc(decoder_hidden)
            outputs[:, t, :] = output

        return outputs

# Vocabulary and Dataset classes remain the same as in the previous implementation
# Function to build vocabulary
def build_vocab(sentences, tokenizer, special_tokens=["<pad>", "<sos>", "<eos>", "<unk>"]):
    vocab = {tok: idx for idx, tok in enumerate(special_tokens)}
    idx = len(vocab)
    for sentence in sentences:
        for token in tokenizer(sentence):
            if token not in vocab:
                vocab[token] = idx
                idx += 1
    return vocab

def collate_fn(batch):
    src_batch = [item[0] for item in batch]
    tgt_batch = [item[1] for item in batch]

    # Pad the sequences
    src_padded = nn.utils.rnn.pad_sequence(src_batch, batch_first=True, padding_value=src_vocab["<pad>"])
    tgt_padded = nn.utils.rnn.pad_sequence(tgt_batch, batch_first=True, padding_value=tgt_vocab["<pad>"])

    # Convert src_padded to one-hot encoding
    src_one_hot = torch.zeros(src_padded.size(0), src_padded.size(1), len(src_vocab))
    for i, sequence in enumerate(src_padded):
        for j, idx in enumerate(sequence):
            if idx != src_vocab["<pad>"]:  # Skip padding tokens
                src_one_hot[i, j, idx] = 1

    return src_one_hot, tgt_padded

# Dataset Class
class TranslationDataset(Dataset):
    def __init__(self, data, src_tokenizer, tgt_tokenizer, src_vocab, tgt_vocab, max_len=20):
        self.data = data
        self.src_tokenizer = src_tokenizer
        self.tgt_tokenizer = tgt_tokenizer
        self.src_vocab = src_vocab
        self.tgt_vocab = tgt_vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        src_sentence, tgt_sentence = self.data[idx]
        src_tokens = self.src_tokenizer(src_sentence)[:self.max_len]
        tgt_tokens = self.tgt_tokenizer(tgt_sentence)[:self.max_len]

        src_ids = [self.src_vocab.get(tok, self.src_vocab["<unk>"]) for tok in src_tokens]
        tgt_ids = [self.tgt_vocab.get(tok, self.tgt_vocab["<unk>"]) for tok in tgt_tokens]

        src_tensor = torch.tensor(src_ids, dtype=torch.long)
        tgt_tensor = torch.tensor(tgt_ids, dtype=torch.long)

        return src_tensor, tgt_tensor

# Training configuration
def train_model(train_loader, test_loader, src_vocab, tgt_vocab, idx_to_tgt):
    # Model hyperparameters
    input_size = len(src_vocab)
    hidden_size = 128  # Increased hidden size
    n_qubits = 8
    n_qlayers = 2

    # Move to GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize models
    qrnn = QRNN(
        input_size=input_size,
        hidden_size=hidden_size,
        n_qubits=n_qubits,
        n_qlayers=n_qlayers
    ).to(device)

    seq2seq = Seq2Seq(
        qrnn,
        hidden_size,
        len(tgt_vocab),
        device=device
    ).to(device)

    # Improved training configuration
    optimizer = torch.optim.Adam(seq2seq.parameters(), lr=0.01)  # Adjusted learning rate
    criterion = nn.CrossEntropyLoss(ignore_index=src_vocab["<pad>"])

    # Learning rate scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.5,
        patience=2,
        verbose=True
    )

    # Training loop with early stopping
    best_bleu = 0
    patience = 5
    patience_counter = 0

    for epoch in range(20):  # Increased number of epochs
        seq2seq.train()
        total_train_loss = 0

        for src_batch, tgt_batch in train_loader:
            src_batch = src_batch.to(device)
            tgt_batch = tgt_batch.to(device)

            optimizer.zero_grad()
            output = seq2seq(src_batch, tgt_batch)
            loss = criterion(output.view(-1, len(tgt_vocab)), tgt_batch[:, 1:].reshape(-1))

            loss.backward()
            torch.nn.utils.clip_grad_norm_(seq2seq.parameters(), max_norm=1)  # Gradient clipping
            optimizer.step()

            total_train_loss += loss.item()

        # Evaluate after each epoch
        seq2seq.eval()
        references = []
        hypotheses = []

        with torch.no_grad():
            for src_batch, tgt_batch in test_loader:
                src_batch = src_batch.to(device)
                tgt_batch = tgt_batch.to(device)
                outputs = seq2seq(src_batch, tgt_batch)
                predicted_ids = outputs.argmax(dim=-1).cpu().numpy()

                # Decode target and predicted sentences
                tgt_sentences = [[idx_to_tgt[idx] for idx in tgt if idx != tgt_vocab["<pad>"]] for tgt in tgt_batch.cpu().numpy()]
                pred_sentences = [[idx_to_tgt[idx] for idx in pred if idx != tgt_vocab["<pad>"]] for pred in predicted_ids]

                for ref, hyp in zip(tgt_sentences, pred_sentences):
                    references.append([ref])
                    hypotheses.append(hyp)

        # Calculate BLEU score
        smooth = SmoothingFunction()
        bleu = corpus_bleu(references, hypotheses, smoothing_function=smooth.method1)

        print(f"Epoch {epoch+1}, Train Loss: {total_train_loss/len(train_loader)}, BLEU Score: {bleu}")

        # Learning rate scheduling
        scheduler.step(total_train_loss)

        # Early stopping
        if bleu > best_bleu:
            best_bleu = bleu
            patience_counter = 0
            # Optionally save the model
            torch.save(seq2seq.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping triggered at epoch {epoch+1}")
            break

    return seq2seq, best_bleu

# Main execution
# Load and preprocess the data
df = pd.read_csv("eng-french.csv")
df = df.head(50000)  # Increased dataset size
src_sentences = df["English words/sentences"].tolist()
tgt_sentences = df["French words/sentences"].tolist()

# Build vocabularies
src_vocab = build_vocab(src_sentences, src_tokenizer)
tgt_vocab = build_vocab(tgt_sentences, tgt_tokenizer)
idx_to_tgt = {idx: token for token, idx in tgt_vocab.items()}

# Prepare data
data = list(zip(src_sentences, tgt_sentences))
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Create datasets and dataloaders
train_dataset = TranslationDataset(train_data, src_tokenizer, tgt_tokenizer, src_vocab, tgt_vocab)
test_dataset = TranslationDataset(test_data, src_tokenizer, tgt_tokenizer, src_vocab, tgt_vocab)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, collate_fn=collate_fn)

# Train the model
trained_model, final_bleu = train_model(train_loader, test_loader, src_vocab, tgt_vocab, idx_to_tgt)
print(f"Final BLEU Score: {final_bleu}")



Epoch 1, Train Loss: 4.17784433517456, BLEU Score: 0.0180092311267726
Epoch 2, Train Loss: 3.280281586074829, BLEU Score: 0.023042954256169007
Epoch 3, Train Loss: 2.9387237461090088, BLEU Score: 0.025753096317923253
Epoch 4, Train Loss: 2.709567509460449, BLEU Score: 0.02989238167353509
Epoch 5, Train Loss: 2.543537724304199, BLEU Score: 0.029725932888591183
Epoch 6, Train Loss: 2.4171971504211425, BLEU Score: 0.03434493299723454
Epoch 7, Train Loss: 2.314530469703674, BLEU Score: 0.03889615796114987
Epoch 8, Train Loss: 2.236495530128479, BLEU Score: 0.0387596038108465
Epoch 9, Train Loss: 2.1712416412353517, BLEU Score: 0.04023406623588146
Epoch 10, Train Loss: 2.115628094291687, BLEU Score: 0.04191483756247041
Epoch 11, Train Loss: 2.0706162981033325, BLEU Score: 0.042735802816331124
Epoch 12, Train Loss: 2.0395117504119873, BLEU Score: 0.04553787613752363
Epoch 13, Train Loss: 2.0054229078292845, BLEU Score: 0.04483036657713459
Epoch 14, Train Loss: 1.9815472421646119, BLEU Score: