In [None]:
!pip install datasets transformers optuna safetensors

import torch
import torch.nn as nn
import torch.nn.functional as F
from concurrent.futures import ThreadPoolExecutor
from transformers import AutoTokenizer, Trainer, TrainingArguments, DebertaV2ForSequenceClassification
from datasets import load_dataset, load_metric
import time

class AdvancedQuantumInspiredModel(nn.Module):
    """Encoder followed by two averaged Conv1d branches and an MLP classifier.

    The wrapped encoder must return an object exposing ``hidden_states`` (for
    Hugging Face models, load it with ``output_hidden_states=True``). The last
    hidden state is passed through two parallel ``Conv1d`` + ReLU branches,
    the branch outputs are averaged, flattened and fed to a two-layer head.

    Args:
        deberta_model: Encoder module called as
            ``deberta_model(input_ids=..., attention_mask=...)``.
        max_length: Fixed (padded) sequence length of every input batch. The
            first linear layer is sized ``hidden_size * max_length``.
        num_labels: Number of output classes.
    """

    def __init__(self, deberta_model, max_length=128, num_labels=2):
        super().__init__()
        self.deberta_model = deberta_model

        # Take the channel width from the encoder config when it is available
        # and fall back to 768 otherwise.
        encoder_config = getattr(deberta_model, "config", None)
        hidden_size = getattr(encoder_config, "hidden_size", 768)

        self.conv1 = nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(hidden_size * max_length, 512)
        self.fc2 = nn.Linear(512, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the encoder and the classification head.

        Args:
            input_ids: Token id tensor, padded to the configured ``max_length``.
            attention_mask: Attention mask matching ``input_ids``.
            labels: Optional class-index tensor.

        Returns:
            ``(loss, logits)`` when ``labels`` is given, otherwise ``logits``.

        Raises:
            ValueError: If the flattened features do not match the input size
                of the first linear layer (wrong sequence length).
        """
        outputs = self.deberta_model(input_ids=input_ids, attention_mask=attention_mask)
        # Conv1d convolves over the last axis, so swap the last two axes of the
        # final hidden state before feeding it to the conv branches.
        hidden = outputs.hidden_states[-1].transpose(1, 2)

        # Run the branches inline rather than on worker threads: autograd's
        # grad mode is thread-local, so a caller's torch.no_grad() would not
        # apply inside pool threads, and the pool adds overhead for no gain.
        branch1 = F.relu(self.conv1(hidden))
        branch2 = F.relu(self.conv2(hidden))
        x = (branch1 + branch2) / 2

        x = x.flatten(start_dim=1)
        if x.size(1) != self.fc1.in_features:
            raise ValueError(
                f"Expected {self.fc1.in_features} flattened features but got "
                f"{x.size(1)}; inputs must be padded/truncated to the sequence "
                f"length the model was built with."
            )
        x = F.relu(self.fc1(x))

        # Random multiplicative noise is a training-time regulariser only.
        # Gating it on self.training keeps eval/inference deterministic.
        if self.training and torch.rand(1).item() > 0.5:
            x = x * torch.rand_like(x)

        logits = self.fc2(x)

        if labels is None:
            return logits

        loss = F.cross_entropy(logits.view(-1, self.fc2.out_features), labels.view(-1))
        return loss, logits

# Load the DeBERTa-v3-small tokenizer and checkpoint, then wrap the checkpoint
# in the custom conv/MLP head defined above.
tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
# output_hidden_states=True is required: AdvancedQuantumInspiredModel.forward
# reads outputs.hidden_states[-1]. The checkpoint's own classifier/pooler
# weights are reported as newly initialized when loading.
deberta_model = DebertaV2ForSequenceClassification.from_pretrained('microsoft/deberta-v3-small', output_hidden_states=True)
quantum_model = AdvancedQuantumInspiredModel(deberta_model)

# BoolQ task from SuperGLUE plus its matching metric.
# NOTE(review): both loaders run repository-provided code and prompt
# interactively for confirmation unless `trust_remote_code=True` is passed.
dataset = load_dataset('super_glue', 'boolq')
metric = load_metric('super_glue', 'boolq')

def preprocess_function(examples):
    """Tokenize a batch of question/passage pairs to a fixed length of 128.

    Reads the 'question' and 'passage' columns of ``examples`` and encodes
    them as sentence pairs with the module-level ``tokenizer``, truncating
    and padding every pair to ``max_length``.
    """
    questions = examples['question']
    passages = examples['passage']
    encoding_options = dict(truncation=True, padding='max_length', max_length=128)
    return tokenizer(questions, passages, **encoding_options)

# Tokenize every split of the dataset in batches.
encoded_dataset = dataset.map(preprocess_function, batched=True)

# Keep only the first 50 examples of the train and validation splits so the
# run stays small.
train_dataset = encoded_dataset['train'].select(range(50))
eval_dataset = encoded_dataset['validation'].select(range(50))

# One epoch, batch size 8, evaluation at the end of each epoch.
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=8,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_dir='./logs',
    # NOTE(review): 50 examples at batch size 8 is 7 steps per epoch on a
    # single device, fewer than logging_steps, so no training loss is logged
    # during this run.
    logging_steps=10,
    evaluation_strategy='epoch'
)

class AdvancedTrainer(Trainer):
    """Trainer that feeds only input_ids/attention_mask/labels to the model.

    The wrapped model is expected to return ``(loss, logits)`` when labels
    are supplied.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the loss for one batch.

        Args:
            model: The model being trained or evaluated.
            inputs: Batch dict containing 'input_ids', 'attention_mask' and
                'labels'.
            return_outputs: When true, also return the raw model outputs.
            num_items_in_batch: Accepted for compatibility with Trainer
                versions that pass it to ``compute_loss``; unused because the
                model computes its own mean loss.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is true.

        Raises:
            ValueError: If the batch has no 'labels' entry. Without labels
                the model returns bare logits, so no loss is available.
        """
        labels = inputs.get('labels')
        if labels is None:
            raise ValueError("compute_loss requires 'labels' in the batch inputs")
        outputs = model(inputs['input_ids'], inputs['attention_mask'], labels=labels)
        loss = outputs[0]
        return (loss, outputs) if return_outputs else loss

# Build the trainer. Predictions arrive as a logits array, so take the argmax
# over the class axis before handing them to the BoolQ metric.
trainer = AdvancedTrainer(
    model=quantum_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=lambda p: metric.compute(
        predictions=torch.argmax(torch.tensor(p.predictions), axis=1),
        references=p.label_ids,
    ),
)

print("Training and evaluating Quantum-Inspired model...")
trainer.train()

# Time a single forward pass on one padded example.
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors='pt', padding='max_length', max_length=128)

# The Trainer may have moved the model to an accelerator, so place the inputs
# on whatever device the model's parameters live on to avoid a device mismatch.
model_device = next(quantum_model.parameters()).device
inputs = inputs.to(model_device)

quantum_model.eval()
# perf_counter is monotonic and higher resolution than time.time(). On CUDA,
# kernels run asynchronously, so synchronise around the timed region.
if model_device.type == 'cuda':
    torch.cuda.synchronize()
start_time = time.perf_counter()
with torch.no_grad():
    outputs = quantum_model(inputs['input_ids'], inputs['attention_mask'])
if model_device.type == 'cuda':
    torch.cuda.synchronize()
end_time = time.perf_counter()
quantum_inference_time = end_time - start_time
print(f"Quantum-Inspired Model Inference Time: {quantum_inference_time} seconds")

print("Evaluating the quantum-inspired model...")
results = trainer.evaluate()

print("Model results:", results)


Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.5.0,>=2023.1.0 (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.5.0-py3-none-any.whl.metadata (11 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metada

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/286M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading builder script:   0%|          | 0.00/30.7k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/18.2k [00:00<?, ?B/s]

The repository for super_glue contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/super_glue.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/4.12M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9427 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3270 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3245 [00:00<?, ? examples/s]

  metric = load_metric('super_glue', 'boolq')


Downloading builder script:   0%|          | 0.00/2.63k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

The repository for super_glue contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/super_glue.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Map:   0%|          | 0/9427 [00:00<?, ? examples/s]

Map:   0%|          | 0/3270 [00:00<?, ? examples/s]

Map:   0%|          | 0/3245 [00:00<?, ? examples/s]



Training and evaluating Quantum-Inspired model...


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.831349,0.2


Quantum-Inspired Model Inference Time: 0.43195676803588867 seconds
Evaluating the quantum-inspired model...


Model results: {'eval_loss': 0.8059699535369873, 'eval_accuracy': 0.24, 'eval_runtime': 15.6792, 'eval_samples_per_second': 3.189, 'eval_steps_per_second': 0.446, 'epoch': 1.0}


In [None]:
!pip install transformers

import torch
from transformers import AutoTokenizer, DebertaV2ForSequenceClassification
import time

# Baseline: time one forward pass of the stock DeBERTa-v3-small sequence
# classifier on the same padded example used for the custom model.
tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
# NOTE(review): the classifier head is reported as newly initialized when this
# checkpoint is loaded and the model is not trained here, so the logits printed
# below are not meaningful predictions.
model = DebertaV2ForSequenceClassification.from_pretrained('microsoft/deberta-v3-small')

input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors='pt', padding='max_length', max_length=128)

# Disable gradient tracking and switch to eval mode for the timed pass.
model.eval()
start_time = time.time()
with torch.no_grad():
    outputs = model(**inputs)
end_time = time.time()

# Wall-clock duration of this single call.
inference_time = end_time - start_time
print(f"Baseline DeBERTa-v3 Model Inference Time: {inference_time} seconds")

# Print model outputs (logits)
print(f"Model outputs: {outputs.logits}")




Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Baseline DeBERTa-v3 Model Inference Time: 0.6665582656860352 seconds
Model outputs: tensor([[-0.1372,  0.1318]])
