In [1]:
!pip install torch psutil tqdm numpy rogue-score




ERROR: Could not find a version that satisfies the requirement rogue-score (from versions: none)
ERROR: No matching distribution found for rogue-score

[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Config, GPT2Model, DistilBertModel, GPT2TokenizerFast, DistilBertTokenizerFast,GPT2Tokenizer,GPT2LMHeadModel
import os
import time
import matplotlib.pyplot as plt
import numpy as np
import re
import gc  # Add this import at the top
import psutil
from tqdm import tqdm

import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Config, GPT2Model, DistilBertModel, GPT2TokenizerFast, DistilBertTokenizerFast
from typing import Optional, Dict  # Add this import for type hints

def log_to_file(message):
    """Print ``message`` to standard output.

    Despite its name, this helper does not write to any file; it is a thin
    wrapper around ``print`` used for progress messages.

    Args:
        message: Object to print (converted to text by ``print``).
    """
    print(message)

class CrossAttentionFusionLayer(nn.Module):
    """Fuse GPT-2 and DistilBERT token features with multi-head cross-attention.

    Both feature streams are linearly projected to ``output_dim``. The projected
    GPT-2 features act as attention queries and the projected DistilBERT
    features act as keys and values. The attended result goes through dropout,
    is added back to the projected GPT-2 features (residual connection) and is
    layer-normalized.

    Args:
        gpt2_dim: Size of the last dimension of the GPT-2 features.
        bert_dim: Size of the last dimension of the DistilBERT features.
        output_dim: Size of the fused representation (must be divisible by
            ``num_heads``, as required by ``nn.MultiheadAttention``).
        dropout_rate: Dropout probability used inside the attention module and
            on the attention output.
        num_heads: Number of attention heads.
    """

    def __init__(self, gpt2_dim: int, bert_dim: int, output_dim: int, dropout_rate: float, num_heads: int = 8):
        super().__init__()
        self.gpt2_proj = nn.Linear(gpt2_dim, output_dim)
        self.bert_proj = nn.Linear(bert_dim, output_dim)
        self.cross_attention = nn.MultiheadAttention(embed_dim=output_dim, num_heads=num_heads, dropout=dropout_rate, batch_first=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.layer_norm = nn.LayerNorm(output_dim)

    def forward(self, gpt2_features: torch.Tensor, bert_features: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        """Return the fused features, shaped like the projected GPT-2 features.

        Args:
            gpt2_features: ``[batch_size, seq_len, gpt2_dim]`` query-side features.
            bert_features: ``[batch_size, seq_len, bert_dim]`` key/value-side features.
            attention_mask: ``[batch_size, seq_len]`` mask where 0 marks key
                positions that attention must ignore.

        Returns:
            Tensor of shape ``[batch_size, seq_len, output_dim]``.
        """
        gpt2_proj = self.gpt2_proj(gpt2_features)  # [batch_size, seq_len, output_dim]
        bert_proj = self.bert_proj(bert_features)  # [batch_size, seq_len, output_dim]

        # nn.MultiheadAttention expects True at key positions to be ignored.
        # NOTE(review): the mask is applied to the DistilBERT keys; confirm the
        # caller passes a mask that matches the DistilBERT tokenization.
        key_padding_mask = attention_mask == 0

        fused_features, _ = self.cross_attention(
            query=gpt2_proj,
            key=bert_proj,
            value=bert_proj,
            key_padding_mask=key_padding_mask
        )
        fused_features = self.dropout(fused_features) + gpt2_proj  # Residual connection
        return self.layer_norm(fused_features)

class HybridGPT2DistilBERTMultiTask(nn.Module):
    """Multi-task classifier built on frozen GPT-2 and DistilBERT encoders.

    The two pretrained encoders are loaded and frozen. Their token-level hidden
    states are combined by a ``CrossAttentionFusionLayer`` and fed to three
    heads: intent and category (one prediction per sequence, read from the last
    attended GPT-2 token) and NER (one prediction per token).

    Args:
        num_intents: Number of intent classes.
        num_categories: Number of category classes.
        num_ner_labels: Number of NER tag classes.
        dropout_rate: Dropout probability for the fusion layer and the heads.
        loss_weights: Optional mapping with keys ``'intent'``, ``'category'``
            and ``'ner'`` weighting the individual losses; defaults to
            0.3 / 0.3 / 0.4.
        ner_class_weights: Optional per-class weights for the NER loss.
        category_class_weights: Optional per-class weights for the category loss.
        intent_class_weights: Optional per-class weights for the intent loss.
    """

    def __init__(self, num_intents: int, num_categories: int, num_ner_labels: int,
                 dropout_rate: float, loss_weights: Optional[Dict[str, float]] = None,
                 ner_class_weights: Optional[torch.Tensor] = None,
                 category_class_weights: Optional[torch.Tensor] = None,
                 intent_class_weights: Optional[torch.Tensor] = None):
        super().__init__()
        log_to_file("Initializing model...")
        self.gpt2_config = GPT2Config.from_pretrained('gpt2')
        self.gpt2 = GPT2Model.from_pretrained('gpt2')
        self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')

        # Freeze both encoders; only the fusion layer and the heads stay trainable.
        for param in self.gpt2.parameters():
            param.requires_grad = False
        for param in self.distilbert.parameters():
            param.requires_grad = False
        log_to_file("All GPT-2 and DistilBERT layers remain frozen")

        gpt2_dim = self.gpt2_config.n_embd
        bert_dim = self.distilbert.config.hidden_size
        hidden_size = gpt2_dim  # Keeping output dim same as GPT-2 for consistency

        self.fusion_layer = CrossAttentionFusionLayer(gpt2_dim, bert_dim, hidden_size, dropout_rate)

        self.intent_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, num_intents)
        )
        self.category_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, num_categories)
        )
        self.ner_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, num_ner_labels)
        )

        # Set default loss weights if not provided
        self.loss_weights = loss_weights or {
            'intent': 0.3,
            'category': 0.3,
            'ner': 0.4
        }

        # CrossEntropyLoss treats weight=None as "unweighted", so the optional
        # class weights can be passed straight through.
        self.intent_loss_fn = nn.CrossEntropyLoss(weight=intent_class_weights)
        self.category_loss_fn = nn.CrossEntropyLoss(weight=category_class_weights)
        self.ner_loss_fn = nn.CrossEntropyLoss(weight=ner_class_weights)

    def forward(self, gpt2_input_ids: torch.Tensor, gpt2_attention_mask: torch.Tensor,
                distilbert_input_ids: torch.Tensor, distilbert_attention_mask: torch.Tensor,
                intent_labels: Optional[torch.Tensor] = None,
                category_labels: Optional[torch.Tensor] = None,
                ner_labels: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        """Run both encoders, fuse their features and apply the three heads.

        Returns:
            Dict with ``'intent_logits'``, ``'category_logits'`` and
            ``'ner_logits'``. When all three label tensors are given it also
            contains ``'loss'`` (weighted sum) plus ``'intent_loss'``,
            ``'category_loss'`` and ``'ner_loss'``.
        """
        gpt2_outputs = self.gpt2(input_ids=gpt2_input_ids, attention_mask=gpt2_attention_mask)
        distilbert_outputs = self.distilbert(input_ids=distilbert_input_ids, attention_mask=distilbert_attention_mask)

        gpt2_features = gpt2_outputs.last_hidden_state
        bert_features = distilbert_outputs.last_hidden_state

        fused_features = self.fusion_layer(gpt2_features, bert_features, gpt2_attention_mask)

        # Sequence representation = fused vector of the last attended GPT-2 token.
        # This assumes right padding (TODO confirm against the tokenizer setup).
        # The clamp keeps an all-padding row (mask sum 0) from producing index -1,
        # which torch.gather rejects.
        last_token_positions = (gpt2_attention_mask.sum(dim=1) - 1).clamp(min=0)
        gather_index = last_token_positions.view(-1, 1, 1).expand(-1, 1, fused_features.shape[-1])
        sequence_repr = torch.gather(fused_features, 1, gather_index).squeeze(1)

        intent_logits = self.intent_head(sequence_repr)
        category_logits = self.category_head(sequence_repr)
        ner_logits = self.ner_head(fused_features)

        output_dict = {
            'intent_logits': intent_logits,
            'category_logits': category_logits,
            'ner_logits': ner_logits
        }

        if all(label is not None for label in [intent_labels, category_labels, ner_labels]):
            intent_loss = self.intent_loss_fn(intent_logits, intent_labels)
            category_loss = self.category_loss_fn(category_logits, category_labels)

            # Only non-padded GPT-2 positions contribute to the NER loss.
            active_loss = gpt2_attention_mask.view(-1) == 1
            active_logits = ner_logits.view(-1, ner_logits.size(-1))[active_loss]
            active_labels = ner_labels.view(-1)[active_loss]
            ner_loss = self.ner_loss_fn(active_logits, active_labels)

            total_loss = (self.loss_weights['intent'] * intent_loss +
                          self.loss_weights['category'] * category_loss +
                          self.loss_weights['ner'] * ner_loss)

            output_dict.update({
                'loss': total_loss,
                'intent_loss': intent_loss,
                'category_loss': category_loss,
                'ner_loss': ner_loss
            })

        return output_dict

def _append_entity(entities, tokenizer, entity_tokens, entity_confidences, entity_type):
    """Decode the buffered tokens and append the finished entity to ``entities``."""
    entity_text = tokenizer.convert_tokens_to_string(entity_tokens).strip()
    if entity_text:
        entities.append({
            "entity": entity_text,
            "label": entity_type,
            # Mean of the per-token confidences, as a plain Python float.
            "confidence": sum(entity_confidences) / len(entity_confidences)
        })


def inference_hybrid(model, text, gpt2_tokenizer, distilbert_tokenizer, label_encoders, max_length, device):
    """Classify ``text`` with the hybrid model and decode its predictions.

    Args:
        model: Callable model returning a dict with ``intent_logits``,
            ``category_logits`` and ``ner_logits``.
        text: Input string to classify.
        gpt2_tokenizer: Tokenizer producing the GPT-2 inputs; also used to turn
            predicted entity tokens back into text.
        distilbert_tokenizer: Tokenizer producing the DistilBERT inputs.
        label_encoders: Dict with ``intent_encoder``, ``category_encoder`` and
            ``ner_label_encoder`` mappings from label name to class index.
        max_length: Length both tokenizations are padded/truncated to.
        device: Device the input tensors are moved to.

    Returns:
        ``{"intent": {"label", "confidence"}, "category": {"label",
        "confidence"}, "ner": [{"entity", "label", "confidence"}, ...]}``.
        All confidences are plain Python floats in ``[0.0, 1.0]`` so the
        result can be serialized with ``json``.
    """
    model.eval()

    # Tokenize input identically for both encoders
    tokenizer_options = {
        "max_length": max_length,
        "padding": "max_length",
        "truncation": True,
        "return_tensors": "pt"
    }
    gpt2_encoding = gpt2_tokenizer(text, **tokenizer_options)
    distilbert_encoding = distilbert_tokenizer(text, **tokenizer_options)

    inputs = {
        "gpt2_input_ids": gpt2_encoding["input_ids"].to(device),
        "gpt2_attention_mask": gpt2_encoding["attention_mask"].to(device),
        "distilbert_input_ids": distilbert_encoding["input_ids"].to(device),
        "distilbert_attention_mask": distilbert_encoding["attention_mask"].to(device)
    }

    # Run model
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax probabilities serve as confidence scores (batch of one -> index 0)
    intent_probs = torch.nn.functional.softmax(outputs["intent_logits"], dim=-1)[0]
    category_probs = torch.nn.functional.softmax(outputs["category_logits"], dim=-1)[0]
    ner_probs = torch.nn.functional.softmax(outputs["ner_logits"], dim=-1)[0]

    intent_pred = torch.argmax(intent_probs).item()
    intent_confidence = intent_probs[intent_pred].item()

    category_pred = torch.argmax(category_probs).item()
    category_confidence = category_probs[category_pred].item()

    # .tolist() yields Python ints/floats rather than numpy scalars
    ner_preds = torch.argmax(ner_probs, dim=-1).tolist()
    ner_confidences = torch.max(ner_probs, dim=-1).values.tolist()

    # Invert the encoders: class index -> label name
    intent_decoder = {v: k for k, v in label_encoders["intent_encoder"].items()}
    category_decoder = {v: k for k, v in label_encoders["category_encoder"].items()}
    ner_decoder = {v: k for k, v in label_encoders["ner_label_encoder"].items()}

    intent_label = intent_decoder[intent_pred]
    category_label = category_decoder[category_pred]

    # Only the first ``seq_len`` positions are considered real tokens
    # (assumes right padding - TODO confirm against the tokenizer setup).
    tokens = gpt2_tokenizer.convert_ids_to_tokens(inputs["gpt2_input_ids"][0].tolist())
    seq_len = int(inputs["gpt2_attention_mask"][0].sum().item())
    ner_labels = [ner_decoder[pred] for pred in ner_preds[:seq_len]]

    # Group B-/I- tagged tokens into entities
    entities = []
    entity_type = None  # type of the entity being collected, None when idle
    entity_tokens = []
    entity_confidences = []

    for token, label, confidence in zip(tokens[:seq_len], ner_labels, ner_confidences[:seq_len]):
        if label.startswith("B-"):
            # A new entity starts; close the one in progress first
            if entity_type is not None:
                _append_entity(entities, gpt2_tokenizer, entity_tokens, entity_confidences, entity_type)
            entity_type = label[2:]
            entity_tokens = [token]
            entity_confidences = [confidence]

        elif label.startswith("I-") and entity_type == label[2:]:
            # Continuation of the current entity
            entity_tokens.append(token)
            entity_confidences.append(confidence)

        elif entity_type is not None:
            # Any other tag (including a mismatched I- tag) ends the entity
            _append_entity(entities, gpt2_tokenizer, entity_tokens, entity_confidences, entity_type)
            entity_type = None
            entity_tokens = []
            entity_confidences = []

    # An entity may run up to the end of the sequence
    if entity_type is not None:
        _append_entity(entities, gpt2_tokenizer, entity_tokens, entity_confidences, entity_type)

    return {
        "intent": {"label": intent_label, "confidence": intent_confidence},
        "category": {"label": category_label, "confidence": category_confidence},
        "ner": entities
    }

def generate_response(model, tokenizer, instruction, classification, max_length=512, device="cuda"):
    """Generate a customer-service reply conditioned on the classification.

    A prompt is built from the user instruction plus the predicted intent,
    category and entities, then passed to ``model.generate``. The text after
    the ``[RESP]`` marker is extracted, stripped of URLs, tag-like artifacts
    and JSON fragments, whitespace-normalized, and finally numbered steps are
    put on separate lines.

    Args:
        model: Generation model exposing ``eval()`` and ``generate(...)``.
        tokenizer: Tokenizer exposing ``encode``, ``decode`` and ``eos_token_id``.
        instruction: Raw user query.
        classification: Dict with ``"intent"`` and ``"category"`` (either a
            label string or a dict with a ``"label"`` key) and optionally
            ``"ner"`` (list of dicts with ``"entity"`` and ``"label"``).
        max_length: ``max_length`` passed to ``generate``.
        device: Device the prompt tensors are moved to.

    Returns:
        The cleaned response string. If generation or post-processing raises,
        the error is printed and an apology string containing the error
        message is returned instead.
    """
    model.eval()

    # Extract classification details
    intent = classification["intent"]["label"] if isinstance(classification["intent"], dict) else classification["intent"]
    category = classification["category"]["label"] if isinstance(classification["category"], dict) else classification["category"]

    # Labels may arrive as stringified lists such as "['refund']"
    if isinstance(intent, str) and "[" in intent:
        intent = intent.strip("[]'")
    if isinstance(category, str) and "[" in category:
        category = category.strip("[]'")

    # Format entity information; fall back to "none" when nothing usable
    # remains, including the case where every NER entry is malformed.
    entity_parts = [
        f"{entity['entity']} ({entity['label']})"
        for entity in (classification.get("ner") or [])
        if isinstance(entity, dict) and "entity" in entity and "label" in entity
    ]
    entities_text = ", ".join(entity_parts) or "none"

    # Create a prompt for generating response
    input_text = f"[INST] User query: {instruction}\n\n" \
                 f"Based on the following classification:\n" \
                 f"- Intent: {intent}\n" \
                 f"- Category: {category}\n" \
                 f"- Entities: {entities_text}\n\n" \
                 f"Provide a helpful customer service response: [RESP]"

    # Tokenize input; ones_like already places the mask on the same device
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    attention_mask = torch.ones_like(input_ids)

    try:
        # NOTE(review): max_length is passed straight to generate(); confirm
        # that prompts approaching this length are handled as intended.
        with torch.no_grad():
            output_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=max_length,
                num_beams=5,
                no_repeat_ngram_size=2,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode the generated text (special tokens kept so [RESP] can be found)
        generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=False)

        # Extract response part
        if "[RESP]" in generated_text:
            response = generated_text.split("[RESP]")[1].strip()
            # Clean up any trailing tokens
            if "[EOS]" in response:
                response = response.split("[EOS]")[0].strip()
        else:
            # Fallback extraction
            response = generated_text[len(input_text):].strip()

        # Clean any technical artifacts
        response = re.sub(r'https?://\S+', '', response)  # Remove URLs
        response = re.sub(r'<[^>]*>', '', response)  # Remove HTML tags
        response = re.sub(r'\{\s*"[^"]*":', '', response)  # Remove JSON-like content
        response = re.sub(r'\s+', ' ', response).strip()  # Clean up whitespace

        # Format steps last: doing this before the whitespace clean-up would
        # collapse the inserted line breaks back into spaces.
        steps_pattern = re.search(r'(\d+)\.\s+([A-Z])', response)
        if steps_pattern or "step" in response.lower() or "follow" in response.lower():
            # Put each "N. " marker (N = 1..9) on its own line. The lookbehind
            # skips digits that are part of a longer token such as "11." or "v2.".
            response = re.sub(r'\s*(?<![\w.])([1-9])\. ', r'\n\1. ', response)
            response = response.lstrip('\n')

        return response

    except Exception as e:
        # Deliberate best effort: report the failure and return a fallback reply
        print(f"Error in generate_response: {e}")
        return f"I apologize, but I couldn't generate a response. Error: {str(e)}"

# Memory measurement functions
def get_memory_usage():
    """Return the resident set size (RSS) of the current process in megabytes."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / 1024 / 1024  # bytes -> KB -> MB

# Run a callable and report the memory it used (GPU peak or host RSS growth)
def get_peak_memory_usage(func, *args, **kwargs):
    """Call ``func`` and report how much memory the call used.

    Args:
        func: Callable to measure.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``, except ``device``,
            which is consumed here and selects the measurement mode. It may be
            a string such as ``"cuda"`` / ``"cuda:0"`` / ``"cpu"`` or a
            ``torch.device``.

    Returns:
        ``(result, memory_mb)`` where ``result`` is the return value of
        ``func``. For a CUDA device (when CUDA is available) ``memory_mb`` is
        ``torch.cuda.max_memory_allocated()`` in MB, measured after the peak
        statistics were reset just before the call. Otherwise it is the growth
        of the process RSS across the call in MB, floored at 0. That is a
        before/after difference, not a true peak.
    """
    device_param = kwargs.pop('device', None)  # Remove device parameter before calling func

    # str() makes "cuda", "cuda:0" and torch.device("cuda") all compare the same way
    measure_gpu = torch.cuda.is_available() and str(device_param).startswith("cuda")

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()

    start_mem = get_memory_usage()
    result = func(*args, **kwargs)  # Call without device parameter
    end_mem = get_memory_usage()

    if measure_gpu:
        return result, torch.cuda.max_memory_allocated() / 1024 / 1024  # MB
    return result, max(0, end_mem - start_mem)

class PerformanceTest:
    def __init__(self, model_paths, test_data_path, num_samples=50):
        self.output_dir = model_paths["hybrid_model_dir"]
        self.generation_model_path = model_paths["generation_model_path"]
        self.generation_tokenizer_path = model_paths["generation_tokenizer_path"]
        self.test_data_path = test_data_path
        self.num_samples = num_samples
        self.devices = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
        self.results = {device: [] for device in self.devices}
        self.summary = {device: {"classification_time": [], "generation_time": [], "overall_time": [], "memory_usage": []} for device in self.devices}
    
    def load_models(self, device):
        """Load the classification and generation models plus tokenizers onto ``device``.

        Reads ``label_encoders.json``, ``hyperparameters.json`` and
        ``hybrid_model.pth`` from ``self.output_dir``, builds the hybrid
        classifier and restores its weights. The generation model and tokenizer
        come from the configured paths; if that fails for any reason, the stock
        ``gpt2`` checkpoint is used instead.

        Args:
            device: Target device, e.g. ``"cpu"`` or ``"cuda"``.

        Returns:
            Tuple ``(classification_model, generation_model, gpt2_tokenizer,
            distilbert_tokenizer, generation_tokenizer)``. The same objects are
            also stored on ``self``.
        """
        print(f"\nLoading models on {device.upper()}...")

        # Load hybrid model for classification
        encoders_path = os.path.join(self.output_dir, "label_encoders.json")
        hyperparams_path = os.path.join(self.output_dir, "hyperparameters.json")
        model_path = os.path.join(self.output_dir, "hybrid_model.pth")

        print(f"Loading from: {encoders_path}")
        with open(encoders_path, 'r', encoding='utf-8') as f:
            self.label_encoders = json.load(f)

        with open(hyperparams_path, 'r', encoding='utf-8') as f:
            self.hyperparameters = json.load(f)

        self.gpt2_tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
        self.distilbert_tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

        if self.gpt2_tokenizer.pad_token is None:
            self.gpt2_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        if self.distilbert_tokenizer.pad_token is None:
            self.distilbert_tokenizer.add_special_tokens({'pad_token': '[PAD]'})

        self.classification_model = HybridGPT2DistilBERTMultiTask(
            num_intents=len(self.label_encoders["intent_encoder"]),
            num_categories=len(self.label_encoders["category_encoder"]),
            num_ner_labels=len(self.label_encoders["ner_label_encoder"]),
            dropout_rate=self.hyperparameters["dropout_rate"]
        )

        # The embedding matrix must cover any pad token added above before the
        # checkpoint weights are loaded.
        if self.gpt2_tokenizer.pad_token_id is not None:
            self.classification_model.gpt2.resize_token_embeddings(len(self.gpt2_tokenizer))

        # SECURITY NOTE: torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        state_dict = torch.load(model_path, map_location=device)
        # strict=False tolerates key mismatches, so report them rather than
        # silently benchmarking a partially initialised model.
        load_report = self.classification_model.load_state_dict(state_dict, strict=False)
        if load_report.missing_keys or load_report.unexpected_keys:
            print(f"Warning: checkpoint/model key mismatch - "
                  f"missing: {list(load_report.missing_keys)}, "
                  f"unexpected: {list(load_report.unexpected_keys)}")
        self.classification_model.to(device)
        self.classification_model.eval()

        def load_generation_pair(model_source, tokenizer_source):
            """Load a GPT-2 LM and tokenizer and register the prompt markers."""
            gen_model = GPT2LMHeadModel.from_pretrained(model_source).to(device)
            gen_tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_source)
            gen_tokenizer.pad_token = gen_tokenizer.eos_token
            gen_tokenizer.add_special_tokens({'additional_special_tokens': ['[INST]', '[RESP]', '[EOS]']})
            gen_model.resize_token_embeddings(len(gen_tokenizer))
            return gen_model, gen_tokenizer

        # Load generation model using from_pretrained instead of torch.load
        try:
            print(f"Attempting to load generation model from {self.generation_model_path}")
            self.generation_model, self.generation_tokenizer = load_generation_pair(
                self.generation_model_path, self.generation_tokenizer_path
            )
        except Exception as e:
            # Deliberate best effort: the benchmark continues with stock GPT-2.
            print(f"Error loading generation model: {e}")
            print("Falling back to default GPT2...")
            self.generation_model, self.generation_tokenizer = load_generation_pair('gpt2', 'gpt2')

        self.generation_model.eval()

        return self.classification_model, self.generation_model, self.gpt2_tokenizer, self.distilbert_tokenizer, self.generation_tokenizer
    
    def run_benchmark(self):
        # Load test data
        with open(self.test_data_path, "r", encoding="utf-8") as f:
            test_data = json.load(f)
            
        # Select samples for testing
        if len(test_data) > self.num_samples:
            test_samples = test_data[:self.num_samples]
        else:
            test_samples = test_data
            
        print(f"Running performance test on {len(test_samples)} samples")
        
        # Run tests on each device
        for device in self.devices:
            print(f"\n{'='*80}")
            print(f"RUNNING BENCHMARK ON {device.upper()}")
            print(f"{'='*80}")
            
            classification_model, generation_model, gpt2_tokenizer, distilbert_tokenizer, generation_tokenizer = self.load_models(device)
            
            for idx, sample in enumerate(tqdm(test_samples, desc=f"Testing on {device.upper()}")):
                instruction = sample["instruction"]
                
                # Measure classification time and memory
                def run_classification():
                    return inference_hybrid(
                        classification_model, 
                        instruction, 
                        gpt2_tokenizer, 
                        distilbert_tokenizer, 
                        self.label_encoders, 
                        self.hyperparameters["max_length"],
                        device
                    )
                
                classification_start = time.time()
                classification_result, classification_memory = get_peak_memory_usage(
                    run_classification, device=device
                )
                classification_end = time.time()
                classification_time = classification_end - classification_start
                
                # Create new input with classification results
                intent = classification_result["intent"]["label"]
                category = classification_result["category"]["label"]
                entities_text = ", ".join([f"{entity['entity']} ({entity['label']})" for entity in classification_result["ner"]]) if classification_result["ner"] else "none"
                new_input = f"{instruction} [Classified: Intent is '{intent}', Category is '{category}', Entities are {entities_text}]"
                
                # Measure generation time and memory
                def run_generation():
                    return generate_response(
                        generation_model,
                        generation_tokenizer,
                        instruction,
                        classification_result,
                        device=device
                    )
                
                generation_start = time.time()
                generated_response, generation_memory = get_peak_memory_usage(
                    run_generation, device=device
                )
                generation_end = time.time()
                generation_time = generation_end - generation_start
                
                # Calculate overall time and memory
                overall_time = classification_time + generation_time
                overall_memory = classification_memory + generation_memory
                
                # Store results
                result = {
                    "sample_id": idx + 1,
                    "instruction": instruction,
                    "new_input": new_input,
                    "generated_response": generated_response,
                    "classification_time": classification_time,
                    "generation_time": generation_time,
                    "overall_time": overall_time,
                    "classification_memory": classification_memory,
                    "generation_memory": generation_memory,
                    "overall_memory": overall_memory
                }
                self.results[device].append(result)
                
                # Update summary statistics
                self.summary[device]["classification_time"].append(classification_time)
                self.summary[device]["generation_time"].append(generation_time)
                self.summary[device]["overall_time"].append(overall_time)
                self.summary[device]["memory_usage"].append(overall_memory)
                
                # Print sample result
                print(f"\nSample {idx+1}/{len(test_samples)} (Device: {device.upper()}):")
                print(f"Instruction: {instruction}")
                print(f"New Input: {new_input}")
                print(f"Generated Response: {generated_response}")
                print(f"Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)")
                print(f"Hybrid Cross Attention | {classification_time:.4f}                    | {generation_time:.4f}                   | {overall_time:.4f}                   | {device.upper()}        | {overall_memory:.4f}")
            
            # Calculate average times
            self.summary[device]["avg_classification_time"] = sum(self.summary[device]["classification_time"]) / len(test_samples)
            self.summary[device]["avg_generation_time"] = sum(self.summary[device]["generation_time"]) / len(test_samples)
            self.summary[device]["avg_overall_time"] = sum(self.summary[device]["overall_time"]) / len(test_samples)
            self.summary[device]["avg_memory_usage"] = sum(self.summary[device]["memory_usage"]) / len(test_samples)
            
            print(f"\n{'='*80}")
            print(f"BENCHMARK SUMMARY FOR {device.upper()}")
            print(f"{'='*80}")
            print(f"Average Classification Time: {self.summary[device]['avg_classification_time']:.4f} seconds")
            print(f"Average Generation Time: {self.summary[device]['avg_generation_time']:.4f} seconds")
            print(f"Average Overall Time: {self.summary[device]['avg_overall_time']:.4f} seconds")
            print(f"Average Memory Usage: {self.summary[device]['avg_memory_usage']:.4f} MB")

        # Save results to files and create plots
        self.save_results()

    # Result persistence and reporting
    def save_results(self):
        """Save benchmark results to JSON files in the specified format"""
        
        # Save CPU results
        if "cpu" in self.devices and "avg_classification_time" in self.summary["cpu"]:
            cpu_results = []
            
            for result in self.results["cpu"]:
                formatted_result = {
                    "Model": "Hybrid Cross attention",
                    "Classification Time (s)": result["classification_time"],
                    "Text Generation Time (s)": result["generation_time"],
                    "Overall Inference Time (s)": result["overall_time"],
                    "Device": "CPU",
                    "Memory Usage (MB)": result["overall_memory"],
                    "Instruction": result["instruction"],
                    "New Input": result["new_input"],
                    "Generated Response": result["generated_response"]
                }
                cpu_results.append(formatted_result)
            
            with open("performance_results_cpu.json", "w") as f:
                json.dump(cpu_results, f, indent=4)
            print("CPU results saved to performance_results_cpu.json")
        
        # Save GPU results
        if "cuda" in self.devices and "avg_classification_time" in self.summary["cuda"]:
            gpu_results = []
            
            for result in self.results["cuda"]:
                formatted_result = {
                    "Model": "Hybrid Cross Attention",
                    "Classification Time (s)": result["classification_time"],
                    "Text Generation Time (s)": result["generation_time"],
                    "Overall Inference Time (s)": result["overall_time"],
                    "Device": "GPU",
                    "Memory Usage (MB)": result["overall_memory"],
                    "Instruction": result["instruction"],
                    "New Input": result["new_input"],
                    "Generated Response": result["generated_response"]
                }
                gpu_results.append(formatted_result)
            
            with open("performance_results_gpu.json", "w") as f:
                json.dump(gpu_results, f, indent=4)
            print("GPU results saved to performance_results_gpu.json")
        
        # Create performance log and comparison plots
        self.create_performance_log()
        self.create_comparison_plots()

    def create_performance_log(self):
        """Create a text file with performance comparisons"""
        with open("performance_log.txt", "w") as f:
            f.write("=== PERFORMANCE BENCHMARK RESULTS ===\n\n")
            
            # Write CPU results
            if "cpu" in self.devices:
                f.write("CPU PERFORMANCE:\n")
                f.write(f"Average Classification Time: {self.summary['cpu']['avg_classification_time']:.4f} seconds\n")
                f.write(f"Average Generation Time: {self.summary['cpu']['avg_generation_time']:.4f} seconds\n")
                f.write(f"Average Overall Time: {self.summary['cpu']['avg_overall_time']:.4f} seconds\n")
                f.write(f"Average Memory Usage: {self.summary['cpu']['avg_memory_usage']:.4f} MB\n\n")
            
            # Write GPU results
            if "cuda" in self.devices and "cuda" in self.results:
                f.write("GPU PERFORMANCE:\n")
                f.write(f"Average Classification Time: {self.summary['cuda']['avg_classification_time']:.4f} seconds\n")
                f.write(f"Average Generation Time: {self.summary['cuda']['avg_generation_time']:.4f} seconds\n")
                f.write(f"Average Overall Time: {self.summary['cuda']['avg_overall_time']:.4f} seconds\n")
                f.write(f"Average Memory Usage: {self.summary['cuda']['avg_memory_usage']:.4f} MB\n\n")
            
            # Write comparison
            if "cpu" in self.devices and "cuda" in self.devices and "cuda" in self.results:
                f.write("CPU vs GPU COMPARISON:\n")
                class_speedup = self.summary['cpu']['avg_classification_time'] / self.summary['cuda']['avg_classification_time']
                gen_speedup = self.summary['cpu']['avg_generation_time'] / self.summary['cuda']['avg_generation_time']
                overall_speedup = self.summary['cpu']['avg_overall_time'] / self.summary['cuda']['avg_overall_time']
                
                f.write(f"Classification Speed Improvement: {class_speedup:.2f}x faster on GPU\n")
                f.write(f"Generation Speed Improvement: {gen_speedup:.2f}x faster on GPU\n")
                f.write(f"Overall Speed Improvement: {overall_speedup:.2f}x faster on GPU\n\n")
                
                f.write("Notes:\n")
                f.write("- GPU memory usage is typically higher but processing is faster\n")
                f.write("- The generation task shows the largest speed improvement on GPU\n")
            
            f.write("\n=== END OF REPORT ===\n")
        
        print("Performance log saved to performance_log.txt")

    def create_comparison_plots(self):
        """Create matplotlib comparison plots"""
        if "cpu" not in self.devices or "cuda" not in self.devices or "cuda" not in self.results:
            print("Both CPU and GPU results are needed for comparison plots")
            return
        
        # Set up the plots
        plt.figure(figsize=(16, 10))
        
        # 1. Processing Time Comparison
        plt.subplot(2, 2, 1)
        
        labels = ['Classification', 'Generation', 'Overall']
        cpu_times = [self.summary['cpu']['avg_classification_time'], 
                    self.summary['cpu']['avg_generation_time'],
                    self.summary['cpu']['avg_overall_time']]
        
        gpu_times = [self.summary['cuda']['avg_classification_time'], 
                    self.summary['cuda']['avg_generation_time'],
                    self.summary['cuda']['avg_overall_time']]
        
        x = np.arange(len(labels))
        width = 0.35
        
        plt.bar(x - width/2, cpu_times, width, label='CPU')
        plt.bar(x + width/2, gpu_times, width, label='GPU')
        
        plt.xlabel('Task')
        plt.ylabel('Time (seconds)')
        plt.title('Average Processing Time Comparison')
        plt.xticks(x, labels)
        plt.legend()
        
        # Add value labels on the bars
        for i, v in enumerate(cpu_times):
            plt.text(i - width/2, v + 0.01, f"{v:.2f}s", ha='center')
        
        for i, v in enumerate(gpu_times):
            plt.text(i + width/2, v + 0.01, f"{v:.2f}s", ha='center')
        
        # 2. Speed Improvement
        plt.subplot(2, 2, 2)
        
        speedups = [
            cpu_times[0] / gpu_times[0] if gpu_times[0] > 0 else 0,
            cpu_times[1] / gpu_times[1] if gpu_times[1] > 0 else 0,
            cpu_times[2] / gpu_times[2] if gpu_times[2] > 0 else 0
        ]
        
        plt.bar(x, speedups, width=0.5)
        plt.axhline(y=1.0, color='r', linestyle='-', alpha=0.3)
        plt.xlabel('Task')
        plt.ylabel('Times Faster on GPU')
        plt.title('CPU vs GPU Speed Improvement')
        plt.xticks(x, labels)
        
        # Add speed improvement labels
        for i, v in enumerate(speedups):
            plt.text(i, v + 0.1, f"{v:.1f}x", ha='center')
        
        # 3. Memory Usage
        plt.subplot(2, 2, 3)
        
        memory_labels = ['CPU', 'GPU']
        memory_usage = [self.summary['cpu']['avg_memory_usage'],
                    self.summary['cuda']['avg_memory_usage']]
        
        plt.bar(memory_labels, memory_usage)
        plt.xlabel('Device')
        plt.ylabel('Memory Usage (MB)')
        plt.title('Average Memory Usage')
        
        # Add memory usage labels
        for i, v in enumerate(memory_usage):
            plt.text(i, v + 0.1, f"{v:.1f} MB", ha='center')
        
        # 4. Time breakdown for both devices
        plt.subplot(2, 2, 4)
        
        # CPU breakdown
        cpu_breakdown = [
            self.summary['cpu']['avg_classification_time'] / self.summary['cpu']['avg_overall_time'] * 100,
            self.summary['cpu']['avg_generation_time'] / self.summary['cpu']['avg_overall_time'] * 100
        ]
        
        # GPU breakdown  
        gpu_breakdown = [
            self.summary['cuda']['avg_classification_time'] / self.summary['cuda']['avg_overall_time'] * 100,
            self.summary['cuda']['avg_generation_time'] / self.summary['cuda']['avg_overall_time'] * 100
        ]
        
        task_labels = ['Classification', 'Generation']
        
        x = np.arange(len(memory_labels))
        width = 0.35
        
        plt.bar(x - width/2, [cpu_breakdown[0], gpu_breakdown[0]], width, label='Classification')
        plt.bar(x - width/2, [cpu_breakdown[1], gpu_breakdown[1]], width, bottom=[cpu_breakdown[0], gpu_breakdown[0]], 
                label='Generation')
        
        plt.xlabel('Device')
        plt.ylabel('Percentage of Overall Time (%)')
        plt.title('Task Time Distribution')
        plt.xticks(x - width/2, memory_labels)
        plt.legend()
        
        # Add percentage labels
        for i, (c, g) in enumerate(zip(cpu_breakdown, gpu_breakdown)):
            plt.text(0 - width/2, c/2 if i == 0 else cpu_breakdown[0] + c/2, 
                    f"{c:.1f}%", ha='center')
            plt.text(1 - width/2, g/2 if i == 0 else gpu_breakdown[0] + g/2,
                    f"{g:.1f}%", ha='center')
        
        plt.tight_layout()
        plt.savefig('performance_comparison.png')
        plt.close()
        
        print("Performance comparison plots saved to performance_comparison.png")
def main(model_paths=None, test_data_path="../../test.json", num_samples=50):
    """Build a ``PerformanceTest`` and run the benchmark.

    Called with no arguments this behaves exactly as before; the parameters
    only expose values that used to be hard-coded in the body.

    Args:
        model_paths: Mapping handed to ``PerformanceTest``. Must contain the
            key ``"hybrid_model_dir"`` (it is printed here). When ``None`` the
            default project-relative locations are used, which also set
            ``"generation_model_path"`` and ``"generation_tokenizer_path"``.
        test_data_path: Path of the test data file handed to ``PerformanceTest``.
        num_samples: Number of samples handed to ``PerformanceTest``.
    """
    if model_paths is None:
        # Default locations, relative to the notebook's working directory.
        model_paths = {
            "hybrid_model_dir": "../../Hybrid_Cross_Attention_Freeze",
            "generation_model_path": "../../text_generation_results_03-09-25/model",
            "generation_tokenizer_path": "../../text_generation_results_03-09-25/tokenizer"
        }

    # Echo the paths so a misconfigured run is easy to spot in the output.
    print(f"Hybrid model directory: {model_paths['hybrid_model_dir']}")
    print(f"Test data path: {test_data_path}")

    # Run the benchmark
    benchmark = PerformanceTest(model_paths, test_data_path, num_samples)
    benchmark.run_benchmark()

if __name__ == "__main__":
    # Report which accelerator PyTorch can see before starting the benchmark.
    cuda_available = torch.cuda.is_available()
    device_info = "CUDA" if cuda_available else "CPU"

    # List every visible GPU by index and name (no-op on CPU-only machines).
    gpu_count = torch.cuda.device_count() if cuda_available else 0
    for gpu_index in range(gpu_count):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

    print(f"Running on: {device_info}")

    # Kick off the benchmark with its default configuration.
    main()



GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU
Running on: CUDA
Hybrid model directory: ../../Hybrid_Cross_Attention_Freeze
Test data path: ../../test.json
Running performance test on 50 samples

RUNNING BENCHMARK ON CPU

Loading models on CPU...
Loading from: ../../Hybrid_Cross_Attention_Freeze\label_encoders.json
Initializing model...
All GPT-2 and DistilBERT layers remain frozen


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Attempting to load generation model from ../../text_generation_results_03-09-25/model


Testing on CPU:   2%|▏         | 1/50 [00:17<14:04, 17.24s/it]


Sample 1/50 (Device: CPU):
Instruction: i do not know what i have to do to get my invoice #85632
New Input: i do not know what i have to do to get my invoice #85632 [Classified: Intent is 'check_invoice', Category is 'invoice', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about the steps to take in order to retrieve your invoice with the number #{{client last name}}. let me guide you through the process. first, please log in to your account on our website. once logged in, navigate to the "billing" or "invoices" section. there, you should be able to find a list of your invoices. look for a tab or link that says "view invoice." click on that tab, and you'll be directed to a page where you can view the invoice details. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team. they will be more than happy to assist you further.
Model                | Classification Time (s)   | Text Generation Time (s) 

Testing on CPU:   4%|▍         | 2/50 [00:25<09:43, 12.15s/it]


Sample 2/50 (Device: CPU):
Instruction: transfer to live agent
New Input: transfer to live agent [Classified: Intent is 'contact_human_agent', Category is 'contact', Entities are live (delivery_city), agent (account_type)]
Generated Response: thank you for reaching out! i'm here to assist you in transferring to one of our live agents. your satisfaction is our top priority, and we want to ensure that you receive the support you need. could you please provide me with more details about the specific issue or question you have? this will help me direct you to the most suitable agent who can address your needs effectively.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.4511                    | 8.1363                   | 8.5875                   | CPU        | 0.0469


Testing on CPU:   6%|▌         | 3/50 [00:37<09:25, 12.03s/it]


Sample 3/50 (Device: CPU):
Instruction: i need help to see how long refunds take
New Input: i need help to see how long refunds take [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the timeframe for refunds. the time it takes for a refund to be processed can vary depending on several factors, such as the payment method used and the specific policies of the vendor or service provider. to provide you with accurate information, could you please provide me with your order number or any other relevant details? this will help me locate the refund request and give you the most up-to-date timeline. thank you for your cooperation, and i appreciate your patience while i gather the necessary information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.3921                    | 11.49

Testing on CPU:   8%|▊         | 4/50 [00:52<10:07, 13.21s/it]


Sample 4/50 (Device: CPU):
Instruction: help to pseak to customer support
New Input: help to pseak to customer support [Classified: Intent is 'contact_customer_service', Category is 'contact', Entities are customer (account_type)]
Generated Response: thank you for reaching out! i'm here to provide you with the assistance you need to reach our dedicated support team. to get started, could you please provide me with more details about the specific issue or question you have? this will help me guide you through the process and ensure that your query is directed to the right department.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.4130                    | 14.5885                   | 15.0015                   | CPU        | 1.9844


Testing on CPU:  10%|█         | 5/50 [01:25<15:16, 20.37s/it]


Sample 5/50 (Device: CPU):
Instruction: i do not know what to do to swtch to the premium account
New Input: i do not know what to do to swtch to the premium account [Classified: Intent is 'switch_account', Category is 'account', Entities are premium (account_type)]
Generated Response: i appreciate your interest in switching to our {{account type}} account, and i'm here to guide you through the process step by step. to get started, could you please provide me with your account details, such as your username or email address? once i have this information, i'll be able to assist you further and ensure a smooth transition. rest assured, your security and privacy are of utmost importance to us, so your information will be handled with the utmost care.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.8451                    | 31.2312                   | 33.0762               

Testing on CPU:  12%|█▏        | 6/50 [02:32<26:28, 36.10s/it]


Sample 6/50 (Device: CPU):
Instruction: i need assistance resetting the key of my user account
New Input: i need assistance resetting the key of my user account [Classified: Intent is 'recover_password', Category is 'account', Entities are user (account_type)]
Generated Response: assuredly! i'm here to provide you with the assistance you need to reset your key to regain access to your account. let's work together to resolve this issue and ensure a smooth and secure reset process. here are the steps you can follow: 1. begin by visiting our platform's "{{login page url}}" to initiate the account recovery journey. you'll be prompted to enter the email address associated with your user profile. remember, your security and privacy are of utmost importance to us, so please refrain from sharing any personal identifiers here. 2. once you've entered your email, keep an eye on your inbox for any messages or notifications from our customer support team. they will be able to guide you through the

Testing on CPU:  14%|█▍        | 7/50 [03:08<25:55, 36.18s/it]


Sample 7/50 (Device: CPU):
Instruction: i want help taking a quick look at the bills from john robertson
New Input: i want help taking a quick look at the bills from john robertson [Classified: Intent is 'check_invoice', Category is 'invoice', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in quickly reviewing the invoices from {{salutation}} {{client last name}}. to provide you with the most accurate and up-to-date information, could you please provide me with some additional details such as the date range or any specific invoice numbers? this will help me narrow down the search and find the information you need. rest assured, i'll do my best to guide you through the process and ensure you have a seamless experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7366                    | 34.6054                   | 36.3420 

Testing on CPU:  16%|█▌        | 8/50 [03:45<25:31, 36.46s/it]


Sample 8/50 (Device: CPU):
Instruction: i can't enter a delivery address
New Input: i can't enter a delivery address [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are delivery (account_category)]
Generated Response: i'm sorry to hear that you're having trouble entering a shipping address. let's work together to resolve this issue. to better assist you, could you please provide more details about the specific problem you are facing? are you encountering any error messages or is the system not accepting the address format? once i have a better understanding of the issue, i'll be able to provide you with the most accurate guidance and support. your satisfaction is our top priority, and we're committed to resolving this matter for you as quickly as possible.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.6904                    | 35.35

Testing on CPU:  18%|█▊        | 9/50 [04:12<22:46, 33.34s/it]


Sample 9/50 (Device: CPU):
Instruction: can you help me place an order?
New Input: can you help me place an order? [Classified: Intent is 'delivery_options', Category is 'contact', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in placing your order. your satisfaction is our top priority, and we want to ensure that you have a seamless experience with us. could you please provide me with more details about the specific products or services you are interested in? this will allow me to guide you through the ordering process accurately and efficiently.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7032                    | 24.7756                   | 26.4788                   | CPU        | 1.0391


Testing on CPU:  20%|██        | 10/50 [05:22<29:53, 44.83s/it]


Sample 10/50 (Device: CPU):
Instruction: i do not know how i can set up my shipping address
New Input: i do not know how i can set up my shipping address [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about the process of setting up your delivery address. allow me to guide you through it step by step: 1. log in to your account on our website or app by navigating to the "my account" or "profile" section, where you'll find a dedicated section to manage your shipping addresses. you can usually find this option by clicking on your profile picture or selecting the option to "add a new address" from the dropdown menu. make sure to double-check all the required fields, such as street address, city, state, and zip code, to ensure accurate and up-to-date information for each address you want to add. if you encounter any difficulties or have further questions, please don't hesitate to reach ou

Testing on CPU:  22%|██▏       | 11/50 [06:57<39:06, 60.16s/it]


Sample 11/50 (Device: CPU):
Instruction: i need assistance to set my new delovery address up
New Input: i need assistance to set my new delovery address up [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are new (account_type)]
Generated Response: i'll take care of it! i'm here to assist you in setting up your new delivery address. to get started, please follow these steps: 1. log in to your account on our website or app by navigating to the "my account" or "profile" section, where you should find an option to manage your shipping addresses. look for the option that says "add new address" and click on it. you will then be prompted to enter the required details, such as your street name, city, state, and zip code. make sure to double-check all the fields before saving the necessary information for each address you want to add.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memor

Testing on CPU:  24%|██▍       | 12/50 [07:23<31:24, 49.60s/it]


Sample 12/50 (Device: CPU):
Instruction: help me to check what payment methods are available
New Input: help me to check what payment methods are available [Classified: Intent is 'check_payment_methods', Category is 'payment', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in checking the available payment options. to provide you with accurate information, could you please provide me with your full name, email address, and a preferred payment method? once i have these details, i'll be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7563                    | 23.6898                   | 25.4462                   | CPU        | 10.1758


Testing on CPU:  26%|██▌       | 13/50 [07:45<25:26, 41.25s/it]


Sample 13/50 (Device: CPU):
Instruction: i call to leave my feedback for ur services
New Input: i call to leave my feedback for ur services [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: we appreciate your willingness to share your feedback about our services. your insights are valuable to us as they help us understand how we can improve and provide a better experience for our customers. please let me know if you have any specific questions or if there's anything else i can assist you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.6823                    | 20.3641                   | 22.0463                   | CPU        | 0.6406


Testing on CPU:  28%|██▊       | 14/50 [08:19<23:32, 39.22s/it]


Sample 14/50 (Device: CPU):
Instruction: open freemium account for wife
New Input: open freemium account for wife [Classified: Intent is 'create_account', Category is 'account', Entities are open fre (account_type), em (account_type), ium (account_type), account (account_type), wife (account_type)]
Generated Response: thank you for your interest in opening a {{account type}} account with us! i'm here to assist you every step of the way. to get started, could you please provide me with your wife's full name, email address, and a preferred username? once i have these details, i'll guide you through the account creation process and ensure a smooth experience for both of you.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.8357                    | 32.6992                   | 34.5349                   | CPU        | 1.0547


Testing on CPU:  30%|███       | 15/50 [08:52<21:41, 37.18s/it]


Sample 15/50 (Device: CPU):
Instruction: i am trying to see the status of order ord-1135283
New Input: i am trying to see the status of order ord-1135283 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-1135283 (order_number)]
Generated Response: i'm sorry to hear that you're experiencing difficulties with canceling your order with the order number {{order number}}. your satisfaction is our top priority, and we're here to assist you every step of the way. to provide you with accurate information, could you please confirm if you made the purchase on our website or through another platform? once we have the necessary details, we'll be able to guide you through the cancellation process and ensure a smooth experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.8811                    | 30.5609                   | 32.4421                   |

Testing on CPU:  32%|███▏      | 16/50 [09:34<21:52, 38.61s/it]


Sample 16/50 (Device: CPU):
Instruction: wanna see ur payment modalities will yahelp me
New Input: wanna see ur payment modalities will yahelp me [Classified: Intent is 'check_payment_methods', Category is 'payment', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in viewing our available payment methods. we strive to provide a seamless and convenient payment experience for our valued customers. to view our payment options, you can visit our website and navigate to the "payments" or "checkout" section. there you will find a comprehensive list of our accepted methods, including credit/debit cards, paypal, bank transfers, apple pay, and google wallet. if you have any further questions or need additional guidance, please don't hesitate to let me know.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7234                    | 40.215

Testing on CPU:  34%|███▍      | 17/50 [10:15<21:39, 39.38s/it]


Sample 17/50 (Device: CPU):
Instruction: what do i ned to do to change to the real estate account?
New Input: what do i ned to do to change to the real estate account? [Classified: Intent is 'switch_account', Category is 'account', Entities are real (account_category), estate (account_category)]
Generated Response: i appreciate your interest in upgrading to our {{account category}} account. let me guide you through the process step by step. first, log in to your account on our website or mobile app. once you're logged in, locate the account settings or profile section. look for an option that says "upgrade" or "switch account type". click on that option, and you'll be directed to a page where you can make the necessary changes. if you encounter any difficulties or have further questions, please don't hesitate to reach out. i'm here to assist you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device    

Testing on CPU:  36%|███▌      | 18/50 [10:42<19:03, 35.72s/it]


Sample 18/50 (Device: CPU):
Instruction: notify of errorw with online registration
New Input: notify of errorw with online registration [Classified: Intent is 'registration_problems', Category is 'account', Entities are none]
Generated Response: we understand that encountering errors during the registration process can be frustrating. rest assured, we are here to assist you in resolving this issue. could you please provide more details about the specific error you are facing? this will help us identify the root cause and provide you with the most suitable solution. thank you for bringing this to our attention, and we appreciate your patience as we work together to resolve this matter.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.8127                    | 25.3840                   | 27.1967                   | CPU        | 0.0117


Testing on CPU:  38%|███▊      | 19/50 [11:07<16:46, 32.45s/it]


Sample 19/50 (Device: CPU):
Instruction: another standard accounmt for father
New Input: another standard accounmt for father [Classified: Intent is 'contact_human_agent', Category is 'account', Entities are standard (account_type), acc (delivery_country), father (delivery_city)]
Generated Response: i'm here to assist you in connecting with a human agent who can provide the support you need. please bear with me for a moment while i transfer you to one of our dedicated representatives who will be able to address your concerns. your satisfaction is our top priority, and we appreciate your patience as we work together to resolve this matter.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.8925                    | 22.9476                   | 24.8401                   | CPU        | 1.0586


Testing on CPU:  40%|████      | 20/50 [11:37<15:48, 31.61s/it]


Sample 20/50 (Device: CPU):
Instruction: i need infrmation about the cancellation of my enterprise account
New Input: i need infrmation about the cancellation of my enterprise account [Classified: Intent is 'delete_account', Category is 'account', Entities are enterprise (account_type)]
Generated Response: i understand that you need assistance with canceling your {{account type}} account. i'm here to help you through the process. could you please provide me with your account details, such as your username or email address, so that i can assist you further? your privacy and security are of utmost importance to us, and we will handle your information with the utmost care.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 2.0455                    | 27.5950                   | 29.6406                   | CPU        | 0.0117


Testing on CPU:  42%|████▏     | 21/50 [12:03<14:30, 30.00s/it]


Sample 21/50 (Device: CPU):
Instruction: i want help ordering a few of your product
New Input: i want help ordering a few of your product [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in placing your order for our products. your satisfaction is our top priority, and we want to ensure that you have a seamless experience with us. could you please provide me with more details about the specific products you would like to order? this will help me guide you through the ordering process more effectively.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7253                    | 24.5292                   | 26.2544                   | CPU        | 1.0156


Testing on CPU:  44%|████▍     | 22/50 [12:42<15:20, 32.86s/it]


Sample 22/50 (Device: CPU):
Instruction: checking status of order ord-6772250
New Input: checking status of order ord-6772250 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-6772250 (order_number)]
Generated Response: i'm sorry to hear that you're having trouble with canceling your order with the order number {{order number}}. your satisfaction is our top priority, and we're here to assist you every step of the way. to resolve this issue, could you please provide me with some additional information? specifically, i would need to know the platform or website where you made the purchase and any relevant order details. with this information, we can quickly look into the status and provide you with an accurate update. thank you for your patience and cooperation.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 2.0340                    | 37.4814  

Testing on CPU:  46%|████▌     | 23/50 [13:12<14:20, 31.86s/it]


Sample 23/50 (Device: CPU):
Instruction: deleting government account
New Input: deleting government account [Classified: Intent is 'delete_account', Category is 'account', Entities are government (account_type), account (account_type)]
Generated Response: i understand that you would like to delete your {{account type}} account. i apologize for any inconvenience this may have caused. to assist you further, could you please provide me with your account details, such as your username or email address, so that i can locate the account and guide you through the deletion process? rest assured, i'm here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.9832                    | 27.5367                   | 29.5199                   | CPU        | 0.6289


Testing on CPU:  48%|████▊     | 24/50 [13:40<13:17, 30.67s/it]


Sample 24/50 (Device: CPU):
Instruction: my experience with your brand was bad, make a consumer claim
New Input: my experience with your brand was bad, make a consumer claim [Classified: Intent is 'complaint', Category is 'feedback', Entities are consumer (account_type)]
Generated Response: i apologize for any negative experience you may have had with our brand. your feedback is important to us, and we want to ensure that your concerns are addressed appropriately. to assist you further, could you please provide me with more details about the specific issue you encountered? this will help us understand the situation better and take the necessary steps to resolve it.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.8208                    | 26.0860                   | 27.9068                   | CPU        | 0.6562


Testing on CPU:  50%|█████     | 25/50 [14:09<12:32, 30.11s/it]


Sample 25/50 (Device: CPU):
Instruction: wann locate order ord-3779640
New Input: wann locate order ord-3779640 [Classified: Intent is 'change_order', Category is 'order', Entities are ord-3779640 (order_number)]
Generated Response: we understand that you would like to locate your order with the order number {{order number}}. we apologize for any inconvenience caused. to assist you further, could you please provide more details about the specific issue you are facing? this will help us better understand the situation and provide you with an appropriate solution. your satisfaction is our top priority, and we are here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7432                    | 27.0633                   | 28.8064                   | CPU        | 0.9766


Testing on CPU:  52%|█████▏    | 26/50 [15:05<15:12, 38.02s/it]


Sample 26/50 (Device: CPU):
Instruction: i have to sde in what cases can i request a refund
New Input: i have to sde in what cases can i request a refund [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm happy to help! i understand that you need to know the situations in which you can request refunds. here are some common scenarios where you may be eligible for a reimbursement: 1. **product/service defect:** if the product or service you received has a defect or doesn't meet the description provided, you usually have grounds to request compensation. REMEMBER, our goal is to ensure your satisfaction, and we want to make sure you have a seamless experience with our products and services. if you encounter any issues or have further questions, please don't hesitate to reach out to our customer support team. they are available {{customer support hours}} and can provide you with the necessary guidance and support to resolve the is

Testing on CPU:  54%|█████▍    | 27/50 [15:27<12:45, 33.30s/it]


Sample 27/50 (Device: CPU):
Instruction: i'm trying to see at what time i can call customer support
New Input: i'm trying to see at what time i can call customer support [Classified: Intent is 'contact_customer_service', Category is 'contact', Entities are none]
Generated Response: we're here to help! i take note that you would like to know the operating hours during which you can reach our dedicated customer assistance team. our team is available to assist you during our business hours, which can be found on our website at {{website url}}. if you have any other questions or concerns, please feel free to let me know. your satisfaction is our top priority, and we are committed to providing you with the support you need.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.5431                    | 21.7617                   | 22.3049                   | CPU        | 10.1797


Testing on CPU:  56%|█████▌    | 28/50 [15:53<11:25, 31.15s/it]


Sample 28/50 (Device: CPU):
Instruction: assistance sending some feedback for ur proiducts
New Input: assistance sending some feedback for ur proiducts [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in sending your feedback about our products and services. your opinion matters to us, and we appreciate your willingness to share your thoughts. could you please provide me with more details about the specific product or service you would like to provide feedback on? this will help me guide you through the process more effectively.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.8136                    | 24.3059                   | 26.1195                   | CPU        | 0.6289


Testing on CPU:  58%|█████▊    | 29/50 [16:29<11:22, 32.52s/it]


Sample 29/50 (Device: CPU):
Instruction: need assistance to cancel order ord-8227420
New Input: need assistance to cancel order ord-8227420 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-8227420 (order_number)]
Generated Response: thank you for reaching out to us for assistance with canceling your order with the order number {{order number}}. we understand that circumstances may change, and we're here to help you through the cancellation process. to proceed with your request, please provide us with some additional information, such as your account details or any specific concerns you may have. this will allow us to assist you more effectively and ensure a smooth cancellation experience. your satisfaction is our top priority, so please don't hesitate to reach out if you have any further questions or concerns.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cro

Testing on CPU:  60%|██████    | 30/50 [17:17<12:22, 37.11s/it]


Sample 30/50 (Device: CPU):
Instruction: need assistance cancelinv purchase ord-8042947
New Input: need assistance cancelinv purchase ord-8042947 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-8042947 (order_number)]
Generated Response: i'm here to assist you with canceling your purchase with the order number {{order number}}. rest assured, i'll guide you through the process step by step to ensure a smooth cancellation experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.9013                    | 45.9391                   | 47.8403                   | CPU        | 1.0664


Testing on CPU:  62%|██████▏   | 31/50 [18:06<12:51, 40.59s/it]


Sample 31/50 (Device: CPU):
Instruction: i don't know how i can download my bill #12588
New Input: i don't know how i can download my bill #12588 [Classified: Intent is 'get_invoice', Category is 'invoice', Entities are #12588 (person_name)]
Generated Response: i'm clearly cognizant your confusion about how to download your bill with the number {{person name}}. let me guide you through the process step by step. first, please log in to your account on our website. once you're logged in, navigate to the "billing" or "invoices" section. there, you should be able to find a list of your invoices. look for the specific invoice number, date range, or any other relevant information. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team. they will be more than happy to assist you further.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cro

Testing on CPU:  64%|██████▍   | 32/50 [18:33<10:58, 36.61s/it]


Sample 32/50 (Device: CPU):
Instruction: i need help to see purchase ord-0521414 status
New Input: i need help to see purchase ord-0521414 status [Classified: Intent is 'track_order', Category is 'order', Entities are ord-0521414 status (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the status of your purchase with the purchase number {{order number}}. to provide you with accurate information, could you please provide me with your full name, email address, and a preferred username? once i have these details, i'll be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.9615                    | 25.3462                   | 27.3077                   | CPU        | 0.9102


Testing on CPU:  66%|██████▌   | 33/50 [19:02<09:42, 34.27s/it]


Sample 33/50 (Device: CPU):
Instruction: am i entitled to a refund according to your policy?
New Input: am i entitled to a refund according to your policy? [Classified: Intent is 'get_refund', Category is 'refund', Entities are refund (currency_symbol)]
Generated Response: assuredly! i understand that you would like to know the status of your refund. your satisfaction is our top priority, and i'm here to assist you every step of the way. to provide you with accurate information, could you please provide me with your order number or any other relevant details? once i have these details, i'll be able to guide you through the refund process and ensure a smooth resolution.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7055                    | 27.1243                   | 28.8298                   | CPU        | 0.5234


Testing on CPU:  68%|██████▊   | 34/50 [22:00<20:39, 77.47s/it]


Sample 34/50 (Device: CPU):
Instruction: need to check what methods for delivery i can choose help me
New Input: need to check what methods for delivery i can choose help me [Classified: Intent is 'delivery_options', Category is 'delivery', Entities are none]
Generated Response: assuredly! i'm here to assist you in exploring the various delivery methods available to you. let's dive into the possibilities together: 1. standard shipping: this option typically takes {{date range}} business days for your package to arrive. it's perfect for non-urgent items when you have a bit more time to spare during the checkout process. you can also choose expedited shipping if you need your items a little faster, and overnight shipping for a quicker delivery if time is of the essence. this is an excellent choice for those who are not in a rush to receive their items, as it typically arrives within {{customer support hours}} to provide you with the best possible service.
Model                | Classifi

Testing on CPU:  70%|███████   | 35/50 [22:24<15:20, 61.39s/it]


Sample 35/50 (Device: CPU):
Instruction: ya have an address to submit some feednack
New Input: ya have an address to submit some feednack [Classified: Intent is 'review', Category is 'subscription', Entities are none]
Generated Response: thank you for expressing your interest in submitting your feedback about our services! we greatly value your input as it helps us understand your needs better and continuously improve our offerings. if you have any specific questions or if there's anything else i can assist you with, please feel free to let me know. i'm here to help!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7090                    | 22.1445                   | 23.8535                   | CPU        | 0.5156


Testing on CPU:  72%|███████▏  | 36/50 [23:26<14:22, 61.60s/it]


Sample 36/50 (Device: CPU):
Instruction: show me in which situations  can i ask for refunds
New Input: show me in which situations  can i ask for refunds [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the situations where you can request a refund. let me provide you with the information you need: 1. **product/service defect:** if you receive a product or service that has a defect or does not meet the description provided, you are generally eligible for a reimbursement. however, it's important to note that it may take additional time for the product to be returned to you, and this can vary depending on several factors, such as the quality of the item, the vendor's policies, or the specific circumstances surrounding the defect.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention |

Testing on CPU:  74%|███████▍  | 37/50 [23:57<11:22, 52.50s/it]


Sample 37/50 (Device: CPU):
Instruction: help mke chatting with a person
New Input: help mke chatting with a person [Classified: Intent is 'contact_human_agent', Category is 'contact', Entities are none]
Generated Response: thank you for trusting us! i'm fully aware of you would like to chat with one of our representatives. our dedicated team is available during {{customer support hours}} to assist you with any questions or concerns you may have. please feel free to reach out to us during these hours, and we'll be more than happy to help you. is there anything specific you need assistance with?
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.9234                    | 29.3344                   | 31.2578                   | CPU        | 11.1289


Testing on CPU:  76%|███████▌  | 38/50 [24:22<08:48, 44.04s/it]


Sample 38/50 (Device: CPU):
Instruction: there are errors trying to update my shipping address
New Input: there are errors trying to update my shipping address [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're encountering errors while updating your delivery address. let's work together to resolve this issue. could you please provide more details about the specific errors you are facing? this will help us better understand the problem and provide you with the most appropriate solution. your feedback is valuable to us, and we appreciate your patience and cooperation in resolving this matter.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7548                    | 22.5664                   | 24.3211                   | CPU        | 0.5195


Testing on CPU:  78%|███████▊  | 39/50 [24:48<07:07, 38.89s/it]


Sample 39/50 (Device: CPU):
Instruction: i wnt help swapping several items of order ord-9073141
New Input: i wnt help swapping several items of order ord-9073141 [Classified: Intent is 'change_order', Category is 'order', Entities are ord-9073141 (order_number)]
Generated Response: we understand that you need assistance with swapping multiple items in order number {{order number}}. we're here to help! to proceed with the swap, could you please provide more details about the items you would like to swap? this will allow us to process your request accurately and efficiently. your satisfaction is our top priority, and we want to ensure that your order meets your expectations.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7055                    | 25.1435                   | 26.8490                   | CPU        | 5.4492


Testing on CPU:  80%|████████  | 40/50 [25:19<06:04, 36.40s/it]


Sample 40/50 (Device: CPU):
Instruction: i don't know how i can order some of your article
New Input: i don't know how i can order some of your article [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for reaching out! i'm here to guide you through the process of ordering our articles. to place an order, you can visit our website and navigate to the "order history" or "my orders" section. there you will find a comprehensive list of all the articles you wish to order. if you have any specific questions or need further assistance, feel free to let me know. i'll be more than happy to help you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.6830                    | 28.9198                   | 30.6028                   | CPU        | 0.5195


Testing on CPU:  82%|████████▏ | 41/50 [25:56<05:27, 36.43s/it]


Sample 41/50 (Device: CPU):
Instruction: i don't know how i could modify my address
New Input: i don't know how i could modify my address [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about how to modify your address. let me guide you through the process step by step. first, please log in to your account on our website and navigate to the "my account" or "profile" section. from there, you should be able to find an option to edit or update your shipping information. if you encounter any difficulties or need further assistance, feel free to reach out to our customer support team.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7433                    | 34.7553                   | 36.4986                   | CPU        | 0.5156


Testing on CPU:  84%|████████▍ | 42/50 [26:21<04:24, 33.10s/it]


Sample 42/50 (Device: CPU):
Instruction: order ord-9186353 status
New Input: order ord-9186353 status [Classified: Intent is 'track_order', Category is 'order', Entities are ord-9186353 status (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the status of your order with the order number {{order number}}. to provide you with accurate information, could you please provide me with some additional details such as your name or any other relevant information? this will help me locate the specific order and give you the most up-to-date status.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7104                    | 23.6273                   | 25.3377                   | CPU        | 0.5625


Testing on CPU:  86%|████████▌ | 43/50 [26:47<03:36, 30.87s/it]


Sample 43/50 (Device: CPU):
Instruction: help me making a claim against your business
New Input: help me making a claim against your business [Classified: Intent is 'complaint', Category is 'feedback', Entities are business (account_type)]
Generated Response: i'm sorry to hear that you're facing an issue with our business. your feedback is important to us, and i want to ensure that we address your concerns appropriately. to assist you further, could you please provide me with more details about the nature of your claim? this will help us understand the situation better and take the necessary steps to resolve it.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 1.7695                    | 23.8924                   | 25.6620                   | CPU        | 0.9961


Testing on CPU:  88%|████████▊ | 44/50 [27:24<03:16, 32.77s/it]


Sample 44/50 (Device: CPU):
Instruction: i have got to see in which cases can i request my money back
New Input: i have got to see in which cases can i request my money back [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the cases where you can request a refund. let me provide you with the information you need: 1. **product/service defect:** if you receive a product or service that has a defect or does not meet the description provided, you may be eligible for a reimbursement.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 2.0325                    | 35.1572                   | 37.1898                   | CPU        | 0.5234


Testing on CPU:  90%|█████████ | 45/50 [27:38<02:16, 27.30s/it]


Sample 45/50 (Device: CPU):
Instruction: can you help me report troubles with payment?
New Input: can you help me report troubles with payment? [Classified: Intent is 'payment_issue', Category is 'payment', Entities are none]
Generated Response: assuredly! i'm here to assist you in reporting any troubles you're facing with your payment. your feedback is highly valuable to us, and we want to ensure that we address your concerns promptly and effectively. to report any payment issues, i recommend reaching out to our dedicated customer support team. they have the expertise to guide you through the process and address any concerns you may have. you can contact them through our website's live chat feature at {{website url}} or by calling {{customer support phone number}}. rest assured, we are committed to resolving this matter and ensuring a seamless payment experience for you.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device

Testing on CPU:  92%|█████████▏| 46/50 [27:48<01:28, 22.05s/it]


Sample 46/50 (Device: CPU):
Instruction: i try to notify of an error with registrations
New Input: i try to notify of an error with registrations [Classified: Intent is 'registration_problems', Category is 'account', Entities are none]
Generated Response: thank you for bringing this error to our attention. we understand that encountering errors during the registration process can be frustrating, and we apologize for any inconvenience caused. to assist you further, could you please provide more details about the specific error you are facing? this will enable us to investigate the issue thoroughly and provide you with the most appropriate solution. your feedback is crucial in helping us improve our services and ensure a seamless registration experience for all our users.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.4635                    | 9.3181                   |

Testing on CPU:  94%|█████████▍| 47/50 [28:00<00:57, 19.03s/it]


Sample 47/50 (Device: CPU):
Instruction: what do i have to do to cancel my platinum account?
New Input: what do i have to do to cancel my platinum account? [Classified: Intent is 'delete_account', Category is 'account', Entities are platinum (account_type)]
Generated Response: thank you for reaching out to us regarding the cancellation of your {{account type}} account. i understand that you would like to terminate your account, and i'm here to guide you through the process. to proceed with cancellation, i kindly request you to provide me with some additional information. could you please let me know which platform or service you are referring to? this will help me provide you with the most accurate and tailored assistance.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.4792                    | 11.5155                   | 11.9947                   | CPU        | 0.668

Testing on CPU:  96%|█████████▌| 48/50 [28:10<00:32, 16.22s/it]


Sample 48/50 (Device: CPU):
Instruction: how can i submit some feedback abot ur company
New Input: how can i submit some feedback abot ur company [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: thank you for your interest in providing feedback on our company! to submit your feedback, you can visit our website and navigate to the "contact us" or "feedback" section. there, there you will find a form or a contact information to reach out to our dedicated customer support team. they will be more than happy to assist you with the feedback submission process. we value your input and look forward to hearing from you!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.4082                    | 9.2469                   | 9.6550                   | CPU        | 4.6992


Testing on CPU:  98%|█████████▊| 49/50 [28:19<00:14, 14.19s/it]


Sample 49/50 (Device: CPU):
Instruction: change  order ord-8335461
New Input: change  order ord-8335461 [Classified: Intent is 'cancel_order', Category is 'order', Entities are order (order_number), ord-8335461 (order_number)]
Generated Response: we understand that you would like to make changes to your order with the number {{order number}}. we apologize for any inconvenience caused. to assist you further, could you please provide more details about the specific changes you are interested in? this will help us ensure that we address your request accurately and efficiently. your satisfaction is our top priority, and we are here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.4303                    | 9.0360                   | 9.4663                   | CPU        | 11.2227


Testing on CPU: 100%|██████████| 50/50 [28:27<00:00, 34.15s/it]


Sample 50/50 (Device: CPU):
Instruction: i need assistance to send some feedback for ur produucts
New Input: i need assistance to send some feedback for ur produucts [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: we appreciate your willingness to share your feedback about our products and services. your insights are valuable to us as they help us understand how we can better serve our customers. please let me know if there's anything specific you'd like to provide feedback on, and i'll be more than happy to assist you further.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.4313                    | 7.1601                   | 7.5914                   | CPU        | 0.6289

BENCHMARK SUMMARY FOR CPU
Average Classification Time: 1.5178 seconds
Average Generation Time: 32.6261 seconds
Average Overall Time: 34.1439 seconds
A




Initializing model...
All GPT-2 and DistilBERT layers remain frozen
Attempting to load generation model from ../../text_generation_results_03-09-25/model


Testing on CUDA:   2%|▏         | 1/50 [00:06<05:00,  6.13s/it]


Sample 1/50 (Device: CUDA):
Instruction: i do not know what i have to do to get my invoice #85632
New Input: i do not know what i have to do to get my invoice #85632 [Classified: Intent is 'check_invoice', Category is 'invoice', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about the steps to take in order to retrieve your invoice with the number #{{client last name}}. let me guide you through the process. first, please log in to your account on our website. once logged in, navigate to the "billing" or "invoices" section. there, you should be able to find a list of your invoices. look for a tab or option that says "view invoice." click on that option, and you'll be directed to a page where you can view and download the invoice. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team. we're here to assist you every step of the way.
Model                | Classification Time (s)   | Text Generation Ti

Testing on CUDA:   4%|▍         | 2/50 [00:08<03:07,  3.90s/it]


Sample 2/50 (Device: CUDA):
Instruction: transfer to live agent
New Input: transfer to live agent [Classified: Intent is 'contact_human_agent', Category is 'contact', Entities are live (delivery_city), agent (account_type)]
Generated Response: thank you for reaching out! i'm here to assist you in transferring to a live representative. your satisfaction is our top priority, and we want to ensure that you receive the support you need. could you please provide me with more details about the specific issue or question you have? this will enable me to guide you through the transfer process and provide you with the most accurate and personalized assistance.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1857                    | 2.1603                   | 2.3460                   | CUDA        | 2700.4609


Testing on CUDA:   6%|▌         | 3/50 [00:10<02:32,  3.24s/it]


Sample 3/50 (Device: CUDA):
Instruction: i need help to see how long refunds take
New Input: i need help to see how long refunds take [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the timeframe for refunds. the duration for processing refunds can vary depending on several factors, such as the payment method used and the specific policies of the vendor or service provider. to provide you with accurate information, could you please provide me with your order number or any other relevant details? this will enable me to check the status of your refund and give you an estimated timeframe. thank you for your cooperation, and i appreciate your patience while i gather the necessary information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1693                    | 2.2689    

Testing on CUDA:   8%|▊         | 4/50 [00:13<02:11,  2.87s/it]


Sample 4/50 (Device: CUDA):
Instruction: help to pseak to customer support
New Input: help to pseak to customer support [Classified: Intent is 'contact_customer_service', Category is 'contact', Entities are customer (account_type)]
Generated Response: thank you for reaching out! i'm here to assist you with your query. to ensure a seamless experience, could you please provide me with more details about the specific issue or question you have? this will help me direct you to the most appropriate department or channel to address your concerns. your satisfaction is our top priority, and we're committed to resolving any concerns you may have.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2893                    | 2.0101                   | 2.2994                   | CUDA        | 2697.0469


Testing on CUDA:  10%|█         | 5/50 [00:15<02:02,  2.73s/it]


Sample 5/50 (Device: CUDA):
Instruction: i do not know what to do to swtch to the premium account
New Input: i do not know what to do to swtch to the premium account [Classified: Intent is 'switch_account', Category is 'account', Entities are premium (account_type)]
Generated Response: i appreciate your interest in switching to our {{account type}} account, and i'm here to guide you through the process. to initiate the switch, could you please provide me with your account username or any other relevant information? once i have this information, i'll be able to assist you further and ensure a smooth transition. rest assured, your security and privacy are of utmost importance to us, so your information will be handled with the utmost care.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1830                    | 2.3011                   | 2.4840                   | CUDA 

Testing on CUDA:  12%|█▏        | 6/50 [00:18<01:56,  2.65s/it]


Sample 6/50 (Device: CUDA):
Instruction: i need assistance resetting the key of my user account
New Input: i need assistance resetting the key of my user account [Classified: Intent is 'recover_password', Category is 'account', Entities are user (account_type)]
Generated Response: assuredly! i'm here to provide you with the assistance you need to reset your key to regain access to your account. let's work together to resolve this issue and ensure a smooth and secure reset process. could you please provide me with more details about the specific issue you're facing? this will allow me to guide you through the necessary steps and provide the most accurate guidance. your satisfaction is our top priority, and we're committed to assisting you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1406                    | 2.3563                   | 2.4969   

Testing on CUDA:  14%|█▍        | 7/50 [00:20<01:49,  2.56s/it]


Sample 7/50 (Device: CUDA):
Instruction: i want help taking a quick look at the bills from john robertson
New Input: i want help taking a quick look at the bills from john robertson [Classified: Intent is 'check_invoice', Category is 'invoice', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in quickly accessing and reviewing the invoices from {{salutation}} {{client last name}}. to provide you with the most accurate and up-to-date information, could you please provide me with some additional details such as the {{date range}} or any other relevant information? this will help me locate the specific bills you are looking for and guide you through the process of retrieving them.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1917                    | 2.1728                   | 2.3645                   | CUDA        | 2712.8232


Testing on CUDA:  16%|█▌        | 8/50 [00:22<01:45,  2.51s/it]


Sample 8/50 (Device: CUDA):
Instruction: i can't enter a delivery address
New Input: i can't enter a delivery address [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are delivery (account_category)]
Generated Response: i'm sorry to hear that you're having trouble entering your address. let's work together to resolve this issue. to better assist you, could you please provide more details about the specific problem you are facing? are you encountering any error messages or facing any other challenges? by understanding the nature of the issue, i'll be able to provide you with the most accurate guidance and support. your satisfaction is our top priority, and we're committed to resolving this matter for you as quickly as possible.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1325                    | 2.2783                   | 2.4109   

Testing on CUDA:  18%|█▊        | 9/50 [00:24<01:33,  2.27s/it]


Sample 9/50 (Device: CUDA):
Instruction: can you help me place an order?
New Input: can you help me place an order? [Classified: Intent is 'delivery_options', Category is 'contact', Entities are none]
Generated Response: assuredly! i'm here to assist you in placing your order and ensuring a seamless experience. to get started, could you please provide me with more details about the specific products or services you are interested in? this will allow me to provide you with accurate information and guide you through the ordering process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1781                    | 1.5630                   | 1.7411                   | CUDA        | 2686.9067


Testing on CUDA:  20%|██        | 10/50 [00:28<01:49,  2.73s/it]


Sample 10/50 (Device: CUDA):
Instruction: i do not know how i can set up my shipping address
New Input: i do not know how i can set up my shipping address [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about the process of setting up your delivery address. allow me to guide you through it step by step: 1. log in to your account on our website or app by navigating to the "my account" or "profile" section, where you should find an option to manage your shipping addresses. you can usually find this option by clicking on your profile icon or a similar option in the settings or profile section of your web browser. if you encounter any difficulties or have further questions, please don't hesitate to reach out to our customer support team. they will be more than happy to assist you and provide you with the necessary instructions to set the address up successfully. remember, your satisfactio

Testing on CUDA:  22%|██▏       | 11/50 [00:32<01:58,  3.05s/it]


Sample 11/50 (Device: CUDA):
Instruction: i need assistance to set my new delovery address up
New Input: i need assistance to set my new delovery address up [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are new (account_type)]
Generated Response: i'm here to assist you in setting up your new delivery address. to ensure a smooth process, please follow these steps: 1. log in to your account on our website or app using your username and password. 2. navigate to the "my account" or "profile" section. 3. look for the option to manage your shipping addresses. 4. click on that option and select the desired address format. 5. fill in the required details such as street name, city, state, and zip code. 6. follow the on-screen prompts or instructions to complete the setup process. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team.
Model                | Classification Time (s)   | Text Generation

Testing on CUDA:  24%|██▍       | 12/50 [00:34<01:44,  2.74s/it]


Sample 12/50 (Device: CUDA):
Instruction: help me to check what payment methods are available
New Input: help me to check what payment methods are available [Classified: Intent is 'check_payment_methods', Category is 'payment', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in checking the payment options available to you. to provide you with accurate information, could you please provide me with your full name, email address, and a preferred payment method? once i have these details, i'll be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1699                    | 1.8636                   | 2.0335                   | CUDA        | 2696.1968


Testing on CUDA:  26%|██▌       | 13/50 [00:35<01:29,  2.42s/it]


Sample 13/50 (Device: CUDA):
Instruction: i call to leave my feedback for ur services
New Input: i call to leave my feedback for ur services [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: thank you for expressing your interest in providing feedback on our services! your feedback is highly valuable to us as it helps us understand our customers' experiences and make necessary improvements. please let me know if you have any specific questions or if there's anything else i can assist you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1911                    | 1.4817                   | 1.6728                   | CUDA        | 2685.1992


Testing on CUDA:  28%|██▊       | 14/50 [00:38<01:30,  2.51s/it]


Sample 14/50 (Device: CUDA):
Instruction: open freemium account for wife
New Input: open freemium account for wife [Classified: Intent is 'create_account', Category is 'account', Entities are open fre (account_type), em (account_type), ium (account_type), account (account_type), wife (account_type)]
Generated Response: thank you for your interest in opening a {{account type}} account with us! i'm here to assist you every step of the way. to get started, could you please provide me with your wife's full name, email address, and a preferred username? once i have these details, i'll guide you through the account creation process, ensuring a seamless experience for both of you. rest assured, we prioritize the security and privacy of our customers' information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2271                    | 2.4884                   | 2.7156       

Testing on CUDA:  30%|███       | 15/50 [00:41<01:29,  2.55s/it]


Sample 15/50 (Device: CUDA):
Instruction: i am trying to see the status of order ord-1135283
New Input: i am trying to see the status of order ord-1135283 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-1135283 (order_number)]
Generated Response: i'm sorry to hear that you're having trouble viewing the progress of your order with the order number {{order number}}. i understand how important it is for you to have visibility on this matter. to assist you further, could you please provide me with some additional information? specifically, i would like to know the platform or website where you made the purchase. this will allow me to provide you with accurate and up-to-date information. your satisfaction is our top priority, and we're here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1384                

Testing on CUDA:  32%|███▏      | 16/50 [00:43<01:20,  2.36s/it]


Sample 16/50 (Device: CUDA):
Instruction: wanna see ur payment modalities will yahelp me
New Input: wanna see ur payment modalities will yahelp me [Classified: Intent is 'check_payment_methods', Category is 'payment', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in viewing our available payment methods. to provide you with accurate information, could you please provide me with your full name, email address, and a preferred payment method? once i have these details, i'll be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1850                    | 1.7506                   | 1.9355                   | CUDA        | 2694.5947


Testing on CUDA:  34%|███▍      | 17/50 [00:46<01:23,  2.52s/it]


Sample 17/50 (Device: CUDA):
Instruction: what do i ned to do to change to the real estate account?
New Input: what do i ned to do to change to the real estate account? [Classified: Intent is 'switch_account', Category is 'account', Entities are real (account_category), estate (account_category)]
Generated Response: i appreciate your interest in upgrading to our {{account category}} account. let me guide you through the process step by step. first, log in to your account on our website or mobile app. once you're logged in, locate the account settings or profile section. look for an option that says "upgrade" or "switch account type." click on that option, and it will direct you to a page where you can make the necessary changes. if you encounter any difficulties or have further questions, please don't hesitate to reach out. i'm here to assist you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device   

Testing on CUDA:  36%|███▌      | 18/50 [00:48<01:18,  2.46s/it]


Sample 18/50 (Device: CUDA):
Instruction: notify of errorw with online registration
New Input: notify of errorw with online registration [Classified: Intent is 'registration_problems', Category is 'account', Entities are none]
Generated Response: we understand that encountering errors during the registration process can be frustrating, and we apologize for any inconvenience caused. your feedback is crucial in helping us improve our services and ensure a seamless registration experience for all our users. to assist you further, could you please provide more details about the specific error you are facing? this will enable us to investigate the issue thoroughly and provide you with the most suitable solution. thank you for bringing this to our attention.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1616                    | 2.1642                   | 2.3258           

Testing on CUDA:  38%|███▊      | 19/50 [00:50<01:10,  2.27s/it]


Sample 19/50 (Device: CUDA):
Instruction: another standard accounmt for father
New Input: another standard accounmt for father [Classified: Intent is 'contact_human_agent', Category is 'account', Entities are standard (account_type), acc (delivery_country), father (delivery_city)]
Generated Response: thank you for reaching out! i'm here to assist you in connecting with a human agent who can provide the support you need. please bear with me for a moment while i transfer you to one of our dedicated representatives who will be able to address your concerns. your satisfaction is our top priority, and we appreciate your patience as we work together to resolve this matter.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1481                    | 1.6554                   | 1.8035                   | CUDA        | 2708.5518


Testing on CUDA:  40%|████      | 20/50 [00:51<01:03,  2.10s/it]


Sample 20/50 (Device: CUDA):
Instruction: i need infrmation about the cancellation of my enterprise account
New Input: i need infrmation about the cancellation of my enterprise account [Classified: Intent is 'delete_account', Category is 'account', Entities are enterprise (account_type)]
Generated Response: i'm here to assist you with canceling your {{account type}} account. your satisfaction is our top priority, and we want to ensure that you have a seamless experience with us. could you please provide me with more details about your specific query or concern? this will help us better understand your needs and provide the most accurate guidance.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1898                    | 1.5315                   | 1.7213                   | CUDA        | 2696.9038


Testing on CUDA:  42%|████▏     | 21/50 [00:53<00:55,  1.92s/it]


Sample 21/50 (Device: CUDA):
Instruction: i want help ordering a few of your product
New Input: i want help ordering a few of your product [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for your interest in ordering our products! i'm here to assist you with the ordering process. could you please provide me with more details about the specific products you would like to order? this will help me guide you through the necessary steps and ensure a seamless experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1420                    | 1.3501                   | 1.4922                   | CUDA        | 2686.2852


Testing on CUDA:  44%|████▍     | 22/50 [00:56<01:00,  2.18s/it]


Sample 22/50 (Device: CUDA):
Instruction: checking status of order ord-6772250
New Input: checking status of order ord-6772250 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-6772250 (order_number)]
Generated Response: i'm sorry to hear that you're experiencing difficulties while trying to check the status for your order with the order number {{order number}}. i understand how important it is for you to have a seamless experience with us. to assist you further, could you please provide me with some additional information? specifically, i would like to know the platform or website where you made the purchase. this will allow me to provide you with accurate and up-to-date information. your satisfaction is our top priority, and we're here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1652                

Testing on CUDA:  46%|████▌     | 23/50 [00:58<00:59,  2.19s/it]


Sample 23/50 (Device: CUDA):
Instruction: deleting government account
New Input: deleting government account [Classified: Intent is 'delete_account', Category is 'account', Entities are government (account_type), account (account_type)]
Generated Response: i understand that you would like to delete your {{account type}} account, and i'm here to assist you with that. to proceed with the deletion, please provide me with your account details, such as your username or email address, so that i can locate the account and guide you through the necessary steps.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1710                    | 2.0403                   | 2.2113                   | CUDA        | 2711.4741


Testing on CUDA:  48%|████▊     | 24/50 [01:00<00:53,  2.05s/it]


Sample 24/50 (Device: CUDA):
Instruction: my experience with your brand was bad, make a consumer claim
New Input: my experience with your brand was bad, make a consumer claim [Classified: Intent is 'complaint', Category is 'feedback', Entities are consumer (account_type)]
Generated Response: i'm sorry to hear that your experience was unsatisfactory. your feedback is important to us, and i want to ensure that we address your concerns effectively. could you please provide me with more details about the specific issue you encountered? this will help us understand the situation better and take the necessary steps to resolve it.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1491                    | 1.5660                   | 1.7151                   | CUDA        | 2696.9043


Testing on CUDA:  50%|█████     | 25/50 [01:02<00:51,  2.05s/it]


Sample 25/50 (Device: CUDA):
Instruction: wann locate order ord-3779640
New Input: wann locate order ord-3779640 [Classified: Intent is 'change_order', Category is 'order', Entities are ord-3779640 (order_number)]
Generated Response: we understand that you would like to locate your order with the order number {{order number}}. we apologize for any inconvenience caused. to assist you further, could you please provide more details about the specific issue you are facing? this will help us better understand the situation and provide you with an appropriate solution. your satisfaction is our top priority, and we are here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2133                    | 1.8540                   | 2.0673                   | CUDA        | 2703.4932


Testing on CUDA:  52%|█████▏    | 26/50 [01:06<01:02,  2.61s/it]


Sample 26/50 (Device: CUDA):
Instruction: i have to sde in what cases can i request a refund
New Input: i have to sde in what cases can i request a refund [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the situations in which you may be eligible to request refunds. let me provide you with the information you need to understand the different scenarios: 1. **product/service defect:** if you receive a product or service that has a defect or does not meet the description provided, you are typically entitled to a reimbursement. however, it's important to note that the exact nature of the defect may depend on several factors, such as the manufacturing process, the vendor's refund policy, and the specific circumstances surrounding the product's delivery, so it may vary slightly from the company's official statement or the customer support provided by the manufacturer.
Model                | Cl

Testing on CUDA:  54%|█████▍    | 27/50 [01:08<00:57,  2.50s/it]


Sample 27/50 (Device: CUDA):
Instruction: i'm trying to see at what time i can call customer support
New Input: i'm trying to see at what time i can call customer support [Classified: Intent is 'contact_customer_service', Category is 'contact', Entities are none]
Generated Response: we're here to help! i take note that you would like to know the operating hours during which you can reach our dedicated customer assistance team. our team is available to assist you during our business hours, which can be found on our website at {{website url}}. if you have any other questions or concerns, feel free to let me know.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1657                    | 2.0670                   | 2.2327                   | CUDA        | 2715.9438


Testing on CUDA:  56%|█████▌    | 28/50 [01:11<00:57,  2.61s/it]


Sample 28/50 (Device: CUDA):
Instruction: assistance sending some feedback for ur proiducts
New Input: assistance sending some feedback for ur proiducts [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for expressing your interest in providing feedback on our {{order number}}. we greatly value your input as it helps us improve our products and services. to send your feedback, you can visit our website and navigate to the "contact us" or "feedback" section. there, simply fill out the form with your contact information, such as your name, email address, and preferred method of communication. our dedicated team will be more than happy to assist you with the feedback submission process.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2289                    | 2.6320                   | 2.8609                   | CU

Testing on CUDA:  58%|█████▊    | 29/50 [01:13<00:53,  2.56s/it]


Sample 29/50 (Device: CUDA):
Instruction: need assistance to cancel order ord-8227420
New Input: need assistance to cancel order ord-8227420 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-8227420 (order_number)]
Generated Response: i understand your need for assistance in canceling your order with the order number {{order number}}. rest assured, i'm here to guide you through the process and ensure a smooth cancellation experience. to proceed, could you please provide me with some additional details such as your account information or any specific concerns you have regarding the cancellation? this will allow me to provide you with accurate and tailored assistance. your satisfaction is our top priority, and we're committed to resolving this matter for you as quickly as possible.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2091           

Testing on CUDA:  60%|██████    | 30/50 [01:15<00:47,  2.36s/it]


Sample 30/50 (Device: CUDA):
Instruction: need assistance cancelinv purchase ord-8042947
New Input: need assistance cancelinv purchase ord-8042947 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-8042947 (order_number)]
Generated Response: i'm sorry to hear that you're having trouble canceling your purchase with the order number {{order number}}. i understand how important it is for you to have control over the cancellation process. to assist you further, could you please provide me with more details about the specific issue you are facing? this will help me guide you through the necessary steps and ensure a smooth cancellation experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1656                    | 1.7251                   | 1.8907                   | CUDA        | 2701.6475


Testing on CUDA:  62%|██████▏   | 31/50 [01:18<00:50,  2.64s/it]


Sample 31/50 (Device: CUDA):
Instruction: i don't know how i can download my bill #12588
New Input: i don't know how i can download my bill #12588 [Classified: Intent is 'get_invoice', Category is 'invoice', Entities are #12588 (person_name)]
Generated Response: i'm clearly cognizant your confusion about how to download your bill with the number #{{person name}}. let me guide you through the process step by step. first, please log in to your account on our website. once you're logged in, navigate to the "billing" or "invoices" section. from there, you should be able to find a list of your invoices. look for a download or print button next to each invoice, and click on it. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2735                    | 3.0356      

Testing on CUDA:  64%|██████▍   | 32/50 [01:20<00:43,  2.43s/it]


Sample 32/50 (Device: CUDA):
Instruction: i need help to see purchase ord-0521414 status
New Input: i need help to see purchase ord-0521414 status [Classified: Intent is 'track_order', Category is 'order', Entities are ord-0521414 status (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the status of your purchase with the purchase number {{order number}}. to view the current status, please visit the '{{order status}}' section on our website. if you have any other questions or need further assistance, feel free to let me know.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1616                    | 1.7729                   | 1.9345                   | CUDA        | 2702.4824


Testing on CUDA:  66%|██████▌   | 33/50 [01:23<00:41,  2.43s/it]


Sample 33/50 (Device: CUDA):
Instruction: am i entitled to a refund according to your policy?
New Input: am i entitled to a refund according to your policy? [Classified: Intent is 'get_refund', Category is 'refund', Entities are refund (currency_symbol)]
Generated Response: i'm happy to help! i understand that you would like to know the status of your refund. your satisfaction is our top priority, and we want to ensure that your concerns are addressed promptly and effectively. to provide you with accurate information, could you please provide me with your order number or any other relevant details? this will enable me to assist you further in obtaining the refund you deserve.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2487                    | 2.1951                   | 2.4438                   | CUDA        | 2718.4995


Testing on CUDA:  68%|██████▊   | 34/50 [01:33<01:16,  4.78s/it]


Sample 34/50 (Device: CUDA):
Instruction: need to check what methods for delivery i can choose help me
New Input: need to check what methods for delivery i can choose help me [Classified: Intent is 'delivery_options', Category is 'delivery', Entities are none]
Generated Response: i'll take care of it! i'm here to assist you in exploring the various delivery methods we offer. let's dive into the possibilities together. here are some of the delivery options we have available: 1. standard shipping: this option typically takes {{date range}} business days for your package to arrive. it's perfect for non-urgent items when you don't have an urgent need. if you need your items a bit faster, you can also opt for expedited shipping, which typically arrives within {{customer support hours}}. you'll be more than happy to receive your order as soon as possible, ensuring a hassle-free delivery experience for you and your loved ones in {{delivery city}}, {{shipping city}}.
Model                | Cl

Testing on CUDA:  70%|███████   | 35/50 [01:35<01:00,  4.05s/it]


Sample 35/50 (Device: CUDA):
Instruction: ya have an address to submit some feednack
New Input: ya have an address to submit some feednack [Classified: Intent is 'review', Category is 'subscription', Entities are none]
Generated Response: thank you for your interest in submitting feedback about our products and services! we appreciate your willingness to share your thoughts and experiences. to provide feedback, you can visit our website and navigate to the "contact us" or "feedback" section. there, there you will find a form or contact information to reach out to our dedicated customer support team who will be more than happy to assist you with submitting your feedback. we value your input and look forward to hearing from you!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1692                    | 2.1669                   | 2.3361                   | CUDA        | 27

Testing on CUDA:  72%|███████▏  | 36/50 [01:39<00:56,  4.05s/it]


Sample 36/50 (Device: CUDA):
Instruction: show me in which situations  can i ask for refunds
New Input: show me in which situations  can i ask for refunds [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the situations where you can request a refund. let me provide you with the information you need: 1. **product/service defect:** if you receive a product or service that has a defect or does not meet the description provided, you are generally eligible for a reimbursement. however, it's important to note that it may take additional time for the product to reflect in your hands and be returned to you as soon as possible after the purchase has been made. this can vary depending on various factors, such as the type of product, the vendor's refund policy, and the specific circumstances surrounding the defect. if your satisfaction is of the utmost importance, i recommend reaching out to our cus

Testing on CUDA:  74%|███████▍  | 37/50 [01:41<00:44,  3.41s/it]


Sample 37/50 (Device: CUDA):
Instruction: help mke chatting with a person
New Input: help mke chatting with a person [Classified: Intent is 'contact_human_agent', Category is 'contact', Entities are none]
Generated Response: thank you for trusting us! i'm fully aware of you would like to chat with someone. our team of dedicated human representatives is here to assist you. please allow me a moment to connect you with one of our representatives who will be more than happy to help you further. your satisfaction is our top priority, and we appreciate your patience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2015                    | 1.7147                   | 1.9162                   | CUDA        | 2694.5938


Testing on CUDA:  76%|███████▌  | 38/50 [01:43<00:36,  3.00s/it]


Sample 38/50 (Device: CUDA):
Instruction: there are errors trying to update my shipping address
New Input: there are errors trying to update my shipping address [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're encountering errors while updating your delivery address. our team is here to assist you in resolving this issue. could you please provide more details about the specific errors you are facing? this will help us better understand the problem and provide you with the most accurate solution. thank you for bringing this to our attention, and we appreciate your patience as we work together to resolve this matter.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1634                    | 1.8851                   | 2.0485                   | CUDA        | 2698.4375


Testing on CUDA:  78%|███████▊  | 39/50 [01:45<00:29,  2.68s/it]


Sample 39/50 (Device: CUDA):
Instruction: i wnt help swapping several items of order ord-9073141
New Input: i wnt help swapping several items of order ord-9073141 [Classified: Intent is 'change_order', Category is 'order', Entities are ord-9073141 (order_number)]
Generated Response: we understand that you need assistance with swapping multiple items in order number {{order number}}. we're here to help! to proceed with the swap, could you please provide more details about the items you would like to exchange? this will allow us to process your request accurately and efficiently. your satisfaction is our top priority, and we want to ensure that your order meets your expectations.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1377                    | 1.7840                   | 1.9218                   | CUDA        | 2710.9150


Testing on CUDA:  80%|████████  | 40/50 [01:48<00:26,  2.66s/it]


Sample 40/50 (Device: CUDA):
Instruction: i don't know how i can order some of your article
New Input: i don't know how i can order some of your article [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for your interest in ordering our articles! i'm here to guide you through the process. to place an order, you can visit our website and browse through our wide range of articles. once you find the articles you wish to purchase, simply add them to your cart. if you have any specific questions or need further assistance, feel free to reach out to our customer support team. they will be more than happy to assist you further.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1563                    | 2.4566                   | 2.6129                   | CUDA        | 2711.4893


Testing on CUDA:  82%|████████▏ | 41/50 [01:50<00:23,  2.59s/it]


Sample 41/50 (Device: CUDA):
Instruction: i don't know how i could modify my address
New Input: i don't know how i could modify my address [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about how to modify your address. let me guide you through the process step by step. first, please log in to your account on our website and navigate to the "my account" or "profile" section. from there, you should be able to find an option to edit your shipping address or add a new one. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team for assistance.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1523                    | 2.2772                   | 2.4295                   | CUDA        | 2714.9038


Testing on CUDA:  84%|████████▍ | 42/50 [01:53<00:19,  2.47s/it]


Sample 42/50 (Device: CUDA):
Instruction: order ord-9186353 status
New Input: order ord-9186353 status [Classified: Intent is 'track_order', Category is 'order', Entities are ord-9186353 status (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the status of your order with the order number {{order number}}. to provide you with accurate information, could you please confirm if you made the purchase on our website or through another platform? once i have the necessary details, i'll be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1489                    | 2.0251                   | 2.1740                   | CUDA        | 2706.2710


Testing on CUDA:  86%|████████▌ | 43/50 [01:55<00:16,  2.39s/it]


Sample 43/50 (Device: CUDA):
Instruction: help me making a claim against your business
New Input: help me making a claim against your business [Classified: Intent is 'complaint', Category is 'feedback', Entities are business (account_type)]
Generated Response: i'm here to provide you with the assistance you need to make your claim. your satisfaction is our top priority, and we want to ensure that your concerns are addressed promptly and effectively. to begin the process, could you please provide me with more details about the issue you're facing? this will help us understand the situation better and work towards a resolution that meets your needs. thank you for reaching out and giving us the opportunity to assist you.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1452                    | 2.0640                   | 2.2092                   | CUDA        | 2708.0386


Testing on CUDA:  88%|████████▊ | 44/50 [01:59<00:17,  2.97s/it]


Sample 44/50 (Device: CUDA):
Instruction: i have got to see in which cases can i request my money back
New Input: i have got to see in which cases can i request my money back [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm happy to help! i understand that you would like to have a clear understanding of the cases where you can request a refund. let me provide you with the information you need: firstly, it's important to note that the exact nature of your refund request may vary depending on several factors, such as the type of product or service, the payment method used, and the specific circumstances surrounding your request. to provide accurate information, i recommend reaching out to our customer support team directly. they will be able to assist you further and guide you through the refund process. please provide them with all the relevant details regarding your specific situation, including any relevant order or transa

Testing on CUDA:  90%|█████████ | 45/50 [02:02<00:15,  3.01s/it]


Sample 45/50 (Device: CUDA):
Instruction: can you help me report troubles with payment?
New Input: can you help me report troubles with payment? [Classified: Intent is 'payment_issue', Category is 'payment', Entities are none]
Generated Response: unquestionably! i'm here to assist you in reporting any troubles you're facing with your payment. your feedback is crucial to us, and we want to ensure that your concerns are addressed promptly and effectively. to report any payment issues, i recommend reaching out to our dedicated customer support team. they have the expertise to guide you through the process and address any concerns you may have. you can contact them through our helpline at {{customer support phone number}} or by visiting our website and initiating a live chat session with one of our representatives. rest assured, we are committed to resolving this matter and ensuring a seamless payment experience for you.
Model                | Classification Time (s)   | Text Generation T

Testing on CUDA:  92%|█████████▏| 46/50 [02:04<00:11,  2.79s/it]


Sample 46/50 (Device: CUDA):
Instruction: i try to notify of an error with registrations
New Input: i try to notify of an error with registrations [Classified: Intent is 'registration_problems', Category is 'account', Entities are none]
Generated Response: thank you for bringing this error to our attention. we understand that encountering errors during the registration process can be frustrating, and we apologize for any inconvenience caused. to assist you further, could you please provide more details about the specific error you are facing? this will enable us to investigate the issue thoroughly and provide you with the appropriate solution. your feedback is highly valuable to us as it helps us improve our services and ensure a seamless registration experience for all our users.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.2188                    | 2.0479         

Testing on CUDA:  94%|█████████▍| 47/50 [02:08<00:08,  2.99s/it]


Sample 47/50 (Device: CUDA):
Instruction: what do i have to do to cancel my platinum account?
New Input: what do i have to do to cancel my platinum account? [Classified: Intent is 'delete_account', Category is 'account', Entities are platinum (account_type)]
Generated Response: thank you for reaching out to us regarding the cancellation of your {{account type}} account. we understand that circumstances may change, and we're here to assist you every step of the way. to proceed with cancellation, please follow these steps: 1. log in to your account on our {{online company portal info}} using your username and password. 2. navigate to the account settings or profile section. 3. look for the option to manage or cancel your subscription. 4. follow the prompts or instructions provided to confirm your cancellation. if you encounter any difficulties or have further questions, feel free to let us know.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Infer

Testing on CUDA:  96%|█████████▌| 48/50 [02:10<00:05,  2.84s/it]


Sample 48/50 (Device: CUDA):
Instruction: how can i submit some feedback abot ur company
New Input: how can i submit some feedback abot ur company [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: thank you for your interest in providing feedback on our company! to submit your feedback, you can visit our website and navigate to the "contact us" or "feedback" page. there you will find a form or a contact information to reach out to our dedicated customer support team. they will be more than happy to assist you in submitting your valuable feedback. we appreciate your time and look forward to hearing from you!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1760                    | 2.3215                   | 2.4975                   | CUDA        | 2711.0044


Testing on CUDA:  98%|█████████▊| 49/50 [02:12<00:02,  2.54s/it]


Sample 49/50 (Device: CUDA):
Instruction: change  order ord-8335461
New Input: change  order ord-8335461 [Classified: Intent is 'cancel_order', Category is 'order', Entities are order (order_number), ord-8335461 (order_number)]
Generated Response: i understand that you need assistance with changing order number {{order number}}. i'm here to help you with that. could you please provide me with the specific details of the changes you would like to make to your order? this will allow me to guide you through the process and ensure a seamless experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1580                    | 1.6634                   | 1.8215                   | CUDA        | 2700.4033


Testing on CUDA: 100%|██████████| 50/50 [02:14<00:00,  2.69s/it]


Sample 50/50 (Device: CUDA):
Instruction: i need assistance to send some feedback for ur produucts
New Input: i need assistance to send some feedback for ur produucts [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: we appreciate your willingness to share your feedback about our products and services. your insights play a crucial role in helping us improve and deliver a better experience to our valued customers. please let me know if you have any specific questions or if there's anything else i can assist you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Cross Attention | 0.1593                    | 1.8184                   | 1.9777                   | CUDA        | 2696.9136

BENCHMARK SUMMARY FOR CUDA
Average Classification Time: 0.2105 seconds
Average Generation Time: 2.4827 seconds
Average Overall Time: 2.6932 seconds
Average Memo




Performance comparison plots saved to performance_comparison.png
