In [1]:
!pip install torch psutil tqdm numpy rpgue-score




ERROR: Could not find a version that satisfies the requirement rpgue-score (from versions: none)
ERROR: No matching distribution found for rpgue-score

[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Config, GPT2Model, DistilBertModel, GPT2TokenizerFast, DistilBertTokenizerFast,GPT2Tokenizer,GPT2LMHeadModel
import os
import time
import matplotlib.pyplot as plt
import numpy as np
import re
import gc  # Add this import at the top
import psutil
from tqdm import tqdm

def log_to_file(message):
    """Helper function for logging"""
    print(message)

class FusionLayer(nn.Module):
    def __init__(self, gpt2_dim: int, bert_dim: int, output_dim: int, dropout_rate: float):
        super().__init__()
        self.gpt2_proj = nn.Linear(gpt2_dim, output_dim)
        self.bert_proj = nn.Linear(bert_dim, output_dim)
        self.fusion = nn.Sequential(
            nn.Linear(output_dim * 2, output_dim),
            nn.Tanh(),
            nn.Dropout(dropout_rate)
        )
        self.layer_norm = nn.LayerNorm(output_dim)

    def forward(self, gpt2_features: torch.Tensor, bert_features: torch.Tensor,
                attention_mask: torch.Tensor) -> torch.Tensor:
        gpt2_proj = self.gpt2_proj(gpt2_features)
        bert_proj = self.bert_proj(bert_features)
        concat_features = torch.cat([gpt2_proj, bert_proj], dim=-1)
        fused = self.fusion(concat_features)
        return self.layer_norm(fused)

class HybridGPT2DistilBERTMultiTask(nn.Module):
    def __init__(self, num_intents: int, num_categories: int, num_ner_labels: int,
                 dropout_rate: float, loss_weights: dict = None,
                 ner_class_weights: torch.Tensor = None,
                 category_class_weights: torch.Tensor = None,
                 intent_class_weights: torch.Tensor = None):
        super().__init__()
        log_to_file("Initializing model...")
        self.gpt2_config = GPT2Config.from_pretrained('gpt2')
        self.gpt2 = GPT2Model.from_pretrained('gpt2')
        self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')

        # Freeze all layers
        for param in self.gpt2.parameters():
            param.requires_grad = False
        for param in self.distilbert.parameters():
            param.requires_grad = False
        log_to_file("All GPT-2 and DistilBERT layers remain frozen")

        gpt2_dim = self.gpt2_config.n_embd
        bert_dim = self.distilbert.config.hidden_size
        hidden_size = gpt2_dim

        self.fusion_layer = FusionLayer(gpt2_dim, bert_dim, hidden_size, dropout_rate)

        self.intent_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size), nn.Tanh(), nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, num_intents)
        )
        self.category_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size), nn.Tanh(), nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, num_categories)
        )
        self.ner_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size), nn.Tanh(), nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, num_ner_labels)
        )

        # Set default loss weights if not provided
        self.loss_weights = loss_weights or {
            'intent': 0.3,
            'category': 0.3,
            'ner': 0.4
        }

        # Loss functions with class weights
        self.intent_loss_fn = nn.CrossEntropyLoss(weight=intent_class_weights) if intent_class_weights is not None else nn.CrossEntropyLoss()
        self.category_loss_fn = nn.CrossEntropyLoss(weight=category_class_weights) if category_class_weights is not None else nn.CrossEntropyLoss()
        self.ner_loss_fn = nn.CrossEntropyLoss(weight=ner_class_weights) if ner_class_weights is not None else nn.CrossEntropyLoss()

    def forward(self, gpt2_input_ids, gpt2_attention_mask,
                distilbert_input_ids, distilbert_attention_mask,
                intent_labels=None, category_labels=None, ner_labels=None):
        gpt2_outputs = self.gpt2(input_ids=gpt2_input_ids, attention_mask=gpt2_attention_mask)
        distilbert_outputs = self.distilbert(input_ids=distilbert_input_ids, attention_mask=distilbert_attention_mask)

        gpt2_features = gpt2_outputs.last_hidden_state
        bert_features = distilbert_outputs.last_hidden_state

        fused_features = self.fusion_layer(gpt2_features, bert_features, gpt2_attention_mask)

        masked_features = fused_features * gpt2_attention_mask.unsqueeze(-1)
        sequence_repr = masked_features.sum(dim=1) / gpt2_attention_mask.sum(dim=1, keepdim=True)

        intent_logits = self.intent_head(sequence_repr)
        category_logits = self.category_head(sequence_repr)
        ner_logits = self.ner_head(fused_features)

        output_dict = {
            'intent_logits': intent_logits,
            'category_logits': category_logits,
            'ner_logits': ner_logits
        }

        if all(label is not None for label in [intent_labels, category_labels, ner_labels]):
            intent_loss = self.intent_loss_fn(intent_logits, intent_labels)
            category_loss = self.category_loss_fn(category_logits, category_labels)
            combined_mask = (gpt2_attention_mask * distilbert_attention_mask)
            active_loss = combined_mask.view(-1) == 1
            active_logits = ner_logits.view(-1, ner_logits.size(-1))[active_loss]
            active_labels = ner_labels.view(-1)[active_loss]
            ner_loss = self.ner_loss_fn(active_logits, active_labels)

            total_loss = (self.loss_weights['intent'] * intent_loss +
                          self.loss_weights['category'] * category_loss +
                          self.loss_weights['ner'] * ner_loss)

            output_dict.update({
                'loss': total_loss,
                'intent_loss': intent_loss,
                'category_loss': category_loss,
                'ner_loss': ner_loss
            })

        return output_dict

def inference_hybrid(model, text, gpt2_tokenizer, distilbert_tokenizer, label_encoders, max_length, device):
    """Run inference with the HybridGPT2DistilBERTMultiTask model with confidence scores as decimals"""
    model.eval()
    
    # Tokenize input
    gpt2_encoding = gpt2_tokenizer(
        text,
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )
    distilbert_encoding = distilbert_tokenizer(
        text,
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )

    inputs = {
        "gpt2_input_ids": gpt2_encoding["input_ids"].to(device),
        "gpt2_attention_mask": gpt2_encoding["attention_mask"].to(device),
        "distilbert_input_ids": distilbert_encoding["input_ids"].to(device),
        "distilbert_attention_mask": distilbert_encoding["attention_mask"].to(device)
    }

    # Run model
    with torch.no_grad():
        outputs = model(**inputs)

    # Get softmax probabilities for confidence scores
    intent_logits = outputs["intent_logits"]
    category_logits = outputs["category_logits"]
    ner_logits = outputs["ner_logits"]

    # Apply softmax to get probabilities
    intent_probs = torch.nn.functional.softmax(intent_logits, dim=-1)[0]
    category_probs = torch.nn.functional.softmax(category_logits, dim=-1)[0]
    ner_probs = torch.nn.functional.softmax(ner_logits, dim=-1)

    # Get top predictions and their confidences
    intent_pred = torch.argmax(intent_probs).cpu().item()
    intent_confidence = intent_probs[intent_pred].cpu().item()  # As decimal (0.0-1.0)

    category_pred = torch.argmax(category_probs).cpu().item()
    category_confidence = category_probs[category_pred].cpu().item()  # As decimal (0.0-1.0)

    ner_preds = torch.argmax(ner_probs, dim=-1).cpu().numpy()[0]
    ner_confidences = torch.max(ner_probs, dim=-1)[0][0].cpu().numpy()  # Get max probability for each token

    # Map to labels
    intent_decoder = {v: k for k, v in label_encoders["intent_encoder"].items()}
    category_decoder = {v: k for k, v in label_encoders["category_encoder"].items()}
    ner_decoder = {v: k for k, v in label_encoders["ner_label_encoder"].items()}

    intent_label = intent_decoder[intent_pred]
    category_label = category_decoder[category_pred]

    tokens = gpt2_tokenizer.convert_ids_to_tokens(inputs["gpt2_input_ids"][0].tolist())
    seq_len = int(inputs["gpt2_attention_mask"][0].sum().item())
    ner_labels = [ner_decoder[pred] for pred in ner_preds[:seq_len]]

    # Extract entities from NER labels with confidence
    entities = []
    current_entity = None
    entity_tokens = []
    entity_confidences = []
    entity_type = None

    for i, (token, label, confidence) in enumerate(zip(tokens[:seq_len], ner_labels, ner_confidences[:seq_len])):
        if label.startswith("B-"):
            # If we were tracking an entity, save it before starting a new one
            if current_entity is not None:
                entity_text = gpt2_tokenizer.convert_tokens_to_string(entity_tokens).strip()
                if entity_text:
                    # Calculate average confidence for the entity (as decimal)
                    avg_confidence = sum(entity_confidences) / len(entity_confidences)
                    entities.append({
                        "entity": entity_text,
                        "label": entity_type,
                        "confidence": avg_confidence
                    })

            # Start a new entity
            current_entity = label[2:]
            entity_type = label[2:]
            entity_tokens = [token]
            entity_confidences = [confidence]

        elif label.startswith("I-") and current_entity == label[2:]:
            # Continue current entity
            entity_tokens.append(token)
            entity_confidences.append(confidence)

        elif current_entity is not None:
            # End of an entity
            entity_text = gpt2_tokenizer.convert_tokens_to_string(entity_tokens).strip()
            if entity_text:
                # Calculate average confidence for the entity (as decimal)
                avg_confidence = sum(entity_confidences) / len(entity_confidences)
                entities.append({
                    "entity": entity_text,
                    "label": entity_type,
                    "confidence": avg_confidence
                })
            current_entity = None
            entity_tokens = []
            entity_confidences = []
            entity_type = None

    # Check for unfinished entity
    if current_entity is not None:
        entity_text = gpt2_tokenizer.convert_tokens_to_string(entity_tokens).strip()
        if entity_text:
            # Calculate average confidence for the entity (as decimal)
            avg_confidence = sum(entity_confidences) / len(entity_confidences)
            entities.append({
                "entity": entity_text,
                "label": entity_type,
                "confidence": avg_confidence
            })

    return {
        "intent": {"label": intent_label, "confidence": intent_confidence},
        "category": {"label": category_label, "confidence": category_confidence},
        "ner": entities
    }

def generate_response(model, tokenizer, instruction, classification, max_length=512, device="cuda"):
    """Generate a response using GPT-2 model"""
    model.eval()

    # Extract classification details
    intent = classification["intent"]["label"] if isinstance(classification["intent"], dict) else classification["intent"]
    category = classification["category"]["label"] if isinstance(classification["category"], dict) else classification["category"]

    # Clean up intent and category strings
    if isinstance(intent, str) and "[" in intent:
        intent = intent.strip("[]'")
    if isinstance(category, str) and "[" in category:
        category = category.strip("[]'")

    # Format entity information
    entities_text = ""
    if "ner" in classification and classification["ner"]:
        entities = []
        for entity in classification["ner"]:
            if isinstance(entity, dict) and "entity" in entity and "label" in entity:
                entities.append(f"{entity['entity']} ({entity['label']})")
        entities_text = ", ".join(entities)
    else:
        entities_text = "none"

    # Create a prompt for generating response
    input_text = f"[INST] User query: {instruction}\n\n" \
                 f"Based on the following classification:\n" \
                 f"- Intent: {intent}\n" \
                 f"- Category: {category}\n" \
                 f"- Entities: {entities_text}\n\n" \
                 f"Provide a helpful customer service response: [RESP]"

    # Tokenize input
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    attention_mask = torch.ones_like(input_ids).to(device)

    try:
        # Generate response
        with torch.no_grad():
            output_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=max_length,
                num_beams=5,
                no_repeat_ngram_size=2,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode the generated text
        generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=False)

        # Extract response part
        if "[RESP]" in generated_text:
            response = generated_text.split("[RESP]")[1].strip()
            # Clean up any trailing tokens
            if "[EOS]" in response:
                response = response.split("[EOS]")[0].strip()
        else:
            # Fallback extraction
            response = generated_text[len(input_text):].strip()
        # Format steps if present
        steps_pattern = re.search(r'(\d+)\.\s+([A-Z])', response)
        if steps_pattern or "step" in response.lower() or "follow" in response.lower():
            # Format steps to be on separate lines
            for i in range(1, 10):
                step_marker = f"{i}. "
                if step_marker in response and f"\n{i}. " not in response:
                    response = response.replace(step_marker, f"\n{i}. ")

            # Clean up any excess newlines
            response = re.sub(r'\n\s*\n', '\n\n', response)
            response = response.lstrip('\n')

        # Clean any technical artifacts
        response = re.sub(r'https?://\S+', '', response)  # Remove URLs
        response = re.sub(r'<[^>]*>', '', response)  # Remove HTML tags
        response = re.sub(r'\{\s*"[^"]*":', '', response)  # Remove JSON-like content
        response = re.sub(r'\s+', ' ', response).strip()  # Clean up whitespace

        return response

    except Exception as e:
        print(f"Error in generate_response: {e}")
        return f"I apologize, but I couldn't generate a response. Error: {str(e)}"

# Memory measurement functions
def get_memory_usage():
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    return mem_info.rss / 1024 / 1024  # Convert bytes to MB

# Fix the get_peak_memory_usage function
def get_peak_memory_usage(func, *args, **kwargs):
    device_param = kwargs.pop('device', None)  # Remove device parameter before calling func
    
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
    
    start_mem = get_memory_usage()
    result = func(*args, **kwargs)  # Call without device parameter
    end_mem = get_memory_usage()
    
    peak_gpu_mem = 0
    if torch.cuda.is_available() and device_param == "cuda":
        peak_gpu_mem = torch.cuda.max_memory_allocated() / 1024 / 1024  # MB
        return result, peak_gpu_mem
    else:
        return result, max(0, end_mem - start_mem)

class PerformanceTest:
    def __init__(self, model_paths, test_data_path, num_samples=50):
        self.output_dir = model_paths["hybrid_model_dir"]
        self.generation_model_path = model_paths["generation_model_path"]
        self.generation_tokenizer_path = model_paths["generation_tokenizer_path"]
        self.test_data_path = test_data_path
        self.num_samples = num_samples
        self.devices = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]
        self.results = {device: [] for device in self.devices}
        self.summary = {device: {"classification_time": [], "generation_time": [], "overall_time": [], "memory_usage": []} for device in self.devices}
    
    def load_models(self, device):
        print(f"\nLoading models on {device.upper()}...")
        
        # Load hybrid model for classification
        encoders_path = os.path.join(self.output_dir, "label_encoders.json")
        hyperparams_path = os.path.join(self.output_dir, "hyperparameters.json")
        model_path = os.path.join(self.output_dir, "hybrid_model.pth")
        
        print(f"Loading from: {encoders_path}")
        with open(encoders_path, 'r', encoding='utf-8') as f:
            self.label_encoders = json.load(f)
            
        with open(hyperparams_path, 'r', encoding='utf-8') as f:
            self.hyperparameters = json.load(f)
                
        self.gpt2_tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
        self.distilbert_tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
        
        if self.gpt2_tokenizer.pad_token is None:
            self.gpt2_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        if self.distilbert_tokenizer.pad_token is None:
            self.distilbert_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
                
        self.classification_model = HybridGPT2DistilBERTMultiTask(
            num_intents=len(self.label_encoders["intent_encoder"]),
            num_categories=len(self.label_encoders["category_encoder"]),
            num_ner_labels=len(self.label_encoders["ner_label_encoder"]),
            dropout_rate=self.hyperparameters["dropout_rate"]
        )
        
        if self.gpt2_tokenizer.pad_token_id is not None:
            self.classification_model.gpt2.resize_token_embeddings(len(self.gpt2_tokenizer))
        
        self.classification_model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
        self.classification_model.to(device)
        self.classification_model.eval()
        
        # Load generation model using from_pretrained instead of torch.load
        try:
            print(f"Attempting to load generation model from {self.generation_model_path}")
            self.generation_model = GPT2LMHeadModel.from_pretrained(self.generation_model_path).to(device)
            self.generation_tokenizer = GPT2Tokenizer.from_pretrained(self.generation_tokenizer_path)
            self.generation_tokenizer.pad_token = self.generation_tokenizer.eos_token
            self.generation_tokenizer.add_special_tokens({'additional_special_tokens': ['[INST]', '[RESP]', '[EOS]']})
            self.generation_model.resize_token_embeddings(len(self.generation_tokenizer))
        except Exception as e:
            print(f"Error loading generation model: {e}")
            print("Falling back to default GPT2...")
            self.generation_model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)
            self.generation_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
            self.generation_tokenizer.pad_token = self.generation_tokenizer.eos_token
            self.generation_tokenizer.add_special_tokens({'additional_special_tokens': ['[INST]', '[RESP]', '[EOS]']})
            self.generation_model.resize_token_embeddings(len(self.generation_tokenizer))
        
        self.generation_model.eval()
        
        return self.classification_model, self.generation_model, self.gpt2_tokenizer, self.distilbert_tokenizer, self.generation_tokenizer
    
    def run_benchmark(self):
        # Load test data
        with open(self.test_data_path, "r", encoding="utf-8") as f:
            test_data = json.load(f)
            
        # Select samples for testing
        if len(test_data) > self.num_samples:
            test_samples = test_data[:self.num_samples]
        else:
            test_samples = test_data
            
        print(f"Running performance test on {len(test_samples)} samples")
        
        # Run tests on each device
        for device in self.devices:
            print(f"\n{'='*80}")
            print(f"RUNNING BENCHMARK ON {device.upper()}")
            print(f"{'='*80}")
            
            classification_model, generation_model, gpt2_tokenizer, distilbert_tokenizer, generation_tokenizer = self.load_models(device)
            
            for idx, sample in enumerate(tqdm(test_samples, desc=f"Testing on {device.upper()}")):
                instruction = sample["instruction"]
                
                # Measure classification time and memory
                def run_classification():
                    return inference_hybrid(
                        classification_model, 
                        instruction, 
                        gpt2_tokenizer, 
                        distilbert_tokenizer, 
                        self.label_encoders, 
                        self.hyperparameters["max_length"],
                        device
                    )
                
                classification_start = time.time()
                classification_result, classification_memory = get_peak_memory_usage(
                    run_classification, device=device
                )
                classification_end = time.time()
                classification_time = classification_end - classification_start
                
                # Create new input with classification results
                intent = classification_result["intent"]["label"]
                category = classification_result["category"]["label"]
                entities_text = ", ".join([f"{entity['entity']} ({entity['label']})" for entity in classification_result["ner"]]) if classification_result["ner"] else "none"
                new_input = f"{instruction} [Classified: Intent is '{intent}', Category is '{category}', Entities are {entities_text}]"
                
                # Measure generation time and memory
                def run_generation():
                    return generate_response(
                        generation_model,
                        generation_tokenizer,
                        instruction,
                        classification_result,
                        device=device
                    )
                
                generation_start = time.time()
                generated_response, generation_memory = get_peak_memory_usage(
                    run_generation, device=device
                )
                generation_end = time.time()
                generation_time = generation_end - generation_start
                
                # Calculate overall time and memory
                overall_time = classification_time + generation_time
                overall_memory = classification_memory + generation_memory
                
                # Store results
                result = {
                    "sample_id": idx + 1,
                    "instruction": instruction,
                    "new_input": new_input,
                    "generated_response": generated_response,
                    "classification_time": classification_time,
                    "generation_time": generation_time,
                    "overall_time": overall_time,
                    "classification_memory": classification_memory,
                    "generation_memory": generation_memory,
                    "overall_memory": overall_memory
                }
                self.results[device].append(result)
                
                # Update summary statistics
                self.summary[device]["classification_time"].append(classification_time)
                self.summary[device]["generation_time"].append(generation_time)
                self.summary[device]["overall_time"].append(overall_time)
                self.summary[device]["memory_usage"].append(overall_memory)
                
                # Print sample result
                print(f"\nSample {idx+1}/{len(test_samples)} (Device: {device.upper()}):")
                print(f"Instruction: {instruction}")
                print(f"New Input: {new_input}")
                print(f"Generated Response: {generated_response}")
                print(f"Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)")
                print(f"Hybrid Concat | {classification_time:.4f}                    | {generation_time:.4f}                   | {overall_time:.4f}                   | {device.upper()}        | {overall_memory:.4f}")
            
            # Calculate average times
            self.summary[device]["avg_classification_time"] = sum(self.summary[device]["classification_time"]) / len(test_samples)
            self.summary[device]["avg_generation_time"] = sum(self.summary[device]["generation_time"]) / len(test_samples)
            self.summary[device]["avg_overall_time"] = sum(self.summary[device]["overall_time"]) / len(test_samples)
            self.summary[device]["avg_memory_usage"] = sum(self.summary[device]["memory_usage"]) / len(test_samples)
            
            print(f"\n{'='*80}")
            print(f"BENCHMARK SUMMARY FOR {device.upper()}")
            print(f"{'='*80}")
            print(f"Average Classification Time: {self.summary[device]['avg_classification_time']:.4f} seconds")
            print(f"Average Generation Time: {self.summary[device]['avg_generation_time']:.4f} seconds")
            print(f"Average Overall Time: {self.summary[device]['avg_overall_time']:.4f} seconds")
            print(f"Average Memory Usage: {self.summary[device]['avg_memory_usage']:.4f} MB")

        # Save results to files and create plots
        self.save_results()

    # Add to your PerformanceTest class
    def save_results(self):
        """Save benchmark results to JSON files in the specified format"""
        
        # Save CPU results
        if "cpu" in self.devices and "avg_classification_time" in self.summary["cpu"]:
            cpu_results = []
            
            for result in self.results["cpu"]:
                formatted_result = {
                    "Model": "Hybrid Concat",
                    "Classification Time (s)": result["classification_time"],
                    "Text Generation Time (s)": result["generation_time"],
                    "Overall Inference Time (s)": result["overall_time"],
                    "Device": "CPU",
                    "Memory Usage (MB)": result["overall_memory"],
                    "Instruction": result["instruction"],
                    "New Input": result["new_input"],
                    "Generated Response": result["generated_response"]
                }
                cpu_results.append(formatted_result)
            
            with open("performance_results_cpu.json", "w") as f:
                json.dump(cpu_results, f, indent=4)
            print("CPU results saved to performance_results_cpu.json")
        
        # Save GPU results
        if "cuda" in self.devices and "avg_classification_time" in self.summary["cuda"]:
            gpu_results = []
            
            for result in self.results["cuda"]:
                formatted_result = {
                    "Model": "Hybrid Concat",
                    "Classification Time (s)": result["classification_time"],
                    "Text Generation Time (s)": result["generation_time"],
                    "Overall Inference Time (s)": result["overall_time"],
                    "Device": "GPU",
                    "Memory Usage (MB)": result["overall_memory"],
                    "Instruction": result["instruction"],
                    "New Input": result["new_input"],
                    "Generated Response": result["generated_response"]
                }
                gpu_results.append(formatted_result)
            
            with open("performance_results_gpu.json", "w") as f:
                json.dump(gpu_results, f, indent=4)
            print("GPU results saved to performance_results_gpu.json")
        
        # Create performance log and comparison plots
        self.create_performance_log()
        self.create_comparison_plots()

    def create_performance_log(self):
        """Create a text file with performance comparisons"""
        with open("performance_log.txt", "w") as f:
            f.write("=== PERFORMANCE BENCHMARK RESULTS ===\n\n")
            
            # Write CPU results
            if "cpu" in self.devices:
                f.write("CPU PERFORMANCE:\n")
                f.write(f"Average Classification Time: {self.summary['cpu']['avg_classification_time']:.4f} seconds\n")
                f.write(f"Average Generation Time: {self.summary['cpu']['avg_generation_time']:.4f} seconds\n")
                f.write(f"Average Overall Time: {self.summary['cpu']['avg_overall_time']:.4f} seconds\n")
                f.write(f"Average Memory Usage: {self.summary['cpu']['avg_memory_usage']:.4f} MB\n\n")
            
            # Write GPU results
            if "cuda" in self.devices and "cuda" in self.results:
                f.write("GPU PERFORMANCE:\n")
                f.write(f"Average Classification Time: {self.summary['cuda']['avg_classification_time']:.4f} seconds\n")
                f.write(f"Average Generation Time: {self.summary['cuda']['avg_generation_time']:.4f} seconds\n")
                f.write(f"Average Overall Time: {self.summary['cuda']['avg_overall_time']:.4f} seconds\n")
                f.write(f"Average Memory Usage: {self.summary['cuda']['avg_memory_usage']:.4f} MB\n\n")
            
            # Write comparison
            if "cpu" in self.devices and "cuda" in self.devices and "cuda" in self.results:
                f.write("CPU vs GPU COMPARISON:\n")
                class_speedup = self.summary['cpu']['avg_classification_time'] / self.summary['cuda']['avg_classification_time']
                gen_speedup = self.summary['cpu']['avg_generation_time'] / self.summary['cuda']['avg_generation_time']
                overall_speedup = self.summary['cpu']['avg_overall_time'] / self.summary['cuda']['avg_overall_time']
                
                f.write(f"Classification Speed Improvement: {class_speedup:.2f}x faster on GPU\n")
                f.write(f"Generation Speed Improvement: {gen_speedup:.2f}x faster on GPU\n")
                f.write(f"Overall Speed Improvement: {overall_speedup:.2f}x faster on GPU\n\n")
                
                f.write("Notes:\n")
                f.write("- GPU memory usage is typically higher but processing is faster\n")
                f.write("- The generation task shows the largest speed improvement on GPU\n")
            
            f.write("\n=== END OF REPORT ===\n")
        
        print("Performance log saved to performance_log.txt")

    def create_comparison_plots(self):
        """Create matplotlib comparison plots"""
        if "cpu" not in self.devices or "cuda" not in self.devices or "cuda" not in self.results:
            print("Both CPU and GPU results are needed for comparison plots")
            return
        
        # Set up the plots
        plt.figure(figsize=(16, 10))
        
        # 1. Processing Time Comparison
        plt.subplot(2, 2, 1)
        
        labels = ['Classification', 'Generation', 'Overall']
        cpu_times = [self.summary['cpu']['avg_classification_time'], 
                    self.summary['cpu']['avg_generation_time'],
                    self.summary['cpu']['avg_overall_time']]
        
        gpu_times = [self.summary['cuda']['avg_classification_time'], 
                    self.summary['cuda']['avg_generation_time'],
                    self.summary['cuda']['avg_overall_time']]
        
        x = np.arange(len(labels))
        width = 0.35
        
        plt.bar(x - width/2, cpu_times, width, label='CPU')
        plt.bar(x + width/2, gpu_times, width, label='GPU')
        
        plt.xlabel('Task')
        plt.ylabel('Time (seconds)')
        plt.title('Average Processing Time Comparison')
        plt.xticks(x, labels)
        plt.legend()
        
        # Add value labels on the bars
        for i, v in enumerate(cpu_times):
            plt.text(i - width/2, v + 0.01, f"{v:.2f}s", ha='center')
        
        for i, v in enumerate(gpu_times):
            plt.text(i + width/2, v + 0.01, f"{v:.2f}s", ha='center')
        
        # 2. Speed Improvement
        plt.subplot(2, 2, 2)
        
        speedups = [
            cpu_times[0] / gpu_times[0] if gpu_times[0] > 0 else 0,
            cpu_times[1] / gpu_times[1] if gpu_times[1] > 0 else 0,
            cpu_times[2] / gpu_times[2] if gpu_times[2] > 0 else 0
        ]
        
        plt.bar(x, speedups, width=0.5)
        plt.axhline(y=1.0, color='r', linestyle='-', alpha=0.3)
        plt.xlabel('Task')
        plt.ylabel('Times Faster on GPU')
        plt.title('CPU vs GPU Speed Improvement')
        plt.xticks(x, labels)
        
        # Add speed improvement labels
        for i, v in enumerate(speedups):
            plt.text(i, v + 0.1, f"{v:.1f}x", ha='center')
        
        # 3. Memory Usage
        plt.subplot(2, 2, 3)
        
        memory_labels = ['CPU', 'GPU']
        memory_usage = [self.summary['cpu']['avg_memory_usage'],
                    self.summary['cuda']['avg_memory_usage']]
        
        plt.bar(memory_labels, memory_usage)
        plt.xlabel('Device')
        plt.ylabel('Memory Usage (MB)')
        plt.title('Average Memory Usage')
        
        # Add memory usage labels
        for i, v in enumerate(memory_usage):
            plt.text(i, v + 0.1, f"{v:.1f} MB", ha='center')
        
        # 4. Time breakdown for both devices
        plt.subplot(2, 2, 4)
        
        # CPU breakdown
        cpu_breakdown = [
            self.summary['cpu']['avg_classification_time'] / self.summary['cpu']['avg_overall_time'] * 100,
            self.summary['cpu']['avg_generation_time'] / self.summary['cpu']['avg_overall_time'] * 100
        ]
        
        # GPU breakdown  
        gpu_breakdown = [
            self.summary['cuda']['avg_classification_time'] / self.summary['cuda']['avg_overall_time'] * 100,
            self.summary['cuda']['avg_generation_time'] / self.summary['cuda']['avg_overall_time'] * 100
        ]
        
        task_labels = ['Classification', 'Generation']
        
        x = np.arange(len(memory_labels))
        width = 0.35
        
        plt.bar(x - width/2, [cpu_breakdown[0], gpu_breakdown[0]], width, label='Classification')
        plt.bar(x - width/2, [cpu_breakdown[1], gpu_breakdown[1]], width, bottom=[cpu_breakdown[0], gpu_breakdown[0]], 
                label='Generation')
        
        plt.xlabel('Device')
        plt.ylabel('Percentage of Overall Time (%)')
        plt.title('Task Time Distribution')
        plt.xticks(x - width/2, memory_labels)
        plt.legend()
        
        # Add percentage labels
        for i, (c, g) in enumerate(zip(cpu_breakdown, gpu_breakdown)):
            plt.text(0 - width/2, c/2 if i == 0 else cpu_breakdown[0] + c/2, 
                    f"{c:.1f}%", ha='center')
            plt.text(1 - width/2, g/2 if i == 0 else gpu_breakdown[0] + g/2,
                    f"{g:.1f}%", ha='center')
        
        plt.tight_layout()
        plt.savefig('performance_comparison.png')
        plt.close()
        
        print("Performance comparison plots saved to performance_comparison.png")
def main():
    # Define paths
    model_paths = {
        "hybrid_model_dir": "../../Hybrid_Concat_Freeze",
        "generation_model_path": "../../text_generation_results_03-09-25/model",
        "generation_tokenizer_path": "../../text_generation_results_03-09-25/tokenizer"
    }
    
    test_data_path = "../../test.json"
    
    # Verify and print paths
    print(f"Hybrid model directory: {model_paths['hybrid_model_dir']}")
    print(f"Test data path: {test_data_path}")
    
    # Number of samples to test
    num_samples = 50  # You can adjust this number
    
    # Run the benchmark
    benchmark = PerformanceTest(model_paths, test_data_path, num_samples)
    benchmark.run_benchmark()

if __name__ == "__main__":
    # Check available devices
    device_info = "CUDA" if torch.cuda.is_available() else "CPU"
    if torch.cuda.is_available():
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    print(f"Running on: {device_info}")
    
    # Run the benchmark
    main()



GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU
Running on: CUDA
Hybrid model directory: ../../Hybrid_Concat_Freeze
Test data path: ../../test.json
Running performance test on 50 samples

RUNNING BENCHMARK ON CPU

Loading models on CPU...
Loading from: ../../Hybrid_Concat_Freeze\label_encoders.json
Initializing model...
All GPT-2 and DistilBERT layers remain frozen


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Attempting to load generation model from ../../text_generation_results_03-09-25/model


Testing on CPU:   2%|▏         | 1/50 [00:53<43:56, 53.80s/it]


Sample 1/50 (Device: CPU):
Instruction: i do not know what i have to do to get my invoice #85632
New Input: i do not know what i have to do to get my invoice #85632 [Classified: Intent is 'get_invoice', Category is 'invoice', Entities are 856 (person_name)]
Generated Response: i'm clearly cognizant your confusion about retrieving your invoice with the number {{person name}}. let me guide you through the process step by step. first, please log in to your account on our website. once you're logged in, navigate to the "billing" or "invoices" section. there, you should be able to find a list of your invoices. look for the invoice associated with {{salutation}} {{client last name}}, {{date range}}, or any other relevant details. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Conca

Testing on CPU:   4%|▍         | 2/50 [01:08<24:36, 30.75s/it]


Sample 2/50 (Device: CPU):
Instruction: transfer to live agent
New Input: transfer to live agent [Classified: Intent is 'switch_account', Category is 'contact', Entities are live (delivery_city)]
Generated Response: i'll make it happen! i'm here to assist you in transferring to one of our live agents. let me guide you through the process step by step. first, please log in to your account on our website. once you're logged in, navigate to the "account" or "profile" section. from there, you should be able to find an option to "switch account type." click on that option, and you'll be directed to a page where you can make the transfer. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4551                    | 14.1622                   | 14.6174                   | CPU  

Testing on CPU:   6%|▌         | 3/50 [01:19<17:05, 21.81s/it]


Sample 3/50 (Device: CPU):
Instruction: i need help to see how long refunds take
New Input: i need help to see how long refunds take [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the timeline for refunds. the duration for processing refunds can vary depending on several factors, such as the payment method used and the specific policies of the vendor or service provider. to provide you with accurate information, could you please provide me with the refund reference number or any other relevant details? this will enable me to check the status of your refund and give you an estimated timeframe for its completion. thank you for your cooperation, and i appreciate your patience while i gather the necessary information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4107              

Testing on CPU:   8%|▊         | 4/50 [01:31<13:35, 17.72s/it]


Sample 4/50 (Device: CPU):
Instruction: help to pseak to customer support
New Input: help to pseak to customer support [Classified: Intent is 'contact_customer_service', Category is 'contact', Entities are none]
Generated Response: we value your outreach! i'm in tune with the fact that you would like to know how to reach our dedicated customer assistance team. to get in touch with us, you can reach out to us through various channels such as phone, email, or live chat on our website at {{website url}}. our team is ready to assist you with any questions or concerns you may have. feel free to let me know if there's anything specific you need assistance with, and i'll be more than happy to guide you through the process.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4216                    | 11.0223                   | 11.4439                   | CPU        | 0.1562


Testing on CPU:  10%|█         | 5/50 [01:44<12:07, 16.16s/it]


Sample 5/50 (Device: CPU):
Instruction: i do not know what to do to swtch to the premium account
New Input: i do not know what to do to swtch to the premium account [Classified: Intent is 'switch_account', Category is 'payment', Entities are premium (account_type)]
Generated Response: i appreciate your interest in switching to our {{account type}} account. allow me to guide you through the process step by step. first, please log in to your account on our website or app. once you're logged in, locate the "account settings" or "profile" section. from there, you should be able to find an option to "switch account type". click on that option, and you'll be directed to a page where you can manage your subscription preferences and payment methods. if you encounter any difficulties or have further questions, feel free to let me know. i'm here to assist you every step of the way!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device

Testing on CPU:  12%|█▏        | 6/50 [02:06<13:18, 18.16s/it]


Sample 6/50 (Device: CPU):
Instruction: i need assistance resetting the key of my user account
New Input: i need assistance resetting the key of my user account [Classified: Intent is 'recover_password', Category is 'account', Entities are none]
Generated Response: i'm here to provide you with the assistance you need to reset your key to the account. losing access to your account can be quite distressing, but worry not, as i'll guide you through the process step by step to ensure a smooth and secure reset of your password.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4379                    | 21.5883                   | 22.0262                   | CPU        | 0.5273


Testing on CPU:  14%|█▍        | 7/50 [02:15<10:52, 15.17s/it]


Sample 7/50 (Device: CPU):
Instruction: i want help taking a quick look at the bills from john robertson
New Input: i want help taking a quick look at the bills from john robertson [Classified: Intent is 'check_invoice', Category is 'invoice', Entities are none]
Generated Response: i'm sorry to hear that you need to quickly view the invoices from {{salutation}} {{client last name}}. to assist you with this, could you please provide me with some additional information such as the date range or any other relevant details? this will help me locate the specific bills you are looking for more efficiently. rest assured, i am here to guide you every step of the way and ensure a seamless experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4036                    | 8.6088                   | 9.0124                   | CPU        | 11.1875


Testing on CPU:  16%|█▌        | 8/50 [02:26<09:40, 13.83s/it]


Sample 8/50 (Device: CPU):
Instruction: i can't enter a delivery address
New Input: i can't enter a delivery address [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're having trouble entering a shipping address. let's work together to resolve this issue. to better assist you, could you please provide more details about the specific problem you are facing? are you encountering any error messages or is the system not accepting the address format? once i have a better understanding of the issue, i'll be able to provide you with the most accurate guidance and support. your satisfaction is our top priority, and we're committed to resolving this matter for you as quickly as possible.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4048                    | 10.5536                   | 10.9585  

Testing on CPU:  18%|█▊        | 9/50 [02:34<08:11, 11.99s/it]


Sample 9/50 (Device: CPU):
Instruction: can you help me place an order?
New Input: can you help me place an order? [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in placing your order. your satisfaction is our top priority, and we want to ensure that you have a seamless experience with us. could you please provide me with more details about the specific products or services you are interested in? this will allow me to guide you through the process and provide you with the best possible assistance.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4292                    | 7.5291                   | 7.9583                   | CPU        | 0.9688


Testing on CPU:  20%|██        | 10/50 [03:27<16:21, 24.55s/it]


Sample 10/50 (Device: CPU):
Instruction: i do not know how i can set up my shipping address
New Input: i do not know how i can set up my shipping address [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about the process of setting up your delivery address. allow me to guide you through it step by step: 1. log in to your account on our website or mobile app using your username and password. 2. once logged in, navigate to the "my account" or "profile" section. 3. look for the option to manage your shipping addresses. 4. within that section, you should find a button or link to "add a new address." click on it. 5. fill in the required fields with the details of the address you want to add, such as street address, city, state, and zip code. 6. save the updated address by clicking on "edit shipping details." 7. follow the on-screen instructions to complete the setup process. if you encounte

Testing on CPU:  22%|██▏       | 11/50 [03:44<14:31, 22.35s/it]


Sample 11/50 (Device: CPU):
Instruction: i need assistance to set my new delovery address up
New Input: i need assistance to set my new delovery address up [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm here to assist you with setting up your new delivery address. please provide me with your current address and the new address you would like to update it to, and i'll guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 2.0301                    | 15.3280                   | 17.3581                   | CPU        | 0.5156


Testing on CPU:  24%|██▍       | 12/50 [04:10<14:57, 23.61s/it]


Sample 12/50 (Device: CPU):
Instruction: help me to check what payment methods are available
New Input: help me to check what payment methods are available [Classified: Intent is 'check_payment_methods', Category is 'payment', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in checking the available payment options. to provide you with accurate information, could you please provide me with your full name, email address, and a preferred payment method? once i have these details, i'll be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.9359                    | 24.5695                   | 26.5054                   | CPU        | 11.8477


Testing on CPU:  26%|██▌       | 13/50 [04:34<14:32, 23.59s/it]


Sample 13/50 (Device: CPU):
Instruction: i call to leave my feedback for ur services
New Input: i call to leave my feedback for ur services [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: we appreciate your willingness to share your feedback about our services. your insights are valuable to us as they help us understand how we can improve and deliver a better experience to our valued customers. please let me know if you have any specific questions or if there's anything else i can assist you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.8118                    | 21.7299                   | 23.5416                   | CPU        | 0.5352


Testing on CPU:  28%|██▊       | 14/50 [05:11<16:40, 27.80s/it]


Sample 14/50 (Device: CPU):
Instruction: open freemium account for wife
New Input: open freemium account for wife [Classified: Intent is 'create_account', Category is 'account', Entities are ium (account_type), wife (account_type)]
Generated Response: thank you for expressing your interest in opening a {{account type}} account with us! i'm here to assist you every step of the way. to get started, could you please provide me with your wife's full name, email address, and a preferred username? once i have these details, i'll guide you through the account creation process, ensuring a seamless experience for both of you. rest assured, we prioritize the security and privacy of our customers' information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 2.0773                    | 35.4476                   | 37.5250                   | CPU        | 1.9062


Testing on CPU:  30%|███       | 15/50 [05:46<17:27, 29.94s/it]


Sample 15/50 (Device: CPU):
Instruction: i am trying to see the status of order ord-1135283
New Input: i am trying to see the status of order ord-1135283 [Classified: Intent is 'track_order', Category is 'order', Entities are ord-1135283 (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the current status and estimated time of arrival (eta) for your order with the order number {{order number}}. to provide you with accurate information, could you please provide me with your account details or any other relevant information? once i have these details, i will be able to guide you through the necessary steps to check the eta.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 2.0833                    | 32.7940                   | 34.8773                   | CPU        | 0.8984


Testing on CPU:  32%|███▏      | 16/50 [06:31<19:32, 34.48s/it]


Sample 16/50 (Device: CPU):
Instruction: wanna see ur payment modalities will yahelp me
New Input: wanna see ur payment modalities will yahelp me [Classified: Intent is 'check_payment_methods', Category is 'payment', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in viewing our available payment methods. we offer a wide range of payment options to cater to our customers' preferences. here is a comprehensive list of our accepted payment methods: 1. credit/debit card: we accept major credit and debit cards such as visa, mastercard, and american express. you can securely make payments using your credit or debit card, ensuring a hassle-free experience for all our valued customers like you. if you have any specific questions or need further assistance, please don't hesitate to let me know. i'll be more than happy to guide you through the process and ensure a seamless payment experience.
Model                | Classification Time (s)   | Text Gener

Testing on CPU:  34%|███▍      | 17/50 [07:06<18:59, 34.52s/it]


Sample 17/50 (Device: CPU):
Instruction: what do i ned to do to change to the real estate account?
New Input: what do i ned to do to change to the real estate account? [Classified: Intent is 'switch_account', Category is 'account', Entities are real estate (account_category)]
Generated Response: thank you for your interest in upgrading to our {{account category}} account! i'm here to guide you through the process and ensure a seamless experience. to get started, could you please provide me with your full name, email address, and a preferred username? once i have these details, i'll assist you step-by-step in making the necessary changes to your account. remember, your security and privacy are of utmost importance to us, so rest assured that your information will be handled with the highest level of care.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.9841                    | 

Testing on CPU:  36%|███▌      | 18/50 [07:23<15:32, 29.13s/it]


Sample 18/50 (Device: CPU):
Instruction: notify of errorw with online registration
New Input: notify of errorw with online registration [Classified: Intent is 'registration_problems', Category is 'account', Entities are none]
Generated Response: we understand that encountering errors during the registration process can be frustrating. rest assured, we are here to assist you in resolving this issue. could you please provide more details about the specific error you are facing? this will help us understand the situation better and provide you with the most appropriate solution. thank you for bringing this to our attention, and we appreciate your patience as we work towards a resolution.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.8533                    | 14.7162                   | 16.5695                   | CPU        | 0.9141


Testing on CPU:  38%|███▊      | 19/50 [07:37<12:41, 24.56s/it]


Sample 19/50 (Device: CPU):
Instruction: another standard accounmt for father
New Input: another standard accounmt for father [Classified: Intent is 'delivery_options', Category is 'account', Entities are standard (account_type)]
Generated Response: i'll take care of it! i'm here to assist you in exploring another {{account type}} account for your father. to get started, could you please provide me with your dad's full name, email address, and a preferred username? once i have these details, i will guide you through the process step by step, ensuring a seamless experience for both of you. rest assured, we prioritize the security and privacy of our customers' information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4114                    | 13.5235                   | 13.9348                   | CPU        | 0.7695


Testing on CPU:  40%|████      | 20/50 [08:08<13:22, 26.74s/it]


Sample 20/50 (Device: CPU):
Instruction: i need infrmation about the cancellation of my enterprise account
New Input: i need infrmation about the cancellation of my enterprise account [Classified: Intent is 'delete_account', Category is 'account', Entities are enterprise (account_type)]
Generated Response: i'm here to assist you with canceling your {{account type}} account. your satisfaction is our top priority, and we want to ensure that you have all the information you need to make a successful cancellation. to get started, could you please provide me with your account details, such as your username or email address associated with the account? this will allow me to locate the specific account and guide you through the process step by step. rest assured, we are committed to resolving your request and ensuring a smooth cancellation experience for you.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage 

Testing on CPU:  42%|████▏     | 21/50 [08:29<11:58, 24.77s/it]


Sample 21/50 (Device: CPU):
Instruction: i want help ordering a few of your product
New Input: i want help ordering a few of your product [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for reaching out to us for assistance with ordering our products. i'm here to guide you through the process and ensure a seamless experience. could you please provide me with more details about the specific products you would like to order? this will help me provide you with accurate information and help you complete your order successfully.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.3489                    | 18.8479                   | 20.1967                   | CPU        | 0.5273


Testing on CPU:  44%|████▍     | 22/50 [09:03<12:56, 27.73s/it]


Sample 22/50 (Device: CPU):
Instruction: checking status of order ord-6772250
New Input: checking status of order ord-6772250 [Classified: Intent is 'track_order', Category is 'order', Entities are ord-6772250 (order_number)]
Generated Response: thank you for trusting us! i'm fully aware of you would like to know if you have any status information regarding your order with the order number {{order number}}. to provide you with accurate and up-to-date information, i kindly request you to visit our website and navigate to the "order status" or "track order" section. there, you will be able to view the current status and estimated time of arrival (eta) for your purchase. feel free to share any additional details or questions you may have, and i'll be more than happy to assist you further.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.3868                    | 33.2309            

Testing on CPU:  46%|████▌     | 23/50 [09:28<12:01, 26.74s/it]


Sample 23/50 (Device: CPU):
Instruction: deleting government account
New Input: deleting government account [Classified: Intent is 'delete_account', Category is 'account', Entities are ting (account_category), government (account_type)]
Generated Response: i understand that you would like to delete your {{account category}} account. i apologize for any inconvenience this may have caused you. to assist you further, could you please provide me with your account details, such as your username or email address, so that i can locate the account and guide you through the deletion process? rest assured, i'm here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.3348                    | 23.0883                   | 24.4231                   | CPU        | 1.2734


Testing on CPU:  48%|████▊     | 24/50 [09:50<11:02, 25.50s/it]


Sample 24/50 (Device: CPU):
Instruction: my experience with your brand was bad, make a consumer claim
New Input: my experience with your brand was bad, make a consumer claim [Classified: Intent is 'complaint', Category is 'feedback', Entities are none]
Generated Response: i apologize for any negative experience you may have had with our brand. your feedback is important to us, and we want to ensure that your concerns are addressed appropriately. to assist you further, could you please provide me with more details about the specific issue you encountered? this will help us understand the situation better and take the necessary steps to resolve it.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.4899                    | 21.1145                   | 22.6044                   | CPU        | 1.2891


Testing on CPU:  50%|█████     | 25/50 [10:13<10:20, 24.80s/it]


Sample 25/50 (Device: CPU):
Instruction: wann locate order ord-3779640
New Input: wann locate order ord-3779640 [Classified: Intent is 'track_order', Category is 'order', Entities are ord-3779640 (order_number)]
Generated Response: thank you for trusting us! i'm fully aware of you would like to locate your order with the order number {{order number}}. to assist you further, could you please provide me with some additional details such as the platform or website you are using to make the purchase? this information will help me provide you with accurate and up-to-date information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.5145                    | 21.6635                   | 23.1780                   | CPU        | 1.2773


Testing on CPU:  52%|█████▏    | 26/50 [10:54<11:46, 29.45s/it]


Sample 26/50 (Device: CPU):
Instruction: i have to sde in what cases can i request a refund
New Input: i have to sde in what cases can i request a refund [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to provide you with the information you need regarding the situations in which you can request refunds. here are some common scenarios where you may be eligible: 1. **product/service defect:** if the product or service you received has a defect or does not meet the description provided, you are typically eligible for a reimbursement. REMEMBER, we value your satisfaction and want to ensure that you have a seamless experience with our products and services. if you encounter any issues or have further questions, feel free to reach out to our customer support team. they will be more than happy to assist you in resolving the issue and ensuring your refund is processed as quickly as possible.
Model                | Classifica

Testing on CPU:  54%|█████▍    | 27/50 [11:04<09:05, 23.70s/it]


Sample 27/50 (Device: CPU):
Instruction: i'm trying to see at what time i can call customer support
New Input: i'm trying to see at what time i can call customer support [Classified: Intent is 'contact_customer_service', Category is 'contact', Entities are none]
Generated Response: we're here to help! i take note that you would like to know the operating hours during which you can reach our dedicated support team. our team is available to assist you during {{website url}}. please feel free to reach out to us during these hours, and we'll be more than happy to address any questions or concerns you may have.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.5345                    | 8.7522                   | 10.2867                   | CPU        | 1.2695


Testing on CPU:  56%|█████▌    | 28/50 [11:12<06:58, 19.02s/it]


Sample 28/50 (Device: CPU):
Instruction: assistance sending some feedback for ur proiducts
New Input: assistance sending some feedback for ur proiducts [Classified: Intent is 'review', Category is 'order', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in sending feedback about our proids. your feedback is incredibly valuable to us as it helps us understand how we can improve and deliver a better experience to our valued customers. please let me know if you have any specific questions or if there's anything else i can help you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4102                    | 7.6948                   | 8.1049                   | CPU        | 1.2773


Testing on CPU:  58%|█████▊    | 29/50 [11:23<05:47, 16.55s/it]


Sample 29/50 (Device: CPU):
Instruction: need assistance to cancel order ord-8227420
New Input: need assistance to cancel order ord-8227420 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-8227420 (order_number)]
Generated Response: i'm here to assist you with canceling your order with the order number {{order number}}. your satisfaction is our top priority, and we want to ensure that you have a seamless experience with us. to proceed, could you please provide me with more details about the specific issue you're facing? this will help me guide you through the cancellation process more effectively.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4277                    | 10.3454                   | 10.7731                   | CPU        | 1.2773


Testing on CPU:  60%|██████    | 30/50 [11:41<05:43, 17.18s/it]


Sample 30/50 (Device: CPU):
Instruction: need assistance cancelinv purchase ord-8042947
New Input: need assistance cancelinv purchase ord-8042947 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-8042947 (order_number)]
Generated Response: i'm sorry to hear that you're having trouble canceling your purchase with the order number {{order number}}. your satisfaction is our top priority, and we're here to assist you. to cancel your order, please follow these steps: 1. sign in to your account: access our platform by logging into your {{online company portal info}} and navigating to the '{{online order interaction}}' section. you'll find an option to manage your purchases, such as cancelling or adding a new order. Importance: if you encounter any difficulties or have further questions during the cancellation process, feel free to reach out to our customer support team. they will be more than happy to guide you through the necessary steps and address any concerns 

Testing on CPU:  62%|██████▏   | 31/50 [11:56<05:09, 16.26s/it]


Sample 31/50 (Device: CPU):
Instruction: i don't know how i can download my bill #12588
New Input: i don't know how i can download my bill #12588 [Classified: Intent is 'get_invoice', Category is 'invoice', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about how to download your bill with the number #{{client last name}}. let me guide you through the process step by step. first, please log in to your account on our website. once logged in, navigate to the "billing" or "invoices" section. there, you should be able to find a list of your invoices. look for the specific invoice number or any other relevant information related to that invoice. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team. they will be more than happy to assist you further.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 

Testing on CPU:  64%|██████▍   | 32/50 [12:12<04:51, 16.19s/it]


Sample 32/50 (Device: CPU):
Instruction: i need help to see purchase ord-0521414 status
New Input: i need help to see purchase ord-0521414 status [Classified: Intent is 'track_order', Category is 'order', Entities are ord-0521414 (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the status of your purchase with the purchase number {{order number}}. to provide you with accurate information, could you please provide me with your full name, email address, and a preferred username? once i have these details, i will be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4261                    | 15.5717                   | 15.9978                   | CPU        | 1.2695


Testing on CPU:  66%|██████▌   | 33/50 [12:26<04:26, 15.65s/it]


Sample 33/50 (Device: CPU):
Instruction: am i entitled to a refund according to your policy?
New Input: am i entitled to a refund according to your policy? [Classified: Intent is 'get_refund', Category is 'refund', Entities are refund (currency_symbol), to (currency_symbol)]
Generated Response: i'll take care of it! i understand that you would like to know the status of your refund. rest assured, i'm here to assist you every step of the way. to provide you with accurate information, could you please provide me with your order number or any other relevant details? this will help me locate your case and guide you through the refund process.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 1.6425                    | 12.7615                   | 14.4040                   | CPU        | 9.6367


Testing on CPU:  68%|██████▊   | 34/50 [13:03<05:50, 21.91s/it]


Sample 34/50 (Device: CPU):
Instruction: need to check what methods for delivery i can choose help me
New Input: need to check what methods for delivery i can choose help me [Classified: Intent is 'delivery_options', Category is 'delivery', Entities are none]
Generated Response: i'll take care of it! i'm here to assist you in exploring the various delivery methods available to you. let's dive right in! 1. standard shipping: this method typically takes {{date range}} business days for your package to arrive at your doorstep. it's perfect for non-urgent items when you have a bit more time to spare before heading to the next available delivery date. if you need your items sooner, you can also opt for expedited shipping, which typically arrives within {{customer support hours}} to deliver them to your desired location within the specified {{city/town/country}} borders.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | 

Testing on CPU:  70%|███████   | 35/50 [13:10<04:24, 17.61s/it]


Sample 35/50 (Device: CPU):
Instruction: ya have an address to submit some feednack
New Input: ya have an address to submit some feednack [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: i'm here to assist you in submitting your feedback about our products and services. your input is highly valuable to us as it helps us understand your needs better and make improvements. please let me know if there's anything specific you'd like to provide feedback on, and i'll be more than happy to guide you through the process.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4276                    | 7.1340                   | 7.5615                   | CPU        | 1.2617


Testing on CPU:  72%|███████▏  | 36/50 [13:23<03:46, 16.16s/it]


Sample 36/50 (Device: CPU):
Instruction: show me in which situations  can i ask for refunds
New Input: show me in which situations  can i ask for refunds [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the situations where you can request a refund. let me provide you with a comprehensive breakdown of the common scenarios: 1. **product/service defect:** if the product or service you received has a defect or does not meet the description provided, you are usually eligible for a reimbursement. if you encounter any issues or need further clarification, please don't hesitate to reach out to our customer support team. they will be more than happy to guide you through the refund process and ensure that your concerns are addressed promptly and effectively.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hy

Testing on CPU:  74%|███████▍  | 37/50 [13:33<03:06, 14.31s/it]


Sample 37/50 (Device: CPU):
Instruction: help mke chatting with a person
New Input: help mke chatting with a person [Classified: Intent is 'contact_human_agent', Category is 'contact', Entities are none]
Generated Response: thank you for trusting us! i'm fully aware of you would like to chat with someone. our team of dedicated human representatives is here to assist you. to connect you with the right person, could you please provide me with more details about the specific issue or question you have? this will enable me to direct you to the most suitable representative who can address your concerns effectively. your satisfaction is our top priority, and we are committed to resolving any concerns you may have.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4360                    | 9.5514                   | 9.9874                   | CPU        | 12.0859


Testing on CPU:  76%|███████▌  | 38/50 [13:40<02:24, 12.03s/it]


Sample 38/50 (Device: CPU):
Instruction: there are errors trying to update my shipping address
New Input: there are errors trying to update my shipping address [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're encountering errors while updating your delivery address. please provide me with more details about the specific errors you are facing so that i can assist you further in resolving the issue. your satisfaction is our top priority, and we're here to help you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4282                    | 6.2958                   | 6.7240                   | CPU        | 1.2695


Testing on CPU:  78%|███████▊  | 39/50 [13:48<02:01, 11.08s/it]


Sample 39/50 (Device: CPU):
Instruction: i wnt help swapping several items of order ord-9073141
New Input: i wnt help swapping several items of order ord-9073141 [Classified: Intent is 'change_order', Category is 'order', Entities are ord-9073141 (order_number)]
Generated Response: we understand that you need assistance with swapping multiple items in order number {{order number}}. we apologize for any inconvenience caused. to better assist you, could you please provide more details about the items you would like to swap? this will help us ensure that we process your request accurately and efficiently. your satisfaction is our top priority, and we are here to help you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4551                    | 8.4094                   | 8.8646                   | CPU        | 7.8750


Testing on CPU:  80%|████████  | 40/50 [13:59<01:49, 10.94s/it]


Sample 40/50 (Device: CPU):
Instruction: i don't know how i can order some of your article
New Input: i don't know how i can order some of your article [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for your interest in ordering our articles! i'm here to guide you through the process step by step. to place an order, you can visit our website and navigate to the "order history" or "track my order" section. there you will find detailed information about the specific articles you are interested in purchasing. if you have any questions or need further assistance, feel free to reach out to our customer support team. they will be more than happy to assist you further.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4044                    | 10.1922                   | 10.5966                   | CPU        | 1.2734


Testing on CPU:  82%|████████▏ | 41/50 [14:10<01:37, 10.88s/it]


Sample 41/50 (Device: CPU):
Instruction: i don't know how i could modify my address
New Input: i don't know how i could modify my address [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about how to modify your address. let me guide you through the process step by step. first, please log in to your account on our website and navigate to the "my account" or "profile" section. from there, you should be able to find an option to edit your shipping information. if you encounter any difficulties or need further assistance, feel free to reach out to our customer support team.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4721                    | 10.2615                   | 10.7336                   | CPU        | 10.1953


Testing on CPU:  84%|████████▍ | 42/50 [14:20<01:26, 10.76s/it]


Sample 42/50 (Device: CPU):
Instruction: order ord-9186353 status
New Input: order ord-9186353 status [Classified: Intent is 'track_order', Category is 'order', Entities are ord-9186353 status (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the status of your order with the order number {{order number}}. to provide you with accurate information, could you please provide me with some additional details such as your name or email address associated with this order? this will help me locate the specific order and give you the most up-to-date status.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4622                    | 10.0177                   | 10.4799                   | CPU        | 1.2773


Testing on CPU:  86%|████████▌ | 43/50 [14:29<01:11, 10.25s/it]


Sample 43/50 (Device: CPU):
Instruction: help me making a claim against your business
New Input: help me making a claim against your business [Classified: Intent is 'complaint', Category is 'feedback', Entities are none]
Generated Response: i'm sorry to hear that you're facing an issue with our business. your feedback is important to us, and we want to ensure that your concerns are addressed promptly and effectively. to assist you further, could you please provide me with more details about the nature of your claim? this will help us understand the situation better and take the necessary steps to resolve it.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4141                    | 8.6462                   | 9.0603                   | CPU        | 4.5195


Testing on CPU:  88%|████████▊ | 44/50 [14:50<01:19, 13.30s/it]


Sample 44/50 (Device: CPU):
Instruction: i have got to see in which cases can i request my money back
New Input: i have got to see in which cases can i request my money back [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the situations where you can request a refund. here are some common scenarios: 1. **product/service defect:** if the product or service you received has a defect or does not meet the description provided, you may be eligible for a reimbursement.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4417                    | 19.9919                   | 20.4336                   | CPU        | 11.2344


Testing on CPU:  90%|█████████ | 45/50 [15:03<01:06, 13.38s/it]


Sample 45/50 (Device: CPU):
Instruction: can you help me report troubles with payment?
New Input: can you help me report troubles with payment? [Classified: Intent is 'payment_issue', Category is 'payment', Entities are none]
Generated Response: i'm here to assist you in reporting any troubles you're facing with your payment. your feedback is valuable to us, and we want to ensure that we address your concerns promptly and effectively. to report any payment issues, i recommend reaching out to our dedicated customer support team. they have the expertise to guide you through the process and provide the necessary assistance. you can contact them through our website's live chat feature at {{website url}} or by calling {{customer support phone number}}. rest assured, we are committed to resolving these payment troubles and ensuring a seamless payment experience for you.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | M

Testing on CPU:  92%|█████████▏| 46/50 [15:12<00:47, 11.88s/it]


Sample 46/50 (Device: CPU):
Instruction: i try to notify of an error with registrations
New Input: i try to notify of an error with registrations [Classified: Intent is 'registration_problems', Category is 'payment', Entities are none]
Generated Response: we understand that encountering errors during the registration process can be frustrating. rest assured, we are here to assist you every step of the way. could you please provide more details about the specific error you are facing? this will help us investigate and resolve the issue promptly. thank you for bringing this to our attention, and we appreciate your patience as we work towards resolving this matter.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4406                    | 7.9256                   | 8.3662                   | CPU        | 0.5312


Testing on CPU:  94%|█████████▍| 47/50 [15:28<00:39, 13.14s/it]


Sample 47/50 (Device: CPU):
Instruction: what do i have to do to cancel my platinum account?
New Input: what do i have to do to cancel my platinum account? [Classified: Intent is 'delete_account', Category is 'account', Entities are platinum (account_type)]
Generated Response: thank you for reaching out to us regarding the cancellation of your {{account type}} account. we understand that circumstances may change, and we're here to assist you every step of the way. to proceed with canceling your account, you can follow these simple steps: 1. log in to our {{online company portal info}} using your username and password. 2. navigate to the account settings or profile section. 3. look for the option to manage or cancel your subscription. 4. follow the prompts or instructions provided to confirm your cancellation. if you encounter any difficulties or have further questions, please don't hesitate to let us know.
Model                | Classification Time (s)   | Text Generation Time (s)  | 

Testing on CPU:  96%|█████████▌| 48/50 [15:38<00:24, 12.14s/it]


Sample 48/50 (Device: CPU):
Instruction: how can i submit some feedback abot ur company
New Input: how can i submit some feedback abot ur company [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: thank you for your interest in providing feedback on our company! to submit your feedback, you can visit our website and navigate to the "contact us" or "feedback" section. there, simply fill out the form with your contact information, including your name, email address, and any other relevant details. we value your input and look forward to hearing from you!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4121                    | 9.4148                   | 9.8269                   | CPU        | 0.5117


Testing on CPU:  98%|█████████▊| 49/50 [15:46<00:11, 11.14s/it]


Sample 49/50 (Device: CPU):
Instruction: change  order ord-8335461
New Input: change  order ord-8335461 [Classified: Intent is 'track_order', Category is 'order', Entities are ord-8335461 (order_number)]
Generated Response: we understand that you would like to know the status of your order with the order number {{order number}}. to assist you further, could you please provide us with some additional details such as the platform or website where you made the purchase? this information will help us provide you with accurate and up-to-date information. thank you for your cooperation, and we look forward to resolving your query.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4151                    | 8.3737                   | 8.7889                   | CPU        | 0.5234


Testing on CPU: 100%|██████████| 50/50 [15:59<00:00, 19.18s/it]


Sample 50/50 (Device: CPU):
Instruction: i need assistance to send some feedback for ur produucts
New Input: i need assistance to send some feedback for ur produucts [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: we appreciate your willingness to provide feedback on our products and services. your insights are highly valuable to us as they help us understand our customers' needs and preferences better. to submit your feedback, you can visit our website and navigate to the "contact us" or "feedback" section, where you'll find a form or contact information to reach out to our dedicated customer support team. they will be more than happy to assist you with the feedback submission process. we value your input and look forward to hearing from you!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4659                    | 11.8157       




Initializing model...
All GPT-2 and DistilBERT layers remain frozen
Attempting to load generation model from ../../text_generation_results_03-09-25/model


Testing on CUDA:   2%|▏         | 1/50 [00:07<06:09,  7.54s/it]


Sample 1/50 (Device: CUDA):
Instruction: i do not know what i have to do to get my invoice #85632
New Input: i do not know what i have to do to get my invoice #85632 [Classified: Intent is 'get_invoice', Category is 'invoice', Entities are 856 (person_name)]
Generated Response: i'm picking up what you're putting down, your confusion about retrieving your invoice with the number {{person name}}. let me guide you through the process step by step. first, please log in to your account on our website. once logged in, navigate to the "billing" or "invoices" section. look for the option to view or download your invoices. click on that option, and you should be able to locate and download the invoice you need. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 4.2380            

Testing on CUDA:   4%|▍         | 2/50 [00:10<03:40,  4.59s/it]


Sample 2/50 (Device: CUDA):
Instruction: transfer to live agent
New Input: transfer to live agent [Classified: Intent is 'switch_account', Category is 'contact', Entities are live (delivery_city)]
Generated Response: i'll take care of it! i'm here to assist you in transferring to a live representative. please provide me with your preferred contact information, such as your name, email address, or any other relevant details. once i have these details, i will guide you through the process step by step, ensuring that you have a seamless experience. rest assured, your privacy and security are of utmost importance to us, and we will handle your information with the highest level of care.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1560                    | 2.3693                   | 2.5253                   | CUDA        | 2702.7891


Testing on CUDA:   6%|▌         | 3/50 [00:13<03:01,  3.85s/it]


Sample 3/50 (Device: CUDA):
Instruction: i need help to see how long refunds take
New Input: i need help to see how long refunds take [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the timeline for refunds. the duration for processing refunds can vary depending on several factors, such as the payment method used and the specific policies of the vendor or service provider. to provide you with accurate information, could you please provide me with your {{company name}} account number or any other relevant details? this will allow me to quickly locate your refund request and give you an estimated timeframe for its processing. thank you for your cooperation, and i appreciate your patience while i gather the necessary information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1641  

Testing on CUDA:   8%|▊         | 4/50 [00:16<02:42,  3.54s/it]


Sample 4/50 (Device: CUDA):
Instruction: help to pseak to customer support
New Input: help to pseak to customer support [Classified: Intent is 'contact_customer_service', Category is 'contact', Entities are none]
Generated Response: we value your outreach! i'm in tune with the fact that you would like to know how you can reach our customer assistance team. to get in touch with our dedicated support, i recommend visiting our website at {{website url}} and navigating to the "contact us" page. there, you will find various contact options such as phone, email, or live chat. if you have any specific questions or need further assistance, please don't hesitate to let me know.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.3026                    | 2.7443                   | 3.0468                   | CUDA        | 2709.6958


Testing on CUDA:  10%|█         | 5/50 [00:19<02:39,  3.54s/it]


Sample 5/50 (Device: CUDA):
Instruction: i do not know what to do to swtch to the premium account
New Input: i do not know what to do to swtch to the premium account [Classified: Intent is 'switch_account', Category is 'payment', Entities are premium (account_type)]
Generated Response: i'm glad you contacted us to us for assistance with switching to our {{account type}} account. i understand that you may be unsure about the steps involved in this process, and i'll be more than happy to guide you through it. to initiate the switch, could you please provide me with some additional information? specifically, i would need to know the platform or service you are referring to, the type of account you would like to switch to and any specific preferences or requirements you have for the account type. this will help me provide you with the most accurate and tailored assistance.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device   

Testing on CUDA:  12%|█▏        | 6/50 [00:25<03:05,  4.21s/it]


Sample 6/50 (Device: CUDA):
Instruction: i need assistance resetting the key of my user account
New Input: i need assistance resetting the key of my user account [Classified: Intent is 'recover_password', Category is 'account', Entities are none]
Generated Response: assuredly! i'm here to provide you with the assistance you need to reset your key. losing access to your account can be quite distressing, but don't worry, we'll work together to get you back on track. here's what you can do: 1. begin by visiting our platform's "{{login page url}}" to begin the process. b) look for the option that says {{forgot key}} and click on it, providing the necessary information to verify your identity and initiate the password reset process for your user profile. 2. once you've completed the verification process, you'll receive an email with instructions on how to retrieve your password. please follow the instructions carefully, and your security is our top priority. 3. if you encounter any difficu

Testing on CUDA:  14%|█▍        | 7/50 [00:27<02:29,  3.48s/it]


Sample 7/50 (Device: CUDA):
Instruction: i want help taking a quick look at the bills from john robertson
New Input: i want help taking a quick look at the bills from john robertson [Classified: Intent is 'check_invoice', Category is 'invoice', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in quickly reviewing the invoices from {{salutation}} {{client last name}}. to provide you with the most accurate information, could you please provide me with your account details or any other relevant information? this will help me locate the specific bills you are looking for and guide you through the process of retrieving them.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1832                    | 1.8058                   | 1.9890                   | CUDA        | 2689.4912


Testing on CUDA:  16%|█▌        | 8/50 [00:29<02:16,  3.26s/it]


Sample 8/50 (Device: CUDA):
Instruction: i can't enter a delivery address
New Input: i can't enter a delivery address [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're having trouble entering a shipping address. let's work together to resolve this issue. to better assist you, could you please provide more details about the specific problem you are facing? are you encountering any error messages or is the system not accepting the address format? once i have a better understanding of the issue, i'll be able to guide you through the necessary steps to successfully enter the shipping addresses. your satisfaction is our top priority, and we're committed to resolving this matter for you as quickly as possible.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2028                    | 2.5813   

Testing on CUDA:  18%|█▊        | 9/50 [00:31<01:57,  2.88s/it]


Sample 9/50 (Device: CUDA):
Instruction: can you help me place an order?
New Input: can you help me place an order? [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: i'm here to assist you in placing your order. your satisfaction is our top priority, and we want to ensure that you have a seamless experience. could you please provide me with more details about the specific products or services you are interested in? this will allow me to guide you through the process and provide you with the best possible assistance.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1580                    | 1.8782                   | 2.0362                   | CUDA        | 2682.3540


Testing on CUDA:  20%|██        | 10/50 [00:38<02:39,  4.00s/it]


Sample 10/50 (Device: CUDA):
Instruction: i do not know how i can set up my shipping address
New Input: i do not know how i can set up my shipping address [Classified: Intent is 'set_up_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm here to guide you through the process of setting up your delivery address. let's work together to ensure a seamless experience: 1. begin by logging in to your account on our website or mobile app. you can usually find this option by navigating to the "my account" or "profile" section, depending on which platform or app you're using for your online account. 2. look for the option that says "add a new address," "edit shipping addresses," or something similar. click on it, and you'll be directed to a page that provides step-by-step instructions on how to update your shipping information. 3. fill in the required fields with the necessary details, such as your name, email address, city, state, postal code, or any other inf

Testing on CUDA:  22%|██▏       | 11/50 [00:40<02:08,  3.30s/it]


Sample 11/50 (Device: CUDA):
Instruction: i need assistance to set my new delovery address up
New Input: i need assistance to set my new delovery address up [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm here to assist you in setting up your new delivery address. please provide me with your current address and the new address you would like to update it to, and i'll guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1735                    | 1.5311                   | 1.7046                   | CUDA        | 2674.7715


Testing on CUDA:  24%|██▍       | 12/50 [00:42<01:50,  2.92s/it]


Sample 12/50 (Device: CUDA):
Instruction: help me to check what payment methods are available
New Input: help me to check what payment methods are available [Classified: Intent is 'check_payment_methods', Category is 'payment', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in checking the payment options available to you. to provide you with the most accurate information, could you please provide me with your full name, email address, and a preferred payment method? once i have these details, i'll be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1515                    | 1.8922                   | 2.0437                   | CUDA        | 2687.0762


Testing on CUDA:  26%|██▌       | 13/50 [00:44<01:38,  2.65s/it]


Sample 13/50 (Device: CUDA):
Instruction: i call to leave my feedback for ur services
New Input: i call to leave my feedback for ur services [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: thank you for choosing to provide feedback on our services! we greatly value your input as it helps us understand how we can improve and cater to your needs better. i'm here to assist you in leaving your feedback. please let me know if you have any specific questions or if there's anything else i can help you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1736                    | 1.8732                   | 2.0468                   | CUDA        | 2682.3394


Testing on CUDA:  28%|██▊       | 14/50 [00:46<01:34,  2.61s/it]


Sample 14/50 (Device: CUDA):
Instruction: open freemium account for wife
New Input: open freemium account for wife [Classified: Intent is 'create_account', Category is 'account', Entities are ium (account_type), wife (account_type)]
Generated Response: i appreciate your interest in opening a {{account type}} account with us! to get started, could you please provide me with your wife's full name, email address, and a preferred username? once i have these details, i will guide you through the account creation process step by step, ensuring a seamless experience for both of you. rest assured, we value your privacy and will handle your information with the utmost care.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2386                    | 2.2778                   | 2.5165                   | CUDA        | 2702.5400


Testing on CUDA:  30%|███       | 15/50 [00:49<01:30,  2.60s/it]


Sample 15/50 (Device: CUDA):
Instruction: i am trying to see the status of order ord-1135283
New Input: i am trying to see the status of order ord-1135283 [Classified: Intent is 'track_order', Category is 'order', Entities are ord-1135283 (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the current status and estimated time of arrival (eta) for your order with the order number {{order number}}. to provide you with accurate information, could you please provide me with your full name, email address, and a preferred username? once i have these details, i'll be able to guide you through the process step by step, ensuring you have a seamless experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2069                    | 2.3456                   | 2.5525                   | CUDA        | 2714.1519


Testing on CUDA:  32%|███▏      | 16/50 [00:54<01:52,  3.32s/it]


Sample 16/50 (Device: CUDA):
Instruction: wanna see ur payment modalities will yahelp me
New Input: wanna see ur payment modalities will yahelp me [Classified: Intent is 'check_payment_methods', Category is 'payment', Entities are none]
Generated Response: thank you for reaching out! i'm here to assist you in exploring our available payment methods. we offer a wide range of options to ensure convenience for our valued customers. here is a comprehensive list of our accepted payment options:
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2001                    | 4.7862                   | 4.9863                   | CUDA        | 2808.8477


Testing on CUDA:  34%|███▍      | 17/50 [00:57<01:43,  3.13s/it]


Sample 17/50 (Device: CUDA):
Instruction: what do i ned to do to change to the real estate account?
New Input: what do i ned to do to change to the real estate account? [Classified: Intent is 'switch_account', Category is 'account', Entities are real estate (account_category)]
Generated Response: thank you for your interest in upgrading to our {{account category}} account! i'm here to guide you through the process step by step. to get started, could you please provide me with your full name, email address, and a preferred username? once i have these details, i'll be able to assist you further in making the necessary changes to your account. remember, your security and privacy are of utmost importance to us, so rest assured that your information will be handled with the utmost care.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1551                    | 2.5406                 

Testing on CUDA:  36%|███▌      | 18/50 [00:59<01:32,  2.88s/it]


Sample 18/50 (Device: CUDA):
Instruction: notify of errorw with online registration
New Input: notify of errorw with online registration [Classified: Intent is 'registration_problems', Category is 'account', Entities are none]
Generated Response: we understand that encountering errors during the registration process can be frustrating. rest assured, we are here to assist you in resolving this issue. could you please provide more details about the specific error you are facing? this will help us understand the situation better and provide you with the most suitable solution. thank you for bringing this to our attention, and we appreciate your patience as we work towards a resolution.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1660                    | 2.1319                   | 2.2979                   | CUDA        | 2687.0757


Testing on CUDA:  38%|███▊      | 19/50 [01:01<01:25,  2.75s/it]


Sample 19/50 (Device: CUDA):
Instruction: another standard accounmt for father
New Input: another standard accounmt for father [Classified: Intent is 'delivery_options', Category is 'account', Entities are standard (account_type)]
Generated Response: assuredly! i'm here to assist you in exploring another {{account type}} account for your father. to proceed, could you please provide me with more details about the specific details you would like to know about this account? this will allow me to provide you with the most accurate and tailored guidance.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2228                    | 2.2086                   | 2.4314                   | CUDA        | 2691.4536


Testing on CUDA:  40%|████      | 20/50 [01:04<01:18,  2.63s/it]


Sample 20/50 (Device: CUDA):
Instruction: i need infrmation about the cancellation of my enterprise account
New Input: i need infrmation about the cancellation of my enterprise account [Classified: Intent is 'delete_account', Category is 'account', Entities are enterprise (account_type)]
Generated Response: i'm here to assist you with canceling your {{account type}} account. your satisfaction is our top priority, and we want to ensure that you have all the information you need. could you please provide me with your account details, such as your username or email address, so that i can guide you through the process?
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2252                    | 2.1215                   | 2.3467                   | CUDA        | 2702.2168


Testing on CUDA:  42%|████▏     | 21/50 [01:06<01:12,  2.49s/it]


Sample 21/50 (Device: CUDA):
Instruction: i want help ordering a few of your product
New Input: i want help ordering a few of your product [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for reaching out to us for assistance with ordering our products. i'm here to guide you through the process and ensure a seamless experience. to get started, could you please provide me with more details about the specific products you would like to order? this will help me provide you with accurate information and help you complete your order successfully.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1670                    | 2.0176                   | 2.1846                   | CUDA        | 2682.3540


Testing on CUDA:  44%|████▍     | 22/50 [01:09<01:18,  2.81s/it]


Sample 22/50 (Device: CUDA):
Instruction: checking status of order ord-6772250
New Input: checking status of order ord-6772250 [Classified: Intent is 'track_order', Category is 'order', Entities are ord-6772250 (order_number)]
Generated Response: thank you for trusting us! i'm fully aware of you would like to know if you have any status information regarding your order with the order number {{order number}}. to provide you with accurate and up-to-date information, i kindly request you to visit our website and navigate to the "order status" or "track order" section. there, you will be able to view the current status and estimated time of arrival (eta) for your purchase. feel free to share any additional details or questions you may have along the way, and i'll be more than happy to assist you further.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1811                    | 3.35

Testing on CUDA:  46%|████▌     | 23/50 [01:12<01:14,  2.76s/it]


Sample 23/50 (Device: CUDA):
Instruction: deleting government account
New Input: deleting government account [Classified: Intent is 'delete_account', Category is 'account', Entities are ting (account_category), government (account_type)]
Generated Response: i understand that you would like to delete your {{account category}} account. i apologize for any inconvenience this may have caused you. to assist you further, could you please provide me with your account details or any specific concerns you have regarding the deletion process? this will help me guide you through the necessary steps and ensure a smooth account deletion experience. your satisfaction is our top priority, and i'm here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1691                    | 2.4662                   | 2.6353                   | CUDA        | 2702.1006


Testing on CUDA:  48%|████▊     | 24/50 [01:14<01:09,  2.67s/it]


Sample 24/50 (Device: CUDA):
Instruction: my experience with your brand was bad, make a consumer claim
New Input: my experience with your brand was bad, make a consumer claim [Classified: Intent is 'complaint', Category is 'feedback', Entities are none]
Generated Response: i apologize for any negative experience you may have had with our brand. your feedback is important to us, and i want to ensure that your concerns are addressed appropriately. to assist you further, could you please provide me with more details about the specific issue you encountered? this will help me understand the situation better and guide you through the appropriate course of action.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2358                    | 2.2197                   | 2.4554                   | CUDA        | 2687.0747


Testing on CUDA:  50%|█████     | 25/50 [01:17<01:06,  2.67s/it]


Sample 25/50 (Device: CUDA):
Instruction: wann locate order ord-3779640
New Input: wann locate order ord-3779640 [Classified: Intent is 'track_order', Category is 'order', Entities are ord-3779640 (order_number)]
Generated Response: thank you for trusting us! i'm fully aware of you would like to locate your order with the order number {{order number}}. to assist you further, could you please provide me with some additional information? specifically, i would need to know the platform or website where you made the purchase. this will help me provide you with accurate and up-to-date information.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1640                    | 2.5237                   | 2.6877                   | CUDA        | 2704.5312


Testing on CUDA:  52%|█████▏    | 26/50 [01:20<01:08,  2.87s/it]


Sample 26/50 (Device: CUDA):
Instruction: i have to sde in what cases can i request a refund
New Input: i have to sde in what cases can i request a refund [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'll take care of it! i understand that you would like to know the situations in which you can request refunds. here are some common scenarios where you may be eligible for a reimbursement: clear your mind: if you are unsure about the specific circumstances surrounding your request, i recommend reaching out to our customer support team. they will be able to provide you with the most accurate and up-to-date information regarding the refund policies and procedures applicable to your specific situation. this will enable them to guide you through the necessary steps and ensure that your concerns are addressed promptly and effectively.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference T

Testing on CUDA:  54%|█████▍    | 27/50 [01:23<01:02,  2.72s/it]


Sample 27/50 (Device: CUDA):
Instruction: i'm trying to see at what time i can call customer support
New Input: i'm trying to see at what time i can call customer support [Classified: Intent is 'contact_customer_service', Category is 'contact', Entities are none]
Generated Response: we're here to help! i take note that you would like to know the operating hours during which you can contact our customer assistance team. our dedicated team is available to assist you from {{toll-free number}}. please feel free to reach out to us during these hours, and we'll be more than happy to provide you with the support you need. how else may i be of assistance to you today?
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2250                    | 2.1577                   | 2.3827                   | CUDA        | 2699.0146


Testing on CUDA:  56%|█████▌    | 28/50 [01:26<01:01,  2.81s/it]


Sample 28/50 (Device: CUDA):
Instruction: assistance sending some feedback for ur proiducts
New Input: assistance sending some feedback for ur proiducts [Classified: Intent is 'review', Category is 'order', Entities are none]
Generated Response: we appreciate your willingness to provide feedback on our products and services. your feedback is highly valuable to us as it helps us understand our customers' needs better and make improvements. to send your valuable feedback, you can visit our website and navigate to the "contact us" or "feedback" section. there you will find a form to fill out with your details, suggestions, or any other questions you may have. we value your input and look forward to hearing from you!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1500                    | 2.8719                   | 3.0219                   | CUDA        | 2708.6401


Testing on CUDA:  58%|█████▊    | 29/50 [01:28<00:56,  2.67s/it]


Sample 29/50 (Device: CUDA):
Instruction: need assistance to cancel order ord-8227420
New Input: need assistance to cancel order ord-8227420 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-8227420 (order_number)]
Generated Response: i'm here to assist you with canceling your order with the order number {{order number}}. your satisfaction is our top priority, and we want to ensure that you have a seamless experience with us. to proceed, could you please provide me with some additional details? specifically, i would need to know the platform or service you are using to make the purchase. this information will help me guide you through the cancellation process accurately and efficiently.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1621                    | 2.1780                   | 2.3400                   | CUDA        | 2712.8599


Testing on CUDA:  60%|██████    | 30/50 [01:33<01:04,  3.23s/it]


Sample 30/50 (Device: CUDA):
Instruction: need assistance cancelinv purchase ord-8042947
New Input: need assistance cancelinv purchase ord-8042947 [Classified: Intent is 'cancel_order', Category is 'order', Entities are ord-8042947 (order_number)]
Generated Response: i'm here to assist you with canceling your purchase with the order number {{order number}}. rest assured, i'll guide you through the process step by step to ensure a smooth cancellation experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1660                    | 4.3552                   | 4.5212                   | CUDA        | 2830.0317


Testing on CUDA:  62%|██████▏   | 31/50 [01:36<01:01,  3.25s/it]


Sample 31/50 (Device: CUDA):
Instruction: i don't know how i can download my bill #12588
New Input: i don't know how i can download my bill #12588 [Classified: Intent is 'get_invoice', Category is 'invoice', Entities are none]
Generated Response: i'm sorry to hear that you're unsure about how to download your bill with the number {{client last name}}. let me guide you through the process step by step. first, please log in to your account on our website. once logged in, navigate to the "billing" or "invoices" section. there, you should be able to find a list of your invoices. look for the specific invoice you need and click on it to view the download options. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1571                    | 3.1577                   | 3.3147  

Testing on CUDA:  64%|██████▍   | 32/50 [01:38<00:52,  2.91s/it]


Sample 32/50 (Device: CUDA):
Instruction: i need help to see purchase ord-0521414 status
New Input: i need help to see purchase ord-0521414 status [Classified: Intent is 'track_order', Category is 'order', Entities are ord-0521414 (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the status of your purchase with the purchase number {{order number}}. to provide you with accurate information, could you please provide me with your full name, email address, and a preferred username? once i have these details, i'll be able to guide you through the process and ensure that you have a seamless experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1898                    | 1.9127                   | 2.1025                   | CUDA        | 2700.7998


Testing on CUDA:  66%|██████▌   | 33/50 [01:40<00:43,  2.57s/it]


Sample 33/50 (Device: CUDA):
Instruction: am i entitled to a refund according to your policy?
New Input: am i entitled to a refund according to your policy? [Classified: Intent is 'get_refund', Category is 'refund', Entities are refund (currency_symbol), to (currency_symbol)]
Generated Response: i'm here to assist you with your query regarding the status of your refund. your satisfaction is our top priority, and we want to ensure that you receive the assistance you need. could you please provide me with more details about the refund you would like to request? this will enable me to guide you through the process and ensure a satisfactory resolution.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1551                    | 1.6249                   | 1.7800                   | CUDA        | 2690.4751


Testing on CUDA:  68%|██████▊   | 34/50 [01:44<00:49,  3.08s/it]


Sample 34/50 (Device: CUDA):
Instruction: need to check what methods for delivery i can choose help me
New Input: need to check what methods for delivery i can choose help me [Classified: Intent is 'delivery_options', Category is 'delivery', Entities are none]
Generated Response: assuredly! i'm here to assist you in exploring the various delivery methods available to you. let's dive into the possibilities: 1. standard shipping: this method typically takes {{date range}} business days for your package to arrive at your doorstep. it's perfect for non-urgent items when you don't have an urgent need. you can also opt for expedited shipping if you're in a hurry to get your items delivered to your desired location.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1676                    | 4.0899                   | 4.2576                   | CUDA        | 2805.7422


Testing on CUDA:  70%|███████   | 35/50 [01:47<00:43,  2.88s/it]


Sample 35/50 (Device: CUDA):
Instruction: ya have an address to submit some feednack
New Input: ya have an address to submit some feednack [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: thank you for expressing your interest in submitting feedback for our company! we appreciate your willingness to share your thoughts and experiences with us. to provide feedback, you can visit our website and navigate to the "feedback" or "contact us" page. there you will find a form or a contact information to reach out to our dedicated customer support team. they will guide you through the process and ensure that your feedback reaches the appropriate department. we value your input and look forward to hearing from you!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1535                    | 2.2670                   | 2.4205                   | 

Testing on CUDA:  72%|███████▏  | 36/50 [01:50<00:41,  2.96s/it]


Sample 36/50 (Device: CUDA):
Instruction: show me in which situations  can i ask for refunds
New Input: show me in which situations  can i ask for refunds [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the situations where you can request a refund. let me provide you with the information you need: 1. **product/service defect:** if you receive a product or service that has a defect or does not meet the description provided, you are generally eligible for a reimbursement. however, it's important to note that it may take additional time for the product to be delivered to the correct location, and we want to ensure that you have a seamless experience with our refund process. please provide me with your order details so that i can quickly look into it for you.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage

Testing on CUDA:  74%|███████▍  | 37/50 [01:52<00:33,  2.61s/it]


Sample 37/50 (Device: CUDA):
Instruction: help mke chatting with a person
New Input: help mke chatting with a person [Classified: Intent is 'contact_human_agent', Category is 'contact', Entities are none]
Generated Response: your message means a lot! i'm aligned with the idea that you would like to chat with one of our representatives. our dedicated team is available to assist you during {{customer support hours}}. please let me know if there's anything else i can help you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1467                    | 1.6424                   | 1.7891                   | CUDA        | 2683.7388


Testing on CUDA:  76%|███████▌  | 38/50 [01:53<00:28,  2.38s/it]


Sample 38/50 (Device: CUDA):
Instruction: there are errors trying to update my shipping address
New Input: there are errors trying to update my shipping address [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: i'm sorry to hear that you're encountering errors while updating your delivery address. to assist you further, could you please provide more details about the specific errors you are facing? this will help me better understand the issue and provide you with the most accurate solution. your satisfaction is our top priority, and we're here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1827                    | 1.6418                   | 1.8245                   | CUDA        | 2684.6187


Testing on CUDA:  78%|███████▊  | 39/50 [01:55<00:24,  2.25s/it]


Sample 39/50 (Device: CUDA):
Instruction: i wnt help swapping several items of order ord-9073141
New Input: i wnt help swapping several items of order ord-9073141 [Classified: Intent is 'change_order', Category is 'order', Entities are ord-9073141 (order_number)]
Generated Response: we understand that you need assistance with swapping multiple items in order number {{order number}}. we apologize for any inconvenience caused. to better assist you, could you please provide more details about the items you would like to swap? this will help us ensure that we process your request accurately and efficiently. your satisfaction is our top priority, and we are here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1545                    | 1.7980                   | 1.9526                   | CUDA        | 2702.8203


Testing on CUDA:  80%|████████  | 40/50 [01:58<00:24,  2.44s/it]


Sample 40/50 (Device: CUDA):
Instruction: i don't know how i can order some of your article
New Input: i don't know how i can order some of your article [Classified: Intent is 'place_order', Category is 'order', Entities are none]
Generated Response: thank you for reaching out to us for assistance with placing an order for our articles. i understand that you may be unsure about the process, and i'm here to help guide you through it. to place your order, you can visit our website and navigate to the "order history" or "my orders" section. there you will find detailed information about your purchase, including the items you wish to order. if you have any specific questions or need further assistance, please feel free to let me know.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1815                    | 2.6924                   | 2.8739                   | CUDA        | 2724.46

Testing on CUDA:  82%|████████▏ | 41/50 [02:01<00:23,  2.60s/it]


Sample 41/50 (Device: CUDA):
Instruction: i don't know how i could modify my address
New Input: i don't know how i could modify my address [Classified: Intent is 'change_shipping_address', Category is 'shipping', Entities are none]
Generated Response: we understand that you may be unsure about how to modify your address. let us guide you through the process step by step. first, please log in to your account on our website. once you're logged in, navigate to the "my account" or "profile" section. from there, you should be able to find an option to edit your shipping information. click on that option and make the necessary changes. if you encounter any difficulties or have further questions, feel free to reach out to our customer support team. we're here to assist you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2022                    | 2.7702          

Testing on CUDA:  84%|████████▍ | 42/50 [02:04<00:20,  2.58s/it]


Sample 42/50 (Device: CUDA):
Instruction: order ord-9186353 status
New Input: order ord-9186353 status [Classified: Intent is 'track_order', Category is 'order', Entities are ord-9186353 status (order_number)]
Generated Response: thank you for reaching out! i'm here to assist you in checking the status of your order with the order number {{order number}}. to provide you with accurate information, could you please confirm if you made the purchase on our website or through another platform? once i have the necessary details, i'll be able to guide you through the process step by step.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1600                    | 2.3735                   | 2.5335                   | CUDA        | 2707.5859


Testing on CUDA:  86%|████████▌ | 43/50 [02:06<00:17,  2.51s/it]


Sample 43/50 (Device: CUDA):
Instruction: help me making a claim against your business
New Input: help me making a claim against your business [Classified: Intent is 'complaint', Category is 'feedback', Entities are none]
Generated Response: i'm sorry to hear that you're facing an issue and would like to file a complaint against our business. your feedback is important to us, and we want to ensure that your concerns are addressed appropriately. to assist you further, could you please provide me with more details about the nature of your claim? this will help us understand the situation better and provide you with the most appropriate guidance and support.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2171                    | 2.1236                   | 2.3407                   | CUDA        | 2687.7871


Testing on CUDA:  88%|████████▊ | 44/50 [02:12<00:21,  3.64s/it]


Sample 44/50 (Device: CUDA):
Instruction: i have got to see in which cases can i request my money back
New Input: i have got to see in which cases can i request my money back [Classified: Intent is 'check_refund_policy', Category is 'refund', Entities are none]
Generated Response: i'm here to assist you in understanding the situations where you can request a refund. let me provide you with the information you need: 1. **product/service defect:** if you receive a product or service that has a defect or does not meet the description provided, you are typically eligible for a reimbursement. it's important to note that the exact nature of the defect may vary depending on several factors, such as the product's manufacturing process, the type of service, and the specific circumstances surrounding the issue(s) that led to the defective product being returned or exchanged for another product. in most cases, refunds are processed within a few business days after the return or exchange has been

Testing on CUDA:  90%|█████████ | 45/50 [02:16<00:17,  3.58s/it]


Sample 45/50 (Device: CUDA):
Instruction: can you help me report troubles with payment?
New Input: can you help me report troubles with payment? [Classified: Intent is 'payment_issue', Category is 'payment', Entities are none]
Generated Response: assuredly! i'm here to assist you in reporting any troubles you're facing with your payment. your feedback is incredibly valuable to us, as it helps us identify areas for improvement and ensure a seamless payment experience for all our customers. to report any payment issues, i recommend reaching out to our dedicated customer support team. they have the expertise to guide you through the process and address any concerns you may have. you can contact them through our website's live chat feature at {{website url}} or by calling {{customer support phone number}}. please let me know if there's anything else i can help you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     |

Testing on CUDA:  92%|█████████▏| 46/50 [02:19<00:13,  3.39s/it]


Sample 46/50 (Device: CUDA):
Instruction: i try to notify of an error with registrations
New Input: i try to notify of an error with registrations [Classified: Intent is 'registration_problems', Category is 'payment', Entities are none]
Generated Response: we appreciate your proactive approach in notifying us about the error you encountered during the registration process. your feedback is crucial in helping us improve our services and ensure a seamless experience for all our users. to assist you further, could you please provide us with more details regarding the specific error? this will enable us to investigate and resolve the issue promptly. thank you for bringing this to our attention, and rest assured, we are here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.1971                    | 2.7297                   | 2.9268               

Testing on CUDA:  94%|█████████▍| 47/50 [02:22<00:10,  3.50s/it]


Sample 47/50 (Device: CUDA):
Instruction: what do i have to do to cancel my platinum account?
New Input: what do i have to do to cancel my platinum account? [Classified: Intent is 'delete_account', Category is 'account', Entities are platinum (account_type)]
Generated Response: thank you for reaching out to us regarding the cancellation of your {{account type}} account. i understand that you would like to terminate your account, and i'm here to guide you through the process. to proceed with cancellation, please provide me with some additional information such as the account details or any specific concerns you may have. this will help me assist you more effectively and ensure a smooth cancellation experience.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.2813                    | 3.4937                   | 3.7750                   | CUDA        | 2694.4004


Testing on CUDA:  96%|█████████▌| 48/50 [02:33<00:11,  5.72s/it]


Sample 48/50 (Device: CUDA):
Instruction: how can i submit some feedback abot ur company
New Input: how can i submit some feedback abot ur company [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: thank you for expressing your interest in providing feedback on our company. to submit your feedback, you can visit our website and navigate to the "contact us" or "feedback" section. there, simply fill out the form with your contact information, including your name, email address, and preferred method of communication. our team of dedicated representatives will be more than happy to assist you with any questions or concerns you may have. we value your input and look forward to hearing from you!
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.5800                    | 10.3113                   | 10.8913                   | CUDA        | 27

Testing on CUDA:  98%|█████████▊| 49/50 [02:42<00:06,  6.74s/it]


Sample 49/50 (Device: CUDA):
Instruction: change  order ord-8335461
New Input: change  order ord-8335461 [Classified: Intent is 'track_order', Category is 'order', Entities are ord-8335461 (order_number)]
Generated Response: we understand that you would like to know the status of your order with the order number {{order number}}. to provide you with accurate information, could you please provide us with your account details or contact our customer support team? they will be able to assist you further and provide the necessary guidance. your satisfaction is our top priority, and we are here to support you every step of the way.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.4809                    | 8.6360                   | 9.1168                   | CUDA        | 2692.9424


Testing on CUDA: 100%|██████████| 50/50 [02:50<00:00,  3.41s/it]


Sample 50/50 (Device: CUDA):
Instruction: i need assistance to send some feedback for ur produucts
New Input: i need assistance to send some feedback for ur produucts [Classified: Intent is 'review', Category is 'feedback', Entities are none]
Generated Response: we appreciate your willingness to share your feedback about our products and services. your insights are valuable to us as they help us understand how we can improve and deliver a better experience to our valued customers. please let me know if you have any specific questions or if there's anything else i can assist you with.
Model                | Classification Time (s)   | Text Generation Time (s)  | Overall Inference Time (s) | Device     | Memory Usage (MB)
Hybrid Concat | 0.5802                    | 7.0559                   | 7.6361                   | CUDA        | 2682.3403

BENCHMARK SUMMARY FOR CUDA
Average Classification Time: 0.2887 seconds
Average Generation Time: 3.1228 seconds
Average Overall Time: 3.4115 second




Performance comparison plots saved to performance_comparison.png
