In [2]:
"""
BASELINE VS BsDD ENRICHMENT TEST - SIMPLIFIED VERSION
Complete cell - copy and paste into ONE Jupyter cell
"""

import pandas as pd
import json
import requests
import time
from typing import Dict, List, Optional
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
import warnings
import ifcopenshell

# Install a blanket "ignore" filter so no Python warnings are shown for the rest of the session.
warnings.filterwarnings('ignore')

# Release PyTorch's cached, currently unused GPU memory before the model is loaded further down.
torch.cuda.empty_cache()


class BsddClient:
    """Small client for the buildingSMART Data Dictionary (bsDD) REST API.

    Resolves IFC 4.3 class names against the ``Class/v1`` endpoint and exposes
    the definition, property names and ``PredefinedType`` values found in the
    response. Successful responses are cached per class name for the lifetime
    of the instance.
    """

    def __init__(self):
        self.base_url = "https://api.bsdd.buildingsmart.org/api"
        self.ifc_namespace_uri = "https://identifier.buildingsmart.org/uri/buildingsmart/ifc/4.3"
        # "class_<name>" -> decoded JSON body of a successful lookup.
        self.cache = {}
        self.session = requests.Session()
        self.session.headers.update({'Accept': 'application/json'})

    def search_class(self, class_name: str) -> Optional[Dict]:
        """Fetch the bsDD record for one IFC class.

        Args:
            class_name: IFC class name, e.g. ``"IfcWall"``.

        Returns:
            The decoded JSON response, or ``None`` when the request raises,
            the body cannot be decoded, or the status code is not 200.
            Only successful lookups are stored in the cache.
        """
        cache_key = f"class_{class_name}"
        if cache_key in self.cache:
            return self.cache[cache_key]

        url = f"{self.base_url}/Class/v1"
        # Class/v1 identifies the class through the single ``Uri`` parameter and
        # only includes ``classProperties`` when explicitly asked to.
        params = {
            'Uri': f"{self.ifc_namespace_uri}/class/{class_name}",
            'IncludeClassProperties': 'true',
        }

        try:
            response = self.session.get(url, params=params, timeout=10)
            if response.status_code != 200:
                return None
            data = response.json()
        except Exception as e:
            # Best effort: enrichment is optional, so report and carry on.
            print(f"Warning: bsDD API error for {class_name}: {e}")
            return None

        self.cache[cache_key] = data
        return data

    @staticmethod
    def _class_properties(class_info: Optional[Dict]) -> List[Dict]:
        """Return the ``classProperties`` entries of a record, tolerating a missing or null field."""
        if not class_info:
            return []
        entries = class_info.get('classProperties') or []
        return [entry for entry in entries if isinstance(entry, dict)]

    def get_class_properties(self, class_name: str) -> List[str]:
        """Return the non-empty property names listed for an IFC class ([] when unknown)."""
        class_info = self.search_class(class_name)
        return [
            prop['name']
            for prop in self._class_properties(class_info)
            if prop.get('name')
        ]

    def get_class_description(self, class_name: str) -> str:
        """Return the ``definition`` text of an IFC class ('' when unknown or null)."""
        class_info = self.search_class(class_name)
        if not class_info:
            return ""
        return class_info.get('definition') or ""

    def get_predefined_types(self, class_name: str) -> List[str]:
        """Return the allowed values of the class's ``PredefinedType`` property.

        Empty values are dropped. If several ``PredefinedType`` entries carry
        an ``allowedValues`` list, the last one wins.
        """
        class_info = self.search_class(class_name)

        predefined_types: List[str] = []
        for prop in self._class_properties(class_info):
            if prop.get('name') != 'PredefinedType':
                continue
            allowed = prop.get('allowedValues')
            if allowed is None:
                continue
            predefined_types = [
                item['value']
                for item in allowed
                if isinstance(item, dict) and item.get('value')
            ]
        return predefined_types


class EnrichmentSystem:
    """Builds bsDD background text for questions that mention IFC classes."""

    def __init__(self):
        print("Initializing bsDD enrichment system...")
        self.bsdd_client = BsddClient()
        print("✓ BsDD client ready")

    def get_bsdd_info(self, ifc_file, question: str) -> str:
        """Return a text block with bsDD data for every IFC class named in *question*.

        Args:
            ifc_file: Not used by this method; kept so existing callers keep working.
            question: Free-text question; tokens matching ``Ifc\\w+`` (any
                letter case) are treated as class names.

        Returns:
            A block starting with the ``=== bsDD API DEFINITIONS ===`` header
            followed by one section per class for which bsDD returned a
            definition, properties or predefined types; ``""`` when no class
            is mentioned or none yields any data.
        """
        ifc_entities = re.findall(r'Ifc\w+', question, re.IGNORECASE)
        if not ifc_entities:
            return ""

        info_parts = ["=== bsDD API DEFINITIONS ===\n"]

        # The pattern is case-insensitive, so de-duplicate case-insensitively as
        # well; the first spelling seen is the one that gets looked up.
        seen_entities = set()
        for entity in ifc_entities:
            entity_key = entity.casefold()
            if entity_key in seen_entities:
                continue
            seen_entities.add(entity_key)

            description = self.bsdd_client.get_class_description(entity)
            properties = self.bsdd_client.get_class_properties(entity)
            predefined_types = self.bsdd_client.get_predefined_types(entity)

            if not (description or properties or predefined_types):
                continue

            info_parts.append(f"{entity}:")
            if description:
                info_parts.append(f"  Definition: {description}")
            if predefined_types:
                # Cap the lists so one class cannot dominate the prompt.
                info_parts.append(f"  PredefinedType options: {', '.join(predefined_types[:10])}")
            if properties:
                info_parts.append(f"  Common properties: {', '.join(properties[:15])}")
            info_parts.append("")

        # Only the header present means nothing useful was found.
        if len(info_parts) <= 1:
            return ""
        return "\n".join(info_parts)


class QuestionTypeDetector:
    """Classifies a question by its wording and maps each class to an answer-format hint."""

    @staticmethod
    def detect_question_type(question: str) -> str:
        """Return one of "counting", "factual", "comparison", "list" or "semantic".

        Rules are tried in that order on the lower-cased, stripped question;
        the first rule that matches decides, and "semantic" is the fallback.
        """
        normalized = question.lower().strip()

        if normalized.startswith("how many"):
            return "counting"

        factual_openers = ("does ", "is ", "are ", "do ", "can ", "which ")
        if normalized.startswith(factual_openers):
            return "factual"

        if any(marker in normalized for marker in ("compare", "difference")):
            return "comparison"

        if normalized.startswith("list ") or "what are" in normalized:
            return "list"

        return "semantic"

    @staticmethod
    def get_instruction_for_type(question_type: str) -> str:
        """Return the formatting instruction for *question_type* (unknown types get the "semantic" one)."""
        fallback = "Provide a clear, direct answer."
        hints = {
            "counting": "Provide ONLY the number.",
            "factual": "Answer directly and concisely.",
            "list": "List items clearly, separated by commas.",
            "comparison": "Provide a clear comparison highlighting key differences.",
            "semantic": fallback,
        }
        return hints.get(question_type, fallback)


class BaselineVsBsddTester:
    """Run the same questions through an LLM with and without bsDD context.

    Two prompt configurations are compared:

    * ``"baseline"``  - the bare question;
    * ``"bsdd_only"`` - facts indexed from the IFC model plus bsDD class
      definitions, followed by the question.

    Typical use: construct, call :meth:`load_ifc`, then :meth:`run_test` and
    :meth:`generate_report`.
    """

    def __init__(self, model_name: str = "Qwen/Qwen2.5-1.5B-Instruct"):
        """Load tokenizer and model, move the model to the chosen device, set up helpers.

        Args:
            model_name: Model id passed to the ``from_pretrained`` loaders.
        """
        print(f"\n{'='*80}")
        print(f"LOADING MODEL: {model_name}")
        print(f"{'='*80}\n")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Device: {self.device}")

        # NOTE(review): trust_remote_code=True allows the model repository to run
        # its own Python code on load - only use model ids you trust.
        print("Loading tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

        # pad_token_id is handed to generate() later; fall back to EOS when the
        # tokenizer defines no pad token of its own.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        print("✓ Tokenizer loaded")

        print("Loading model weights...")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            trust_remote_code=True,
            low_cpu_mem_usage=True
        )
        print("✓ Model weights loaded")

        # Report the device actually used instead of always claiming "GPU".
        device_label = "GPU" if self.device == "cuda" else "CPU"
        print(f"Moving model to {device_label}...")
        self.model = self.model.to(self.device)
        print(f"✓ Model on {device_label}")

        self.model.eval()
        print("✓ Model in eval mode")

        self.enrichment = EnrichmentSystem()
        self.question_detector = QuestionTypeDetector()

        print("\n✓✓✓ ALL SYSTEMS READY ✓✓✓\n")

    def load_ifc(self, ifc_path: str):
        """Open the IFC file at *ifc_path*, keep its ``IfcProduct`` list and index it."""
        print(f"Loading IFC file: {ifc_path}")
        self.ifc_file = ifcopenshell.open(ifc_path)
        print(f"✓ IFC Schema: {self.ifc_file.schema}")

        self.products = self.ifc_file.by_type('IfcProduct')
        print(f"✓ Loaded {len(self.products)} IFC products")

        self._index_ifc_data()

    def _index_ifc_data(self):
        """Build the lookup tables used for prompt context.

        Populates ``element_counts`` (type name -> number of products),
        ``materials`` (names of directly associated ``IfcMaterial`` objects)
        and ``spatial_elements`` (storey name -> ``{'elevation': ...}``).
        """
        print("\nIndexing IFC data...")
        self.element_counts = {}
        self.materials = set()
        self.spatial_elements = {}

        for product in self.products:
            elem_type = product.is_a()
            self.element_counts[elem_type] = self.element_counts.get(elem_type, 0) + 1

            for assoc in getattr(product, 'HasAssociations', None) or ():
                if not assoc.is_a('IfcRelAssociatesMaterial'):
                    continue
                material = assoc.RelatingMaterial
                # Only plain IfcMaterial is indexed. Unnamed materials are
                # skipped: a None in the set would break the sorted()/join()
                # used when the material list is rendered.
                if material is not None and material.is_a('IfcMaterial') and material.Name:
                    self.materials.add(material.Name)

        for storey in self.ifc_file.by_type('IfcBuildingStorey'):
            self.spatial_elements[storey.Name] = {
                'elevation': getattr(storey, 'Elevation', None)
            }

        print(f"✓ Indexed {len(self.element_counts)} element types")

    def _extract_relevant_context(self, question: str) -> str:
        """Return the indexed model facts that the wording of *question* asks for.

        Always starts with the schema line. Counts are added for "how many"
        questions, the material list when "material" is mentioned, and the
        storey elevations when a storey/elevation/floor/level word appears.
        """
        context = []
        q_lower = question.lower()

        context.append(f"IFC Schema: {self.ifc_file.schema}")

        if "how many" in q_lower:
            # Report every element type named in the question, not just the
            # first match, so multi-entity questions get all their counts.
            for elem_type, count in self.element_counts.items():
                if elem_type.lower() in q_lower:
                    context.append(f"{elem_type} count: {count}")

        if "material" in q_lower:
            context.append(f"\nMaterials in model: {', '.join(sorted(self.materials))}")

        if any(word in q_lower for word in ['storey', 'elevation', 'floor', 'level']):
            context.append("\nSpatial Structure:")
            for name, info in self.spatial_elements.items():
                context.append(f"  {name}: Elevation {info['elevation']}")

        return "\n".join(context)

    def _prepare_context(self, question: str, config: str, question_type: str) -> str:
        """Assemble the user message for one configuration.

        Args:
            question: The question text.
            config: ``"baseline"`` or ``"bsdd_only"``.
            question_type: Label used to pick the trailing instruction line.

        Raises:
            ValueError: If *config* is not one of the two known values
                (previously this silently produced a prompt with no question).
        """
        context_parts = []

        if config == "baseline":
            # Baseline: just the question.
            context_parts.append("=== QUESTION ===")
            context_parts.append(question)

        elif config == "bsdd_only":
            # bsDD: model data + bsDD definitions + question.
            context_parts.append("=== IFC MODEL DATA ===")
            context_parts.append(self._extract_relevant_context(question))

            bsdd_info = self.enrichment.get_bsdd_info(self.ifc_file, question)
            if bsdd_info:
                context_parts.append(f"\n{bsdd_info}")

            context_parts.append("\n=== QUESTION ===")
            context_parts.append(question)

        else:
            raise ValueError(f"Unknown configuration: {config!r}")

        context_parts.append(f"\nInstruction: {self.question_detector.get_instruction_for_type(question_type)}")

        return "\n".join(context_parts)

    def generate_answer(self, question: str, config: str, question_type: str) -> str:
        """Generate the model's answer for one question/configuration pair.

        The prompt is truncated to 2048 tokens and decoded greedily
        (``do_sample=False``) for at most 250 new tokens.

        Returns:
            The stripped answer text, or ``"Error: <message>"`` if anything
            raises, so one failing question does not abort a whole run.
        """
        try:
            context = self._prepare_context(question, config, question_type)

            system_msg = "You are an IFC expert. Answer questions clearly and concisely based on the provided information."

            messages = [
                {"role": "system", "content": system_msg},
                {"role": "user", "content": context}
            ]

            text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = self.tokenizer(text, return_tensors="pt", padding=False, truncation=True, max_length=2048)

            input_ids = inputs['input_ids'].to(self.device)
            attention_mask = inputs['attention_mask'].to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=250,
                    do_sample=False,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                )

            # Drop the prompt tokens; keep only what the model appended.
            new_tokens = outputs[0][len(input_ids[0]):]
            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            return response.strip()

        except Exception as e:
            return f"Error: {str(e)}"

    def run_test(self) -> pd.DataFrame:
        """Answer the built-in questions under every configuration.

        Returns:
            One row per (question, configuration) with the columns
            ``question_id``, ``category``, ``difficulty``, ``question_type``,
            ``question``, ``configuration``, ``predicted_answer`` and
            ``time_seconds``.
        """
        # Hardcoded questions
        questions = [
            {
                'question': 'Which IFC entity holds the world coordinate system for model geometry?',
                'category': 'Schema',
                'difficulty': 'Medium'
            },
            {
                'question': 'Which property in Pset_WindowCommon records whether a window is external?',
                'category': 'Property Sets',
                'difficulty': 'Easy'
            },
            {
                'question': 'Which entities define local object placement in 3D?',
                'category': 'Geometry',
                'difficulty': 'Medium'
            },
            {
                'question': 'Compare the semantic requirements for modeling a structural IfcSlab and a structural IfcBeam. What are the key property and role differences?',
                'category': 'Comparison',
                'difficulty': 'Hard'
            }
        ]

        configs = ["baseline", "bsdd_only"]
        results = []

        print(f"\n{'='*80}")
        print(f"RUNNING TEST - {len(questions)} QUESTIONS × {len(configs)} CONFIGURATIONS")
        print(f"{'='*80}\n")

        for idx, q_data in enumerate(questions):
            question = q_data['question']
            question_type = self.question_detector.detect_question_type(question)

            print(f"\n[{idx+1}/{len(questions)}] {question[:80]}...")

            for config in configs:
                print(f"  {config:18s}...", end=" ", flush=True)

                # perf_counter is monotonic, so the interval cannot be skewed
                # by system clock adjustments.
                start_time = time.perf_counter()
                predicted_answer = self.generate_answer(question, config, question_type)
                elapsed = time.perf_counter() - start_time

                results.append({
                    'question_id': idx,
                    'category': q_data['category'],
                    'difficulty': q_data['difficulty'],
                    'question_type': question_type,
                    'question': question,
                    'configuration': config,
                    'predicted_answer': predicted_answer,
                    'time_seconds': elapsed
                })

                print(f"Time: {elapsed*1000:.1f}ms")
                print(f"       Answer: {predicted_answer[:80]}...")

        return pd.DataFrame(results)

    @staticmethod
    def _paired_rows(results_df):
        """Yield ``(number, baseline_row, bsdd_row)`` per question that has both configurations.

        ``number`` is the 1-based position of the question id in order of
        first appearance; questions missing either configuration are skipped.
        """
        for number, q_id in enumerate(results_df['question_id'].unique(), start=1):
            rows = results_df[results_df['question_id'] == q_id]
            baseline = rows[rows['configuration'] == 'baseline']
            bsdd = rows[rows['configuration'] == 'bsdd_only']
            if baseline.empty or bsdd.empty:
                continue
            yield number, baseline.iloc[0], bsdd.iloc[0]

    def _write_excel_report(self, results_df, output_path):
        """Write the All_Results, Summary and Side_by_Side sheets to *output_path*."""
        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            results_df.to_excel(writer, sheet_name='All_Results', index=False)

            summary = results_df.groupby('configuration').agg({
                'time_seconds': ['mean', 'std', 'sum', 'count']
            }).round(4)
            summary.columns = ['Avg Time (s)', 'Std Time', 'Total Time (s)', 'Num Questions']
            summary['Avg Time (ms)'] = (summary['Avg Time (s)'] * 1000).round(1)
            summary.to_excel(writer, sheet_name='Summary')

            comparison_columns = [
                'Question', 'Category', 'Difficulty',
                'Baseline Answer', 'BsDD Answer',
                'Baseline Time (ms)', 'BsDD Time (ms)',
            ]
            comparison_data = [
                {
                    'Question': baseline_row['question'],
                    'Category': baseline_row['category'],
                    'Difficulty': baseline_row['difficulty'],
                    'Baseline Answer': baseline_row['predicted_answer'],
                    'BsDD Answer': bsdd_row['predicted_answer'],
                    'Baseline Time (ms)': baseline_row['time_seconds'] * 1000,
                    'BsDD Time (ms)': bsdd_row['time_seconds'] * 1000
                }
                for _, baseline_row, bsdd_row in self._paired_rows(results_df)
            ]
            comparison_df = pd.DataFrame(comparison_data, columns=comparison_columns)
            comparison_df.to_excel(writer, sheet_name='Side_by_Side', index=False)

    def _print_detailed_results(self, results_df):
        """Print both answers and their timings for every paired question."""
        print(f"{'='*80}")
        print("DETAILED RESULTS - BASELINE VS BsDD")
        print(f"{'='*80}\n")

        for number, baseline_row, bsdd_row in self._paired_rows(results_df):
            print(f"\n{'='*80}")
            print(f"QUESTION {number} [{baseline_row['category']} - {baseline_row['difficulty']}]")
            print(f"{'='*80}")
            print(f"\n{baseline_row['question']}\n")

            print(f"{'BASELINE ANSWER':-^80}")
            print(f"Time: {baseline_row['time_seconds']*1000:.1f}ms")
            print(f"{baseline_row['predicted_answer']}\n")

            print(f"{'BsDD ENRICHED ANSWER':-^80}")
            print(f"Time: {bsdd_row['time_seconds']*1000:.1f}ms")
            print(f"{bsdd_row['predicted_answer']}\n")

    def _print_overall_summary(self, results_df):
        """Print the average time per configuration and the bsDD overhead."""
        print(f"\n{'='*80}")
        print("OVERALL SUMMARY")
        print(f"{'='*80}\n")

        baseline_avg = results_df[results_df['configuration'] == 'baseline']['time_seconds'].mean() * 1000
        bsdd_avg = results_df[results_df['configuration'] == 'bsdd_only']['time_seconds'].mean() * 1000

        print(f"{'Configuration':<20} {'Avg Time (ms)':<15}")
        print("-" * 40)
        print(f"{'Baseline':<20} {baseline_avg:>14.1f}")
        print(f"{'BsDD Enriched':<20} {bsdd_avg:>14.1f}")

        # A zero or NaN baseline (e.g. no baseline rows) has no meaningful
        # relative overhead, so print "n/a" instead of inf/nan percent.
        if baseline_avg > 0:
            relative = f"{((bsdd_avg/baseline_avg - 1)*100):+.1f}%"
        else:
            relative = "n/a"
        print(f"\nTime Overhead: {bsdd_avg - baseline_avg:+.1f}ms ({relative})")

    def generate_report(self, results_df: pd.DataFrame, output_path: str):
        """Write the Excel report and print a console summary.

        Args:
            results_df: Frame in the layout produced by :meth:`run_test`.
            output_path: Destination ``.xlsx`` path (written with openpyxl).
        """
        print(f"\n{'='*80}")
        print("GENERATING REPORT...")
        print(f"{'='*80}\n")

        self._write_excel_report(results_df, output_path)
        print(f"✓ Report saved: {output_path}\n")

        self._print_detailed_results(results_df)
        self._print_overall_summary(results_df)


# ============================================================================
# MAIN EXECUTION
# ============================================================================

def main(
    model_name: str = "Qwen/Qwen2.5-0.5B-Instruct",
    ifc_file_path: str = "C:/DfMA/2R1 4.3.ifc",
    output_report_path: str = "C:/DfMA/baseline_vs_bsdd_comparison.xlsx",
):
    """Run the baseline vs bsDD comparison end to end.

    Loads the model, opens the IFC file, answers the built-in questions under
    both configurations and writes the Excel report. Any exception is caught,
    reported with a traceback and not re-raised, so a failed run does not
    propagate out of the notebook cell.

    Args:
        model_name: Model id handed to ``BaselineVsBsddTester``.
        ifc_file_path: Path of the IFC file to load.
        output_report_path: Path of the ``.xlsx`` report to write.
    """
    print("\n" + "="*80)
    print("BASELINE VS BsDD ENRICHMENT TEST")
    print("="*80)

    print(f"\nModel: {model_name}")
    print(f"IFC file: {ifc_file_path}")
    print(f"Output: {output_report_path}\n")

    try:
        # Initialize tester
        tester = BaselineVsBsddTester(model_name=model_name)

        # Load IFC
        tester.load_ifc(ifc_file_path)

        # Run test
        results_df = tester.run_test()

        # Generate report
        tester.generate_report(results_df, output_report_path)

        print("\n" + "="*80)
        print("✓✓✓ TEST COMPLETE ✓✓✓")
        print(f"Results saved to: {output_report_path}")
        print("="*80)

    except Exception as e:
        print(f"\n❌ ERROR: {e}")
        import traceback
        traceback.print_exc()


# Run the test when executed as a script or notebook cell, but not when imported.
if __name__ == "__main__":
    main()


BASELINE VS BsDD ENRICHMENT TEST

Model: Qwen/Qwen2.5-0.5B-Instruct
IFC file: C:/DfMA/2R1 4.3.ifc
Output: C:/DfMA/baseline_vs_bsdd_comparison.xlsx


LOADING MODEL: Qwen/Qwen2.5-0.5B-Instruct

Device: cuda
Loading tokenizer...
✓ Tokenizer loaded
Loading model weights...
✓ Model weights loaded
Moving model to GPU...
✓ Model on GPU
✓ Model in eval mode
Initializing bsDD enrichment system...
✓ BsDD client ready

✓✓✓ ALL SYSTEMS READY ✓✓✓

Loading IFC file: C:/DfMA/2R1 4.3.ifc
✓ IFC Schema: IFC4X3
✓ Loaded 107 IFC products

Indexing IFC data...
✓ Indexed 11 element types

RUNNING TEST - 4 QUESTIONS × 2 CONFIGURATIONS


[1/4] Which IFC entity holds the world coordinate system for model geometry?...
  baseline          ... Time: 791.0ms
       Answer: The World Coordinate System (WCS) is held by the World Geodetic System 1984 (WGS...
  bsdd_only         ... Time: 796.8ms
       Answer: The World Coordinate System (WCS) is held by the World Coordinate System entity ...

[2/4] Which property i

In [None]:
"""
BASELINE VS BsDD ENRICHMENT TEST - SIMPLIFIED VERSION
Complete cell - copy and paste into ONE Jupyter cell
"""

import pandas as pd
import json
import requests
import time
from typing import Dict, List, Optional
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re
import warnings
import ifcopenshell

# Install a blanket "ignore" filter so no Python warnings are shown for the rest of the session.
warnings.filterwarnings('ignore')

# Release PyTorch's cached, currently unused GPU memory before the model is loaded further down.
torch.cuda.empty_cache()


class BsddClient:
    """Small client for the buildingSMART Data Dictionary (bsDD) REST API.

    Resolves IFC 4.3 class names against the ``Class/v1`` endpoint and exposes
    the definition, property names and ``PredefinedType`` values found in the
    response. Successful responses are cached per class name for the lifetime
    of the instance.
    """

    def __init__(self):
        self.base_url = "https://api.bsdd.buildingsmart.org/api"
        self.ifc_namespace_uri = "https://identifier.buildingsmart.org/uri/buildingsmart/ifc/4.3"
        # "class_<name>" -> decoded JSON body of a successful lookup.
        self.cache = {}
        self.session = requests.Session()
        self.session.headers.update({'Accept': 'application/json'})

    def search_class(self, class_name: str) -> Optional[Dict]:
        """Fetch the bsDD record for one IFC class.

        Args:
            class_name: IFC class name, e.g. ``"IfcWall"``.

        Returns:
            The decoded JSON response, or ``None`` when the request raises,
            the body cannot be decoded, or the status code is not 200.
            Only successful lookups are stored in the cache.
        """
        cache_key = f"class_{class_name}"
        if cache_key in self.cache:
            return self.cache[cache_key]

        url = f"{self.base_url}/Class/v1"
        # Class/v1 identifies the class through the single ``Uri`` parameter and
        # only includes ``classProperties`` when explicitly asked to.
        params = {
            'Uri': f"{self.ifc_namespace_uri}/class/{class_name}",
            'IncludeClassProperties': 'true',
        }

        try:
            response = self.session.get(url, params=params, timeout=10)
            if response.status_code != 200:
                return None
            data = response.json()
        except Exception as e:
            # Best effort: enrichment is optional, so report and carry on.
            print(f"Warning: bsDD API error for {class_name}: {e}")
            return None

        self.cache[cache_key] = data
        return data

    @staticmethod
    def _class_properties(class_info: Optional[Dict]) -> List[Dict]:
        """Return the ``classProperties`` entries of a record, tolerating a missing or null field."""
        if not class_info:
            return []
        entries = class_info.get('classProperties') or []
        return [entry for entry in entries if isinstance(entry, dict)]

    def get_class_properties(self, class_name: str) -> List[str]:
        """Return the non-empty property names listed for an IFC class ([] when unknown)."""
        class_info = self.search_class(class_name)
        return [
            prop['name']
            for prop in self._class_properties(class_info)
            if prop.get('name')
        ]

    def get_class_description(self, class_name: str) -> str:
        """Return the ``definition`` text of an IFC class ('' when unknown or null)."""
        class_info = self.search_class(class_name)
        if not class_info:
            return ""
        return class_info.get('definition') or ""

    def get_predefined_types(self, class_name: str) -> List[str]:
        """Return the allowed values of the class's ``PredefinedType`` property.

        Empty values are dropped. If several ``PredefinedType`` entries carry
        an ``allowedValues`` list, the last one wins.
        """
        class_info = self.search_class(class_name)

        predefined_types: List[str] = []
        for prop in self._class_properties(class_info):
            if prop.get('name') != 'PredefinedType':
                continue
            allowed = prop.get('allowedValues')
            if allowed is None:
                continue
            predefined_types = [
                item['value']
                for item in allowed
                if isinstance(item, dict) and item.get('value')
            ]
        return predefined_types


class EnrichmentSystem:
    """Builds bsDD background text for questions that mention IFC classes."""

    def __init__(self):
        print("Initializing bsDD enrichment system...")
        self.bsdd_client = BsddClient()
        print("✓ BsDD client ready")

    def get_bsdd_info(self, ifc_file, question: str) -> str:
        """Return a text block with bsDD data for every IFC class named in *question*.

        Args:
            ifc_file: Not used by this method; kept so existing callers keep working.
            question: Free-text question; tokens matching ``Ifc\\w+`` (any
                letter case) are treated as class names.

        Returns:
            A block starting with the ``=== bsDD API DEFINITIONS ===`` header
            followed by one section per class for which bsDD returned a
            definition, properties or predefined types; ``""`` when no class
            is mentioned or none yields any data.
        """
        ifc_entities = re.findall(r'Ifc\w+', question, re.IGNORECASE)
        if not ifc_entities:
            return ""

        info_parts = ["=== bsDD API DEFINITIONS ===\n"]

        # The pattern is case-insensitive, so de-duplicate case-insensitively as
        # well; the first spelling seen is the one that gets looked up.
        seen_entities = set()
        for entity in ifc_entities:
            entity_key = entity.casefold()
            if entity_key in seen_entities:
                continue
            seen_entities.add(entity_key)

            description = self.bsdd_client.get_class_description(entity)
            properties = self.bsdd_client.get_class_properties(entity)
            predefined_types = self.bsdd_client.get_predefined_types(entity)

            if not (description or properties or predefined_types):
                continue

            info_parts.append(f"{entity}:")
            if description:
                info_parts.append(f"  Definition: {description}")
            if predefined_types:
                # Cap the lists so one class cannot dominate the prompt.
                info_parts.append(f"  PredefinedType options: {', '.join(predefined_types[:10])}")
            if properties:
                info_parts.append(f"  Common properties: {', '.join(properties[:15])}")
            info_parts.append("")

        # Only the header present means nothing useful was found.
        if len(info_parts) <= 1:
            return ""
        return "\n".join(info_parts)


class QuestionTypeDetector:
    """Classifies a question by its wording and maps each class to an answer-format hint."""

    @staticmethod
    def detect_question_type(question: str) -> str:
        """Return one of "counting", "factual", "comparison", "list" or "semantic".

        Rules are tried in that order on the lower-cased, stripped question;
        the first rule that matches decides, and "semantic" is the fallback.
        """
        normalized = question.lower().strip()

        if normalized.startswith("how many"):
            return "counting"

        factual_openers = ("does ", "is ", "are ", "do ", "can ", "which ")
        if normalized.startswith(factual_openers):
            return "factual"

        if any(marker in normalized for marker in ("compare", "difference")):
            return "comparison"

        if normalized.startswith("list ") or "what are" in normalized:
            return "list"

        return "semantic"

    @staticmethod
    def get_instruction_for_type(question_type: str) -> str:
        """Return the formatting instruction for *question_type* (unknown types get the "semantic" one)."""
        fallback = "Provide a clear, direct answer."
        hints = {
            "counting": "Provide ONLY the number.",
            "factual": "Answer directly and concisely.",
            "list": "List items clearly, separated by commas.",
            "comparison": "Provide a clear comparison highlighting key differences.",
            "semantic": fallback,
        }
        return hints.get(question_type, fallback)


class BaselineVsBsddTester:
    """Test baseline vs bsDD enrichment"""
    
    def __init__(self, model_name: str = "Qwen/Qwen2.5-1.5B-Instruct"):
        """Load the tokenizer and causal LM, then create the helper objects.

        The model is placed on CUDA when ``torch.cuda.is_available()`` and on
        the CPU otherwise, and is switched to eval mode.  An
        ``EnrichmentSystem`` and a ``QuestionTypeDetector`` are attached as
        ``self.enrichment`` and ``self.question_detector``.

        Args:
            model_name: Identifier handed to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        print(f"\n{'='*80}")
        print(f"LOADING MODEL: {model_name}")
        print(f"{'='*80}\n")

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Device: {self.device}")

        # NOTE(review): trust_remote_code=True allows Python code shipped with
        # the model repository to run locally; only use with trusted models.
        print("Loading tokenizer...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

        # generate_answer passes pad_token_id to generate(), so fall back to
        # the EOS token when the tokenizer defines no padding token.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        print("✓ Tokenizer loaded")

        print("Loading model weights...")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            # Half precision on the GPU, full precision on the CPU.
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            trust_remote_code=True,
            low_cpu_mem_usage=True
        )
        print("✓ Model weights loaded")

        # Report the device that is really used; the messages used to say
        # "GPU" even when the model stayed on the CPU.
        device_label = "GPU" if self.device == "cuda" else "CPU"
        print(f"Moving model to {device_label}...")
        self.model = self.model.to(self.device)
        print(f"✓ Model on {device_label}")

        self.model.eval()
        print("✓ Model in eval mode")

        self.enrichment = EnrichmentSystem()
        self.question_detector = QuestionTypeDetector()

        print("\n✓✓✓ ALL SYSTEMS READY ✓✓✓\n")
    
    def load_ifc(self, ifc_path: str):
        """Open the IFC file at ``ifc_path`` and index its contents.

        Stores the opened file as ``self.ifc_file`` and all ``IfcProduct``
        instances as ``self.products``, then calls ``_index_ifc_data``.
        """
        print(f"Loading IFC file: {ifc_path}")
        opened = ifcopenshell.open(ifc_path)
        self.ifc_file = opened
        print(f"✓ IFC Schema: {opened.schema}")

        all_products = opened.by_type('IfcProduct')
        self.products = all_products
        print(f"✓ Loaded {len(all_products)} IFC products")

        self._index_ifc_data()
    
    def _index_ifc_data(self):
        """Index basic IFC data for context"""
        print("\nIndexing IFC data...")
        self.element_counts = {}
        self.materials = set()
        self.spatial_elements = {}
        
        for product in self.products:
            elem_type = product.is_a()
            self.element_counts[elem_type] = self.element_counts.get(elem_type, 0) + 1
            
            if hasattr(product, 'HasAssociations'):
                for assoc in product.HasAssociations:
                    if assoc.is_a('IfcRelAssociatesMaterial'):
                        material = assoc.RelatingMaterial
                        if material.is_a('IfcMaterial'):
                            self.materials.add(material.Name)
        
        for storey in self.ifc_file.by_type('IfcBuildingStorey'):
            self.spatial_elements[storey.Name] = {
                'elevation': storey.Elevation if hasattr(storey, 'Elevation') else None
            }
        
        print(f"✓ Indexed {len(self.element_counts)} element types")
    
    def _extract_relevant_context(self, question: str) -> str:
        """Extract relevant IFC model context for the question"""
        context = []
        q_lower = question.lower()
        
        context.append(f"IFC Schema: {self.ifc_file.schema}")
        
        # Add element counts if question asks about counting
        if "how many" in q_lower:
            for elem_type, count in self.element_counts.items():
                if elem_type.lower() in q_lower:
                    context.append(f"{elem_type} count: {count}")
                    break
        
        # Add materials if question mentions materials
        if "material" in q_lower:
            context.append(f"\nMaterials in model: {', '.join(sorted(self.materials))}")
        
        # Add spatial structure if relevant
        if any(word in q_lower for word in ['storey', 'elevation', 'floor', 'level']):
            context.append("\nSpatial Structure:")
            for name, info in self.spatial_elements.items():
                context.append(f"  {name}: Elevation {info['elevation']}")
        
        return "\n".join(context)
    
    def _prepare_context(self, question: str, config: str, question_type: str) -> str:
        """Prepare context for baseline or bsDD configuration"""
        context_parts = []
        
        if config == "baseline":
            # Baseline: Just the question
            context_parts.append("=== QUESTION ===")
            context_parts.append(question)
            
        elif config == "bsdd_only":
            # BsDD: Model data + bsDD definitions + question
            context_parts.append("=== IFC MODEL DATA ===")
            context_parts.append(self._extract_relevant_context(question))
            
            bsdd_info = self.enrichment.get_bsdd_info(self.ifc_file, question)
            if bsdd_info:
                context_parts.append(f"\n{bsdd_info}")
            
            context_parts.append("\n=== QUESTION ===")
            context_parts.append(question)
        
        # Add instruction based on question type
        context_parts.append(f"\nInstruction: {self.question_detector.get_instruction_for_type(question_type)}")
        
        return "\n".join(context_parts)
    
    def generate_answer(self, question: str, config: str, question_type: str) -> str:
        """Generate answer using the model"""
        try:
            context = self._prepare_context(question, config, question_type)
            
            system_msg = "You are an IFC expert. Answer questions clearly and concisely based on the provided information."
            
            messages = [
                {"role": "system", "content": system_msg},
                {"role": "user", "content": context}
            ]
            
            text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = self.tokenizer(text, return_tensors="pt", padding=False, truncation=True, max_length=2048)
            
            input_ids = inputs['input_ids'].to(self.device)
            attention_mask = inputs['attention_mask'].to(self.device)
            
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=250,
                    do_sample=False,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                )
            
            new_tokens = outputs[0][len(input_ids[0]):]
            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            return response.strip()
            
        except Exception as e:
            return f"Error: {str(e)}"
    
    def run_test(self) -> pd.DataFrame:
        """Test baseline vs bsDD on predefined questions"""
        
        # Hardcoded questions
        questions = [
            {
                'question': 'Which IFC entity holds the world coordinate system for model geometry?',
                'category': 'Schema',
                'difficulty': 'Medium'
            },
            {
                'question': 'Which property in Pset_WindowCommon records whether a window is external?',
                'category': 'Property Sets',
                'difficulty': 'Easy'
            },
            {
                'question': 'Which entities define local object placement in 3D?',
                'category': 'Geometry',
                'difficulty': 'Medium'
            },
            {
                'question': 'Compare the semantic requirements for modeling a structural IfcSlab and a structural IfcBeam. What are the key property and role differences?',
                'category': 'Comparison',
                'difficulty': 'Hard'
            }
        ]
        
        configs = ["baseline", "bsdd_only"]
        results = []
        
        print(f"\n{'='*80}")
        print(f"RUNNING TEST - {len(questions)} QUESTIONS × 2 CONFIGURATIONS")
        print(f"{'='*80}\n")
        
        for idx, q_data in enumerate(questions):
            question = q_data['question']
            category = q_data['category']
            difficulty = q_data['difficulty']
            
            question_type = self.question_detector.detect_question_type(question)
            
            print(f"\n[{idx+1}/{len(questions)}] {question[:80]}...")
            
            for config in configs:
                print(f"  {config:18s}...", end=" ", flush=True)
                
                start_time = time.time()
                predicted_answer = self.generate_answer(question, config, question_type)
                end_time = time.time()
                
                result = {
                    'question_id': idx,
                    'category': category,
                    'difficulty': difficulty,
                    'question_type': question_type,
                    'question': question,
                    'configuration': config,
                    'predicted_answer': predicted_answer,
                    'time_seconds': end_time - start_time
                }
                
                results.append(result)
                print(f"Time: {(end_time - start_time)*1000:.1f}ms")
                print(f"       Answer: {predicted_answer[:80]}...")
        
        return pd.DataFrame(results)
    
    def generate_report(self, results_df: pd.DataFrame, output_path: str):
        """Write the Excel report and print a console summary of ``results_df``.

        The workbook gets three sheets: ``All_Results`` (the frame as-is),
        ``Summary`` (per-configuration timing statistics) and
        ``Side_by_Side`` (one row per question with both answers).  The same
        per-question pairs are then printed, followed by the average-time
        comparison between the two configurations.

        Args:
            results_df: Frame with the columns produced by ``run_test``; each
                ``question_id`` needs one ``baseline`` and one ``bsdd_only``
                row.
            output_path: Destination ``.xlsx`` path (written with openpyxl).

        Raises:
            IndexError: If a question lacks a row for either configuration.
        """
        print(f"\n{'='*80}")
        print("GENERATING REPORT...")
        print(f"{'='*80}\n")

        # Pair the rows once, keyed by the question ids that are really
        # present.  Both the workbook and the console output use these pairs,
        # so non-contiguous ids no longer break the console section, and a
        # missing row is detected before any file is written.
        paired_rows = []
        for q_id in results_df['question_id'].unique():
            q_rows = results_df[results_df['question_id'] == q_id]
            paired_rows.append((
                q_rows[q_rows['configuration'] == 'baseline'].iloc[0],
                q_rows[q_rows['configuration'] == 'bsdd_only'].iloc[0],
            ))

        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            # All results
            results_df.to_excel(writer, sheet_name='All_Results', index=False)

            # Summary statistics
            summary = results_df.groupby('configuration').agg({
                'time_seconds': ['mean', 'std', 'sum', 'count']
            }).round(4)
            summary.columns = ['Avg Time (s)', 'Std Time', 'Total Time (s)', 'Num Questions']
            summary['Avg Time (ms)'] = (summary['Avg Time (s)'] * 1000).round(1)
            summary.to_excel(writer, sheet_name='Summary')

            # Side-by-side comparison
            comparison_df = pd.DataFrame([
                {
                    'Question': baseline_row['question'],
                    'Category': baseline_row['category'],
                    'Difficulty': baseline_row['difficulty'],
                    'Baseline Answer': baseline_row['predicted_answer'],
                    'BsDD Answer': bsdd_row['predicted_answer'],
                    'Baseline Time (ms)': baseline_row['time_seconds'] * 1000,
                    'BsDD Time (ms)': bsdd_row['time_seconds'] * 1000
                }
                for baseline_row, bsdd_row in paired_rows
            ])
            comparison_df.to_excel(writer, sheet_name='Side_by_Side', index=False)

        print(f"✓ Report saved: {output_path}\n")

        # Console summary
        print(f"{'='*80}")
        print("DETAILED RESULTS - BASELINE VS BsDD")
        print(f"{'='*80}\n")

        for position, (baseline_row, bsdd_row) in enumerate(paired_rows, start=1):
            print(f"\n{'='*80}")
            print(f"QUESTION {position} [{baseline_row['category']} - {baseline_row['difficulty']}]")
            print(f"{'='*80}")
            print(f"\n{baseline_row['question']}\n")

            print(f"{'BASELINE ANSWER':-^80}")
            print(f"Time: {baseline_row['time_seconds']*1000:.1f}ms")
            print(f"{baseline_row['predicted_answer']}\n")

            print(f"{'BsDD ENRICHED ANSWER':-^80}")
            print(f"Time: {bsdd_row['time_seconds']*1000:.1f}ms")
            print(f"{bsdd_row['predicted_answer']}\n")

        # Overall summary
        print(f"\n{'='*80}")
        print("OVERALL SUMMARY")
        print(f"{'='*80}\n")

        baseline_avg = results_df[results_df['configuration'] == 'baseline']['time_seconds'].mean() * 1000
        bsdd_avg = results_df[results_df['configuration'] == 'bsdd_only']['time_seconds'].mean() * 1000

        print(f"{'Configuration':<20} {'Avg Time (ms)':<15}")
        print("-" * 40)
        print(f"{'Baseline':<20} {baseline_avg:>14.1f}")
        print(f"{'BsDD Enriched':<20} {bsdd_avg:>14.1f}")
        print(f"\nTime Overhead: {bsdd_avg - baseline_avg:+.1f}ms ({((bsdd_avg/baseline_avg - 1)*100):+.1f}%)")


# ============================================================================
# MAIN EXECUTION
# ============================================================================

def main(
    model_name: str = "Qwen/Qwen2.5-1.5B-Instruct",
    ifc_file_path: str = "C:/DfMA/2R1 4.3.ifc",
    output_report_path: str = "C:/DfMA/baseline_vs_bsdd_comparison.xlsx",
):
    """Run the baseline vs bsDD comparison test end to end.

    Builds a ``BaselineVsBsddTester``, loads the IFC file, runs the built-in
    questions and writes the report.  Any exception is reported on the
    console together with its traceback instead of propagating, so a failed
    run does not abort the surrounding notebook cell.

    Args:
        model_name: Model identifier handed to ``BaselineVsBsddTester``.
        ifc_file_path: Path of the IFC file to load.
        output_report_path: Path of the Excel report to write.

    The defaults reproduce the previously hard-coded configuration, so
    calling ``main()`` without arguments behaves as before.
    """
    print("\n" + "="*80)
    print("BASELINE VS BsDD ENRICHMENT TEST")
    print("="*80)

    print(f"\nModel: {model_name}")
    print(f"IFC file: {ifc_file_path}")
    print(f"Output: {output_report_path}\n")

    try:
        # Initialize tester
        tester = BaselineVsBsddTester(model_name=model_name)

        # Load IFC
        tester.load_ifc(ifc_file_path)

        # Run test
        results_df = tester.run_test()

        # Generate report
        tester.generate_report(results_df, output_report_path)

        print("\n" + "="*80)
        print("✓✓✓ TEST COMPLETE ✓✓✓")
        print(f"Results saved to: {output_report_path}")
        print("="*80)

    except Exception as e:
        # Top-level boundary: show the failure and its traceback, then return.
        print(f"\n❌ ERROR: {e}")
        import traceback
        traceback.print_exc()

# Run the test.  The guard keeps an import of this file from loading the model
# and running the benchmark; when the code is executed as a script or pasted
# into a Jupyter cell, __name__ is "__main__" and the test runs as before.
if __name__ == "__main__":
    main()


BASELINE VS BsDD ENRICHMENT TEST

Model: Qwen/Qwen2.5-1.5B-Instruct
IFC file: C:/DfMA/2R1 4.3.ifc
Output: C:/DfMA/baseline_vs_bsdd_comparison.xlsx


LOADING MODEL: Qwen/Qwen2.5-1.5B-Instruct

Device: cuda
Loading tokenizer...
✓ Tokenizer loaded
Loading model weights...
✓ Model weights loaded
Moving model to GPU...
✓ Model on GPU
✓ Model in eval mode
Initializing bsDD enrichment system...
✓ BsDD client ready

✓✓✓ ALL SYSTEMS READY ✓✓✓

Loading IFC file: C:/DfMA/2R1 4.3.ifc
✓ IFC Schema: IFC4X3
✓ Loaded 107 IFC products

Indexing IFC data...
✓ Indexed 11 element types

RUNNING TEST - 4 QUESTIONS × 2 CONFIGURATIONS


[1/4] Which IFC entity holds the world coordinate system for model geometry?...
  baseline          ... Time: 891.4ms
       Answer: The IFC entity that holds the world coordinate system for model geometry is the ...
  bsdd_only         ... Time: 863.8ms
       Answer: The IFC entity that holds the world coordinate system for model geometry is the ...

[2/4] Which property i