In [1]:
import ezdxf
import pandas as pd
import numpy as np
from shapely.geometry import LineString, MultiLineString
from shapely.ops import linemerge
import networkx as nx
from sklearn.cluster import DBSCAN
from difflib import get_close_matches
import re
import os
from datetime import datetime
import ezdxf
import re
from collections import defaultdict
from typing import List, Dict, Any
import ezdxf
import numpy as np
import math
from typing import List, Tuple, Set, Dict, Any, Optional, Union
import logging
from collections import defaultdict, deque
import time

# Helpers for detecting overlapping lines and duplicate entities

In [2]:
# Configure the root logger once: INFO and above, rendered as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
# Named logger shared by the helper functions in this file.
logger = logging.getLogger('ezdxf-overkill')

class Vector:
    """Small 3D vector value with the basic operations used by the geometry helpers."""

    def __init__(self, x: float, y: float, z: float = 0.0):
        # Components are always stored as floats, whatever numeric type was given.
        self.x, self.y, self.z = float(x), float(y), float(z)

    @classmethod
    def from_points(cls, p1: Tuple[float, float, float], p2: Tuple[float, float, float]):
        """Return the displacement vector that leads from ``p1`` to ``p2``."""
        dx, dy, dz = (p2[axis] - p1[axis] for axis in range(3))
        return cls(dx, dy, dz)

    def dot(self, other):
        """Return the scalar (dot) product with ``other``."""
        along_x = self.x * other.x
        along_y = self.y * other.y
        along_z = self.z * other.z
        return along_x + along_y + along_z

    def cross(self, other):
        """Return the cross product ``self x other`` as a new Vector."""
        cx = self.y * other.z - self.z * other.y
        cy = self.z * other.x - self.x * other.z
        cz = self.x * other.y - self.y * other.x
        return Vector(cx, cy, cz)

    def length(self):
        """Return the Euclidean norm of the vector."""
        squared_norm = self.x**2 + self.y**2 + self.z**2
        return math.sqrt(squared_norm)

    def normalize(self):
        """Return a unit-length copy; a zero vector stays the zero vector."""
        magnitude = self.length()
        if magnitude != 0:
            return Vector(self.x/magnitude, self.y/magnitude, self.z/magnitude)
        return Vector(0, 0, 0)

    def __str__(self):
        return f"Vector({self.x}, {self.y}, {self.z})"

class BoundingBox:
    """Axis-aligned 3D box stored as its minimum and maximum corner coordinates."""

    def __init__(self, min_x, min_y, min_z, max_x, max_y, max_z):
        self.min_x, self.min_y, self.min_z = min_x, min_y, min_z
        self.max_x, self.max_y, self.max_z = max_x, max_y, max_z

    @classmethod
    def from_points(cls, points):
        """Return the tightest box around ``points``.

        Points with fewer than three components are treated as lying at z = 0.
        An empty input yields the all-zero box.
        """
        if not points:
            return cls(0, 0, 0, 0, 0, 0)

        lows = [float('inf')] * 3
        highs = [float('-inf')] * 3

        for pt in points:
            coords = (pt[0], pt[1], pt[2] if len(pt) > 2 else 0)
            for axis, value in enumerate(coords):
                lows[axis] = min(lows[axis], value)
                highs[axis] = max(highs[axis], value)

        return cls(lows[0], lows[1], lows[2], highs[0], highs[1], highs[2])

    def overlaps(self, other, tolerance=0):
        """Return True when the two boxes intersect after growing this one by ``tolerance``."""
        x_ok = self.max_x + tolerance >= other.min_x and self.min_x - tolerance <= other.max_x
        y_ok = self.max_y + tolerance >= other.min_y and self.min_y - tolerance <= other.max_y
        z_ok = self.max_z + tolerance >= other.min_z and self.min_z - tolerance <= other.max_z
        return x_ok and y_ok and z_ok

    def distance_to(self, other):
        """Return the shortest distance between the two boxes (0 when they touch or overlap)."""
        def axis_gap(own_min, own_max, other_min, other_max):
            # Positive only when the two intervals are separated along this axis.
            return max(0, max(own_min - other_max, other_min - own_max))

        dx = axis_gap(self.min_x, self.max_x, other.min_x, other.max_x)
        dy = axis_gap(self.min_y, self.max_y, other.min_y, other.max_y)
        dz = axis_gap(self.min_z, self.max_z, other.min_z, other.max_z)

        return math.sqrt(dx*dx + dy*dy + dz*dz)

class GridIndex:
    """Uniform-grid spatial index mapping integer cell coordinates to the entities stored there."""

    def __init__(self, cell_size=1.0):
        self.cell_size = cell_size
        # cell (ix, iy, iz) -> list of entities whose box touches that cell
        self.grid = defaultdict(list)
        # entity handle -> list of cells the entity was inserted into
        self.entity_cells = {}

    def _get_cell_coords(self, point):
        """Return the integer cell containing ``point`` (a 3-component sequence)."""
        x, y, z = point
        size = self.cell_size
        return tuple(int(value / size) for value in (x, y, z))

    def _get_cells_for_bbox(self, bbox):
        """Return every cell covered by ``bbox``, ordered x-major, then y, then z."""
        size = self.cell_size
        # int() truncates toward zero; insert and query share this mapping.
        x_cells = range(int(bbox.min_x / size), int(bbox.max_x / size) + 1)
        y_cells = range(int(bbox.min_y / size), int(bbox.max_y / size) + 1)
        z_cells = range(int(bbox.min_z / size), int(bbox.max_z / size) + 1)

        return [(cx, cy, cz) for cx in x_cells for cy in y_cells for cz in z_cells]

    def insert(self, entity, bbox):
        """Register ``entity`` in every cell covered by ``bbox``."""
        handle = get_entity_handle(entity)
        covered = self._get_cells_for_bbox(bbox)

        for cell in covered:
            self.grid[cell].append(entity)

        self.entity_cells[handle] = covered

    def remove(self, entity):
        """Drop ``entity`` from the cells recorded for its handle; unknown entities are ignored."""
        handle = get_entity_handle(entity)
        if handle not in self.entity_cells:
            return

        for cell in self.entity_cells[handle]:
            bucket = self.grid.get(cell)
            if bucket is not None and entity in bucket:
                bucket.remove(entity)

        del self.entity_cells[handle]

    def query(self, bbox):
        """Return the distinct entities stored in any cell covered by ``bbox``."""
        hits = set()
        for cell in self._get_cells_for_bbox(bbox):
            # Membership test avoids creating empty buckets in the defaultdict.
            if cell in self.grid:
                hits.update(self.grid[cell])

        return list(hits)

    def clear(self):
        """Forget every stored entity."""
        self.entity_cells.clear()
        self.grid.clear()

def get_entity_handle(entity):
    """Return an identifier for ``entity``.

    Lookup order: ``entity.dxf.handle``, then ``entity.handle``, and finally
    the Python object id when neither attribute exists.
    """
    has_dxf_handle = hasattr(entity, 'dxf') and hasattr(entity.dxf, 'handle')
    if has_dxf_handle:
        return entity.dxf.handle

    if hasattr(entity, 'handle'):
        return entity.handle

    # Last resort: object identity stands in for a missing handle.
    return id(entity)

def point_distance(p1: Tuple[float, float, float], p2: Tuple[float, float, float]) -> float:
    """Return the Euclidean distance between two 3-component points."""
    dx = p2[0] - p1[0]
    dy = p2[1] - p1[1]
    dz = p2[2] - p1[2]
    return math.sqrt(dx**2 + dy**2 + dz**2)

def is_same_point(p1: Tuple[float, float, float], p2: Tuple[float, float, float], tolerance: float) -> bool:
    """Return True when the two points lie no further apart than ``tolerance``."""
    separation = point_distance(p1, p2)
    return separation <= tolerance

def point_to_line_distance(point: Tuple[float, float, float], 
                          line_start: Tuple[float, float, float], 
                          line_end: Tuple[float, float, float]) -> float:
    """Return the perpendicular distance from ``point`` to the infinite line through the two given points.

    The result is not clamped to the segment. When the two line points
    coincide, the distance to that single point is returned instead.
    """
    if line_start == line_end:
        return point_distance(point, line_start)

    direction = Vector.from_points(line_start, line_end)
    to_point = Vector.from_points(line_start, point)

    # Also covers endpoints that are numerically identical without comparing equal.
    if direction.length() == 0:
        return to_point.length()

    unit = direction.normalize()
    along = to_point.dot(unit)

    # Subtract the component parallel to the line; what remains is the perpendicular offset.
    rejection = Vector(
        to_point.x - unit.x * along,
        to_point.y - unit.y * along,
        to_point.z - unit.z * along
    )

    return rejection.length()

def point_on_line(point: Tuple[float, float, float], 
                 line_start: Tuple[float, float, float], 
                 line_end: Tuple[float, float, float], 
                 tolerance: float) -> bool:
    """Return True when ``point`` lies on the segment, within ``tolerance``.

    The point must be within ``tolerance`` of the supporting line and no
    further than the segment length plus ``tolerance`` from either endpoint.
    """
    if point_to_line_distance(point, line_start, line_end) > tolerance:
        return False

    reach = point_distance(line_start, line_end) + tolerance
    near_start = point_distance(point, line_start) <= reach
    near_end = point_distance(point, line_end) <= reach

    return near_start and near_end

def are_collinear(p1: Tuple[float, float, float], p2: Tuple[float, float, float], 
                 p3: Tuple[float, float, float], tolerance: float) -> bool:
    """Return True when the three points are collinear within ``tolerance``.

    The magnitude of the cross product of ``p1->p2`` and ``p1->p3`` is compared
    against ``tolerance`` scaled by the longer of the two vectors.
    """
    to_second = Vector.from_points(p1, p2)
    to_third = Vector.from_points(p1, p3)

    cross_magnitude = to_second.cross(to_third).length()
    longest = max(to_second.length(), to_third.length())

    return cross_magnitude <= tolerance * longest

def get_closest_point_on_line(point: Tuple[float, float, float], 
                             line_start: Tuple[float, float, float], 
                             line_end: Tuple[float, float, float]) -> Tuple[float, float, float]:
    """Return the point of the segment ``line_start``-``line_end`` nearest to ``point``.

    The projection parameter is clamped to [0, 1], so the result never leaves
    the segment. A degenerate segment returns ``line_start``.
    """
    if line_start == line_end:
        return line_start

    segment = Vector.from_points(line_start, line_end)
    offset = Vector.from_points(line_start, point)

    segment_len_sq = segment.dot(segment)
    if segment_len_sq == 0:
        return line_start

    # Fraction of the way along the segment, clamped to its two ends.
    t = offset.dot(segment) / segment_len_sq
    t = max(0, min(1, t))

    return tuple(
        line_start[axis] + t * (line_end[axis] - line_start[axis])
        for axis in range(3)
    )

def are_lines_colinear(line1_start: Tuple[float, float, float], line1_end: Tuple[float, float, float],
                       line2_start: Tuple[float, float, float], line2_end: Tuple[float, float, float],
                       tolerance: float) -> bool:
    """Return True when both segments lie on one common line, within ``tolerance``.

    Both endpoints of line 2 must be collinear with line 1, and the two
    directions must be parallel or anti-parallel. A zero-length segment that
    passes the collinearity tests counts as colinear.
    """
    for probe in (line2_start, line2_end):
        if not are_collinear(line1_start, line1_end, probe, tolerance):
            return False

    dir1 = Vector.from_points(line1_start, line1_end)
    dir2 = Vector.from_points(line2_start, line2_end)

    if dir1.length() == 0 or dir2.length() == 0:
        return True

    # |cos(angle)| between the unit directions; 1 means parallel or anti-parallel.
    alignment = abs(dir1.normalize().dot(dir2.normalize()))

    return abs(alignment - 1) <= tolerance

def lines_overlap(line1_start: Tuple[float, float, float], line1_end: Tuple[float, float, float],
                 line2_start: Tuple[float, float, float], line2_end: Tuple[float, float, float],
                 tolerance: float) -> Tuple[bool, Optional[Tuple[Tuple[float, float, float], Tuple[float, float, float]]]]:
    """Test whether two colinear segments overlap and report the shared stretch.

    Returns ``(True, (start, end))`` with the overlapping span expressed as
    points on line 1, or ``(False, None)`` when the segments are not colinear
    or are separated by more than ``tolerance`` along the line.
    """
    no_overlap = (False, None)

    if not are_lines_colinear(line1_start, line1_end, line2_start, line2_end, tolerance):
        return no_overlap

    axis = Vector.from_points(line1_start, line1_end)

    if axis.length() == 0:
        # Line 1 is a single point: it overlaps only if it sits on an endpoint of line 2.
        touches = (is_same_point(line1_start, line2_start, tolerance)
                   or is_same_point(line1_start, line2_end, tolerance))
        if touches:
            return (True, (line1_start, line1_start))
        return no_overlap

    axis = axis.normalize()

    def param_of(pt):
        # Signed distance of pt from line1_start measured along line 1.
        return Vector.from_points(line1_start, pt).dot(axis)

    def point_at(t):
        return (
            line1_start[0] + t * axis.x,
            line1_start[1] + t * axis.y,
            line1_start[2] + t * axis.z
        )

    a_lo, a_hi = 0.0, param_of(line1_end)
    b_lo, b_hi = param_of(line2_start), param_of(line2_end)

    if a_hi < a_lo:
        a_lo, a_hi = a_hi, a_lo
    if b_hi < b_lo:
        b_lo, b_hi = b_hi, b_lo

    if a_hi < b_lo - tolerance or b_hi < a_lo - tolerance:
        return no_overlap

    span_start = point_at(max(a_lo, b_lo))
    span_end = point_at(min(a_hi, b_hi))

    return (True, (span_start, span_end))

def get_entity_dxf_attribs(entity) -> Dict[str, Any]:
    """Collect the common display attributes set on ``entity.dxf``.

    Only attributes that exist and are not None are included. An entity
    without a ``dxf`` namespace yields an empty dict.
    """
    if not hasattr(entity, 'dxf'):
        return {}

    wanted = (
        'layer', 'linetype', 'color', 'lineweight', 'ltscale', 
        'invisible', 'true_color', 'transparency'
    )

    collected = {}
    for name in wanted:
        if not hasattr(entity.dxf, name):
            continue
        try:
            value = getattr(entity.dxf, name)
        except (AttributeError, ValueError):
            # Attribute advertised but unreadable: skip it.
            continue
        if value is not None:
            collected[name] = value

    return collected

def get_line_coords(line) -> Tuple[Tuple[float, float, float], Tuple[float, float, float]]:
    """Return ``(start, end)`` for a line-like object.

    Supported shapes, checked in this order:
      * an object with ``dxf.start`` / ``dxf.end`` points exposing x, y and optionally z;
      * an object with a ``dxf`` attribute plus ``start_point`` / ``end_point`` (returned as-is);
      * an object with ``start`` / ``end`` given as tuples or as x/y/z objects.
    Anything else logs a warning and returns a placeholder pair.
    """
    def xyz_from_attrs(pt):
        # A missing z component defaults to 0.0.
        return (pt.x, pt.y, pt.z if hasattr(pt, 'z') else 0.0)

    def xyz_from_any(pt):
        if isinstance(pt, tuple):
            return (pt[0], pt[1], pt[2] if len(pt) > 2 else 0.0)
        return xyz_from_attrs(pt)

    if hasattr(line, 'dxf'):
        if hasattr(line.dxf, 'start'):
            return xyz_from_attrs(line.dxf.start), xyz_from_attrs(line.dxf.end)
        if hasattr(line, 'start_point') and hasattr(line, 'end_point'):
            return line.start_point, line.end_point
        logger.warning(f"Couldn't determine line coordinates structure: {line}")
        return (0, 0, 0), (0, 0, 0)

    if hasattr(line, 'start') and hasattr(line, 'end'):
        # Custom line objects carrying plain start/end attributes.
        return xyz_from_any(line.start), xyz_from_any(line.end)

    # Unknown structure: fall back to a placeholder segment.
    logger.warning(f"Couldn't extract coordinates for line: {line}")
    return (0, 0, 0), (1, 1, 0)

def get_arc_data(arc) -> Tuple[Tuple[float, float, float], float, float, float]:
    """Return ``(center, radius, start_angle, end_angle)`` for an arc; angles are in degrees.

    Objects without a ``dxf`` namespace, or whose attributes cannot be read,
    are logged and replaced by a unit quarter-arc at the origin.
    """
    placeholder = ((0, 0, 0), 1.0, 0.0, 90.0)
    try:
        if not hasattr(arc, 'dxf'):
            logger.warning(f"Couldn't determine arc data structure: {arc}")
            return placeholder

        # A center without a z component is placed at z = 0.0.
        center = (arc.dxf.center.x, arc.dxf.center.y, arc.dxf.center.z if hasattr(arc.dxf.center, 'z') else 0.0)
        radius = arc.dxf.radius
        start_angle = arc.dxf.start_angle
        end_angle = arc.dxf.end_angle
    except Exception as e:
        logger.error(f"Error extracting arc data: {e}")
        return placeholder

    return center, radius, start_angle, end_angle

def get_arc_point(center, radius, angle_deg):
    """Return the point at ``angle_deg`` degrees on the circle around ``center``.

    The circle lies in the plane z = ``center[2]``.
    """
    theta = math.radians(angle_deg)
    return (
        center[0] + radius * math.cos(theta),
        center[1] + radius * math.sin(theta),
        center[2],
    )

def get_arc_endpoints(arc) -> Tuple[Tuple[float, float, float], Tuple[float, float, float]]:
    """Return the points where the arc begins and ends, as ``(start_point, end_point)``."""
    center, radius, start_angle, end_angle = get_arc_data(arc)

    return (
        get_arc_point(center, radius, start_angle),
        get_arc_point(center, radius, end_angle),
    )

def get_arc_bbox(arc) -> BoundingBox:
    """Calculate the bounding box of an arc.

    The box is built from the two arc endpoints plus every axis extreme
    (0, 90, 180, 270 degrees) the arc sweeps through. The arc is taken to run
    counter-clockwise from ``start_angle`` to ``end_angle`` (degrees), so an
    arc such as 350 -> 10 crosses 0 degrees and includes that extreme.
    """
    center, radius, start_angle, end_angle = get_arc_data(arc)

    # Sweep in degrees. A negative raw difference means the arc wraps past
    # 360, so bring it back into [0, 360). Cap at one full turn.
    sweep = end_angle - start_angle
    if sweep < 0:
        sweep %= 360.0
    sweep = min(sweep, 360.0)

    # Start angle in [0, 360), so the quadrant walk below also works for
    # negative or over-range input angles.
    start = start_angle % 360.0

    points = [
        get_arc_point(center, radius, start_angle),
        get_arc_point(center, radius, end_angle),
    ]

    # Walk the multiples of 90 degrees from the first one at or after the
    # start up to the end of the sweep. The walk can pass 360, which is what
    # catches the extremes of wrap-around arcs.
    quadrant = math.ceil(start / 90.0) * 90.0
    while quadrant <= start + sweep:
        points.append(get_arc_point(center, radius, quadrant % 360.0))
        quadrant += 90.0

    return BoundingBox.from_points(points)

def get_line_bbox(line) -> BoundingBox:
    """Return the axis-aligned box spanned by the two endpoints of ``line``."""
    first, last = get_line_coords(line)
    endpoints = [first, last]
    return BoundingBox.from_points(endpoints)

def get_entity_bbox(entity) -> BoundingBox:
    """Return the bounding box of a LINE or ARC entity.

    CIRCLE and ELLIPSE are recognised but not yet supported and yield an
    all-zero box. Any other object logs a warning and also yields an
    all-zero box.
    """
    if hasattr(entity, 'dxftype'):
        kind = entity.dxftype()
        if kind == 'LINE':
            return get_line_bbox(entity)
        if kind == 'ARC':
            return get_arc_bbox(entity)
        if kind in ['CIRCLE', 'ELLIPSE']:
            # Placeholder: no dedicated box computation for these types yet.
            return BoundingBox(0, 0, 0, 0, 0, 0)

    # Unsupported type, or an object without dxftype().
    logger.warning(f"Unknown entity type for bounding box: {entity}")
    return BoundingBox(0, 0, 0, 0, 0, 0)

def lines_completely_overlap(line1, line2, tolerance: float) -> bool:
    """Return True when the two lines share both endpoints, in the same or reversed order."""
    a_start, a_end = get_line_coords(line1)
    b_start, b_end = get_line_coords(line2)

    if not are_lines_colinear(a_start, a_end, b_start, b_end, tolerance):
        return False

    # Same orientation: start meets start and end meets end.
    if is_same_point(a_start, b_start, tolerance) and is_same_point(a_end, b_end, tolerance):
        return True

    # Opposite orientation: start meets end and end meets start.
    return (is_same_point(a_start, b_end, tolerance) and
            is_same_point(a_end, b_start, tolerance))

def create_line_from_segment(msp, start_point, end_point, dxfattribs=None):
    """Add a line to ``msp`` and return it.

    If creation with ``dxfattribs`` fails, a warning is logged and the line
    is created again without attributes.
    """
    try:
        created = msp.add_line(start=start_point, end=end_point, dxfattribs=dxfattribs)
    except Exception as exc:
        logger.warning(f"Error creating line with attributes: {exc}")
        created = msp.add_line(start=start_point, end=end_point)
    return created

def create_arc_from_parameters(msp, center, radius, start_angle, end_angle, dxfattribs=None):
    """Add an arc to ``msp`` and return it.

    If creation with ``dxfattribs`` fails, a warning is logged and the arc
    is created again without attributes.
    """
    try:
        created = msp.add_arc(center=center, radius=radius, start_angle=start_angle, end_angle=end_angle, dxfattribs=dxfattribs)
    except Exception as exc:
        logger.warning(f"Error creating arc with attributes: {exc}")
        created = msp.add_arc(center=center, radius=radius, start_angle=start_angle, end_angle=end_angle)
    return created

def detect_duplicates(entities, tolerance):
    """Detect duplicate LINE and ARC entities.

    Each entity is reduced to a key of its geometry snapped to a grid of size
    ``tolerance``. The first entity seen for a key is kept; the handle of every
    later entity with the same key is reported.

    Args:
        entities: iterable of entities exposing ``dxftype()``; other objects
            and other entity types are ignored.
        tolerance: grid size used for snapping. Zero means exact matching.
            Arc angles (degrees) are snapped with the same value.

    Returns:
        Set of handles of the entities considered duplicates.
    """
    step = abs(tolerance)

    def snap(value):
        # Integer grid index; with a zero step fall back to the exact value
        # instead of dividing by zero.
        return round(value / step) if step else value

    def snap_point(point):
        return (snap(point[0]), snap(point[1]), snap(point[2]))

    duplicates = set()
    seen_keys = set()

    for entity in entities:
        if not hasattr(entity, 'dxftype'):
            continue

        entity_type = entity.dxftype()

        if entity_type == 'LINE':
            start, end = get_line_coords(entity)
            snapped_start = snap_point(start)
            snapped_end = snap_point(end)

            # Order the endpoints AFTER snapping, so two reversed lines that
            # differ only within the tolerance still produce the same key.
            if snapped_start > snapped_end:
                snapped_start, snapped_end = snapped_end, snapped_start

            key = ('LINE',) + snapped_start + snapped_end

        elif entity_type == 'ARC':
            center, radius, start_angle, end_angle = get_arc_data(entity)
            key = ('ARC',) + snap_point(center) + (
                snap(radius),
                snap(start_angle),
                snap(end_angle),
            )

        else:
            continue

        if key in seen_keys:
            duplicates.add(get_entity_handle(entity))
        else:
            seen_keys.add(key)

    return duplicates

def build_mergeable_graph(entities, tolerance):
    """Build a graph of entities that can potentially be merged.

    Only LINE and ARC entities are considered, and only entities of the same
    type are ever linked. Two lines are linked when they are colinear and
    either overlap or share an endpoint within ``tolerance``. Two arcs are
    linked when their centers and radii agree within ``tolerance`` and their
    angular ranges overlap or touch.

    Args:
        entities: collection of entities. It is iterated twice and ``len()``
            is called on it, so it must be a sized, re-iterable container.
        tolerance: distance tolerance. The same value is also compared against
            arc angles, which ``get_arc_data`` reports in degrees.

    Returns:
        Tuple ``(graph, entity_info, handle_to_entity)`` where
        ``graph`` maps a handle to the set of handles it can merge with
        (edges are stored in both directions),
        ``entity_info`` maps a handle to a dict with ``'type'`` plus
        ``'coords'`` (lines) or ``'data'`` and ``'endpoints'`` (arcs), and
        ``handle_to_entity`` maps a handle back to the entity object.
    """
    graph = defaultdict(set)
    entity_info = {}
    handle_to_entity = {}
    
    # Use a spatial index for efficient neighbor finding
    # Cell size is 1000x the tolerance but never below 1.0 drawing unit.
    cell_size = max(1.0, tolerance * 1000)
    spatial_index = GridIndex(cell_size=cell_size)
    
    # Pass 1: cache geometry per handle and register each entity's bounding box.
    logger.info("Building spatial index...")
    for entity in entities:
        handle = get_entity_handle(entity)
        
        if hasattr(entity, 'dxftype'):
            entity_type = entity.dxftype()
            
            if entity_type == 'LINE':
                start, end = get_line_coords(entity)
                entity_info[handle] = {'type': 'LINE', 'coords': (start, end)}
                handle_to_entity[handle] = entity
                
                bbox = get_line_bbox(entity)
                spatial_index.insert(entity, bbox)
            
            elif entity_type == 'ARC':
                center, radius, start_angle, end_angle = get_arc_data(entity)
                start_point, end_point = get_arc_endpoints(entity)
                
                entity_info[handle] = {
                    'type': 'ARC', 
                    'data': (center, radius, start_angle, end_angle),
                    'endpoints': (start_point, end_point)
                }
                handle_to_entity[handle] = entity
                
                bbox = get_arc_bbox(entity)
                spatial_index.insert(entity, bbox)
    
    # Pass 2: compare every indexed entity with its spatial neighbours.
    logger.info("Building mergeable entity graph...")
    entity_count = len(entities)
    processed = 0
    
    for entity in entities:
        processed += 1
        if processed % 1000 == 0:
            logger.info(f"Processing entity {processed}/{entity_count}...")
        
        handle = get_entity_handle(entity)
        # Entities that were not indexed in pass 1 (unsupported types) are skipped.
        if handle not in entity_info:
            continue
        
        entity_data = entity_info[handle]
        entity_type = entity_data['type']
        
        # Grow the box by the tolerance so near-touching neighbours are found too.
        bbox = get_entity_bbox(entity)
        expanded_bbox = BoundingBox(
            bbox.min_x - tolerance,
            bbox.min_y - tolerance,
            bbox.min_z - tolerance,
            bbox.max_x + tolerance,
            bbox.max_y + tolerance,
            bbox.max_z + tolerance
        )
        
        potential_matches = spatial_index.query(expanded_bbox)
        
        for other_entity in potential_matches:
            other_handle = get_entity_handle(other_entity)
                
            # Never link an entity to itself.
            if other_handle == handle:
                continue
            
            if other_handle not in entity_info:
                continue
                
            # Edge already recorded from the other side: nothing more to do.
            if other_handle in graph[handle] or handle in graph[other_handle]:
                continue
            
            other_data = entity_info[other_handle]
            other_type = other_data['type']
            
            # Skip if entities are of different types
            if entity_type != other_type:
                continue
            
            # Handle LINE entities
            if entity_type == 'LINE':
                start, end = entity_data['coords']
                other_start, other_end = other_data['coords']
                
                # Check if points are close enough
                # Cheap rejection: when no pair of endpoints is within tolerance,
                # require at least one endpoint to lie within tolerance of the
                # other line before running the colinearity test.
                if point_distance(start, other_start) > tolerance and \
                   point_distance(start, other_end) > tolerance and \
                   point_distance(end, other_start) > tolerance and \
                   point_distance(end, other_end) > tolerance:
                    
                    min_dist = min(
                        point_to_line_distance(start, other_start, other_end),
                        point_to_line_distance(end, other_start, other_end),
                        point_to_line_distance(other_start, start, end),
                        point_to_line_distance(other_end, start, end)
                    )
                    
                    if min_dist > tolerance:
                        continue
                
                if are_lines_colinear(start, end, other_start, other_end, tolerance):
                    has_overlap, _ = lines_overlap(start, end, other_start, other_end, tolerance)
                    
                    if has_overlap:
                        graph[handle].add(other_handle)
                        graph[other_handle].add(handle)
                        continue
                    
                    # No overlap along the line: still link lines that meet end to end.
                    connections = [
                        (start, other_start),
                        (start, other_end),
                        (end, other_start),
                        (end, other_end)
                    ]
                    
                    for p1, p2 in connections:
                        if is_same_point(p1, p2, tolerance):
                            graph[handle].add(other_handle)
                            graph[other_handle].add(handle)
                            break
            
            # Handle ARC entities
            elif entity_type == 'ARC':
                entity_center, entity_radius, entity_start_angle, entity_end_angle = entity_data['data']
                other_center, other_radius, other_start_angle, other_end_angle = other_data['data']
                
                # Check if arcs have same center and radius (within tolerance)
                center_distance = point_distance(entity_center, other_center)
                radius_difference = abs(entity_radius - other_radius)
                
                if center_distance > tolerance or radius_difference > tolerance:
                    continue
                
                # Normalize angles to 0-360 range
                entity_start_angle = entity_start_angle % 360
                entity_end_angle = entity_end_angle % 360
                other_start_angle = other_start_angle % 360
                other_end_angle = other_end_angle % 360
                
                # If end angle is less than start angle, add 360 to make it easier to check overlap
                if entity_end_angle < entity_start_angle:
                    entity_end_angle += 360
                if other_end_angle < other_start_angle:
                    other_end_angle += 360
                
                # Check if arcs overlap or are adjacent
                # NOTE(review): the comparison is linear on the unwrapped angles, so
                # two arcs that meet only across 0/360 degrees (e.g. 350-370 vs 5-20)
                # do not appear to be linked here -- confirm whether that is intended.
                if (entity_start_angle <= other_end_angle + tolerance and 
                    entity_end_angle + tolerance >= other_start_angle):
                    graph[handle].add(other_handle)
                    graph[other_handle].add(handle)
                    continue
                
                # Check if arcs are adjacent (within tolerance)
                # NOTE(review): with end >= start for both arcs this case looks
                # already covered by the overlap test above -- verify.
                if (abs(entity_start_angle - other_end_angle) <= tolerance or
                    abs(entity_end_angle - other_start_angle) <= tolerance):
                    graph[handle].add(other_handle)
                    graph[other_handle].add(handle)
    
    return graph, entity_info, handle_to_entity

def find_connected_components(graph):
    """Find connected components in the graph.

    Args:
        graph: mapping of node -> iterable of neighbouring nodes. A neighbour
            that has no entry of its own is treated as a node without further
            neighbours.

    Returns:
        List of sets, one per component, in the order their first node appears
        when iterating ``graph``.
    """
    visited = set()
    components = []

    # Iterate over a snapshot of the keys so the traversal can never trip over
    # a mapping that changes size while it is being walked.
    for node in list(graph):
        if node in visited:
            continue

        component = set()
        queue = deque([node])

        # Breadth-first flood fill from this seed node.
        while queue:
            current = queue.popleft()
            if current in visited:
                continue

            visited.add(current)
            component.add(current)

            # Membership test instead of graph[current]: avoids KeyError on a
            # plain dict and avoids inserting new keys into a defaultdict.
            neighbors = graph[current] if current in graph else ()
            queue.extend(n for n in neighbors if n not in visited)

        components.append(component)

    return components

def can_merge_arcs(arc1_data, arc2_data, tolerance):
    """Check if two arcs can be merged into a single arc.

    Two arcs are mergeable when their centers and radii agree within
    ``tolerance`` and their angular ranges overlap or touch. The angular test
    is done modulo 360 degrees, so arcs that meet across the 0/360 boundary
    (for example 270-360 and 0-90) are recognised as adjacent.

    Args:
        arc1_data: ``(center, radius, start_angle, end_angle)``, angles in degrees.
        arc2_data: same layout as ``arc1_data``.
        tolerance: distance tolerance, also used as angular slack in degrees.

    Returns:
        True when the arcs can be merged, False otherwise.
    """
    center1, radius1, start_angle1, end_angle1 = arc1_data
    center2, radius2, start_angle2, end_angle2 = arc2_data

    # Different circles can never be merged.
    if point_distance(center1, center2) > tolerance or abs(radius1 - radius2) > tolerance:
        return False

    def as_start_and_sweep(start_angle, end_angle):
        # Start in [0, 360) and counter-clockwise sweep in [0, 360).
        start = start_angle % 360
        end = end_angle % 360
        if end < start:
            end += 360
        return start, end - start

    start1, sweep1 = as_start_and_sweep(start_angle1, end_angle1)
    start2, sweep2 = as_start_and_sweep(start_angle2, end_angle2)

    def starts_within(start, other_start, other_sweep):
        # True when `start` falls inside the other arc's range widened by the
        # tolerance on both sides, measured counter-clockwise modulo 360.
        return (start - other_start + tolerance) % 360 <= other_sweep + 2 * tolerance

    # Two circular ranges overlap or touch exactly when one of them starts
    # inside the other.
    return (starts_within(start2, start1, sweep1) or
            starts_within(start1, start2, sweep2))

def merge_arcs(arc_datas, tolerance):
    """Merge a list of arc data into one or more arcs.

    Arcs are grouped by center and radius (snapped to ``tolerance``). Inside
    each group, arcs whose angular ranges overlap or touch within
    ``tolerance`` degrees are fused, including arcs that meet across the
    0/360 boundary. A fused range covering a full turn is returned as the
    full circle ``(0, 360)``.

    Args:
        arc_datas: iterable of ``(center, radius, start_angle, end_angle)``
            tuples with angles in degrees, counter-clockwise.
        tolerance: distance tolerance for grouping, also used as angular slack
            in degrees. Zero means exact grouping.

    Returns:
        List of ``(center, radius, start_angle, end_angle)`` tuples. Each
        merged arc keeps the center and radius of the arc it started from.
    """
    if not arc_datas:
        return []

    step = abs(tolerance)

    def snap(value):
        # Grid index for grouping; exact value when the tolerance is zero so
        # we never divide by zero.
        return round(value / step) if step else value

    # Group arcs by center and radius
    groups = {}
    for center, radius, start_angle, end_angle in arc_datas:
        key = (snap(center[0]), snap(center[1]), snap(center[2]), snap(radius))
        groups.setdefault(key, []).append((center, radius, start_angle, end_angle))

    result_arcs = []

    for group in groups.values():
        # Express every arc as an unwrapped range [start, end] with start in
        # [0, 360) and end >= start.
        spans = []
        for center, radius, start_angle, end_angle in group:
            start = start_angle % 360
            if end_angle - start_angle >= 360 - tolerance:
                # Raw sweep of a full turn (e.g. 0 -> 360): keep it as a full
                # circle instead of letting the modulo collapse it to nothing.
                end = start + 360
            else:
                end = end_angle % 360
                if end < start:
                    end += 360
            spans.append((center, radius, start, end))

        # Sort by start angle
        spans.sort(key=lambda span: span[2])

        # Linear sweep: fuse each range into the previous one when they touch.
        merged = [spans[0]]
        for center, radius, start, end in spans[1:]:
            cur_center, cur_radius, cur_start, cur_end = merged[-1]
            if cur_end + tolerance >= start:
                merged[-1] = (cur_center, cur_radius, cur_start, max(cur_end, end))
            else:
                merged.append((center, radius, start, end))

        # Wrap-around: the last range may run past 360 and reach the first
        # range(s) one turn later. Absorb those into the last range.
        while len(merged) > 1:
            first_center, first_radius, first_start, first_end = merged[0]
            last_center, last_radius, last_start, last_end = merged[-1]
            if last_end + tolerance < first_start + 360:
                break
            merged[-1] = (last_center, last_radius, last_start,
                          max(last_end, first_end + 360))
            merged.pop(0)

        # Convert back to DXF-style angles. The full-circle test must use the
        # unwrapped range, before any modulo is applied.
        for center, radius, start, end in merged:
            if end - start >= 360 - tolerance:
                result_arcs.append((center, radius, 0, 360))
            else:
                result_arcs.append((center, radius, start % 360, end % 360))

    return result_arcs

def merge_component_entities(component, entity_info, handle_to_entity, msp, tolerance):
    """Merge a connected component of entities.

    Collects the LINE and ARC members of ``component``, creates replacement
    entities in ``msp`` and reports which original handles should be removed.
    Nothing is deleted here; the caller acts on the returned handles.

    Args:
        component: Iterable of entity handles forming one connected component.
        entity_info: Mapping handle -> dict with a 'type' key, plus 'coords'
            (a ``(start, end)`` pair) for LINE entries or 'data' for ARC
            entries (passed unchanged to ``merge_arcs``).
        handle_to_entity: Mapping handle -> entity, used to read the DXF
            attributes that are copied onto the new entities.
        msp: Layout that receives the newly created entities.
        tolerance: Tolerance forwarded to the point-equality, collinearity
            and arc-angle comparisons.

    Returns:
        Tuple ``(new_entities, delete_handles)``: the list of created
        entities and the set of component handles present in ``entity_info``
        (every such handle is included, whatever its type).
    """
    if not component:
        return [], set()
    
    # Group entities by type
    lines = []
    arcs = []
    delete_handles = set()
    
    for handle in component:
        if handle in entity_info:
            entity_data = entity_info[handle]
            entity_type = entity_data['type']
            
            if entity_type == 'LINE':
                start, end = entity_data['coords']
                lines.append((start, end))
            elif entity_type == 'ARC':
                arc_data = entity_data['data']
                arcs.append(arc_data)
            
            # Marked for deletion even when the type is neither LINE nor ARC.
            delete_handles.add(handle)
    
    new_entities = []
    
    # Process lines
    if lines:
        # Attributes for the new lines come from the first handle the
        # component yields.
        # NOTE(review): that handle is not checked against handle_to_entity
        # and is not necessarily a LINE - confirm this is intended.
        ref_handle = next(iter(component))
        dxfattribs = get_entity_dxf_attribs(handle_to_entity[ref_handle])
        
        # Flatten every segment into its two endpoints.
        all_points = []
        for start, end in lines:
            all_points.append(start)
            all_points.append(end)
        
        # `lines` is non-empty here, so all_points always has entries; the
        # empty branch is purely defensive.
        if not all_points:
            pass  # Skip if no points
        else:
            # Find a reference vector to project points onto
            ref_start, ref_end = lines[0]
            ref_vector = Vector.from_points(ref_start, ref_end)
            
            if ref_vector.length() == 0:
                # The first line is degenerate: look for any two endpoints
                # that differ and use them as the reference direction.
                different_points = []
                for i, p1 in enumerate(all_points):
                    for p2 in all_points[i+1:]:
                        if not is_same_point(p1, p2, tolerance):
                            different_points = [p1, p2]
                            break
                    if different_points:
                        break
                
                if not different_points:
                    # Every endpoint coincides: emit a single zero-length
                    # line. ref_vector stays zero, so the block below is
                    # skipped.
                    new_line = create_line_from_segment(msp, ref_start, ref_start, dxfattribs)
                    new_entities.append(new_line)
                else:
                    ref_start, ref_end = different_points
                    ref_vector = Vector.from_points(ref_start, ref_end)
            
            if ref_vector.length() > 0:
                ref_vector = ref_vector.normalize()
                
                # Project all points onto the reference vector
                # (signed distance from ref_start along the unit direction).
                projected_points = []
                for point in all_points:
                    point_vector = Vector.from_points(ref_start, point)
                    projection = point_vector.dot(ref_vector)
                    projected_points.append((projection, point))
                
                # Order the endpoints along the reference direction.
                projected_points.sort(key=lambda x: x[0])
                
                # Create a list of unique points
                # (pairwise comparison against every point kept so far).
                unique_points = []
                for _, point in projected_points:
                    is_duplicate = False
                    for existing in unique_points:
                        if is_same_point(existing, point, tolerance):
                            is_duplicate = True
                            break
                    
                    if not is_duplicate:
                        unique_points.append(point)
                
                # Filter out collinear middle points
                # Each interior point is tested against its neighbours in
                # unique_points (not against the last point that was kept).
                filtered_points = []
                if len(unique_points) <= 2:
                    filtered_points = unique_points
                else:
                    filtered_points = [unique_points[0]]
                    
                    for i in range(1, len(unique_points) - 1):
                        p_prev = unique_points[i-1]
                        p_curr = unique_points[i]
                        p_next = unique_points[i+1]
                        
                        if not are_collinear(p_prev, p_curr, p_next, tolerance):
                            filtered_points.append(p_curr)
                    
                    filtered_points.append(unique_points[-1])
                
                # Create new line segments
                # between consecutive remaining points, skipping pairs that
                # coincide within tolerance.
                for i in range(len(filtered_points) - 1):
                    start = filtered_points[i]
                    end = filtered_points[i + 1]
                    
                    if not is_same_point(start, end, tolerance):
                        new_line = create_line_from_segment(msp, start, end, dxfattribs)
                        new_entities.append(new_line)
    
    # Process arcs
    if arcs:
        # Same attribute source as for lines: the first handle in the component.
        ref_handle = next(iter(component))
        dxfattribs = get_entity_dxf_attribs(handle_to_entity[ref_handle])
        
        merged_arcs = merge_arcs(arcs, tolerance)
        
        for center, radius, start_angle, end_angle in merged_arcs:
            # Check if this is a full circle
            # A sweep of ~360 degrees OR ~0 degrees counts as a full circle.
            # NOTE(review): presumably the ~0 case covers merged angles that
            # were reduced modulo 360 - confirm a genuinely tiny arc cannot
            # reach this point.
            if abs(end_angle - start_angle - 360) < tolerance or abs(end_angle - start_angle) < tolerance:
                # Create a circle instead of an arc
                try:
                    new_circle = msp.add_circle(center=center, radius=radius, dxfattribs=dxfattribs)
                    new_entities.append(new_circle)
                except Exception as e:
                    logger.warning(f"Error creating circle: {e}")
                    # Fallback to creating an arc
                    new_arc = create_arc_from_parameters(msp, center, radius, 0, 360, dxfattribs)
                    new_entities.append(new_arc)
            else:
                new_arc = create_arc_from_parameters(msp, center, radius, start_angle, end_angle, dxfattribs)
                new_entities.append(new_arc)
    
    return new_entities, delete_handles

def optimize_polyline_segments(polyline, tolerance: float) -> bool:
    """Report whether a polyline has redundant (collinear) interior vertices.

    The polyline itself is never modified; this only detects whether an
    optimization would be possible.

    Vertices are read through the documented ezdxf accessors
    (``LWPolyline.get_points('xy')`` and ``Polyline.points()``) instead of
    per-index getters, which are not part of the ezdxf entity API.

    Args:
        polyline: An entity exposing ``dxftype()``; only 'LWPOLYLINE' and
            'POLYLINE' are inspected.
        tolerance: Tolerance forwarded to ``are_collinear``.

    Returns:
        True if at least one interior vertex is collinear with its two
        neighbours; False otherwise, for unsupported entity types, for
        polylines with fewer than three vertices, or when an error occurs
        (the error is logged).
    """
    try:
        kind = polyline.dxftype()
        if kind == 'LWPOLYLINE':
            # LWPOLYLINE vertices are 2D; pad with z = 0.0.
            vertices = [(p[0], p[1], 0.0) for p in polyline.get_points('xy')]
        elif kind == 'POLYLINE':
            vertices = [
                (p[0], p[1], p[2] if len(p) > 2 else 0.0)
                for p in polyline.points()
            ]
        else:
            return False

        if len(vertices) < 3:
            return False

        # Modifying the polyline is out of scope here; only report whether
        # any interior vertex could be dropped.
        return any(
            are_collinear(vertices[i - 1], vertices[i], vertices[i + 1], tolerance)
            for i in range(1, len(vertices) - 1)
        )

    except Exception as e:
        logger.error(f"Error optimizing polyline: {e}")
        return False

def get_all_entities_by_type(msp, entity_type):
    """Return a list of the entities in ``msp`` whose ``dxftype()`` equals ``entity_type``."""
    matches = []
    for candidate in msp:
        if candidate.dxftype() == entity_type:
            matches.append(candidate)
    return matches

def destroy_entity_safely(entity):
    """Call ``entity.destroy()`` without letting any error escape.

    Returns:
        True when ``destroy()`` was called and did not raise; False when the
        entity has no ``destroy`` attribute (a warning is logged) or when an
        exception occurred (an error is logged).
    """
    try:
        # Guard clause: objects without a destroy method are only reported.
        if not hasattr(entity, 'destroy'):
            logger.warning(f"Entity does not have destroy method: {entity}")
            return False
        entity.destroy()
        return True
    except Exception as e:
        logger.error(f"Error destroying entity: {e}")
        return False

def get_polylines_by_floor_layers(msp):
    """Snapshot every polyline that lives on a numbered floor layer.

    A layer qualifies when its name starts with 'Floor ' and the remainder,
    once stripped, consists only of digits ('Floor 1', 'Floor 12', ...).

    Args:
        msp: Iterable of entities (the modelspace).

    Returns:
        A list of dicts, one per captured polyline, with the keys 'entity',
        'type', 'layer', 'handle', 'dxfattribs', 'closed' and 'vertices'.
        Vertices are ``(x, y, bulge)`` tuples for LWPOLYLINE and
        ``(x, y, z, bulge)`` tuples for POLYLINE. A polyline whose data cannot
        be read is logged and left out of the result.
    """
    # NOTE(review): get_point()/get_bulge() on LWPOLYLINE and calling
    # vertices() on POLYLINE do not match the ezdxf API as documented for
    # current releases (get_points(), and a `vertices` list) - confirm against
    # the installed version. If they raise, the polyline is skipped below.
    captured = []

    for entity in msp:
        kind = entity.dxftype()
        if kind not in ['LWPOLYLINE', 'POLYLINE']:
            continue
        if not (hasattr(entity, 'dxf') and hasattr(entity.dxf, 'layer')):
            continue

        layer_name = entity.dxf.layer
        on_floor_layer = layer_name.startswith('Floor ') and layer_name[6:].strip().isdigit()
        if not on_floor_layer:
            continue

        # Everything needed to rebuild the polyline later on.
        record = {
            'entity': entity,
            'type': kind,
            'layer': layer_name,
            'handle': get_entity_handle(entity),
            'dxfattribs': get_entity_dxf_attribs(entity)
        }

        points = []
        try:
            if kind == 'LWPOLYLINE':
                for index in range(len(entity)):
                    location = entity.get_point(index)
                    bulge = entity.get_bulge(index)
                    points.append((location[0], location[1], bulge))

                record['closed'] = entity.closed

            elif kind == 'POLYLINE':
                for vertex in entity.vertices():
                    if not hasattr(vertex, 'dxf'):
                        continue
                    location = vertex.dxf.location
                    x = location[0]
                    y = location[1]
                    z = location[2] if len(location) > 2 else 0.0
                    bulge = vertex.dxf.bulge if hasattr(vertex.dxf, 'bulge') else 0.0
                    points.append((x, y, z, bulge))

                record['closed'] = entity.is_closed

        except Exception as e:
            logger.error(f"Error extracting polyline data: {e}")
            continue

        record['vertices'] = points
        captured.append(record)

    logger.info(f"Found {len(captured)} polylines in Floor layers")
    return captured

def restore_floor_polylines(msp, floor_polylines):
    """Re-create previously captured floor polylines in ``msp``.

    Each polyline is built with a single factory call so that a failure can
    no longer leave a half-built entity behind. LWPOLYLINE bulges are passed
    through the ``format='xyb'`` point format of ``add_lwpolyline`` rather
    than set one by one afterwards.

    Args:
        msp: Layout offering ``add_lwpolyline`` and ``add_polyline3d``.
        floor_polylines: Iterable of dicts with the keys 'type', 'vertices',
            'closed' and 'dxfattribs'. Vertices are ``(x, y, bulge)`` for
            'LWPOLYLINE' and ``(x, y, z, bulge)`` for 'POLYLINE'; POLYLINE
            vertices of any other length are ignored.

    Returns:
        Number of entries processed without an exception. Failures are logged
        and do not stop the remaining entries.
    """
    # Same logger object as the module-level `logger`.
    log = logging.getLogger('ezdxf-overkill')
    restored_count = 0

    for polyline_data in floor_polylines:
        try:
            kind = polyline_data['type']
            vertices = polyline_data['vertices']

            if kind == 'LWPOLYLINE':
                msp.add_lwpolyline(
                    [(v[0], v[1], v[2]) for v in vertices],
                    format='xyb',
                    close=polyline_data['closed'],
                    dxfattribs=polyline_data['dxfattribs'],
                )

            elif kind == 'POLYLINE':
                # NOTE(review): add_polyline3d is kept from the original code;
                # presumably a 2D POLYLINE comes back as a 3D one - confirm
                # that is acceptable.
                complete = [v for v in vertices if len(v) == 4]
                new_polyline = msp.add_polyline3d(
                    [(x, y, z) for x, y, z, _ in complete],
                    close=polyline_data['closed'],
                    dxfattribs=polyline_data['dxfattribs'],
                )

                if any(bulge != 0 for _, _, _, bulge in complete):
                    created = new_polyline.vertices
                    # Older ezdxf releases exposed vertices as a method.
                    if callable(created):
                        created = list(created())
                    for vertex, (_, _, _, bulge) in zip(created, complete):
                        if bulge != 0 and hasattr(vertex, 'dxf'):
                            vertex.dxf.bulge = bulge

            restored_count += 1

        except Exception as e:
            log.error(f"Error restoring polyline: {e}")

    log.info(f"Restored {restored_count} polylines to Floor layers")
    return restored_count

def remove_entities_safely(entities_list):
    """Destroy every entity in ``entities_list`` and return how many succeeded.

    Each entity is handed to ``destroy_entity_safely``; only truthy results
    are counted.
    """
    return sum(1 for entity in entities_list if destroy_entity_safely(entity))

def overkill(dxf_file_path: str, output_file_path: Optional[str] = None, tolerance: float = 1e-6, max_iterations: int = 10) -> None:
    """Main function to perform the OVERKILL operation on a DXF file.

    Repeatedly removes duplicate LINE/ARC entities and merges overlapping or
    connecting ones until an iteration changes nothing or ``max_iterations``
    is reached. Polylines on numbered 'Floor N' layers are captured and
    removed before processing and re-created afterwards.

    Args:
        dxf_file_path: Path of the DXF file to read.
        output_file_path: Where to save the result. When None the input file
            is overwritten.
        tolerance: Tolerance forwarded to duplicate detection and merging.
        max_iterations: Upper bound on the number of merge passes.

    Returns:
        None. Read and save failures are logged, not raised.
    """
    start_time = time.time()

    if output_file_path is None:
        output_file_path = dxf_file_path

    try:
        doc = ezdxf.readfile(dxf_file_path)
    except Exception as e:
        logger.error(f"Error reading DXF file: {e}")
        return

    msp = doc.modelspace()

    # First, identify and save polylines from Floor layers
    logger.info("Identifying polylines in Floor layers...")
    floor_polylines = get_polylines_by_floor_layers(msp)

    # Remove the Floor layer polylines temporarily
    if floor_polylines:
        logger.info("Temporarily removing polylines from Floor layers...")
        removed_floor_polylines = remove_entities_safely([data['entity'] for data in floor_polylines])
        logger.info(f"Temporarily removed {removed_floor_polylines} polylines from Floor layers")

    total_deleted = 0
    total_created = 0
    original_entity_count = 0

    logger.info("Starting OVERKILL process...")

    iteration = 0
    changes_made = True

    while changes_made and iteration < max_iterations:
        iteration += 1
        logger.info(f"\n--- Iteration {iteration} ---")

        # Get all supported entities
        lines = get_all_entities_by_type(msp, 'LINE')
        arcs = get_all_entities_by_type(msp, 'ARC')
        entities = lines + arcs

        if iteration == 1:
            original_entity_count = len(entities)

        if len(entities) == 0:
            logger.info("No supported entities found in the drawing.")
            break

        logger.info(f"Processing {len(entities)} entities "
                   f"({len(lines)} lines, {len(arcs)} arcs)...")

        # Detect and remove duplicates
        duplicate_handles = detect_duplicates(entities, tolerance)

        # Index entities by handle once instead of rescanning the whole list
        # for every duplicate. setdefault keeps the first entity per handle,
        # matching the original first-match lookup.
        first_entity_by_handle = {}
        for entity in entities:
            first_entity_by_handle.setdefault(get_entity_handle(entity), entity)

        duplicates_deleted = 0
        for handle in duplicate_handles:
            if handle in first_entity_by_handle:
                destroy_entity_safely(first_entity_by_handle[handle])
                duplicates_deleted += 1

        if duplicates_deleted > 0:
            logger.info(f"Removed {duplicates_deleted} duplicate entities.")
            total_deleted += duplicates_deleted

            # Refresh the entity list
            lines = get_all_entities_by_type(msp, 'LINE')
            arcs = get_all_entities_by_type(msp, 'ARC')
            entities = lines + arcs

        logger.info("Analyzing overlapping and connecting entities...")
        graph, entity_info, handle_to_entity = build_mergeable_graph(entities, tolerance)

        components = find_connected_components(graph)

        # Only components with more than one member can be merged.
        mergeable_components = [comp for comp in components if len(comp) > 1]
        logger.info(f"Found {len(mergeable_components)} sets of entities that can be merged.")

        if not mergeable_components:
            logger.info("No more entities can be merged.")
            changes_made = False
            break

        logger.info("Merging entity components...")
        iteration_new_entities = []
        iteration_handles_to_delete = set()

        for i, component in enumerate(mergeable_components):
            if i % 100 == 0 and i > 0:
                logger.info(f"Processed {i}/{len(mergeable_components)} components...")

            component_new_entities, component_delete_handles = merge_component_entities(
                component, entity_info, handle_to_entity, msp, tolerance
            )

            iteration_new_entities.extend(component_new_entities)
            iteration_handles_to_delete.update(component_delete_handles)

        # Originals are destroyed only after every component has been merged.
        entities_deleted = 0
        for handle in iteration_handles_to_delete:
            if handle in handle_to_entity:
                destroy_entity_safely(handle_to_entity[handle])
                entities_deleted += 1

        logger.info(f"Iteration {iteration} results:")
        logger.info(f"  - Entities merged/deleted: {entities_deleted}")
        logger.info(f"  - New entities created: {len(iteration_new_entities)}")

        total_deleted += entities_deleted
        total_created += len(iteration_new_entities)

        changes_made = (entities_deleted > 0) or (duplicates_deleted > 0)

    # Restore the Floor layer polylines
    if floor_polylines:
        logger.info("Restoring polylines to Floor layers...")
        restored_count = restore_floor_polylines(msp, floor_polylines)
        logger.info(f"Restored {restored_count} polylines to Floor layers")

    # Only the save itself is guarded, so a failure while reporting can no
    # longer be mislabelled as a save error.
    try:
        doc.saveas(output_file_path)
    except Exception as e:
        logger.error(f"Error saving DXF file: {e}")
        return

    execution_time = time.time() - start_time

    current_entity_count = len(get_all_entities_by_type(msp, 'LINE')) + len(get_all_entities_by_type(msp, 'ARC'))
    reduction = original_entity_count - current_entity_count
    # A drawing without LINE/ARC entities has an original count of 0; report
    # 0% rather than dividing by zero.
    reduction_percent = (reduction / original_entity_count * 100) if original_entity_count else 0.0

    logger.info(f"\nOVERKILL operation completed in {execution_time:.2f} seconds:")
    logger.info(f"  - Iterations performed: {iteration}")
    logger.info(f"  - Original entity count: {original_entity_count}")
    logger.info(f"  - Total entities deleted: {total_deleted}")
    logger.info(f"  - Total new entities created: {total_created}")
    logger.info(f"  - Final entity count: {current_entity_count}")
    logger.info(f"  - Entity reduction: {reduction} entities "
              f"({reduction_percent:.1f}%)")
    if floor_polylines:
        logger.info(f"  - Floor layer polylines preserved: {len(floor_polylines)}")
    logger.info(f"  - Result saved to {output_file_path}")

def main0(input_dxf0, output_dxf, tolerance=1e-6, max_iterations=10):
    """Print a short banner, then run ``overkill`` with the given settings."""
    banner = (
        f"Starting OVERKILL process on {input_dxf0}",
        f"Using tolerance value: {tolerance}",
        f"Maximum iterations: {max_iterations}",
        "This may take a moment for complex drawings...",
    )
    for message in banner:
        print(message)

    overkill(input_dxf0, output_dxf, tolerance, max_iterations)

# all polylines to lines

In [3]:
def lwpolyline_to_lines(doc):
    """Explode LWPOLYLINE entities into individual LINE entities.

    Polylines on floor layers are left alone. A layer counts as a floor layer
    when its lower-cased name is one of the named floors ('ground floor',
    'first floor', ..., 'terrace floor'), or when it starts with 'floor' and
    the stripped remainder begins with a digit ('Floor1', 'floor 2', ...).

    Each new line copies the layer, color and linetype of its polyline.
    Segments are straight: only the vertex coordinates are used. A closed
    polyline gets an extra segment from its last vertex back to its first.

    Args:
        doc: Document exposing ``modelspace()``.

    Returns:
        Tuple ``(new_lines, original_polylines, skipped_polylines)``:
        the created lines, the polylines that were converted (and may now be
        deleted), and the polylines skipped because of their layer. A polyline
        whose conversion raised is logged and is NOT reported as converted, so
        callers do not delete geometry that was never replaced.
    """
    # Same logger object as the module-level `logger`.
    log = logging.getLogger('ezdxf-overkill')

    # Loop-invariant; built once instead of once per polyline.
    floor_names = {
        "ground floor", "first floor", "second floor", "third floor",
        "fourth floor", "fifth floor", "sixth floor", "seventh floor",
        "terrace floor",
    }

    modelspace = doc.modelspace()
    new_lines = []
    original_polylines = []
    skipped_polylines = []

    for lwpolyline in list(modelspace.query('LWPOLYLINE')):
        layer = lwpolyline.dxf.layer if hasattr(lwpolyline.dxf, 'layer') else ""
        layer_lower = layer.lower()

        skip_layer = layer_lower in floor_names
        if not skip_layer and layer_lower.startswith("floor"):
            # "FloorX" / "Floor X": the text after the prefix starts with a digit.
            layer_suffix = layer[5:].strip()
            skip_layer = bool(layer_suffix) and layer_suffix[0].isdigit()

        if skip_layer:
            skipped_polylines.append(lwpolyline)
            continue

        color = lwpolyline.dxf.color if hasattr(lwpolyline.dxf, 'color') else None
        linetype = lwpolyline.dxf.linetype if hasattr(lwpolyline.dxf, 'linetype') else None

        try:
            points = list(lwpolyline.vertices())
            segments = list(zip(points, points[1:]))
            # Fewer than two points produce no lines at all, closed or not.
            if len(points) >= 2 and lwpolyline.closed:
                segments.append((points[-1], points[0]))

            for start_point, end_point in segments:
                new_line = modelspace.add_line(start=start_point, end=end_point)
                new_line.dxf.layer = layer
                if color is not None:
                    new_line.dxf.color = color
                if linetype is not None:
                    new_line.dxf.linetype = linetype
                new_lines.append(new_line)
        except Exception as e:
            # Keep going with the other polylines, but leave this one out of
            # the "converted" list so it is not deleted afterwards.
            log.warning(f"Could not convert LWPOLYLINE on layer '{layer}': {e}")
            continue

        original_polylines.append(lwpolyline)

    return new_lines, original_polylines, skipped_polylines

def delete_original_polylines(doc, polylines):
    """Delete the given polylines from the document's modelspace.

    Deletion is best effort: a polyline that cannot be deleted is logged and
    the remaining ones are still processed. Only ``Exception`` subclasses are
    handled, so interrupts and exit requests are no longer swallowed.

    Args:
        doc: Document exposing ``modelspace()``.
        polylines: Iterable of entities to delete.
    """
    # Same logger object as the module-level `logger`.
    log = logging.getLogger('ezdxf-overkill')
    modelspace = doc.modelspace()
    for polyline in polylines:
        try:
            modelspace.delete_entity(polyline)
        except Exception as e:
            log.warning(f"Could not delete polyline: {e}")

def verify_conversion(doc):
    """Return how many LWPOLYLINE entities are still in the document's modelspace."""
    remaining = list(doc.modelspace().query('LWPOLYLINE'))
    return len(remaining)

def main1(input_dxf1, output_dxf, delete_originals=True):
    """Convert the LWPOLYLINEs of a DXF file to LINEs and save the result.

    Args:
        input_dxf1: Path of the DXF file to read.
        output_dxf: Path the modified drawing is saved to.
        delete_originals: When true, polylines that were converted are
            deleted from the drawing.

    Returns:
        True on success; False if any step raised (the error is printed).
    """
    try:
        drawing = ezdxf.readfile(input_dxf1)
        created, converted, skipped = lwpolyline_to_lines(drawing)

        # Only polylines that were actually converted are removed.
        if delete_originals and converted:
            delete_original_polylines(drawing, converted)

        leftover = verify_conversion(drawing)
        report = (
            f"Converted {len(converted)} polylines to {len(created)} lines",
            f"Skipped {len(skipped)} polylines on floor layers",
            f"Remaining polylines after conversion: {leftover}",
        )
        for message in report:
            print(message)

        drawing.saveas(output_dxf)
    except Exception as e:
        print(f"Error in conversion: {str(e)}")
        return False
    return True

# all layers to 0

In [4]:
import ezdxf

def move_entities_to_zero_layer(doc, preserve_layers=None):
    """
    Moves all modelspace entities to layer '0' except those on preserved layers.

    Layer names are compared case-insensitively.

    Parameters:
    - doc: An ezdxf document object.
    - preserve_layers: Iterable of layer names to keep unchanged
      (default: the floor layers listed below).
    """
    if preserve_layers is None:
        # NOTE(review): the defaults mix "FloorN" (1-5) and "Floor N" (6-8),
        # so e.g. "Floor 1" and "Floor6" are NOT preserved - confirm intended.
        preserve_layers = [
            "Floor1", "Floor2", "Floor3", "Floor4",
            "Floor5", "Floor 6", "Floor 7", "Floor 8",
            "terrace floor"
        ]

    # Lower-case the names once, not once per entity, and use a set for
    # constant-time membership tests.
    preserved = {layer.lower() for layer in preserve_layers}

    for entity in doc.modelspace():
        if entity.dxf.layer.lower() not in preserved:
            entity.dxf.layer = "0"  # Move entity to layer '0'

def main2(input_dxf2, output_dxf):
    """
    Loads a DXF file, moves its entities to layer '0' (preserved layers excepted)
    and writes the result.

    A missing input file or a structurally invalid DXF file is reported with a
    printed message instead of an exception.

    Parameters:
    - input_dxf2 (str): Path to input DXF file.
    - output_dxf (str): Path to save the modified DXF file.
    """
    try:
        drawing = ezdxf.readfile(input_dxf2)
        move_entities_to_zero_layer(drawing)
        drawing.saveas(output_dxf)
        print(f"DXF file saved as {output_dxf}")
    except (FileNotFoundError, ezdxf.DXFStructureError) as error:
        if isinstance(error, FileNotFoundError):
            print(f"Error: File not found - {input_dxf2}")
        else:
            print(f"Error: Invalid or corrupted DXF file - {input_dxf2}")
        


# deleting blocks

In [5]:


def count_and_remove_blocks(input_dxf3, output_dxf):
    """Count and delete every block reference (INSERT) in the modelspace.

    The counts are printed, every INSERT entity is deleted from the
    modelspace, and the drawing is saved to ``output_dxf``.

    Args:
        input_dxf3: Path of the DXF file to read.
        output_dxf: Path the modified drawing is saved to.

    Returns:
        Dict mapping the upper-cased block name to the number of references
        found. An empty dict is returned when any error occurs (the error is
        printed), including DXF errors, which previously returned None.
    """
    try:
        # Load DXF file
        print(f"Loading DXF file: {input_dxf3}")
        dwg = ezdxf.readfile(input_dxf3)
        msp = dwg.modelspace()

        # Query once; the same snapshot serves counting and deleting.
        inserts = list(msp.query('INSERT'))

        # Count blocks
        block_counts = {}
        for insert in inserts:
            block_name = insert.dxf.name.upper()
            block_counts[block_name] = block_counts.get(block_name, 0) + 1

        # Print block counts
        print("\nBlock Counts:")
        for name, count in block_counts.items():
            print(f"  {name}: {count}")

        # Delete the references; one failure does not stop the rest.
        for insert in inserts:
            # Read the name first, in case it is unavailable after deletion.
            block_name = insert.dxf.name
            try:
                msp.delete_entity(insert)
            except Exception as e:
                print(f"  Error processing block {block_name}: {str(e)}")

        # Save the modified document
        dwg.saveas(output_dxf)

        return block_counts
    except ezdxf.DXFError as e:
        print(f"DXF Error: {str(e)}")
    except Exception as e:
        print(f"Error: {str(e)}")
    return {}

def main3(input_dxf3, output_dxf):
    """Run ``count_and_remove_blocks`` on the given files; the counts are discarded."""
    _ = count_and_remove_blocks(input_dxf3, output_dxf)
    



# deleting hatch

In [6]:
def get_path_type_name(path) -> str:
    """
    Return a readable name for a hatch boundary path.

    Args:
        path: Hatch boundary path object

    Returns:
        str: The name mapped to ``path.path_type`` (1, 2, 4, 8 or 16),
        'Unknown' for any other ``path_type`` value, or the object's class
        name when it has no ``path_type`` attribute.
    """
    # No path_type at all: fall back to the class name.
    if not hasattr(path, 'path_type'):
        return type(path).__name__

    names_by_code = {
        1: 'External',
        2: 'Polyline',
        4: 'Derived',
        8: 'Textbox',
        16: 'Outermost'
    }
    code = path.path_type
    return names_by_code.get(code, 'Unknown')

def analyze_and_remove_hatches(file_path: str, output_path: str) -> None:
    """
    Delete every HATCH entity from the modelspace of a DXF file.

    The cleaned drawing is saved to ``output_path`` and then passed to
    ``validate_hatches``. Any exception propagates to the caller unchanged.

    Args:
        file_path (str): Path to the input DXF file
        output_path (str): Path to save the output DXF file
    """
    drawing = ezdxf.readfile(file_path)
    space = drawing.modelspace()

    for hatch in space.query('HATCH'):
        space.delete_entity(hatch)

    drawing.saveas(output_path)
    validate_hatches(output_path)

def validate_hatches(file_path: str) -> None:
    """
    Checks that a DXF file no longer contains HATCH entities in its modelspace.

    The check never raises. Its outcome is reported through the module logger:
    a warning when hatches remain or the file is missing, an error when the
    file cannot be read. Previously every outcome was silently discarded.

    Args:
        file_path (str): Path to the DXF file to validate
    """
    try:
        doc = ezdxf.readfile(file_path)
        remaining_hatches = list(doc.modelspace().query('HATCH'))
    except FileNotFoundError:
        logger.warning(f"Cannot validate hatches, file not found: {file_path}")
    except Exception as e:
        logger.error(f"Error validating hatches in {file_path}: {e}")
    else:
        if remaining_hatches:
            logger.warning(
                f"{len(remaining_hatches)} HATCH entities remain in {file_path}"
            )

def main4(input_dxf4, output_dxf):
    """Remove all HATCH entities from ``input_dxf4`` and save to ``output_dxf``.

    ``analyze_and_remove_hatches`` already validates the saved file, so the
    file is not re-read and validated a second time here.
    """
    analyze_and_remove_hatches(input_dxf4, output_dxf)



# mtext correction 

In [7]:
import ezdxf
from rapidfuzz import process
import re

# Correct room names, ordered for priority.
# NOTE: despite its name this is a list, not a dict. Every name appears once,
# at the position of its first occurrence, so a name cannot be offered twice
# as a candidate. Entries that differ only in case or spacing (e.g. 'DECK' /
# 'Deck', 'Storeroom' / 'Store Room') are distinct strings and are all kept.
room_name_dict = [
    'Master Bedroom', 'Bedroom', 'Bathroom', 'Kitchen', 'Living Room', 'Dining Room', 'Foyer', 'Garage',
    'Staircase', 'Balcony', 'Reading Area', 'Powder Room', 'Lift', 'Office', 'Hall', 'Storeroom', 'Parking',
    'Utility Area', 'Wash Area', 'Driveway', 'Fence', 'Gate', 'Door', 'Mandir + Dining area',
    'Window', 'Wall', 'Ceiling', 'Basement', 'Conference Room', 'Home Theater', 'Pantry Area',
    'Garden', 'Entrance', 'Terrace', 'Multipurpose Room', 'Parking Area', 'Store Room', 'Passage', 'Walkway',
    'Corridor', 'O.T.S', 'Sitout', 'Terrace Plan', 'Storage + Wash area',
    'Hanging Garden', 'Window Sitting', 'Vegetable Garden', 'Guest Bedroom', 'Drawing Room', 'DECK', 'Walking Area', 'Meditation Room',
    'Atrium', 'Gazebo', 'Dressing Area', 'Mumty', 'WashBasin', 'Lounge', 'V.I.P Lounge', 'Guest Room', 'Landing Area',
    'Parents Bedroom', 'Reading Room', 'Deck', 'Family Room', 'Garage Door', 'Hanging Pool',
    'Multipurpose Hall',
]

# Expanded mapping for special cases with broader variations
expanded_variations = {
    'Bathroom': [
                'bathroom', 'bathrom', 'batroom', 'bathrum', 'bathrm', 'bathrooom','Bathroom3','Bathroom 1','Bathroom1','Bathroom2',
                'bathroon', 'bathrom', 'bathruom', 'bahtroom', 'bathrm', 'bthroom','Toileeet','Co. Toilet','Bathroom 2','Bathroom4','Bathroom5',
                'bathrom', 'bathruum', 'bathrum', 'batheroom','Lavatory', 'bathrome','COMMON TOILET','Bathroom 3^J5','Bathroom 3^J5',
                'bath', 'bath_room', 'bath.', 'bath_', 'bthrm', 'bathrm','Existing Toilets','Bathroom 1^J5','Bathroom 2^J5','Bathroom 4^J5'
                'rest room', 'toilet', 'washroom', 'loo', 'WC', 'water closet','Toilet"','{\FVerdana;\W3.09116; }TOILET',
                'bath-room','TOILET\P3','TOILET+DRESS','TOILET \P3','Common\PToilet\P8','Common\PToilet','TOILET 10'
            ],
            'Bedroom': [
                'bedroom','Bedroom1','Bedroom2','Bedroom3', 'bedrom', 'bedrm', 'bedrum', 'beroom', 'bedrom',
                'bedroon', 'beadroom', 'bedrooom', 'bedrome','Bedroom4', 'bedrume','BEDROOM\P',
                'bed room', 'bed_room', 'bed.', 'bed_', 'bdrm', 'br',
                'bed-room','Bedroo m','BEDROOM\P 30'
            ],
            'Kitchen': [
                'kitchen', 'kichen', 'kitchn', 'kicthen', 'kithen', 'ktchen',
                'kitchon', 'ketchin', 'kitched', 'kitch', 'kitchem', 'kitchine',
                'kichen', 'kitchne', 'ktchn', 'ketchn','\H30x;KITCHEN\P17',
                'kit', 'kit.', 'kit_', 'kitchen_area', 'cooking_area','{\FVerdana;\W2.35307; }KITCHEN',
                'cook room', 'cookroom', 'ktch', 'ktch.', 'ktn', 'kit-area'
            ],
            'Living Room': [
                'living room', 'livng room', 'living rum', 'liveing room', 'livingroom',
                'living rom', 'livin room', 'livving room', 'living roon', 'livroom',
                'liv room', 'livingrm', 'living rm','Living Cum Dining',
                'lr', 'l_r', 'liv_rm', 'liv.rm', 'living', 'lounge','Living Room',
                'living_room', 'living.room', 'liv-room', 'living area'  # Fixed: space, not underscore
            ],
            'Dining Room': [
                'dining room', 'dinning room', 'dining rum', 'dineing room', 'diningroom',
                'dining rom', 'dining rm', 'dinin room', 'dinig room', 'dning room',
                'dr', 'd_r', 'din_rm', 'din.rm', 'dining', 'dine','Dining Room',
                'dining_room', 'dining.room', 'din-room', 'dining area',  # Fixed: space, not underscore
                'dinner room', 'dinner_room', 'dining hall', 'dining_hall'
            ],
            'Foyer': [
        'foyer', 'foier', 'foyar', 'foer', 'foyeer', 'foyyer','COMMON FOYER',
        'foir', 'foyre', 'foye', 'fayer', 'foerr', 'foyr','COMMON FOYER',
        'foeyer', 'foyerr', 'ffoyer', 'foer', 'foyar', 'foyeur',
        'foier', 'fioyer', 'fouyer', 'foye', 'foy er', 'fo-yer','Foyer area'
    ],
            'Garage': [
                'garage', 'garag', 'grage', 'garege', 'garaj', 'garrage',
                'gabage', 'garege', 'garadge', 'garige', 'garrage', 'garaj',
                'grge', 'grage', 'gaage', 'garge',
                'gar', 'gar.', 'gar_', 'car_garage', 'auto_garage',
                'vehicle storage', 'car storage', 'garage_space', 'grg',
                'car garage', 'auto garage', 'car_park', 'car-garage'
            ],
            'Staircase': [
                'staircase', 'stairway', 'stairwell', 'staircase', 'starecase',
                'stairces', 'stairkas', 'stairecase', 'staircase', 'stairwel',
                'stair', 'str', 'st.', 'stc', 'stair_', 'stairs',
                'stair_case', 'stair.case', 'stair-case', 'stairway',
                'step', 'steps', 'stair_well', 'stair_way'
            ],
            'Balcony': [
                'balcony', 'balconey', 'balconi', 'balconey', 'balkony',
                'balcny', 'balconey', 'balcon', 'balconies', 'balconie',
                'bal', 'bal.', 'bal_', 'balc', 'blcny',
                'balcony_area', 'open_balcony', 
                'open_terrace', 'bal-area'
            ],
            'Powder Room': [
        'powder room', 'powderroom', 'powder_room', 'powder-room',
        'powdr room', 'powdr_room', 'powderrom', 'powder rom','Powder^JRoom',
        'pwd room', 'pwd_room', 'pwdr room', 'pwdrroom','Powder^JRoom',
        'powder rm', 'powderrooom', 'powderoom', 'pwder room'
            ],

            'Lift': [
                'elevator', 'elevater', 'elivator', 'elevador', 'elavator',
                'elevetor', 'elevtr', 'elevater', 'elivater', 'elevatir',
                'elev', 'elv', 'elev.', 'lift', 'lift_area',
                'elevator_space', 'lift_core', 'elev_core', 'lift-core',
                'elevator_shaft', 'lift_shaft', 'elev-area','{\FVerdana;\W3.50148; }LIFT'
            ],
            'Office': [
                'office', 'ofice', 'offise', 'offfice', 'offic',
                'ofis', 'offyce', 'ofice', 'oface', 'offis',
                'off', 'off.', 'off_', 'workspace', 'work_space',
                'office_room', 'work_area', 'office_area', 'off-room',
                'working_space', 'work_station', 'off-area'
            ],
            'Hall': [
                'hall', 'hal', 'halll', 'hall1', 'haal', 'hawl', 'haul',
                'entry hall', 'entry_hall', 'entry-hall', 'entryhall', 'entr_hall',
                'living hall', 'living_hall', 'livinghall', 'liv_hall', 'liv.hall',
                'lobby', 'loby', 'lobey', 'lobbey', 'lobb', 'lby',
                'entrance hall', 'entrance_hall', 'ent_hall', 'ent.hall',
                'hl', 'h_l', 'ent_hl', 'hal_', 'hall_'
            ],
            'Storeroom': [
                'storeroom', 'storroom', 'storoom', 'stor_room', 'store_room',
                'storerum', 'storerrom', 'storrm', 'storagerm', 'storge_room',
                'storage room', 'storage_room', 'storage-room', 'storage_area',
                'store', 'storage', 'strg', 'strg_rm', 'stor_rm', 'str_rm',
                'utility room', 'utility_room', 'util_room', 'util_rm',
                'cupboard', 'cubboard', 'cupbard', 'cpbrd', 'cup_brd',
                'str', 'stor', 'strm', 'st_rm', 'stor_', 'str_','{\FVerdana;\W3.61606; }STORE'
            ],

            'Utility Area': [
                'utility room', 'utilty room', 'utility rum', 'util room', 'utlity room',
                'utility_room', 'utilityroom', 'util_room', 'utilityrm', 'util_rm',
                'laundry room', 'laundry_room', 'laundryroom', 'landry room',
                'utility', 'utlty', 'util', 'laundry', 'lundry', 'wash_rm',
                'ut_rm', 'ut.rm', 'util', 'ut_', 'util_', 'lndry'
            ],
            'Wash Area':[
                'wash area', 'wash_area', 'washarea', 'washing area', 'washing_area','WASH\P8',
                'WASH \P8','WASH\P8','wash_rm','WASH','Wash Area \P5'
            ],

            'Pathway': [
                'pathway', 'pathwy', 'pathay', 'pthway', 'pathwey', 'pathwai',
                'walkway', 'walkwy', 'walk_way', 'walking_path', 'walk path',
                'sidewalk', 'side_walk', 'sidewlk', 'side walk', 'sdwlk',
                'path', 'pth', 'path_', 'walking_area', 'pedestrian_path',
                'pw', 'p_w', 'pthw', 'ww', 'w_w', 'swlk'
            ],
            'Driveway': [
                'driveway', 'drivway', 'drivwey', 'drivwy', 'drivewey', 'drivway',
                'car driveway', 'car_driveway', 'cardriveway', 'car_drive',
                'parking driveway', 'parking_drive', 'park_drive', 'drive_way',
                'vehicle drive', 'vehicle_drive', 'vehicular_path', 'car_path',
                'drv', 'dw', 'd_w', 'dr_wy', 'drv_', 'drvwy'
            ],
            'Fence': [
                'fence', 'fense', 'fenc', 'fens', 'fense', 'fench',
                'fnc', 'fn', 'f_w', 'bw', 'b_w', 'p_w'
            ],
            'Gate': [
                'gate', 'gat', 'gte', 'geit', 'gaet', 'gatte',
                'entrance gate', 'entrance_gate', 'entry gate', 'entry_gate',
                'main gate', 'main_gate', 'maingate', 'front_gate',
                'exit gate', 'exit_gate', 'side gate', 'side_gate',
                'gt', 'g_t', 'ent_gt', 'ext_gt', 'm_gt', 'gat_'
            ],
            'Door': [
                'door', 'dor', 'doar', 'dore', 'doore', 'door1',
                'entrance door', 'entrance_door', 'entry door', 'entry_door',
                'exit door', 'exit_door', 'front door', 'front_door',
                'main door', 'main_door', 'side door', 'side_door',
                'dr', 'd_r', 'entr_dr', 'ext_dr', 'm_dr', 'dr_'
            ],
            'Window': [
                'window', 'windo', 'windw', 'wndow', 'windoo', 'winder',
                'glass window', 'glass_window', 'glasswindow', 'glass_wind',
                'side window', 'side_window', 'sidewindow', 'side_wind',
                'ventilation window', 'vent_window', 'vent window', 'vent_wind',
                'wnd', 'win', 'w_d', 'gl_wnd', 'v_wnd', 'wndw'
            ],
            'Wall': [
                'wall', 'wal', 'waal', 'woll', 'waall', 'wall1',
                'partition wall', 'partition_wall', 'partitionwall', 'part_wall',
                'boundary wall', 'boundary_wall', 'boundarywall', 'bound_wall',
                'divider', 'divider wall', 'divider_wall', 'div_wall',
                'wl', 'w_l', 'p_wl', 'b_wl', 'd_wl', 'wall_'
            ],
            'Ceiling': [
                'ceiling', 'celing', 'cieling', 'ceeling', 'ciling', 'ceilng',
                'roof', 'rof', 'ruf', 'roof_ceiling', 'roof_level',
                'roofing', 'roofng', 'roof_finish', 'ceiling_finish',
                'false ceiling', 'false_ceiling', 'suspended_ceiling', 'susp_ceiling',
                'clg', 'c_g', 'rf', 'r_f', 'ceil_', 'roof_'
            ],
            'Attic': [
                'attic', 'atic', 'attik', 'atik', 'attick', 'attics',
                'loft', 'lofft', 'lauft', 'loft_space', 'loft_area',
                'roof space', 'roof_space', 'roofspace', 'roof_storage',
                'attic room', 'attic_room', 'atticroom', 'attic_storage',
                'att', 'a_t', 'lft', 'l_f', 'att_', 'loft_'
            ],
            'Basement': [
                'basement', 'basment', 'basemnt', 'basment', 'basement1',
                'underground floor', 'underground_floor', 'under_floor',
                'cellar', 'celler', 'seller', 'wine_cellar', 'storage_cellar',
                'lower floor', 'lower_floor', 'lower_level', 'sub_floor',
                'bsmt', 'b_t', 'ug_fl', 'lwr_fl', 'base_', 'cell_'
            ],
            'Conference Room': [
                'conference room', 'conferance room', 'confrence room', 'conf room',
                'meeting room', 'meeting_room', 'meetingroom', 'meet_room',
                'board room', 'board_room', 'boardroom', 'board_rm',
                'conference hall', 'conference_hall', 'conf_hall', 'meeting_hall',
                'conf', 'c_r', 'meet_r', 'brd_rm', 'conf_', 'mtg_'
            ],
            'Home Theater': [
                'home theater', 'home theatre', 'hometheater', 'hometheatre',
                'media room', 'media_room', 'mediaroom', 'media_center',
                'cinema room', 'cinema_room', 'cinemaroom', 'movie_room',
                'theater room', 'theatre_room', 'entertainment_room', 'ent_room',
                'h_t', 'thtr', 'med_rm', 'cin_rm', 'ent_rm', 'mov_rm'
            ],
            'Master Bedroom': [
                'master bedroom', 'master bedrom', 'master bedrm', 'mastr bedroom',
                'master bedrum', 'mastre bedroom', 'master bed room', 'masterbed room',
                'master beedroom', 'mstr bedroom', 'master bedroom1', 'masterbedroom',
                'master_bedroom', 'master-bedroom', 'master_bed', 'master bed',
                'main bedroom', 'main_bedroom', 'main bed', 'main_bed',
                'primary bedroom', 'primary_bedroom', 'primary bed', 'primary_bed',
                'mstr_bdrm', 'mstr_br', 'mbr', 'm_br', 'mb', 'mbed',
                'master_br', 'mstr_bed', 'mstr.bed', 'mstr.br',
                'mbdrm', 'mastbr', 'mstrbr', 'mbedroom',
                'masterbedroom', 'masterbed', 'masterbr', 'masterbrm',
                'mstrbedrm', 'mstrbdrm', 'masterbdrm', 'mstrbed'
            ],
            'Master Bathroom': [
                'master bathroom', 'master bathrom', 'master bathrm', 'mastr bathroom',
                'master bathrum', 'mastre bathroom', 'master bath room', 'masterbath room',
                'master bathrom', 'mstr bathroom', 'master bathroom1', 'masterbathroom',
                'master_bathroom', 'master-bathroom', 'master_bath', 'master bath',
                'main bathroom', 'main_bathroom', 'main bath', 'main_bath',
                'mstr_bath', 'mstr_ba', 'mba', 'm_ba', 'mbth', 'mbath',
                'master_ba', 'mstr_bth', 'mstr.bath', 'mstr.ba',
                'mbath', 'mastba', 'mstrba', 'mbathroom','Master Toilet',
                'masterbathroom', 'masterbath', 'masterba', 'masterbrm',
                'mstrbathrm', 'mstrbath', 'masterbth', 'mstrwc',
                'master wc', 'master_wc', 'mstr wc', 'mstr_wc',
                'ensuite wc', 'ensuite_wc', 'en suite wc', 'en_suite_wc'
            ],
            'Pooja Area': [
                'pooja room', 'puja room', 'pooja_room', 'puja_room',
                'poojaroom', 'pujaroom', 'pooja-room', 'puja-room','Mnndir',
                'prayer room', 'prayer_room', 'worship room', 'worship_room',
                'mandir room', 'mandir_room', 'temple room', 'temple_room',
                'devghar', 'dev_ghar', 'devgriha', 'dev_griha', 'temple',
                'meditation room', 'meditation_room', 'spiritual room', 'spiritual_room',
                'pooja rum', 'puja rum', 'pooja ruum', 'puja ruum',
                'pooja rom', 'puja rom', 'poojha room', 'pujha room',
                'pooja rooom', 'puja rooom', 'pooja roon', 'puja roon',
                'pr', 'p_r', 'pjr', 'pj_rm', 'p_rm', 'pr_rm',
                'pooja_rm', 'puja_rm', 'pooja.rm', 'puja.rm',
                'mandir', 'mndr', 'templ', 'tmpl','Mnndir',
                'poojamandir', 'pujamandir', 'poojaspace', 'pujaspace',
                'poojaarea', 'pujaarea', 'mandirroom', 'templespace',
                'devmandir', 'dev_mandir', 'ghar_mandir', 'gharmandir',
                'poojamandir', 'puja_mandir', 'home_temple', 'hometemple'
            ],
            'Garden': [
                'garden', 'garde', 'gardn', 'gardenn', 'gaarden',
                'gareden', 'gardden', 'grden', 'gardyn', 'gardn',
                'garden_area', 'garden_space', 'garden_zone',
                'landscaped_area', 'landscape_area', 'landscape_zone',
                'green_area', 'green_space', 'green_zone',
                'landscape', 'landscaping', 'landscaped',
                'planted_area', 'planting_area', 'planter_zone',
                'soft_scape', 'softscape', 'soft_area',
                'gdn', 'grdn', 'gar', 'gr', 'g',
                'gard', 'g_a', 'ga', 'g_z', 'gz',
                'lnd', 'lscape', 'lscpe', 'ldsp',
                'front_garden', 'rear_garden', 'side_garden','Garden Area',
                'back_garden', 'main_garden', 'central_garden',
                'private_garden', 'common_garden', 'shared_garden',
                'gardenarea', 'gardenspace', 'gardenzone',
                'greenarea', 'greenspace', 'greenzone',
                'garden1', 'garden2', 'garden3',
                'garden_1', 'garden_2', 'garden_3',
                'gdn1', 'gdn2', 'gdn3','Garden'
            ],
            'Entrance': [
                'entrance', 'enterance', 'entrence', 'enternce', 'entrans',
                'enterence', 'entrense', 'entrace', 'enterence', 'entrenc',
                'main_entrance', 'main entrance', 'entry_point', 'entry point',
                'front_entrance', 'front entry', 'primary_entrance', 'primary entry',
                'side_entrance', 'side entry', 'rear_entrance', 'rear entry',
                'entry', 'entry_way', 'entryway', 'entrance_way',
                'entrance_lobby', 'entry_lobby', 'entrance_foyer', 'entry_foyer',
                'entrance_porch', 'entry_porch', 'entrance_vestibule', 'entry_vestibule',
                'ent', 'entr', 'ent_', 'entr_',
                'm_ent', 'main_ent', 'f_ent', 'front_ent',
                's_ent', 'side_ent', 'r_ent', 'rear_ent',
                'main_entry', 'front_entry', 'side_entry', 'back_entry',
                'service_entrance', 'service_entry', 'staff_entrance', 'staff_entry',
                'visitor_entrance', 'visitor_entry', 'public_entrance', 'public_entry',
                'mainentrance', 'frontentrance', 'sideentrance', 'rearentrance',
                'mainentry', 'frontentry', 'sideentry', 'rearentry',
                'entrypoint', 'entryway', 'entranceway', 'entryarea',
                'entrance1', 'entrance2', 'entrance3',
                'entry1', 'entry2', 'entry3',
                'ent1', 'ent2', 'ent3',
                'access', 'access_point', 'ingress', 'ingress_point',
                'approach', 'approach_way', 'entry_access', 'entrance_access',
                'portal', 'gateway', 'entry_gate', 'entrance_gate',
                'doorway', 'main_door', 'front_door', 'entry_door'
            ],

            'Terrace': [
                'terrace', 'Terrace','terace', 'terrase', 'teracce', 'terrase',
                'terrasse', 'terasse', 'terace', 'tarrace', 'terrac',
                'terrace_area', 'terrace_space', 'terrace_deck','{\FArial;\W0.95238; }Trrce',
                'open_terrace', 'covered_terrace', 'private_terrace',
                'roof_terrace', 'sky_terrace', 'garden_terrace',
                'Trrce ','{\FArial;\W0.95238; }Trrce',
                'patio', 'patio_area', 'outdoor_patio',
                'rooftop', 'rooftop_area', 
                'ter', 'terr', 'tr', 't_r','Trrce',
                'ter_', 'terr_', 'tr_', 'trc','{\FArial;\W0.95238; }Trrce ',
                'ter.', 'terr.', 'tr.', 'trc.',
                'front_terrace', 'rear_terrace', 'side_terrace',
                'upper_terrace', 'lower_terrace', 'main_terrace',
                'private_terrace', 'common_terrace', 'shared_terrace',
                'terracearea', 'terracespace', 'terracessit',
                'roofterrace', 'skyterrace', 'openterrace',
                'deckterrace', 'terraceroom', 'terracespot',
                'terrace1', 'terrace2', 'terrace3',
                'terrace_1', 'terrace_2', 'terrace_3','Terrace ',
                'ter1', 'ter2', 'ter3','{\FArial;\W0.95238; }Trrce ',
                
                'terrace_balcony', 'terrace_patio', 'terrace_lounge',
                'outdoor_terrace', 'open_air_terrace', 'top_terrace',
                'ter_floor', 'terr_level', 'terrace_level',
                'terrace_floor', 'roof_level_terrace', 'top_floor_terrace',
                'terrace_garden',  'landscape_terrace'
            ],

                 'Multipurpose Room': [
        # Common spelling variations and misspellings
        'multipurpose room', 'multi purpose room', 'multi-purpose room',
        'multipurposeroom', 'multi_purpose_room', 'multi.purpose.room',
        'multipurpose', 'multi purpose', 'multi-purpose',
        'multipurpos room', 'multi purpos room', 'multipurpose rom',
        'multipurpos', 'multi purpos', 'multi-purpos',
        'multipurpose rm', 'multi purpose rm', 'multi-purpose rm',

        # Common abbreviations
        'mpr', 'mp_rm', 'mpr_rm', 'm_p_r',
        'mp_room', 'm_p_room', 'mp.rm', 'mpr.rm',
        'multi_rm', 'mult_rm', 'mprm', 'mpr_',

        # Functional variations
        'flexible room', 'flexible space', 'flex room',
        'flex space', 'flexroom', 'flexspace',
        'flexible use room', 'flexible use space',
        'adaptable room', 'adaptable space',

        # Activity-specific variations
        'activity room', 'activity space', 'activity_room',
        'function room', 'function hall', 'function_room',
        'community room', 'community space', 'community_room',
        'common room', 'common space', 'common_room',

        # Combined purpose terms
        'multi activity room', 'multi_activity_room',
        'multi function room', 'multi_function_room',
        'multiuse room', 'multi use room', 'multi_use_room',
        'all purpose room', 'all-purpose room', 'all_purpose_room',

        # Technical variations
        'versatile space', 'versatile room', 'versatile_room',
        'convertible space', 'convertible room', 'convertible_room',
        'utility space', 'utility room', 'utility_room',

        # With numbers
        'multipurpose room1', 'multipurpose room 1', 'multipurpose_room_1',
        'mpr1', 'mpr 1', 'mpr_1',
        'multi purpose 1', 'multi-purpose 1', 'multi_purpose_1',

        # Additional descriptive variations
        'mixed use room', 'mixed-use room', 'mixed_use_room',
        'shared space room', 'shared-space room', 'shared_space_room',
        'general purpose room', 'general-purpose room', 'general_purpose_room',

        # Professional/technical terms
        'multifunctional room', 'multi functional room', 'multi_functional_room',
        'multimodal space', 'multi modal space', 'multi_modal_space',
        'polyvalent room', 'poly valent room', 'poly_valent_room',

        # Special use cases
        'events room', 'event room', 'event_room',
        'program room', 'programme room', 'program_room',
        'activities room', 'activities space', 'activities_room',

        # Combined abbreviations
        'mpurp_rm', 'mpurp.rm', 'm_purp_rm',
        'mult_purp', 'mlt_prp', 'mp_space',
        'flex_rm', 'flex.rm', 'flex_space'
    ],

            'Store Room': [
                'store room', 'storage room', 'store', 'storage', 'utility room',
                'storage area', 'store area', 'utility storage', 'storage closet',
                'storage space', 'storeroom','STORE\P 7','STORE','STORE\P','STORE'
            ],

        'Passage': [
            # Common spelling variations and misspellings
            'passage', 'passge', 'passag', 'pasage', 'passg',
            'passege', 'passaje', 'passege', 'passige', 'pasege',
            'passageway', 'passage way', 'passage-way',

            # Common abbreviations
            'psg', 'pas', 'psg_', 'psgw', 'psge',
            'pass', 'passe', 'p_way', 'p.way', 'p_w',
            'psg.', 'pas.', 'pass.', 'p.', 'pw.',

            # Location-specific variations
            'front passage', 'front_passage', 'front-passage',
            'rear passage', 'rear_passage', 'rear-passage',
            'side passage', 'side_passage', 'side-passage',
            'main passage', 'main_passage', 'main-passage',
            'service passage', 'service_passage', 'service-passage',

            # Functional variations
            'connecting passage', 'connection passage', 'connector passage',
            'linking passage', 'link passage', 'connecting_passage',
            'circulation passage', 'circu passage', 'circ_passage',
            'access passage', 'access_passage', 'access-passage',
            'movement passage', 'movement_passage', 'move_passage',

            # With numbers for identification
            'passage1', 'passage2', 'passage3',
            'passage_1', 'passage_2', 'passage_3',
            'psg1', 'psg2', 'psg3','Wide Service Passage'

            # Technical variations
            'thoroughfare', 'thorough_fare', 'thorough-fare',
            'walkthrough', 'walk_through', 'walk-through',
            'walkway', 'walk_way', 'walk-way',

            # Combined terms
            'passage corridor', 'passage_corridor', 'passage-corridor',
            'passage hall', 'passage_hall', 'passage-hall',
            'passage area', 'passage_area', 'passage-area',

            # Usage-specific
            'entry passage', 'entry_passage', 'entry-passage',
            'exit passage', 'exit_passage', 'exit-passage',
            'through passage', 'through_passage', 'thru_passage'
        ],
            'Walkway':[

            # Technical variations
            'thoroughfare', 'thorough_fare', 'thorough-fare',
            'walkthrough', 'walk_through', 'walk-through',
            'walkway', 'walk_way', 'walk-way','Walkway'
            ],

        'Corridor': [
            # Common spelling variations and misspellings
            'corridor', 'corridr', 'corridore', 'coridoor', 'corridoor',
            'corador', 'coridor', 'corridr', 'coridore', 'corridore',
            'coridoor', 'corridoor', 'corrdr', 'corridr', 'corridoor',

            # Common abbreviations
            'cor', 'corr', 'crdr', 'crd', 'cdr',
            'cor.', 'corr.', 'c.', 'cr.', 'crd.',
            'cor_', 'corr_', 'c_', 'cr_', 'crd_',

            # Location-specific variations
            'main corridor', 'main_corridor', 'main-corridor',
            'side corridor', 'side_corridor', 'side-corridor',
            'central corridor', 'central_corridor', 'central-corridor',
            'inner corridor', 'inner_corridor', 'inner-corridor',
            'outer corridor', 'outer_corridor', 'outer-corridor',

            # Functional variations
            'service corridor', 'service_corridor', 'service-corridor',
            'public corridor', 'public_corridor', 'public-corridor',
            'private corridor', 'private_corridor', 'private-corridor',
            'access corridor', 'access_corridor', 'access-corridor',
            'circulation corridor', 'circulation_corridor', 'circ_corridor',

            # With numbers for identification
            'corridor1', 'corridor2', 'corridor3',
            'corridor_1', 'corridor_2', 'corridor_3',
            'cor1', 'cor2', 'cor3',
            'corr1', 'corr2', 'corr3',

            # Floor-specific
            'ground corridor', 'ground_corridor', 'ground-corridor',
            'first corridor', 'first_corridor', 'first-corridor',
            'upper corridor', 'upper_corridor', 'upper-corridor',

            # Technical variations
            'hallway', 'hall_way', 'hall-way',
            'passageway', 'passage_way', 'passage-way',
            'circulation path', 'circulation_path', 'circ_path'

            # Usage-specific
            'connecting corridor', 'connecting_corridor', 'connect-corridor',
            'link corridor', 'link_corridor', 'link-corridor',
            'transition corridor', 'transition_corridor', 'trans-corridor',

            # Combined terms
            'corridor passage', 'corridor_passage', 'corridor-passage',
            'corridor hall', 'corridor_hall', 'corridor-hall',
            'corridor space', 'corridor_space', 'corridor-space',

            # Special purpose
            'fire corridor', 'fire_corridor', 'fire-corridor',
            'escape corridor', 'escape_corridor', 'escape-corridor',
            'emergency corridor', 'emergency_corridor', 'emerg-corridor',

            # Alternative terms
            'gallery', 'galleria', 'walkway',
            'concourse', 'arcade', 'promenade',
            'aisle', 'passageway', 'pathway'
        ],
            'Powder Room': [
        'powder room', 'powderroom', 'powder_room', 'powder-room',
        'powdr room', 'powdr_room', 'powderrom', 'powder rom','Powder^JRoom',
        'pwd room', 'pwd_room', 'pwdr room', 'pwdrroom','Powder^JRoom',
        'powder rm', 'powderrooom', 'powderoom', 'pwder room'
            ],
            'Multipurpose Room': [
        # Common spelling variations and misspellings
        'multipurpose room', 'multi purpose room', 'multi-purpose room',
        'multipurposeroom', 'multi_purpose_room', 'multi.purpose.room',
        'multipurpose', 'multi purpose', 'multi-purpose',
        'multipurpos room', 'multi purpos room', 'multipurpose rom',
        'multipurpos', 'multi purpos', 'multi-purpos',
        'multipurpose rm', 'multi purpose rm', 'multi-purpose rm',

        # Common abbreviations
        'mpr', 'mp_rm', 'mpr_rm', 'm_p_r',
        'mp_room', 'm_p_room', 'mp.rm', 'mpr.rm',
        'multi_rm', 'mult_rm', 'mprm', 'mpr_',

        # Functional variations
        'flexible room', 'flexible space', 'flex room',
        'flex space', 'flexroom', 'flexspace',
        'flexible use room', 'flexible use space',
        'adaptable room', 'adaptable space',

        # Activity-specific variations
        'activity room', 'activity space', 'activity_room',
        'function room', 'function hall', 'function_room',
        'community room', 'community space', 'community_room',
        'common room', 'common space', 'common_room',

        # Combined purpose terms
        'multi activity room', 'multi_activity_room',
        'multi function room', 'multi_function_room',
        'multiuse room', 'multi use room', 'multi_use_room',
        'all purpose room', 'all-purpose room', 'all_purpose_room',

        # Technical variations
        'versatile space', 'versatile room', 'versatile_room',
        'convertible space', 'convertible room', 'convertible_room',
        'utility space', 'utility room', 'utility_room',

        # With numbers
        'multipurpose room1', 'multipurpose room 1', 'multipurpose_room_1',
        'mpr1', 'mpr 1', 'mpr_1',
        'multi purpose 1', 'multi-purpose 1', 'multi_purpose_1',

        # Additional descriptive variations
        'mixed use room', 'mixed-use room', 'mixed_use_room',
        'shared space room', 'shared-space room', 'shared_space_room',
        'general purpose room', 'general-purpose room', 'general_purpose_room',

        # Professional/technical terms
        'multifunctional room', 'multi functional room', 'multi_functional_room',
        'multimodal space', 'multi modal space', 'multi_modal_space',
        'polyvalent room', 'poly valent room', 'poly_valent_room',

        # Special use cases
        'events room', 'event room', 'event_room',
        'program room', 'programme room', 'program_room',
        'activities room', 'activities space', 'activities_room',

        # Combined abbreviations
        'mpurp_rm', 'mpurp.rm', 'm_purp_rm',
        'mult_purp', 'mlt_prp', 'mp_space',
        'flex_rm', 'flex.rm', 'flex_space'
    ],
'O.T.S': [
        'ots', 'OTS', 'O.T.S', 'O.T.S.', 'OTS1', 'OTS 1', 'O.T.S 1', 'O.T.S. 1', 'OTS2', 'OTS 2','OTs','ots' 
        'O.T.S 2', 'O.T.S. 2', 'OTS3', 'OTS 3', 'O.T.S 3', 'O.T.S. 3', 'O T S', 'OT', 'OT.S','OTS ' ,'OTS',
        'Open To Sky', 'open to sky', 'Open-To-Sky', 'open-to-sky', 'OpToSky', 'O.T.Sky', 'OTSky',
        'Open_To_Sky', 'open_to_sky', 'OTS area', 'O.T.S. area', 'O.T.S area', 'Sky area', 'otsa',
    ],
    
            'Passage': [
            # Common spelling variations and misspellings
            'passage', 'passge', 'passag', 'pasage', 'passg',
            'passege', 'passaje', 'passege', 'passige', 'pasege',
            'passageway', 'passage way', 'passage-way',

            # Common abbreviations
            'psg', 'pas', 'psg_', 'psgw', 'psge',
            'pass', 'passe', 'p_way', 'p.way', 'p_w',
            'psg.', 'pas.', 'pass.', 'p.', 'pw.',

            # Location-specific variations
            'front passage', 'front_passage', 'front-passage',
            'rear passage', 'rear_passage', 'rear-passage',
            'side passage', 'side_passage', 'side-passage',
            'main passage', 'main_passage', 'main-passage',
            'service passage', 'service_passage', 'service-passage',

            # Functional variations
            'connecting passage', 'connection passage', 'connector passage',
            'linking passage', 'link passage', 'connecting_passage',
            'circulation passage', 'circu passage', 'circ_passage',
            'access passage', 'access_passage', 'access-passage',
            'movement passage', 'movement_passage', 'move_passage',

            # With numbers for identification
            'passage1', 'passage2', 'passage3',
            'passage_1', 'passage_2', 'passage_3',
            'psg1', 'psg2', 'psg3','Wide Service Passage'

            # Technical variations
            'thoroughfare', 'thorough_fare', 'thorough-fare',
            'walkthrough', 'walk_through', 'walk-through',
            'walkway', 'walk_way', 'walk-way',

            # Combined terms
            'passage corridor', 'passage_corridor', 'passage-corridor',
            'passage hall', 'passage_hall', 'passage-hall',
            'passage area', 'passage_area', 'passage-area','Walking Area',

            # Usage-specific
            'entry passage', 'entry_passage', 'entry-passage',
            'exit passage', 'exit_passage', 'exit-passage',
            'through passage', 'through_passage', 'thru_passage'
        ],
            'Sitout': [
        # Common spelling variations and misspellings
        'sitout', 'sit out', 'sit-out', 'sit_out',
        'sittout', 'site out', 'siteout', 'site-out',
        'sitoot', 'sittingout', 'sitting out', 'sitting-out',
        'sitdown', 'sit down', 'sit-down', 'sit_down',

        # Common abbreviations
        'so', 's_o', 'st', 'st_o',
        'sto', 'st.o', 'st.out', 'st_out',
        'sout', 's.out', 's_out', 'st.o.',


        # Location specific
        'front sitout', 'front_sitout', 'front-sitout',
        'rear sitout', 'rear_sitout', 'rear-sitout',
        'side sitout', 'side_sitout', 'side-sitout',
        'main sitout', 'main_sitout', 'main-sitout',



        # With numbers for identification
        'sitout1', 'sitout2', 'sitout3',
        'sitout_1', 'sitout_2', 'sitout_3',
        'sit out 1', 'sit out 2', 'sit out 3',

        # Combined terms
        'sitout terrace', 'sitout_terrace', 'sitout-terrace',
        'sitout deck', 'sitout_deck', 'sitout-deck',
        'sitout area', 'sitout_area', 'sitout-area',


    ],
    'Deck': [
    # Common spelling variations and misspellings
    'deck', 'dec', 'dek', 'decks',
    'dek', 'dack', 'dekk', 'dock',
    'dek room', 'deck room', 'deck-room', 'deck_room',
    'dek area', 'deck area', 'deck-area', 'deck_area',
    'dck', 'decc', 'dech', 'deckk','DECK',
    'open deck', 'open-deck', 'open_deck',
    'wooden deck', 'wood deck', 'wood-deck'
    ],

        'Corridor': [
            # Common spelling variations and misspellings
            'corridor', 'corridr', 'corridore', 'coridoor', 'corridoor',
            'corador', 'coridor', 'corridr', 'coridore', 'corridore',
            'coridoor', 'corridoor', 'corrdr', 'corridr', 'corridoor',

            # Common abbreviations
            'cor', 'corr', 'crdr', 'crd', 'cdr',
            'cor.', 'corr.', 'c.', 'cr.', 'crd.',
            'cor_', 'corr_', 'c_', 'cr_', 'crd_',

            # Location-specific variations
            'main corridor', 'main_corridor', 'main-corridor',
            'side corridor', 'side_corridor', 'side-corridor',
            'central corridor', 'central_corridor', 'central-corridor',
            'inner corridor', 'inner_corridor', 'inner-corridor',
            'outer corridor', 'outer_corridor', 'outer-corridor',

            # Functional variations
            'service corridor', 'service_corridor', 'service-corridor',
            'public corridor', 'public_corridor', 'public-corridor',
            'private corridor', 'private_corridor', 'private-corridor',
            'access corridor', 'access_corridor', 'access-corridor',
            'circulation corridor', 'circulation_corridor', 'circ_corridor',

            # With numbers for identification
            'corridor1', 'corridor2', 'corridor3',
            'corridor_1', 'corridor_2', 'corridor_3',
            'cor1', 'cor2', 'cor3',
            'corr1', 'corr2', 'corr3',

            # Floor-specific
            'ground corridor', 'ground_corridor', 'ground-corridor',
            'first corridor', 'first_corridor', 'first-corridor',
            'upper corridor', 'upper_corridor', 'upper-corridor',

            # Technical variations
            'hallway', 'hall_way', 'hall-way',
            'passageway', 'passage_way', 'passage-way',
            'circulation path', 'circulation_path', 'circ_path'

            # Usage-specific
            'connecting corridor', 'connecting_corridor', 'connect-corridor',
            'link corridor', 'link_corridor', 'link-corridor',
            'transition corridor', 'transition_corridor', 'trans-corridor',

            # Combined terms
            'corridor passage', 'corridor_passage', 'corridor-passage',
            'corridor hall', 'corridor_hall', 'corridor-hall',
            'corridor space', 'corridor_space', 'corridor-space',

            # Special purpose
            'fire corridor', 'fire_corridor', 'fire-corridor',
            'escape corridor', 'escape_corridor', 'escape-corridor',
            'emergency corridor', 'emergency_corridor', 'emerg-corridor',

            # Alternative terms
            'gallery', 'galleria', 'walkway',
            'concourse', 'arcade', 'promenade',
            'aisle', 'passageway', 'pathway'
        ],
            'Ground Floor Plan': [
                'ground floor plan', 'ground floor', 'gf plan', 'g/f plan',
                'ground level plan', 'ground level', 'ground floor layout',
                'gf layout', 'ground floor drawing', 'ground plan', 'g.f plan',
                'gf', 'g/f', 'g.f','{\FVerdana;\W5.26240; }GROUND FLOOR'
            ],
            'First Floor Plan': [
                'first floor plan', 'first floor', 'ff plan', 'f/f plan',
                'first level plan', 'first level', 'first floor layout',
                'ff layout', 'first floor drawing', 'ff', 'f/f', 'f.f',
                'f.f plan', '1st floor plan', '1st floor'
            ],
            'Atrium': [
        'atrium', 'courtyard', 'central hall', 'interior court', 'lobby',
        'open space', 'glass hall', 'entrance hall', 'reception area',
        'central garden', 'indoor plaza', 'light well', 'skylit court',
         'indoor courtyard', 'winter garden', 'pavilion',
        'grand entrance', 'rotunda', 'central space','atrium', 'atrum', 'attrium', 'atreum', 'atruim', 'atriam', 
        'atriam', 'atryum', 'atrim', 'atriam', 'artium', 'atriom', 
        'atriumm', 'aatrium', 'atriun', 'atriun', 'atrimu', 'atrrium',
        'atriuum', 'atreum', 'atrui', 'atrim', 'a-trium', 'at rium'
    ],

            "Gazebo": [
        "gazebo","garden gazebo","outdoor pavilion","garden pavilion","garden shelter","pergola","outdoor shelter","patio gazebo","gazebo structure","garden house","summer house","outdoor sitting area","covered patio structure","backyard gazebo","garden retreat"
    ],


            'Dressing Area': [
                'dressing area', 'dressing room', 'dressing', 'dress room','Wardrobe','Walk-In\PCloset ','Walk-in\PCloset ','Walk-in\PCloset',
                'wardrobe room', 'closet room', 'walk-in closet', 'walk in closet','Walk-in\PCloset','Dressing Area 1','Dressing Area 2',
                'Dressing Area 3','Dressing Area1','Dressing Area2','Dressing Area3','Dressing Area4','Dressing 1','Dressing 2','Dressing 3',
                'dressing space', 'changing room', 'wardrobe area', 'dressing chamber','Wardrobe Pathway',
            ],
            'Mumty': [
        'mumty', 'roof turret', 'roof lantern', 'cupola', 'roof monitor',
        'roof belvedere', 'roof pavilion', 'roof house', 'roof structure',
        'dormer structure', 'roof tower', 'roof observatory', 'lookout turret',
        'roof extension', 'attic structure', 'roof projection', 'roof addition',
                'mumty', 'mmty', 'muty', 'mummty', 'mumtty', 'mumpty', 
        'mumti', 'munty', 'momty', 'mumdy', 'mamty', 'mumyt', '{\FArial;\W0.80952; }Mmty',
        'mtmy', 'mutmy', 'numty', 'mum-ty', 'mum ty', 'mumtey',
        'mumyy', 'mumtiy', 'mumty1', 'mummtty', 'mumtyy','Mumty\P'
    ],
            'WashBasin': [
        'wash basin', 'washbasin', 'wash bassin', 'wash-basin','Basin', 'washbowl',
        'wash basiin', 'wash basn', 'wash bason', 'wash basen', 'wash baisin',
        'wash basin', 'wash basic', 'washbasin', 'washbaisin', 'wash bazin','Wash^JBasin',
        'wash bashin', 'wash basin', 'wash-bowl', 'was basin', 'wash bason',
        'wash bossin', 'wash baisn','WashBasin', 'whash basin', 'wash bisin', 'wahsbasin'
    ],
            'Lounge': [
        # Common spelling variations and misspellings
        'lounge', 'loung', 'lounj', 'loungue', 'louneg', 'lunge', 'loungee',
        'longue', 'launje', 'laungh', 'loungh', 'launge', 'launce', 'loung area','\pxi-3,l4,t4;T.V.^ILounge',
        'launge', 'longue', 'lunj', 'loung e', 'loung-e', 'lougne', 'loungge','ILounge','\pxi-3,l4,t4;T.V.^ILounge'

        # Common abbreviations
        'lng', 'lnge', 'lge', 'loun.', 'lou.', 'l.area', 'loun_area'
            ],
            'V.I.P lounge': [
        'V.I.P lounge', 'VIP lounge', 'exclusive lounge', 'premium lounge',
        'private lounge', 'elite area', 'members lounge', 'executive lounge',
        'priority lounge', 'first-class lounge', 'luxury waiting area',
        'preferred guest area', 'celebrity lounge', 'high-profile guest area',
        'privileged access lounge', 'special guest lounge', 'premier lounge',
        'reserved lounge', 'select guest area', 'concierge lounge','V.I.P lounge', 'VIP lounge', 'V.I.P. lounge', 'vip lounge',
        'V.I.P loung', 'V.I.P. longue', 'V.IP lounge', 'V.I.P launge',
        'V.I.P longe', 'V.I.P loungue', 'VIP longue', 'V.I.P lounj',
        'VI.P lounge', 'V.I.P lounger', 'V.I.P lunge', 'VIP lownge',
        'V.I.P. launch', 'V.I.P louge', 'V I P lounge', 'vip-lounge'
    ],
            'Guest Room': [
                'guest room', 'guest bedroom', 'guest suite', 'visitors room',
                'guest quarters', 'guest chamber', 'guest accommodation',
                'guest sleeping room', 'visitor bedroom', 'visitor suite',
                'guest bed room', 'guest living space'
            ]
}

# Mapping for specific direct translations.
# Consulted by correct_room_name() after the variation lists found no match.
specific_mappings = {
    'PUJA ROOM': 'Pooja Area/Pooja Room',
    'DINNING AREA': 'Dining Room',
    'DINNING ROOM': 'Dining Room',
    'PARENTS BEDROOM': 'Parents Bedroom',
    'READING ROOM': 'Reading Area',
    'MULTIPURPOSE HALL': 'Multipurpose Room',
    'SERVICE PASSAGE': 'Passage',
    'LIFT': 'Elevator',
    # NOTE(review): 'Dressing Area' used to be listed twice (first with the
    # value 'Wardrobe Pathway', then 'Dressing 1'). A dict literal keeps only
    # the last value for a repeated key, so the dead first entry was removed;
    # the resulting dict is unchanged. The direction of this entry looks
    # inverted (canonical name -> variant) -- confirm the intent.
    'Dressing Area': 'Dressing 1',
    'Family Area': 'Family Room',
}

def split_at_format_marker(text):
    """Cut *text* at the first formatting marker.

    Returns a ``(head, tail)`` tuple: ``head`` is the stripped text in front
    of the earliest marker, ``tail`` starts with that marker and runs to the
    end of the string. When no marker is present the tail is ``""``.
    """
    # \pxi...; codes are removed before the markers are located.
    cleaned = re.sub(r'\\pxi[^;]*;', '', text)

    # Markers searched for: \P, \p, ^j, \H and ^J.
    offsets = [cleaned.find(code) for code in ('\\P', '\\p', '^j', '\\H', '^J')]
    hits = [offset for offset in offsets if offset != -1]

    if not hits:
        return cleaned.strip(), ""

    cut = min(hits)
    return cleaned[:cut].strip(), cleaned[cut:]

def extract_room_info(text):
    r"""Extract the room name and dimensions from text like ``Living room 1\P17' X 14'``.

    The text is split with split_at_format_marker(); the part before the
    first format marker is the room name, and the remainder (marker included)
    is searched for a ``<number> X <number>`` pattern with optional ``'`` or
    ``"`` after each number.

    Returns:
        A ``(room_name, dimensions, format_part)`` tuple. ``dimensions`` is
        ``""`` when there is no format part or no dimension pattern in it.
    """
    room_part, format_part = split_at_format_marker(text)

    dimensions = ""
    if format_part:
        # Only the first dimension-like match in the format part is used.
        dim_match = re.search(r'(\d+[\'"]?\s*[Xx]\s*\d+[\'"]?)', format_part)
        if dim_match:
            dimensions = dim_match.group(1)

    return room_part.strip(), dimensions, format_part

def correct_text(text):
    """Return *text* with its room-name part corrected.

    Empty or whitespace-only input is returned untouched. Otherwise the
    \\pxi codes are removed, the text is split at the first format marker,
    the leading part is passed through correct_room_name(), and the
    marker-and-remainder part is appended back unchanged.
    """
    if text and text.strip():
        without_pxi = re.sub(r'\\pxi[^;]*;', '', text)
        name_part, trailer = split_at_format_marker(without_pxi)
        return correct_room_name(name_part) + trailer
    return text

def correct_room_name(room_name):
    """Map a raw room label to its canonical, upper-cased name.

    Lookup order:
      1. exact (case-insensitive) match against any entry of
         ``expanded_variations`` -> the owning key, upper-cased;
      2. exact (case-insensitive) match against a key of
         ``specific_mappings`` -> the mapped value, upper-cased;
      3. fuzzy match of the title-cased label against ``room_name_dict``
         with a score cutoff of 85;
      4. otherwise the stripped label itself, upper-cased.

    Empty or whitespace-only input is returned unchanged.
    """
    if not room_name or not room_name.strip():
        return room_name

    # Normalise once so all exact-match stages see the same stripped,
    # upper-cased label (the fuzzy stage below already stripped its input).
    candidate = room_name.strip().upper()

    for correct_name, variations in expanded_variations.items():
        for variation in variations:
            if candidate == variation.strip().upper():
                return correct_name.upper()

    # Keys are upper-cased here as well: some keys of specific_mappings are
    # written in mixed case and could otherwise never equal the upper-cased
    # candidate.
    for key, value in specific_mappings.items():
        if candidate == key.upper():
            return value.upper()

    # Convert to title case for matching
    room_title = room_name.strip().title()

    # Try fuzzy matching with the original dictionary
    result = process.extractOne(room_title, room_name_dict, score_cutoff=85)

    if result:
        return result[0].upper()

    # If no match found, just return the uppercase version
    return room_title.upper()

def correct_dxf_text(file_path, output_path):
    """Correct TEXT and MTEXT content of a DXF file and save the result.

    Every TEXT and MTEXT entity in the modelspace of *file_path* is passed
    through correct_text(); entities whose content changes are updated and
    reported on stdout. The document is then written to *output_path*.

    Args:
        file_path: Path of the DXF file to read.
        output_path: Path the corrected DXF file is saved to.
    """
    doc = ezdxf.readfile(file_path)  # Load DXF file
    msp = doc.modelspace()  # Access model space

    # TEXT keeps its content in the DXF namespace.
    for text_entity in msp.query("TEXT"):
        old_text = text_entity.dxf.text
        new_text = correct_text(old_text)

        if old_text != new_text:  # Only update if correction is made
            print(f"Corrected TEXT: '{old_text}' → '{new_text}'")
            text_entity.dxf.text = new_text

    # MTEXT content is read and written through the entity's ``text``
    # attribute, the same accessor the floor-extraction code in this file
    # uses for MTEXT.
    for mtext_entity in msp.query("MTEXT"):
        old_text = mtext_entity.text
        new_text = correct_text(old_text)

        if old_text != new_text:  # Only update if correction is made
            print(f"Corrected MTEXT: '{old_text}' → '{new_text}'")
            mtext_entity.text = new_text

    # Save the corrected DXF file
    doc.saveas(output_path)

def load_dictionary():
    """Bundle the module-level lookup tables into one dict keyed by their names."""
    bundle = {}
    bundle['room_name_dict'] = room_name_dict
    bundle['expanded_variations'] = expanded_variations
    bundle['specific_mappings'] = specific_mappings
    return bundle

def main5(input_dxf5, output_dxf):
    """Run the room-label correction step: read *input_dxf5*, write *output_dxf*."""
    # The lookup tables are still loaded here; the returned dict is not used.
    load_dictionary()
    correct_dxf_text(file_path=input_dxf5, output_path=output_dxf)

# floor distribution 

In [12]:
import ezdxf
from collections import defaultdict
import re

def slice_text_entity(text):
    r"""
    Parse a text entity into ``(name, dimensions, suffix)``.

    Surrounding double quotes and whitespace are removed first. Two layouts
    are handled:

    * Multi-line text with the ``\P`` separator: the part before the first
      ``\P`` is the name; the part between the first and second ``\P`` is
      split at its last space into dimensions and suffix (no space means it
      is all dimensions). Anything after a second ``\P`` is ignored.
    * Plain text: the whole text is the name, unless its last
      space-separated token is all digits, in which case that token becomes
      the suffix.
    """
    text = text.strip('" ').strip()

    if '\\P' in text:
        parts = text.split('\\P')
        name = parts[0].strip()
        tail = parts[1].strip()
        if ' ' not in tail:
            return name, tail, ""
        # NOTE(review): the last space-separated token is always taken as
        # the suffix, so a spaced dimension such as "17' X 14'" yields
        # dimensions "17' X" and suffix "14'" -- confirm this is intended.
        dim_part, suffix_part = tail.rsplit(' ', 1)
        return name, dim_part.strip(), suffix_part.strip()

    tokens = text.split(' ')
    if len(tokens) > 1 and tokens[-1].isdigit():
        # Trailing number is treated as a numeric identifier.
        return ' '.join(tokens[:-1]), "", tokens[-1]

    return text, "", ""

def point_in_polygon(point, polygon):
    """
    Ray-casting test: return True when *point* ``(x, y)`` lies inside
    *polygon*, a sequence of ``(x, y)`` vertices.
    """
    px, py = point
    count = len(polygon)
    inside = False

    ax, ay = polygon[0]
    for k in range(1, count + 1):
        bx, by = polygon[k % count]
        # Edge must span the point's y (half-open interval) and must not lie
        # entirely to the left of the point.
        if min(ay, by) < py <= max(ay, by) and px <= max(ax, bx):
            # ay != by is guaranteed here, otherwise the interval is empty.
            if ax == bx or px <= (py - ay) * (bx - ax) / (by - ay) + ax:
                inside = not inside
        ax, ay = bx, by

    return inside

def is_dimension_text(text, is_on_dimension_layer=False):
    r"""
    Determine if text is likely a dimension annotation.

    Args:
        text: Raw text content of the entity.
        is_on_dimension_layer: When True the text is always treated as a
            dimension, regardless of content.

    Returns:
        True when the text looks like a dimension:
        * text containing ``\P``: only the segment after the first ``\P`` is
          examined, and it must contain a ``'`` or ``"`` mark and an ``x``
          (any case);
        * otherwise: the same mark-and-``x`` pattern anywhere in the text,
          or ``sq`` together with a digit, or a text starting with ``EL``,
          ``el``, ``+`` or ``-`` that contains a digit.
    """
    if is_on_dimension_layer:
        return True

    text = text.strip()

    def has_marks_and_x(fragment):
        # Feet/inch mark plus an "x" separator, e.g. 17' X 14'.
        has_mark = "'" in fragment or '"' in fragment
        return has_mark and 'x' in fragment.lower()

    if '\\P' in text:
        # Name and dimensions share one entity: judge the second line only.
        return has_marks_and_x(text.split('\\P')[1].strip())

    has_digit = any(ch.isdigit() for ch in text)
    return (
        has_marks_and_x(text)
        # Area notations
        or ("sq" in text.lower() and has_digit)
        # Elevation markers
        or (text.startswith(("EL", "el", "+", "-")) and has_digit)
    )

def is_numeric_only(text):
    """
    Return True when *text* is made of digits once commas, periods, hyphens
    and spaces have been removed.
    """
    separators_removed = text.translate(str.maketrans('', '', ',.- '))
    return separators_removed.isdigit()

def should_skip_numbering(text):
    """
    Decide whether a text should be left out of sequence numbering.

    Returns True for empty text, for tags of the form D<n>/W<n>/S<n>, for
    text carrying a parenthesised "(+...)", "(-...)" or "(00)" group, and
    for text that contains one of the special terms listed below
    (case-insensitive substring test).
    """
    if not text or not text.strip():
        return True  # nothing to number

    # Work on the text without \pxi format codes and outer whitespace.
    clean_text = re.sub(r'\\pxi[^;]*;', '', text).strip()

    # D1, D2, ... W1, W2, ... S1, S2, ...
    if re.match(r'^[DWS]\d+$', clean_text) is not None:
        return True

    # Parentheses containing +, - or 00
    if re.match(r'.*\(\+.*\).*|.*\(-.*\).*|.*\(00\).*', clean_text) is not None:
        return True

    # Terms are upper case so they can be compared with the upper-cased text.
    upper_text = clean_text.upper()
    for term in ("-UP", "UP", "DN", "DOWN",
                 "PARKING AREA", "PARKING", "PARK",
                 "LIFT", "ELEVATOR",
                 "GARAGE"):
        if term in upper_text:
            return True
    return False



def extract_text_by_floor_levels(dxf_path):
    """
    Collect the TEXT/MTEXT entities that sit inside closed polylines of a DXF
    file and group them by floor level.

    Floors are derived from layer names and, failing that, from the text
    content, by substring match against a fixed keyword table. Text on layers
    whose name contains 'dim' or 'quote' is skipped. Each text is attached to
    the first closed polyline that contains its insertion point.

    Returns:
        A dict with the keys 'floor_order', 'text_by_floor', 'text_counts',
        'total_texts', 'polylines_with_text', 'total_polylines',
        'dimension_layers' and 'text_by_polyline', or None when any exception
        is raised (the error and traceback are printed).
    """
    try:
        doc = ezdxf.readfile(dxf_path)
        msp = doc.modelspace()

        # Floor name -> substrings that identify it (matched case-insensitively).
        floor_mapping = {
            #"ground floor": ["ground floor", "ground", "g/f", "gf", "ground_floor", "0", "0 floor"],
            "Floor 1": ["first floor", "first", "1/f", "ff", "f/f", "1f", "first_floor", "1", "1 floor", "1st floor"],
            "Floor 2": ["second floor", "second", "2/f", "sf", "s/f", "2f", "second_floor", "2", "2 floor", "2nd floor"],
            "Floor 3": ["third floor", "third", "3/f", "tf", "t/f", "3f", "third_floor", "3", "3 floor", "3rd floor"],
            "Floor 4": ["fourth floor", "fourth", "4/f", "ff", "4f", "fourth_floor", "4", "4 floor", "4th floor"],
            "Floor 5": ["fifth floor", "fifth", "5/f", "ff", "5f", "fifth_floor", "5", "5 floor", "5th floor"],
            "Floor 6": ["sixth floor", "sixth", "6/f", "sf", "6f", "sixth_floor", "6", "6 floor", "6th floor"],
            "Floor 7": ["seventh floor", "seventh", "7/f", "sf", "7f", "seventh_floor", "7", "7 floor", "7th floor"],
            "Floor 8": ["eighth floor", "eighth", "8/f", "ef", "8f", "eighth_floor", "8", "8 floor", "8th floor"],
            "terrace floor": ["terrace floor", "terrace", "t/f", "tf", "terrace_floor", "roof", "roof floor"]
        }

        def first_matching_floor(lowered):
            # First floor (in table order) with a keyword inside *lowered*, else None.
            for candidate, keywords in floor_mapping.items():
                if any(keyword.lower() in lowered for keyword in keywords):
                    return candidate
            return None

        # Get all layers in the document
        layers = {layer.dxf.name for layer in doc.layers}
        print(f"Layers in the document: {', '.join(layers)}")

        # Identify dimension layers
        dimension_layers = set()
        dim_keywords = ['dim', 'dimension', 'dimensions', 'quote']
        for layer_name in layers:
            lowered_name = layer_name.lower()
            if any(word in lowered_name for word in dim_keywords):
                dimension_layers.add(layer_name)
                print(f"Identified dimension layer: {layer_name}")

        # Map layers to floor levels
        floor_layers = {}
        for layer_name in layers:
            hit = first_matching_floor(layer_name.lower())
            if hit is not None:
                floor_layers[layer_name] = hit

        if '0' in layers and '0' not in floor_layers:
            floor_layers['0'] = "default"

        print(f"Detected floor layers: {floor_layers}")

        # Extract all closed polylines with at least three vertices
        all_polylines = []
        for entity in msp.query('LWPOLYLINE POLYLINE'):
            kind = entity.dxftype()
            is_closed = False
            vertices = []

            if kind == 'LWPOLYLINE':
                is_closed = entity.closed
                if is_closed:
                    vertices = [(p[0], p[1]) for p in entity.get_points()]
            elif kind == 'POLYLINE':
                is_closed = entity.is_closed
                if is_closed:
                    vertices = [(v.dxf.location[0], v.dxf.location[1]) for v in entity.vertices]

            if not is_closed or len(vertices) < 3:
                continue

            layer = entity.dxf.layer
            vertex_count = len(vertices)

            # Vertex average used as centroid
            centroid = (
                sum(v[0] for v in vertices) / vertex_count,
                sum(v[1] for v in vertices) / vertex_count,
            )

            # Shoelace formula, pairing each vertex with its successor
            area = 0.0
            for (x1, y1), (x2, y2) in zip(vertices, vertices[1:] + vertices[:1]):
                area += x1 * y2
                area -= x2 * y1
            area = abs(area) / 2.0

            all_polylines.append({
                'id': len(all_polylines),  # sequential id over accepted polylines
                'entity': entity,
                'vertices': vertices,
                'layer': layer,
                'floor': floor_layers.get(layer, "unknown"),
                'centroid': centroid,
                'area': area,
                'contained_text': []
            })

        print(f"Found {len(all_polylines)} closed polylines")

        # Extract all text entities: MTEXT first, then TEXT
        text_entities = []
        for kind in ('MTEXT', 'TEXT'):
            for entity in msp.query(kind):
                layer = entity.dxf.layer
                text = entity.text if kind == 'MTEXT' else entity.dxf.text
                position = entity.dxf.insert

                # Skip text entities on dimension layers
                if layer in dimension_layers:
                    print(f"Skipping {kind} on dimension layer {layer}: '{text}'")
                    continue

                # Layer decides the floor; otherwise fall back to the text content
                if layer in floor_layers:
                    floor = floor_layers[layer]
                else:
                    floor = first_matching_floor(text.lower())

                text_entities.append({
                    'type': kind,
                    'text': text,
                    'layer': layer,
                    'floor': floor or "unknown",
                    'position': position,
                    'entity': entity,
                    'containing_polyline': None,
                    'is_dimension': layer in dimension_layers
                })

        print(f"Found {len(text_entities)} text entities")

        # Attach each text to the first polyline containing its insertion point
        for record in text_entities:
            insert = record['position']
            point = (insert[0], insert[1])

            host = next(
                (poly for poly in all_polylines if point_in_polygon(point, poly['vertices'])),
                None,
            )
            if host is None:
                continue

            record['containing_polyline'] = host['id']
            host['contained_text'].append(record)

            # Propagate floor information in whichever direction it is missing
            if host['floor'] in ("default", "unknown") and record['floor'] != "unknown":
                host['floor'] = record['floor']

            if record['floor'] == "unknown" and host['floor'] not in ["default", "unknown"]:
                record['floor'] = host['floor']

        # Define floor order and prepare result structures
        floor_order = ["Floor 1", "Floor 2", "Floor 3", "Floor 4", "Floor 5","Floor 6","Floor 7","Floor 8", "terrace floor"]
        all_floors = floor_order + ["unknown"]
        text_by_floor = {floor: [] for floor in all_floors}

        polylines_with_text = [poly for poly in all_polylines if poly['contained_text']]
        print(f"Found {len(polylines_with_text)} polylines containing text")

        # Flat per-floor listing (only the named floors are filled here)
        for floor in floor_order:
            bucket = text_by_floor[floor]
            for poly in polylines_with_text:
                if poly['floor'] != floor:
                    continue
                for item in poly['contained_text']:
                    name, dimensions, suffix = slice_text_entity(item['text'])
                    bucket.append({
                        'original_text': item['text'],
                        'name': name,
                        'dimensions': dimensions,
                        'suffix': suffix,
                        'type': item['type'],
                        'layer': item['layer'],
                        'position': item['position'],
                        'containing_polyline_id': poly['id'],
                        'containing_polyline_layer': poly['layer'],
                        'polyline_centroid': poly['centroid'],
                        'is_dimension': item.get('is_dimension', False)
                    })

        # Count texts by floor
        text_counts = {floor: len(texts) for floor, texts in text_by_floor.items()}

        # Prepare results
        results = {
            'floor_order': floor_order,
            'text_by_floor': text_by_floor,
            'text_counts': text_counts,
            'total_texts': sum(text_counts.values()),
            'polylines_with_text': len(polylines_with_text),
            'total_polylines': len(all_polylines),
            'dimension_layers': dimension_layers
        }

        # Per-polyline listing (includes the "unknown" floor)
        text_by_polyline = {floor: {} for floor in all_floors}
        for floor in all_floors:
            for poly in polylines_with_text:
                if poly['floor'] != floor:
                    continue
                entries = []
                text_by_polyline[floor][f"poly_{poly['id']}"] = entries
                for item in poly['contained_text']:
                    name, dimensions, suffix = slice_text_entity(item['text'])
                    entries.append({
                        'original_text': item['text'],
                        'name': name,
                        'dimensions': dimensions,
                        'suffix': suffix,
                        'type': item['type'],
                        'layer': item['layer'],
                        'position': item['position'],
                        'is_dimension': item.get('is_dimension', False)
                    })

        results['text_by_polyline'] = text_by_polyline

        return results

    except Exception as e:
        print(f"Error processing DXF file: {e}")
        import traceback
        traceback.print_exc()
        return None

def print_floor_text_summary(results):
    """
    Write a human-readable report of the collected text to stdout.

    The overall totals come first. Then, for every floor listed in
    ``results['floor_order']`` followed by "unknown", the texts of each
    polyline that holds at least one text are listed. Floors whose text
    count is not positive are left out entirely. When ``results`` is
    empty or None only a short notice is printed.
    """
    if not results:
        print("No results to display.")
        return

    print("\n===== TEXT ENTITIES BY FLOOR =====")
    print(f"Total text entities found in polylines: {results['total_texts']}")
    print(f"Total polylines with text: {results['polylines_with_text']} (out of {results['total_polylines']} total polylines)")
    print(f"Dimension layers (skipped): {', '.join(results['dimension_layers'])}")

    for floor in results['floor_order'] + ["unknown"]:
        n_texts = results['text_counts'][floor]
        if not (n_texts > 0):
            continue

        print(f"\n--- {floor.upper()} ({n_texts} text entities) ---")
        if floor not in results['text_by_polyline']:
            continue

        for poly_id, entries in results['text_by_polyline'][floor].items():
            if not entries:
                continue

            print(f"\n  Polyline {poly_id} ({len(entries)} texts):")
            for entry in entries:
                print(f"    - Original: \"{entry['original_text']}\"")
                print(f"    - Name: {entry['name']}")
                print(f"    - Dimensions: {entry['dimensions']}")
                print(f"    - Suffix: {entry['suffix']} ({entry['type']} on layer {entry['layer']})")
                # These two keys are only present once the numbering step has run.
                if 'text' in entry:
                    print(f"    - Numbered Text: \"{entry['text']}\"")
                if 'skipped_reason' in entry:
                    print(f"    - Skipped: {entry['skipped_reason']}")

# MTEXT inline code separating the room name from its dimensions. Spelled
# with an escaped backslash: a bare '\P' is an invalid escape sequence in a
# normal Python string literal.
_MTEXT_PARAGRAPH_BREAK = "\\P"


def _split_room_label(original_text, is_dimension):
    """
    Split a label into ``(room_name, dimension_part)``.

    For dimension text containing the paragraph break, the part before the
    first break (stripped) is the room name and the remainder is returned
    as ``dimension_part``. Otherwise ``dimension_part`` is None and the room
    name is the whole text, minus a trailing all-digit word if there is one
    (an already present sequence number).
    """
    if is_dimension and _MTEXT_PARAGRAPH_BREAK in original_text:
        room_part, dimension_part = original_text.split(_MTEXT_PARAGRAPH_BREAK, 1)
        return room_part.strip(), dimension_part

    words = original_text.split()
    if len(words) > 1 and words[-1].isdigit():
        return ' '.join(words[:-1]), None
    return original_text, None


def _print_examples(heading, examples, render, limit=20):
    """Print ``heading``, up to ``limit`` rendered examples and a '... more' line."""
    print(heading)
    for example in examples[:limit]:
        print(render(example))
    if len(examples) > limit:
        print(f"  ... and {len(examples) - limit} more")


def number_repeated_text_across_floors(results):
    """
    Assign sequence numbers to room labels that occur more than once.

    Works in two passes over ``results['text_by_polyline']``, visiting the
    floors of ``results['floor_order']`` followed by "unknown":

    1. count how often each room name occurs across all floors;
    2. for names seen more than once, append a running number
       ("NAME 1", "NAME 2", ...) while keeping any dimension part that
       follows the MTEXT paragraph break.

    Purely numeric texts and texts matched by ``should_skip_numbering`` are
    never numbered; they get a ``'skipped_reason'`` instead.

    The text dictionaries are modified in place: every visited entry gets a
    ``'text'`` key, numbered entries additionally get updated ``'name'`` and
    a ``'sequence_number'``. Each polyline's text list is also re-sorted
    top-to-bottom, left-to-right. A summary is printed to stdout.

    Args:
        results: Result dictionary providing ``'floor_order'`` and
            ``'text_by_polyline'``.

    Returns:
        The same ``results`` object, or None when ``results`` is empty.
    """
    if not results:
        print("No results to process.")
        return None

    floor_order = results['floor_order'] + ["unknown"]
    text_by_polyline = results['text_by_polyline']

    # Lists kept for the debugging summary printed at the end.
    processed_texts = {
        "numbered": [],
        "dimension_preserved": [],
        "numeric_skipped": [],
        "pattern_skipped": []
    }

    # First pass: count occurrences of every room/space name. The dict keeps
    # first-seen order, which is also the order of the summary below.
    room_name_occurrences = {}
    for floor in floor_order:
        for texts in text_by_polyline.get(floor, {}).values():
            for text in texts:
                original_text = text['original_text'].strip()

                if is_numeric_only(original_text) or should_skip_numbering(original_text):
                    continue

                is_dimension = is_dimension_text(original_text, text.get('is_dimension', False))
                room_name, _ = _split_room_label(original_text, is_dimension)
                room_name_occurrences[room_name] = room_name_occurrences.get(room_name, 0) + 1

    # Running sequence counter for each name that appears more than once.
    repeated_room_names = {name: 0 for name, count in room_name_occurrences.items() if count > 1}

    # Second pass: apply the numbering.
    for floor in floor_order:
        for texts in text_by_polyline.get(floor, {}).values():
            if not texts:
                continue

            # Number in reading order: top to bottom, then left to right.
            texts.sort(key=lambda t: (-t['position'][1], t['position'][0]))

            for text in texts:
                original_text = text['original_text'].strip()

                if is_numeric_only(original_text):
                    text['text'] = original_text
                    text['skipped_reason'] = "Numeric value"
                    processed_texts["numeric_skipped"].append(original_text)
                    continue

                # D1, W1, S1, etc. or (+...), (-...), (00), or special terms
                if should_skip_numbering(original_text):
                    text['text'] = original_text
                    text['skipped_reason'] = "Pattern match (D#, W#, S#, special terms, or parentheses patterns)"
                    processed_texts["pattern_skipped"].append(original_text)
                    continue

                is_dimension = is_dimension_text(original_text, text.get('is_dimension', False))
                room_name, dimension_part = _split_room_label(original_text, is_dimension)

                if room_name not in repeated_room_names:
                    # Single occurrence: keep the text as it is.
                    text['text'] = original_text
                    if dimension_part is not None:
                        processed_texts["dimension_preserved"].append(original_text)
                    continue

                repeated_room_names[room_name] += 1
                sequence_num = repeated_room_names[room_name]
                numbered_name = f"{room_name} {sequence_num}"

                if dimension_part is not None:
                    # Re-attach the dimensions behind the numbered name.
                    numbered_text = f"{numbered_name}{_MTEXT_PARAGRAPH_BREAK}{dimension_part}"
                else:
                    numbered_text = numbered_name

                text['name'] = numbered_name
                text['text'] = numbered_text
                text['sequence_number'] = sequence_num

                processed_texts["numbered"].append({
                    "original": original_text,
                    "numbered": numbered_text
                })

    numbered_count = len(processed_texts["numbered"])
    numeric_skipped = len(processed_texts["numeric_skipped"])
    pattern_skipped = len(processed_texts["pattern_skipped"])

    # Print results for debugging
    print("\nRoom types with numbering:")
    for room_name, count in room_name_occurrences.items():
        if count > 1:
            print(f"{room_name}: {count} occurrences")

    # NOTE(review): the "preserved" figure only covers unique labels that
    # carry a dimension part; unique plain labels are not counted.
    print(f"\nNumbered {numbered_count} text entities and preserved {len(processed_texts['dimension_preserved'])} unique text entities")
    print(f"Skipped {numeric_skipped} numeric values")
    print(f"Skipped {pattern_skipped} pattern-matched texts (D#, W#, S#, special terms, or parentheses patterns)")

    _print_examples(
        "\nNumbered texts (examples):",
        processed_texts["numbered"],
        lambda item: f"  - '{item['original']}' → '{item['numbered']}'",
    )
    _print_examples(
        "\nSkipped numeric values (examples):",
        processed_texts["numeric_skipped"],
        lambda item: f"  - '{item}'",
    )
    _print_examples(
        "\nSkipped pattern-matched values (examples):",
        processed_texts["pattern_skipped"],
        lambda item: f"  - '{item}'",
    )

    return results

def save_numbered_text_to_dxf(input_dxf, output_dxf, results):
    """
    Write the numbered labels back into a copy of the drawing.

    Reads ``input_dxf`` and looks up every modelspace TEXT and MTEXT entity
    by its insertion point among the entries of
    ``results['text_by_polyline']``. On an exact coordinate match the
    entity's content is overwritten with the entry's ``'text'`` value. The
    document is then saved as ``output_dxf``.

    Returns True on success. On any exception the error and its traceback
    are printed and False is returned.
    """
    try:
        drawing = ezdxf.readfile(input_dxf)
        modelspace = drawing.modelspace()

        # Insertion point (as tuple) -> replacement content. When several
        # entries share a position, the one visited last wins.
        replacement_at = {}
        for floor in results['floor_order'] + ["unknown"]:
            if floor not in results['text_by_polyline']:
                continue
            for entries in results['text_by_polyline'][floor].values():
                for entry in entries:
                    key = tuple(entry['position'])
                    if 'text' in entry:  # only entries that went through numbering
                        replacement_at[key] = entry['text']

        updated_count = 0

        # TEXT entities: content is the ``dxf.text`` attribute.
        for text_entity in modelspace.query('TEXT'):
            key = tuple(text_entity.dxf.insert)
            if key not in replacement_at:
                continue
            text_entity.dxf.text = replacement_at[key]
            updated_count += 1

        # MTEXT entities: content is the ``text`` attribute of the entity.
        for mtext_entity in modelspace.query('MTEXT'):
            key = tuple(mtext_entity.dxf.insert)
            if key not in replacement_at:
                continue
            mtext_entity.text = replacement_at[key]
            updated_count += 1

        drawing.saveas(output_dxf)
        print(f"Updated {updated_count} text entities in the DXF file")
        return True

    except Exception as e:
        print(f"Error saving numbered text to DXF: {e}")
        import traceback
        traceback.print_exc()
        return False

def main6(input_dxf6, output_dxf, debug=True):
    """
    Stage 6 of the pipeline: number repeated text labels.

    Extracts the text entities grouped by floor from ``input_dxf6``,
    numbers the labels that repeat, optionally prints a summary
    (``debug``) and writes the result to ``output_dxf``.

    Returns True when the output file was written, False as soon as one of
    the steps reports a failure.
    """
    print(f"Processing input file: {input_dxf6}")
    print(f"Output will be saved to: {output_dxf}")

    # Step 1: collect the text entities.
    extracted = extract_text_by_floor_levels(input_dxf6)
    if not extracted:
        print("Failed to extract text from DXF file.")
        return False

    # Step 2: number the repeated labels.
    numbered = number_repeated_text_across_floors(extracted)
    if not numbered:
        print("Failed to number text entities.")
        return False

    # Step 3 (optional): show what was found.
    if debug:
        print_floor_text_summary(numbered)

    # Step 4: write the updated drawing.
    if not save_numbered_text_to_dxf(input_dxf6, output_dxf, numbered):
        print("Failed to save corrected DXF.")
        return False

    print(f"Corrected DXF saved as {output_dxf}")
    return True


    

# layer management

In [9]:
def _layer_name_from_text(text_content, max_length=31):
    """
    Derive a layer name from text content.

    Letters, digits and underscores are kept, every other character becomes
    an underscore, and the result is cut to ``max_length`` characters.
    Falls back to "TEXT_LAYER" when nothing is left.
    """
    cleaned = ''.join(c if (c.isalnum() or c == '_') else '_' for c in text_content)
    return cleaned[:max_length] or "TEXT_LAYER"


def process_dxf(input_dxf7, output_dxf):
    """
    Rebuild the layer table so every TEXT/MTEXT sits on a layer named after it.

    All layers except "0" are removed from the layer table. Each TEXT and
    MTEXT entity in modelspace is then moved to a layer whose name is
    derived from the entity's own content (created on first use). The
    document is saved as ``output_dxf``.

    Args:
        input_dxf7: Path of the DXF file to read.
        output_dxf: Path the modified DXF file is written to.
    """
    # 1. Takes input DXF
    doc = ezdxf.readfile(input_dxf7)
    msp = doc.modelspace()

    # 2. Deletes all layers excluding layer 0
    # NOTE(review): entities other than TEXT/MTEXT keep their old layer
    # reference although its table entry is removed here - confirm intended.
    layers_to_remove = [layer.dxf.name for layer in doc.layers if layer.dxf.name != '0']
    for layer_name in layers_to_remove:
        doc.layers.remove(layer_name)

    # 3. Detects only text or mtext and 4. Creates layers from detected text/mtext
    for entity in msp:
        entity_type = entity.dxftype()
        if entity_type == 'TEXT':
            text_content = entity.dxf.text
        elif entity_type == 'MTEXT':
            # MTEXT content is the entity's ``text`` attribute, not a
            # ``dxf`` attribute (same access as save_numbered_text_to_dxf).
            text_content = entity.text
        else:
            continue

        layer_name = _layer_name_from_text(text_content)
        if layer_name not in doc.layers:
            doc.layers.new(layer_name)
        entity.dxf.layer = layer_name

    # 5. Gives out final DXF file
    doc.saveas(output_dxf)
    
def main7(input_dxf7, output_dxf):
    """Stage 7 of the pipeline: run ``process_dxf`` on the given files."""
    process_dxf(input_dxf7=input_dxf7, output_dxf=output_dxf)

In [None]:
def main_final(input_dxf0, final_output, tolerance, max_iterations):
    """
    Main function to handle sequential DXF processing and clean up intermediate files.

    The stages main0 .. main7 run one after another, each reading the file
    written by the previous stage. Intermediate results go to "1.dxf" ..
    "7.dxf" in the current working directory and are deleted afterwards,
    also when one of the stages raises.

    Args:
        input_dxf0 (str): The initial input DXF file.
        final_output (str): Final output DXF file name.
        tolerance: Passed through unchanged to main0.
        max_iterations: Passed through unchanged to main0.
    """
    # NOTE(review): fixed names in the working directory - existing files
    # called 1.dxf .. 7.dxf would be overwritten and then deleted.
    intermediate_files = ["1.dxf", "2.dxf", "3.dxf", "4.dxf", "5.dxf", "6.dxf", "7.dxf"]

    try:
        # Sequential processing using provided main functions
        main0(input_dxf0=input_dxf0, output_dxf=intermediate_files[0], tolerance=tolerance, max_iterations=max_iterations)
        main1(input_dxf1=intermediate_files[0], output_dxf=intermediate_files[1])
        main2(input_dxf2=intermediate_files[1], output_dxf=intermediate_files[2])
        main3(input_dxf3=intermediate_files[2], output_dxf=intermediate_files[3])
        main4(input_dxf4=intermediate_files[3], output_dxf=intermediate_files[4])
        main5(input_dxf5=intermediate_files[4], output_dxf=intermediate_files[5])
        main6(input_dxf6=intermediate_files[5], output_dxf=intermediate_files[6])
        main7(input_dxf7=intermediate_files[6], output_dxf=final_output)
        files_structure = {"Intermediate Files": intermediate_files}
        print("Files structure:", files_structure)
    finally:
        # Runs on success and on failure, so a stage that raises does not
        # leave its predecessors' files behind. Deletion stays best-effort.
        for file in intermediate_files:
            if os.path.exists(file):
                try:
                    os.remove(file)
                    print(f"Deleted: {file}")
                except Exception as e:
                    print(f"Error deleting {file}: {e}")
            else:
                print(f"File not found: {file}")


In [13]:
# Run the complete pipeline on the sample drawing.
main_final(
    input_dxf0="4new4.dxf",
    final_output="T11modified.dxf",
    tolerance=1e-6,
    max_iterations=5,
)


Starting OVERKILL process on 4new4.dxf
Using tolerance value: 1e-06
Maximum iterations: 5
This may take a moment for complex drawings...


INFO: Identifying polylines in Floor layers...
INFO: Found 0 polylines in Floor layers
INFO: Starting OVERKILL process...
INFO: 
--- Iteration 1 ---
INFO: Processing 707 entities (707 lines, 0 arcs)...
INFO: Removed 53 duplicate entities.
INFO: Analyzing overlapping and connecting entities...
INFO: Building spatial index...
INFO: Building mergeable entity graph...
INFO: Found 55 sets of entities that can be merged.
INFO: Merging entity components...
INFO: Iteration 1 results:
INFO:   - Entities merged/deleted: 151
INFO:   - New entities created: 55
INFO: 
--- Iteration 2 ---
INFO: Processing 558 entities (558 lines, 0 arcs)...
INFO: Analyzing overlapping and connecting entities...
INFO: Building spatial index...
INFO: Building mergeable entity graph...
INFO: Found 0 sets of entities that can be merged.
INFO: No more entities can be merged.
INFO: 
OVERKILL operation completed in 2.94 seconds:
INFO:   - Iterations performed: 2
INFO:   - Original entity count: 707
INFO:   - Total entities

Converted 96 polylines to 256 lines
Skipped 3 polylines on floor layers
Remaining polylines after conversion: 3
DXF file saved as 3.dxf
Loading DXF file: 3.dxf

Block Counts:
Corrected TEXT: 'Ground floor plan' → 'GROUND FLOOR PLAN'
Corrected TEXT: 'First floor plan' → 'FIRST FLOOR PLAN'
Corrected TEXT: 'Terrace plan' → 'TERRACE PLAN'
Corrected MTEXT: '\pxi-3,l4,t4;Living room\P17' X 14'' → 'LIVING ROOM\P17' X 14''
Corrected MTEXT: 'Mandir + Dining area\P22'3" X 10'' → 'MANDIR + DINING AREA\P22'3" X 10''
Corrected MTEXT: '\pxi-3,l4,t4;Dressing area\P5'3" X 7'' → 'DRESSING AREA\P5'3" X 7''
Corrected MTEXT: '\pxi-3,l4,t4;Bedroom 2\P14'3" X 12'' → 'BEDROOM\P14'3" X 12''
Corrected MTEXT: '\pxi-3,l4,t4;Toilet\P5' X 7'' → 'BATHROOM\P5' X 7''
Corrected MTEXT: '\pxi-3,l4,t4;Dressing area\P5' X 6'7"' → 'DRESSING AREA\P5' X 6'7"'
Corrected MTEXT: '\pxi-3,l4,t4;Bedroom 1\P10' X 12'' → 'BEDROOM\P10' X 12''
Corrected MTEXT: '3' Wide O.T.S\Ppond / garden\Parea' → 'O.T.S\Ppond / garden\Parea'
Correct

Updated 28 text entities in the DXF file
Corrected DXF saved as 7.dxf
Files structure: {'Intermediate Files': ['1.dxf', '2.dxf', '3.dxf', '4.dxf', '5.dxf', '6.dxf', '7.dxf']}
Deleted: 1.dxf
Deleted: 2.dxf
Deleted: 3.dxf
Deleted: 4.dxf
Deleted: 5.dxf
Deleted: 6.dxf
Deleted: 7.dxf
