# **0. Package layout**

Proposed top-level package:

```text
etabs_text_log/
    __init__.py
    model.py          # dataclasses for EtabsModel & friends
    parser.py         # .$et → EtabsModel
    diffing.py        # EtabsModel → raw diff objects
    aggregate.py      # raw diff → aggregated clusters (designer-level changes)
    location.py       # grid/story tagging helpers
    summarize.py      # LLM-facing summarization helpers
    versioning.py     # snapshot management, version registry
    cli.py            # command-line interface (etabs-log)
    mcp_server.py     # MCP tool wrappers (optional)
```

# **1. Core data model (`model.py`)**

These types represent a parsed `.$et` snapshot.

```python
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional, Dict, List, Tuple, Literal


# --- Basics ---

@dataclass
class ProgramInfo:
    """Identifies the program and source file behind a parsed snapshot."""
    program: str                # program name, e.g. "ETABS"
    version: str                # version string, e.g. "22.1.0"
    build: Optional[str] = None  # optional build identifier
    source_file: Optional[str] = None  # path to .$et snapshot


@dataclass
class Story:
    """One story of the model, with its elevation and ordering metadata."""
    name: str                   # story label, e.g. "L14"
    elevation: float            # global Z
    height: Optional[float] = None  # NOTE(review): presumably the story height; units not stated here
    is_master_story: bool = False
    index: Optional[int] = None  # sort order


@dataclass
class GridLine:
    """A named grid line located at a single X or Y coordinate."""
    name: str                   # grid label, e.g. "A", "1"
    coord: float                # X or Y coordinate
    # NOTE(review): confirm whether this names the axis `coord` is measured
    # along or the orientation of the line itself.
    direction: Literal["X", "Y"]
    # TODO: maybe flags like reference_grid etc.


@dataclass
class Joint:
    """A labelled joint with coordinates and optional story/grid tags."""
    name: str                   # joint label
    x: float
    y: float
    z: float
    # The three tags below default to None and are meant to be filled in by
    # location tagging rather than by the parser.
    story: Optional[str] = None  # filled by location tagging
    grid_x: Optional[str] = None
    grid_y: Optional[str] = None


# --- Sections / materials ---

@dataclass
class Material:
    """A material definition: a name, a coarse type, and a few optional properties."""
    name: str                  # material name, e.g. "A992Fy50"
    type: Literal["steel", "concrete", "other"]
    # Only the handful of fields we care about in v1; each stays None when
    # no value is supplied.
    # NOTE(review): units and exact meaning of E / Fy / fc are not defined
    # here — confirm against the parser.
    E: Optional[float] = None
    Fy: Optional[float] = None
    fc: Optional[float] = None
    density: Optional[float] = None
    # Raw field dict for anything else (string keys and string values):
    raw_fields: Dict[str, str] = field(default_factory=dict)


@dataclass
class FrameSection:
    """A frame section definition that refers to its material by name."""
    name: str                  # section name, e.g. "W14X90"
    material: str              # Material.name
    shape_type: str            # "I", "Channel", etc.
    shape_label: Optional[str] = None   # vendor shape name if different
    # Minimal geometric props; each stays None when no value is supplied.
    # NOTE(review): units and axis conventions for Ix / Iy are not defined
    # here — confirm against the parser.
    area: Optional[float] = None
    Ix: Optional[float] = None
    Iy: Optional[float] = None
    J: Optional[float] = None
    # For unknown / extra fields (string keys and string values):
    raw_fields: Dict[str, str] = field(default_factory=dict)


# --- Structural objects ---

@dataclass
class LocationInfo:
    """Story and grid tags describing where an object sits; all fields optional."""
    story: Optional[str] = None
    grid_x: Optional[str] = None
    grid_y: Optional[str] = None

    # Optional: bounding region (for members spanning between grids),
    # expressed as a pair of grid names per direction.
    grid_x_span: Optional[Tuple[str, str]] = None
    grid_y_span: Optional[Tuple[str, str]] = None


@dataclass
class FrameObject:
    """A frame member connecting two joints and referencing a section by name."""
    name: str                  # ETABS object label
    joint_i: str               # Joint.name
    joint_j: str               # Joint.name
    section: str               # FrameSection.name
    story: Optional[str] = None
    # Derived (not read directly from the file): None / empty until a later
    # tagging step fills them in.
    object_type: Optional[Literal["column", "beam", "brace", "frame"]] = None
    # default_factory gives every frame its own LocationInfo instance.
    location: LocationInfo = field(default_factory=LocationInfo)
    # Extra fields kept as raw strings:
    raw_fields: Dict[str, str] = field(default_factory=dict)


# --- Loads ---

@dataclass
class LoadPattern:
    """A load pattern definition with its ETABS load type."""
    name: str                  # pattern name, e.g. "DEAD", "LL", "WINDX"
    load_type: str             # "DEAD", "LIVE", "WIND", etc. (ETABS type)
    self_weight_multiplier: float = 0.0  # defaults to 0.0 when not supplied
    raw_fields: Dict[str, str] = field(default_factory=dict)  # extra fields as raw strings


@dataclass
class LoadCase:
    """A load case definition."""
    name: str
    case_type: str             # "Linear Static", etc.
    pattern: Optional[str] = None   # associated primary pattern if simple
    is_auto: bool = False      # NOTE(review): presumably marks auto-generated cases — confirm
    raw_fields: Dict[str, str] = field(default_factory=dict)  # extra fields as raw strings


@dataclass
class LoadComboTerm:
    """One term of a load combination: a referenced name and its factor."""
    name: str                  # pattern/case/combo name
    factor: float              # factor applied to the referenced item


@dataclass
class LoadCombo:
    """A load combination made of a list of LoadComboTerm entries."""
    name: str
    design_type: Optional[str] = None  # "Strength", "Service", etc.
    terms: List[LoadComboTerm] = field(default_factory=list)  # empty list by default
    raw_fields: Dict[str, str] = field(default_factory=dict)  # extra fields as raw strings


# --- Model root ---

@dataclass
class EtabsModel:
    """Root container for everything parsed from one snapshot.

    Only ``program_info`` is required; every collection starts empty.
    NOTE(review): the dict collections are presumably keyed by each object's
    ``name`` — confirm against the parser implementation.
    """
    program_info: ProgramInfo
    stories: Dict[str, Story] = field(default_factory=dict)
    grids: List[GridLine] = field(default_factory=list)
    joints: Dict[str, Joint] = field(default_factory=dict)
    frames: Dict[str, FrameObject] = field(default_factory=dict)
    materials: Dict[str, Material] = field(default_factory=dict)
    frame_sections: Dict[str, FrameSection] = field(default_factory=dict)
    load_patterns: Dict[str, LoadPattern] = field(default_factory=dict)
    load_cases: Dict[str, LoadCase] = field(default_factory=dict)
    load_combos: Dict[str, LoadCombo] = field(default_factory=dict)

    # Optional: raw sections that we don't yet understand, kept as lists of
    # text lines per section.
    raw_sections: Dict[str, List[str]] = field(default_factory=dict)
```

# **2. Parsing API (`parser.py`)**

The idea: we pipe an `.$et` file in and get an `EtabsModel` out.

```python
from pathlib import Path
from .model import EtabsModel

def parse_et_file(path: str | Path) -> EtabsModel:
    """
    Parse an ETABS .$et (or .e2k) model text file into an EtabsModel.

    Args:
        path: Location of the text file, as a string or Path.

    Returns:
        The EtabsModel built from the file.

    Planned responsibilities (not yet implemented):
    - Read file as text.
    - Split into sections using lines starting with '$ ' as headers.
    - For each known section, call a section-specific parser function.
    - Build and return an EtabsModel instance.
    - Unknown sections are stored in EtabsModel.raw_sections.
    """
    ...


def parse_sections_from_text(text: str) -> dict[str, list[str]]:
    """
    Lower-level helper: split raw text into a mapping of
    section name to that section's lines, header line excluded:
        {section_name: [lines_without_header]}

    Example input:
        "$ JOINT COORDINATES"
        "   J1   0.0 0.0 0.0"
        "$ FRAME OBJECTS"
        "   F1   J1  J2  ..."

    Returns:
        {"JOINT COORDINATES": [...], "FRAME OBJECTS": [...], ...}

    Not yet implemented.
    """
    ...
```

Section-specific parsers (internal):

```python
from typing import Iterable
from .model import (
    Joint,
    FrameObject,
    Story,
    GridLine,
    Material,
    FrameSection,
    LoadPattern,
    LoadCase,
    LoadCombo,
)

def parse_story_data(lines: Iterable[str]) -> dict[str, Story]:
    """Parse story section lines into a mapping of Story objects (stub).

    NOTE(review): keys are presumably Story.name — confirm once implemented.
    """
    ...

def parse_grid_lines(lines: Iterable[str]) -> list[GridLine]:
    """Parse grid section lines into a list of GridLine objects (stub)."""
    ...

def parse_joint_coordinates(lines: Iterable[str]) -> dict[str, Joint]:
    """Parse joint coordinate lines into a mapping of Joint objects (stub).

    NOTE(review): keys are presumably Joint.name — confirm once implemented.
    """
    ...

def parse_frame_objects(lines: Iterable[str]) -> dict[str, FrameObject]:
    """Parse frame object lines into a mapping of FrameObject objects (stub).

    NOTE(review): keys are presumably FrameObject.name — confirm once implemented.
    """
    ...

def parse_material_properties(lines: Iterable[str]) -> dict[str, Material]:
    """Parse material property lines into a mapping of Material objects (stub).

    NOTE(review): keys are presumably Material.name — confirm once implemented.
    """
    ...

def parse_frame_sections(lines: Iterable[str]) -> dict[str, FrameSection]:
    """Parse frame section lines into a mapping of FrameSection objects (stub).

    NOTE(review): keys are presumably FrameSection.name — confirm once implemented.
    """
    ...

def parse_load_patterns(lines: Iterable[str]) -> dict[str, LoadPattern]:
    """Parse load pattern lines into a mapping of LoadPattern objects (stub).

    NOTE(review): keys are presumably LoadPattern.name — confirm once implemented.
    """
    ...

def parse_load_cases(lines: Iterable[str]) -> dict[str, LoadCase]:
    """Parse load case lines into a mapping of LoadCase objects (stub).

    NOTE(review): keys are presumably LoadCase.name — confirm once implemented.
    """
    ...

def parse_load_combinations(lines: Iterable[str]) -> dict[str, LoadCombo]:
    """Parse load combination lines into a mapping of LoadCombo objects (stub).

    NOTE(review): keys are presumably LoadCombo.name — confirm once implemented.
    """
    ...
```

# **3. Location tagging (`location.py`)**

Once we have an `EtabsModel`, we want to populate story and grid tags for joints and frames.

```python
from .model import EtabsModel, Joint, FrameObject, LocationInfo

def attach_story_and_grid_tags(model: EtabsModel, *, coord_tol: float = 1e-3) -> None:
    """
    Tag joints and frames with story/grid information, mutating the model in-place.

    - For each Joint, infers story, grid_x, grid_y from story elevations & grid lines.
    - For each FrameObject, infers object_type (column/beam/brace) & LocationInfo.

    Args:
        model: The model to annotate; nothing is returned.
        coord_tol: Keyword-only tolerance, default 1e-3.
            NOTE(review): presumably the absolute distance within which a
            coordinate counts as lying on a story or grid line — confirm.

    This should be called after parse_et_file() and before diffing.
    Not yet implemented.
    """
    ...


def classify_frame_object(frame: FrameObject, model: EtabsModel) -> None:
    """
    Update frame.object_type based on orientation and section name.

    The frame is modified in place and nothing is returned.
    NOTE(review): `model` is presumably needed to look up the frame's joints
    and section — confirm once implemented.
    """
    ...
```

# **4. Diff engine (`diffing.py`)**

## 4.1. Raw changes

Think of this as “low-level diff objects” before aggregation.

```python
from dataclasses import dataclass
from typing import Any, Dict, List, Literal, Optional
from .model import EtabsModel, FrameObject, FrameSection, Material, LoadCombo, LoadPattern, LoadCase, LocationInfo


@dataclass
class FieldChange:
    """A single field whose value differs between two versions of an object."""
    field: str   # name of the field that changed
    old: Any     # value before the change
    new: Any     # value after the change


@dataclass
class ObjectAdded:
    """Raw diff record for an object that exists only in the new model."""
    object_type: str           # "frame", "joint", "material", "load_combo", ...
    key: str                   # e.g. frame name
    new_data: Dict[str, Any]   # serialized snapshot


@dataclass
class ObjectRemoved:
    """Raw diff record for an object that exists only in the old model."""
    object_type: str           # same vocabulary as ObjectAdded.object_type
    key: str                   # identifying key of the removed object
    old_data: Dict[str, Any]   # serialized snapshot of the removed object


@dataclass
class ObjectModified:
    """Raw diff record for an object present in both models with changed fields."""
    object_type: str
    key: str
    changes: List[FieldChange]  # one entry per changed field
    # Optional helper fields:
    location: Optional[LocationInfo] = None  # for frames/joints/areas


@dataclass
class RawDiff:
    """
    Raw, un-aggregated changes grouped by type.

    All three lists are required constructor arguments (no defaults).
    """
    added: List[ObjectAdded]
    removed: List[ObjectRemoved]
    modified: List[ObjectModified]
```

## 4.2. Diff entrypoint

Entry point for computing a raw diff between two models. We’ll likely have internal helpers as well.

```python
def diff_models(
    old: EtabsModel,
    new: EtabsModel,
    *,
    numeric_tol: dict[str, float] | None = None,
) -> RawDiff:
    """
    Compare two EtabsModel instances and produce a RawDiff.

    Args:
        old: Model treated as the baseline.
        new: Model compared against the baseline.
        numeric_tol: Optional mapping from field name to absolute tolerance;
            numeric differences smaller than the tolerance are treated as
            unchanged.

    Object identity:
        - For collections keyed by name (materials, frame sections, load combos),
          objects are matched by that name.
        - For frames/joints, rely primarily on object name; identity heuristics
          can be extended later if needed.

    Not yet implemented.
    """
    ...


def diff_mapping(
    object_type: str,
    old_map: Dict[str, Any],
    new_map: Dict[str, Any],
    *,
    field_exclude: set[str] | None = None,
    numeric_tol: dict[str, float] | None = None,
) -> RawDiff:
    """
    Diff two string-keyed mappings of objects of one type into a RawDiff (stub).

    Args:
        object_type: Label for the kind of object being compared.
        old_map: Objects from the old model, by key.
        new_map: Objects from the new model, by key.
        field_exclude: NOTE(review): presumably field names to skip when
            comparing — confirm once implemented.
        numeric_tol: NOTE(review): presumably per-field absolute tolerances,
            as in diff_models — confirm once implemented.
    """
    ...
```

# **5. Aggregation layer (`aggregate.py`)**

## 5.1. Aggregated change types

These are closer to “designer-speak”.

```python
from dataclasses import dataclass, field
from typing import List, Literal, Optional, Dict, Any
from .model import LocationInfo
from .diffing import FieldChange


@dataclass
class SectionSwapCluster:
    """
    Aggregated change: group of frames where the section changed from A → B
    under some common location / object-type pattern.

    The first five fields are required; ``example_objects`` defaults to a
    fresh empty list per instance and ``grid_region`` defaults to None.
    """
    object_type: Literal["column", "beam", "brace", "frame"]
    story: Optional[str]                     # e.g. "L14" or None
    old_section: str                         # section name before the change
    new_section: str                         # section name after the change
    count: int                               # number of frames in the cluster
    example_objects: List[str] = field(default_factory=list)  # sample object names
    grid_region: Optional[Dict[str, Any]] = None   # e.g. {"grid_x": ["A","D"], "grid_y": ["1","4"]}


@dataclass
class LoadComboChange:
    """
    Changes to a load combination definition.

    ``change_type`` says whether the combo was added, removed or modified;
    the three optional lists default to None.
    """
    name: str
    change_type: Literal["added", "removed", "modified"]
    # NOTE(review): the dict shape of a term is not defined here — presumably
    # a serialized LoadComboTerm; confirm.
    old_terms: Optional[List[Dict[str, Any]]] = None
    new_terms: Optional[List[Dict[str, Any]]] = None
    # For "modified", maybe a list of term-level diffs:
    term_changes: Optional[List[Dict[str, Any]]] = None


@dataclass
class MaterialPropertyChange:
    """Changed fields of one material."""
    material: str                          # NOTE(review): presumably Material.name — confirm
    changed_fields: Dict[str, FieldChange]  # NOTE(review): presumably keyed by field name — confirm


@dataclass
class AggregatedDiff:
    """
    High-level, user-facing changes grouped by category.

    Every category defaults to its own empty list, so AggregatedDiff() is a
    valid "no changes" value.
    """
    section_swaps: List[SectionSwapCluster] = field(default_factory=list)
    load_combo_changes: List[LoadComboChange] = field(default_factory=list)
    material_changes: List[MaterialPropertyChange] = field(default_factory=list)
    # Untyped for now: each geometry change is a free-form dict.
    geometry_changes: List[Dict[str, Any]] = field(default_factory=list)
    # catch-all for other categories:
    other_changes: List[Dict[str, Any]] = field(default_factory=list)
```

## 5.2. Aggregation API

API for turning low-level raw diffs into aggregated, designer-friendly changes.

```python
from .diffing import RawDiff
from .model import EtabsModel


def aggregate_diff(
    raw_diff: RawDiff,
    old: EtabsModel,
    new: EtabsModel,
) -> AggregatedDiff:
    """
    Convert low-level RawDiff into AggregatedDiff.

    Args:
        raw_diff: The un-aggregated changes to group.
        old: The old model the diff was computed from.
        new: The new model the diff was computed from.

    Planned steps (not yet implemented):
    - group frame section changes into SectionSwapClusters
    - map object locations to stories/grids
    - treat added/removed load combos as LoadComboChange objects
    - identify significant material property changes
    """
    ...
```

# **6. Summarization & LLM interface (`summarize.py`)**

## 6.1. LLM client abstraction

Keep it lightweight so we can plug in OpenAI, local models, etc.

```python
from dataclasses import asdict
from typing import Protocol, Literal
from .aggregate import AggregatedDiff


class LLMClient(Protocol):
    """
    Abstract protocol so we can swap OpenAI / local models.

    Any object with a matching ``summarize`` method satisfies it.
    """

    def summarize(self, prompt: str) -> str:
        """Take a prompt string and return the summary text as a string."""
        ...


def build_summary_prompt(
    old_label: str,
    new_label: str,
    aggregated: AggregatedDiff,
    *,
    style: Literal["short", "detailed"] = "short",
) -> str:
    """
    Build a text prompt for the LLM using AggregatedDiff.

    Args:
        old_label: Human-readable label for the old version.
        new_label: Human-readable label for the new version.
        aggregated: The aggregated changes to describe.
        style: Keyword-only; "short" (default) or "detailed".

    The prompt should encourage the model to:
        - Group changes by category.
        - Use 'columns on Story L14' language.
        - Avoid listing every object for large clusters (show counts).

    Not yet implemented.
    """
    # Planned implementation: format aggregated diff into a JSON-like summary
    # and embed it in a system + user prompt template.
    ...


def summarize_diff_to_markdown(
    llm: LLMClient,
    old_label: str,
    new_label: str,
    aggregated: AggregatedDiff,
    *,
    style: Literal["short", "detailed"] = "short",
) -> str:
    """
    Produce a markdown summary of an aggregated diff.

    Builds the prompt with build_summary_prompt() (forwarding both labels,
    the aggregated diff and the style) and returns whatever
    ``llm.summarize`` gives back for that prompt.
    """
    return llm.summarize(
        build_summary_prompt(old_label, new_label, aggregated, style=style)
    )
```

# **7. Versioning & log management (`versioning.py`)**

This layer keeps track of multiple snapshots and log entries.

```python
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Literal
from .model import EtabsModel
from .diffing import RawDiff
from .aggregate import AggregatedDiff
from .summarize import LLMClient


@dataclass
class ModelVersion:
    """Metadata describing one registered snapshot."""
    id: str                   # opaque or derived from timestamp/commit
    path: Path                # path to .$et
    created_at: datetime      # NOTE(review): timezone handling is not specified here
    label: Optional[str] = None   # human-readable label, e.g. "v12 – 2025-11-13"


@dataclass
class ModelLogEntry:
    """One log entry: the diff between two versions at every level of detail."""
    version_old: ModelVersion
    version_new: ModelVersion
    raw_diff: RawDiff                  # low-level changes
    aggregated_diff: AggregatedDiff    # grouped, user-facing changes
    summary_markdown: str              # markdown summary text
    created_at: datetime               # NOTE(review): presumably when the entry was computed — confirm


class VersionStore:
    """
    Manages a collection of EtabsModel snapshots & diff logs
    for a given project/model.

    The only state set up here is ``root_dir``; every method other than
    the constructor is still a stub.
    """

    def __init__(self, root_dir: Path):
        """Store ``root_dir`` on the instance; nothing else happens here."""
        self.root_dir = root_dir

    def register_snapshot(self, et_path: Path, *, label: Optional[str] = None) -> ModelVersion:
        """
        Copy or link the .$et file into the store, register metadata.

        Args:
            et_path: Path of the snapshot file to register.
            label: Optional keyword-only label for the new version.

        Returns:
            A ModelVersion object describing the registered snapshot.
        """
        ...

    def list_versions(self) -> List[ModelVersion]:
        """Return the registered versions (stub; ordering not yet defined)."""
        ...

    def get_version(self, version_id: str) -> ModelVersion:
        """Return the version with the given id (stub).

        NOTE(review): behavior for an unknown id is not yet defined.
        """
        ...

    def compute_log_entry(
        self,
        old_version: ModelVersion,
        new_version: ModelVersion,
        *,
        llm: Optional[LLMClient] = None,
        style: Literal["short", "detailed"] = "short",
    ) -> ModelLogEntry:
        """
        Parse both models, tag locations, diff, aggregate, optionally summarize.

        Args:
            old_version: Version used as the baseline.
            new_version: Version compared against the baseline.
            llm: Optional keyword-only client; defaults to None.
                NOTE(review): presumably summarization is skipped when this
                is None — confirm once implemented.
            style: Keyword-only summary style, "short" (default) or "detailed".
        """
        ...
```

# **8. CLI sketch (`cli.py`)**

Thin wrapper around the above APIs.

```python
import argparse
from pathlib import Path
from .parser import parse_et_file
from .location import attach_story_and_grid_tags
from .diffing import diff_models
from .aggregate import aggregate_diff
from .summarize import summarize_diff_to_markdown
from .versioning import VersionStore


def main(argv=None):
    """
    Entry point for the ``etabs-log`` command-line tool.

    Two subcommands are defined:
    - ``diff old new [--style short|detailed]``: parse both files, tag
      locations, diff, aggregate, and print a markdown summary.
    - ``snapshot project_root model_et``: register a snapshot in the
      project's VersionStore and print its id and path.

    ``argv`` is passed straight to argparse.
    """
    cli = argparse.ArgumentParser(prog="etabs-log")
    commands = cli.add_subparsers(dest="cmd", required=True)

    # etabs-log diff old.et new.et
    diff_cmd = commands.add_parser("diff")
    diff_cmd.add_argument("old")
    diff_cmd.add_argument("new")
    diff_cmd.add_argument("--style", choices=["short", "detailed"], default="short")

    # etabs-log snapshot project_root model.et
    snapshot_cmd = commands.add_parser("snapshot")
    snapshot_cmd.add_argument("project_root")
    snapshot_cmd.add_argument("model_et")

    options = cli.parse_args(argv)

    if options.cmd == "snapshot":
        store = VersionStore(Path(options.project_root))
        v = store.register_snapshot(Path(options.model_et))
        print(f"Registered snapshot: {v.id} ({v.path})")
        return

    if options.cmd != "diff":
        return

    # Parse both files first, then tag them, in old-then-new order.
    old_model, new_model = parse_et_file(options.old), parse_et_file(options.new)
    for model in (old_model, new_model):
        attach_story_and_grid_tags(model)

    raw_changes = diff_models(old_model, new_model)
    aggregated = aggregate_diff(raw_changes, old_model, new_model)

    # For now: no real LLM client; just pretty-print aggregated diff
    from .summarize import DummyLLMClient  # define a stub
    print(
        summarize_diff_to_markdown(
            DummyLLMClient(), options.old, options.new, aggregated, style=options.style
        )
    )
```

# **9. MCP server entrypoints (`mcp_server.py`)**

Very rough – the idea is to expose tools for listing versions and getting diffs.

```python
# Pseudo-code, not full MCP boilerplate; assumes `Path` (pathlib) and `VersionStore` (.versioning) are already imported.

def tool_list_versions(project_root: str) -> list[dict]:
    """
    List the versions registered under ``project_root``.

    Opens a VersionStore on the given root and returns one dict per version
    with its ``id``, ``label`` and ISO-formatted ``created_at``.
    """
    summaries = []
    for version in VersionStore(Path(project_root)).list_versions():
        summaries.append(
            {
                "id": version.id,
                "label": version.label,
                "created_at": version.created_at.isoformat(),
            }
        )
    return summaries


def tool_get_diff(project_root: str, old_id: str, new_id: str) -> dict:
    """
    Look up two versions by id in the project's VersionStore and diff them.

    Only the version lookup is written so far; the parse/diff/aggregate
    steps and the shape of the returned dict are still to be defined.
    """
    store = VersionStore(Path(project_root))
    old_v = store.get_version(old_id)
    new_v = store.get_version(new_id)

    # TODO: parse, diff, aggregate
    # (optionally cache these results on disk to avoid repetition)
    ...
```

# Next steps...

* Fill in these modules more concretely (e.g. a realistic `parse_sections_from_text` and a simple `$ JOINT COORDINATES` parser).
* Design a `DummyLLMClient` that just renders `AggregatedDiff` into Markdown without calling an actual model (good for testing the pipeline before wiring in a real API).