The following details the steps taken to update the `dict.txt` file per case [Update dict.txt - remove xylazine terms](https://github.com/suzytamang/clever-rockies/issues/19).

This notebook treats each modification as a separate and distinct series of actions (deletes), which will in turn be used to inform case [Support Feature: Renumbering and Validation Script](https://github.com/suzytamang/clever-rockies/issues/22).

In [None]:
# dataclass to hold terminology entry

from dataclasses import dataclass
from typing import Any, Callable


@dataclass
class Term:
    """A single terminology entry.

    The field names are the same strings used as column labels when
    ``dict.txt`` is loaded into a DataFrame in this notebook.
    """

    # Identifier of the entry.
    id: int
    # The term text itself.
    term: str
    # NOTE(review): presumably the finer-grained category the term belongs
    # to -- confirm against the dict.txt conventions.
    subclass_name: str
    # NOTE(review): presumably the top-level category of the term -- confirm.
    class_name: str

In [None]:
# Need to load existing copy of dict.txt
import pandas as pd

# Labels assigned, in order, to the four pipe-separated fields of dict.txt.
ID_LABEL = "id"
TERM_LABEL = "term"
SUBCLASS_LABEL = "subclass_name"
CLASS_LABEL = "class_name"

# Location of the terminology file on the local workstation.
TERMINOLOGY_PATH = r"D:\DaxWorkspace\PERC\clever-rockies\res\dicts\dict.txt"

# Column names are supplied explicitly via ``names``, so the first line of
# the file is read as data rather than as a header row.
terminology_df: pd.DataFrame = pd.read_csv(
    TERMINOLOGY_PATH,
    sep="|",
    names=[ID_LABEL, TERM_LABEL, SUBCLASS_LABEL, CLASS_LABEL],
)

# Preview the first ten entries in the notebook output.
terminology_df.head(10)

In [None]:
# pre-check validations
# - Does the term in question exist?
# - Are there any validation errors in the terminology?


class TermValidator:
    """Callable that checks a single term against a set of rules.

    Each rule is a callable that takes the term and returns a truthy value
    when the term passes. A term is valid only when every rule passes.
    """

    def __init__(self, *rules: Callable[["Term"], bool]) -> None:
        """Store the validation rules.

        Args:
            *rules: Zero or more predicates applied to each term. When none
                are given, a single always-true rule is installed so that
                every term is treated as valid.
        """
        # Build the list under a new name instead of rebinding the ``*rules``
        # tuple parameter to a different type.
        self._rules: list = list(rules) or [lambda _term: True]

    def __call__(self, term: "Term") -> bool:
        """Return True when ``term`` satisfies every configured rule.

        Evaluation stops at the first rule that fails.
        """
        return all(rule(term) for rule in self._rules)


class TerminologyValidator:
    """Run terminology-wide checks against a DataFrame of terms.

    Calling the instance flags every row with the per-term validator and
    checks the ID and term columns for duplicates. The wrapped DataFrame is
    modified in place: helper columns are added to it.
    """

    # Names of the helper columns this validator writes to the DataFrame.
    _VALID_TERM_LABEL = "is_valid_term"
    _DUPLICATE_ID_LABEL = "is_duplicate_id"

    def __init__(
        self, terms_df: pd.DataFrame, term_validator: TermValidator
    ) -> None:
        """Keep references to the terms DataFrame and the per-term validator.

        Args:
            terms_df: Terminology table with the ID, term, subclass and
                class columns. It is not copied.
            term_validator: Callable applied to each row (as a ``Term``).
        """
        self._term_validator: TermValidator = term_validator
        self._terms_df: pd.DataFrame = terms_df

    def __call__(self) -> bool:
        """Validate the whole terminology.

        Writes one boolean per row to the ``is_valid_term`` column, then
        runs the duplicate-ID and duplicate-term checks.

        Returns:
            True when every row passes the per-term validator and neither
            the ID column nor the term column contains duplicates.
        """
        # Select only the four entry columns so that helper columns added by
        # an earlier run do not end up in the Term constructor.
        entry_columns = [ID_LABEL, TERM_LABEL, SUBCLASS_LABEL, CLASS_LABEL]
        rows = self._terms_df[entry_columns].itertuples(index=False, name=None)
        # Validate row by row; each row becomes the Term the rules expect.
        self._terms_df[self._VALID_TERM_LABEL] = [
            bool(self._term_validator(Term(*row))) for row in rows
        ]

        all_terms_valid = bool(self._terms_df[self._VALID_TERM_LABEL].all())
        has_duplicate_ids = bool(self.check_for_duplicate_ids().any())
        has_duplicate_terms = bool(self.check_for_duplicate_terms().any())
        return all_terms_valid and not (has_duplicate_ids or has_duplicate_terms)

    @property
    def invalid_entries(self) -> pd.DataFrame:
        """Rows that fail at least one check.

        A row is included when its ID or its term occurs more than once
        (every occurrence is listed, so conflicting rows can be compared),
        or when a previous call of this validator flagged it as invalid in
        the ``is_valid_term`` column.
        """
        invalid_mask = self._terms_df[ID_LABEL].duplicated(
            keep=False
        ) | self._terms_df[TERM_LABEL].duplicated(keep=False)
        # The per-term column only exists once the validator has been called.
        if self._VALID_TERM_LABEL in self._terms_df.columns:
            invalid_mask = invalid_mask | ~self._terms_df[
                self._VALID_TERM_LABEL
            ].astype(bool)
        return self._terms_df[invalid_mask]

    def check_for_duplicate_ids(self) -> pd.Series:
        """Flag rows whose ID repeats an earlier row's ID.

        The mask is also stored in the ``is_duplicate_id`` column so the
        duplicates stay visible on the DataFrame.

        Returns:
            Boolean Series aligned with the DataFrame; True marks the second
            and later occurrences of an ID.
        """
        duplicate_mask: pd.Series = self._terms_df[ID_LABEL].duplicated()
        self._terms_df[self._DUPLICATE_ID_LABEL] = duplicate_mask
        return duplicate_mask

    def check_for_duplicate_terms(self) -> pd.Series:
        """Flag rows whose term text repeats an earlier row's term.

        Returns:
            Boolean Series aligned with the DataFrame; True marks the second
            and later occurrences of a term.
        """
        duplicate_mask: pd.Series = self._terms_df[TERM_LABEL].duplicated()
        return duplicate_mask

    def does_term_exist(self, term: Term) -> bool:
        """Return True when exactly one row has the same text as ``term``.

        Raises:
            ValueError: If the term text appears in more than one row.
        """
        term_count = int((self._terms_df[TERM_LABEL] == term.term).sum())
        if term_count > 1:
            # Format the message eagerly; exceptions do not interpolate
            # logging-style "%s" arguments.
            raise ValueError(
                f"More than one instance of term found: {term}"
            )
        return term_count == 1

In [None]:
# Validate the loaded terminology. TermValidator() with no rules accepts
# every term, so only the terminology-wide checks can fail here.
terminology_validator: TerminologyValidator = TerminologyValidator(
    terminology_df, TermValidator()
)

# An expression nested inside an ``if`` body is not echoed by the notebook,
# so print the failing entries explicitly.
if not terminology_validator():
    print(terminology_validator.invalid_entries)