In [1]:
# ─── 0.  Setup ─────────────────────────────────────────────────────────────
from pathlib import Path
from Ashaar.bait_analysis import BaitAnalysis
import tensorflow as tf, functools, torch, numpy as np

# Disable eager for TF-1.x code inside Ashaar
tf.compat.v1.disable_eager_execution()
torch.load = functools.partial(torch.load, weights_only=False)

# ─── 1.  (Re)build a fresh analyser instance ───────────────────────────────
root      = Path.cwd()                        # folder that holds poetry_diacritizer/
analysis  = BaitAnalysis(abs_path=str(root))  # use_cbhg=True by default

# Restore the meter weights
ckpt = root / "deep-learning-models/meters_model/cp.ckpt"
analysis.METERS_MODEL.load_weights(str(ckpt)).expect_partial()

# ─── 2.  Patch ONLY the `clean()` method (safe, non-recursive) ─────────────
import regex as re
from types import MethodType

# Keep an *immutable* reference to the original cleaner
_orig_clean = analysis.text_encoder.clean            # bound method

# Pre-compiled regex that recognises Arabic ḥarakāt
_ARABIC_DIAC = re.compile(r'[\u064B-\u065F]')        # Fathatan → Sukūn

def smart_clean(self, txt: str) -> str:
    """
    If the string already contains diacritics → return unchanged.
    Otherwise → run the *original* normaliser exactly once.
    """
    if _ARABIC_DIAC.search(txt):
        return txt
    return _orig_clean(txt)

# Bind the wrapper back onto the *existing* encoder object
analysis.text_encoder.clean = MethodType(smart_clean, analysis.text_encoder)

# Optional: expose both versions for debugging / unit tests
analysis.text_encoder.clean_raw   = _orig_clean
analysis.text_encoder.clean_smart = analysis.text_encoder.clean

# ─── 3.  Sanity check ──────────────────────────────────────────────────────
bayt = "ألا ليت شعري هل أبيتن ليلة # بجنب الغضى أزجي القلاص النواجيا"
result = analysis.analyze(bayt, override_tashkeel=False)

print("\n— ANALYSIS —")
for k, v in result.items():
    if isinstance(v, list):
        print(f"{k:18}: {v[:3]}{' …' if len(v) > 3 else ''}")   # shorten long lists
    else:
        print(f"{k:18}: {v}")


2025-06-25 00:54:50.171172: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-25 00:54:50.231880: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-25 00:54:50.233716: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Exporting the pretrained models ... 
File exists: deep-learning-models.zip
load diacritization model ... 
loading from /teamspace/studios/this_studio/Ashaar/deep-learning-models/log_dir_ashaar/ashaar_proc.base.cbhg/models/10000-snapshot.pt
load meter classification model ...


2025-06-25 00:55:02.986959: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-06-25 00:55:02.990858: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


TypeError: in user code:


    TypeError: outer_factory.<locals>.inner_factory.<locals>.tf__call() missing 1 required positional argument: 'training'
