# Parse "TABLE OF RECOMMENDED NUCLEAR MAGNETIC DIPOLE MOMENTS: PART I, LONG-LIVED STATES"

source: https://www-nds.iaea.org/publications/indc/indc-nds-0794/

In [None]:
from fractions import Fraction
import re

from camelot.io import read_pdf
import pandas as pd
import numpy as np
import polars as pl

In [None]:
# let polars print up to 100 rows of a table instead of its default limit
pl.Config.set_tbl_rows(100)

In [None]:
# names assigned to the columns of the extracted table, in order
columns = [
    "Nucleus",
    "Ex",
    "T1/2",
    "Jπ",
    "m(nm)",
    "Method",
    "NSR Keynumber",
    "Journal Reference",
]

# extract the tables from pages 13-43 of the PDF
tabs = read_pdf("indc-nds-0794.pdf", pages="13-43", flavor="lattice")

# clean each extracted table: empty strings are replaced with None and rows
# that end up completely empty are discarded
frames = []
for tab in tabs:
    cleaned = tab.df.replace("", None)
    frames.append(cleaned.dropna(axis="index", how="all"))

# stack the per-table frames into a single dataframe
df = pd.concat(frames)

# skip the very first row (presumably the header row of the first table)
# and label the columns
df = df.iloc[1:]
df.columns = columns

# cache the raw extraction so later cells can start from the parquet file
df.to_parquet("raw_parse.parquet")

## Process extracted table

In [None]:
# reload the cached extraction and discard the columns that are not parsed below
raw = pd.read_parquet("raw_parse.parquet")
unused_columns = ["T1/2", "Method", "NSR Keynumber", "Journal Reference"]
df = pl.from_pandas(raw).drop(unused_columns)

In [None]:
# exclude the entries for the neutron and the antiproton
excluded = ["0 n 1", "antiproton"]
df = df.filter(~pl.col("Nucleus").is_in(excluded))

# drop excited states and keep only ground states (rows with Ex equal to "0")
df = df.filter(pl.col("Ex") == "0")

# split the nucleus label into atomic number, symbol and mass number;
# the three regex groups are stored in a struct column named "captures"
pattern = r"(\d+)\s*([A-Za-z]+)\s*(\d+)"
df = df.with_columns(captures=pl.col("Nucleus").str.extract_groups(pattern))
df = df.with_columns(
    atomic_number=pl.col("captures").struct.field("1").cast(pl.Int64),
    symbol=pl.col("captures").struct.field("2"),
    mass_number=pl.col("captures").struct.field("3").cast(pl.Int64),
)

In [None]:
# split Jπ into a "spin" column (integer or fraction such as 3/2) and a
# "parity" column (+, - or empty), named after the regex groups
spin_re = r"(?P<spin>\d+/\d+|\d+)(?P<parity>[+-]?)"
spin_parity = pl.col("Jπ").str.extract_groups(spin_re)
df = df.with_columns(captures=spin_parity)
df = df.unnest("captures")

In [None]:
# parse nuclear magnetic dipole moments and uncertainties
# Entries are matched as "<sign><lead>.<decimals>(<unc>)". The digits in the
# parentheses are scaled by 10**-len(decimals) below, i.e. they are treated
# as an uncertainty expressed in units of the last quoted decimal place.
val_unc_re = r"(?P<value_sign>[+-])?(?P<value_lead>\d+)\.?(?P<value_decimals>\d+)?\s*\(?(?P<value_unc>\d+(?:\.\d+)?)\)?"
df = (
    df.with_columns(captures=pl.col("m(nm)").str.extract_groups(val_unc_re))
    .unnest("captures")
    .with_columns(
        # number of quoted decimals, used to scale the uncertainty digits
        value_precision=pl.col("value_decimals").str.len_chars().cast(pl.Int32),
    )
    .with_columns(
        # Float64 is used throughout: Float32 carries only ~7 significant
        # digits and would truncate values quoted with more decimals.
        magnetic_moment=pl.concat_str(
            pl.col("value_sign"),
            pl.col("value_lead"),
            pl.lit("."),
            pl.col("value_decimals"),
            ignore_nulls=True,
        ).cast(pl.Float64),
        magnetic_moment_unc=pl.lit(10.0, dtype=pl.Float64).pow(
            -pl.col("value_precision").cast(pl.Float64)
        )
        * pl.col("value_unc").cast(pl.Float64),
    )
)

# compute g-factor as magnetic moment divided by spin
# The spin string ("3/2", "1", ...) is converted through Fraction and then
# explicitly to float so the returned Python type matches return_dtype.
spin_value = pl.col("spin").map_elements(
    lambda x: float(Fraction(x)), return_dtype=pl.Float64
)
df = df.with_columns(
    g_factor=pl.col("magnetic_moment") / spin_value,
    g_factor_uncertainty=pl.col("magnetic_moment_unc") / spin_value,
)

In [None]:
# keep only the parsed quantities, together with the raw "m(nm)" text
selected_columns = [
    "atomic_number",
    "mass_number",
    "symbol",
    "spin",
    "parity",
    "m(nm)",
    "magnetic_moment",
    "g_factor",
    "g_factor_uncertainty",
    "magnetic_moment_unc",
]
new = df.select(selected_columns)

In [None]:
# display the selected columns for visual inspection
new

## Fetch isotope table for comparison

In [None]:
from mendeleev.fetch import fetch_table

In [None]:
# fetch the "isotopes" table from mendeleev and convert it to polars
isotopes_pandas = fetch_table("isotopes")
isotopes = pl.from_pandas(isotopes_pandas)
isotopes

In [None]:
# match parsed rows to the isotopes table on (atomic_number, mass_number);
# clashing columns coming from the isotopes table get the "_true" suffix
join_keys = ["atomic_number", "mass_number"]
shown_columns = [
    "atomic_number",
    "mass_number",
    "symbol",
    "g_factor",
    "g_factor_true",
    "parity",
    "parity_true",
]
casted = new.with_columns([pl.col(key).cast(pl.Int64) for key in join_keys])
joined = casted.join(isotopes, on=join_keys, suffix="_true")
comparison = joined.select(shown_columns)

# show the matched isotopes for which the isotopes table has no g_factor
comparison.filter(pl.col("g_factor_true").is_null())

## Update values in mendeleev db

In [None]:
from mendeleev.db import get_session, get_engine
from mendeleev.models import Isotope

In [None]:
# open a database session with read_only=False so that changes can be written
session = get_session(read_only=False)

In [None]:
# columns of `new` that are written to the Isotope rows; the remaining
# columns are only used to locate the row or for inspection
fields = {"spin", "parity", "g_factor", "g_factor_uncertainty"}
try:
    for row in new.iter_rows(named=True):
        values = {k: v for k, v in row.items() if k in fields}
        # bulk-update the isotope identified by atomic and mass number
        session.query(Isotope).filter_by(
            atomic_number=row["atomic_number"], mass_number=row["mass_number"]
        ).update(values)
    # commit once so that either every isotope is updated or none is
    session.commit()
except Exception:
    # undo any pending changes before propagating the error
    session.rollback()
    raise
finally:
    # release the session even when an update fails
    session.close()

## Validate that new values are available

In [None]:
from mendeleev import K, Ti, Cr

In [None]:
# g-factor of the potassium isotope with mass number 40, read via mendeleev
K.isotope(40).g_factor

In [None]:
# g-factor of the titanium isotope with mass number 47, read via mendeleev
Ti.isotope(47).g_factor

In [None]:
# g-factor of the titanium isotope with mass number 49, read via mendeleev
Ti.isotope(49).g_factor

In [None]:
# g-factor of the chromium isotope with mass number 53, read via mendeleev
Cr.isotope(53).g_factor