# Parse "Table of Nuclear Electric Quadrupole Moments"

source: https://www-nds.iaea.org/publications/indc/indc-nds-0833/

In [None]:
from fractions import Fraction
from pathlib import Path
import re

import camelot
from camelot.io import read_pdf
import pandas as pd
import numpy as np
import polars as pl

In [None]:
# let polars print up to 100 rows per table so the frames below can be inspected in full
pl.Config.set_tbl_rows(100)

In [None]:
# Location of the report PDF (INDC(NDS)-0833) relative to this notebook.
pdf = Path("../data/indc-nds-0833.pdf")

# Extract the tables with camelot's text-based "stream" parser, using the same
# hand-picked table region and column separators on every requested page.
tabs = read_pdf(
    str(pdf),
    flavor="stream",
    pages="19-66",
    # region to parse, given to camelot as "x1,y1,x2,y2"
    table_areas=["50,545,701,50"],
    # x coordinates of the nine separators between the ten table columns
    columns=["115,171,212,261,306,385,444,508,585"],
)

In [None]:
# Clean every extracted table: blank cells become missing values and rows
# that are empty across all columns are discarded.
frames = []
for table in tabs:
    cleaned = table.df.replace("", None)
    frames.append(cleaned.dropna(axis="index", how="all"))

# Stack the tables, skip the very first row and name the columns by hand.
df = pd.concat(frames).iloc[1:]
df.columns = [
    "Element",
    "Isotope",
    "E(level)",
    "T1/2",
    "s/p",
    "Q(b)",
    "ref.std.",
    "method",
    "NSR Keynumber",
    "Journal Reference",
]

# Persist the raw extraction so the PDF does not have to be parsed again.
df.to_parquet("nuc-el-quad-mom-raw.parquet")

## Process extracted table

In [None]:
qmom = (
    pl.read_parquet("nuc-el-quad-mom-raw.parquet")
    .select(["Isotope", "E(level)", "s/p", "Q(b)"])
    # rows without a Q(b) entry carry no quadrupole moment
    .drop_nulls(subset="Q(b)")
    # keep only ground state nuclei, i.e. level energy "0"
    .filter(pl.col("E(level)") == "0")
)

# split the isotope label into atomic number, symbol and mass number;
# the unnamed regex groups become struct fields "1", "2" and "3"
pattern = r"(\d+)\s*([A-Za-z]+)\s*(\d+)"
isotope_parts = pl.col("Isotope").str.extract_groups(pattern)
qmom = qmom.with_columns(
    atomic_number=isotope_parts.struct.field("1").cast(pl.Int64),
    symbol=isotope_parts.struct.field("2"),
    mass_number=isotope_parts.struct.field("3").cast(pl.Int64),
)

In [None]:
# split the "s/p" column into its spin part (integer or fraction such as 3/2)
# and an optional parity sign; both end up as new string columns
spin_re = r"(?P<spin>\d+/\d+|\d+)(?P<parity>[+-]?)"
spin_parity = pl.col("s/p").str.extract_groups(spin_re)
qmom = qmom.with_columns(captures=spin_parity).unnest("captures")

In [None]:
# parse nuclear electric quadrupole moments and uncertainties
#
# The regex splits a "Q(b)" entry into an optional sign, the integer digits,
# the optional decimal digits and the uncertainty digits (optionally wrapped
# in parentheses).  The uncertainty is then scaled by the number of decimal
# digits of the value, i.e. it is read as being in units of the last digit.
val_unc_re = r"(?P<value_sign>[+-])?(?P<value_lead>\d+)\.?(?P<value_decimals>\d+)?\s*\(?(?P<value_unc>\d+(?:\.\d+)?)\)?"
qmom = (
    qmom.with_columns(captures=pl.col("Q(b)").str.extract_groups(val_unc_re))
    .unnest("captures")
    .with_columns(
        # number of digits after the decimal point; null when there are none
        value_precision=pl.col("value_decimals").str.len_chars().cast(pl.Int32),
    )
    .with_columns(
        # Float64 keeps the tabulated decimals intact; Float32 would add
        # single-precision rounding noise to the values stored later on
        quadrupole_moment=pl.concat_str(
            pl.col("value_sign"),
            pl.col("value_lead"),
            pl.lit("."),
            pl.col("value_decimals"),
            ignore_nulls=True,
        ).cast(pl.Float64),
        # a value without decimals has precision 0 (scale factor 1) instead
        # of a null precision that would wipe out the parsed uncertainty
        quadrupole_moment_uncertainty=pl.lit(10.0, dtype=pl.Float64).pow(
            -pl.col("value_precision").fill_null(0).cast(pl.Float64)
        )
        * pl.col("value_unc").cast(pl.Float64),
    )
)

## Fetch isotope table for comparison

In [None]:
from mendeleev.fetch import fetch_table

In [None]:
# load mendeleev's "isotopes" table and convert it from pandas to polars
# so it can be joined with the parsed values
isotopes = pl.from_pandas(fetch_table("isotopes"))
isotopes

In [None]:
# Side-by-side view of the parsed values and the ones in the isotopes table;
# clashing columns coming from the isotopes table get the "_true" suffix.
join_keys = ["atomic_number", "mass_number"]
compared_cols = [
    "spin",
    "parity",
    "quadrupole_moment",
    "quadrupole_moment_uncertainty",
]
parsed = qmom.select(["atomic_number", "symbol", "mass_number", *compared_cols])
comparison = parsed.join(isotopes, on=join_keys, how="left", suffix="_true")
# interleave each parsed column with its "_true" counterpart for easy reading
comparison.select(
    ["atomic_number", "symbol", "mass_number"]
    + [name for col in compared_cols for name in (col, f"{col}_true")]
)

## Update values in mendeleev db

In [None]:
from mendeleev.db import get_session, get_engine
from mendeleev.models import Isotope

In [None]:
# open a session with read_only=False so the updates below can be committed
session = get_session(read_only=False)

In [None]:
# Columns of `qmom` that are written to the matching Isotope rows.
fields = {"spin", "parity", "quadrupole_moment", "quadrupole_moment_uncertainty"}
try:
    for row in qmom.iter_rows(named=True):
        # bulk-update the isotope identified by atomic and mass number
        (
            session.query(Isotope)
            .filter_by(
                atomic_number=row["atomic_number"], mass_number=row["mass_number"]
            )
            .update({k: v for k, v in row.items() if k in fields})
        )
    # commit once so that either every row is updated or none is;
    # for a dry run call session.rollback() here instead
    session.commit()
except Exception:
    # leave the database untouched when any single update fails
    session.rollback()
    raise
finally:
    session.close()

## Validate

In [None]:
from mendeleev import H

In [None]:
# spot check: read the stored quadrupole moment back for hydrogen's isotope 2
# (presumably the mass number, i.e. deuterium — confirm against mendeleev docs)
H.isotope(2).quadrupole_moment