In [474]:
import urllib
import os
import pandas as pd
import re
import decimal

from math import pow
from pprint import pprint
from bs4 import BeautifulSoup
from termcolor import colored

In [475]:
Z_MIN = 11
Z_MAX = 15
TIME_CONVERSION = {
    "s": 1, 
    "m": 60, 
    "h": 3600, 
    "d": 86400, 
    "y": 31556952
}

In [476]:
def download_isotopes_list(Z):
    request_url = f"http://nucleardata.nuclear.lu.se/toi/listnuc.asp?sql=&Z={Z}"
    urllib.request.urlretrieve(request_url, f"downloads/find_isotopes/z_{Z}.html")

In [477]:
def download_range(z_min, z_max):
    for Z in range(z_min, z_max + 1):
        download_isotopes_list(Z)

In [478]:
def parse_isotopes_one(Z):
    html_path = f"downloads/find_isotopes/z_{Z}.html"
    isotopes_lst_html = open(html_path, "r")
    bs = BeautifulSoup(isotopes_lst_html.read(), 'lxml')
    table = bs.find_all("table")[0]
    nuclide_lst = table.find_all('th')[9:]
    abbr = str(nuclide_lst[0].find('a')).split('</sup>')[1][:-4]
    out_file_path = f"downloads/find_isotopes_parsed/{Z}_{abbr}.txt"
    out_file = open(out_file_path, "w")
    out_file.write(f"{Z}\n")
    out_file.write(f"{abbr}\n")
    for nuclide in nuclide_lst:
        out_file.write(f"{nuclide.find('sup').get_text()}\n")

    out_file.close()

In [479]:
def parse_isotopes_range(z_min, z_max):
    for Z in range(z_min, z_max + 1):
        parse_isotopes_one(Z)

In [480]:
def download_all_isotopes(Z):
    down_dir = "downloads/find_isotopes_parsed"
    files_lst = os.listdir(down_dir)
    file_name = [s for s in files_lst if str(Z) == s.split('_')[0]]
    isotopes_lst_file = open(f"{down_dir}/{file_name[0]}", "r")
    lines = isotopes_lst_file.readlines()
    abbr = lines[1].strip()
    A_lst = lines[2:]
    for A in A_lst:
        print(f"Z is {Z}")
        print(f"A is {A}")
        A = A.strip()
        if str(A)[-1] == "m":
            str_A = int(A[:-1]) + 300
        elif str(A)[-2:] == "m2":
            str_A = int(A[:-2]) + 600
        elif int(A) < 10:
            str_A = '00' + A
        elif int(A) < 100:
            str_A = '0' + A
        else:
            str_A = A
        url = f"http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA={Z}0{str_A}"
        urllib.request.urlretrieve(url, f"downloads/isotopes_html/{A}_{abbr}_{Z}.html")

In [481]:
def download_all_elements(z_min, z_max):
    for Z in range(z_min, z_max + 1):
        download_all_isotopes(Z)

In [482]:
def extract_Igamma(A, element, Z):
    html_file = open(f"downloads/isotopes_html/{A}_{element}_{Z}.html", "r")
    soup = BeautifulSoup(html_file.read(), 'lxml')

    try:
        gammas_table = soup.find_all("table")[4]
        gammas_rows = gammas_table.find_all('tr')[3:-1]
    except:
        A = int(A)
        if A < 10:
            str_A = '00' + str(A)
        elif A < 100:
            str_A = '0' + str(A)
        print(colored(f"Seems like there are no gamma-lines known for isotope {A}{element}.", 'yellow'))
        print(colored("Check yellow pages for reference.", 'yellow'))
        print(colored(f"http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA={Z}0{str_A}", 'yellow'))
        return 1
    energy = []
    sigm_energy = []
    i = []
    sigm_i = []

    print(gammas_table.find("font").get_text(strip=True))

    #if any(["Betas", "X-rays"]) in gammas_table.find("font").get_text(strip=True):
    if any(x in gammas_table.find("font").get_text(strip=True) for x in ["Betas", "X-rays"]):
        A = int(A)
        if A < 10:
            str_A = '00' + str(A)
        elif A < 100:
            str_A = '0' + str(A)
        print(colored(f"Seems like there are no gamma-lines known for isotope {A}{element}.", 'yellow'))
        print(colored("Check yellow pages for reference.", 'yellow'))
        print(colored(f"http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA={Z}0{str_A}", 'yellow'))
        return 1
        

    for row in gammas_rows:
        cells = row.find_all('td')
        
        e_val = cells[0].get_text(strip=True)
        i_val = cells[1].get_text(strip=True)
        try:
            ig_val = float(i_val[:-1])
            sigm_ig_val = float(i_val[-1])
        except:
            ig_val = float('NaN')
            sigm_ig_val = float('NaN')
       
        energy.append(float(e_val[:-1]))
        sigm_energy.append(int(e_val[-1]))
        i.append(ig_val)
        
        sigm_i.append(sigm_ig_val)


    df_dict = {
        "E_tab": energy,
        "sigm_E": sigm_energy, 
        "Ig": i,
        "sigm_Ig": sigm_i
        }
    df = pd.DataFrame(df_dict)
    df_name = f'downloads/ig_db/{A}{element}.csv'
    df.to_csv(df_name)
   
    print(f"Ig extracted from file 'downloads/{A}{element}.html' into '{df_name}'.")
    return 0

In [483]:
def extract_all_elements(z_min, z_max):
    for Z in range(z_min, z_max + 1):
        extract_element(Z)

In [484]:
def extract_element(Z):
    html_lst = os.listdir("downloads/isotopes_html")
    element_files = [f for f in html_lst if str(Z) == f.split('_')[-1].split('.')[0]]
    for isotope_file in element_files:
        A, element, Z = (isotope_file.split('.')[0]).split('_')
        print(f"Extracting {A}{element}")
        extract_Igamma(A, element, Z)
        extract_info(A, element, Z)

In [485]:
def extract_info(A, element, Z):
    print("Extract info")
    print(A)
    print(element)
    print(Z)
    html_file = open(f"downloads/isotopes_html/{A}_{element}_{Z}.html", "r")
    soup = BeautifulSoup(html_file.read(), 'lxml')
    table = soup.find_all("table")[0]

    info_rows = table.find_all("tr")[6:16]
    info_df = {}
    #info_df = dict.fromkeys(["Prod. mode:", "Half life:"])
    for row in info_rows:
        #print(row)
        try:
            key = (row.find_all("th")[0]).get_text(strip=True)
        except:
            break
        try:
            val = row.find_all("td")[0]
        except:
            continue
        if val.find("i"):
            sigm = val.find("i").get_text(strip=True)
            val = str(val.get_text(strip=True))[:-len(sigm)]
            info_df[f"sigm_{key}"] = sigm
        else:
            val = val.get_text(strip=True)

        info_df[key] = val
    
    info_df.pop('', None)
    info_df = pd.DataFrame(info_df, index=[0])

    columns = info_df.columns.tolist()
    for i in range(len(columns)):
        columns[i] = columns[i][:-1]
        columns[i] = (columns[i]).replace(u'\xa0', u' ')

    info_df.columns = columns
    if "Literature cut-off date" in columns:
        info_df["Literature cut-off date"] = pd.to_datetime(info_df["Literature cut-off date"])

    #print(columns)

    if "Prod. mode" in columns:
        for mode in re.findall('[A-Z][^A-Z]*', str(info_df["Prod. mode"][0])):
            mode = mode.replace(u'\xa0', u' ')
            info_df[f"Prod_mode_{mode}"] = True
    
    for var in ["Sn(keV)", "Sp(keV)"]:
        if var in columns:
            orig_num = info_df[var][0]
            print(info_df[var][0])
            if info_df[var][0] == "":
                continue

            info_df[var] = info_df[var].astype(float)

            sigma = f"sigm_{var}"
            if sigma in columns and info_df[sigma][0] != "sy":
                g = decimal.Decimal(orig_num)
                print(info_df[sigma])
                info_df[sigma] = int(info_df[sigma]) * pow(10, g.as_tuple().exponent)

    print(info_df["Half life"][0])
    if info_df["Half life"][0] == "stable":
        info_df["Stable"] = True
        info_df = info_df.drop(columns=["Prod. mode", "Half life"], errors="ignore")
        #if "sigm_Sp(keV)" in columns:
        #    info_df = info_df.drop(columns=["sigm_Sp(keV)"])
        info_df.to_csv(f"downloads/ig_db/info_{A}{element}.csv")
    elif info_df["Half life"][0] == "":
        info_df["Stable"] = False
        info_df = info_df.drop(columns=["Prod. mode", "Half life"], errors="ignore")
        #if "sigm_Sp(keV)" in columns:
        #    info_df = info_df.drop(columns=["sigm_Sp(keV)"])
        info_df.to_csv(f"downloads/ig_db/info_{A}{element}.csv")

    else:
        info_df["Stable"] = False
        hl_val, hl_unit = info_df["Half life"][0].split()

        if not hl_unit in list(TIME_CONVERSION.keys()):
            info_df["Half-life [s]"] = None
        else:
            d = decimal.Decimal(hl_val)
            hl_val = float(hl_val)

            info_df["Half-life [s]"] = hl_val * TIME_CONVERSION[hl_unit]
            if "sigm_Half life" in columns:
                info_df["sigm_Half-life [s]"] = int(info_df["sigm_Half life"][0]) * pow(10, d.as_tuple().exponent) * TIME_CONVERSION[hl_unit]
        
        info_df = info_df.drop(columns=["Prod. mode", "Half life"], errors="ignore")
        if "sigm_Half life" in columns:
            info_df = info_df.drop(columns=["sigm_Half life"])
        if "sigm_Sp(keV)" in columns:
            info_df = info_df.drop(columns=["sigm_Sp(keV)"])
        info_df.to_csv(f"downloads/ig_db/info_{A}{element}.csv")

In [486]:
download_range(Z_MIN, Z_MAX)
parse_isotopes_range(Z_MIN,Z_MAX)
download_all_elements(Z_MIN, Z_MAX)
extract_all_elements(Z_MIN, Z_MAX)

Z is 11
A is 18

Z is 11
A is 19

Z is 11
A is 20

Z is 11
A is 21

Z is 11
A is 22

Z is 11
A is 23

Z is 11
A is 24

Z is 11
A is 24m

Z is 11
A is 25

Z is 11
A is 26

Z is 11
A is 27

Z is 11
A is 28

Z is 11
A is 29

Z is 11
A is 30

Z is 11
A is 31

Z is 11
A is 32

Z is 11
A is 33

Z is 11
A is 34

Z is 11
A is 35

Z is 12
A is 20

Z is 12
A is 21

Z is 12
A is 22

Z is 12
A is 23

Z is 12
A is 24

Z is 12
A is 25

Z is 12
A is 26

Z is 12
A is 27

Z is 12
A is 28

Z is 12
A is 29

Z is 12
A is 30

Z is 12
A is 31

Z is 12
A is 32

Z is 12
A is 33

Z is 12
A is 34

Z is 12
A is 35

Z is 12
A is 36

Z is 12
A is 37

Z is 13
A is 21

Z is 13
A is 22

Z is 13
A is 23

Z is 13
A is 24

Z is 13
A is 24m

Z is 13
A is 25

Z is 13
A is 26

Z is 13
A is 26m

Z is 13
A is 27

Z is 13
A is 28

Z is 13
A is 29

Z is 13
A is 30

Z is 13
A is 31

Z is 13
A is 32

Z is 13
A is 33

Z is 13
A is 34

Z is 13
A is 35

Z is 13
A is 36

Z is 13
A is 37

Z is 13
A is 38

Z is 13
A is 39

Z is 14
A i

ValueError: invalid literal for int() with base 10: '26m'

In [459]:
A = 300
str(A)[:-1]

'30'