In [40]:
import os
import re
import urllib
import urllib.request
from pprint import pprint

import pandas as pd
from bs4 import BeautifulSoup
from termcolor import colored

In [2]:
# Inclusive range of Z values (presumably atomic numbers — confirm) passed to
# the download/parse helpers below.
Z_MIN = 1
Z_MAX = 112

In [3]:
def download_isotopes_list(Z):
    """Download the nuclide-list page for one value of Z.

    The page ``listnuc.asp?sql=&Z={Z}`` is saved as
    ``downloads/find_isotopes/z_{Z}.html``.

    Parameters
    ----------
    Z : int or str
        Value substituted into the ``Z=`` query parameter and the file name.

    Raises
    ------
    urllib.error.URLError
        If the request fails.
    """
    target_dir = "downloads/find_isotopes"
    # urlretrieve does not create missing directories; make sure the target
    # exists so a fresh checkout can run this cell.
    os.makedirs(target_dir, exist_ok=True)
    request_url = f"http://nucleardata.nuclear.lu.se/toi/listnuc.asp?sql=&Z={Z}"
    urllib.request.urlretrieve(request_url, f"{target_dir}/z_{Z}.html")

In [4]:
def download_range(z_min, z_max):
    """Call ``download_isotopes_list`` for every Z from z_min to z_max, inclusive."""
    z_values = range(z_min, z_max + 1)
    for current_z in z_values:
        download_isotopes_list(current_z)

In [5]:
# Fetch the nuclide-list page for every Z in [Z_MIN, Z_MAX]; this performs one
# HTTP request per Z.
download_range(Z_MIN, Z_MAX)

In [5]:
def parse_isotopes_one(Z):
    """Extract the nuclide list for one Z from its saved HTML page.

    Reads ``downloads/find_isotopes/z_{Z}.html`` and writes
    ``downloads/find_isotopes_parsed/{Z}_{abbr}.txt`` containing, one per
    line: Z, the abbreviation taken from the first nuclide link, and then the
    text of the ``<sup>`` element of every nuclide header cell.

    Parameters
    ----------
    Z : int or str
        Value used in the input and output file names.

    Raises
    ------
    FileNotFoundError
        If the saved HTML page does not exist.
    IndexError
        If the page has no table or no header cells past the first nine.
    """
    html_path = f"downloads/find_isotopes/z_{Z}.html"
    # Context manager so the handle is released even if parsing fails.
    with open(html_path, "r") as isotopes_lst_html:
        bs = BeautifulSoup(isotopes_lst_html.read(), 'lxml')

    table = bs.find_all("table")[0]
    # The first nine <th> cells are skipped; the rest are treated as nuclides.
    nuclide_lst = table.find_all('th')[9:]
    # Serialise the first <a> tag, keep what follows '</sup>' and drop the
    # trailing 4 characters (the closing '</a>').
    abbr = str(nuclide_lst[0].find('a')).split('</sup>')[1][:-4]

    out_dir = "downloads/find_isotopes_parsed"
    os.makedirs(out_dir, exist_ok=True)
    out_file_path = f"{out_dir}/{Z}_{abbr}.txt"
    with open(out_file_path, "w") as out_file:
        out_file.write(f"{Z}\n")
        out_file.write(f"{abbr}\n")
        for nuclide in nuclide_lst:
            out_file.write(f"{nuclide.find('sup').get_text()}\n")

In [6]:
def parse_isotopes_range(z_min, z_max):
    """Call ``parse_isotopes_one`` for every Z from z_min to z_max, inclusive."""
    upper_bound = z_max + 1
    for current_z in range(z_min, upper_bound):
        parse_isotopes_one(current_z)

In [8]:
# Parse every saved nuclide-list page in [Z_MIN, Z_MAX] into a text file.
parse_isotopes_range(Z_MIN,Z_MAX)

In [7]:
def download_all_isotopes(Z):
    """Download the nuclide page of every isotope listed for one Z.

    Looks in ``downloads/find_isotopes_parsed`` for the file whose name starts
    with ``{Z}_``, reads the abbreviation from its second line and one A value
    from each following line, and saves each page as
    ``downloads/isotopes_html/{A}_{abbr}_{Z}.html``.

    Parameters
    ----------
    Z : int or str
        Value matched against the file-name prefix and used in the URL.

    Raises
    ------
    IndexError
        If no parsed list file exists for this Z.
    urllib.error.URLError
        If a request fails.
    """
    down_dir = "downloads/find_isotopes_parsed"
    file_name = [s for s in os.listdir(down_dir) if str(Z) == s.split('_')[0]]
    with open(f"{down_dir}/{file_name[0]}", "r") as isotopes_lst_file:
        lines = isotopes_lst_file.readlines()
    abbr = lines[1].strip()

    html_dir = "downloads/isotopes_html"
    os.makedirs(html_dir, exist_ok=True)
    for line in lines[2:]:
        A = int(line)
        # Zero-pad A to three digits for every value. The previous if/elif
        # left the padded string unset (or stale from the prior isotope) once
        # A reached 100.
        str_A = f"{A:03d}"
        url = f"http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA={Z}0{str_A}"
        urllib.request.urlretrieve(url, f"{html_dir}/{A}_{abbr}_{Z}.html")

In [8]:
def extract_Igamma(A, element, Z):
    """Convert the gamma-line table of one saved nuclide page into a CSV file.

    Reads ``downloads/isotopes_html/{A}_{element}_{Z}.html``, takes the fifth
    ``<table>`` of the page and writes the columns ``E_tab``, ``sigm_E``,
    ``Ig`` and ``sigm_Ig`` to ``downloads/ig_db/{A}{element}.csv``.

    Parameters
    ----------
    A, element, Z : int or str
        Components of the input file name; ``A`` and ``element`` also form the
        output file name.

    Returns
    -------
    int
        0 when the CSV was written; 1 when the page has no fifth table, in
        which case a notice with the source URL is printed instead.

    Raises
    ------
    FileNotFoundError
        If the saved HTML page does not exist.
    ValueError
        If an energy cell cannot be parsed as a number.
    """
    with open(f"downloads/isotopes_html/{A}_{element}_{Z}.html", "r") as html_file:
        soup = BeautifulSoup(html_file.read(), 'lxml')

    try:
        gammas_table = soup.find_all("table")[4]
    except IndexError:
        # Fewer than five tables on the page: report and point at the source.
        A = int(A)
        # Pad for every A; the previous if/elif left this unset for A >= 100.
        str_A = f"{A:03d}"
        print(colored(f"Seems like there are no gamma-lines known for isotope {A}{element}.", 'red'))
        print(colored("Check yellow pages for reference.", 'yellow'))
        print(colored(f"http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA={Z}0{str_A}", 'yellow'))
        return 1

    # Skip the first three rows and the last row of the table.
    gammas_rows = gammas_table.find_all('tr')[3:-1]

    energy = []
    sigm_energy = []
    intensity = []
    sigm_intensity = []

    for row in gammas_rows:
        cells = row.find_all('td')

        e_val = cells[0].get_text(strip=True)
        i_val = cells[1].get_text(strip=True)
        # NOTE(review): the final character of each cell is taken as the
        # uncertainty and everything before it as the value. An uncertainty
        # with more than one digit would be split incorrectly — confirm
        # against the page markup.
        try:
            ig_val = float(i_val[:-1])
            sigm_ig_val = float(i_val[-1])
        except (ValueError, IndexError):
            # Empty or non-numeric intensity cell: record it as missing.
            ig_val = float('NaN')
            sigm_ig_val = float('NaN')

        energy.append(float(e_val[:-1]))
        sigm_energy.append(int(e_val[-1]))
        intensity.append(ig_val)
        sigm_intensity.append(sigm_ig_val)

    df = pd.DataFrame({
        "E_tab": energy,
        "sigm_E": sigm_energy,
        "Ig": intensity,
        "sigm_Ig": sigm_intensity,
    })
    out_dir = "downloads/ig_db"
    os.makedirs(out_dir, exist_ok=True)
    df_name = f'{out_dir}/{A}{element}.csv'
    df.to_csv(df_name)

    print(f"Ig extracted from file 'downloads/{A}{element}.html' into '{df_name}'.")
    return 0

In [9]:
def extract_all_elements(z_min, z_max):
    """Call ``extract_element`` for every Z from z_min to z_max, inclusive."""
    for current_z in range(z_min, z_max + 1):
        extract_element(current_z)

In [10]:
def extract_element(Z):
    """Run both extractors on every saved page belonging to one Z.

    A file in ``downloads/isotopes_html`` belongs to Z when the last
    underscore-separated part of its name, up to the first dot, equals
    ``str(Z)``. For each such file the three name components are passed, as
    strings, to ``extract_Igamma`` and then to ``extract_info``.
    """
    page_names = os.listdir("downloads/isotopes_html")
    wanted = str(Z)
    for page_name in page_names:
        if page_name.split('_')[-1].split('.')[0] != wanted:
            continue
        # File names have the form "{A}_{element}_{Z}.html".
        a_part, element_part, z_part = page_name.split('.')[0].split('_')
        extract_Igamma(a_part, element_part, z_part)
        extract_info(a_part, element_part, z_part)

In [11]:
def extract_info(A, element, Z):
    """Parse the summary table of one saved nuclide page into a one-row DataFrame.

    Reads ``downloads/isotopes_html/{A}_{element}_{Z}.html`` and turns rows
    6..15 of the first ``<table>`` into columns: the ``<th>`` text is the
    column name and the ``<td>`` text the value. When the value cell contains
    an ``<i>`` element, its text is stored in a separate ``sigm_<name>``
    column and removed from the end of the value.

    Parameters
    ----------
    A, element, Z : int or str
        Components of the input file name. ``Z`` was previously missing from
        the signature although the body and the caller both use it.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) with the parsed columns, ``Literature cut-off date``
        converted to datetime, and one ``Prod_mode_<mode>`` column set to True
        per production mode found.

    Raises
    ------
    FileNotFoundError
        If the saved HTML page does not exist.
    KeyError
        If the page lacks the ``Literature cut-off date`` or ``Prod. mode`` row.
    """
    with open(f"downloads/isotopes_html/{A}_{element}_{Z}.html", "r") as html_file:
        soup = BeautifulSoup(html_file.read(), 'lxml')
    table = soup.find_all("table")[0]

    info = {}
    for row in table.find_all("tr")[6:16]:
        key = row.find_all("th")[0].get_text(strip=True)
        cell = row.find_all("td")[0]
        sigm_tag = cell.find("i")
        if sigm_tag:
            sigm = sigm_tag.get_text(strip=True)
            text = str(cell.get_text(strip=True))
            # Trim the <i> text from the end. Computing the end index avoids
            # text[:-0], which would wipe the value when the <i> text is empty.
            val = text[:len(text) - len(sigm)]
            info[f"sigm_{key}"] = sigm
        else:
            val = cell.get_text(strip=True)
        info[key] = val

    # Rows with an empty header cell carry no named field.
    info.pop('', None)
    info_df = pd.DataFrame(info, index=[0])

    # Drop the last character of each name (presumably a trailing colon —
    # confirm) and replace non-breaking spaces with plain spaces.
    info_df.columns = [
        name[:-1].replace(u'\xa0', u' ') for name in info_df.columns.tolist()
    ]
    info_df["Literature cut-off date"] = pd.to_datetime(info_df["Literature cut-off date"])

    # The production modes arrive concatenated into one string; split it at
    # each capital letter and flag every mode found.
    for mode in re.findall('[A-Z][^A-Z]*', str(info_df.loc[0, "Prod. mode"])):
        mode = mode.replace(u'\xa0', u' ')
        info_df[f"Prod_mode_{mode}"] = True

    return info_df

In [12]:
# Load one saved nuclide page to prototype the info-table parsing below.
# The context manager closes the handle once the page has been read.
with open("downloads/isotopes_html/14_C_6.html", "r") as html_file:
    soup = BeautifulSoup(html_file.read(), 'lxml')

In [48]:
# Prototype of the parsing done in extract_info, run on the page loaded into
# `soup` by the previous cell.
table = soup.find_all("table")[0]
# Only rows 6..15 of the first table are read.
info_rows = table.find_all("tr")[6:16]

# Collect field name -> value; converted to a one-row DataFrame below.
info_df = {}
for row in info_rows:
    key = (row.find_all("th")[0]).get_text(strip=True)
    val = row.find_all("td")[0]
    # An <i> element inside the value cell is stored separately under
    # "sigm_<key>" and trimmed from the end of the value text.
    if val.find("i"):
        sigm = val.find("i").get_text(strip=True)
        val = str(val.get_text(strip=True))[:-len(sigm)]
        info_df[f"sigm_{key}"] = sigm
    else:
        val = val.get_text(strip=True)

    info_df[key] = val
# Discard the entry produced by rows whose header cell is empty.
info_df.pop('', None)
info_df = pd.DataFrame(info_df, index=[0])
columns = info_df.columns.tolist()
for i in range(len(columns)):
    # Drop the last character of the name (presumably a trailing colon —
    # confirm) and replace non-breaking spaces with plain spaces.
    columns[i] = columns[i][:-1]
    columns[i] = (columns[i]).replace(u'\xa0', u' ')
info_df.columns = columns
info_df["Literature cut-off date"] = pd.to_datetime(info_df["Literature cut-off date"])

# "Prod. mode" holds several modes concatenated without a separator; split at
# each capital letter and add one boolean column per mode.
for mode in re.findall('[A-Z][^A-Z]*', str(info_df["Prod. mode"][0])):
    mode = mode.replace(u'\xa0', u' ')
    info_df[f"Prod_mode_{mode}"] = True

# TODO: convert the half-life to seconds (no to_seconds helper is defined in
# the visible cells).

info_df.head()

Unnamed: 0,sigm_Half life,Half life,Jp,Sn(keV),sigm_Sp(keV),Sp(keV),Prod. mode,ENSDF citation,Literature cut-off date,Author(s),References since cut-off,Prod_mode_Naturally occurring,Prod_mode_Fast neutron activation,Prod_mode_Thermal neutron activation
0,40,5730 y,0+,8176.44,11,20831.3,Naturally occurringFast neutron activationTher...,"NP A523,1 (1991)",1986-04-23,F. Ajzenberg-Selove,14C decay from 1986-98 (NSR),True,True,True


In [44]:
# Same loop as at the end of the prototype cell: split the concatenated
# "Prod. mode" string at each capital letter and add one boolean column per
# mode. Requires `info_df` to already be the DataFrame built in that cell.
for mode in re.findall('[A-Z][^A-Z]*', str(info_df["Prod. mode"][0])):
    mode = mode.replace(u'\xa0', u' ')
    info_df[f"Prod_mode_{mode}"] = True

In [47]:
# Show the final column names, including the added Prod_mode_* flags.
info_df.columns.tolist()

['sigm_Half life',
 'Half life',
 'Jp',
 'Sn(keV)',
 'sigm_Sp(keV)',
 'Sp(keV)',
 'Prod. mode',
 'ENSDF citation',
 'Literature cut-off date',
 'Author(s)',
 'References since cut-off',
 'Prod_mode_Naturally occurring',
 'Prod_mode_Fast neutron activation',
 'Prod_mode_Thermal neutron activation']

In [47]:
# Scratch: creates an empty DataFrame; no visible cell reads this `df`.
df = pd.DataFrame()

In [95]:
# Scratch check of the slicing used to trim an uncertainty suffix:
# removing the last len(str2) characters of str1.
str1 = "foobar"
str2 = "bar"
print(str1[:-len(str2)])

foo


In [31]:
# NOTE(review): extract_Igamma_element is not defined in any visible cell, so
# this call fails on a fresh kernel. It may have been an earlier name of
# extract_element — confirm, then update or delete this cell.
extract_Igamma_element(4)

[31mSeems like there are no gamma-lines known for isotope 5Be.[0m
[33mCheck yellow pages for reference.[0m
[33mhttp://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA=40005[0m
Ig extracted from file 'downloads/14Be.html' into 'downloads/ig_db/14Be.csv'.
[31mSeems like there are no gamma-lines known for isotope 8Be.[0m
[33mCheck yellow pages for reference.[0m
[33mhttp://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA=40008[0m
Ig extracted from file 'downloads/7Be.html' into 'downloads/ig_db/7Be.csv'.
[31mSeems like there are no gamma-lines known for isotope 12Be.[0m
[33mCheck yellow pages for reference.[0m
[33mhttp://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA=40012[0m
[31mSeems like there are no gamma-lines known for isotope 6Be.[0m
[33mCheck yellow pages for reference.[0m
[33mhttp://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA=40006[0m
[31mSeems like there are no gamma-lines known for isotope 13Be.[0m
[33mCheck yellow pages for reference.[0m
[33mhttp://nucle

In [13]:
# Spot check on a single page: downloads/isotopes_html/20_F_9.html.
extract_Igamma(20, "F", 9)

Ig extracted from file 'downloads/20F.html' into 'downloads/ig_db/20F.csv'.


0

In [14]:
# Download the nuclide pages for Z = 1..9 only (not the full Z_MIN..Z_MAX
# range); performs one HTTP request per isotope.
for i in range(1,10):
    download_all_isotopes(i)

In [17]:
# Scratch test of the file-name prefix filter used in download_all_isotopes.
# NOTE(review): `lst` is not defined in any visible cell, so this fails on a
# fresh kernel — confirm where it came from or delete this cell.
matching = [s for s in lst if "12" == s.split('_')[0]]