In [374]:
import json
import math
import os
import re
import urllib
import urllib.request

import numba
import pandas as pd
from bs4 import BeautifulSoup

from getIg import getZ

In [375]:
def func_get_text(cell):
    """Return whatever ``cell.get_text()`` yields for the given cell object."""
    text = cell.get_text()
    return text

In [376]:
#@numba.jit
def process_radiation_search(download_path):
    """Parse a saved gamma-search result page into a DataFrame.

    The HTML file at ``download_path`` is parsed with BeautifulSoup (lxml
    parser). The first ``<table>`` of the page is used; its first two and last
    two ``<tr>`` elements are skipped and every remaining row is turned into a
    record with the columns ``E_tab``, ``Ig``, ``Decay_Mode``, ``half_life``
    and ``Isotope`` (taken from the first five ``<td>`` cells, in that order).
    Only the first whitespace-separated token of the ``E_tab`` and ``Ig``
    cells is kept.

    Row 0 of the returned frame is always a placeholder record
    (``E_tab=0, Ig=0, Decay_Mode="b-", half_life="3 m", Isotope="Tu"``);
    ``find_gammas`` drops it after calling this function.

    Rows with fewer than five cells, or with an empty energy/intensity cell,
    are skipped with a printed notice.

    Raises:
        IndexError: if the page contains no ``<table>`` element.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(download_path, "r") as html_file:
        soup = BeautifulSoup(html_file.read(), 'lxml')
    table_rows = soup.find_all("table")[0].find_all("tr")

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and appending row by row is quadratic anyway.
    records = [{"E_tab": 0, "Ig": 0, "Decay_Mode": "b-", "half_life": "3 m", "Isotope": "Tu"}]
    for gamma_row in table_rows[2:-2]:
        vals = [func_get_text(cell) for cell in gamma_row.find_all("td")]
        try:
            records.append({
                "E_tab": vals[0].split()[0],
                "Ig": vals[1].split()[0],
                "Decay_Mode": vals[2],
                "half_life": vals[3],
                "Isotope": vals[4],
            })
        except IndexError:
            # Only a missing cell / empty value is expected here; anything
            # else should surface instead of being reported as a missing
            # intensity.
            print("******* MISSING INTENSITY **************")
    return pd.DataFrame(records)

In [377]:
def find_gammas(energy, fwhm, t_min, t_max):
    """Return the tabulated gamma lines within ``energy`` +/- ``fwhm``.

    The search page for the window ``[energy - fwhm, energy + fwhm]`` and the
    half-life limits ``t_min``/``t_max`` (sent to the server with an ``s``
    suffix) is downloaded into ``downloads/`` unless a file for the same
    parameters is already there, and then parsed with
    ``process_radiation_search``.

    Returns:
        The parsed DataFrame without its placeholder row 0, extended by the
        columns ``Energy``, ``FWHM`` and ``half_life_searched``.
    """
    print(energy)
    e_min = energy - fwhm
    e_max = energy + fwhm
    output_file = f"downloads/searchE{energy}pm{fwhm}tMin{t_min}tMax{t_max}.html"
    if not os.path.isfile(output_file):
        # The cache directory may not exist on a fresh checkout.
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        request_url = f"http://nucleardata.nuclear.lu.se/toi/Gamma.asp?sql=&Min={e_min}&Max={e_max}&HlifeMin={t_min}&tMinStr={t_min}+s&HlifeMax={t_max}&tMaxStr={t_max}+s"
        # Download to a temporary name and rename on success, so that an
        # interrupted transfer is never mistaken for a valid cached page.
        partial_file = output_file + ".part"
        urllib.request.urlretrieve(request_url, partial_file)
        os.replace(partial_file, output_file)
        print(f"File {output_file} downloaded.")

    gamma_df = process_radiation_search(output_file)
    gamma_df["Energy"] = energy
    gamma_df["FWHM"] = fwhm
    gamma_df["half_life_searched"] = f"{t_min} - {t_max}"
    # Row 0 is the placeholder record inserted by process_radiation_search.
    gamma_df = gamma_df.drop([0])
    return gamma_df

In [378]:
def has_star(some_string):
    """Flag values that contain one of the markers ``*``, ``~``, ``<`` or ``>``.

    The argument is converted with ``str()`` before the check.

    Returns:
        1 if at least one marker occurs in the text, otherwise 0.
    """
    text = str(some_string)
    for marker in ("*", "~", "<", ">"):
        if marker in text:
            return 1
    return 0

In [379]:
def add_all_gammas(df, t_min, t_max, e_thr):
    """Attach the candidate gamma lines to every measured peak in ``df``.

    For each row of ``df`` the lines found by
    ``find_gammas(row["Energy"], row["FWHM"], t_min, t_max)`` are appended
    below the measured peaks. The measured peaks themselves get ``Ig = 1000``.

    After that the frame is cleaned up:

    * report-header columns are dropped (columns that are not present are
      ignored, so reports with a different header line do not fail),
    * the columns listed in ``first_cols`` are moved to the front, the
      remaining ones keep their original relative order,
    * ``Real Time`` / ``Live Time`` are reduced to the number in front of the
      first space,
    * rows whose ``Ig`` is empty or contains ``*``, ``~``, ``<`` or ``>`` are
      removed, ``Ig`` is converted to float and rows with ``Ig < e_thr`` are
      removed (note: despite its name, ``e_thr`` is compared against ``Ig``),
    * the result is sorted by ``Energy`` ascending and ``Ig`` descending.

    The input frame is not modified.

    Raises:
        KeyError: if one of the ``first_cols`` columns is missing.
    """
    # Work on a copy so the caller's frame does not silently gain an Ig column.
    df = df.copy()
    df["Ig"] = 1000

    # Collect all frames and concatenate once; DataFrame.append was removed in
    # pandas 2.0 and repeated appends are quadratic.
    frames = [df]
    for _, peak in df.iterrows():
        frames.append(find_gammas(peak["Energy"], peak["FWHM"], t_min, t_max))
    df = pd.concat(frames, ignore_index=True)

    report_columns = [
        "Fit", "Filename", "Sample Identification",
        "Sample Type", "Sample Geometry",
        "Sample Size", "Efficiency ID",
        # Header line of one specific report; it contains the report date.
        "Peak Analysis Report                    2.12.2020  10",
        "Peak Analysis From Channel", "Peak Search Sensitivity", "Max Iterations",
        "Use Fixed FWHM", "Peak Fit Engine Name",
    ]
    df = df.drop(columns=report_columns, errors="ignore")
    print(df.columns)

    # Change the column order. A list comprehension (instead of a set
    # difference) keeps the order of the trailing columns deterministic.
    first_cols = ["Energy", "FWHM", "E_tab", "Area", "%err", "Ig", "half_life", "Isotope", "Real Time", "Live Time"]
    other_cols = [col for col in df.columns if col not in first_cols]
    df = df[first_cols + other_cols].copy()

    def leading_number(value):
        # Keep the part in front of the first space; NaN (rows coming from
        # find_gammas) becomes "nan" and converts back to NaN.
        return float(str(value).split(' ')[0])

    df["Real Time"] = df["Real Time"].apply(leading_number)
    df["Live Time"] = df["Live Time"].apply(leading_number)

    # Remove intensities that are missing or only given as a limit/estimate.
    df = df[(df["Ig"] != "*") & (df["Ig"] != "")].copy()
    df["Has_star"] = df["Ig"].apply(has_star)
    df = df[df["Has_star"] == 0].copy()

    df["Ig"] = df["Ig"].astype(float)
    df = df[df["Ig"] >= e_thr]

    return df.sort_values(by=["Energy", "Ig"], ascending=[True, False])

In [380]:
def add_origin(df, origins_dict):
    """Add an ``Origin`` column to ``df`` based on its ``Isotope`` column.

    Every isotope is resolved with ``find_origin``, which may extend
    ``origins_dict`` with newly looked-up isotopes.

    Args:
        df: frame with an ``Isotope`` column; the ``Origin`` column is added
            to this frame in place.
        origins_dict: mapping isotope -> origin used as a cache.

    Returns:
        Tuple ``(df, origins_dict)`` with the annotated frame and the
        (possibly extended) cache.
    """
    origins = []
    # Iterate over the values rather than .loc[0..n-1], so the frame does not
    # need a default RangeIndex.
    for isotope in df["Isotope"]:
        origin, origins_dict = find_origin(isotope, origins_dict)
        origins.append(origin)

    # Assign the whole column at once; the previous chained
    # df["Origin"].loc[i] = ... is not guaranteed to write into df.
    df["Origin"] = origins

    # Return the parameter, not an unrelated global named origin_dict.
    return df, origins_dict

In [381]:
def find_origin(isotope, origin_dict):
    """Return the origin of ``isotope``, looking it up online on a cache miss.

    Args:
        isotope: key to look up in ``origin_dict``.
        origin_dict: mapping isotope -> origin used as a cache.

    Returns:
        Tuple ``(origin, origin_dict)``. On a cache miss both values come from
        ``add_isotope_origin``, which stores the new entry in ``origin_dict``.
    """
    # Single lookup; compare with `is not None` rather than `!= None`.
    cached_origin = origin_dict.get(isotope)
    if cached_origin is not None:
        print("Found")
        print(cached_origin)
        return cached_origin, origin_dict

    print("Not found")
    return add_isotope_origin(isotope, origin_dict)

In [382]:
def add_isotope_origin(isotope, origin_dict):
    """Look up the origin of ``isotope`` online and store it in ``origin_dict``.

    ``isotope`` is a label such as ``"135I"``, ``"198mTl"`` or ``"85m2Y"``:
    mass number, optional metastable marker, element symbol. The nuclide page
    is requested with ``iZA = Z * 10000 + A``, where 300 is added to ``A`` for
    an ``m`` state and 600 for an ``m2`` state; ``Z`` comes from ``getZ``.
    The page is saved to ``downloads/temp.html`` and the text of the first
    cell in the eighth row of its second table is stored as the origin.
    If that cell does not exist, ``"???"`` is stored instead.

    Args:
        isotope: isotope label, or NaN for rows without an isotope.
        origin_dict: mapping isotope -> origin; updated in place.

    Returns:
        Tuple ``(origin, origin_dict)``; ``origin`` is ``None`` for NaN input
        (nothing is stored in that case).

    Raises:
        ValueError: if ``isotope`` is not a parseable isotope label.
    """
    print(f"isotope is {isotope}, length is {len(str(isotope))}")
    if str(isotope) == "nan":
        print("it is nana")
        return None, origin_dict

    print(f"Isotope is {isotope}")
    # 1-3 digit mass number (so "1H" parses too), optional "m"/"m2" marker,
    # element symbol.
    label = re.fullmatch(r'(\d{1,3})(m\d?)?([A-Za-z]+)', str(isotope).strip())
    if label is None:
        raise ValueError(f"Cannot parse isotope label {isotope!r}")
    mass_number = int(label.group(1))  # int, so the offsets below can be added
    metastable = label.group(2) or ""
    element = label.group(3)

    # Any other marker (e.g. "m3") gets no offset, as before.
    # NOTE(review): confirm how the site encodes higher isomers.
    if metastable == "m":
        mass_number += 300
    elif metastable == "m2":
        mass_number += 600
    print(list(element))

    # Zero-pad A to four digits so that two-digit mass numbers also yield
    # Z * 10000 + A (e.g. Z=63, A=150 -> 630150).
    url = f"http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA={getZ(element)}{mass_number:04d}"
    download_path = "downloads/temp.html"
    os.makedirs(os.path.dirname(download_path), exist_ok=True)
    urllib.request.urlretrieve(url, download_path)
    with open(download_path, "r") as html_file:
        table = BeautifulSoup(html_file.read(), 'lxml').find_all("table")[1]
    try:
        origin_dict[isotope] = table.find_all("tr")[7].find("td").get_text(strip=True)
    except (IndexError, AttributeError):
        # Fewer than eight rows, or a row without a <td> cell.
        origin_dict[isotope] = "???"

    return origin_dict.get(isotope), origin_dict

In [383]:
#df = pd.read_csv("parsed_reports/M1-C1-Udepl-120.csv", index_col=0)
#all_gammas = add_all_gammas(df, 120, 1e5, 1e-2)

#all_gammas.to_csv("out/M1-C1-Udepl-120.csv", index=False)

In [384]:
# `origins` is assigned by the `df, origins = add_origin(df, origin_dict)` cell
# further down, so on a fresh kernel this cell must run after that one.
# Serialise first and open the file afterwards: opening with "w" truncates the
# file immediately, so a failure while evaluating or encoding `origins` would
# otherwise leave aux_data/origins.json empty for the cell that loads it.
origins_json = json.dumps(origins)
with open("aux_data/origins.json", "w") as outfile:
    outfile.write(origins_json)

In [385]:
# Load the table that is passed to add_origin() in a later cell.
df = pd.read_csv("out/M1-C1-Udepl-120_mod.csv")

In [386]:
# Read the isotope -> origin cache back from disk.
with open('aux_data/origins.json', "r") as origin_file:
    origin_dict = json.loads(origin_file.read())

In [387]:
# Show the cached origins, then annotate df with an "Origin" column.
# add_origin returns the frame together with the origins dictionary.
# NOTE(review): the recorded run below stops with KeyError: 'Xe' right after the
# element symbol is printed -- presumably raised inside getZ; confirm.
print(origin_dict)
df, origins = add_origin(df, origin_dict)

{'1H': 'Naturally occurring'}
nan
Not found
isotope is nan, length is 3
it is nana
120Xe
Not found
isotope is 120Xe, length is 5
Isotope is 120Xe
['X', 'e']


KeyError: 'Xe'

In [126]:
# Serialise before opening the file: opening with "w" truncates it at once, so
# a failure while evaluating or encoding `origins` would otherwise leave an
# empty aux_data/origins2.json behind.
origins_json = json.dumps(origins)
with open("aux_data/origins2.json", "w") as outfile:
    outfile.write(origins_json)

In [127]:
# Write the current frame to a scratch CSV (the index is written too, since
# index=False is not passed).
df.to_csv("out/scratch.csv")

In [128]:
# Inspect the Origin column; every entry shown in the recorded output is None.
df["Origin"]

0      None
1      None
2      None
3      None
4      None
       ... 
105    None
106    None
107    None
108    None
109    None
Name: Origin, Length: 110, dtype: object

In [129]:
# Scratch cell: download a single nuclide page (iZA=630150) to inspect its HTML.
url = "http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA=630150"
path = "downloads/scratch.html"
urllib.request.urlretrieve(url, path)

('downloads/scratch.html', <http.client.HTTPMessage at 0x7f3f913636a0>)

In [130]:
# Open the downloaded scratch page for reading.
# NOTE(review): no close() on this handle is visible in the notebook.
html_file = open(path, "r")

In [131]:
# Parse the opened page with the lxml parser; the result is bound to `soap`.
soap = BeautifulSoup(html_file.read(), 'lxml')

In [132]:
# Check how the isotope-label regex used in add_isotope_origin splits a few
# labels into (mass number, metastable marker, element symbol).
sett = ["198mTl", "155Ho", "135I", "85m2Y", "85mY", "135Sm", "135mSm"]
for isotope in sett:
    print(re.split(r'(\d{2,3})(m{0,1}\d{0,1})(\w+)', isotope)[1:-1])

['198', 'm', 'Tl']
['155', '', 'Ho']
['135', '', 'I']
['85', 'm2', 'Y']
['85', 'm', 'Y']
['135', '', 'Sm']
['135', 'm', 'Sm']


In [390]:
# Probe getZ with an empty symbol; the recorded output is 94.
# NOTE(review): an empty string yielding a value instead of an error looks
# unintended -- check the getZ implementation.
getZ("")

94