In [386]:
import pandas as pd
import urllib
import numba
import os
import re

from getIg import getZ
from bs4 import BeautifulSoup

In [387]:
def func_get_text(cell):
    return cell.get_text()

In [388]:
#@numba.jit
def process_radiation_search(download_path):
    html_file = open(download_path, "r")
    soup = BeautifulSoup(html_file.read(), 'lxml')
    table = soup.find_all("table")[0]
    gammas = table.find_all("tr")
    init_dict = {"E_tab": [0], "Ig": [0], "Decay_Mode": ["b-"], "half_life": ["3 m"], "Isotope": ["Tu"]}
    gamma_df = pd.DataFrame(init_dict)

    for gamma_row in gammas[2:-2]:
        vals = list(map(func_get_text, gamma_row.find_all("td")))
        try:
            gamma_df = gamma_df.append({"E_tab": vals[0].split()[0], "Ig": vals[1].split()[0], "Decay_Mode": vals[2], "half_life": vals[3], "Isotope": vals[4]}, ignore_index=True)
        except:
            print("******* MISSING INTENSITY **************")
            #print(vals)
            continue
    return gamma_df

In [389]:
def find_gammas(energy, fwhm, t_min, t_max):
    print(energy)
    e_min = energy - fwhm
    e_max = energy + fwhm
    output_file = f"downloads/searchE{energy}pm{fwhm}tMin{t_min}tMax{t_max}.html"
    if not os.path.isfile(output_file):
        request_url = f"http://nucleardata.nuclear.lu.se/toi/Gamma.asp?sql=&Min={e_min}&Max={e_max}&HlifeMin={t_min}&tMinStr={t_min}+s&HlifeMax={t_max}&tMaxStr={t_max}+s"
        urllib.request.urlretrieve(request_url, output_file)
        print(f"File {output_file} downloaded.")
    
    gamma_df = process_radiation_search(output_file)
    gamma_df["Energy"] = energy
    gamma_df["FWHM"] = fwhm
    gamma_df["half_life_searched"] = f"{t_min} - {t_max}"
    gamma_df = gamma_df.drop([0])
    return gamma_df

In [390]:
def has_star(some_string):
    if "*" in str(some_string) or "~" in str(some_string) or "<" in str(some_string) or ">" in str(some_string):
        return 1
    else:
        return 0

In [391]:
def add_all_gammas(df, t_min, t_max, e_thr):
    df["Ig"] = int(1000.1)
    for row in df.iterrows():
        possible_gammas = find_gammas(row[1]["Energy"], row[1]["FWHM"], t_min, t_max)
        df = df.append(possible_gammas, ignore_index=True)
    #print(f"DF size is {df.shape}")
    drop_columns = [
        "Fit", "Filename", "Sample Identification", 
        "Sample Type", "Sample Geometry", 
        "Sample Size", "Efficiency ID", 
        "Peak Analysis Report                    2.12.2020  10"
        ]
    drop_columns3 = ["Peak Analysis From Channel", "Peak Search Sensitivity", "Max Iterations"]
    drop_columns4 = ["Use Fixed FWHM", "Peak Fit Engine Name"]
    df = df.drop(columns=drop_columns)
    #df = df.drop(columns=drop_columns2)
    df = df.drop(columns=drop_columns3)
    df = df.drop(columns=drop_columns4)
    print(df.columns)

    # change columns order
    old_column_order = df.columns.tolist()
    first_cols = ["Energy", "FWHM", "E_tab", "Area", "%err", "Ig", "half_life", "Isotope", "Real Time", "Live Time"]
    new_cols = first_cols + list(set(old_column_order) - set(first_cols))
    df = df[new_cols]
    df["Real Time"] = df["Real Time"].apply(lambda x: float(str(x).split(' ')[0]))
    df["Live Time"] = df["Live Time"].apply(lambda x: float(str(x).split(' ')[0]))
    #df = df[df["Ig"].apply(lambda x: str(x).isnumeric())]
    #df["Ig"] = pd.to_numeric(df['Ig'], errors='coerce')

    
    df = df[df["Ig"] != "*"]
    df = df[df["Ig"] != ""]

    df["Has_star"] = df["Ig"].apply(has_star)
    df = df[df["Has_star"] == 0]

    #df["Ig"] = df["Ig"].apply(float(re.findall(r"[\d\.\d]+", str(x))[0]))

    df['Ig'] = df['Ig'].astype(float)
    df = df[df["Ig"] >= e_thr]

    df = df.sort_values(by=["Energy", "Ig"], ascending=[True, False])


    return df

In [None]:
def add_origin(df, origins_dict):
    for row in df.iterrows():
        row["Origin"], origins_dict = find_origin(row["Isotope"], origins_dict)

    return df

In [None]:
def find_origin(isotope, origin_dict):
    try:
        return origin_dict.get(isotope), origin_dict
    except KeyError:
        return add_isotope_origin(isotope, origin_dict)

In [None]:
def add_isotope_origin(isotope, origin_dict):
    A, metastable, element = re.split(r'(\d{2,3})(m{0,1}\d{0,1})(\w+)', isotope)[1:-1]
    
    if metastable == "m":
        A += 300
    elif metastable == "m2":
        A += 600
    url = f"http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA={getZ(element)}0{A}"
    download_path = "downloads/temp.html"
    urllib.request.urlretrieve(url, download_path)
    html_file = open(download_path, "r")
    table = BeautifulSoap(html_file.read(), 'lxml').find_all("table")[1]
    try:
        origin_dict[isotope] = table.find_all("tr")[7].find("td").get_text(strip=True)
    except:
        origin_dict[isotope] = "???"

    return origin_dict.get(isotope), origin_dict

In [392]:
df = pd.read_csv("parsed_reports/M1-C1-Udepl-120.csv", index_col=0)
all_gammas = add_all_gammas(df, 120, 1e5, 1e-2)

all_gammas.to_csv("out/M1-C1-Udepl-120.csv", index=False)

25.27
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
79.8
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
95.02
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
114.9
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
150.05
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
181.23
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTENSITY **************
******* MISSING INTE

In [393]:
df = pd.read_csv("parsed_reports/M6-C1-Udepl-3.csv", index_col=0)

In [394]:
df["Live Time"].apply(lambda x: float(str(x).split(' ')[0]))

0     86795.1
1     86795.1
2     86795.1
3     86795.1
4     86795.1
       ...   
81    86795.1
82    86795.1
83    86795.1
84    86795.1
85    86795.1
Name: Live Time, Length: 86, dtype: float64

In [395]:
df[df["Max Iterations"].apply(lambda x: x.isnumeric())]

Unnamed: 0,Pk,IT,Energy,Area,Bkgnd,FWHM,Channel,Left,PW,Cts/Sec,...,Energy Calibration Used Done On,Efficiency Calibration Used Done On,Efficiency ID,Peak Analysis Report 10.12.2020 8,Peak Analysis Performed on,Peak Analysis From Channel,Peak Search Sensitivity,Max Iterations,Use Fixed FWHM,Peak Fit Engine Name


In [396]:
test = [
    0.134, 
    0.286, 
    ~0.00046,
    0.52115, 
    "2.65*",
    493, 
    "3*"
]
for num in test:
    print(re.findall(r"[\d\.\d]+", str(num))[0])

    #z = re.match(r"[\.\d]", str(num))
    #if z:
    #    print(z.groups())
    #else:
    #    print("No match")




TypeError: bad operand type for unary ~: 'float'

In [398]:
path = "downloads/searchE150.05pm1.05tMin120tMax100000.0.html"
html_file = open(path, "r")
soup = BeautifulSoup(html_file.read(), 'lxml')

In [171]:
row = gammas[3]

In [209]:
energy = row.find_all("td")[5]
print(energy.get_text())

IndexError: list index out of range

In [191]:
energy.get_text().split()

['149.0', '3']

In [184]:
energy.find_all("i")[0].get_text()

'3'

In [348]:
ll = pd.read_csv("out/M1-C1-Udepl-120.csv")

In [352]:
ll["Ig"].sort_values()

758         0.01
459         0.01
330         0.01
297         0.01
757         0.01
          ...   
1221    10000.10
1253    10000.10
110     10000.10
1389    10000.10
0       10000.10
Name: Ig, Length: 2290, dtype: float64

In [412]:
url = "http://nucleardata.nuclear.lu.se/toi/nuclide.asp?iZA=630150"
path = "downloads/scratch.html"
urllib.request.urlretrieve(url, path)

('downloads/scratch.html', <http.client.HTTPMessage at 0x7f6238e92250>)

In [413]:
html_file = open(path, "r")

In [414]:
soap = BeautifulSoup(html_file.read(), 'lxml')

'Charged\xa0particle reactionFast\xa0neutron activation'

In [452]:
sett = ["198mTl", "155Ho", "135I", "85m2Y", "85mY", "135Sm", "135mSm"]
for isotope in sett:
    print(re.split(r'(\d{2,3})(m{0,1}\d{0,1})(\w+)', isotope)[1:-1])

['198', 'm', 'Tl']
['155', '', 'Ho']
['135', '', 'I']
['85', 'm2', 'Y']
['85', 'm', 'Y']
['135', '', 'Sm']
['135', 'm', 'Sm']
