In [1]:
import pandas as pd
import numpy as np
import os
from natsort import natsorted

In [347]:
# Load the working cross-section table into `df`.
# NOTE(review): this path starts at the filesystem root ("/../ML_Data/"), while
# the final export in this notebook writes to "../../ML_Data/" — confirm which
# location is intended.
df = pd.read_csv("/../ML_Data/working_xs_v1_unsk.csv")

In [348]:
# Inspect the column names of the loaded table.
df.columns

Index(['Target_Meta_State', 'MT', 'Energy', 'dEnergy', 'Data', 'dData',
       'ELV/HL', 'dELV/HL', 'I78', 'Refer', 'Protons', 'Product_Meta_State',
       'Frame', 'Reaction_Notation', 'Title', 'Year', 'Author', 'Institute',
       'Date', 'Reference', 'Out', 'Target_Neutrons', 'Origin', 'Mass_Excess',
       'dMass_Excess', 'Binding_Energy', 'dBinding_Energy', 'B_Decay_Energy',
       'dB_Decay_Energy', 'Atomic_Mass_Micro', 'dAtomic_Mass_Micro', 'S(2n)',
       'dS(2n)', 'S(2p)', 'dS(2p)', 'Q(a)', 'dQ(a)', 'Q(2B-)', 'dQ(2B-)',
       'Q(ep)', 'dQ(ep)', 'Q(B-n)', 'dQ(B-n)', 'Target_Mass_Number', 'Element',
       'S(n)', 'dS(n)', 'S(p)', 'dS(p)', 'Q(4B-)', 'dQ(4B-)', 'Q(d,a)',
       'dQ(d,a)', 'Q(p,a)', 'dQ(p,a)', 'Q(n,a)', 'dQ(n,a)', 'Q(g,p)', 'Q(g,n)',
       'Q(g,pn)', 'Q(g,d)', 'Q(g,t)', 'Q(g,He3)', 'Q(g,2p)', 'Q(g,2n)',
       'Q(g,a)', 'Q(p,n)', 'Q(p,2p)', 'Q(p,pn)', 'Q(p,d)', 'Q(p,2n)', 'Q(p,t)',
       'Q(p,3He)', 'Q(n,2p)', 'Q(n,np)', 'Q(n,d)', 'Q(n,2n)', 'Q(n,t)',
       'Q

In [349]:
# Discard rows whose element symbol is the literal string "0.0".
df = df.loc[df["Element"].ne("0.0")]

In [350]:
# Isotope label: mass number (as text) followed by the element symbol.
mass_number_text = df["Target_Mass_Number"].astype(str)
df["Element_w_A"] = mass_number_text + df["Element"]

In [352]:
# Array of the distinct isotope labels (mass number + element symbol) in `df`.
isotope_labels = df["Target_Mass_Number"].astype(str) + df["Element"]
elements = isotope_labels.unique()

In [356]:
# Search all files within the ENSDF directory (recursively) for ".dat" files
directory = "./ENSDF_Files/"

print("Searching directory for RIPL ENSDF files...")
names = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(directory)
    for filename in filenames
    if filename.endswith(".dat")
]

print("Gathered {} RIPL ENSDF files.".format(len(names)))
# Natural ordering of the collected paths
names = natsorted(names)

Searching directory for RIPL ENSDF files...
Gathered 118 RIPL ENSDF files.


In [357]:
# Directory prefix for the intermediate text/CSV files written by this notebook.
# It is concatenated directly with file names, so the trailing slash is required.
resulting_files_dir = "./Resulting_Files/"

In [358]:
# We use the list of documents to extract only the data we need: a line is
# kept when any of its whitespace-separated tokens is one of the isotope
# labels in `elements`.
print("Extracting ENSDF headers ...")
# Set membership replaces the per-line scan over every isotope label and
# guarantees each matching line is written exactly once.
wanted_isotopes = set(elements)
# The output is opened once, in write mode, so re-running this cell rebuilds
# the file instead of appending a second copy of every header.
with open(resulting_files_dir + 'all_ensdf_headers.txt', 'w') as outfile:
    for i in names:
        with open(i) as infile:
            for line in infile:
                if not wanted_isotopes.isdisjoint(line.split()):
                    outfile.write(line)
print("Finished extracting headers.")

Extracting ...
Finished extracting.


In [359]:
# Insert ';' separators at fixed character offsets of every non-blank header
# line so the fixed-width text can be parsed as delimited data.
print("Formatting ENSDF header data...")
header_breaks = [5, 10, 15, 20, 25, 30, 35, 47]
header_source = resulting_files_dir + "all_ensdf_headers.txt"
header_target = resulting_files_dir + 'all_ensdf_headers_formatted.csv'
with open(header_source) as infile, open(header_target, 'w') as outfile:
    for raw in infile:
        if not raw.strip():
            continue
        # Cut the line at each offset and rejoin the pieces with ';'.
        pieces = [raw[start:stop] for start, stop in zip([0] + header_breaks, header_breaks + [None])]
        outfile.write(";".join(pieces))
print("Finished formating data.")

Formatting ENSDF header data...
Finished formating data.


In [362]:
# Load the ';'-delimited header table; the file has no header row, so column
# names are supplied here.
ensdf_index_col = ["SYMB", "A", "Z", "Nol", "Nog", "Nmax", "Nc", "Sn", "Sp"]
ensdf_index = pd.read_csv(
    resulting_files_dir + "all_ensdf_headers_formatted.csv",
    sep=";",
    names=ensdf_index_col,
)
# Whitespace-stripped copy of the symbol, used later to build file names.
ensdf_index["Text_Filenames"] = [symbol.strip() for symbol in ensdf_index["SYMB"]]

Verify that all EXFOR isotopes have information available in the ENSDF database.

In [363]:
# Compares counts only: number of distinct header symbols vs. number of
# distinct isotope labels taken from `df`.
ensdf_index["SYMB"].nunique(dropna=False) == len(elements)

True

In [364]:
# Header symbols exactly as read from the file (matched against line starts)
element_list_endf = list(ensdf_index["SYMB"])
# The same symbols with surrounding whitespace removed (used in file names)
element_list_names = list(ensdf_index["Text_Filenames"])

In [365]:
# Preview the first rows of the header index table.
ensdf_index.head()

Unnamed: 0,SYMB,A,Z,Nol,Nog,Nmax,Nc,Sn,Sp,Text_Filenames
0,1n,1,0,1,0,1,1,0.0,0.0,1n
1,1H,1,1,1,0,1,1,0.0,0.0,1H
2,2H,2,1,1,0,1,1,2.22457,2.22457,2H
3,3H,3,1,1,0,1,1,6.25723,0.0,3H
4,3He,3,2,1,0,1,1,0.0,5.49348,3He


### Extracting ENSDF Data per Element

In [366]:
# For every header symbol, copy each line that starts with that symbol plus the
# Nol + Nog lines that follow it into Elemental_ENSDF/<stripped symbol>.txt.
print("Extracting ENSDF data per element with header ...")

# Read every input file once up front instead of once per element.
ensdf_file_lines = {}
for i in names:
    with open(i, "r") as infile:
        ensdf_file_lines[i] = infile.readlines()

for e in element_list_endf:
    # Block length (header line + Nol + Nog) is looked up once per element;
    # the first matching row of the index table is used.
    symbol_rows = ensdf_index[ensdf_index["SYMB"] == e]
    block_length = 1 + int(symbol_rows["Nol"].iloc[0]) + int(symbol_rows["Nog"].iloc[0])
    # Write mode: re-running the cell rebuilds the file rather than appending
    # a duplicate copy of the data.
    with open(("Elemental_ENSDF/" + str(e).strip() + '.txt'), 'w') as outfile:
        for i in names:
            lines = ensdf_file_lines[i]
            for z, line in enumerate(lines):
                if line.startswith(str(e)):
                    # A block cut short by end-of-file is copied as far as it goes.
                    outfile.writelines(lines[z:z + block_length])
print("Finished extracting data per element with header.")

Extracting ENSDF data per element with header ...
Finished extracting data per element with header.


### Extracting Stable States Only

In [305]:
# For every header symbol, write the symbol followed by the line immediately
# after each matching header line into a single text file.
print("Extracting stable states ...")

# Read every input file once up front instead of once per element.
ensdf_file_lines = {}
for i in names:
    with open(i, "r") as infile:
        ensdf_file_lines[i] = infile.readlines()

# Opened once in write mode so re-running the cell rebuilds the file instead of
# appending a second copy of every row.
with open((resulting_files_dir + "ensdf_stable_state.txt"), 'w') as outfile:
    for e in element_list_endf:
        for i in names:
            lines = ensdf_file_lines[i]
            for z, line in enumerate(lines):
                if line.startswith(str(e)):
                    outfile.write(e + lines[1 + z])
print("Finished extracting stable states.")

Extracting stable states ...
Finished extracting REACTION NOTATION.


In [306]:
# Insert ';' separators at fixed character offsets of every non-blank line of
# the stable-state file so it can be parsed as delimited data.
print("Formatting ENSDF stable states file ...")
stable_breaks = [5, 10, 19, 25, 28, 39, 42, 44, 46, 59, 68, 71, 74, 85, 93, 96, 107, 115]
stable_source = resulting_files_dir + "ensdf_stable_state.txt"
stable_target = resulting_files_dir + 'ensdf_stable_state_formatted.csv'
with open(stable_source) as infile, open(stable_target, 'w') as outfile:
    for raw in infile:
        if not raw.strip():
            continue
        # Cut the line at each offset and rejoin the pieces with ';'.
        pieces = [raw[start:stop] for start, stop in zip([0] + stable_breaks, stable_breaks + [None])]
        outfile.write(";".join(pieces))
print("Finished formating data.")

Formatting ENSDF data...
Finished formating data.


### Extracting ENSDF Data per Element without Header

In [367]:
# For every header symbol, copy the Nol + Nog lines that follow each line
# starting with that symbol (the header line itself is skipped) into
# Elemental_ENSDF_v2/<stripped symbol>.txt.
print("Extracting ENSDF data per element without header ...")

# Read every input file once up front instead of once per element.
ensdf_file_lines = {}
for i in names:
    with open(i, "r") as infile:
        ensdf_file_lines[i] = infile.readlines()

for e in element_list_endf:
    # Number of lines after the header, looked up once per element; the first
    # matching row of the index table is used.
    symbol_rows = ensdf_index[ensdf_index["SYMB"] == e]
    data_line_count = int(symbol_rows["Nol"].iloc[0]) + int(symbol_rows["Nog"].iloc[0])
    # Write mode: re-running the cell rebuilds the file rather than appending
    # a duplicate copy of the data.
    with open(("Elemental_ENSDF_v2/" + str(e).strip() + '.txt'), 'w') as outfile:
        for i in names:
            lines = ensdf_file_lines[i]
            for z, line in enumerate(lines):
                if line.startswith(str(e)):
                    # A block cut short by end-of-file is copied as far as it goes.
                    outfile.writelines(lines[z + 1:z + 1 + data_line_count])
print("Finished extracting data per element without header.")

Extracting ENSDF data per element without header ...
Finished extracting data per element without header.


In [368]:
# Rewrite each per-isotope file from Elemental_ENSDF_v2 into Elemental_ENSDF_v3
# with ';' separators inserted at fixed character offsets; blank lines are dropped.
print("Formatting ENSDF data...")
level_breaks = [4, 15, 20, 23, 34, 37, 39, 43, 54, 65, 66]
for isotope in element_list_names:
    with open("Elemental_ENSDF_v2/" + isotope + ".txt") as infile, open("Elemental_ENSDF_v3/" + isotope + ".txt", 'w') as outfile:
        for raw in infile:
            if not raw.strip():
                continue
            # Cut the line at each offset and rejoin the pieces with ';'.
            pieces = [raw[start:stop] for start, stop in zip([0] + level_breaks, level_breaks + [None])]
            outfile.write(";".join(pieces))
print("Finished formating data.")

Formatting ENSDF data...
Finished formating data.


### Making DataFrame for ENSDF Inferal

In [370]:
# Build one DataFrame per isotope from the ';'-delimited files and collect them
# in a list.
print("Creatign DataFrame with Basic ENSDF data ...")
ensdf_cols = ["Level_Number", "Level_Energy", "Spin", "Parity", "Half_Life",
              "Number_Gammas", "Flag_Spin", "Flag_Energy", "Other", "Other2", "Other3", "Other4"]
appended_data = []

for e in element_list_names:
    with open("Elemental_ENSDF_v3/" + e + ".txt", "r") as infile:
        element_ensdf = pd.read_csv(infile, sep=";", names=ensdf_cols)
    # Normalise Level_Number to stripped text; empty text becomes NaN so the
    # row is removed by the dropna() below.
    level_ids = element_ensdf["Level_Number"].astype(str).str.strip()
    element_ensdf["Level_Number"] = level_ids.replace(to_replace="", value=np.nan)
    # dropna() removes every row that has a NaN in any column.
    element_ensdf = element_ensdf.dropna().reset_index(drop=True)
    element_ensdf["Element_w_A"] = e
    appended_data.append(element_ensdf)
print("Finished creating list of dataframes.")

Creatign DataFrame with Basic ENSDF data ...
Finished creating list of dataframes.


In [371]:
# Stack the per-isotope frames into a single DataFrame.
# NOTE: this rebinds `appended_data` from a list to a DataFrame, so the cell
# cannot be re-run without first re-running the cell that builds the list.
appended_data = pd.concat(appended_data)

In [376]:
# Keep only the level, energy, spin, parity and isotope-label columns.
kept_columns = ["Level_Number", "Level_Energy", "Spin", "Parity", "Element_w_A"]
appended_data = appended_data[kept_columns]

In [377]:
# Preview the first rows of the combined level table.
appended_data.head()

Unnamed: 0,Level_Number,Level_Energy,Spin,Parity,Element_w_A
0,1,0,0.5,1,1n
0,1,0,0.5,1,1H
0,1,0,1.0,1,2H
0,1,0,0.5,1,3H
0,1,0,0.5,1,3He


In [387]:
# Number of distinct isotope labels in the combined level table.
appended_data["Element_w_A"].nunique()

530

In [394]:
# One row of isotope properties per (Protons, Target_Neutrons) pair from `df`,
# joined onto the level table by isotope label.
isotope_columns = ["Protons", "Target_Neutrons", "Atomic_Mass_Micro", "Target_Mass_Number", "Element", "Element_w_A"]
isotope_properties = df[isotope_columns].drop_duplicates(subset=['Protons', 'Target_Neutrons'])
appended_data_2 = appended_data.merge(isotope_properties, on='Element_w_A')

In [398]:
# The merge should neither drop nor duplicate level rows.
len(appended_data) == len(appended_data_2)

True

In [399]:
# Save the merged level table to the working directory, without the index column.
appended_data_2.to_csv("ensdf_v1.csv", index=False)

### Adding Stable-State Spin and Parity

In [264]:
# Load the ';'-delimited stable-state table and reduce it to the isotope label
# with its spin and parity.
columns_ensdf = ["Element_w_A", "N1", "Elv[MeV]", "spin", "parity", "state_half_life", "Ng", "J", "unc", "spins", "nd",
                 "m", "percent", "mode", "other", "other1", "other2", "other3", "other4"]
# Read from `resulting_files_dir`, which is where the formatting cell writes
# this file; the bare file name pointed at the working directory instead.
ensdf_final = pd.read_csv(resulting_files_dir + "ensdf_stable_state_formatted.csv", names=columns_ensdf, sep=";")
# NOTE(review): -1.0 and 0 look like "unknown" markers for spin and parity;
# the substitutes 3.5 and 1.0 are hard-coded here — confirm they are intended.
ensdf_final["spin"] = ensdf_final["spin"].replace(to_replace=-1.0, value=3.5)
ensdf_final["parity"] = ensdf_final["parity"].replace(to_replace=0, value=1.0)
# Strip padding so the labels match `Element_w_A` in `df` when merging.
ensdf_final["Element_w_A"] = ensdf_final["Element_w_A"].apply(lambda x: x.strip())
ensdf_final = ensdf_final[["Element_w_A", "spin", "parity"]]

In [275]:
# Attach stable-state spin and parity to every row of `df` by isotope label
# (default inner join: rows without a match are dropped).
df2 = df.merge(ensdf_final, on='Element_w_A')

In [276]:
# Spot-check the merged rows for one isotope.
df2.loc[df2["Element_w_A"].eq("35Cl")]

Unnamed: 0,Target_Meta_State,MT,Energy,dEnergy,Data,dData,ELV/HL,dELV/HL,I78,Refer,...,"Q(3He,a)","Q(t,a)",Unc,Nuc_Radius_fm,Neut_Nuc_Rad_Ratio,Compound_Neutrons,Compound_Mass_Number,Element_w_A,spin,parity
688816,G,1,-1.513145,-3.602091,45.370,0.00000,0.0,0.0,L,"R.M.BRUGGER,ET.AL. (56)",...,11931.288965,10137.979683,0.000000,4.088833,0.305711,19,36,35Cl,1.5,1
688817,G,1,-1.507519,-3.596465,45.070,0.00000,0.0,0.0,L,"R.M.BRUGGER,ET.AL. (56)",...,11931.288965,10137.979683,0.000000,4.088833,0.305711,19,36,35Cl,1.5,1
688818,G,1,-1.501827,-3.590774,44.690,0.00000,0.0,0.0,L,"R.M.BRUGGER,ET.AL. (56)",...,11931.288965,10137.979683,0.000000,4.088833,0.305711,19,36,35Cl,1.5,1
688819,G,1,-1.496073,-3.585019,44.540,0.00000,0.0,0.0,L,"R.M.BRUGGER,ET.AL. (56)",...,11931.288965,10137.979683,0.000000,4.088833,0.305711,19,36,35Cl,1.5,1
688820,G,1,-1.490260,-3.579206,44.640,0.00000,0.0,0.0,L,"R.M.BRUGGER,ET.AL. (56)",...,11931.288965,10137.979683,0.000000,4.088833,0.305711,19,36,35Cl,1.5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
703043,G,107,7.161368,5.072422,0.191,0.03056,0.0,0.0,L,"E.B.Paul,ET.AL. (53)",...,7932.859400,13443.054900,0.000000,4.088833,0.305711,19,36,35Cl,1.5,1
703044,G,107,7.164353,5.301030,0.108,0.09000,0.0,0.0,L,"R.C.Barrall,ET.AL. (69)",...,7932.859400,13443.054900,0.013699,4.088833,0.305711,19,36,35Cl,1.5,1
703045,G,107,7.164353,5.301030,0.121,0.02000,0.0,0.0,L,W.Nagel (66),...,7932.859400,13443.054900,0.013699,4.088833,0.305711,19,36,35Cl,1.5,1
703046,G,107,7.164353,5.301030,0.117,0.02000,0.0,0.0,L,W.Nagel (66),...,7932.859400,13443.054900,0.013699,4.088833,0.305711,19,36,35Cl,1.5,1


In [277]:
# Prefix each Reference with its Refer text, separated by a single space.
refer_prefix = df2["Refer"] + " "
df2["Reference"] = refer_prefix + df2["Reference"]

In [278]:
# Compare the combined Reference column against its source columns.
df2.loc[:, ["Refer", "Reference", "Author"]]

Unnamed: 0,Refer,Reference,Author
0,"D.F.MEASDAY,ET.AL. (66)","D.F.MEASDAY,ET.AL. (66) Jour. Nuclear Phys...",D.F.Measday+
1,"D.F.MEASDAY,ET.AL. (66)","D.F.MEASDAY,ET.AL. (66) Jour. Nuclear Phys...",D.F.Measday+
2,"D.F.MEASDAY,ET.AL. (66)","D.F.MEASDAY,ET.AL. (66) Jour. Nuclear Phys...",D.F.Measday+
3,"D.F.MEASDAY,ET.AL. (66)","D.F.MEASDAY,ET.AL. (66) Jour. Nuclear Phys...",D.F.Measday+
4,"D.F.MEASDAY,ET.AL. (66)","D.F.MEASDAY,ET.AL. (66) Jour. Nuclear Phys...",D.F.Measday+
...,...,...,...
4533402,S.F.Mughabghab (06),S.F.Mughabghab (06) Jour. Physical Rev...,S.F.Mughabghab
4533403,S.F.Mughabghab (06),S.F.Mughabghab (06) 0.0,S.F.Mughabghab
4533404,"J.F.Wild,ET.AL. (73)","J.F.Wild,ET.AL. (73) 0.0",J.F.Wild+
4533405,S.F.Mughabghab (06),S.F.Mughabghab (06) 0.0,S.F.Mughabghab


In [280]:
# Remove the Refer and Author columns.
df2 = df2.drop(["Refer", "Author"], axis=1)

In [282]:
# Inspect the remaining column names after the drop.
df2.columns

Index(['Target_Meta_State', 'MT', 'Energy', 'dEnergy', 'Data', 'dData',
       'ELV/HL', 'dELV/HL', 'I78', 'Protons', 'Product_Meta_State', 'Frame',
       'Reaction_Notation', 'Title', 'Year', 'Institute', 'Date', 'Reference',
       'Out', 'Target_Neutrons', 'Origin', 'Mass_Excess', 'dMass_Excess',
       'Binding_Energy', 'dBinding_Energy', 'B_Decay_Energy',
       'dB_Decay_Energy', 'Atomic_Mass_Micro', 'dAtomic_Mass_Micro', 'S(2n)',
       'dS(2n)', 'S(2p)', 'dS(2p)', 'Q(a)', 'dQ(a)', 'Q(2B-)', 'dQ(2B-)',
       'Q(ep)', 'dQ(ep)', 'Q(B-n)', 'dQ(B-n)', 'Target_Mass_Number', 'Element',
       'S(n)', 'dS(n)', 'S(p)', 'dS(p)', 'Q(4B-)', 'dQ(4B-)', 'Q(d,a)',
       'dQ(d,a)', 'Q(p,a)', 'dQ(p,a)', 'Q(n,a)', 'dQ(n,a)', 'Q(g,p)', 'Q(g,n)',
       'Q(g,pn)', 'Q(g,d)', 'Q(g,t)', 'Q(g,He3)', 'Q(g,2p)', 'Q(g,2n)',
       'Q(g,a)', 'Q(p,n)', 'Q(p,2p)', 'Q(p,pn)', 'Q(p,d)', 'Q(p,2n)', 'Q(p,t)',
       'Q(p,3He)', 'Q(n,2p)', 'Q(n,np)', 'Q(n,d)', 'Q(n,2n)', 'Q(n,t)',
       'Q(n,3He)', 'Q(d,t)',

In [281]:
# Write the enriched table (with spin and parity) as the next working version,
# without the index column.
# NOTE(review): this writes under "../../ML_Data/", whereas the input at the top
# of the notebook is read from "/../ML_Data/" — confirm both point at the same
# directory.
df2.to_csv("../../ML_Data/working_xs_v2_unsk.csv", index=False)