In [4]:
import pandas as pd
import numpy as np
import os
from natsort import natsorted
import shutil

# Reading EXFOR Data into DataFrame

In [5]:
# Load the working EXFOR cross-section table; later cells read its Element_w_A column.
df = pd.read_csv("../ML_Data/working_xs_v1_unsk.csv")

In [6]:
# Display the column labels of the loaded table.
df.columns

Index(['Target_Meta_State', 'MT', 'Energy', 'dEnergy', 'Data', 'dData', 'I78',
       'Target_Protons', 'Product_Meta_State', 'Frame', 'Reaction_Notation',
       'Title', 'Year', 'Institute', 'Date', 'Reference', 'Out',
       'Target_Neutrons', 'Target_Mass_Number', 'Element', 'Origin',
       'Mass_Excess', 'dMass_Excess', 'Binding_Energy', 'dBinding_Energy',
       'B_Decay_Energy', 'dB_Decay_Energy', 'Atomic_Mass_Micro',
       'dAtomic_Mass_Micro', 'S(2n)', 'dS(2n)', 'S(2p)', 'dS(2p)', 'Q(a)',
       'dQ(a)', 'Q(2B-)', 'dQ(2B-)', 'Q(ep)', 'dQ(ep)', 'Q(B-n)', 'dQ(B-n)',
       'S(n)', 'dS(n)', 'S(p)', 'dS(p)', 'Q(4B-)', 'dQ(4B-)', 'Q(d,a)',
       'dQ(d,a)', 'Q(p,a)', 'dQ(p,a)', 'Q(n,a)', 'dQ(n,a)', 'Q(g,p)', 'Q(g,n)',
       'Q(g,pn)', 'Q(g,d)', 'Q(g,t)', 'Q(g,He3)', 'Q(g,2p)', 'Q(g,2n)',
       'Q(g,a)', 'Q(p,n)', 'Q(p,2p)', 'Q(p,pn)', 'Q(p,d)', 'Q(p,2n)', 'Q(p,t)',
       'Q(p,3He)', 'Q(n,2p)', 'Q(n,np)', 'Q(n,d)', 'Q(n,2n)', 'Q(n,t)',
       'Q(n,3He)', 'Q(d,t)', 'Q(d,3He)', '

In [7]:
# Distinct Element_w_A labels found in the EXFOR table; used below to pick matching ENSDF lines.
elements = df.Element_w_A.unique()

In [29]:
# Walk the ENSDF directory tree and collect the path of every ".dat" file.
directory = "./ENSDF/ENSDF_Files/"

print("Searching directory for RIPL ENSDF files...")
names = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(directory)
    for filename in filenames
    if filename.endswith(".dat")
]

print("Gathered {} RIPL ENSDF files.".format(len(names)))
# Put the collected paths into natural-sort order (natsort).
names = natsorted(names)

Searching directory for RIPL ENSDF files...
Gathered 118 RIPL ENSDF files.


In [10]:
# Folder prefix for the intermediate files written and read by the cells below
# (the trailing slash matters: paths are built by string concatenation).
resulting_files_dir = "./ENSDF/Resulting_Files/"

In [358]:
# Copy every ENSDF line that contains one of the EXFOR isotope labels as a
# whitespace-separated token into a single headers file.
print("Extracting ENSDF headers ...")
wanted_symbols = set(elements)
# The output is opened once, in write mode, so re-running this cell rebuilds
# the file instead of appending a second copy of every header to it.
with open(resulting_files_dir + 'all_ensdf_headers.txt', 'w') as outfile:
    for path in names:
        with open(path) as infile:
            for line in infile:
                # Tokenise the line once; it is written at most once even if
                # more than one token matches an isotope label.
                if not wanted_symbols.isdisjoint(line.split()):
                    outfile.write(line)
print("Finished extracting headers.")

Extracting ...
Finished extracting.


In [359]:
# Turn each fixed-width header line into a ';'-delimited record by cutting it
# at the column offsets listed below.
print("Formatting ENSDF header data...")
header_field_starts = [5, 10, 15, 20, 25, 30, 35, 47]
with open(resulting_files_dir + "all_ensdf_headers.txt") as infile, open(resulting_files_dir + 'all_ensdf_headers_formatted.csv', 'w') as outfile:
    for line in infile:
        if not line.strip():
            continue  # blank lines are not copied
        # Remove the newline before cutting. Inserting a separator at an offset
        # beyond the end of a short line would otherwise place it after the
        # newline and glue the next record onto this one.
        record = line.rstrip("\n")
        edges = [0] + header_field_starts + [None]
        fields = [record[start:stop] for start, stop in zip(edges, edges[1:])]
        outfile.write(";".join(fields) + "\n")
print("Finished formating data.")

Formatting ENSDF header data...
Finished formating data.


In [11]:
# Read the ';'-delimited header file; the file has no header row, so the
# column names are supplied here.
ensdf_index_col = ["SYMB", "A", "Z", "Nol", "Nog", "Nmax", "Nc", "Sn", "Sp"]
ensdf_index = pd.read_csv(
    resulting_files_dir + "all_ensdf_headers_formatted.csv",
    sep=";",
    names=ensdf_index_col,
)
# Whitespace-stripped copy of SYMB, used later to build per-isotope file names.
ensdf_index["Text_Filenames"] = [symbol.strip() for symbol in ensdf_index["SYMB"]]

Verify that all EXFOR isotopes have information available in the ENSDF database.

In [12]:
# Compare the isotope labels themselves rather than only how many there are:
# two collections of equal size can still contain different isotopes.
set(elements) == set(ensdf_index["Text_Filenames"])

True

In [13]:
# Raw SYMB values: matched against the start of lines in the ENSDF files.
element_list_endf = ensdf_index["SYMB"].tolist()
# The same labels with surrounding whitespace removed: used in file names.
element_list_names = ensdf_index["Text_Filenames"].tolist()

In [14]:
# Preview the first rows of the ENSDF header index.
ensdf_index.head()

Unnamed: 0,SYMB,A,Z,Nol,Nog,Nmax,Nc,Sn,Sp,Text_Filenames
0,1n,1,0,1,0,1,1,0.0,0.0,1n
1,1H,1,1,1,0,1,1,0.0,0.0,1H
2,2H,2,1,1,0,1,1,2.22457,2.22457,2H
3,3H,3,1,1,0,1,1,6.25723,0.0,3H
4,3He,3,2,1,0,1,1,0.0,5.49348,3He


### Extracting ENSDF Data per Element

In [366]:
# For every isotope, copy its header line plus the Nol + Nog lines that follow
# it from the ENSDF files into "Elemental_ENSDF/<isotope>.txt".
print("Extracting ENSDF data per element with header ...")
for e in element_list_endf:
    # Look the line count up once per isotope instead of on every matching line.
    symbol_rows = ensdf_index[ensdf_index["SYMB"] == e]
    block_length = 1 + symbol_rows["Nol"].values[0] + symbol_rows["Nog"].values[0]
    # Write mode, opened once per isotope: re-running the cell rewrites the
    # file instead of appending a duplicate block to it.
    with open(("Elemental_ENSDF/" + str(e).strip() + '.txt'), 'w') as outfile:
        for path in names:
            with open(path, "r") as infile:
                lines = infile.readlines()
            for z, line in enumerate(lines):
                if line.startswith(str(e)):
                    # Slicing stops at the end of the file, so a truncated
                    # block is copied as far as it goes instead of raising
                    # IndexError.
                    outfile.writelines(lines[z:z + block_length])
print("Finished extracting data per element with header.")

Extracting ENSDF data per element with header ...
Finished extracting data per element with header.


### Extracting Stable States Only

In [305]:
# For every isotope, write its label followed by the line that comes directly
# after its header in the ENSDF files.
print("Extracting stable states ...")
# Opened once in write mode so a re-run replaces the file rather than
# appending a second copy of every record.
with open((resulting_files_dir + "ensdf_stable_state.txt"), 'w') as outfile:
    for e in element_list_endf:
        for path in names:
            with open(path, "r") as infile:
                lines = infile.readlines()
            for z, line in enumerate(lines):
                # A header on the very last line has no following line to
                # copy; skip it instead of raising IndexError.
                if line.startswith(str(e)) and z + 1 < len(lines):
                    outfile.write(e + lines[z + 1])
print("Finished extracting stable states.")

Extracting stable states ...
Finished extracting REACTION NOTATION.


In [306]:
# Turn each fixed-width stable-state line into a ';'-delimited record by
# cutting it at the column offsets listed below.
print("Formatting ENSDF stable states file ...")
stable_field_starts = [5, 10, 19, 25, 28, 39, 42, 44, 46, 59, 68, 71, 74, 85, 93, 96, 107, 115]
with open(resulting_files_dir + "ensdf_stable_state.txt") as infile, open(resulting_files_dir + 'ensdf_stable_state_formatted.csv', 'w') as outfile:
    for line in infile:
        if not line.strip():
            continue  # blank lines are not copied
        # Remove the newline before cutting. Inserting a separator at an offset
        # beyond the end of a short line would otherwise place it after the
        # newline and glue the next record onto this one.
        record = line.rstrip("\n")
        edges = [0] + stable_field_starts + [None]
        fields = [record[start:stop] for start, stop in zip(edges, edges[1:])]
        outfile.write(";".join(fields) + "\n")
print("Finished formating data.")

Formatting ENSDF data...
Finished formating data.


### Extracting ENSDF Data per Element without Header

In [367]:
# For every isotope, copy the Nol + Nog lines that follow its header line
# (the header itself is left out) into "Elemental_ENSDF_v2/<isotope>.txt".
print("Extracting ENSDF data per element without header ...")
for e in element_list_endf:
    # Look the line count up once per isotope instead of on every matching line.
    symbol_rows = ensdf_index[ensdf_index["SYMB"] == e]
    block_length = 1 + symbol_rows["Nol"].values[0] + symbol_rows["Nog"].values[0]
    # Write mode, opened once per isotope: re-running the cell rewrites the
    # file instead of appending a duplicate block to it.
    with open(("Elemental_ENSDF_v2/" + str(e).strip() + '.txt'), 'w') as outfile:
        for path in names:
            with open(path, "r") as infile:
                lines = infile.readlines()
            for z, line in enumerate(lines):
                if line.startswith(str(e)):
                    # Start one past the header. Slicing stops at the end of
                    # the file instead of raising IndexError on a truncated
                    # block.
                    outfile.writelines(lines[z + 1:z + block_length])
print("Finished extracting data per element without header.")

Extracting ENSDF data per element without header ...
Finished extracting data per element without header.


In [368]:
# Turn every per-isotope fixed-width file into a ';'-delimited file by cutting
# each line at the column offsets listed below.
print("Formatting ENSDF data...")
level_field_starts = [4, 15, 20, 23, 34, 37, 39, 43, 54, 65, 66]
# Write where the DataFrame-building cells read from
# ("./ENSDF/Elemental_ENSDF_v3/"); the previous relative target
# "Elemental_ENSDF_v3/" is a different folder when the notebook runs from the
# directory that contains "ENSDF/".
formatted_dir = "./ENSDF/Elemental_ENSDF_v3/"
os.makedirs(formatted_dir, exist_ok=True)
for isotope in element_list_names:
    with open("Elemental_ENSDF_v2/" + isotope + ".txt") as infile, open(formatted_dir + isotope + ".txt", 'w') as outfile:
        for line in infile:
            if not line.strip():
                continue  # blank lines are not copied
            # Remove the newline before cutting. Inserting a separator at an
            # offset beyond the end of a short line would otherwise place it
            # after the newline and glue the next record onto this one.
            record = line.rstrip("\n")
            edges = [0] + level_field_starts + [None]
            fields = [record[start:stop] for start, stop in zip(edges, edges[1:])]
            outfile.write(";".join(fields) + "\n")
print("Finished formating data.")

Formatting ENSDF data...
Finished formating data.


### Making DataFrame for ENSDF Inference

In [16]:
# Build one DataFrame per isotope from its ';'-delimited file and collect them
# in a list for concatenation in a later cell.
print("Creatign DataFrame with Basic ENSDF data ...")
ensdf_cols = [
    "Level_Number", "Level_Energy", "Spin", "Parity", "Half_Life", "Number_Gammas",
    "Flag_Spin", "Flag_Energy", "Other", "Other2", "Other3", "Other4",
]
appended_data = []

for e in element_list_names:
    with open("./ENSDF/Elemental_ENSDF_v3/" + e + ".txt", "r") as infile:
        element_ensdf = pd.read_csv(infile, sep=";", names=ensdf_cols)
    # Level_Number as stripped text; entries that are blank after stripping
    # are turned into NaN so the dropna() below removes those rows.
    level_labels = element_ensdf["Level_Number"].astype(str).apply(lambda x: x.strip())
    element_ensdf["Level_Number"] = level_labels.replace(to_replace="", value=np.nan)
    # dropna() with no arguments discards a row if ANY of its columns is NaN.
    element_ensdf = element_ensdf.dropna().reset_index(drop=True)
    element_ensdf["Element_w_A"] = e
    appended_data.append(element_ensdf)
print("Finished creating list of dataframes.")

Creatign DataFrame with Basic ENSDF data ...
Finished creating list of dataframes.


In [17]:
# Stack the per-isotope frames into one table. The name is rebound from a list
# to a DataFrame, so this cell cannot be run twice without rebuilding the list.
appended_data = pd.concat(appended_data)

In [18]:
# Keep only the level columns used further on, plus the isotope label.
appended_data = appended_data[["Level_Number", "Level_Energy", "Spin", "Parity", "Element_w_A"]]

In [19]:
# Preview the first rows of the combined level table.
appended_data.head()

Unnamed: 0,Level_Number,Level_Energy,Spin,Parity,Element_w_A
0,1,0,0.5,1,1n
0,1,0,0.5,1,1H
0,1,0,1.0,1,2H
0,1,0,0.5,1,3H
0,1,0,0.5,1,3He


In [20]:
# Number of distinct isotope labels present in the combined level table.
appended_data["Element_w_A"].nunique()

530

In [23]:
# Attach the EXFOR target columns to every level row, joining on the isotope
# label. The EXFOR side is reduced to one row per (protons, neutrons) pair.
appended_data_2 = appended_data.merge(
    df[
        ["Target_Protons", "Target_Neutrons", "Atomic_Mass_Micro", "Target_Mass_Number", "Element", "Element_w_A"]
    ].drop_duplicates(subset=['Target_Protons', 'Target_Neutrons']),
    on='Element_w_A',
)

In [24]:
# The merge should neither drop nor duplicate level rows.
len(appended_data) == len(appended_data_2)

True

In [25]:
# Persist the merged level table without the DataFrame index.
appended_data_2.to_csv("./ENSDF/ensdf_v1.csv", index=False)

In [26]:
# Reload the file that was just written; dtypes are re-inferred by read_csv.
appended_data_2 = pd.read_csv("./ENSDF/ensdf_v1.csv")

This dataset is for ENSDF prediction.

In [27]:
# Preview the first rows of the reloaded table.
appended_data_2.head()

Unnamed: 0,Level_Number,Level_Energy,Spin,Parity,Element_w_A,Target_Protons,Target_Neutrons,Atomic_Mass_Micro,Target_Mass_Number,Element
0,1.0,0.0,0.5,1.0,1n,0,1,1008665.0,1,n
1,1.0,0.0,0.5,1.0,1H,1,0,1007825.0,1,H
2,1.0,0.0,1.0,1.0,2H,1,1,2014102.0,2,H
3,1.0,0.0,0.5,1.0,3H,1,2,3016049.0,3,H
4,1.0,0.0,0.5,1.0,3He,2,1,3016029.0,3,He


In [28]:
# Show the level rows whose target has 92 protons.
appended_data_2.loc[appended_data_2["Target_Protons"] == 92]

Unnamed: 0,Level_Number,Level_Energy,Spin,Parity,Element_w_A,Target_Protons,Target_Neutrons,Atomic_Mass_Micro,Target_Mass_Number,Element
69295,1.0,0.000000,0.0,1.0,230U,92,138,2.300339e+08,230,U
69296,2.0,0.051727,2.0,1.0,230U,92,138,2.300339e+08,230,U
69297,3.0,0.169340,4.0,1.0,230U,92,138,2.300339e+08,230,U
69298,4.0,0.346950,6.0,1.0,230U,92,138,2.300339e+08,230,U
69299,5.0,0.366649,1.0,-1.0,230U,92,138,2.300339e+08,230,U
...,...,...,...,...,...,...,...,...,...,...
70439,103.0,1.787500,-1.0,0.0,239U,92,147,2.390543e+08,239,U
70440,104.0,1.796500,-1.0,0.0,239U,92,147,2.390543e+08,239,U
70441,105.0,1.807900,1.5,0.0,239U,92,147,2.390543e+08,239,U
70442,106.0,1.828200,-1.0,0.0,239U,92,147,2.390543e+08,239,U


### Adding Stable-State Spin and Parity to the EXFOR Data

In [39]:
# Read the ';'-delimited stable-state file (no header row) and reduce it to the
# isotope label with its spin and parity.
columns_ensdf = [
    "Element_w_A", "N1", "Elv[MeV]", "spin", "parity", "state_half_life", "Ng", "J",
    "unc", "spins", "nd", "m", "percent", "mode", "other", "other1", "other2",
    "other3", "other4",
]
ensdf_final = pd.read_csv(
    resulting_files_dir + "ensdf_stable_state_formatted.csv",
    sep=";",
    names=columns_ensdf,
)
# NOTE(review): spin -1.0 and parity 0 look like "unknown" sentinels that are
# being replaced by fixed values (3.5 and 1.0) - confirm the intended defaults.
ensdf_final = ensdf_final.assign(
    spin=ensdf_final["spin"].replace(to_replace=-1.0, value=3.5),
    parity=ensdf_final["parity"].replace(to_replace=0, value=1.0),
    Element_w_A=ensdf_final["Element_w_A"].apply(lambda x: x.strip()),
)[["Element_w_A", "spin", "parity"]]

In [40]:
# Add the stable-state spin and parity columns to the EXFOR rows, joining on
# the isotope label (default inner join).
df2 = df.merge(ensdf_final, on='Element_w_A')

In [41]:
# Spot-check the merged rows for a single isotope.
df2.loc[df2["Element_w_A"] == "35Cl"]

Unnamed: 0,Target_Meta_State,MT,Energy,dEnergy,Data,dData,I78,Target_Protons,Product_Meta_State,Frame,...,"Q(3He,t)","Q(3He,a)","Q(t,a)",Element_w_A,Nuc_Radius_fm,Neut_Nuc_Rad_Ratio,Compound_Neutrons,Compound_Mass_Number,spin,parity
685286,G,1,-1.513145,-3.106825,45.370,6.492932,L,17,G,L,...,4653.502682,11931.288965,10137.979683,35Cl,4.088833,0.195655,19,36,1.5,1
685287,G,1,-1.507519,-3.101199,45.070,6.449999,L,17,G,L,...,4653.502682,11931.288965,10137.979683,35Cl,4.088833,0.195655,19,36,1.5,1
685288,G,1,-1.501827,-3.095508,44.690,6.395617,L,17,G,L,...,4653.502682,11931.288965,10137.979683,35Cl,4.088833,0.195655,19,36,1.5,1
685289,G,1,-1.496073,-3.089753,44.540,6.374150,L,17,G,L,...,4653.502682,11931.288965,10137.979683,35Cl,4.088833,0.195655,19,36,1.5,1
685290,G,1,-1.490260,-3.083940,44.640,6.388461,L,17,G,L,...,4653.502682,11931.288965,10137.979683,35Cl,4.088833,0.195655,19,36,1.5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
699513,G,107,7.161368,5.584221,0.191,0.030560,L,17,G,L,...,-5984.835000,7932.859400,13443.054900,35Cl,4.088833,0.195655,19,36,1.5,1
699514,G,107,7.164353,5.301030,0.108,0.090000,L,17,G,L,...,-5984.835000,7932.859400,13443.054900,35Cl,4.088833,0.195655,19,36,1.5,1
699515,G,107,7.164353,5.301030,0.121,0.020000,L,17,G,L,...,-5984.835000,7932.859400,13443.054900,35Cl,4.088833,0.195655,19,36,1.5,1
699516,G,107,7.164353,5.301030,0.117,0.020000,L,17,G,L,...,-5984.835000,7932.859400,13443.054900,35Cl,4.088833,0.195655,19,36,1.5,1


In [44]:
# Save the EXFOR table with the added spin and parity columns, without the index.
df2.to_csv("../ML_Data/working_xs_v2_unsk.csv", index=False)

In [None]:
# Delete the intermediate text files. A file that is already gone is skipped so
# the cell can be run again without raising FileNotFoundError.
for leftover in ('all_ensdf_headers.txt', 'ensdf_stable_state.txt'):
    leftover_path = resulting_files_dir + leftover
    if os.path.exists(leftover_path):
        os.remove(leftover_path)

# Cutoff Energy

In [None]:
# Turn each fixed-width line of the level-parameter file into a ';'-delimited
# record by cutting it at the column offsets listed below.
print("Formatting ENSDF cutoff data...")
cutoff_field_starts = [4, 8, 11, 21, 31, 41, 51, 55, 59, 63, 67, 76, 85, 96, 98, 100, 104, 116]
with open(resulting_files_dir + "levels-param.data.txt") as infile, open(resulting_files_dir + 'cut_off_ensdf_energies.csv', 'w') as outfile:
    for line in infile:
        if not line.strip():
            continue  # blank lines are not copied
        # Remove the newline before cutting. Inserting a separator at an offset
        # beyond the end of a short line would otherwise place it after the
        # newline and glue the next record onto this one.
        record = line.rstrip("\n")
        edges = [0] + cutoff_field_starts + [None]
        fields = [record[start:stop] for start, stop in zip(edges, edges[1:])]
        outfile.write(";".join(fields) + "\n")
print("Finished formating cutoff data.")

In [29]:
# Read the ';'-delimited cut-off file (no header row) with explicit column names.
cut_off_cols = [
    "Z", "A", "Element",
    "Temperature_MeV", "Temperature_U",
    "Black_Shift", "Black_Shift_U",
    "N_Lev_ENSDF", "N_Max_Lev_Complete", "Min_Lev_Complete", "Num_Lev_Unique_Spin",
    "E_Max_N_Max", "E_Num_Lev_U_Spin",
    "Other", "Other2", "Flag", "Nox", "Other3", "Other4", "Spin_Cutoff",
]
cut_off = pd.read_csv(
    "./ENSDF/Resulting_Files/cut_off_ensdf_energies.csv",
    sep=";",
    names=cut_off_cols,
)

# Show the last rows of the table.
cut_off.tail()

Unnamed: 0,Z,A,Element,Temperature_MeV,Temperature_U,Black_Shift,Black_Shift_U,N_Lev_ENSDF,N_Max_Lev_Complete,Min_Lev_Complete,Num_Lev_Unique_Spin,E_Max_N_Max,E_Num_Lev_U_Spin,Other,Other2,Flag,Nox,Other3,Other4,Spin_Cutoff
3348,117,293,17,0.0,0.0,0.0,0.0,1,1,1,1,0.0,0.0,,,,0,,0.0,
3349,118,293,18,0.0,0.0,0.0,0.0,1,1,1,1,0.0,0.0,,,,0,,0.0,
3350,117,294,17,0.0,0.0,0.0,0.0,1,1,1,1,0.0,0.0,,,,0,,0.0,
3351,118,294,18,0.0,0.0,0.0,0.0,1,1,1,1,0.0,0.0,,,,0,,0.0,
3352,118,295,18,0.0,0.0,0.0,0.0,1,1,1,1,0.0,0.0,,,,0,,0.0,


In [30]:
# Keep the columns needed for the level cut-off. .copy() makes the result an
# independent frame, so the column assignments below do not write into a slice
# of the wide table (which triggers SettingWithCopyWarning).
cut_off = cut_off[["Z", "A", "Element", "N_Lev_ENSDF", "N_Max_Lev_Complete", "E_Max_N_Max"]].copy()
cut_off["Element"] = cut_off["Element"].str.strip()
# Isotope label in the same "<A><symbol>" form as Element_w_A elsewhere.
cut_off["Element_w_A"] = cut_off["A"].astype(str) + cut_off["Element"]
# Discard rows whose element field contains a digit.
cut_off = cut_off[~cut_off.Element.str.contains(r'\d')]

In [32]:
# Reload the saved level table. This rebinds `df`, which until now held the
# EXFOR table.
print("Reading data into dataframe...")
df = pd.read_csv("./ENSDF/ensdf_v1.csv")
print("Data read into dataframe!")

# These columns are stored with the pandas 'category' dtype.
str_cols = ["Spin", "Parity", "Element_w_A", "Element"]
df[str_cols] = df[str_cols].astype('category')

# Every column outside str_cols is cast to float.
df = df.astype({col: float for col in df.columns if col not in str_cols})

# Level, proton, neutron and mass numbers are then narrowed to integers.
int_cols = ["Level_Number", "Target_Protons", "Target_Neutrons", "Target_Mass_Number"]
df[int_cols] = df[int_cols].astype(int)

# Only these columns are kept for the cut-off step.
basic_cols = [
    "Level_Number", "Level_Energy", "Target_Protons",
    "Target_Neutrons", "Atomic_Mass_Micro", "Element_w_A",
]
df = df[basic_cols]

# Isotope labels to iterate over when applying the cut-off.
element_list_names = df.Element_w_A.unique()

Reading data into dataframe...
Data read into dataframe!


In [33]:
# Build one DataFrame per isotope and keep only its first N_Max_Lev_Complete
# rows, or a single row when that count is zero.
print("Creatign Cut-off Dataframe ...")
ensdf_cols = [
    "Level_Number", "Level_Energy", "Spin", "Parity", "Half_Life", "Number_Gammas",
    "Flag_Spin", "Flag_Energy", "Other", "Other2", "Other3", "Other4",
]
appended_data = []

for e in element_list_names:
    with open("./ENSDF/Elemental_ENSDF_v3/" + e + ".txt", "r") as infile:
        element_ensdf = pd.read_csv(infile, sep=";", names=ensdf_cols)
    # Level_Number as stripped text; entries that are blank after stripping
    # are turned into NaN so the dropna() below removes those rows.
    level_labels = element_ensdf["Level_Number"].astype(str).apply(lambda x: x.strip())
    element_ensdf["Level_Number"] = level_labels.replace(to_replace="", value=np.nan)
    # dropna() with no arguments discards a row if ANY of its columns is NaN.
    element_ensdf = element_ensdf.dropna().reset_index(drop=True)
    element_ensdf["Element_w_A"] = e
    # First cut-off entry for this isotope; raises IndexError if there is none.
    complete_levels = cut_off.loc[cut_off["Element_w_A"] == e, "N_Max_Lev_Complete"].values[0]
    rows_to_keep = 1 if complete_levels == 0 else complete_levels
    element_ensdf = element_ensdf.iloc[0:rows_to_keep]
    appended_data.append(element_ensdf)
print("Finished creating list of dataframes.")

Creatign Cut-off Dataframe ...
Finished creating list of dataframes.


In [34]:
# Stack the truncated per-isotope frames and keep the level columns plus the
# isotope label.
appended_data = pd.concat(appended_data)[
    ["Level_Number", "Level_Energy", "Spin", "Parity", "Element_w_A"]
]

In [35]:
# Attach proton count, neutron count and atomic mass to every kept level row,
# joining on the isotope label. The right side is reduced to one row per
# (protons, neutrons) pair.
appended_data_2 = appended_data.merge(
    df[
        ["Target_Protons", "Target_Neutrons", "Atomic_Mass_Micro", "Element_w_A"]
    ].drop_duplicates(subset=['Target_Protons', 'Target_Neutrons']),
    on='Element_w_A',
)

In [36]:
# Persist the cut-off level table without the DataFrame index.
appended_data_2.to_csv("./ENSDF/ensdf_v2.csv", index=False)