# Extracting Nuclide info from Genie2K reports.
This file contains code that reads the Genie2K report for each core (stored in the folder 'BH_CoreCounts') and extracts the values needed to fill in the 'BombayHook_Gamma.xlsm' spreadsheet.

In [1]:
# Imports
import numpy as np
import pandas as pd
import os

In [2]:
# Globals
# Folder that holds the Genie2K report files
REPORT_FOLDER = 'BH_CoreCounts'
# os.listdir() returns entries in arbitrary order; sort them so that
# REPORTS[0] and the order in which reports are processed are reproducible.
REPORTS = sorted(os.listdir(REPORT_FOLDER))
# Column labels for the nuclide rows parsed out of each report
COL_NAMES = ['No.', 'Start', 'Centroid', 'keV', 'Area', 'Uncert.', 'Counts', 'Length', 'Nuclide']

#### Part 1. Prep work for reading files

In [3]:
# Read the column names for the nuclide rows from the first report file
firstfile = '/'.join((REPORT_FOLDER, REPORTS[0]))
with open(firstfile, 'r') as f:
    header_line = f.readlines()[22]
# strip the parentheses from the header text, then split on whitespace
col_names = header_line.replace('(', '').replace(')', '').split()
print(col_names)

['No.', 'Start', 'Centroid', 'keV', 'Area', 'Uncert.', 'Counts', 'Length', 'Nuclide']


In [4]:
# Helper Function to read report data into a dataframe
def read_report(filename):
    """
    Parse a single Genie2K report file.

    INPUT: path of a Genie2K report file
    OUTPUT: (info_dict, nuclide_df)
            info_dict: dictionary of core descriptors with keys
            'core', 'interval', 'detector', 'mass', 'date', 'count_time'.
            All values are the raw string tokens taken from the report.
            nuclide_df: dataframe indexed by 'No.' with one row per report
            line whose first token is all digits, and columns
            ['Start', 'Centroid', 'keV', 'Area', 'Uncert.',
            'Counts', 'Length', 'Nuclide'] (string tokens, not numbers).
    """
    with open(filename, 'r') as report:
        lines = report.readlines()

    # The last token of line index 3 has the form core_interval_detector
    sample_id = lines[3].split()[-1].split('_')
    info_dict = {
        'core': sample_id[0],
        'interval': sample_id[1],
        'detector': sample_id[2],
        # second-to-last token of line index 7
        'mass': lines[7].split()[-2],
        # fourth token of line index 10
        'date': lines[10].split()[3],
        # second-to-last token of line index 8
        'count_time': lines[8].split()[-2],
    }

    # Nuclide rows are the non-blank lines that start with a row number
    tokenized = (line.split() for line in lines)
    peak_rows = [tokens for tokens in tokenized
                 if tokens and tokens[0].isdigit()]
    nuclide_df = pd.DataFrame(peak_rows, columns=COL_NAMES).set_index('No.')

    return info_dict, nuclide_df

In [5]:
# Smoke test: run read_report() on the first report file and unpack
# its two return values (info dictionary, nuclide dataframe)
info, df = read_report(firstfile)
# show the core descriptors parsed from the report
print(info)
# bare expression left last so the notebook renders the nuclide dataframe
df

{'interval': '0-2', 'date': '4/6/2016', 'detector': 'BEGe', 'mass': '41.30', 'core': 'AF02A', 'count_time': '85851.4'}


Unnamed: 0_level_0,Start,Centroid,keV,Area,Uncert.,Counts,Length,Nuclide
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,254,259,47,1948,70,789,11,Pb-210
2,347,352,64,620,58,728,11,Th-234
3,509,516,93,926,58,781,13,Th-234
4,1024,1032,186,421,49,637,17,Ra-226
5,1318,1326,239,2307,66,645,17,Pb-212
6,1631,1640,295,337,43,447,19,Pb-214
7,1869,1879,338,399,40,380,21,Ac-228
8,1946,1955,352,735,42,332,21,Pb-214
9,2023,2033,366,26,31,293,21,.....
10,2645,2656,478,-42,29,262,23,Be-7


#### Part 2. Helper Function to set up columns

In [12]:
def blank_row():
    """
    Returns a blank single row multi-indexed dataframe
    with columns for core info and readings for area,
    uncertainty, counts and length at each keV level.

    OUTPUT: dataframe with one row (index 0) filled with NaN and
            two-level columns named ('keV', 'measurement').
            The core info columns come first, each with '' as its
            second-level label, followed by one column for every
            (energy level, reading) pair, grouped by energy level.
    """
    # variables for initial column names, energy levels and readings
    START_COLS = ['core','interval','detector','mass','date','count_time']
    ENERGY_LEVELS = [46.5, 59.5, 63.3, 186.1, 295.1, 352.0, 610.0, 661.0, 1460.8]
    READINGS = ['Area', 'Uncert.', 'Counts', 'Length']
    # core info columns carry an empty second-level label, which is the
    # label pandas assigns when a plain string column is added to a
    # multi-indexed frame
    info_cols = [(cname, '') for cname in START_COLS]
    # one column per (energy level, reading) pair
    reading_cols = [(l, r) for l in ENERGY_LEVELS for r in READINGS]
    columns = pd.MultiIndex.from_tuples(info_cols + reading_cols,
                                        names=['keV', 'measurement'])
    # size the NaN row from the column index instead of a hard-coded count,
    # so editing the lists above cannot leave the two out of sync
    return pd.DataFrame([[np.nan] * len(columns)], columns=columns)

In [13]:
# Smoke test: build one empty row with blank_row()
cores_df = blank_row()
# bare expression left last so the notebook renders the dataframe
cores_df

keV,core,interval,detector,mass,date,count_time,46.5,46.5,46.5,46.5,...,610.0,610.0,661.0,661.0,661.0,661.0,1460.8,1460.8,1460.8,1460.8
measurement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Area,Uncert.,Counts,Length,...,Counts,Length,Area,Uncert.,Counts,Length,Area,Uncert.,Counts,Length
0,,,,,,,,,,,...,,,,,,,,,,


In [10]:
# Try writing a single cell: row label 0 (the first row), column
# (46.5, 'Area'), i.e. the 'Area' reading at the 46.5 keV level
cores_df.loc[0,(46.5,'Area')] = 8
# bare expression left last so the notebook renders the updated dataframe
cores_df

keV,core,interval,detector,mass,date,count_time,46.5,46.5,46.5,46.5,...,610.0,610.0,661.0,661.0,661.0,661.0,1460.8,1460.8,1460.8,1460.8
measurement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Area,Uncert.,Counts,Length,...,Counts,Length,Area,Uncert.,Counts,Length,Area,Uncert.,Counts,Length
0,,,,,,,8,,,,...,,,,,,,,,,


#### Part 3. filling df

In [None]:
# 1) loop through reports
#    for each report:
#         0) run read_report()
#         1) fill general info
#         2) loop through nuclide_df:
#          
 