# Extracting Nuclide info from Genie2K reports.
This file contains code that reads the Genie2K report for each core (stored in the folder 'BH_CoreCounts') and extracts the information needed to fill in values in the 'BombayHook_Gamma.xlsm' spreadsheet.

In [1]:
# Imports
import numpy as np
import pandas as pd
import os

In [50]:
# Globals
# Folder holding one Genie2K report file per counted core interval.
REPORT_FOLDER = 'BH_CoreCounts'
# os.listdir() returns entries in arbitrary order; sort them so that reports
# are processed (and rows are produced) in the same order on every run.
REPORTS = sorted(os.listdir(REPORT_FOLDER))
# Names of the numeric columns of a report's peak table. The report's last
# column ('Nuclide') is intentionally not listed here.
COL_NAMES = ['No.', 'Start', 'Centroid', 'keV', 'Area', 'Uncert.', 'Counts', 'Length']

#### Part 1. Prep work for reading files

In [3]:
# Read the peak-table header of the first report to see the column names
firstfile = '/'.join((REPORT_FOLDER, REPORTS[0]))
with open(firstfile,'r') as f:
    # line index 22 is where the column headings are read from
    header_line = f.readlines()[22]
# drop any parentheses before splitting the headings on whitespace
col_names = header_line.replace('(','').replace(')','').split()
print(col_names)

['No.', 'Start', 'Centroid', 'keV', 'Area', 'Uncert.', 'Counts', 'Length', 'Nuclide']


In [51]:
# Helper Function to read report data into a dataframe
def read_report(filename):
    """
    Read one Genie2K report and return its header info and its peak table.

    INPUT: file of a Genie2K report
    OUTPUT: dictionary of core information descriptors
            ('core','interval','detector','mass','date','count_time');
            values are kept as the strings found in the report
            dataframe of peak readings, indexed by 'No.', with columns:
            ['Start', 'Centroid', 'keV', 'Area', 'Uncert.',
            'Counts', 'Length'] (all float64; the report's trailing
            'Nuclide' label is not included)
    """
    # Only reading needs the file handle; parsing happens after it is closed.
    with open(filename,'r') as f:
        lines = f.readlines()

    # Information about the core, read from fixed line/token positions
    # of the report header.
    info_dict = {}
    core_id = lines[3].split()[-1].split('_')
    info_dict['core'] = core_id[0]
    info_dict['interval'] = core_id[1]
    info_dict['detector'] = lines[15].split()[-1]
    info_dict['mass'] = lines[7].split()[-2]
    info_dict['date'] = lines[10].split()[3]
    info_dict['count_time'] = lines[8].split()[-2]

    # Nuclide Counts: peak rows are the lines whose first token is the
    # integer peak number.
    n_fields = len(COL_NAMES)
    nuclide_data = []
    for line in lines:
        row = line.split()
        # Keep exactly the leading numeric fields. Slicing by count (rather
        # than dropping the last token) keeps 'Length' intact when a peak row
        # carries no nuclide label, and ignores any extra trailing tokens.
        # Lines too short to be a full peak row are skipped.
        if len(row) >= n_fields and row[0].isdigit():
            nuclide_data.append(row[:n_fields])
    nuclide_df = pd.DataFrame(nuclide_data, columns=COL_NAMES, dtype='float64').set_index('No.')

    return info_dict, nuclide_df

In [5]:
# testing read_report() on first file
# read_report() returns a (header-info dict, peak dataframe) pair
info, df = read_report(firstfile)
print(info)
# last expression of the cell: displays the peak dataframe
df

{'mass': '41.30', 'date': '4/6/2016', 'count_time': '85851.4', 'core': 'AF02A', 'interval': '0-2', 'detector': 'BEGE'}


Unnamed: 0_level_0,Start,Centroid,keV,Area,Uncert.,Counts,Length
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,254,259,47,1948,70,789,11
2,347,352,64,620,58,728,11
3,509,516,93,926,58,781,13
4,1024,1032,186,421,49,637,17
5,1318,1326,239,2307,66,645,17
6,1631,1640,295,337,43,447,19
7,1869,1879,338,399,40,380,21
8,1946,1955,352,735,42,332,21
9,2023,2033,366,26,31,293,21
10,2645,2656,478,-42,29,262,23


In [6]:
# check the dtype of every parsed peak-table column
df.dtypes

Start       float64
Centroid    float64
keV         float64
Area        float64
Uncert.     float64
Counts      float64
Length      float64
dtype: object

#### Part 2. Helper functions to set up columns

In [7]:
# Column layout of the output table.
# Core descriptor columns that come first in every row.
START_COLS = [
    'core',
    'interval',
    'detector',
    'mass',
    'date',
    'count_time',
]
# keV levels for which peak readings are collected.
ENERGY_LEVELS = [
    46.5,
    59.5,
    63.3,
    186.1,
    295.1,
    352.0,
    610.0,
    661.0,
    1460.8,
]
# Readings stored for each keV level.
READINGS = [
    'Area',
    'Uncert.',
    'Counts',
    'Length',
]

In [8]:
def blank_nuclide_df(start_cols, energy_levels, readings, reports):
    """
    Returns a blank (all-NaN) multi-indexed dataframe with one row per report.

    Columns are two-level ('keV', 'measurement'): first one column per name
    in start_cols (second level is the empty string), then one column for
    every (energy level, reading) pair, in the order given.

    INPUT: start_cols    - names of the core info columns
           energy_levels - keV levels to create reading columns for
           readings      - reading names created under each keV level
           reports       - row labels (one per report)
    OUTPUT: float64 dataframe filled with NaN
    """
    # Build the full column index directly from the arguments, so the frame
    # always matches what the caller asked for (no dependence on module
    # globals and no hard-coded column count).
    column_keys = [(name, '') for name in start_cols]
    column_keys += [(level, reading)
                    for level in energy_levels
                    for reading in readings]
    columns = pd.MultiIndex.from_tuples(column_keys, names=['keV', 'measurement'])
    return pd.DataFrame(np.nan, index=list(reports), columns=columns)

In [9]:
# try out the blank dataframe builder with the notebook's globals
cores_df = blank_nuclide_df(start_cols=START_COLS,
                            energy_levels=ENERGY_LEVELS,
                            readings=READINGS,
                            reports=REPORTS)
# take a look
#cores_df

In [10]:
def blank_row():
    """
    Returns a blank single row multi-indexed dataframe
    with columns for core info and readings for area,
    uncertainty, counts and length at each keV level.

    The layout is taken from the globals START_COLS, ENERGY_LEVELS and
    READINGS; the single row is labelled 0 and every cell is NaN.
    """
    # create multi-index for nuclide data
    levels = [(level, reading) for level in ENERGY_LEVELS for reading in READINGS]
    index = pd.MultiIndex.from_tuples(levels, names=['keV', 'measurement'])
    # one NaN per (keV, measurement) pair; sized from the globals so the row
    # stays valid when ENERGY_LEVELS or READINGS change
    row = pd.DataFrame([np.nan] * len(levels), index=index).T
    for cname in START_COLS:
        row[cname] = np.nan
    # return reordered dataframe: core info columns first, then the readings
    return row[START_COLS + ENERGY_LEVELS]

In [11]:
# take a look
# build one blank row and display it to check the column layout
test_row = blank_row()
test_row

keV,core,interval,detector,mass,date,count_time,46.5,46.5,46.5,46.5,...,610.0,610.0,661.0,661.0,661.0,661.0,1460.8,1460.8,1460.8,1460.8
measurement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Area,Uncert.,Counts,Length,...,Counts,Length,Area,Uncert.,Counts,Length,Area,Uncert.,Counts,Length
0,,,,,,,,,,,...,,,,,,,,,,


In [12]:
# attempt to fill row 1, keV 46.5 area
# (the row label is 0; the column is addressed by its (keV, measurement) pair)
test_row.loc[0,(46.5,'Area')] = 8
# take a look
test_row

keV,core,interval,detector,mass,date,count_time,46.5,46.5,46.5,46.5,...,610.0,610.0,661.0,661.0,661.0,661.0,1460.8,1460.8,1460.8,1460.8
measurement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Area,Uncert.,Counts,Length,...,Counts,Length,Area,Uncert.,Counts,Length,Area,Uncert.,Counts,Length
0,,,,,,,8,,,,...,,,,,,,,,,


#### Part 3. Filling the DataFrame

In [13]:
# helper function to identify nuclide data 'close' to a specified keV level
def keV_in_range(val, levels=None, tol=1):
    """
    Map a measured keV value onto the nearest tracked energy level.

    INPUT: val    - measured keV value
           levels - candidate energy levels (default: global ENERGY_LEVELS)
           tol    - largest allowed |level - val| for a match (default 1)
    OUTPUT: the level closest to val if it lies within tol, else np.nan
    """
    if levels is None:
        levels = ENERGY_LEVELS
    # Pick the closest level rather than the first one within tolerance, so
    # the result stays correct if levels are spaced closer than 2*tol.
    # On an exact tie min() keeps the earlier level.
    closest = min(levels, key=lambda level: abs(level - val), default=None)
    # A NaN val fails this comparison and therefore maps to NaN as well.
    if closest is not None and abs(closest - val) <= tol:
        return closest
    return np.nan

In [16]:
# test keV_in_range(val)
# 46 is within 1 keV of the 46.5 level
keV_in_range(46)

46.5

In [52]:
# Function to pull out data from reports
def get_data():
    """
    Build the table of core info and peak readings for every report.

    INPUT: n/a fxn runs on globals: REPORTS, REPORT_FOLDER, START_COLS, ENERGY_LEVELS, READINGS
    OUTPUT: data frame whose first row is blank, followed by one row per
            report in REPORTS order; every row carries the index label 0
    """
    # The table starts with one blank row; this is kept so the output layout
    # matches what earlier runs of this function produced.
    rows = [blank_row()]
    # loop through reports
    for core in REPORTS:
        print('... processing core:', core) # for debugging

        # read report & initialize a blank row
        info, df = read_report(REPORT_FOLDER + '/' + core)
        new_row = blank_row()

        # get report general info
        for cname in START_COLS:
            new_row.loc[0, cname] = info[cname]

        # get nuclide data: relabel each peak with the energy level it
        # matches (NaN when it matches none) and index the peaks by level
        df['keV'] = df['keV'].apply(keV_in_range)
        df.set_index('keV', inplace=True)
        for lev in ENERGY_LEVELS:
            if lev in df.index:
                for rdg in READINGS:
                    # NOTE(review): when several peaks map to the same level,
                    # the minimum is taken per reading independently, so the
                    # stored values may come from different peaks - confirm
                    # this is intended.
                    new_row.loc[0,(lev, rdg)] = df.loc[lev, rdg].min()

        rows.append(new_row)

    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0,
    # and appending row by row copied the whole table on every iteration.
    return pd.concat(rows)

In [53]:
# build the full table from every report in REPORT_FOLDER
test = get_data()

... processing core: AF02A_0-2_BEGe.RPT
... processing core: AF02A_10-12_BEGe.RPT
... processing core: AF02A_12-14_BEGe.RPT
... processing core: AF02A_14-16_BEGe.RPT
... processing core: AF02A_16-18_BEGe.RPT
... processing core: AF02A_18-20_BEGe.RPT
... processing core: AF02A_2-4_BEGe.RPT
... processing core: AF02A_20-22_BEGe.RPT
... processing core: AF02A_22-24_BEGe.RPT
... processing core: AF02A_24-26_BEGe.RPT
... processing core: AF02A_26-28_BEGe.RPT
... processing core: AF02A_28-30_BEGe.RPT
... processing core: AF02A_30-32_BEGe.RPT
... processing core: AF02A_32-34_BEGe.RPT
... processing core: AF02A_34-36_BEGe.RPT
... processing core: AF02A_4-6_BEGe.RPT
... processing core: AF02A_6-8_BEGe.RPT
... processing core: AF02A_8-10_BEGe.RPT
... processing core: BHIN01 0-2_BEGe.RPT
... processing core: BHIN01_10-12_BEGe.RPT
... processing core: BHIN01_12-14_BEGe.RPT
... processing core: BHIN01_14-16_BEGe.RPT
... processing core: BHIN01_16-18_BEGe.RPT
... processing core: BHIN01_18-20_BEGe.R

In [54]:
# display the assembled table
test

keV,core,interval,detector,mass,date,count_time,46.5,46.5,46.5,46.5,...,610.0,610.0,661.0,661.0,661.0,661.0,1460.8,1460.8,1460.8,1460.8
measurement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Area,Uncert.,Counts,Length,...,Counts,Length,Area,Uncert.,Counts,Length,Area,Uncert.,Counts,Length
0,,,,,,,,,,,...,,,,,,,,,,
0,AF02A,0-2,BEGE,41.30,4/6/2016,85851.4,1948,70,789,11,...,190,25,126,27,181,27,1674,43,49,37
0,AF02A,12-14,BEGE,41.85,4/12/2016,85726.6,1330,63,701,11,...,178,25,250,31,219,27,1597,42,60,37
0,AF02A,12-14,BEGE,41.85,4/13/2016,85780.2,1220,65,808,11,...,238,25,286,30,196,27,1681,44,72,37
0,AF02A,14-46,BEGE,38.98,4/14/2016,99979.2,1410,67,811,11,...,208,25,271,32,238,27,1810,44,54,37
0,AF02A,16-18,BEGE,36.82,4/15/2016,172800.0,2360,86,1357,11,...,408,25,516,44,427,27,3161,59,109,37
0,AF02A,18-20,BEGE,39.55,4/19/2016,84734.6,1081,60,678,11,...,214,25,323,31,194,27,1588,42,49,37
0,AF02A,2-4,BEGE,41.11,4/7/2016,86327.7,1882,70,790,11,...,245,25,195,27,169,27,1726,43,54,37
0,AF02A,20-22,BEGE,35.57,4/20/2016,86940.4,1072,59,635,11,...,190,25,341,31,186,27,1460,41,63,37
0,AF02A,22-24,BEGE,39.70,4/21/2016,84684.4,1026,59,636,11,...,185,25,510,35,216,27,1477,41,76,37


In [57]:
# Write the table to sheet 'counts', starting at sheet row index 5.
# NOTE(review): the notebook introduction names 'BombayHook_Gamma.xlsm' but
# this writes 'BombayHook_Gamma.xls' - confirm which file is intended.
# NOTE(review): writing the legacy .xls format may not be supported by recent
# pandas versions - TODO confirm against the installed version.
test.to_excel('BombayHook_Gamma.xls', sheet_name='counts', startrow=5)

### Exploring better options for filling in cells.

In [None]:
# reproduce the per-report steps of get_data() on a single report
info, df = read_report('BH_CoreCounts/AF02A_12-14_BEGe.RPT')
# relabel each peak with its matching energy level, then index by that label
df['keV'] = df['keV'].apply(keV_in_range)
df = df.set_index('keV')

In [None]:
# look at all four readings for the peak(s) matched to the 1460.8 keV level
df.loc[1460.8, ['Area','Uncert.','Counts','Length']]

In [None]:
# experiment: fill the four 46.5 keV cells of a blank row from a dict keyed
# by reading name
nr = blank_row()
nr  # no effect here: only the last expression of a cell is displayed
dct = {'Area':1, 'Uncert.':2, 'Counts':3, 'Length':4}
# NOTE(review): with axis=1 the lambda receives a whole row (a Series), not a
# reading name, so dct[x] looks like it would raise - TODO confirm intent.
nr[46.5].apply(lambda x: dct[x], axis=1)