# Extracting Nuclide info from Genie2K reports.
This file contains code to read Genie2K reports for each core (housed in folder called 'BH_CoreCounts') and pull information to fill in values in 'BombayHook_Gamma.xlsm' spreadsheet.

In [1]:
# Imports
import numpy as np
import pandas as pd
import os

In [82]:
# Globals
REPORT_FOLDER = 'BH_CoreCounts'
REPORTS = os.listdir(REPORT_FOLDER)
COL_NAMES = ['No.', 'Start', 'Centroid', 'keV', 'Area', 'Uncert.', 'Counts', 'Length', 'Nuclide']

#### Part 1. Prep work for reading files

In [83]:
# Using the first file to get column names for nuclide rows
firstfile = REPORT_FOLDER + '/' + reports[0]
with open(firstfile,'r') as f:
    col_names = f.readlines()[22].replace('(','').replace(')','').split()
print(col_names)

['No.', 'Start', 'Centroid', 'keV', 'Area', 'Uncert.', 'Counts', 'Length', 'Nuclide']


In [148]:
# Helper Function to read report data into a dataframe
def read_report(filename):
    """
    INPUT: file of a Genie2K report
    OUTPUT: dictionary of core information descriptors
            ('core','interval','detector','mass','count_time')
            dataframe of nuclide counts
    """
    with open(filename,'r') as f:
        lines = f.readlines()
        
        # Information about the core
        info_dict = {}
        core_id = lines[3].split()[-1].split('_')
        info_dict['core'] = core_id[0]
        info_dict['interval'] = core_id[1]
        info_dict['detector'] = core_id[2]
        info_dict['mass'] = lines[7].split()[-2]
        info_dict['count_time'] = lines[8].split()[-2]
        
        # Nuclide Counts
        nuclide_data = []
        for line in f:
            row = line.split()
            if not row:
                pass
            elif row[0].isdigit():
                nuclide_data.append(line)
        nuclide_df = pd.DataFrame(nuclide_data, columns=COL_NAMES).set_index('No.')
    
    return info_dict, nuclide_df                

In [149]:
# testing read_report() on first file
info, df = read_report(firstfile)
print(info)
df

mass: 41.30
count_time: 85851.4
{'mass': '41.30', 'count_time': '85851.4', 'detector': 'BEGe', 'core': 'AF02A', 'interval': '0-2'}


Unnamed: 0_level_0,Start,Centroid,keV,Area,Uncert.,Counts,Length,Nuclide
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


#### Ok, now to work.

In [21]:
# OK creating a function to parse by hand
def read_report(filepath):
    """
    INPUT: path for a GENIE2K report
    OUTPUT:
    """

In [28]:
rep_vars = ['core','interval','detector','mass','count_time']
nuclides = ['Pb-210','Am-241','Th-234','Ra-226','Pb-214',]

Unnamed: 0,1 254 259 47 1948 70 789 11 Pb-210
0,2 347 352 64 620 58...
1,3 509 516 93 926 58...
2,4 1024 1032 186 421 49...
3,...
4,5 1318 1326 239 2307 66...
5,6 1631 1640 295 337 43...
6,7 1869 1879 338 399 40...
7,8 1946 1955 352 735 42...
8,9 2023 2033 366 26 31...
9,10 2645 2656 478 -42 29...
