In [9]:
from training_set import *
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn import metrics
import numpy as np
import pandas as pd
import glob
import os
import posixpath

# Training Set -> Dataframes

In [2]:
def format_df(filename):
    """Read a CSV file and return it transposed with a proper header.

    The CSV is loaded with ``pd.read_csv`` and transposed, so every CSV
    column becomes a row. The first of those rows (i.e. the CSV's first
    column) supplies the column labels of the result and is then removed
    from the body.

    Parameters
    ----------
    filename : str or file-like
        Anything ``pd.read_csv`` accepts.

    Returns
    -------
    pandas.DataFrame
        One row per remaining CSV column, labeled by the values of the
        CSV's first column.
    """
    transposed = pd.read_csv(filename).T
    header_label = transposed.index[0]
    # Promote the first transposed row to column labels, then discard it.
    transposed.columns = transposed.iloc[0]
    return transposed.drop(index=header_label)

def get_labels(filename, rxtrs):
    """Look up the label entries that belong to a data file.

    The file's base name (directory and extension stripped) is searched for
    in ``rxtrs['OrigenReactor']``; the position found there selects the
    matching entry from each of the other label lists.

    Parameters
    ----------
    filename : str
        Path of the data file.
    rxtrs : dict
        Maps 'OrigenReactor', 'ReactorType', 'Enrichment', 'Burnup' and
        'CoolingInts' to parallel sequences.

    Returns
    -------
    dict
        Keys 'ReactorType', 'Enrichment', 'Burnup', 'CoolingInts' with the
        values found at the file's position.

    Raises
    ------
    ValueError
        If the base name is not present in ``rxtrs['OrigenReactor']``.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    position = rxtrs['OrigenReactor'].index(stem)
    wanted = ('ReactorType', 'Enrichment', 'Burnup', 'CoolingInts')
    return {key: rxtrs[key][position] for key in wanted}

def label_data(label, data):
    """Append the three label columns to ``data`` in place and return it.

    'ReactorType' and 'Enrichment' are taken directly from ``label``; the
    'Burnup' column is produced by ``burnup_label`` from ``label['Burnup']``
    and ``label['CoolingInts']``. The new columns are placed after all
    existing ones, in that order.

    Parameters
    ----------
    label : dict
        Must provide 'ReactorType', 'Enrichment', 'Burnup', 'CoolingInts'.
    data : pandas.DataFrame
        Frame to extend; it is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, now with three extra trailing columns.
    """
    # Inserting at the current column count always appends at the far right.
    data.insert(data.shape[1], 'ReactorType', label['ReactorType'])
    data.insert(data.shape[1], 'Enrichment', label['Enrichment'])
    burnup_values = burnup_label(data, label['Burnup'], label['CoolingInts'])
    data.insert(data.shape[1], 'Burnup', burnup_values)
    return data

def burnup_label(data, burn_steps, cooling_ints):
    """Build the list of values used for the 'Burnup' column.

    Every entry of ``burn_steps`` is one case spanning
    ``len(cooling_ints) + 2`` consecutive list entries. The first entry of a
    case carries the burnup of the preceding case (0 for the first case);
    all remaining entries of the case carry the case's own burnup.

    Parameters
    ----------
    data : any
        Accepted for interface compatibility; not used.
    burn_steps : sequence
        Burnup value of each case.
    cooling_ints : sequence
        Only its length is used.

    Returns
    -------
    list
        ``len(burn_steps) * (len(cooling_ints) + 2)`` entries, or ``[0]``
        when ``burn_steps`` is empty.
    """
    num_cases = len(burn_steps)
    rows_after_first = len(cooling_ints) + 1
    # The leading 0 is the first row of the first case.
    labels = [0]
    for case in range(num_cases):
        if case > 0:
            # First row of a later case still shows the previous burnup.
            labels.append(burn_steps[case - 1])
        labels.extend([burn_steps[case]] * rows_after_first)
    return labels

def dataframeXY(all_files, rxtr_label):
    """Load, label and stack every file into a single dataframe.

    Each file is read with ``format_df``, its labels are looked up with
    ``get_labels`` and attached with ``label_data``; the labeled frames are
    then concatenated row-wise in the order of ``all_files``.

    Parameters
    ----------
    all_files : iterable of str
        Paths of the data files.
    rxtr_label : dict
        Label table passed through to ``get_labels``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all labeled frames.
    """
    def _labeled_frame(path):
        # Read first, then look up labels, matching the per-file sequence.
        frame = format_df(path)
        file_labels = get_labels(path, rxtr_label)
        return label_data(file_labels, frame)

    return pd.concat([_labeled_frame(path) for path in all_files])

def splitXY(dfXY, nuc_start=6):
    """Split a labeled dataframe into features and three label columns.

    The last three columns are taken, by position, as the reactor-type,
    enrichment and burnup labels (the order in which ``label_data`` appends
    them). The feature matrix is every column from ``nuc_start`` up to, but
    not including, the first label column.

    Parameters
    ----------
    dfXY : pandas.DataFrame
        Frame whose last three columns are the labels.
    nuc_start : int, optional
        Positional index of the first feature (nuclide) column. Defaults to
        6, the offset that was previously hard-coded.
        TODO: detect the first nuclide column instead of relying on a
        fixed offset (e.g. search for the first column name starting
        with '1').

    Returns
    -------
    tuple
        ``(dfX, r_dfY, e_dfY, b_dfY)``: the feature DataFrame followed by
        the three label Series.

    Raises
    ------
    ValueError
        If ``dfXY`` has fewer than three columns, in which case positional
        label selection would silently pick the wrong columns.
    """
    n_labels = 3
    n_cols = len(dfXY.columns)
    if n_cols < n_labels:
        raise ValueError(
            "dfXY needs at least %d columns to hold the labels, got %d"
            % (n_labels, n_cols)
        )
    first_label = n_cols - n_labels
    dfX = dfXY.iloc[:, nuc_start:first_label]
    r_dfY = dfXY.iloc[:, first_label]
    e_dfY = dfXY.iloc[:, first_label + 1]
    b_dfY = dfXY.iloc[:, first_label + 2]
    return dfX, r_dfY, e_dfY, b_dfY

In [31]:
# Collect the training files: for every reactor name in O_RXTRS, pick the
# .plt files under datapath/<reactor>/ whose name ends in one of the
# nucfiles suffixes.
# NOTE(review): O_RXTRS is not defined in this notebook; presumably it comes
# from `from training_set import *` -- confirm.
train_files = []
nucfiles = ['0', '2', '4', '6']
#gammafiles = ['1', '3', '5', '7']
datapath = "../origen-data/14nov2017_actinides/"
for o_rxtr in O_RXTRS:
    for enrich in nucfiles:
        enrichment = o_rxtr + "/*" + enrich + ".plt"
        trainpath = os.path.join(datapath, enrichment)
        # glob.glob returns matches in a filesystem-dependent order; sort
        # each batch so the row order of the training frame is reproducible
        # across machines and runs.
        train_files += sorted(glob.glob(trainpath))
# Normalize Windows path separators so paths look the same on every OS.
train_files = [f.replace("\\", "/") for f in train_files]

In [None]:
# Read every training file, attach its labels and stack the results into one
# dataframe (see dataframeXY).
# NOTE(review): train_label is not defined in this notebook; presumably it
# comes from `from training_set import *` -- confirm.
trainXY = dataframeXY(train_files, train_label)

In [None]:
# Replace the concatenated frame's row labels with a fresh 0..n-1 index,
# keeping the old labels as a leading column.
# reset_index inserts a new column on every call, so re-running this cell
# unguarded would shift the positional feature slice used by splitXY and
# eventually raise ValueError once the inserted column name collides.
# Resetting only when the index is not already a RangeIndex keeps the cell
# idempotent.
if not isinstance(trainXY.index, pd.RangeIndex):
    trainXY.reset_index(inplace=True)
# Separate the feature matrix from the reactor-type, enrichment and burnup
# label columns.
trainX, r_trainY, e_trainY, b_trainY = splitXY(trainXY)