In [6]:
from training_set import *
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn import metrics
import numpy as np
import pandas as pd
import glob
import os

In [5]:
#train_files[0]

# Training Set -> Dataframes

### Getting list of files holding training data

In [7]:
# Root directory of the training data; each reactor has its own sub-directory.
# Alternate location kept for reference:
#datapath = "../origen/origen-data/30nov2017_actinides/"
datapath = "../origen-data/30nov2017_actinides/"

# Collect one CSV path per (reactor, enrichment) pair, reactor-major order.
train_files = []
for rxtr_idx, rxtr_name in enumerate(O_RXTRS):
    rxtr_dir = datapath + rxtr_name + "/"
    for enr in ENRICH[rxtr_idx]:
        # Use the "_gammas.csv" suffix instead to point at the gamma files.
        fname = rxtr_name + "_enr" + str(enr) + "_nucs.csv"
        train_files.append(os.path.join(rxtr_dir, fname))

### Supporting Functions

In [15]:
def loop_labels(burnup, cooling):
    """Expand per-case burnups into per-row burnup and cooling-time labels.

    Every burnup case contributes one entry with cooling time 0 followed by
    one entry per cooling interval. Both returned lists begin with a single
    leading 0 (presumably the initial, un-irradiated row -- TODO confirm
    against the data files).

    Parameters
    ----------
    burnup : sequence
        Burnup value of each case, in case order.
    cooling : sequence or None
        Cooling intervals applied after each case. ``None`` falls back to the
        module-level ``COOLING_INTERVALS``, which is what this function read
        unconditionally before the argument was honoured.

    Returns
    -------
    tuple of (list, list)
        ``(burnup_lbl, cooling_lbl)``, each of length
        ``1 + len(burnup) * (len(cooling) + 1)``.
    """
    if cooling is None:
        cooling = COOLING_INTERVALS
    # Cooling times seen within one case: 0 first, then each interval.
    case_cooling = [0] + list(cooling)
    burnup_lbl = [0]
    cooling_lbl = [0]
    for case_burnup in burnup:
        # The burnup is constant across every cooling step of a case.
        burnup_lbl.extend([case_burnup] * len(case_cooling))
        cooling_lbl.extend(case_cooling)
    return burnup_lbl, cooling_lbl

def label_data(labels, data):
    """Append the four label columns to ``data`` and return it.

    ``data`` is modified in place. The columns are added at the right-hand
    edge in the order ReactorType, Enrichment, Burnup, CoolingTime.
    ReactorType and Enrichment come straight from ``labels``; Burnup and
    CoolingTime are the per-row lists produced by ``loop_labels`` from
    ``labels['Burnup']`` and ``labels['CoolingInts']``.
    """
    burnup_col, cooling_col = loop_labels(labels['Burnup'], labels['CoolingInts'])
    per_row = {'Burnup': burnup_col, 'CoolingTime': cooling_col}
    for name in ('ReactorType', 'Enrichment', 'Burnup', 'CoolingTime'):
        values = per_row[name] if name in per_row else labels[name]
        # Inserting at the current column count appends on the right.
        data.insert(len(data.columns), name, values)
    return data

def format_df(filename):
    """Load one training CSV and return it transposed and cleaned.

    Row 5 (zero-based) of the file supplies the column names and the first
    column becomes the index. The table is then transposed, so each original
    column becomes a row. Rows that exactly duplicate another row are removed,
    keeping the last occurrence, and the 'subtotal' column is dropped.
    """
    raw = pd.read_csv(filename, header=5, index_col=0)
    transposed = raw.T
    deduped = transposed.drop_duplicates(keep='last')
    return deduped.drop('subtotal', axis=1)

### Main formatting function

In [19]:
def dataframeXY(all_files):
    """Build one labelled DataFrame from every training file.

    Each file is loaded with ``format_df`` and labelled with ``label_data``,
    using the entries of ``TRAIN_LABELS`` at the file's position in
    ``all_files`` plus the module-level ``COOLING_INTERVALS``. The per-file
    frames are concatenated and missing values are replaced with 0.

    Assumes the ``TRAIN_LABELS`` lists are ordered like ``all_files`` --
    TODO confirm against training_set.

    Parameters
    ----------
    all_files : iterable of str
        Paths of the training CSV files.

    Returns
    -------
    pandas.DataFrame
        All labelled rows, with NaN replaced by 0.
    """
    all_data = []
    # enumerate gives each file's own position. The previous
    # all_files.index(f) lookup was quadratic and returned the first match,
    # so a repeated path was labelled with the wrong entry.
    for idx, f in enumerate(all_files):
        data = format_df(f)
        labels = {'ReactorType': TRAIN_LABELS['ReactorType'][idx],
                  # 'OrigenReactor' label intentionally left out:
                  #'OrigenReactor': TRAIN_LABELS['OrigenReactor'][idx],
                  'Enrichment': TRAIN_LABELS['Enrichment'][idx],
                  'Burnup': TRAIN_LABELS['Burnup'][idx],
                  'CoolingInts': COOLING_INTERVALS
                  }
        all_data.append(label_data(labels, data))
    dfXY = pd.concat(all_data)
    # Columns missing from some files come out of concat as NaN; store them as 0.
    return dfXY.fillna(value=0)
# Build the labelled training frame from every path collected in train_files.
trainXY = dataframeXY(train_files)
# Show the row index of the combined frame as the cell output.
trainXY.index

Index(['0.000e+00', '1.000e+02.1', '1.070e+02', '1.300e+02', '4.653e+02',
       '2.392e+03', '1.000e+02.2', '2.000e+02.1', '2.070e+02', '2.300e+02',
       ...
       '5.300e+02', '8.653e+02', '2.792e+03', '5.000e+02.2', '5.250e+02',
       '5.250e+02.1', '5.320e+02', '5.550e+02', '8.903e+02', '2.817e+03'],
      dtype='object', length=9779)

In [10]:
trainXY.shape

(9779, 49)

### Split into different Y's for separate ML models

In [None]:
def splitXY(dfXY):
    """Separate the feature columns from the four label columns.

    Returns a 5-tuple ``(features, reactor, cooling, enrichment, burnup)``.
    ``features`` is ``dfXY`` without the four label columns and without the
    'total' column; the other four are the 'ReactorType', 'CoolingTime',
    'Enrichment' and 'Burnup' columns, in that order.
    """
    targets = ('ReactorType', 'CoolingTime', 'Enrichment', 'Burnup')
    non_features = list(targets) + ['total']
    features = dfXY.drop(non_features, axis=1)
    reactor, cooling, enrichment, burnup = (dfXY.loc[:, name] for name in targets)
    return features, reactor, cooling, enrichment, burnup

In [None]:
# Separate the feature matrix from the four prediction targets:
# rY = reactor type, cY = cooling time, eY = enrichment, bY = burnup.
trainX, rY, cY, eY, bY = splitXY(trainXY)

In [None]:
# Display the feature matrix as the cell output for a visual check.
trainX

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import scale
from sklearn.model_selection import cross_val_predict
from sklearn import metrics
import numpy as np
import pandas as pd

In [None]:
# Standardise the features with sklearn.preprocessing.scale. The name is
# rebound, so from here on trainX is scale()'s return value rather than the
# DataFrame produced by splitXY.
# NOTE(review): scaling is computed over all rows before the
# cross_val_predict calls below, so held-out folds contribute to the scaling
# statistics -- confirm this is acceptable or move scaling into a Pipeline.
trainX = scale(trainX)

In [None]:
# Reactor type: k-nearest-neighbour classification under two distance metrics.
#   'l1' -> Manhattan distance
#   'l2' -> Euclidean distance
l1knc = KNeighborsClassifier(p=1, metric='l1')
l1knc_pred = cross_val_predict(l1knc, trainX, rY, cv=10)

l2knc = KNeighborsClassifier(p=2, metric='l2')
l2knc_pred = cross_val_predict(l2knc, trainX, rY, cv=10)

# Classification reports from the 10-fold predictions, L1 first and then L2.
for _rxtr_pred in (l1knc_pred, l2knc_pred):
    print(metrics.classification_report(rY, _rxtr_pred))

In [None]:
# Enrichment: k-nearest-neighbour regression under the L1 and L2 metrics.
el1knr = KNeighborsRegressor(p=1, metric='l1')
el1knr_pred = cross_val_predict(el1knr, trainX, eY, cv=10)

el2knr = KNeighborsRegressor(p=2, metric='l2')
el2knr_pred = cross_val_predict(el2knr, trainX, eY, cv=10)

# Mean squared error (L1, then L2), followed by explained variance (L1, then L2).
for _enr_score in (metrics.mean_squared_error, metrics.explained_variance_score):
    for _enr_pred in (el1knr_pred, el2knr_pred):
        print(_enr_score(eY, _enr_pred))

In [None]:
# Cooling time: k-nearest-neighbour regression under the L1 and L2 metrics.
cl1knr = KNeighborsRegressor(p=1, metric='l1')
cl1knr_pred = cross_val_predict(cl1knr, trainX, cY, cv=10)

cl2knr = KNeighborsRegressor(p=2, metric='l2')
cl2knr_pred = cross_val_predict(cl2knr, trainX, cY, cv=10)

# Mean squared error (L1, then L2), followed by explained variance (L1, then L2).
for _cool_score in (metrics.mean_squared_error, metrics.explained_variance_score):
    for _cool_pred in (cl1knr_pred, cl2knr_pred):
        print(_cool_score(cY, _cool_pred))

In [None]:
# Burnup: k-nearest-neighbour regression under the L1 and L2 metrics.
bl1knr = KNeighborsRegressor(p=1, metric='l1')
bl1knr_pred = cross_val_predict(bl1knr, trainX, bY, cv=10)

bl2knr = KNeighborsRegressor(p=2, metric='l2')
bl2knr_pred = cross_val_predict(bl2knr, trainX, bY, cv=10)

# Mean squared error (L1, then L2), followed by explained variance (L1, then L2).
for _burn_score in (metrics.mean_squared_error, metrics.explained_variance_score):
    for _burn_pred in (bl1knr_pred, bl2knr_pred):
        print(_burn_score(bY, _burn_pred))