In [24]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import swifter
from sklearn.preprocessing import scale
from sklearn.metrics import accuracy_score, explained_variance_score, mean_absolute_error
from statsmodels.base.model import GenericLikelihoodModel

In [25]:
def vary(values, frac=0.1, round_dec=2, rng=None):
    """Return `values` with independent uniform relative noise applied.

    Each element is multiplied by ``1 + d`` where ``d`` is drawn uniformly
    from ``[-frac, frac)``, and the result is rounded to `round_dec` decimals.

    Parameters
    ----------
    values : sequence or array of numbers
        Nominal values to perturb; the input itself is not modified.
    frac : float, default 0.1
        Maximum relative deviation (0.1 means up to +/-10 %).
    round_dec : int, default 2
        Number of decimals kept in the output.
    rng : object with a ``uniform(low, high, size)`` method, optional
        Random source, e.g. ``np.random.default_rng(seed)``. When omitted the
        global ``np.random`` state is used, so ``np.random.seed`` still
        controls the draw.

    Returns
    -------
    numpy.ndarray
        Perturbed, rounded values with one entry per input element.
    """
    source = np.random if rng is None else rng
    dev = frac * source.uniform(-1, 1, len(values))
    new_vals = np.asarray(values) * (1 + dev)
    # np.round_ was deprecated in NumPy 1.25 and removed in 2.0; np.round is
    # the supported spelling and behaves the same.
    return np.round(new_vals, round_dec)

In [13]:
# Build the toy training database: 20 samples x 5 measurement columns.
# Each column is a constant nominal value (1, 1.5, 2, 2.5, 3) passed through
# vary(), which applies random relative noise and rounds the result.
# NOTE(review): vary() draws from the global NumPy random state and no seed is
# set in this cell, so the table changes on every run.
ones = np.ones(20)
meas1 = vary(ones)
meas4 = vary(1.5 * ones)
meas2 = vary(2 * ones)
meas5 = vary(2.5 * ones)
meas3 = vary(3 * ones)

# Column order a..e follows increasing nominal value, not the measN numbering.
trainXY = pd.DataFrame({"a" : meas1, 
                        "b" : meas4,
                        "c" : meas2,
                        "d" : meas5,
                        "e" : meas3
                        })
# Independent copy of the freshly built frame, kept under the module-level name X.
X = trainXY.copy() # make a copy

trainXY

Unnamed: 0,a,b,c,d,e
0,0.96,1.51,1.83,2.41,3.07
1,0.93,1.62,1.84,2.56,2.92
2,1.04,1.64,1.92,2.66,2.72
3,1.03,1.54,2.02,2.33,2.76
4,1.03,1.43,1.94,2.43,2.73
5,1.05,1.45,1.97,2.66,3.04
6,1.03,1.37,2.15,2.59,3.17
7,1.08,1.38,1.83,2.25,3.2
8,0.99,1.59,1.96,2.39,3.06
9,0.93,1.57,2.2,2.27,2.78


In [14]:
# Hand-written test samples, one value per measurement column a..e.
# Going by the case descriptions, a 0 marks a measurement that is absent.
tests = [
    [1, 1.5, 2, 2.5, 3],  # best case
    [1, 0, 0, 0, 0],      # 1 meas only, worst-ish case
    [1, 1, 1, 1, 1],      # bad measurements
    [0, 1.5, 2.5, 0, 0],  # 2 meas only
    [1, 0, 2, 0, 3],      # 3 meas only
]
# Keep the individual names available; each one is the same list object
# that sits in `tests`.
test1, test2, test3, test4, test5 = tests

testXY = pd.DataFrame(data=tests, columns=list("abcde"))
testXY

Unnamed: 0,a,b,c,d,e
0,1,1.5,2.0,2.5,3
1,1,0.0,0.0,0.0,0
2,1,1.0,1.0,1.0,1
3,0,1.5,2.5,0.0,0
4,1,0.0,2.0,0.0,3


In [6]:
def ll_calc(y_sim, y_mes, std):
    """Total Gaussian log-likelihood of `y_sim` around `y_mes`.

    Evaluates the normal log-density of every element of `y_sim` under a
    distribution with mean `y_mes` and standard deviation `std` (the inputs
    are broadcast against each other) and returns the sum of all terms.
    """
    pointwise_logpdf = stats.norm.logpdf(y_sim, loc=y_mes, scale=std)
    return np.sum(pointwise_logpdf)

def unc_calc(y_sim, y_mes, sim_unc_sq, mes_unc_sq):
    """Per-row uncertainty figure built from simulated/measured residuals.

    For every element the term
    ``((y_sim - y_mes) / sim_unc_sq)**2 * (sim_unc_sq + mes_unc_sq)``
    is formed. Terms that come out infinite or NaN (e.g. from a zero
    `sim_unc_sq`) are counted as 0. The terms are then summed across the
    columns of each row and the square root of each row sum is returned.
    """
    scaled_resid_sq = ((y_sim - y_mes) / sim_unc_sq) ** 2
    terms = scaled_resid_sq * (sim_unc_sq + mes_unc_sq)
    # Neutralise division-by-zero artefacts before summing.
    finite_terms = terms.replace([np.inf, -np.inf], 0).fillna(0)
    row_totals = finite_terms.sum(axis=1)
    return np.sqrt(row_totals)

In [15]:
def calc_ll(XY, test_sample, unc):
    """Score every row of `XY` against `test_sample` and return the best match.

    Parameters
    ----------
    XY : pandas.DataFrame
        Candidate rows. Columns whose name starts with ``'LogLikelihood_'``
        are treated as results of earlier calls and are not scored. Every
        other column is passed to ``ll_calc`` as part of the row.
    test_sample : array-like
        Passed unchanged to ``ll_calc`` as the measured values. It must
        broadcast against one row of the scored columns.
    unc : float
        Relative uncertainty. The standard deviation used for a row is
        ``unc * row``.

    Returns
    -------
    tuple
        ``(max_ll, max_idx, pred_answer)``: the largest log-likelihood, the
        index label where it occurs, and the matching row(s) of `XY` without
        the column written by this call.

    Notes
    -----
    Side effect: `XY` gains (or has overwritten) a column named
    ``'LogLikelihood_' + str(unc)`` holding the per-row log-likelihoods.
    """
    ll_prefix = 'LogLikelihood_'
    ll_name = ll_prefix + str(unc)
    # Score the frame that was passed in rather than a module-level copy, so
    # the function works on a fresh kernel and for any XY. Result columns from
    # earlier calls are excluded so each row keeps the sample's width.
    result_cols = [col for col in XY.columns if str(col).startswith(ll_prefix)]
    features = XY.drop(columns=result_cols)
    XY[ll_name] = features.apply(lambda row: ll_calc(row, test_sample, unc * row), axis=1)
    max_ll = XY[ll_name].max()
    max_idx = XY[ll_name].idxmax()
    pred_answer = XY.loc[XY.index == max_idx].drop(ll_name, axis=1)
    return max_ll, max_idx, pred_answer

In [19]:
# Use each test row's own index label as its reference ("true") label.
y_true = testXY.index.to_list()
y_true

[0, 1, 2, 3, 4]

In [21]:
%%time

# For every test case, find the training row with the highest log-likelihood
# and record that row's index label as the prediction.
# unc is handed to calc_ll as the relative uncertainty.
unc = 0.1
y_pred = []
# `t` is not used in the body; the sample is looked up in testXY by position i.
for i, t in enumerate(tests):
    # colname / uncname are referenced only by the commented-out lines below.
    colname = 'LogLikelihood_' + str(i+1)
    uncname = 'LLUncertainty_' + str(i+1)
    # Boolean mask keeps the sample as a one-row DataFrame.
    test_sample = testXY.loc[testXY.index == i]
    # In this case, the idx will be the labels
    max_ll, max_idx, pred_sample = calc_ll(trainXY, test_sample, unc)
    # Earlier per-test-column version, superseded by the calc_ll call above:
    #trainXY[colname] = X.apply(lambda row: ll_calc(row, test_sample, unc*row), axis=1)
    #trainXY[uncname] = X.apply(lambda row: unc_calc(row, test_sample, (unc*row)**2, (unc*test_sample)**2), axis=1)
    y_pred.append(max_idx)

CPU times: user 115 ms, sys: 0 ns, total: 115 ms
Wall time: 121 ms


In [27]:
# Compare predicted index labels with the reference labels.
# exp_var is stored but not shown; only the mean absolute error (the last
# expression of the cell) is rendered as output.
exp_var = explained_variance_score(y_true, y_pred)
mean_absolute_error(y_true, y_pred)

7.0

In [9]:
# Show the training frame including any result columns added so far.
# NOTE(review): the saved output lists LogLikelihood_N / LLUncertainty_N
# columns, which only the commented-out lines of the scoring loop would
# create - presumably a stale output; re-run to refresh.
trainXY

Unnamed: 0,a,b,c,d,e,LogLikelihood_1,LLUncertainty_1,LogLikelihood_2,LLUncertainty_2,LogLikelihood_3,LLUncertainty_3,LogLikelihood_4,LLUncertainty_4,LogLikelihood_5,LLUncertainty_5
0,1.06,1.38,2.2,2.3,3.14,2.343329,2.407131,-196.387908,20.015133,-54.252762,11.713839,-147.535533,17.49029,-96.900527,14.230083
1,0.91,1.35,1.88,2.45,3.2,2.495724,2.570125,-196.467143,20.053911,-51.929443,11.550861,-152.033337,18.244644,-96.866168,14.274516
2,1.09,1.62,1.89,2.59,2.81,2.654644,2.050438,-196.612671,20.031365,-54.612253,11.828622,-151.754559,18.156597,-97.010633,14.24612
3,1.07,1.37,2.13,2.44,2.93,2.903415,1.929615,-196.401352,20.020035,-53.229867,11.638417,-148.146309,17.58278,-96.616141,14.199283
4,0.94,1.49,2.05,2.31,2.78,3.116693,1.954016,-196.199918,20.021701,-51.303042,11.555097,-148.407739,17.663347,-96.542793,14.224633
5,0.93,1.6,1.91,2.29,2.87,2.878174,2.166462,-196.292437,20.030516,-51.766872,11.622872,-150.975456,18.072725,-96.506041,14.216765
6,0.94,1.42,2.0,2.51,3.0,3.554205,1.242275,-196.286303,20.021701,-53.478402,11.684354,-149.36629,17.795707,-96.286303,14.172809
7,1.08,1.44,2.1,2.31,3.08,2.925987,1.847681,-196.501834,20.025462,-53.772132,11.709756,-148.12835,17.58215,-96.648945,14.197994
8,1.03,1.35,2.01,2.56,2.71,2.692506,2.3631,-196.088938,20.00412,-50.549105,11.4066,-149.635266,17.829619,-96.662744,14.237914
9,0.93,1.55,1.94,2.46,3.11,3.39618,1.384072,-196.428193,20.030516,-55.089518,11.857602,-150.363176,17.954698,-96.538571,14.200718


In [38]:
# Report the PWR entry with the highest log-likelihood and its uncertainty.
# NOTE(review): this cell reads 'ReactorType', 'LogLikelihood', 'LLUncertainty'
# and several metadata columns that the toy trainXY built earlier in this
# notebook does not have - presumably it targets a different training set;
# confirm before running.
is_pwr = trainXY['ReactorType'] == 'pwr'
ll_pwr = trainXY.loc[is_pwr, 'LogLikelihood']
max_pwr = ll_pwr.max()
idx_pwr = ll_pwr.idxmax()
# Pull the scalar out explicitly: calling float() on a one-element Series is
# deprecated in pandas >= 2.1.
unc_pwr = float(trainXY.loc[trainXY.index == idx_pwr, 'LLUncertainty'].iloc[0])
print(f'Max Log Likelihood for PWRs: {max_pwr} +/- {unc_pwr}')
trainXY.loc[trainXY.index == idx_pwr, ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']]

Max Log Likelihood for PWRs: -172.46966586074987 +/- 228.7371919493878


Unnamed: 0,ReactorType,CoolingTime,Enrichment,Burnup,OrigenReactor
1013,pwr,99.56,3.1,1854.07,ce14x14


#### AGR