In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import swifter
from sklearn.preprocessing import scale
from sklearn.metrics import accuracy_score, explained_variance_score, mean_absolute_error
from statsmodels.base.model import GenericLikelihoodModel

  import pandas.util.testing as tm


In [18]:
lbls = ["a", "b", "c", "d", "e"]
pred_lbls = ["Pred_" + s for s in lbls]
dict(zip(lbls, pred_lbls))

{'a': 'Pred_a', 'b': 'Pred_b', 'c': 'Pred_c', 'd': 'Pred_d', 'e': 'Pred_e'}

In [36]:
pred_lbls.append('test col')
pred_lbls

['Pred_a', 'Pred_b', 'Pred_c', 'Pred_d', 'Pred_e', 'test col', 'test col']

In [3]:
def vary(values, frac=0.1, round_dec=2):
    dev = frac * np.random.uniform(-1, 1, len(values))
    new_vals = np.array(values) * (1 + dev)
    new_vals = np.round_(new_vals, round_dec)
    return new_vals

In [4]:
#make trainDB
ones = np.ones(20)
meas1 = vary(ones)
meas4 = vary(1.5 * ones)
meas2 = vary(2 * ones)
meas5 = vary(2.5 * ones)
meas3 = vary(3 * ones)

trainXY = pd.DataFrame({"a" : meas1, 
                        "b" : meas4,
                        "c" : meas2,
                        "d" : meas5,
                        "e" : meas3
                        })
X = trainXY.copy() # make a copy

trainXY

Unnamed: 0,a,b,c,d,e
0,1.08,1.4,1.9,2.47,2.88
1,1.1,1.63,2.0,2.28,2.83
2,0.95,1.37,1.93,2.46,3.22
3,1.09,1.5,2.12,2.31,3.23
4,0.99,1.62,2.05,2.41,3.09
5,0.92,1.53,2.14,2.7,2.92
6,0.9,1.4,2.04,2.74,3.03
7,1.03,1.38,2.06,2.31,2.77
8,1.03,1.39,2.09,2.32,3.23
9,0.94,1.42,1.95,2.54,3.08


In [5]:
#make test cases
test1 = [1, 1.5, 2, 2.5, 3] # best case
test2 = [1, 0, 0, 0, 0] # 1 meas only, worst-ish case 
test3 = [1, 1, 1, 1, 1] # bad measurements
test4 = [0, 1.5, 2.5, 0, 0] # 2 meas only
test5 = [1, 0, 2, 0, 3]  # 3 meas only
tests = [test1, test2, test3, test4, test5]

testXY = pd.DataFrame(tests, columns = ["a", "b", "c", "d", "e"])
testXY

Unnamed: 0,a,b,c,d,e
0,1,1.5,2.0,2.5,3
1,1,0.0,0.0,0.0,0
2,1,1.0,1.0,1.0,1
3,0,1.5,2.5,0.0,0
4,1,0.0,2.0,0.0,3


In [6]:
def ll_calc(y_sim, y_mes, std):
    ll = np.sum(stats.norm.logpdf(y_sim, loc=y_mes, scale=std))
    return ll

def unc_calc(y_sim, y_mes, sim_unc_sq, mes_unc_sq):
    unc = ((y_sim - y_mes) / sim_unc_sq)**2 * (sim_unc_sq + mes_unc_sq)
    unc.replace([np.inf, -np.inf], 0, inplace=True)
    unc.fillna(0, inplace = True)
    return np.sqrt(unc.sum(axis=1))

In [30]:
def calc_ll(XY, test_sample, unc):
    ll_name = 'LogLikelihood_' + str(unc)
    #X = XY.copy()#drop(lbls, axis=1).copy()
    XY[ll_name] = X.apply(lambda row: ll_calc(row, test_sample, unc*row), axis=1)
    max_ll = XY[ll_name].max()
    max_idx = XY[ll_name].idxmax()
    pred_answer = XY.loc[XY.index == max_idx].drop(ll_name, axis=1)
    lbls = ["a", "b", "c", "d", "e"]
    pred_lbls = ["Pred_" + s for s in lbls]
    pred_answer = pred_answer.rename(columns=dict(zip(lbls, pred_lbls)))  
    return max_ll, max_idx, pred_answer

In [31]:
y_true = testXY.index.to_list()
y_true

[0, 1, 2, 3, 4]

In [32]:
#%%time

unc = 0.1
y_pred = []
lls = []
for i, t in enumerate(tests):
    colname = 'LogLikelihood_' + str(i+1)
    uncname = 'LLUncertainty_' + str(i+1)
    test_sample = testXY.loc[testXY.index == i]
    # In this case, the idx will be the labels
    max_ll, max_idx, pred_sample = calc_ll(trainXY, test_sample, unc)
    #trainXY[colname] = X.apply(lambda row: ll_calc(row, test_sample, unc*row), axis=1)
    #trainXY[uncname] = X.apply(lambda row: unc_calc(row, test_sample, (unc*row)**2, (unc*test_sample)**2), axis=1)
    y_pred.append(max_idx)
    lls.append(max_ll)
    if i == 0:
        pred_df = pd.DataFrame(columns = pred_sample.columns.to_list())
    pred_df = pred_df.append(pred_sample)

In [44]:
pred_df

Unnamed: 0,Pred_a,Pred_b,Pred_c,Pred_d,Pred_e
9,0.94,1.42,1.95,2.54,3.08
7,1.03,1.38,2.06,2.31,2.77
7,1.03,1.38,2.06,2.31,2.77
19,0.98,1.48,2.18,2.57,3.09
16,0.97,1.38,1.95,2.42,3.16


In [45]:
all_tests = pd.DataFrame({'True Label' : y_true, 'Pred Label' : y_pred, 'Loglikelihood' : lls})
all_tests['Absolute Error'] = all_tests.apply(lambda row: np.abs(row[0]-row[1]), axis=1)
all_tests

Unnamed: 0,True Label,Pred Label,Loglikelihood,Absolute Error
0,0,9,3.463112,9.0
1,1,7,-196.054627,6.0
2,2,7,-49.580061,5.0
3,3,19,-147.39149,16.0
4,4,16,-96.284445,12.0


In [52]:
pred_df.reset_index(inplace=True)

In [54]:
pred_df = pd.concat([all_tests, pred_df], axis=1)
pred_df

Unnamed: 0,True Label,Pred Label,Loglikelihood,Absolute Error,index,Pred_a,Pred_b,Pred_c,Pred_d,Pred_e
0,0,9,3.463112,9.0,9,0.94,1.42,1.95,2.54,3.08
1,1,7,-196.054627,6.0,7,1.03,1.38,2.06,2.31,2.77
2,2,7,-49.580061,5.0,7,1.03,1.38,2.06,2.31,2.77
3,3,19,-147.39149,16.0,19,0.98,1.48,2.18,2.57,3.09
4,4,16,-96.284445,12.0,16,0.97,1.38,1.95,2.42,3.16


In [56]:
pred_df['test'] = np.abs(pred_df.loc[:,'True Label'] - pred_df.loc[:,'Pred Label'])
pred_df

Unnamed: 0,True Label,Pred Label,Loglikelihood,Absolute Error,index,Pred_a,Pred_b,Pred_c,Pred_d,Pred_e,test
0,0,9,3.463112,9.0,9,0.94,1.42,1.95,2.54,3.08,9
1,1,7,-196.054627,6.0,7,1.03,1.38,2.06,2.31,2.77,6
2,2,7,-49.580061,5.0,7,1.03,1.38,2.06,2.31,2.77,5
3,3,19,-147.39149,16.0,19,0.98,1.48,2.18,2.57,3.09,16
4,4,16,-96.284445,12.0,16,0.97,1.38,1.95,2.42,3.16,12


In [38]:
max_pwr = trainXY['LogLikelihood'].loc[trainXY['ReactorType'] == 'pwr'].max()
idx_pwr = trainXY['LogLikelihood'].loc[trainXY['ReactorType'] == 'pwr'].idxmax()
unc_pwr = float(trainXY['LLUncertainty'].loc[trainXY.index == idx_pwr])
print(f'Max Log Likelihood for PWRs: {max_pwr} +/- {unc_pwr}')
trainXY.loc[trainXY.index == idx_pwr, ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']]

Max Log Likelihood for PWRs: -172.46966586074987 +/- 228.7371919493878


Unnamed: 0,ReactorType,CoolingTime,Enrichment,Burnup,OrigenReactor
1013,pwr,99.56,3.1,1854.07,ce14x14


#### AGR