In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

# Data Setup & Functions

In [3]:
from mll_calc.mll_pred import format_XY
from mll_calc.all_jobs import uncs, job_dirs

In [4]:
def logpdf_calc(row, test_sample, unc):
    """Gaussian log-density of one training row against one test sample.

    Only the entries where ``test_sample`` is strictly positive are scored.
    For each of them the value taken from ``row`` is evaluated under a normal
    distribution centred on the matching ``test_sample`` value, with a
    standard deviation equal to ``unc`` times the ``row`` value.

    Parameters
    ----------
    row : pandas.Series
        Simulated (training) values; expected to share ``test_sample``'s index
        so the boolean mask lines up.
    test_sample : pandas.Series
        Measured (test) values; its positive entries select what is scored.
    unc : float
        Multiplier applied to ``row`` to obtain the standard deviations.

    Returns
    -------
    numpy.ndarray
        One log-pdf value per selected entry (despite the historical "list"
        naming, scipy returns an array here).
    """
    # Build the selection mask once and reuse it for all three vectors.
    detected = test_sample > 0
    # .values.tolist() turns (possibly object-dtype) entries into plain Python
    # numbers before they are handed to scipy.
    simulated = row[detected].values.tolist()
    sigma = row.multiply(unc)[detected].values.tolist()
    measured = test_sample[detected].values.tolist()
    return stats.norm.logpdf(simulated, loc=measured, scale=sigma)

In [5]:
def loop_db(XY, pred, unc, lbls, nonlbls):
    """Build a per-feature log-pdf table for every prediction in ``pred``.

    For each row of ``pred`` the test sample (``sim_idx``) and the predicted
    training entry (``pred_idx``) are looked up in ``XY``, the label columns
    are dropped, and ``logpdf_calc`` is evaluated on what remains.  The sum of
    the resulting log-pdfs is cross-checked (to 4 decimals) against the
    ``MaxLogLL`` value stored in ``pred``.

    Parameters
    ----------
    XY : pandas.DataFrame
        Database indexed by the values found in ``sim_idx`` / ``pred_idx``.
    pred : pandas.DataFrame
        Must provide the columns ``sim_idx``, ``pred_idx`` and ``MaxLogLL``.
    unc : float
        Uncertainty multiplier forwarded to ``logpdf_calc``.
    lbls, nonlbls : list of str
        Column names of ``XY`` that are removed before the calculation.

    Returns
    -------
    pandas.DataFrame
        One row per processed prediction (row label = ``sim_idx``), one column
        per non-label column of ``XY``; entries that were not scored for a
        given sample are NaN.  An empty frame is returned when nothing was
        processed.  Processing stops at the first row whose log-pdf sum does
        not match ``MaxLogLL`` (``'mismatch'`` is printed) and the rows
        collected so far are returned.
    """
    # Loop-invariant: the same columns are dropped for every row.
    all_lbls = lbls + nonlbls

    # Rows are collected in a list and turned into a frame once at the end.
    # DataFrame.append (used previously) copied the whole frame on every call
    # and no longer exists in pandas >= 2.0.
    records = []
    columns = None
    for _, row in pred.iterrows():
        sim_idx = row['sim_idx']
        pred_idx = row['pred_idx']

        test_sample = XY.loc[sim_idx].drop(all_lbls)
        train_row = XY.loc[pred_idx].drop(all_lbls)

        logpdf = logpdf_calc(train_row, test_sample, unc)
        mll = row['MaxLogLL']

        # Sanity check against the stored maximum log-likelihood.
        if round(mll, 4) != round(np.sum(logpdf), 4):
            print('mismatch')
            break

        if columns is None:
            # Column layout is fixed by the first processed sample.
            columns = test_sample.index.to_list()
        records.append(
            pd.Series(logpdf, index=train_row[test_sample > 0].index, name=sim_idx)
        )

    if not records:
        return pd.DataFrame()
    # Series names become the row labels; reindex restores the full, ordered
    # column set (features absent from a sample stay NaN).
    return pd.DataFrame(records).reindex(columns=columns)

### Train and Test DBs

In [6]:
# Columns of XY that are not features. loop_db drops lbls + nonlbls from each
# row before computing log-pdfs, so both lists must name columns present in XY.
lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
nonlbls = ['AvgPowerDensity', 'ModDensity', 'UiWeight']

# NOTE(review): absolute, machine-specific mount point -- the cells below only
# run where this research drive is mounted.
rdrive = '/mnt/researchdrive/BOX_INTERNAL/opotowsky/'
train_pkl = 'detector_response/d1_hpge_spectra_peaks_trainset.pkl'
# format_XY comes from mll_calc.mll_pred; presumably it loads the pickled
# training set into a DataFrame -- confirm against that module.
XY = format_XY(rdrive + train_pkl)

### LL Calc Results

In [7]:
# Root directory of the results; each job has a sub-directory containing a
# CSV of the same name.
results = rdrive + 'mll/gam_spec/d1/'

# Parameters that receive a relative-error column, in the order the columns
# are appended to each frame.
rel_err_params = ['Burnup', 'Enrichment', 'CoolingTime']

# One results frame per job, keyed 'unc0', 'unc1', ... in job_dirs order.
dfs = {}
for i, job in enumerate(job_dirs):
    key = 'unc' + str(i)
    csv_path = results + job + '/' + job + '.csv'
    job_df = pd.read_csv(csv_path).drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
    dfs[key] = job_df
    for param in rel_err_params:
        # Relative error = <param>_Error column divided by the <param> column.
        job_df['Relative_' + param + '_Error'] = job_df[param + '_Error'] / job_df[param]

In [8]:
# Work with a single uncertainty case for now: 'unc0' is the frame built from
# the first entry of job_dirs (the 5% case, per the original note).
preds = dfs['unc0']

In [9]:
# List the columns available in the selected results frame.
preds.columns

Index(['Unnamed: 0.1.1', 'sim_idx', 'ReactorType', 'CoolingTime', 'Enrichment',
       'Burnup', 'OrigenReactor', 'AvgPowerDensity', 'ModDensity', 'UiWeight',
       'pred_idx', 'pred_ReactorType', 'pred_CoolingTime', 'pred_Enrichment',
       'pred_Burnup', 'pred_OrigenReactor', 'pred_AvgPowerDensity',
       'pred_ModDensity', 'pred_UiWeight', 'MaxLogLL', 'MaxLLUnc',
       '2ndMaxLogLL', '2ndMaxLLUnc', 'CDF_LogLL_0.9998', 'CDF_LLUnc_0.9998',
       'CDF_LogLL_0.9988', 'CDF_LLUnc_0.9988', 'CDF_LogLL_0.95',
       'CDF_LLUnc_0.95', 'CDF_LogLL_0.9', 'CDF_LLUnc_0.9', 'CDF_LogLL_0.5',
       'CDF_LLUnc_0.5', 'CDF_LogLL_0.1', 'CDF_LLUnc_0.1', 'CDF_LogLL_0.01',
       'CDF_LLUnc_0.01', 'ReactorType_Score', 'CoolingTime_Error',
       'Enrichment_Error', 'Burnup_Error', 'OrigenReactor_Score',
       'Relative_Burnup_Error', 'Relative_Enrichment_Error',
       'Relative_CoolingTime_Error'],
      dtype='object')

### LogPDF (wrt nuclide) results

In [None]:
# Uncertainty multiplier for the loop_db call below (0.05, i.e. 5%).
unc = 0.05
# Per-feature log-pdf table; the call is currently disabled.
#logpdf_df = loop_db(XY, preds, unc, lbls, nonlbls)

# Explore Errors

### Burnup

In [22]:
# Summary statistics of the Burnup_Error column as read from the results CSV.
preds['Burnup_Error'].describe()

count    24080.000000
mean       283.350105
std        800.097291
min          0.000000
25%          0.000000
50%          0.000000
75%        160.020000
max      13514.130000
Name: Burnup_Error, dtype: float64

In [10]:
# Summary statistics of the relative burnup error (Burnup_Error / Burnup).
preds['Relative_Burnup_Error'].describe()
# nuc29 results for comparison
#count    24030.000000
#mean         0.003469
#std          0.009906
#min          0.000000
#25%          0.000000
#50%          0.000000
#75%          0.000000
#max          0.141558

count    24080.000000
mean         0.007434
std          0.016869
min          0.000000
25%          0.000000
50%          0.000000
75%          0.007890
max          0.303447
Name: Relative_Burnup_Error, dtype: float64

In [11]:
# Columns shown when inspecting individual predictions: each quantity sits
# next to its pred_* counterpart and, where present, its score / relative error.
# This list is reused by the filtering cells further down.
to_print = ['sim_idx', 'pred_idx', 'MaxLogLL',
            'AvgPowerDensity', 'pred_AvgPowerDensity', 
            'ModDensity', 'pred_ModDensity',
            'ReactorType', 'pred_ReactorType', 'ReactorType_Score', 
            'Enrichment', 'pred_Enrichment', 'Relative_Enrichment_Error', 
            'Burnup', 'pred_Burnup', 'Relative_Burnup_Error',
            'CoolingTime', 'pred_CoolingTime', 'Relative_CoolingTime_Error',
            'OrigenReactor', 'pred_OrigenReactor']
# Worst burnup predictions: relative burnup error above 0.16.
preds.loc[preds['Relative_Burnup_Error'] > 0.16, to_print]

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,AvgPowerDensity,pred_AvgPowerDensity,ModDensity,pred_ModDensity,ReactorType,pred_ReactorType,ReactorType_Score,...,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor
3224,58941,169743,-1418.153153,22.0,10.0,0.3,0.7,bwr,bwr,True,...,0.51,0.875306,36277.08,42615.19,0.174714,0.000576,0.000521,0.095486,ge7x7-0,atrium10x10-9
4169,76287,389164,-1136.624763,10.0,35.0,0.7,0.71,bwr,pwr,False,...,1.61,0.678,7973.46,5634.2,0.293381,0.000576,0.000864,0.5,ge7x7-0,bw15x15
4260,78057,147382,-1229.279927,22.0,22.0,0.7,0.1,bwr,bwr,True,...,4.76,0.048,11457.98,9547.88,0.166705,0.000576,0.000772,0.340278,ge7x7-0,abb8x8-1
4432,81125,439768,-1152.840902,22.0,18.0,0.1,0.84,bwr,phwr,False,...,0.711,0.422,4878.84,6153.14,0.261189,0.000772,0.000591,0.234456,abb8x8-1,candu37
4484,82069,427042,-1436.745868,22.0,35.0,0.1,0.71,bwr,pwr,False,...,5.27,9.54,36282.93,42293.67,0.165663,0.000772,0.00067,0.132124,abb8x8-1,vver1000
8282,151630,272403,-1448.481905,22.0,10.0,0.3,0.5,bwr,bwr,True,...,2.2,0.537815,47192.85,59719.48,0.265435,0.000772,0.000751,0.027202,abb8x8-1,svea64-1
12118,221663,266208,-1145.686783,10.0,22.0,0.7,0.1,bwr,bwr,True,...,2.2,0.45679,7853.16,5844.07,0.255832,0.000521,0.000751,0.441459,atrium10x10-9,svea64-1
13487,246738,328689,-1347.126888,22.0,25.0,0.5,0.71,bwr,pwr,False,...,3.22,5.44,15730.18,19747.8,0.255408,0.000751,0.000548,0.270306,svea64-1,ce14x14
13709,250750,365033,-1462.136763,22.0,41.0,0.7,0.71,bwr,pwr,False,...,0.5,0.0,44535.37,36875.71,0.17199,0.000751,0.000728,0.030626,svea64-1,s18x18
16771,306918,47672,-1447.299418,22.0,10.0,0.1,0.5,bwr,bwr,True,...,2.79,0.413866,44535.37,58049.5,0.303447,0.000751,0.000576,0.233023,svea64-1,ge7x7-0


In [None]:
# finding a good prediction with high Max LL
new_df = preds.loc[(preds['Relative_Burnup_Error'] == 0.0) & 
                   (preds['Relative_Enrichment_Error'] == 0.0) & 
                   (preds['Relative_CoolingTime_Error'] == 0.0) & 
                   (preds['ReactorType_Score'] == True) &
                   (preds['MaxLogLL'] > 20), to_print]
new_df

In [None]:
# Save the first matching row (kept as a one-row DataFrame) to a pickle in the
# current working directory.
# NOTE(review): new_df must come from the "good prediction" filter; the name is
# also assigned by the "bad prediction" cell.
new_df.iloc[0:1].to_pickle('train_pred_good.pkl')

In [None]:
# finding a bad prediction
new_df = preds.loc[(preds['Relative_Burnup_Error'] > 0.05) & 
                   (preds['Relative_Enrichment_Error'] > 0.05) & 
                   (preds['Relative_CoolingTime_Error'] != np.inf) & (preds['Relative_CoolingTime_Error'] > 0.05) &
                   (preds['ReactorType_Score'] == False), to_print]
                   #(preds['MaxLogLL'] > 20)
new_df

In [None]:
# Save the first matching row (kept as a one-row DataFrame) to a pickle in the
# current working directory.
# NOTE(review): new_df must come from the "bad prediction" filter; the name is
# also assigned by the "good prediction" cell.
new_df.iloc[0:1].to_pickle('train_pred_bad.pkl')

### Enrichment

In [23]:
# Summary statistics of the Enrichment_Error column as read from the results CSV.
preds['Enrichment_Error'].describe()

count    24080.000000
mean         0.474577
std          0.707389
min          0.000000
25%          0.000000
50%          0.040000
75%          0.810000
max          4.820000
Name: Enrichment_Error, dtype: float64

In [12]:
# Summary statistics of the relative enrichment error (Enrichment_Error / Enrichment).
preds['Relative_Enrichment_Error'].describe()
# nuc29 results for comparison
#count    24030.000000
#mean         0.003688
#std          0.015824
#min          0.000000
#25%          0.000000
#50%          0.000000
#75%          0.000000
#max          0.260204

count    24080.000000
mean         0.252097
std          0.637707
min          0.000000
25%          0.000000
50%          0.038462
75%          0.272727
max          9.640000
Name: Relative_Enrichment_Error, dtype: float64

In [25]:
# Worst enrichment predictions: Enrichment_Error above 4.
preds.loc[preds['Enrichment_Error'] > 4, to_print]

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,AvgPowerDensity,pred_AvgPowerDensity,ModDensity,pred_ModDensity,ReactorType,pred_ReactorType,ReactorType_Score,...,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor
66,1191,227457,-1389.293601,10.0,22.0,0.1,0.1,bwr,bwr,True,...,4.57,7.462963,38404.38,36155.54,0.058557,722.652583,815.87552,0.129001,ge7x7-0,atrium10x10-9
156,2783,306748,-1406.768224,22.0,22.0,0.1,0.1,bwr,bwr,True,...,4.76,7.814815,36277.08,35160.39,0.030782,691.373834,340.831443,0.507023,ge7x7-0,svea64-1
257,4610,77296,-1413.447099,10.0,10.0,0.3,0.7,bwr,bwr,True,...,5.0,8.259259,46849.31,46849.31,0.0,501.174696,221.142476,0.558752,ge7x7-0,ge7x7-0
327,5997,67114,-1272.340226,22.0,10.0,0.3,0.1,bwr,bwr,True,...,5.0,8.259259,27004.1,27004.1,0.0,3604.785743,2877.991122,0.201619,ge7x7-0,ge7x7-0
606,11160,425753,-1395.346369,10.0,25.0,0.7,0.71,bwr,pwr,False,...,5.27,8.759259,42798.88,37839.18,0.115884,601.367719,541.647619,0.099307,ge7x7-0,vver1000
684,12477,147878,-1286.26093,22.0,22.0,0.7,0.1,bwr,bwr,True,...,4.76,7.814815,22177.69,21794.86,0.017262,2546.062421,2161.680854,0.150971,ge7x7-0,abb8x8-1
3853,70575,163090,-1379.131373,10.0,10.0,0.3,0.3,bwr,bwr,True,...,0.51,0.898,38404.38,40213.62,0.04711,722.652583,1106.705317,0.531449,ge7x7-0,atrium10x10-9
4310,78915,11241,-1312.374052,22.0,10.0,0.7,0.7,bwr,bwr,True,...,0.54,0.892,42798.88,46849.31,0.094639,2917.346435,2877.991122,0.01349,ge7x7-0,ge7x7-0
4410,80727,336730,-1392.146744,10.0,25.0,0.1,0.71,bwr,pwr,False,...,5.22,9.44,55425.74,49987.76,0.098113,1159.784712,1342.101743,0.157199,abb8x8-1,ce14x14
4484,82069,427042,-1436.745868,22.0,35.0,0.1,0.71,bwr,pwr,False,...,5.27,9.54,36282.93,42293.67,0.165663,0.000772,0.00067,0.132124,abb8x8-1,vver1000


### Cooling Time

In [26]:
# Summary statistics of the CoolingTime_Error column as read from the results CSV.
preds['CoolingTime_Error'].describe()

count    24080.000000
mean        65.774197
std        106.819106
min          0.000000
25%          0.837097
50%         22.985977
75%         78.118430
max       1047.926663
Name: CoolingTime_Error, dtype: float64

In [16]:
# Summary statistics of the relative cooling-time error (CoolingTime_Error / CoolingTime).
preds['Relative_CoolingTime_Error'].describe()
# nuc29 results for comparison
# NOTE(review): the inf mean/max below presumably come from dividing by a
# CoolingTime of 0 -- confirm.
#count    2.402900e+04
#mean              inf
#std               NaN
#min      0.000000e+00
#25%      3.174909e-03
#50%      1.037382e-02
#75%      2.831056e-02
#max               inf

count    24080.000000
mean        12.437931
std        282.437788
min          0.000000
25%          0.000711
50%          0.012022
75%          0.066165
max      12168.499040
Name: Relative_CoolingTime_Error, dtype: float64

In [30]:
# Worst cooling-time predictions: CoolingTime_Error above 800.
preds.loc[preds['CoolingTime_Error'] > 800, to_print]
# Alternative filter on the (finite) relative error, left disabled:
#preds.loc[(preds['Relative_CoolingTime_Error'] != np.inf) & (preds['Relative_CoolingTime_Error'] > 5000), to_print]

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,AvgPowerDensity,pred_AvgPowerDensity,ModDensity,pred_ModDensity,ReactorType,pred_ReactorType,ReactorType_Score,...,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor
4039,73901,372508,-1301.021185,10.0,41.0,0.5,0.71,bwr,pwr,False,...,2.03,0.594,38404.38,36875.71,0.039805,3069.556073,3889.809512,0.267222,ge7x7-0,s18x18
17030,311684,368685,-1255.359471,10.0,41.0,0.5,0.71,bwr,pwr,False,...,1.61,0.661765,34542.92,33606.05,0.027122,4609.418475,5477.17017,0.188256,svea64-1,s18x18
17032,311681,389741,-1273.777232,10.0,35.0,0.5,0.71,bwr,pwr,False,...,1.61,0.661765,34542.92,33455.37,0.031484,3828.034825,4659.404299,0.217179,svea64-1,bw15x15
17225,315221,393635,-1295.915947,10.0,35.0,0.7,0.71,bwr,pwr,False,...,2.01,0.577731,44535.37,42528.45,0.045064,3828.034825,4659.404299,0.217179,svea64-1,bw15x15
23926,437888,384164,-1104.838749,2.2,25.0,0.84,0.71,phwr,pwr,False,...,0.5,0.296765,4529.76,4289.02,0.053146,377.951895,1178.272577,2.11752,candu37,bw15x15
23951,438452,11964,-1079.148043,2.2,22.0,0.84,0.7,phwr,bwr,False,...,0.54,0.240506,8649.45,8232.51,0.048204,3968.95518,4888.970123,0.231803,candu37,ge7x7-0
23959,438439,10301,-1112.470551,2.2,10.0,0.84,0.7,phwr,bwr,False,...,0.54,0.240506,8649.45,8232.51,0.048204,2478.741403,3317.028212,0.338191,candu37,ge7x7-0
23983,438890,172354,-1261.732106,2.2,10.0,0.84,0.1,phwr,bwr,False,...,1.44,1.025316,13667.64,12861.21,0.059003,290.765745,1222.842911,3.205595,candu37,atrium10x10-9
23988,438876,170686,-1105.115589,2.2,22.0,0.84,0.7,phwr,bwr,False,...,0.51,0.2827,12861.88,11583.12,0.099422,5744.04358,6791.970243,0.182437,candu37,atrium10x10-9


# Max Log LL

In [31]:
# Print the MaxLogLL summary statistics for the four uncertainty cases, in order.
for unc_key in ('unc0', 'unc1', 'unc2', 'unc3'):
    print(dfs[unc_key]['MaxLogLL'].describe())

count    24080.000000
mean     -1226.388858
std        159.236910
min      -1554.191596
25%      -1332.813917
50%      -1271.165489
75%      -1142.566049
max       -592.383835
Name: MaxLogLL, dtype: float64
count    24080.000000
mean     -1304.233124
std        158.340699
min      -1597.782884
25%      -1409.295233
50%      -1348.834613
75%      -1221.243084
max       -685.392542
Name: MaxLogLL, dtype: float64
count    24080.000000
mean     -1349.304149
std        159.145652
min      -1644.400756
25%      -1455.144632
50%      -1392.957910
75%      -1267.006359
max       -727.760277
Name: MaxLogLL, dtype: float64
count    24080.000000
mean     -1380.682951
std        158.856992
min      -1676.538345
25%      -1485.146805
50%      -1424.772146
75%      -1297.925444
max       -766.830634
Name: MaxLogLL, dtype: float64
