In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

# Data Setup & Functions

In [5]:
from mll_calc.mll_pred import format_XY
from mll_calc.all_jobs import uncs, job_dirs

In [6]:
def logpdf_calc(row, test_sample, unc):
    """Gaussian log-PDF of each simulated entry about the matching test entry.

    Only positions where ``test_sample`` is strictly positive are evaluated.

    Parameters
    ----------
    row : pandas.Series
        Simulated values; expected to share ``test_sample``'s index.
    test_sample : pandas.Series
        Measured values, used as the mean (``loc``) of each normal distribution.
    unc : float
        Relative uncertainty; the standard deviation of each distribution
        is ``unc`` times the corresponding entry of ``row``.

    Returns
    -------
    numpy.ndarray
        Result of ``stats.norm.logpdf``, one value per positive entry of
        ``test_sample``, in index order.
    """
    measured = test_sample > 0
    # Plain Python lists are handed to scipy, exactly as before, so that
    # object-dtype Series are converted the same way.
    simulated = row[measured].values.tolist()
    sigma = (row * unc)[measured].values.tolist()
    observed = test_sample[measured].values.tolist()
    return stats.norm.logpdf(simulated, loc=observed, scale=sigma)

In [7]:
def loop_db(XY, pred, unc, lbls, nonlbls):
    """Recompute the per-nuclide log-PDF contributions for each prediction.

    For every row of ``pred`` the test sample (``sim_idx``) and the matched
    training entry (``pred_idx``) are looked up in ``XY``, the label and
    non-label entries are dropped, and ``logpdf_calc`` is evaluated on the
    remainder. The sum of the contributions is checked against the stored
    ``MaxLogLL``; processing stops at the first disagreement, printing
    ``'mismatch'`` and returning the rows gathered so far.

    Parameters
    ----------
    XY : pandas.DataFrame
        Database indexed by the values found in ``sim_idx`` / ``pred_idx``.
    pred : pandas.DataFrame
        Must provide the columns ``sim_idx``, ``pred_idx`` and ``MaxLogLL``.
    unc : float
        Relative uncertainty forwarded to ``logpdf_calc``.
    lbls, nonlbls : list of str
        Entries dropped from each ``XY`` row before the calculation.

    Returns
    -------
    pandas.DataFrame
        One row per processed prediction (indexed by ``sim_idx``) and one
        column per remaining ``XY`` entry; NaN where the test sample was not
        positive. An empty DataFrame if nothing was processed.
    """
    # Loop-invariant: the entries to strip are the same for every prediction.
    all_lbls = lbls + nonlbls
    columns = None
    rows = []
    for _, row in pred.iterrows():
        sim_idx = row['sim_idx']
        pred_idx = row['pred_idx']

        test_sample = XY.loc[sim_idx].drop(all_lbls)
        train_row = XY.loc[pred_idx].drop(all_lbls)

        logpdf = logpdf_calc(train_row, test_sample, unc)
        mll = row['MaxLogLL']

        # Absolute-tolerance comparison instead of comparing rounded values:
        # two nearly identical numbers can fall on opposite sides of a
        # rounding boundary and be reported as a spurious mismatch.
        if not np.isclose(mll, np.sum(logpdf), rtol=0, atol=1e-4):
            print('mismatch')
            break

        if columns is None:
            columns = test_sample.index.to_list()
        rows.append(pd.Series(logpdf, index=train_row[test_sample > 0].index, name=sim_idx))

    if not rows:
        return pd.DataFrame()
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call. Reindexing restores the full
    # column set, including nuclides that were never positive.
    return pd.DataFrame(rows).reindex(columns=columns)

### Train and Test DBs

In [8]:
# Entries of XY that loop_db drops before the log-PDF calculation.
lbls = [
    'ReactorType',
    'CoolingTime',
    'Enrichment',
    'Burnup',
    'OrigenReactor',
]
nonlbls = [
    'AvgPowerDensity',
    'ModDensity',
    'UiWeight',
]

# NOTE(review): absolute, machine-specific path; the trailing slash matters
# because later cells build paths by string concatenation with rdrive.
rdrive = '/mnt/researchdrive/BOX_INTERNAL/opotowsky/'
train_pkl = 'sim_grams_nuc29.pkl'
XY = format_XY(f'{rdrive}{train_pkl}')

### LL Calc Results

In [9]:
results = rdrive + 'mll/nuc_conc/train/'

# `dfs` has to exist before the loop fills it; without this assignment the
# cell raises NameError on a fresh kernel.
dfs = {}
for i, job in enumerate(job_dirs):
    key = 'unc' + str(i)
    job_df = pd.read_csv(results + job + '/' + job + '.csv')
    # Presumably index columns left over from earlier CSV round-trips;
    # errors='ignore' keeps the cell working for files that lack them.
    job_df = job_df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')
    for param in ('Burnup', 'Enrichment', 'CoolingTime'):
        # A zero true value gives inf (or NaN for 0/0) here; the cooling-time
        # cells further down filter on `!= np.inf` for that reason.
        job_df['Relative_' + param + '_Error'] = job_df[param + '_Error'] / job_df[param]
    dfs[key] = job_df

# For ease of only viewing 5% case for now
preds = dfs['unc0']

In [15]:
# List the columns available in the 5% case results.
preds.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'sim_idx', 'ReactorType', 'CoolingTime',
       'Enrichment', 'Burnup', 'OrigenReactor', 'AvgPowerDensity',
       'ModDensity', 'UiWeight', 'pred_idx', 'pred_ReactorType',
       'pred_CoolingTime', 'pred_Enrichment', 'pred_Burnup',
       'pred_OrigenReactor', 'pred_AvgPowerDensity', 'pred_ModDensity',
       'pred_UiWeight', 'MaxLogLL', 'MaxLLUnc', '2ndMaxLogLL', '2ndMaxLLUnc',
       'CDF_LogLL_0.9998', 'CDF_LLUnc_0.9998', 'CDF_LogLL_0.9988',
       'CDF_LLUnc_0.9988', 'CDF_LogLL_0.95', 'CDF_LLUnc_0.95', 'CDF_LogLL_0.9',
       'CDF_LLUnc_0.9', 'CDF_LogLL_0.5', 'CDF_LLUnc_0.5', 'CDF_LogLL_0.1',
       'CDF_LLUnc_0.1', 'CDF_LogLL_0.01', 'CDF_LLUnc_0.01',
       'ReactorType_Score', 'CoolingTime_Error', 'Enrichment_Error',
       'Burnup_Error', 'OrigenReactor_Score', 'Relative_Burnup_Error',
       'Relative_Enrichment_Error', 'Relative_CoolingTime_Error'],
      dtype='object')

### LogPDF (wrt nuclide) results

In [None]:
# Relative uncertainty handed to loop_db; 0.05 is the "5% case" that `preds`
# was selected for.
# NOTE(review): this cell has no execution count, yet later cells read
# `logpdf_df` -- it must be run before them on a fresh kernel.
unc = 0.05
logpdf_df = loop_db(XY, preds, unc, lbls, nonlbls)

# Explore Errors

### Burnup

In [10]:
# Summary statistics of the relative burnup error for the 5% case.
preds['Relative_Burnup_Error'].describe()
# Output of an earlier run on the old nuc29 results, kept for comparison:
#count    12000.000000
#mean         0.003525
#std          0.009948
#min          0.000000
#25%          0.000000
#50%          0.000000
#75%          0.000000
#max          0.149049

count    24030.000000
mean         0.003469
std          0.009906
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          0.141558
Name: Relative_Burnup_Error, dtype: float64

In [16]:
# Columns shown when inspecting individual predictions: identifiers and
# likelihood first, then each true value next to its prediction.
to_print = [
    'sim_idx', 'pred_idx', 'MaxLogLL',
    'AvgPowerDensity', 'pred_AvgPowerDensity',
    'ModDensity', 'pred_ModDensity',
    'ReactorType', 'pred_ReactorType', 'ReactorType_Score',
    'Enrichment', 'pred_Enrichment', 'Relative_Enrichment_Error',
    'Burnup', 'pred_Burnup', 'Relative_Burnup_Error',
    'CoolingTime', 'pred_CoolingTime', 'Relative_CoolingTime_Error',
    'OrigenReactor', 'pred_OrigenReactor',
]
# Predictions whose relative burnup error exceeds 0.09.
preds.loc[preds['Relative_Burnup_Error'].gt(0.09), to_print]

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,AvgPowerDensity,pred_AvgPowerDensity,ModDensity,pred_ModDensity,ReactorType,pred_ReactorType,ReactorType_Score,...,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor
3317,72845,76265,-110.207959,22.0,22.0,0.3,0.5,bwr,bwr,True,...,3.83,0.0,14183.67,15521.57,0.094327,123.96447,123.96447,0.0,ge7x7-0,ge7x7-0
3319,72846,76266,-109.265031,22.0,22.0,0.3,0.5,bwr,bwr,True,...,3.83,0.0,14183.67,15521.57,0.094327,213.376227,213.376227,0.0,ge7x7-0,ge7x7-0
7743,170473,73095,-122.380574,22.0,22.0,0.1,0.3,bwr,bwr,True,...,3.83,0.010554,19194.08,20962.08,0.092112,1103.125238,1100.612018,0.002278,abb8x8-1,ge7x7-0
10486,230661,331465,-90.425388,22.0,22.0,0.7,0.7,bwr,bwr,True,...,1.45,0.066176,11220.14,12296.75,0.095953,1947.827214,1960.053924,0.006277,atrium10x10-9,svea64-1
10853,238749,141310,-101.410056,10.0,10.0,0.3,0.5,bwr,bwr,True,...,1.96,0.024876,4156.59,4744.99,0.141558,564.091877,545.167126,0.033549,atrium10x10-9,abb8x8-1
11479,252435,255855,-101.808159,10.0,10.0,0.1,0.3,bwr,bwr,True,...,2.89,0.0,10185.04,11220.14,0.101629,1344.264698,1344.264698,0.0,atrium10x10-9,atrium10x10-9
11706,257409,260829,-93.406339,22.0,22.0,0.3,0.5,bwr,bwr,True,...,2.89,0.0,9326.39,10185.04,0.092067,564.091877,564.091877,0.0,atrium10x10-9,atrium10x10-9
11995,263950,163152,-67.038425,22.0,22.0,0.7,0.7,bwr,bwr,True,...,3.27,0.131488,4156.59,4744.99,0.141558,698.65191,744.412322,0.065498,atrium10x10-9,abb8x8-1
16045,352931,258852,-50.870555,10.0,10.0,0.1,0.5,bwr,bwr,True,...,2.89,0.052459,2961.19,3235.81,0.09274,859.422117,854.168573,0.006113,svea64-1,atrium10x10-9
23963,527289,522310,-64.300861,18.0,22.0,0.84,0.84,phwr,phwr,True,...,0.711,0.0,4169.04,4551.81,0.091813,591.580111,570.998629,0.034791,candu37,candu19


In [17]:
# finding a good prediction with high Max LL
new_df = preds.loc[(preds['Relative_Burnup_Error'] == 0.0) & 
                   (preds['Relative_Enrichment_Error'] == 0.0) & 
                   (preds['Relative_CoolingTime_Error'] == 0.0) & 
                   (preds['ReactorType_Score'] == True) &
                   (preds['MaxLogLL'] > 20), to_print]
new_df

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,AvgPowerDensity,pred_AvgPowerDensity,ModDensity,pred_ModDensity,ReactorType,pred_ReactorType,ReactorType_Score,...,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor
21361,470037,468777,34.354942,41.0,35.0,0.71,0.71,pwr,pwr,True,...,5.4,0.0,1878.95,1878.95,0.0,6878.110557,6878.110557,0.0,s18x18,s18x18
23198,510319,511579,27.58417,35.0,41.0,0.71,0.71,pwr,pwr,True,...,4.36,0.0,1817.43,1817.43,0.0,1360.851082,1360.851082,0.0,vver1000,vver1000


In [18]:
# Persist the first "good" prediction as a one-row DataFrame (the 0:1 slice
# keeps the frame shape); the file is written to the current working directory.
new_df.iloc[0:1].to_pickle('train_pred_good.pkl')

In [19]:
# finding a bad prediction
new_df = preds.loc[(preds['Relative_Burnup_Error'] > 0.05) & 
                   (preds['Relative_Enrichment_Error'] > 0.05) & 
                   (preds['Relative_CoolingTime_Error'] != np.inf) & (preds['Relative_CoolingTime_Error'] > 0.05) &
                   (preds['ReactorType_Score'] == False), to_print]
                   #(preds['MaxLogLL'] > 20)
new_df

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,AvgPowerDensity,pred_AvgPowerDensity,ModDensity,pred_ModDensity,ReactorType,pred_ReactorType,ReactorType_Score,...,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor
10476,230410,452471,-68.23449,22.0,25.0,0.7,0.71,bwr,pwr,False,...,1.51,0.110294,6085.43,6409.86,0.053313,698.65191,648.505877,0.071775,atrium10x10-9,s18x18
21259,467584,294244,-57.322216,25.0,22.0,0.71,0.5,pwr,bwr,False,...,4.97,0.07963,6409.86,6085.43,0.050614,35.352356,27.895076,0.210942,s18x18,atrium10x10-9
21612,475627,123607,-120.117251,25.0,22.0,0.71,0.3,pwr,bwr,False,...,1.47,0.063694,28775.72,27118.57,0.057588,291.648122,317.650622,0.089157,bw15x15,abb8x8-1


In [20]:
# Persist the first "bad" prediction as a one-row DataFrame; `new_df` here is
# the frame from the bad-prediction filter, which reuses the same name.
new_df.iloc[0:1].to_pickle('train_pred_bad.pkl')

### Enrichment

In [14]:
# Summary statistics of the relative enrichment error for the 5% case.
preds['Relative_Enrichment_Error'].describe()
# Output of an earlier run on the old nuc29 results, kept for comparison:
#count    12000.000000
#mean         0.003752
#std          0.014869
#min          0.000000
#25%          0.000000
#50%          0.000000
#75%          0.000000
#max          0.248447

count    24030.000000
mean         0.003688
std          0.015824
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          0.260204
Name: Relative_Enrichment_Error, dtype: float64

In [19]:
# Predictions whose relative enrichment error exceeds 0.17.
preds.loc[preds['Relative_Enrichment_Error'] > 0.17, to_print]

Unnamed: 0,sim_idx,pred_idx,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
6193,136267,321067,22.0,bwr,bwr,True,1.96,1.45,0.260204,4744.99,4393.24,0.074131,317.650622,382.083291,0.202841,abb8x8-1,svea64-1,-149.501757
7003,154153,70155,22.0,bwr,bwr,True,3.27,3.83,0.171254,40858.01,43319.72,0.06025,1103.125238,1100.612018,0.002278,abb8x8-1,ge7x7-0,-151.979522
9968,219250,17711,10.0,bwr,bwr,True,1.36,1.61,0.183824,22560.98,24325.03,0.07819,698.65191,671.225903,0.039256,atrium10x10-9,ge7x7-0,-127.206715
9973,219494,17894,10.0,bwr,bwr,True,1.36,1.61,0.183824,33003.01,33220.78,0.006598,1058.436222,1049.079632,0.00884,atrium10x10-9,ge7x7-0,-121.167664
10018,220335,18736,22.0,bwr,bwr,True,1.36,1.61,0.183824,6085.43,6527.94,0.072716,1344.264698,1375.012358,0.022873,atrium10x10-9,ge7x7-0,-89.706413
10032,220761,19221,22.0,bwr,bwr,True,1.36,1.61,0.183824,16428.84,17877.45,0.088175,1947.827214,1947.44785,0.000195,atrium10x10-9,ge7x7-0,-111.269827
10132,222861,21260,10.0,bwr,bwr,True,1.36,1.61,0.183824,33003.01,33220.78,0.006598,1947.827214,1897.65499,0.025758,atrium10x10-9,ge7x7-0,-118.230786
10232,225082,23482,22.0,bwr,bwr,True,1.36,1.61,0.183824,70500.0,70500.0,0.0,1947.827214,1947.44785,0.000195,atrium10x10-9,ge7x7-0,-121.842966
10301,226750,25150,10.0,bwr,bwr,True,1.36,1.61,0.183824,70500.0,70500.0,0.0,564.091877,525.189565,0.068964,atrium10x10-9,ge7x7-0,-126.678673
16730,368105,82505,22.0,bwr,bwr,True,3.05,3.83,0.255738,4393.24,4734.98,0.077788,124.376303,123.96447,0.003311,svea64-1,ge7x7-0,-74.400055


### Cooling Time

In [20]:
# Summary statistics of the relative cooling-time error for the 5% case.
# NOTE(review): the inf mean/max presumably comes from samples whose true
# CoolingTime is 0 in the division that defines this column -- confirm.
preds['Relative_CoolingTime_Error'].describe()
# Output of an earlier run on the old nuc29 results, kept for comparison:
#count    1.200000e+04
#mean              inf
#std               NaN
#min      0.000000e+00
#25%      3.333665e-03
#50%      1.209631e-02
#75%      2.999413e-02
#max               inf

count    2.402900e+04
mean              inf
std               NaN
min      0.000000e+00
25%      3.174909e-03
50%      1.037382e-02
75%      2.831056e-02
max               inf
Name: Relative_CoolingTime_Error, dtype: float64

In [26]:
# Predictions with a finite relative cooling-time error above 3.3.
preds.loc[
    preds['Relative_CoolingTime_Error'].ne(np.inf)
    & preds['Relative_CoolingTime_Error'].gt(3.3),
    to_print,
]

Unnamed: 0,sim_idx,pred_idx,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
19624,431823,431824,35.0,pwr,pwr,True,1.63,1.63,0.0,45569.93,45569.93,0.0,5.378912,33.379722,5.205664,w17x17,w17x17,-116.84847
20173,443943,443944,41.0,pwr,pwr,True,3.86,3.86,0.0,20332.54,20332.54,0.0,5.378912,33.379722,5.205664,w17x17,w17x17,-97.554395
20831,458343,458344,35.0,pwr,pwr,True,1.9,1.9,0.0,54038.26,54038.26,0.0,5.822302,35.352356,5.071886,s18x18,s18x18,-120.535317
20947,460743,460744,25.0,pwr,pwr,True,3.0,3.0,0.0,41813.04,41813.04,0.0,5.822302,35.352356,5.071886,s18x18,s18x18,-116.226126
20950,460983,460984,25.0,pwr,pwr,True,3.0,3.0,0.0,62060.12,62060.12,0.0,5.822302,35.352356,5.071886,s18x18,s18x18,-123.569189
23656,520365,520366,32.0,pwr,pwr,True,3.82,3.82,0.0,55107.98,55107.98,0.0,5.853899,27.174611,3.642139,vver440_3.82,vver440_3.82,-120.891197
23792,523504,523505,22.0,phwr,phwr,True,0.711,0.711,0.0,976.14,976.14,0.0,21.245189,118.534143,4.57934,candu28,candu28,-314.132343
23990,527823,527824,18.0,phwr,phwr,True,0.711,0.711,0.0,8359.44,8359.44,0.0,6.087128,30.272394,3.973182,candu37,candu37,-65.743391


# Max Log LL

In [27]:
# Compare the MaxLogLL distribution of the first job ('unc0', the 5% case)
# with that of the fourth job ('unc3').
print(dfs['unc0']['MaxLogLL'].describe())
print(dfs['unc3']['MaxLogLL'].describe())

count    24030.000000
mean       -72.590698
std         39.836912
min       -314.132343
25%       -100.163817
50%        -84.816623
75%        -57.545144
max         99.408788
Name: MaxLogLL, dtype: float64
count    24030.000000
mean      -109.490204
std         40.547331
min       -165.511257
25%       -138.170095
50%       -122.438716
75%        -93.833657
max         57.182867
Name: MaxLogLL, dtype: float64


# Nuclide Contributions to LL

In [28]:
# The 29 nuclide names used below to select columns of `logpdf_df`,
# grouped by element.
nuc29 = ['am241', 'am242m', 'am243', 
         'cm242', 'cm244', 
         'cs134', 'cs137', 
         'eu154', 
         'nd143', 'nd144', 'nd145', 'nd146', 'nd148', 'nd150',
         'np237', 
         'pu238', 'pu239', 'pu240', 'pu241', 'pu242',
         'sm147', 'sm149', 'sm150', 'sm151', 'sm152',
         'u234', 'u235', 'u236', 'u238']

In [29]:
# Peek at ten random rows; the fixed seed makes the displayed rows
# reproducible across kernel restarts.
logpdf_df.sample(10, random_state=0)

Unnamed: 0,am241,am242m,am243,cm242,cm244,cs134,cs137,eu154,nd143,nd144,...,pu242,sm147,sm149,sm150,sm151,sm152,u234,u235,u236,u238
253589,-4.655484,0.681044,-4.369243,6.597865,-4.081567,-0.582493,-5.312229,-1.322161,-4.882605,-5.768622,...,-5.291546,-3.63795,0.98655,-4.09845,-0.701404,-2.708195,-3.248592,-5.713489,-6.178775,-11.651742
220666,-4.255233,3.141454,-1.469946,9.090388,0.474172,-0.186655,-3.898317,0.769119,-3.843175,-4.115978,...,-3.543607,-2.790269,1.103745,-2.758694,0.137418,-1.957447,-3.456627,-6.585711,-5.259492,-11.709288
121681,-1.905861,2.379386,-1.857217,-0.408113,-0.373349,-1.532787,-4.376516,-0.221986,-3.966563,-4.233131,...,-3.901079,-2.178693,1.747395,-2.914319,0.225639,-2.196951,-3.400088,-6.327679,-5.431993,-11.70702
120924,-4.115219,2.081688,-4.705172,6.521814,-4.882333,-1.812733,-5.563265,-1.422365,-4.6138,-5.936549,...,-5.489,-3.362665,1.085767,-4.284681,-0.577955,-2.860554,-2.584479,-3.293379,-5.388862,-11.649339
438679,-1.89965,5.802303,2.170445,9.029936,5.002184,1.015416,-3.7324,1.6351,-3.713472,-3.775762,...,-0.975791,-2.394043,0.928122,-2.273355,0.021849,-1.569961,-3.659133,-7.961662,-5.49365,-11.698416
121355,-3.264893,3.779203,0.283036,9.726779,2.837064,2.071557,-3.566393,1.607048,-3.541794,-3.755968,...,-2.431094,-2.54403,1.692212,-2.277091,0.512247,-1.67224,-3.584777,-6.866712,-5.139916,-11.714527
15788,-2.300519,3.906801,-2.18898,-0.17288,-0.651122,-1.326131,-4.261953,0.039709,-3.579897,-4.210238,...,-4.329096,-2.071493,1.667337,-2.835861,0.721421,-2.289992,-3.298485,-4.37494,-4.460333,-11.715448
160682,-1.883032,2.292217,-2.758694,-0.901999,-1.68301,-2.625668,-5.044459,-1.033645,-4.584195,-4.966519,...,-4.445152,-2.572576,0.65041,-3.621641,-0.268926,-2.629126,-3.269837,-6.88969,-6.247057,-11.6813
366094,-4.243082,2.447028,-4.382485,8.377563,-3.911348,-1.235296,-5.408992,-0.996629,-4.601548,-5.91885,...,-5.420413,-3.574127,1.25218,-4.160554,-0.297875,-2.954298,-2.97959,-4.617524,-6.192707,-11.655118
388273,0.685066,8.747108,6.13964,10.733204,9.595372,2.007504,-2.977621,3.068872,-2.994623,-2.890934,...,2.335305,-1.391302,0.746541,-1.342244,0.005132,-0.524413,-3.739126,-8.593091,-5.020755,-11.685843


In [30]:
# Print mean/std/min/max of the log-PDF contribution of each nuclide,
# taken from pandas' describe() summary of that column.
for nuc in nuc29:
    desc = logpdf_df[nuc].describe()
    print(desc[['mean', 'std', 'min', 'max']])

mean     -2.820982
std       2.145106
min    -107.827192
max       9.930513
Name: am241, dtype: float64
mean     3.857434
std      3.062783
min    -51.241867
max     17.370122
Name: am242m, dtype: float64
mean    -0.688843
std      4.058155
min     -8.728945
max     15.104302
Name: am243, dtype: float64
mean      6.749372
std       5.590586
min    -113.053963
max      23.318809
Name: cm242, dtype: float64
mean     1.097483
std      5.366312
min    -22.510467
max     21.060062
Name: cm244, dtype: float64
mean     0.425326
std      2.690959
min    -28.348369
max     10.668878
Name: cs134, dtype: float64
mean   -4.267991
std     1.022022
min    -6.442801
max    -0.337778
Name: cs137, dtype: float64
mean    0.642084
std     1.875193
min    -2.957553
max     8.037416
Name: eu154, dtype: float64
mean   -3.972439
std     0.797334
min    -6.405137
max    -0.669194
Name: nd143, dtype: float64
mean    -4.476833
std      1.279887
min    -72.752913
max      1.471931
Name: nd144, dtype: float64
mea

In [31]:
# Collect the same four statistics into one table: a row per nuclide,
# a column per statistic.
arr = [
    tuple(logpdf_df[nuc].describe().loc[['mean', 'std', 'min', 'max']])
    for nuc in nuc29
]
logpdf_nuc = pd.DataFrame(arr, index=nuc29, columns=['mean', 'std', 'min', 'max'])

In [32]:
# Display the per-nuclide summary table.
logpdf_nuc

Unnamed: 0,mean,std,min,max
am241,-2.820982,2.145106,-107.827192,9.930513
am242m,3.857434,3.062783,-51.241867,17.370122
am243,-0.688843,4.058155,-8.728945,15.104302
cm242,6.749372,5.590586,-113.053963,23.318809
cm244,1.097483,5.366312,-22.510467,21.060062
cs134,0.425326,2.690959,-28.348369,10.668878
cs137,-4.267991,1.022022,-6.442801,-0.337778
eu154,0.642084,1.875193,-2.957553,8.037416
nd143,-3.972439,0.797334,-6.405137,-0.669194
nd144,-4.476833,1.279887,-72.752913,1.471931


In [33]:
# The seven nuclides with the largest mean log-PDF contribution.
logpdf_nuc['mean'].nlargest(7)

cm242     6.749372
am242m    3.857434
sm149     1.156759
cm244     1.097483
eu154     0.642084
cs134     0.425326
sm151     0.033077
Name: mean, dtype: float64

In [34]:
# The seven nuclides with the largest maximum log-PDF contribution.
logpdf_nuc['max'].nlargest(7)

cm242     23.318809
cm244     21.060062
am242m    17.370122
am243     15.104302
cs134     10.668878
am241      9.930513
pu242      9.177770
Name: max, dtype: float64