In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

# Data Setup & Functions

In [3]:
from mll_calc.mll_pred import format_XY

In [4]:
def logpdf_calc(row, test_sample, unc):
    """Evaluate per-entry normal log-pdfs of a simulated row against a test sample.

    Only entries where ``test_sample`` is strictly positive take part. For each
    of them the simulated value from ``row`` is scored under a normal
    distribution centred on the corresponding ``test_sample`` value, with a
    standard deviation of ``unc`` times the simulated value.

    Parameters
    ----------
    row : pandas.Series
        Simulated values; indexed like ``test_sample``.
    test_sample : pandas.Series
        Measured values; entries that are not > 0 are skipped.
    unc : float
        Relative uncertainty multiplied onto ``row`` to obtain the scale.

    Returns
    -------
    numpy.ndarray
        One log-pdf value per retained entry, in the order of ``row``.
    """
    # Build the "measured > 0" mask once and reuse it for all three selections.
    measured = test_sample > 0
    simulated_vals = row[measured].values.tolist()
    scale_vals = row.multiply(unc)[measured].values.tolist()
    measured_vals = test_sample[measured].values.tolist()
    return stats.norm.logpdf(simulated_vals, loc=measured_vals, scale=scale_vals)

In [5]:
def loop_db(XY, pred, unc, lbls, nonlbls):
    """Recompute the per-column log-pdf terms behind each prediction's MaxLogLL.

    For every row of ``pred`` the test sample (``sim_idx``) and the predicted
    training entry (``pred_idx``) are looked up in ``XY``, the label columns
    are dropped, and ``logpdf_calc`` is evaluated on the remaining columns.
    The sum of those terms is checked against the stored ``MaxLogLL`` after
    rounding both to 4 decimals.

    Parameters
    ----------
    XY : pandas.DataFrame
        Table looked up by index label via ``.loc``; must contain every column
        named in ``lbls`` and ``nonlbls``.
    pred : pandas.DataFrame
        Must provide the columns ``'sim_idx'``, ``'pred_idx'`` and
        ``'MaxLogLL'``.
    unc : float
        Relative uncertainty forwarded to ``logpdf_calc``.
    lbls, nonlbls : list of str
        Column names removed from each ``XY`` row before the calculation.

    Returns
    -------
    pandas.DataFrame
        One row per processed prediction, indexed by ``sim_idx``. Columns are
        the non-label columns of the first test sample; columns whose test
        value is not > 0 are NaN for that row. If the recomputed sum disagrees
        with ``MaxLogLL``, ``'mismatch'`` is printed, processing stops, and the
        rows gathered so far are returned (an empty frame if there are none).
    """
    # The columns to drop do not change between iterations, so build the list once.
    all_lbls = lbls + nonlbls

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; collect the rows and assemble the frame a single time instead.
    collected = []
    columns = None
    for _, row in pred.iterrows():
        sim_idx = row['sim_idx']
        pred_idx = row['pred_idx']

        test_sample = XY.loc[sim_idx].drop(all_lbls)
        train_row = XY.loc[pred_idx].drop(all_lbls)

        logpdf = logpdf_calc(train_row, test_sample, unc)
        mll = row['MaxLogLL']

        # Sanity check: the per-column terms must add up to the stored likelihood.
        if round(mll, 4) != round(np.sum(logpdf), 4):
            print('mismatch')
            break

        if columns is None:
            # Column layout is fixed by the first test sample.
            columns = test_sample.index.to_list()
        collected.append(
            pd.Series(logpdf, index=train_row[test_sample > 0].index, name=sim_idx)
        )

    if not collected:
        return pd.DataFrame()
    # Each Series name becomes a row label; columns a Series lacks are filled with NaN.
    return pd.DataFrame(collected, columns=columns)

### Train and Test DBs

In [6]:
# Columns of XY that are not part of the log-pdf calculation: loop_db drops
# lbls + nonlbls from every XY row before calling logpdf_calc.
lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
nonlbls = ['AvgPowerDensity', 'ModDensity', 'UiWeight']

# Training-set file passed to format_XY (imported from mll_calc.mll_pred).
# NOTE(review): format_XY is not visible here; XY is used below as a DataFrame
# looked up by index label with .loc -- confirm against mll_calc.
train_pkl = '~/sims_n_results/simupdates_aug2020/not-scaled_nuc15.pkl'
XY = format_XY(train_pkl)

### LL Calc Results

In [7]:
# Directory that contains one sub-directory per uncertainty job.
results = '~/sims_n_results/simupdates_aug2020/train15/'

# Relative uncertainties of the jobs; the list position doubles as the job number.
uncs = [0.05, 0.1, 0.15, 0.2]
job_dirs = ['Job' + str(i) + '_unc' + str(unc) for i, unc in enumerate(uncs)]

# Every job directory holds a CSV named after the directory itself.
# The loaded frames are keyed 'unc0', 'unc1', ... by job number.
dfs = {}
for i, unc_job in enumerate(job_dirs):
    csv_path = results + unc_job + '/' + unc_job + '.csv'
    dfs['unc' + str(i)] = pd.read_csv(csv_path)

# Add relative-error columns (absolute error divided by the true value) to
# each results frame in place.
for dfname, df in dfs.items():
    df['Relative_Burnup_Error'] = df['Burnup_Error'].div(df['Burnup'])
    df['Relative_Enrichment_Error'] = df['Enrichment_Error'].div(df['Enrichment'])
    df['Relative_CoolingTime_Error'] = df['CoolingTime_Error'].div(df['CoolingTime'])

# For ease of only viewing 5% case for now
preds = dfs['unc0']

### LogPDF (wrt nuclide) results

In [8]:
# Recompute the per-column log-pdf terms for the predictions in preds (dfs['unc0']).
# unc has to be the uncertainty those results were produced with (uncs[0] = 0.05),
# because loop_db compares the recomputed sum against each row's MaxLogLL.
unc = 0.05
logpdf_df = loop_db(XY, preds, unc, lbls, nonlbls)

# Explore Errors

### Burnup

In [9]:
# Summary statistics of the relative burnup error for the 5% uncertainty case.
preds['Relative_Burnup_Error'].describe()
# describe() output from the old nuc15 training set, kept for comparison:
#count    24678.000000
#mean         0.007150
#std          0.012990
#min          0.000000
#25%          0.000000
#50%          0.000000
#75%          0.009484
#max          0.230009

count    9600.000000
mean        0.002187
std         0.006690
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         0.064146
Name: Relative_Burnup_Error, dtype: float64

In [10]:
# Columns shown whenever individual predictions are inspected (reused by the
# enrichment and cooling-time cells further down).
to_print = ['sim_idx', 'pred_idx', 'AvgPowerDensity', 
            'ReactorType', 'pred_ReactorType', 'ReactorType_Score', 
            'Enrichment', 'pred_Enrichment', 'Relative_Enrichment_Error', 
            'Burnup', 'pred_Burnup', 'Relative_Burnup_Error',
            'CoolingTime', 'pred_CoolingTime', 'Relative_CoolingTime_Error',
            'OrigenReactor', 'pred_OrigenReactor', 'MaxLogLL']
# Predictions whose relative burnup error exceeds 5%.
preds.loc[preds['Relative_Burnup_Error'] > 0.05, to_print]

Unnamed: 0,sim_idx,pred_idx,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
947,36115,217554,10.0,bwr,bwr,True,2.74,2.73,0.00365,19250.15,20277.55,0.053371,5606.394331,5719.115255,0.020106,ge7x7-0,svea64-1,-44.586598
2385,91257,212157,10.0,bwr,bwr,True,3.1,2.73,0.119355,11723.37,10971.36,0.064146,6630.505199,6542.408877,0.013287,abb8x8-1,svea64-1,-37.080794
3328,127317,187858,10.0,bwr,bwr,True,0.5,0.5,0.0,43096.56,45382.93,0.053052,6902.526051,6980.095432,0.011238,atrium10x10-9,svea64-1,-51.067163
5373,205617,145136,25.0,bwr,bwr,True,2.01,1.87,0.069652,15572.83,14697.28,0.056223,6542.408877,6459.941245,0.012605,svea64-1,atrium10x10-9,-36.832438
6101,233338,51898,10.0,bwr,bwr,True,5.05,4.57,0.09505,54750.28,51880.09,0.052423,6980.095432,6973.335475,0.000968,svea64-1,ge7x7-0,-54.541367
8508,325534,348213,25.0,pwr,pwr,True,3.99,4.03,0.010025,21665.77,22879.33,0.056013,3241.942859,3308.216103,0.020442,bw15x15,vver1000,-50.640551
8715,333237,265195,25.0,pwr,pwr,True,0.5,0.5,0.0,27280.53,25737.31,0.056569,6416.609153,6229.120831,0.029219,vver1000,w17x17,-52.80776
9363,358076,281333,32.0,pwr,pwr,True,3.6,3.82,0.061111,14469.9,15314.05,0.058338,5694.186585,5764.417053,0.012334,vver440,w17x17,-37.056899
9416,360178,360238,32.0,pwr,pwr,True,4.25,4.25,0.0,55362.0,58332.94,0.053664,7503.128582,7503.128582,0.0,vver440_4.25,vver440_4.25,-59.68292
9422,360316,359202,32.0,pwr,pwr,True,4.38,4.25,0.02968,3876.0,3682.09,0.050028,4368.036325,4097.992512,0.061823,vver440_4.38,vver440_4.25,-17.940922


### Enrichment

In [11]:
# Summary statistics of the relative enrichment error for the 5% uncertainty case.
preds['Relative_Enrichment_Error'].describe()
# describe() output from the old nuc15 training set, kept for comparison:
#count    24678.000000
#mean         0.073357
#std          0.227707
#min          0.000000
#25%          0.000000
#50%          0.000000
#75%          0.069652
#max          4.760000

count    9600.000000
mean        0.004361
std         0.019275
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max         0.303448
Name: Relative_Enrichment_Error, dtype: float64

In [12]:
# Predictions whose relative enrichment error exceeds 20%.
preds.loc[preds['Relative_Enrichment_Error'] > 0.2, to_print]

Unnamed: 0,sim_idx,pred_idx,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
306,11660,203180,10.0,bwr,bwr,True,1.57,2.01,0.280255,63534.57,64115.97,0.009151,1706.758096,1734.919436,0.0165,ge7x7-0,svea64-1,-58.719346
394,15118,206637,25.0,bwr,bwr,True,1.57,2.01,0.280255,70500.0,70336.3,0.002322,6160.818841,6159.232422,0.000258,ge7x7-0,svea64-1,-54.080553
482,18456,209975,20.0,bwr,bwr,True,1.57,2.01,0.280255,70500.0,70336.3,0.002322,3407.329596,3439.807457,0.009532,ge7x7-0,svea64-1,-56.953919
5402,206639,15119,25.0,bwr,bwr,True,2.01,1.57,0.218905,70336.3,70500.0,0.002327,6980.095432,6973.335475,0.000968,svea64-1,ge7x7-0,-52.939402
5532,211622,20102,25.0,bwr,bwr,True,2.01,1.57,0.218905,70336.3,70500.0,0.002327,0.462278,0.504127,0.090528,svea64-1,ge7x7-0,-60.721039
7083,270837,274617,35.0,pwr,pwr,True,1.46,1.88,0.287671,61271.92,61271.92,0.0,6652.692057,6652.692057,0.0,w17x17,w17x17,-54.340864
7177,274580,270800,35.0,pwr,pwr,True,1.88,1.46,0.223404,61271.92,61271.92,0.0,2163.111412,2163.111412,0.0,w17x17,w17x17,-60.122903
7209,275826,272046,41.0,pwr,pwr,True,1.88,1.46,0.223404,61271.92,61271.92,0.0,187.634863,187.634863,0.0,w17x17,w17x17,-62.579747
8816,337257,341037,25.0,pwr,pwr,True,1.45,1.89,0.303448,38668.6,38668.6,0.0,6416.609153,6416.609153,0.0,vver1000,vver1000,-53.307329
8856,338775,342555,35.0,pwr,pwr,True,1.45,1.89,0.303448,60004.12,60004.12,0.0,1170.526895,1170.526895,0.0,vver1000,vver1000,-61.409897


### Cooling Time

In [13]:
# Summary statistics of the relative cooling-time error for the 5% uncertainty case.
# The column contains inf values, which is why mean is inf and std is NaN.
# NOTE(review): presumably these come from rows with CoolingTime == 0 (division
# by zero when the relative error is computed) -- confirm against the data.
preds['Relative_CoolingTime_Error'].describe()
# describe() output from the old nuc15 training set, kept for comparison:
#count    2.466300e+04
#mean              inf
#std               NaN
#min      0.000000e+00
#25%      2.556960e-02
#50%      3.208206e-01
#75%      1.000000e+00
#max               inf

count    9600.000000
mean             inf
std              NaN
min         0.000000
25%         0.004190
50%         0.013621
75%         0.047517
max              inf
Name: Relative_CoolingTime_Error, dtype: float64

In [14]:
# Predictions with a finite relative cooling-time error above 3 (300%); the
# infinite entries are excluded explicitly.
preds.loc[(preds['Relative_CoolingTime_Error'] != np.inf) & (preds['Relative_CoolingTime_Error'] > 3), to_print]

Unnamed: 0,sim_idx,pred_idx,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,CoolingTime,pred_CoolingTime,Relative_CoolingTime_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
5357,204964,204965,25.0,bwr,bwr,True,2.01,2.01,0.0,918.08,918.08,0.0,21.396677,101.346301,3.736544,svea64-1,svea64-1,15.085492
7040,269343,269344,25.0,pwr,pwr,True,1.46,1.46,0.0,49156.39,49156.39,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-59.055322
7142,273243,273244,25.0,pwr,pwr,True,1.88,1.88,0.0,58841.98,58841.98,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-61.133206
7161,273963,273964,35.0,pwr,pwr,True,1.88,1.88,0.0,25737.31,25737.31,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-50.603788
7175,274383,274384,35.0,pwr,pwr,True,1.88,1.88,0.0,49156.39,49156.39,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-59.425183
7176,274503,274504,35.0,pwr,pwr,True,1.88,1.88,0.0,58841.98,58841.98,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-61.494392
7233,276723,276724,25.0,pwr,pwr,True,2.92,2.92,0.0,37574.08,37574.08,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-55.531389
7239,276903,276904,25.0,pwr,pwr,True,2.92,2.92,0.0,49156.39,49156.39,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-59.177956
7266,277923,277924,35.0,pwr,pwr,True,2.92,2.92,0.0,35195.07,35195.07,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-54.748672
7269,278043,278044,35.0,pwr,pwr,True,2.92,2.92,0.0,43038.84,43038.84,0.0,6.445991,36.914607,4.726754,w17x17,w17x17,-57.647604


# Max Log LL

In [15]:
# Compare the MaxLogLL distribution of the first job (unc0, uncs[0] = 0.05)
# with that of the last job (unc3, uncs[3] = 0.2).
print(dfs['unc0']['MaxLogLL'].describe())
print(dfs['unc3']['MaxLogLL'].describe())

count    9600.000000
mean      -38.294298
std        19.291833
min       -65.162488
25%       -52.718086
50%       -43.620235
75%       -29.973796
max        38.627426
Name: MaxLogLL, dtype: float64
count    9600.000000
mean      -58.051921
std        19.337723
min       -84.003339
25%       -72.697537
50%       -63.698441
75%       -49.036952
max        17.588901
Name: MaxLogLL, dtype: float64


# Nuclide Contributions to LL

In [16]:
# The 15 nuclide column names of logpdf_df summarised in the cells below,
# listed element by element (Cs, Eu, Ba, Sm, Pu).
nuc15 = ['cs133', 'cs134', 'cs135', 'cs137', 
         'eu153', 'eu154', 'ba136', 'ba138', 
         'sm149', 'sm150', 'sm152', 
         'pu239', 'pu240', 'pu241', 'pu242']

In [17]:
# Show 10 randomly chosen rows of the per-nuclide log-pdf table. No random_state
# is passed, so a different selection is drawn on every run.
logpdf_df.sample(10)

Unnamed: 0,ba136,ba138,cs133,cs134,cs135,cs137,eu153,eu154,pu239,pu240,pu241,pu242,sm149,sm150,sm152
209556,-1.213472,-5.237093,-5.073123,0.433431,-3.840486,-4.939816,-2.883951,-0.354339,-6.0432,-5.585674,-4.415065,-4.969854,1.537381,-3.708876,-2.814307
183274,2.419566,-2.620044,-2.620956,2.640139,-0.540602,-2.377425,0.199315,3.361657,-5.580962,-3.973411,-2.185674,-0.345351,1.999833,-1.025099,-0.621879
276553,-0.706364,-4.991378,-4.875935,-1.728869,-3.853859,-4.854465,-2.57907,-0.746049,-6.447176,-5.341387,-4.994915,-4.173568,1.004868,-3.498018,-2.530374
17610,0.002867,-4.545943,-4.474144,-0.417885,-3.06487,-4.31289,-2.099745,0.357947,-6.009309,-5.283946,-4.169824,-3.840755,1.577231,-3.033989,-2.333213
278891,0.209463,-4.614669,-4.536993,-1.405224,-3.117274,-4.507169,-2.056932,-0.115981,-6.353752,-5.020755,-4.674141,-3.277431,0.862178,-3.103303,-2.253018
56294,-0.168056,-4.711953,-4.631412,-0.91611,-4.191924,-4.507551,-1.963622,0.233452,-6.196032,-4.792429,-4.095487,-2.58853,1.149365,-3.051328,-2.306981
84066,3.789681,-2.388884,-2.351997,0.741063,-0.915934,-2.311965,0.960014,4.018201,-4.966366,-2.196812,-0.164209,3.50391,1.51148,-0.601484,0.073559
180901,-0.619275,-5.128841,-5.013283,-2.340008,-4.196468,-5.030632,-2.517416,-0.781399,-6.354207,-5.122138,-4.843878,-3.593432,0.938117,-3.556209,-2.655009
283295,-1.653468,-5.587553,-5.368624,-0.398629,-4.293792,-5.30603,-3.18745,-1.05569,-6.604048,-5.702255,-5.05663,-5.017441,0.645287,-4.152112,-2.866989
291445,-0.316309,-4.743317,-4.624684,0.321021,-3.058975,-4.494096,-2.491397,-0.168115,-6.244437,-5.305614,-4.634156,-4.590914,1.215926,-3.320382,-2.457324


In [18]:
# Print mean / std / min / max of the log-pdf contribution, one nuclide at a time.
for nuc in nuc15:
    desc = logpdf_df[nuc].describe()
    selected = desc[['mean', 'std', 'min', 'max']]
    print(selected)

mean   -4.405521
std     0.965536
min    -6.029460
max    -0.748743
Name: cs133, dtype: float64
mean     0.553483
std      2.621435
min     -8.562148
max     10.851586
Name: cs134, dtype: float64
mean   -3.342744
std     1.198377
min    -8.581306
max     1.405381
Name: cs135, dtype: float64
mean   -4.276594
std     1.035930
min    -5.794990
max    -0.277484
Name: cs137, dtype: float64
mean   -1.885695
std     1.335974
min    -3.630491
max     2.702843
Name: eu153, dtype: float64
mean    0.674270
std     1.913928
min    -2.123420
max     8.071268
Name: eu154, dtype: float64
mean    0.016693
std     1.819714
min    -3.955707
max     5.819496
Name: ba136, dtype: float64
mean   -4.524608
std     1.035798
min    -6.281010
max    -0.781399
Name: ba138, dtype: float64
mean    1.254416
std     0.479296
min    -4.785105
max     3.130911
Name: sm149, dtype: float64
mean   -2.937306
std     1.119736
min    -4.654297
max     1.069201
Name: sm150, dtype: float64
mean   -2.100939
std     0.974940
mi

In [19]:
# Gather mean / std / min / max of every nuclide's log-pdf column into one
# table: one row per nuclide, one column per statistic.
arr = []
for nuc in nuc15:
    desc = logpdf_df[nuc].describe()
    stats_row = (desc['mean'], desc['std'], desc['min'], desc['max'])
    arr += [stats_row]
logpdf_nuc = pd.DataFrame(data=arr, columns=['mean', 'std', 'min', 'max'], index=nuc15)

In [20]:
# Display the per-nuclide summary table (rows: nuclides, columns: mean/std/min/max).
logpdf_nuc

Unnamed: 0,mean,std,min,max
cs133,-4.405521,0.965536,-6.02946,-0.748743
cs134,0.553483,2.621435,-8.562148,10.851586
cs135,-3.342744,1.198377,-8.581306,1.405381
cs137,-4.276594,1.03593,-5.79499,-0.277484
eu153,-1.885695,1.335974,-3.630491,2.702843
eu154,0.67427,1.913928,-2.12342,8.071268
ba136,0.016693,1.819714,-3.955707,5.819496
ba138,-4.524608,1.035798,-6.28101,-0.781399
sm149,1.254416,0.479296,-4.785105,3.130911
sm150,-2.937306,1.119736,-4.654297,1.069201


In [21]:
# Summarise each per-nuclide statistic (mean/std/min/max columns) across the 15 nuclides.
logpdf_nuc.describe()

Unnamed: 0,mean,std,min,max
count,15.0,15.0,15.0,15.0
mean,-2.552953,1.39939,-5.993224,2.938444
std,2.258229,0.710644,2.129219,4.100783
min,-6.035644,0.479296,-8.581306,-3.662861
25%,-4.341057,1.005369,-8.095529,0.16965
50%,-2.937306,1.198377,-6.02946,1.772255
75%,-0.934501,1.793298,-4.305002,5.304357
max,1.254416,2.999151,-2.12342,10.851586
