In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

# Data Setup & Functions

In [3]:
from mll_calc.mll_pred import format_XY, convert_g_to_mgUi

In [4]:
def logpdf_calc(row, test_sample, unc):
    """Return the per-entry normal log-density of ``row`` against ``test_sample``.

    Only the entries where ``test_sample`` is greater than zero are evaluated.
    For each of them the density is centred on the ``test_sample`` value, is
    evaluated at the ``row`` value, and uses ``unc`` times the ``row`` value
    as its standard deviation.

    Parameters
    ----------
    row : pandas.Series
        Values the density is evaluated at; must share its index with
        ``test_sample`` so the boolean mask can be applied to it.
    test_sample : pandas.Series
        Values used as the centre of each density.
    unc : float
        Multiplier applied to ``row`` to obtain the standard deviations.

    Returns
    -------
    numpy.ndarray
        One log-density per entry of ``test_sample`` that is greater than zero.
    """
    positive = test_sample > 0
    x_sim = row[positive].tolist()
    sigma = (row * unc)[positive].tolist()
    mu_meas = test_sample[positive].tolist()
    return stats.norm.logpdf(x_sim, loc=mu_meas, scale=sigma)

In [5]:
def loop_sfco(XY, test, pred, unc, lbls, nonlbls):
    """Collect the per-column log-PDF terms behind each test sample's prediction.

    For every row of ``test``, the row of ``pred`` whose ``sim_idx`` equals the
    test index supplies ``pred_idx``, which selects the matching row of ``XY``.
    ``logpdf_calc`` is evaluated for that pair and its sum is checked against
    the ``MaxLogLL`` stored in ``pred``.

    Parameters
    ----------
    XY : pandas.DataFrame
        Frame that ``pred_idx`` values index into via ``.loc``; its columns
        (minus ``nonlbls``) are also the columns selected from ``test``.
    test : pandas.DataFrame
        Samples to process; its index values are matched against
        ``pred['sim_idx']``.
    pred : pandas.DataFrame
        Must provide the columns ``sim_idx``, ``pred_idx`` and ``MaxLogLL``.
    unc : float
        Passed through to ``logpdf_calc``.
    lbls, nonlbls : list of str
        Column names dropped from the rows before the log-PDF calculation.

    Returns
    -------
    pandas.DataFrame
        One row per processed test sample (indexed by the test index) and one
        column per remaining column. Entries are NaN where the test sample
        value is not greater than zero.

    Raises
    ------
    IndexError
        If ``pred`` has no row for a test index.

    Notes
    -----
    Processing stops, after printing ``'mismatch'``, at the first sample whose
    summed log-PDF differs from the stored ``MaxLogLL``; the rows collected up
    to that point are returned.
    """
    xy_cols = [col for col in XY.columns if col not in nonlbls]
    test = test[xy_cols]
    # Column order of the result: what is left once the labels are dropped too.
    feature_cols = [col for col in xy_cols if col not in lbls]

    # Rows are gathered in a list and assembled once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and was quadratic.
    rows = []
    for test_idx, row in test.iterrows():
        test_sample = row.drop(lbls)
        pred_row = pred.loc[pred['sim_idx'] == test_idx]
        pred_idx = pred_row['pred_idx'].values[0]
        train_row = XY.loc[pred_idx].drop(lbls + nonlbls)

        logpdf = logpdf_calc(train_row, test_sample, unc)
        mll = pred_row['MaxLogLL'].values[0]

        # An absolute tolerance avoids spurious failures when two nearly equal
        # values fall on either side of a rounding boundary.
        if not np.isclose(mll, np.sum(logpdf), rtol=0, atol=1e-4):
            print('mismatch')
            break

        rows.append(
            pd.Series(logpdf, index=train_row[test_sample > 0].index, name=test_idx)
        )

    return pd.DataFrame(rows).reindex(columns=feature_cols)

### Train and Test DBs

In [6]:
# Label columns; loop_sfco drops them from each row before the log-PDF calculation.
lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
# Further non-feature columns; loop_sfco removes these from the training columns as well.
nonlbls = ['AvgPowerDensity', 'ModDensity', 'UiWeight']

# Training database, loaded through format_XY (imported from mll_calc.mll_pred).
train_pkl = '~/sims_n_results/simupdates_aug2020/not-scaled_nuc29.pkl'
XY = format_XY(train_pkl)
# NOTE(review): presumably rescales the non-label columns from g to mg per unit of
# initial uranium -- confirm against mll_calc.mll_pred.
XY = convert_g_to_mgUi(XY, lbls+nonlbls)

# Test database, read from a pickle. Unpickling can execute arbitrary code, so only
# load files from a trusted source.
sfco_pkl = '~/sfcompo/format_clean/sfcompo_nuc29.pkl'
sfco = pd.read_pickle(sfco_pkl)

### LL Calc Results

In [16]:
# Root directory holding one sub-directory (and one CSV of the same name) per job.
sfcompo_results = '~/sims_n_results/simupdates_aug2020/sfco29/'

uncs = [0.05, 0.1, 0.15, 0.2]
job_dirs = ['Job{}_unc{}'.format(i, unc) for i, unc in enumerate(uncs)]

# Results of job i are stored under the key 'unc<i>'.
dfs = {}
for i, unc_job in enumerate(job_dirs):
    csv_path = '{0}{1}/{1}.csv'.format(sfcompo_results, unc_job)
    dfs['unc' + str(i)] = pd.read_csv(csv_path)

# Add error columns normalised by the true value, for burnup and enrichment.
for dfname, df in dfs.items():
    for param in ('Burnup', 'Enrichment'):
        df['Relative_{}_Error'.format(param)] = df[param + '_Error'] / df[param]

# For ease of only viewing 5% case for now
preds = dfs['unc0']

### LogPDF (wrt nuclide) results

In [8]:
# Uncertainty multiplier for the log-PDF terms. It should correspond to the job
# behind `preds` (dfs['unc0'], built from uncs[0] = 0.05): loop_sfco prints
# 'mismatch' and stops at the first sample whose recomputed sum disagrees with
# the MaxLogLL stored in `preds`.
unc = 0.05
logpdf_df = loop_sfco(XY, sfco, preds, unc, lbls, nonlbls)

# Explore Errors

### Burnup

In [41]:
# Summary statistics of the relative burnup error over the predictions in `preds`.
preds['Relative_Burnup_Error'].describe()
# nuc15 results, kept here for comparison with the output below:
#count    505.000000
#mean       0.187539
#std        0.155398
#min        0.000408
#25%        0.058485
#50%        0.153846
#75%        0.298556
#max        1.503097

count    505.000000
mean       0.151887
std        0.195778
min        0.000031
25%        0.036301
50%        0.087295
75%        0.183161
max        1.440513
Name: Relative_Burnup_Error, dtype: float64

In [42]:
# Columns displayed when inspecting individual predictions.
to_print = [
    'sim_idx', 'pred_idx', 'pred_CoolingTime', 'AvgPowerDensity',
    'ReactorType', 'pred_ReactorType', 'ReactorType_Score',
    'Enrichment', 'pred_Enrichment', 'Relative_Enrichment_Error',
    'Burnup', 'pred_Burnup', 'Relative_Burnup_Error',
    'OrigenReactor', 'pred_OrigenReactor', 'MaxLogLL',
]
# Predictions whose relative burnup error exceeds 0.49.
large_burnup_err = preds['Relative_Burnup_Error'] > 0.49
preds.loc[large_burnup_err, to_print]

Unnamed: 0,sim_idx,pred_idx,pred_CoolingTime,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
54,GAR-1|SA-13|E6|11,111254,1136.709042,10.0,bwr,bwr,True,2.41,5.23,1.170124,4200.0,8683.04,1.06739,Garigliano-1_BWR,abb8x8-1,-8.741122
55,GAR-1|SA-13|E6|13,50762,0.826167,10.0,bwr,bwr,True,2.41,4.57,0.896266,5580.0,9758.61,0.748855,Garigliano-1_BWR,ge7x7-0,11.5444
56,GAR-1|SA-13|E6|1,175085,109.336506,25.0,bwr,bwr,True,2.41,5.11,1.120332,6080.0,9304.01,0.530265,Garigliano-1_BWR,atrium10x10-9,10.778134
57,GAR-1|SA-13|E6|16,40811,829.867541,10.0,bwr,bwr,True,2.41,4.4,0.825726,6640.0,11427.93,0.721074,Garigliano-1_BWR,ge7x7-0,-8.06151
58,GAR-1|SA-13|E6|10,239103,8.08754,20.0,bwr,bwr,True,2.41,5.05,1.095436,8140.0,13912.97,0.70921,Garigliano-1_BWR,svea64-1,23.738365
60,GAR-1|SA-13|E6|7,59347,334.379204,25.0,bwr,bwr,True,2.41,4.57,0.896266,8320.0,13478.85,0.620054,Garigliano-1_BWR,ge7x7-0,23.983706
61,GAR-1|SA-13|E6|5,171906,242.633977,10.0,bwr,bwr,True,2.41,5.11,1.120332,8640.0,13992.11,0.619457,Garigliano-1_BWR,atrium10x10-9,3.973372
62,GAR-1|A-106|D4|1,41044,25.487227,10.0,bwr,bwr,True,2.1,4.4,1.095238,8850.0,17668.74,0.996468,Garigliano-1_BWR,ge7x7-0,-449.234902
63,GAR-1|A-106|E5|1,309124,29.215503,41.0,bwr,pwr,False,2.1,5.48,1.609524,8930.0,21793.78,1.440513,Garigliano-1_BWR,s18x18,-516.102948
64,GAR-1|A-106|C3|1,232501,0.462278,10.0,bwr,bwr,True,2.1,5.05,1.404762,9140.0,17118.7,0.872943,Garigliano-1_BWR,svea64-1,-491.23027


### Enrichment

In [21]:
# Summary statistics of the relative enrichment error over the predictions in `preds`.
preds['Relative_Enrichment_Error'].describe()
# nuc15 results, kept here for comparison with the output below:
#count    505.000000
#mean       0.369961
#std        0.289978
#min        0.000000
#25%        0.119444
#50%        0.288235
#75%        0.611765
#max        2.853727

count    505.000000
mean       0.165083
std        0.248423
min        0.000000
25%        0.047923
50%        0.083004
75%        0.170455
max        1.750000
Name: Relative_Enrichment_Error, dtype: float64

In [22]:
# Predictions whose relative enrichment error exceeds 0.49, restricted to the `to_print` columns.
preds.loc[preds['Relative_Enrichment_Error'] > 0.49, to_print]

Unnamed: 0,sim_idx,pred_idx,pred_CoolingTime,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
54,GAR-1|SA-13|E6|11,111254,1136.709042,10.0,bwr,bwr,True,2.41,5.23,1.170124,4200.0,8683.04,1.06739,Garigliano-1_BWR,abb8x8-1,-8.741122
55,GAR-1|SA-13|E6|13,50762,0.826167,10.0,bwr,bwr,True,2.41,4.57,0.896266,5580.0,9758.61,0.748855,Garigliano-1_BWR,ge7x7-0,11.5444
56,GAR-1|SA-13|E6|1,175085,109.336506,25.0,bwr,bwr,True,2.41,5.11,1.120332,6080.0,9304.01,0.530265,Garigliano-1_BWR,atrium10x10-9,10.778134
57,GAR-1|SA-13|E6|16,40811,829.867541,10.0,bwr,bwr,True,2.41,4.4,0.825726,6640.0,11427.93,0.721074,Garigliano-1_BWR,ge7x7-0,-8.06151
58,GAR-1|SA-13|E6|10,239103,8.08754,20.0,bwr,bwr,True,2.41,5.05,1.095436,8140.0,13912.97,0.70921,Garigliano-1_BWR,svea64-1,23.738365
59,GAR-1|SA-13|E6|3,165126,242.633977,25.0,bwr,bwr,True,2.41,4.23,0.755187,8260.0,11307.48,0.368944,Garigliano-1_BWR,atrium10x10-9,11.403446
60,GAR-1|SA-13|E6|7,59347,334.379204,25.0,bwr,bwr,True,2.41,4.57,0.896266,8320.0,13478.85,0.620054,Garigliano-1_BWR,ge7x7-0,23.983706
61,GAR-1|SA-13|E6|5,171906,242.633977,10.0,bwr,bwr,True,2.41,5.11,1.120332,8640.0,13992.11,0.619457,Garigliano-1_BWR,atrium10x10-9,3.973372
62,GAR-1|A-106|D4|1,41044,25.487227,10.0,bwr,bwr,True,2.1,4.4,1.095238,8850.0,17668.74,0.996468,Garigliano-1_BWR,ge7x7-0,-449.234902
63,GAR-1|A-106|E5|1,309124,29.215503,41.0,bwr,pwr,False,2.1,5.48,1.609524,8930.0,21793.78,1.440513,Garigliano-1_BWR,s18x18,-516.102948


# Max Log LL

In [23]:
# MaxLogLL summary for the first and the last uncertainty job.
for job_key in ('unc0', 'unc3'):
    print(dfs[job_key]['MaxLogLL'].describe())

count     505.000000
mean     -134.927067
std       246.620348
min     -2554.382504
25%      -136.777791
50%       -50.368502
75%       -18.304648
max        64.665774
Name: MaxLogLL, dtype: float64
count    490.000000
mean      10.182512
std       23.947474
min     -130.126567
25%       -3.514626
50%        3.204138
75%       21.907521
max       98.435790
Name: MaxLogLL, dtype: float64


# Nuclide Contributions to LL

In [30]:
# The 29 nuclide names, written one element per line.
nuc29 = (
    'am241 am242m am243 '
    'cm242 cm244 '
    'cs134 cs137 '
    'eu154 '
    'nd143 nd144 nd145 nd146 nd148 nd150 '
    'np237 '
    'pu238 pu239 pu240 pu241 pu242 '
    'sm147 sm149 sm150 sm151 sm152 '
    'u234 u235 u236 u238'
).split()

In [25]:
# Show up to 10 random rows. The fixed seed makes the displayed rows reproducible
# across re-runs, and the min() guard avoids an error when fewer than 10 rows exist.
logpdf_df.sample(min(10, len(logpdf_df)), random_state=0)

Unnamed: 0,am241,am242m,am243,cm242,cm244,cs134,cs137,eu154,nd143,nd144,...,pu242,sm147,sm149,sm150,sm151,sm152,u234,u235,u236,u238
OHI-2|17G|F4|89G10,0.956188,9.406406,3.969836,6.001097,4.815012,-118.736466,-1.563019,-23.84024,1.535227,-4.073728,...,2.314268,,,,,,-7.793109,-4.485511,0.611765,-4.777245
OBR-1|BE124|G7|P1,-5.444719,0.53685,6.592891,6.812961,4.134567,,-21.287002,,,,...,3.496815,,,,,,,-20.805889,-4.005002,-4.780431
QC-1|LYD396|G5|5A2,0.992583,-29.862941,-0.021991,,3.358108,6.280034,-1.164611,,-8.18067,,...,0.515939,-3.08418,7.790455,-0.119148,6.41878,-0.882086,-12.04982,-42.39354,-14.614976,
YNK-1|E6|C-F1|N-17,,,,,,,,,,,...,1.37633,,,,,,-44.458208,-3.406446,-0.483939,-4.781116
YNK-1|E6|SE-F5|T-184,,,,,,,,,,,...,2.005742,,,,,,-30.452485,-1.612231,0.673433,-4.774461
TMI-1|NJ05YU|H6|B2,3.145574,,2.32638,,,,-2.607003,,2.056228,,...,2.152635,-1.594364,2.262596,1.282553,5.081192,3.984467,3.712843,-0.944751,-1.339206,
TVC-1|509-069|A1|7,,6.598395,-10.34875,-89.044034,-39.349524,,,,,,...,-12.301208,,,,,,,-10.720469,-16.933861,-4.811152
NOV-4|213|64|15,,,-4.778847,,0.893613,,,,,,...,2.702686,,,,,,3.051261,-4.246243,0.296701,-4.768149
YNK-1|E6|C-A1|T-171,,,,,,,,,,,...,-13.58558,,,,,,-33.48218,-3.394085,-6.614708,-4.771901
TVC-1|509-069|E11|2K,,3.560556,-14.873949,-85.020683,-18.497721,,,,,,...,-21.602647,,,,,,,-4.69952,-12.353179,-4.798846


In [40]:
# Print a reduced describe() summary of the log-PDF terms for each nuclide.
summary_stats = ['count', 'mean', 'std', 'min', 'max']
for nuc in nuc29:
    desc = logpdf_df[nuc].describe()
    print(desc[summary_stats])

count    237.000000
mean     -43.111645
std       97.121443
min     -794.506648
max        7.443631
Name: am241, dtype: float64
count    110.000000
mean     -28.865569
std       54.989217
min     -186.267401
max       13.108325
Name: am242m, dtype: float64
count    203.000000
mean      -7.205421
std       17.722737
min     -101.038273
max        9.315329
Name: am243, dtype: float64
count    214.000000
mean      -7.635268
std       36.916614
min     -302.590970
max       18.925809
Name: cm242, dtype: float64
count    269.000000
mean     -14.969502
std       38.596167
min     -175.276748
max       17.242042
Name: cm244, dtype: float64
count    113.000000
mean     -29.159047
std       56.604572
min     -180.668807
max        9.725492
Name: cs134, dtype: float64
count    185.000000
mean      -4.224876
std       11.265662
min      -70.572595
max        4.037617
Name: cs137, dtype: float64
count    100.000000
mean     -23.055338
std       42.402957
min     -166.955348
max        8.000693
Nam