In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

# Data Setup & Functions

In [3]:
from mll_calc.mll_pred import format_XY, convert_g_to_mgUi

In [4]:
def logpdf_calc(row, test_sample, unc):
    y_sim = row[test_sample>0].values.tolist()
    std = row.multiply(unc)[test_sample>0].values.tolist()
    y_mes = test_sample[test_sample>0].values.tolist()
    logpdf_list = stats.norm.logpdf(y_sim, loc=y_mes, scale=std)
    return logpdf_list

In [5]:
def loop_sfco(XY, test, pred, unc, lbls, nonlbls):
    xy_cols = XY.columns.tolist()
    for col in nonlbls: xy_cols.remove(col)
    test = test[xy_cols]
    
    logpdf_df = pd.DataFrame()
    for test_idx, row in test.iterrows():
        test_sample = row.drop(lbls)
        test_answer = row[lbls]
        pred_row = pred.loc[pred['sim_idx'] == test_idx]
        pred_idx = pred_row['pred_idx'].values[0]
        train_row = XY.loc[pred_idx].drop(lbls+nonlbls)
        
        logpdf = logpdf_calc(train_row, test_sample, unc)
        mll = pred_row['MaxLogLL'].values[0]
        
        if round(mll, 4) != round(np.sum(logpdf), 4):
            print('mismatch')
            break
        
        logpdf = pd.Series(logpdf, index=train_row[test_sample>0].index, name=test_idx)
        if logpdf_df.empty:
            logpdf_df = pd.DataFrame(columns = test_sample.index.to_list())
        logpdf_df = logpdf_df.append(logpdf)
    return logpdf_df

### Train and Test DBs

In [6]:
lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
nonlbls = ['AvgPowerDensity', 'ModDensity', 'UiWeight']

train_pkl = '~/sims_n_results/simupdates_aug2020/not-scaled_nuc15.pkl'
XY = format_XY(train_pkl)
XY = convert_g_to_mgUi(XY, lbls+nonlbls)

sfco_pkl = '~/sfcompo/format_clean/sfcompo_nuc15.pkl'
sfco = pd.read_pickle(sfco_pkl)

### LL Calc Results

In [7]:
sfcompo_results = '~/sims_n_results/simupdates_aug2020/sfco15/'

uncs = [0.05, 0.1, 0.15, 0.2]
job_dirs = ['Job' + str(i) + '_unc' + str(unc) for i, unc in enumerate(uncs)]
dfs = {}
for i, unc_job in enumerate(job_dirs):
    dfs['unc' + str(i)] = pd.read_csv(sfcompo_results + unc_job + '/' + unc_job + '.csv')

for dfname in list(dfs.keys()):
    df = dfs[dfname]
    df['Relative_Burnup_Error'] = df['Burnup_Error'] / df['Burnup']
    df['Relative_Enrichment_Error'] = df['Enrichment_Error'] / df['Enrichment']

# For ease of only viewing 5% case for now
preds = dfs['unc0']

### LogPDF (wrt nuclide) results

In [8]:
unc = 0.05
logpdf_df = loop_sfco(XY, sfco, preds, unc, lbls, nonlbls)

# Explore Errors

### Burnup

In [9]:
preds['Relative_Burnup_Error'].describe()
#old results for reference:
#count    544.000000
#mean       0.332184
#std        0.285241
#min        0.000074
#25%        0.193478
#50%        0.297102
#75%        0.393373
#max        2.743641

count    505.000000
mean       0.187539
std        0.155398
min        0.000408
25%        0.058485
50%        0.153846
75%        0.298556
max        1.503097
Name: Relative_Burnup_Error, dtype: float64

In [10]:
to_print = ['sim_idx', 'pred_idx', 'pred_CoolingTime', 'AvgPowerDensity', 
            'ReactorType', 'pred_ReactorType', 'ReactorType_Score', 
            'Enrichment', 'pred_Enrichment', 'Relative_Enrichment_Error', 
            'Burnup', 'pred_Burnup', 'Relative_Burnup_Error',
            'OrigenReactor', 'pred_OrigenReactor', 'MaxLogLL']
preds.loc[preds['Relative_Burnup_Error'] > 0.49, to_print]

Unnamed: 0,sim_idx,pred_idx,pred_CoolingTime,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
24,FDN-1|2F1ZN2|C3|UB,312854,1083.936839,41.0,bwr,pwr,False,4.9,0.51,0.895918,47490.0,18330.69,0.614009,Fukushima-Daini-1_BWR,bw15x15,-9.539628
38,FDN-2|2F2D3|H5|TU302,244758,1407.1767,41.0,bwr,pwr,False,3.4,0.51,0.85,31400.0,15867.31,0.494672,Fukushima-Daini-2_BWR,ce14x14,0.743836
39,FDN-2|2F2D3|A4|TU306,287604,2011.587621,25.0,bwr,pwr,False,3.4,0.5,0.852941,32300.0,16206.28,0.498258,Fukushima-Daini-2_BWR,s18x18,-0.007534
49,FDN-2|2F2D8|H5|TU503,187444,21.396677,10.0,bwr,bwr,True,3.4,0.5,0.852941,55600.0,27569.97,0.504137,Fukushima-Daini-2_BWR,svea64-1,5.905322
54,GAR-1|SA-13|E6|11,114548,464.008668,25.0,bwr,bwr,True,2.41,5.23,1.170124,4200.0,7656.06,0.822871,Garigliano-1_BWR,abb8x8-1,16.508729
55,GAR-1|SA-13|E6|13,171792,812.044584,10.0,bwr,bwr,True,2.41,5.11,1.120332,5580.0,9700.27,0.7384,Garigliano-1_BWR,atrium10x10-9,14.079438
64,GAR-1|A-106|C3|1,166978,7097.519283,10.0,bwr,bwr,True,2.1,4.23,1.014286,9140.0,14697.28,0.608018,Garigliano-1_BWR,atrium10x10-9,-112.530928
202,NOV-4|213|25|9,290040,0.000776,41.0,pwr,pwr,True,3.6,0.5,0.861111,27600.0,11834.22,0.571224,Novovoronezh-4_VVER-440,s18x18,-28.451755
236,NPD-1|1022|A|1,30358,6973.335475,10.0,phwr,bwr,False,0.711,2.74,2.853727,791.0,1979.95,1.503097,Nuclear Power Demonstration Reactor-1_CANDU,ge7x7-0,-24.645155
284,OBR-1|BE210|G14|P5(1),241572,854.157139,25.0,pwr,bwr,False,2.83,5.05,0.784452,30100.0,45382.93,0.507739,Obrigheim-1_PWR,svea64-1,-20.559729


### Enrichment

In [11]:
preds['Relative_Enrichment_Error'].describe()

count    505.000000
mean       0.369961
std        0.289978
min        0.000000
25%        0.119444
50%        0.288235
75%        0.611765
max        2.853727
Name: Relative_Enrichment_Error, dtype: float64

In [12]:
preds.loc[preds['Relative_Enrichment_Error'] > 0.8, to_print]

Unnamed: 0,sim_idx,pred_idx,pred_CoolingTime,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
16,COO-1|CZ346|ADD2974|J,307073,6067.268722,25.0,bwr,pwr,False,2.94,5.48,0.863946,29230.0,41063.62,0.404845,Cooper-1_BWR,s18x18,-42.394934
17,COO-1|CZ346|ADD2974|U,288870,3124.794142,35.0,bwr,pwr,False,2.94,0.5,0.829932,31040.0,16206.28,0.47789,Cooper-1_BWR,s18x18,-21.978731
18,COO-1|CZ346|ADD2966|K,310354,3241.942859,25.0,bwr,pwr,False,2.94,0.51,0.826531,33070.0,18330.69,0.4457,Cooper-1_BWR,bw15x15,-23.522626
19,COO-1|CZ346|ADD2966|T,310354,3241.942859,25.0,bwr,pwr,False,2.94,0.51,0.826531,33940.0,18330.69,0.459909,Cooper-1_BWR,bw15x15,-16.210298
24,FDN-1|2F1ZN2|C3|UB,312854,1083.936839,41.0,bwr,pwr,False,4.9,0.51,0.895918,47490.0,18330.69,0.614009,Fukushima-Daini-1_BWR,bw15x15,-9.539628
29,FDN-1|2F1ZN3|C3|UB,1205,84.338574,10.0,bwr,bwr,True,4.9,0.5,0.897959,68250.0,36458.87,0.465804,Fukushima-Daini-1_BWR,ge7x7-0,1.116023
38,FDN-2|2F2D3|H5|TU302,244758,1407.1767,41.0,bwr,pwr,False,3.4,0.51,0.85,31400.0,15867.31,0.494672,Fukushima-Daini-2_BWR,ce14x14,0.743836
39,FDN-2|2F2D3|A4|TU306,287604,2011.587621,25.0,bwr,pwr,False,3.4,0.5,0.852941,32300.0,16206.28,0.498258,Fukushima-Daini-2_BWR,s18x18,-0.007534
49,FDN-2|2F2D8|H5|TU503,187444,21.396677,10.0,bwr,bwr,True,3.4,0.5,0.852941,55600.0,27569.97,0.504137,Fukushima-Daini-2_BWR,svea64-1,5.905322
50,FDN-2|2F2D8|A4|TU506,1149,473.850856,10.0,bwr,bwr,True,3.4,0.5,0.852941,57500.0,34140.74,0.406248,Fukushima-Daini-2_BWR,ge7x7-0,1.17768


# Max Log LL

In [13]:
print(dfs['unc0']['MaxLogLL'].describe())
print(dfs['unc3']['MaxLogLL'].describe())

count    505.000000
mean     -12.379540
std       46.738361
min     -438.446156
25%      -12.343408
50%       -4.528192
75%        6.085927
max       52.071185
Name: MaxLogLL, dtype: float64
count    505.000000
mean       6.654287
std        9.238379
min       -5.505686
25%       -0.227195
50%        2.738450
75%       11.888770
max       43.619047
Name: MaxLogLL, dtype: float64


# Nuclide Contributions to LL

In [14]:
nuc15 = ['cs133', 'cs134', 'cs135', 'cs137', 
         'eu153', 'eu154', 'ba136', 'ba138', 
         'sm149', 'sm150', 'sm152', 
         'pu239', 'pu240', 'pu241', 'pu242']

In [15]:
logpdf_df.sample(10)

Unnamed: 0,ba136,ba138,cs133,cs134,cs135,cs137,eu153,eu154,pu239,pu240,pu241,pu242,sm149,sm150,sm152
OBR-1|BE171|1|88,,,,,,,,,0.377889,-8.1297,2.012587,1.126241,,,
TMI-1|NJ070G|O1|S2,,,,5.428716,,1.374932,1.377732,,0.355431,-13.667326,-0.290394,1.644359,7.507232,0.893431,3.2582
GAR-1|A-106|A9|1,,,,,,,,,-16.657813,-0.05595,-61.466251,2.451982,,,
YNK-1|E6|SE-F5|N-25,,,,,,,,,-1.117904,-5.749993,1.492756,-1.463265,,,
NOV-4|213|64|15,,,,,,,,,-0.643557,-10.819088,1.671907,-0.001896,,,
FDN-2|2F2DN23|SF98|4,,,,3.746061,,-0.95386,,5.476027,0.707135,-11.546844,0.726137,-13.063159,5.055513,-0.666196,-4.559284
KOL-3|144-46879|62|81,,,1.272586,7.406385,-8.675377,0.711368,-3.352758,0.760259,-2.735557,-7.711641,-6.262424,-0.792971,-3.838983,1.490876,-16.542275
JPD-1|A-20|C3|KA-1040+171,,,,8.562583,,1.944895,,9.37849,-2.473292,2.549989,4.435464,6.45796,,,
TVC-1|509-069|E11|2K,,,,,,,,,0.139271,-8.514064,2.194199,1.904718,,,
CCL-1|D101|MLA098|P,,,,,,,,,-0.915969,-7.962498,2.110648,-0.685518,,,


In [16]:
for nuc in nuc15:
    desc = logpdf_df[nuc].describe()
    print(desc[['count', 'mean', 'std', 'min', 'max']])

count     46.000000
mean      -5.609692
std       27.251557
min     -151.341834
max        2.242629
Name: cs133, dtype: float64
count    113.000000
mean       6.143763
std        2.554392
min       -5.554422
max       10.838051
Name: cs134, dtype: float64
count    40.000000
mean     -0.109266
std       3.591226
min     -14.249798
max       2.915950
Name: cs135, dtype: float64
count    185.000000
mean       0.500056
std        3.120128
min      -16.736625
max        4.618208
Name: cs137, dtype: float64
count     83.000000
mean     -12.082937
std       40.759924
min     -158.826684
max        4.718569
Name: eu153, dtype: float64
count    100.000000
mean      -7.950006
std       38.689436
min     -158.745997
max       11.282856
Name: eu154, dtype: float64
count    0.0
mean     NaN
std      NaN
min      NaN
max      NaN
Name: ba136, dtype: float64
count    0.0
mean     NaN
std      NaN
min      NaN
max      NaN
Name: ba138, dtype: float64
count    97.000000
mean      2.897750
std      13.6

# Look at ratio values for a single prediction

In [17]:
sim_id = 'VAN-2|EF05|WZR0058|E58-88'
pred_id = 43225
preds.loc[preds['sim_idx']==sim_id, to_print]

Unnamed: 0,sim_idx,pred_idx,pred_CoolingTime,AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
418,VAN-2|EF05|WZR0058|E58-88,352405,2359.896319,25.0,pwr,pwr,True,4.5,5.15,0.144444,43520.0,42872.97,0.014867,Vandellos-2_PWR,vver1000,7.961585


In [18]:
y_mes = sfco.loc[sfco.index == sim_id].squeeze().drop(lbls)
y_sim = XY.loc[XY.index == pred_id].squeeze().drop(lbls+nonlbls)
y_logpdf = logpdf_df.loc[logpdf_df.index == sim_id].squeeze()
df = pd.DataFrame([y_mes, y_sim])
df.loc['Abs Diff'] = np.abs(df.loc[pred_id] - df.loc[sim_id])
df.loc['% Diff'] = df.loc['Abs Diff'] * 100 / df.loc[sim_id]
df.loc['LogPDF'] = y_logpdf

In [19]:
df

Unnamed: 0,cs133,cs134,cs135,cs137,eu153,eu154,ba136,ba138,sm149,sm150,sm152,pu239,pu240,pu241,pu242
VAN-2|EF05|WZR0058|E58-88,1.4,0.0184,0.784,1.43,0.133,0.0166,0.0,0.0,0.00339,0.331,0.125,6.14,2.4,1.13,0.556
43225,1.279,0.01546,0.6227,1.154,0.1189,0.01472,0.02285,1.458,0.002967,0.3074,0.1146,4.826,1.581,0.9389,0.4729
Abs Diff,0.121,0.00294,0.1613,0.276,0.0141,0.00188,0.02285,1.458,0.000423,0.0236,0.0104,1.314,0.819,0.1911,0.0831
% Diff,8.642857,15.978261,20.57398,19.300699,10.601504,11.325301,inf,inf,12.477876,7.129909,8.32,21.400651,34.125,16.911504,14.946043
LogPDF,1.184735,3.40535,2.050726,0.797028,2.847683,2.059226,,,3.464595,1.204093,4.156235,0.114093,-15.642552,0.123411,2.19696
