In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

# Data Setup & Functions

In [3]:
from mll_calc.mll_pred import format_XY, convert_g_to_mgUi

In [4]:
def logpdf_calc(row, test_sample, unc):
    """Score simulated values against a test sample with a normal log-density.

    Only the entries where ``test_sample`` is strictly positive are used.
    Each selected value of ``row`` is evaluated under a normal distribution
    centred on the matching ``test_sample`` value, with a standard deviation
    of ``unc`` times the ``row`` value itself.

    Parameters
    ----------
    row : pandas.Series
        Simulated values, indexed like ``test_sample``.
    test_sample : pandas.Series
        Test values; entries that are not > 0 are skipped.
    unc : float
        Multiplier applied to ``row`` to obtain the standard deviation.

    Returns
    -------
    Whatever ``scipy.stats.norm.logpdf`` returns for the selected entries
    (one log-density per selected entry, in ``row`` order).
    """
    positive = test_sample > 0
    simulated = row[positive].values.tolist()
    sigma = row.multiply(unc)[positive].values.tolist()
    measured = test_sample[positive].values.tolist()
    return stats.norm.logpdf(simulated, loc=measured, scale=sigma)

In [5]:
def loop_sfco(XY, test, pred, unc, lbls, nonlbls):
    """Build a per-column log-PDF table for each test sample's predicted match.

    For every row of ``test``, the entry of ``pred`` whose ``sim_idx`` equals
    the row's index label gives ``pred_idx``, the index label of the matching
    row in ``XY``. ``logpdf_calc`` is evaluated between that ``XY`` row and the
    test sample, and its sum is cross-checked (to 4 decimals) against the
    ``MaxLogLL`` stored in ``pred``. On the first disagreement ``'mismatch'``
    is printed and processing stops; rows collected so far are still returned.

    Parameters
    ----------
    XY : pandas.DataFrame
        Training set containing the ``lbls`` and ``nonlbls`` columns.
    test : pandas.DataFrame
        Test set containing every column of ``XY`` except ``nonlbls``.
    pred : pandas.DataFrame
        Results with ``sim_idx``, ``pred_idx`` and ``MaxLogLL`` columns.
    unc : float
        Relative uncertainty forwarded to ``logpdf_calc``.
    lbls : list of str
        Label columns dropped from both the test sample and the ``XY`` row.
    nonlbls : list of str
        Extra ``XY`` columns that are absent from ``test``.

    Returns
    -------
    pandas.DataFrame
        One row per processed test sample (indexed by the test index label)
        and one column per non-label test column. Entries whose test value is
        not > 0 are NaN. An empty ``DataFrame`` when no sample was processed.
    """
    xy_cols = [col for col in XY.columns.tolist() if col not in nonlbls]
    test = test[xy_cols]
    train_drop = lbls + nonlbls

    # Collect one Series per sample and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    rows = []
    columns = None
    for test_idx, row in test.iterrows():
        test_sample = row.drop(lbls)
        pred_row = pred.loc[pred['sim_idx'] == test_idx]
        pred_idx = pred_row['pred_idx'].values[0]
        train_row = XY.loc[pred_idx].drop(train_drop)

        logpdf = logpdf_calc(train_row, test_sample, unc)
        mll = pred_row['MaxLogLL'].values[0]

        # Sanity check: the per-column log-PDFs must add up to the stored max log-likelihood
        if round(mll, 4) != round(np.sum(logpdf), 4):
            print('mismatch')
            break

        if columns is None:
            # Full column set, so columns never measured still appear (as NaN)
            columns = test_sample.index.to_list()
        rows.append(pd.Series(logpdf, index=train_row[test_sample > 0].index, name=test_idx))

    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(rows).reindex(columns=columns)

### Train and Test DBs

In [6]:
# Column groups handed to loop_sfco: `lbls` are dropped from each test sample and
# training row; `nonlbls` are additionally removed from the training columns.
lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
nonlbls = ['AvgPowerDensity', 'ModDensity', 'UiWeight']

# Training DB: loaded and converted by the project helpers from mll_calc.mll_pred.
# NOTE(review): convert_g_to_mgUi presumably rescales grams to mg per g initial U,
# judging by its name -- confirm against the helper's source.
train_pkl = '~/sims_n_results/final_sims_nov2020/not-scaled_nuc29.pkl'
XY = format_XY(train_pkl)
XY = convert_g_to_mgUi(XY, lbls+nonlbls)

# Test DB (path suggests SFCOMPO data). NOTE: unpickling can execute arbitrary
# code, so only load pickle files from a trusted source.
sfco_pkl = '~/sfcompo/format_clean/sfcompo_nuc29.pkl'
sfco = pd.read_pickle(sfco_pkl)

### LL Calc Results

In [8]:
# Directory holding one sub-directory (and one CSV of the same name) per uncertainty job
sfcompo_results = '~/sims_n_results/final_sims_nov2020/mll/sfco/'

uncs = [0.05, 0.1, 0.15, 0.2]
job_dirs = [f'Job{i}_unc{unc}' for i, unc in enumerate(uncs)]

# Load each job's results under the keys 'unc0', 'unc1', ... and attach the
# relative burnup / enrichment error columns as each frame is read.
dfs = {}
for i, unc_job in enumerate(job_dirs):
    df = pd.read_csv(f'{sfcompo_results}{unc_job}/{unc_job}.csv')
    df['Relative_Burnup_Error'] = df['Burnup_Error'] / df['Burnup']
    df['Relative_Enrichment_Error'] = df['Enrichment_Error'] / df['Enrichment']
    dfs[f'unc{i}'] = df

# For ease of only viewing 5% case for now
preds = dfs['unc0']

In [9]:
# List the columns available in the selected results frame (preds = dfs['unc0'])
preds.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'sim_idx', 'ReactorType', 'CoolingTime',
       'Enrichment', 'Burnup', 'OrigenReactor', 'pred_idx', 'pred_ReactorType',
       'pred_CoolingTime', 'pred_Enrichment', 'pred_Burnup',
       'pred_OrigenReactor', 'pred_AvgPowerDensity', 'pred_ModDensity',
       'pred_UiWeight', 'MaxLogLL', 'MaxLLUnc', '2ndMaxLogLL', '2ndMaxLLUnc',
       'CDF_LogLL_0.9998', 'CDF_LLUnc_0.9998', 'CDF_LogLL_0.9988',
       'CDF_LLUnc_0.9988', 'CDF_LogLL_0.95', 'CDF_LLUnc_0.95', 'CDF_LogLL_0.9',
       'CDF_LLUnc_0.9', 'CDF_LogLL_0.5', 'CDF_LLUnc_0.5', 'CDF_LogLL_0.1',
       'CDF_LLUnc_0.1', 'CDF_LogLL_0.01', 'CDF_LLUnc_0.01',
       'ReactorType_Score', 'CoolingTime_Error', 'Enrichment_Error',
       'Burnup_Error', 'OrigenReactor_Score', 'Relative_Burnup_Error',
       'Relative_Enrichment_Error'],
      dtype='object')

### LogPDF (wrt nuclide) results

In [None]:
# Relative uncertainty for the per-nuclide log-PDFs. It is set independently of
# `preds` (= dfs['unc0'], i.e. the first entry of `uncs`), so keep the two in sync:
# loop_sfco prints 'mismatch' and stops when the summed log-PDF differs from MaxLogLL.
unc = 0.05
logpdf_df = loop_sfco(XY, sfco, preds, unc, lbls, nonlbls)

# Explore Errors

### Burnup

In [8]:
# Summary statistics of Relative_Burnup_Error (Burnup_Error / Burnup) over all rows of `preds`
preds['Relative_Burnup_Error'].describe()
# old nuc29 results for reference:
#count    505.000000
#mean       0.151887
#std        0.195778
#min        0.000031
#25%        0.036301
#50%        0.087295
#75%        0.183161
#max        1.440513

count    505.000000
mean       0.132149
std        0.150509
min        0.000104
25%        0.031564
50%        0.077272
75%        0.184942
max        0.958580
Name: Relative_Burnup_Error, dtype: float64

In [10]:
# Columns shown whenever individual predictions are inspected in this notebook
to_print = ['sim_idx', 'pred_idx', 'MaxLogLL',
            'pred_AvgPowerDensity', 'pred_ModDensity', 'pred_CoolingTime',
            'ReactorType', 'pred_ReactorType', 'ReactorType_Score', 
            'Enrichment', 'pred_Enrichment', 'Relative_Enrichment_Error', 
            'Burnup', 'pred_Burnup', 'Relative_Burnup_Error',
            'OrigenReactor', 'pred_OrigenReactor']
# Rows whose relative burnup error exceeds 0.49
preds.loc[preds['Relative_Burnup_Error'] > 0.49, to_print]

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,pred_AvgPowerDensity,pred_ModDensity,pred_CoolingTime,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor
54,GAR-1|SA-13|E6|11,84260,12.02643,10.0,0.1,1897.65499,bwr,bwr,True,2.41,4.83,1.004149,4200.0,6527.94,0.554271,Garigliano-1_BWR,ge7x7-0
57,GAR-1|SA-13|E6|16,386831,-1.338533,10.0,0.1,859.422117,bwr,bwr,True,2.41,4.81,0.995851,6640.0,10075.43,0.517384,Garigliano-1_BWR,svea64-1
58,GAR-1|SA-13|E6|10,395284,16.280892,22.0,0.5,23.172669,bwr,bwr,True,2.41,4.81,0.995851,8140.0,12296.75,0.510657,Garigliano-1_BWR,svea64-1
62,GAR-1|A-106|D4|1,84664,-424.739125,10.0,0.1,21.098877,bwr,bwr,True,2.1,4.83,1.3,8850.0,15521.57,0.75385,Garigliano-1_BWR,ge7x7-0
64,GAR-1|A-106|C3|1,296340,-495.790571,10.0,0.7,0.000516,bwr,bwr,True,2.1,4.97,1.366667,9140.0,16428.84,0.797466,Garigliano-1_BWR,atrium10x10-9
65,GAR-1|A-106|D2|1,190500,-411.000393,22.0,0.3,0.000804,bwr,bwr,True,2.1,4.83,1.3,9440.0,17276.1,0.830095,Garigliano-1_BWR,abb8x8-1
66,GAR-1|A-106|B1|1,192300,-576.809574,10.0,0.5,0.000804,bwr,bwr,True,1.6,4.83,2.01875,9800.0,19194.08,0.95858,Garigliano-1_BWR,abb8x8-1
68,GAR-1|A-106|A3|1,89821,-379.179894,22.0,0.3,0.41851,bwr,bwr,True,2.1,4.83,1.3,10510.0,17878.66,0.701109,Garigliano-1_BWR,ge7x7-0
69,GAR-1|A-106|G7|1,467820,-365.74406,25.0,0.71,0.000608,bwr,pwr,False,2.1,5.4,1.571429,10540.0,19326.12,0.833598,Garigliano-1_BWR,s18x18
70,GAR-1|A-106|A5|1,279600,-356.612787,10.0,0.7,0.000516,bwr,bwr,True,2.1,4.29,1.042857,10570.0,17804.03,0.684393,Garigliano-1_BWR,atrium10x10-9


In [11]:
# finding a good prediction with high max LL
# Keep rows with relative burnup error < 0.03, relative enrichment error < 0.04,
# ReactorType_Score equal to True and MaxLogLL above 20.
new_df = preds.loc[(preds['Relative_Burnup_Error'] < 0.03) & 
                   (preds['Relative_Enrichment_Error'] < 0.04) & 
                   (preds['ReactorType_Score'] == True) &
                   (preds['MaxLogLL'] > 20), to_print]
new_df

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,pred_AvgPowerDensity,pred_ModDensity,pred_CoolingTime,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor
191,NOV-3|RP-3371A|97|3,417261,21.50746,41.0,0.71,1824.411883,pwr,pwr,True,3.3,3.27,0.009091,8700.0,8656.02,0.005055,Novovoronezh-3_VVER-440,ce14x14
241,NPD-1|1129|C|1,525314,20.962873,2.2,0.84,1295.837963,phwr,phwr,True,0.711,0.711,0.0,1622.0,1643.2,0.01307,Nuclear Power Demonstration Reactor-1_CANDU,candu37
328,TMI-1|NJ070G|O13|S7,446536,28.662774,35.0,0.71,1521.070654,pwr,pwr,True,4.66,4.76,0.021459,22800.0,23170.26,0.016239,Three Mile Island-1_PWR,w17x17


In [12]:
# Persist the third filtered row; the 2:3 slice keeps it a one-row DataFrame.
# NOTE(review): the pick is positional, so it silently changes if the filter above
# returns different rows -- consider selecting by sim_idx instead.
new_df.iloc[2:3].to_pickle('sfco_pred_good.pkl')

In [13]:
# finding a bad prediction
# Keep rows with relative burnup error > 0.05, relative enrichment error > 0.4
# and ReactorType_Score equal to False.
# NOTE: this rebinds `new_df`, so any earlier selection held under that name is replaced.
new_df = preds.loc[(preds['Relative_Burnup_Error'] > 0.05) & 
                   (preds['Relative_Enrichment_Error'] > 0.4) & 
                   (preds['ReactorType_Score'] == False), to_print]
                   #(preds['MaxLogLL'] > 20)
new_df

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,pred_AvgPowerDensity,pred_ModDensity,pred_CoolingTime,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor
69,GAR-1|A-106|G7|1,467820,-365.74406,25.0,0.71,0.000608,bwr,pwr,False,2.1,5.4,1.571429,10540.0,19326.12,0.833598,Garigliano-1_BWR,s18x18
73,GAR-1|A-106|E1|1,513182,-366.267039,25.0,0.71,0.896355,bwr,pwr,False,2.1,4.91,1.338095,10800.0,18889.66,0.749043,Garigliano-1_BWR,vver1000
76,GAR-1|A-106|H8|1,441424,-401.98417,25.0,0.71,33.379722,bwr,pwr,False,2.1,3.86,0.838095,12700.0,20332.54,0.600987,Garigliano-1_BWR,w17x17
79,GAR-1|A-106|J9|1,505680,-597.108941,25.0,0.71,0.000578,bwr,pwr,False,1.6,3.24,1.025,14480.0,21061.75,0.454541,Garigliano-1_BWR,vver1000
168,MON-1|MTB099|F6|S4,510185,-454.632313,25.0,0.71,129.30773,bwr,pwr,False,2.87,4.36,0.519164,44100.0,63663.56,0.443618,Monticello-1_BWR,vver1000
179,MON-1|MTB099|H8|S4,508985,-348.856503,41.0,0.71,30.605773,bwr,pwr,False,2.14,3.24,0.514019,51100.0,70500.0,0.379648,Monticello-1_BWR,vver1000
287,OBR-1|BE124|G7|P3,283445,-282.27563,10.0,0.9,102.229393,pwr,bwr,False,3.0,4.29,0.43,31300.0,37088.7,0.184942,Obrigheim-1_PWR,atrium10x10-9
374,TVC-1|509-069|E5|9,293343,-318.976732,10.0,0.5,7.33055,pwr,bwr,False,3.13,4.97,0.587859,19254.0,28751.06,0.493251,Trino Vercellese-1_PWR,atrium10x10-9
376,TVC-1|509-069|E11|2,390722,-348.469119,10.0,0.3,1.191198,pwr,bwr,False,3.13,4.81,0.536741,20628.0,28611.6,0.387027,Trino Vercellese-1_PWR,svea64-1
380,TVC-1|509-069|E5|4,387482,-330.97308,10.0,0.1,1.191198,pwr,bwr,False,3.13,4.81,0.536741,23715.0,30940.85,0.304695,Trino Vercellese-1_PWR,svea64-1


In [14]:
# Persist the fifth filtered row; the 4:5 slice keeps it a one-row DataFrame.
# NOTE(review): the pick is positional, so it silently changes if the filter above
# returns different rows -- consider selecting by sim_idx instead.
new_df.iloc[4:5].to_pickle('sfco_pred_bad.pkl')

### Enrichment

In [18]:
# Summary statistics of Relative_Enrichment_Error (Enrichment_Error / Enrichment) over all rows of `preds`
preds['Relative_Enrichment_Error'].describe()
# old nuc29 results for reference:
#count    505.000000
#mean       0.165083
#std        0.248423
#min        0.000000
#25%        0.047923
#50%        0.083004
#75%        0.170455
#max        1.750000

count    505.000000
mean       0.167684
std        0.234862
min        0.000000
25%        0.053648
50%        0.097228
75%        0.226923
max        2.018750
Name: Relative_Enrichment_Error, dtype: float64

In [21]:
# Rows whose relative enrichment error exceeds 1.0 (i.e. more than 100%)
preds.loc[preds['Relative_Enrichment_Error'] > 1.0, to_print]

Unnamed: 0,sim_idx,pred_idx,pred_CoolingTime,pred_AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
54,GAR-1|SA-13|E6|11,84260,1897.65499,10.0,bwr,bwr,True,2.41,4.83,1.004149,4200.0,6527.94,0.554271,Garigliano-1_BWR,ge7x7-0,12.02643
55,GAR-1|SA-13|E6|13,84364,21.098877,10.0,bwr,bwr,True,2.41,4.83,1.004149,5580.0,8102.96,0.452143,Garigliano-1_BWR,ge7x7-0,18.671879
61,GAR-1|SA-13|E6|5,188647,317.650622,10.0,bwr,bwr,True,2.41,4.83,1.004149,8640.0,12072.11,0.397235,Garigliano-1_BWR,abb8x8-1,-1.95883
62,GAR-1|A-106|D4|1,84664,21.098877,10.0,bwr,bwr,True,2.1,4.83,1.3,8850.0,15521.57,0.75385,Garigliano-1_BWR,ge7x7-0,-424.739125
64,GAR-1|A-106|C3|1,296340,0.000516,10.0,bwr,bwr,True,2.1,4.97,1.366667,9140.0,16428.84,0.797466,Garigliano-1_BWR,atrium10x10-9,-495.790571
65,GAR-1|A-106|D2|1,190500,0.000804,22.0,bwr,bwr,True,2.1,4.83,1.3,9440.0,17276.1,0.830095,Garigliano-1_BWR,abb8x8-1,-411.000393
66,GAR-1|A-106|B1|1,192300,0.000804,10.0,bwr,bwr,True,1.6,4.83,2.01875,9800.0,19194.08,0.95858,Garigliano-1_BWR,abb8x8-1,-576.809574
67,GAR-1|A-106|B2|1,271080,0.000516,22.0,bwr,bwr,True,2.1,4.29,1.042857,10280.0,15208.79,0.479454,Garigliano-1_BWR,atrium10x10-9,-415.165873
68,GAR-1|A-106|A3|1,89821,0.41851,22.0,bwr,bwr,True,2.1,4.83,1.3,10510.0,17878.66,0.701109,Garigliano-1_BWR,ge7x7-0,-379.179894
69,GAR-1|A-106|G7|1,467820,0.000608,25.0,bwr,pwr,False,2.1,5.4,1.571429,10540.0,19326.12,0.833598,Garigliano-1_BWR,s18x18,-365.74406


# Max Log LL

In [22]:
# Compare the MaxLogLL distribution of the first job ('unc0', uncs[0] = 0.05)
# with that of the last job ('unc3', uncs[3] = 0.2).
print(dfs['unc0']['MaxLogLL'].describe())
print(dfs['unc3']['MaxLogLL'].describe())

count     505.000000
mean     -220.902308
std       211.694589
min     -1759.146357
25%      -284.467316
50%      -190.569048
75%       -82.341520
max        51.576416
Name: MaxLogLL, dtype: float64
count    505.000000
mean       4.437217
std       24.045795
min      -77.449002
25%      -10.889628
50%       -1.685414
75%       13.713790
max       92.988448
Name: MaxLogLL, dtype: float64


# Nuclide Contributions to LL

In [23]:
# The 29 nuclide names, grouped one element per line; used to select logpdf_df columns
nuc29 = ['am241', 'am242m', 'am243', 
         'cm242', 'cm244', 
         'cs134', 'cs137', 
         'eu154', 
         'nd143', 'nd144', 'nd145', 'nd146', 'nd148', 'nd150',
         'np237', 
         'pu238', 'pu239', 'pu240', 'pu241', 'pu242',
         'sm147', 'sm149', 'sm150', 'sm151', 'sm152',
         'u234', 'u235', 'u236', 'u238']

In [24]:
# Peek at 10 random rows (no random_state is given, so the rows differ between runs).
# NaN marks columns that loop_sfco did not score because the test value was not > 0.
logpdf_df.sample(10)

Unnamed: 0,am241,am242m,am243,cm242,cm244,cs134,cs137,eu154,nd143,nd144,...,pu242,sm147,sm149,sm150,sm151,sm152,u234,u235,u236,u238
MON-1|MTB099|A1|S1,,,,6.163488,,,,,,,...,-4.689277,,,,,,-58.643448,-4.534996,-2.540176,-4.910649
YNK-1|E5|C-A6|G-103,,,,,,,,,,,...,5.18988,,,,,,-28.752399,-2.356046,0.167525,-4.789144
GAR-1|A-106|B1|1,-184.231793,,-23.727609,7.502274,6.382196,,,,,,...,-1.202706,,,,,,,,,
FOR-3|14595|F6|FFBU,-4.356343,,2.795038,,-10.238029,,,,2.06519,-2.865455,...,0.005492,,,,,,-12.599263,0.526847,-19.191779,-4.78271
JPD-1|A-18|C3|-293,1.604227,,,14.106719,16.095196,,4.310332,,3.857968,4.03653,...,9.219027,,,,,,-68.018665,-5.928735,1.350999,-4.80214
OBR-1|BE210|K14|P1,5.553794,,,6.390365,5.166247,,-36.479233,,,,...,-3.185852,,,,,,,-8.00145,-0.499738,-4.781431
NOV-4|13602496|25|19,,,-3.199363,,4.261703,,,,,,...,-0.146758,,,,,,0.669257,-7.207012,0.22039,-4.763148
GAR-1|A-106|G7|1,5.164808,,,2.237062,9.101566,,,,,,...,-8.9653,,,,,,,,,
KOL-3|144-46879|61|13,-51.436577,,-11.368869,3.140377,-73.032956,-139.629737,0.711926,-8.602772,-0.425874,,...,-8.727153,-91.516096,-92.573277,-43.205261,5.845147,-55.353917,3.263634,-10.379145,0.196043,-4.765546
FDN-2|2F2D8|H4|MS1,-61.780111,,-42.937074,2.165411,-10.677867,,,,1.967466,0.904727,...,-0.804464,,,,,,-7.188503,-4.700013,-7.01595,-5.562809


In [25]:
# Print summary statistics of the log-PDF contributions one nuclide (column) at a time;
# 'count' is the number of non-NaN entries in that column.
for nuc in nuc29:
    desc = logpdf_df[nuc].describe()
    print(desc[['count', 'mean', 'std', 'min', 'max']])

count    237.000000
mean     -31.333772
std       53.489034
min     -194.695673
max        7.258579
Name: am241, dtype: float64
count    110.000000
mean     -23.854524
std       50.385684
min     -248.774750
max       10.275533
Name: am242m, dtype: float64
count    203.000000
mean      -7.828539
std       16.973707
min      -98.152084
max        9.532066
Name: am243, dtype: float64
count    214.000000
mean      -1.295015
std       19.955262
min     -140.361241
max       19.656788
Name: cm242, dtype: float64
count    269.000000
mean     -15.300685
std       37.722623
min     -180.310354
max       16.717730
Name: cm244, dtype: float64
count    113.000000
mean     -27.694817
std       53.129003
min     -188.228578
max        9.011785
Name: cs134, dtype: float64
count    185.000000
mean      -3.531138
std       12.885717
min      -78.277985
max        4.310332
Name: cs137, dtype: float64
count    100.000000
mean     -19.015840
std       41.451448
min     -163.587753
max        8.841122
Nam

# Look at ratio values for a single prediction

In [27]:
# Test sample to inspect, and the index label of its predicted training row.
# NOTE: pred_id is hard-coded; it must equal the pred_idx that `preds` reports
# for sim_id (check the row displayed by this cell).
sim_id = 'VAN-2|EF05|WZR0058|E58-88'
pred_id = 445584
preds.loc[preds['sim_idx']==sim_id, to_print]

Unnamed: 0,sim_idx,pred_idx,pred_CoolingTime,pred_AvgPowerDensity,ReactorType,pred_ReactorType,ReactorType_Score,Enrichment,pred_Enrichment,Relative_Enrichment_Error,Burnup,pred_Burnup,Relative_Burnup_Error,OrigenReactor,pred_OrigenReactor,MaxLogLL
418,VAN-2|EF05|WZR0058|E58-88,445584,2066.125663,25.0,pwr,pwr,True,4.5,4.76,0.057778,43520.0,38041.0,0.125896,Vandellos-2_PWR,w17x17,-208.47935


In [28]:
# Test-sample row, predicted training row and per-nuclide log-PDF row for the chosen pair.
# squeeze() turns each one-row selection into a Series named by its index label.
y_mes = sfco.loc[sfco.index == sim_id].squeeze().drop(lbls)
y_sim = XY.loc[XY.index == pred_id].squeeze().drop(lbls+nonlbls)
y_logpdf = logpdf_df.loc[logpdf_df.index == sim_id].squeeze()
# Stack the two Series as rows; the rows are labelled sim_id and pred_id.
df = pd.DataFrame([y_mes, y_sim])
df.loc['Abs Diff'] = np.abs(df.loc[pred_id] - df.loc[sim_id])
# Percent difference relative to the test sample; zero test values give inf.
df.loc['% Diff'] = df.loc['Abs Diff'] * 100 / df.loc[sim_id]
# Aligned on column names; columns that were not scored stay NaN.
df.loc['LogPDF'] = y_logpdf

In [29]:
# Lift pandas' row/column display limits so the whole comparison table is rendered.
# NOTE: set_option is global -- it also affects every cell executed after this one.
pd.set_option("display.max_rows", None, "display.max_columns", None)
df

Unnamed: 0,am241,am242m,am243,cm242,cm244,cs134,cs137,eu154,nd143,nd144,nd145,nd146,nd148,nd150,np237,pu238,pu239,pu240,pu241,pu242,sm147,sm149,sm150,sm151,sm152,u234,u235,u236,u238
VAN-2|EF05|WZR0058|E58-88,0.252,0.0,0.0901,0.0,0.0165,0.0184,1.43,0.0166,1.02,1.65,0.842,0.852,0.484,0.235,0.61,0.268,6.14,2.4,1.13,0.556,0.275,0.00339,0.331,0.0144,0.125,0.237,12.9,5.51,0.0
445584,0.48,0.001284,0.09911,6e-06,0.02492,0.01859,1.215,0.01722,1.014,1.498,0.8059,0.799,0.4256,0.1983,0.622,0.2273,5.378,1.242,1.305,0.5594,0.2704,0.003924,0.331,0.01478,0.1138,0.2287,15.89,5.548,928.7
Abs Diff,0.228,0.001284,0.00901,6e-06,0.00842,0.00019,0.215,0.00062,0.006,0.152,0.0361,0.053,0.0584,0.0367,0.012,0.0407,0.762,1.158,0.175,0.0034,0.0046,0.000534,0.0,0.00038,0.0112,0.0083,2.99,0.038,928.7
% Diff,90.47619,inf,10.0,inf,51.030303,1.032609,15.034965,3.73494,0.588235,9.212121,4.287411,6.220657,12.066116,15.617021,1.967213,15.186567,12.410423,48.25,15.486726,0.611511,1.672727,15.752212,0.0,2.638889,8.96,3.50211,23.178295,0.689655,inf
LogPDF,-42.314237,,2.735426,,-17.063865,6.041033,-4.380546,5.879211,2.055888,-0.386514,1.891277,1.421177,-0.834709,-3.155644,2.477168,-2.854112,-3.620641,-172.00163,-1.785951,2.650296,3.326766,3.913578,3.182431,6.159069,2.312875,3.288715,-7.770385,0.353974,
