In [2]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from mll_calc.mll_pred import format_XY, convert_g_to_mgUi

# Functions

In [48]:
def logpdf_calc(row, test_sample, unc):
    y_sim = row[test_sample>0].values.tolist()
    std = row.multiply(unc)[test_sample>0].values.tolist()
    y_mes = test_sample[test_sample>0].values.tolist()
    logpdf_list = stats.norm.logpdf(y_sim, loc=y_mes, scale=std)
    return logpdf_list

In [49]:
def loop_sfco(XY, test, pred, unc, lbls):
    logpdf_df = pd.DataFrame()
    for test_idx, row in test.iterrows():
        test_sample = row.drop(lbls)
        test_answer = row[lbls]
        
        pred_idx = pred['pred_idx'].loc[pred['sim_idx'] == test_idx]
        train_row = XY.loc[pred_idx.values[0]].drop(lbls)
        
        logpdf = logpdf_calc(train_row, test_sample, unc)
        logpdf = pd.Series(logpdf, index=train_row[test_sample>0].index, name=test_idx)
        
        if logpdf_df.empty:
            logpdf_df = pd.DataFrame(columns = test_sample.index.to_list())
        logpdf_df = logpdf_df.append(logpdf)
    return logpdf_df

In [4]:
unc = 0.05
lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
nonlbls = ['AvgPowerDensity', 'ModDensity', 'UiWeight']

In [1]:
to_print = ['sim_idx', 'pred_idx', 'MaxLogLL',
            'ReactorType', 'pred_ReactorType', 'ReactorType_Score',
            'OrigenReactor', 'pred_OrigenReactor',
            'Enrichment', 'pred_Enrichment', 
            'Burnup', 'pred_Burnup', 
            'pred_CoolingTime', 'AvgPowerDensity', 'ModDensity']

# Data formatting & Calculations

In [5]:
pklfile = '~/sims_n_results/simupdates_aug2020/not-scaled_nuc29.pkl'
XY = format_XY(pklfile)
XY = convert_g_to_mgUi(XY, lbls+nonlbls)

sfco_pkl = '~/sfcompo/format_clean/sfcompo_nuc29.pkl'
sfco = pd.read_pickle(sfco_pkl)

# for preds
sfco_path = '~/sims_n_results/simupdates_aug2020/sfco29/'
unc_job = 'Job0_unc0.05'
sfcoLL = pd.read_csv(sfco_path + unc_job + '/' + unc_job + '.csv').drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])

## (a) Get test samples for iso-dependent logpdf calcs

manually found predictions with low and high errors, respectively

In [6]:
# <1% burnup and enrichment errors + correct reactor pred
good_id = 'CCL-1|D047|MKP109|LL' 
good_idx = 7 
grow_train_idx = 344552
# ~150% burnup and enrichment error + wrong reactor pred
bad_id = 'GAR-1|A-106|E5|1' 
bad_idx = 63 
brow_train_idx = 309124

In [7]:
grow = sfco.loc[sfco.index == good_id].squeeze()
grow_pred = sfcoLL.loc[sfcoLL.index == good_idx]
#grow_train_idx = grow_pred['pred_idx']
grow_pred[to_print]

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,ReactorType,pred_ReactorType,ReactorType_Score,OrigenReactor,pred_OrigenReactor,Enrichment,pred_Enrichment,Burnup,pred_Burnup,pred_CoolingTime,AvgPowerDensity,ModDensity
7,CCL-1|D047|MKP109|LL,344552,15.400516,pwr,pwr,True,Calvert Cliffs-1_PWR,vver1000,3.038,3.04,27350.0,27280.53,3031.473636,25.0,0.71


In [8]:
brow = sfco.loc[sfco.index == bad_id].squeeze()
brow_pred = sfcoLL.loc[sfcoLL.index == bad_idx]
#brow_train_idx = brow_pred['pred_idx']
brow_pred[to_print]

Unnamed: 0,sim_idx,pred_idx,MaxLogLL,ReactorType,pred_ReactorType,ReactorType_Score,OrigenReactor,pred_OrigenReactor,Enrichment,pred_Enrichment,Burnup,pred_Burnup,pred_CoolingTime,AvgPowerDensity,ModDensity
63,GAR-1|A-106|E5|1,309124,-516.102948,bwr,pwr,False,Garigliano-1_BWR,s18x18,2.1,5.48,8930.0,21793.78,29.215503,41.0,0.71


In [9]:
to_save = pd.concat([grow_pred[to_print], brow_pred[to_print]])
to_save.to_csv('sims.csv')

## (b) logpdf calcs for "good" pred

In [56]:
test_sample = grow.drop(lbls)
test_answer = grow[lbls]
row = XY.loc[grow_train_idx].drop(lbls+nonlbls)
logpdf = logpdf_calc(row, test_sample, unc)

In [57]:
logpdf

array([ 2.32427716,  2.00529073,  2.62440243,  2.66290005,  3.19868018,
        3.83192943,  2.47230846,  0.29155296, -9.49417025,  1.456463  ,
       -5.72762326,  3.23450913,  4.26116623,  3.49302582,  6.69741831,
        4.32891265, -8.00625375, -0.25009964,  0.78413377, -4.78830782])

In [58]:
np.sum(logpdf)

15.400515584443543

In [59]:
#check match
sfcoLL['MaxLogLL'].loc[sfcoLL['sim_idx']==good_id]

7    15.400516
Name: MaxLogLL, dtype: float64

In [60]:
row[test_sample>0]

nd143      0.7108
nd144       1.062
nd145      0.5783
nd146      0.5564
nd148       0.306
nd150       0.145
pu238      0.1047
pu239       4.986
pu240       1.578
pu241      0.8254
pu242      0.4136
sm147      0.2247
sm149    0.002898
sm150      0.2373
sm151    0.009662
sm152     0.09672
u234       0.2388
u235        9.761
u236        3.592
u238        949.3
Name: 344552, dtype: object

In [61]:
logpdf = pd.Series(logpdf, index=row[test_sample>0].index, name='LogPDF')
logpdf

nd143    2.324277
nd144    2.005291
nd145    2.624402
nd146    2.662900
nd148    3.198680
nd150    3.831929
pu238    2.472308
pu239    0.291553
pu240   -9.494170
pu241    1.456463
pu242   -5.727623
sm147    3.234509
sm149    4.261166
sm150    3.493026
sm151    6.697418
sm152    4.328913
u234    -8.006254
u235    -0.250100
u236     0.784134
u238    -4.788308
Name: LogPDF, dtype: float64

In [62]:
pd.DataFrame([row, test_sample, logpdf])

Unnamed: 0,am241,am242m,am243,cm242,cm244,cs134,cs137,eu154,nd143,nd144,...,pu242,sm147,sm149,sm150,sm151,sm152,u234,u235,u236,u238
344552,0.4456,0.000591,0.06028,2e-06,0.01087,0.004962,0.8313,0.008788,0.7108,1.062,...,0.4136,0.2247,0.002898,0.2373,0.009662,0.09672,0.2388,9.761,3.592,949.3
CCL-1|D047|MKP109|LL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6954,1.07,...,0.3274,0.2155,0.00329,0.2348,0.009756,0.0987,0.1815,9.609,3.562,955.8
LogPDF,,,,,,,,,2.324277,2.005291,...,-5.727623,3.234509,4.261166,3.493026,6.697418,4.328913,-8.006254,-0.2501,0.784134,-4.788308


### Sidebar: Finding some samples to simulate

In [63]:
# print "good" pred labels
sfco.loc[sfco.index == good_id, lbls]

Unnamed: 0,ReactorType,CoolingTime,Enrichment,Burnup,OrigenReactor
CCL-1|D047|MKP109|LL,pwr,0.0,3.038,27350.0,Calvert Cliffs-1_PWR


In [64]:
# print "bad" pred labels
sfco.loc[sfco.index == bad_id, lbls]

Unnamed: 0,ReactorType,CoolingTime,Enrichment,Burnup,OrigenReactor
GAR-1|A-106|E5|1,bwr,0.0,2.1,8930.0,Garigliano-1_BWR


In [13]:
# print labels for a 9 ratio case
sfco.loc[sfco.index == 'VAN-2|EF05|WZR0058|E58-88', lbls]

Unnamed: 0,ReactorType,CoolingTime,Enrichment,Burnup,OrigenReactor
VAN-2|EF05|WZR0058|E58-88,pwr,0.0,4.5,43520.0,Vandellos-2_PWR


## (c) logpdf calcs for "bad" pred

In [20]:
test_sample = brow.drop(lbls)
test_answer = brow[lbls]
row = XY.loc[14605].drop(lbls)
logpdf = logpdf_calc(row, test_sample, unc)
logpdf = pd.Series(logpdf, index=row[test_sample>0].index, name='LogPDF')

In [21]:
np.sum(logpdf)

7.57858452439314

In [22]:
#check match
sfcoLL['MaxLogLL'].loc[sfcoLL['sim_idx']==bad_id]

268    7.578585
Name: MaxLogLL, dtype: float64

In [23]:
pd.DataFrame([row, test_sample, logpdf])

Unnamed: 0,cs137/cs133,cs134/cs137,cs135/cs137,ba136/ba138,sm150/sm149,sm152/sm149,eu154/eu153,pu240/pu239,pu241/pu239,pu242/pu239
14605,0.67858,0.000137,0.4421,0.002254,9.63477,4.754565,0.015806,0.065473,0.005232,0.000288
NPD-1|1022|B|1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055976,0.004129,0.000314
LogPDF,,,,,,,,0.594151,-1.555835,8.540268


# Expand Calcs to Entire SFCOMPO DB

In [24]:
logpdf_df = loop_sfco(XY, sfco, sfcoLL, unc, lbls)

In [25]:
logpdf_df.sample(10)

Unnamed: 0,cs137/cs133,cs134/cs137,cs135/cs137,ba136/ba138,sm150/sm149,sm152/sm149,eu154/eu153,pu240/pu239,pu241/pu239,pu242/pu239
YNK-1|E6|C-A1|G-107,,,,,,,,3.386169,3.532635,5.215175
TMI-1|NJ05YU|H6|B3J,,,,,-13.286297,-3.636586,,0.704379,2.609359,3.07193
NOV-3|RP-3371A|58|4,,,,,,,,3.674064,4.510104,6.744375
GRM-1|B23|B3|K2680,,,,,,,,2.613706,3.644666,5.094574
GAR-1|SA-13|E6|7,,,,,,,,3.349305,4.298478,7.036366
COO-1|CZ346|ADD2966|B,,,,,,,,3.199513,4.422056,5.702915
JPD-1|A-20|C3|KA-1040+660,,-30.246236,,,,,,3.673956,5.275301,7.443001
TVC-1|509-069|L11|7,,,,,,,,3.227234,3.789009,5.206139
NOV-4|213|25|8,,,,,,,,3.387765,4.021041,5.64976
MON-1|MTB099|H1|S5,,,,,,,,-10.5235,2.845892,1.61112


## Stats on Pu Isotope Ratios

In [27]:
logpdf_df['pu240/pu239'].describe()

count    544.000000
mean      -6.514987
std       19.998108
min     -106.368642
25%       -6.891090
50%        2.562300
75%        3.261939
max        4.801567
Name: pu240/pu239, dtype: float64

In [28]:
logpdf_df['pu241/pu239'].describe()

count    543.000000
mean      -1.641779
std       20.742276
min     -128.179832
25%        1.571448
50%        3.370135
75%        3.820741
max        7.535007
Name: pu241/pu239, dtype: float64

In [29]:
logpdf_df['pu242/pu239'].describe()

count    544.000000
mean       2.436788
std        9.196064
min      -96.145311
25%        2.697182
50%        4.294617
75%        5.613382
max       10.632926
Name: pu242/pu239, dtype: float64

## Stats on Cs Isotope Ratios

In [70]:
logpdf_df['cs137/cs133'].describe()

count      45.000000
mean     -180.031390
std      1025.921941
min     -6789.207620
25%        -2.383887
50%         1.468763
75%         2.058430
max         2.365249
Name: cs137/cs133, dtype: float64

In [71]:
logpdf_df['cs134/cs137'].describe()

count    133.000000
mean      -6.879995
std       20.992119
min     -147.531583
25%      -12.617056
50%       -0.745675
75%        5.016665
max        8.388282
Name: cs134/cs137, dtype: float64

In [72]:
logpdf_df['cs135/cs137'].describe()

count     40.000000
mean     -27.931060
std       27.555771
min     -111.785441
25%      -41.708223
50%      -24.461021
75%       -4.510232
max        3.409395
Name: cs135/cs137, dtype: float64

## Stats on Sm/Eu Isotope Ratios

In [75]:
logpdf_df['sm150/sm149'].describe()

count    105.000000
mean      -7.958776
std       10.296818
min      -70.498442
25%       -8.685561
50%       -4.832084
75%       -3.161528
max       -1.763939
Name: sm150/sm149, dtype: float64

In [76]:
logpdf_df['sm152/sm149'].describe()

count    105.000000
mean     -10.076113
std       12.685425
min      -86.321396
25%      -10.867071
50%       -5.882239
75%       -3.501622
max       -1.080357
Name: sm152/sm149, dtype: float64

In [77]:
logpdf_df['eu154/eu153'].describe()

count    45.000000
mean     -9.164255
std      12.323654
min     -44.838226
25%     -19.010468
50%      -5.105708
75%       0.373877
max       4.451885
Name: eu154/eu153, dtype: float64