In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
%matplotlib inline

# Functions

In [3]:
def ll_list_calc(row, test_sample, unc):
    """Per-feature Gaussian log-densities of a simulated row vs. a test sample.

    Only the entries where ``test_sample`` is strictly positive take part in
    the comparison; everything else is skipped.

    Parameters
    ----------
    row : pandas.Series
        Simulated values, indexed like ``test_sample``.
    test_sample : pandas.Series
        Measured values; entries that are not > 0 are ignored.
    unc : float
        Fractional uncertainty. Each standard deviation is ``unc`` times the
        corresponding simulated value.

    Returns
    -------
    numpy.ndarray
        ``scipy.stats.norm.logpdf`` evaluated at the simulated values with the
        measured values as ``loc``, one entry per retained feature.
    """
    measured = test_sample > 0
    simulated = row[measured].values.tolist()
    sigma = row.multiply(unc)[measured].values.tolist()
    observed = test_sample[measured].values.tolist()
    return stats.norm.logpdf(simulated, loc=observed, scale=sigma)

In [4]:
def ratios(XY, ratio_list, labels):
    """Build a frame of column ratios together with the label columns.

    Parameters
    ----------
    XY : pandas.DataFrame
        Source frame holding every column named in ``labels`` and every
        numerator/denominator column named in ``ratio_list``. Not modified.
    ratio_list : sequence of str
        Ratio names written ``'numerator/denominator'``; each becomes a
        column of the result under that same name.
    labels : sequence of str
        Columns copied unchanged from ``XY``.

    Returns
    -------
    pandas.DataFrame
        One column per ratio followed by the label columns, in the order
        ``ratio_list + labels``. Infinite and NaN entries anywhere in the
        frame (label columns included) are replaced with 0.
    """
    XY_ratios = XY.loc[:, labels].copy()
    for ratio in ratio_list:
        nucs = ratio.split('/')
        XY_ratios[ratio] = XY[nucs[0]] / XY[nucs[1]]
    # Division by zero yields +/-inf (x/0) or NaN (0/0); both are mapped to 0.
    XY_ratios = XY_ratios.replace([np.inf, -np.inf], 0).fillna(0)
    # reorganize columns: ratios first, then labels. The original computed
    # this ordering but never applied it.
    cols = list(ratio_list) + list(labels)
    return XY_ratios[cols]

In [5]:
def format_XY(db_path):
    """Load a pickled DataFrame and keep only rows with positive burnup.

    Parameters
    ----------
    db_path : str
        Path of the pickle file. It is read with ``pandas.read_pickle``, so
        it should come from a trusted source.

    Returns
    -------
    pandas.DataFrame
        The loaded frame without its ``'total'`` column (when present),
        restricted to rows where ``'Burnup'`` is > 0 and re-indexed from 0.
    """
    raw = pd.read_pickle(db_path)
    trimmed = raw.drop('total', axis=1) if 'total' in raw.columns else raw
    burned = trimmed.loc[trimmed['Burnup'] > 0]
    return burned.reset_index(drop=True)

In [6]:
# Fractional uncertainty passed to ll_list_calc
unc = 0.05
# Label columns carried alongside the ratio features
lbls = [
    'ReactorType',
    'CoolingTime',
    'Enrichment',
    'Burnup',
    'OrigenReactor',
]
# Ratio features, written 'numerator/denominator'
tamu_list = [
    'cs137/cs133',
    'cs134/cs137',
    'cs135/cs137',
    'ba136/ba138',
    'sm150/sm149',
    'sm152/sm149',
    'eu154/eu153',
    'pu240/pu239',
    'pu241/pu239',
    'pu242/pu239',
]

# Data formatting & Calculations

In [7]:
# Training database: cleaned by format_XY, then converted to ratio features
pklfile = '../../sims_n_results/nucmoles_opusupdate_aug2019/not-scaled_15nuc.pkl'
XY = ratios(format_XY(pklfile), tamu_list, lbls)

# SFCOMPO test data, converted to the same ratio features
sfco_pkl = '~/sfcompo/format_clean/sfcompo_formatted.pkl'
sfco = ratios(pd.read_pickle(sfco_pkl), tamu_list, lbls)

# for preds
sfco_path = '~/sims_n_results/nucmoles_opusupdate_aug2019/sfco/'
unc_job = 'Job0_unc0.05'
sfcoLL = pd.read_csv('{0}{1}/{1}.csv'.format(sfco_path, unc_job))

## (a) Get test samples for iso-dependent LL calcs

Two test samples were picked by hand: one whose prediction has low error (the "good" case) and one whose prediction has high error (the "bad" case).

In [8]:
# "Good" case: ~6% burnup and enrichment errors + correct reactor pred
good_id, good_idx = 'OHI-2|17G|C5|89G03', 331
# "Bad" case: > 200% burnup and enrichment error + wrong reactor pred
bad_id, bad_idx = 'NPD-1|1022|B|1', 268

In [9]:
# "Good" test sample, its prediction record, and the predicted training index
grow = sfco[sfco.index == good_id].squeeze()
grow_pred = sfcoLL[sfcoLL.index == good_idx]
grow_train_idx = grow_pred.loc[:, 'pred_idx']

In [10]:
# "Bad" test sample, its prediction record, and the predicted training index
brow = sfco[sfco.index == bad_id].squeeze()
brow_pred = sfcoLL[sfcoLL.index == bad_idx]
brow_train_idx = brow_pred.loc[:, 'pred_idx']

In [11]:
# Show the predicted training indices for the good and bad samples
print(grow_train_idx, brow_train_idx, sep='\n')

331    8925
Name: pred_idx, dtype: int64
268    14605
Name: pred_idx, dtype: int64


## (b) LL calcs for "good" pred

In [12]:
# Ratio features and true labels of the "good" test sample
test_sample = grow.drop(lbls)
test_answer = grow[lbls]
# Take the predicted training row from the prediction table rather than
# hard-coding its index (previously the literal 8925), so this cell always
# follows whatever predictions were loaded.
train_idx = int(grow_train_idx.iloc[0])
row = XY.loc[train_idx].drop(lbls)
# Per-ratio log-densities of the predicted training row vs. the test sample
logpdf = ll_list_calc(row, test_sample, unc)

In [13]:
# Per-ratio log-densities returned by ll_list_calc for the "good" prediction
logpdf

array([-19.14817197, -13.31293433,  -0.2413733 ,  -2.23560324])

In [14]:
# Total log-likelihood: sum of the per-ratio contributions
logpdf.sum()

-34.93808284176995

In [15]:
# Stored maximum log-likelihood for the same sample, for comparison with the sum
sfcoLL.loc[sfcoLL['sim_idx'] == good_id, 'MaxLogLL']

331   -34.938083
Name: MaxLogLL, dtype: float64

In [16]:
# Training-row values for the ratios where the test sample is > 0
row.loc[test_sample > 0]

cs134/cs137    0.0163462
pu240/pu239     0.368265
pu241/pu239     0.247569
pu242/pu239     0.166689
Name: 8925, dtype: object

In [17]:
# Attach the ratio names to the log-density values
logpdf = pd.Series(
    data=logpdf,
    index=row.loc[test_sample > 0].index,
    name='LogPDF',
)
logpdf

cs134/cs137   -19.148172
pu240/pu239   -13.312934
pu241/pu239    -0.241373
pu242/pu239    -2.235603
Name: LogPDF, dtype: float64

In [18]:
# Two-row table: the training row's ratio values, and the LogPDF of each ratio
# that entered the calculation (NaN where the ratio was skipped)
pd.DataFrame([row, logpdf])

Unnamed: 0,cs137/cs133,cs134/cs137,cs135/cs137,ba136/ba138,sm150/sm149,sm152/sm149,eu154/eu153,pu240/pu239,pu241/pu239,pu242/pu239
8925,0.897989,0.016346,0.248352,0.010341,100.111607,41.344866,0.106392,0.368265,0.247569,0.166689
LogPDF,,-19.148172,,,,,,-13.312934,-0.241373,-2.235603


## (c) LL calcs for "bad" pred

In [19]:
# Ratio features and true labels of the "bad" test sample
test_sample = brow.drop(lbls)
test_answer = brow[lbls]
# Take the predicted training row from the prediction table rather than
# hard-coding its index (previously the literal 14605), so this cell always
# follows whatever predictions were loaded.
train_idx = int(brow_train_idx.iloc[0])
row = XY.loc[train_idx].drop(lbls)
# Per-ratio log-densities, labelled with the ratio each one belongs to
logpdf = ll_list_calc(row, test_sample, unc)
logpdf = pd.Series(logpdf, index=row[test_sample>0].index, name='LogPDF')

In [20]:
# Total log-likelihood: sum of the per-ratio contributions
logpdf.sum()

7.57858452439314

In [22]:
# Stored maximum log-likelihood for the same sample, for comparison with the sum
sfcoLL.loc[sfcoLL['sim_idx'] == bad_id, 'MaxLogLL']

268    7.578585
Name: MaxLogLL, dtype: float64

In [23]:
# Two-row table: the training row's ratio values, and the LogPDF of each ratio
# that entered the calculation (NaN where the ratio was skipped)
pd.DataFrame([row, logpdf])

Unnamed: 0,cs137/cs133,cs134/cs137,cs135/cs137,ba136/ba138,sm150/sm149,sm152/sm149,eu154/eu153,pu240/pu239,pu241/pu239,pu242/pu239
14605,0.67858,0.000137,0.4421,0.002254,9.63477,4.754565,0.015806,0.065473,0.005232,0.000288
LogPDF,,,,,,,,0.594151,-1.555835,8.540268
