In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats

In [2]:
from mll_calc.mll_pred import format_XY
from mll_calc.all_jobs import uncs, job_dirs

In [3]:
def logpdf_calc(row, test_sample, unc):
    y_sim = row[test_sample>0].values.tolist()
    std = row.multiply(unc)[test_sample>0].values.tolist()
    y_mes = test_sample[test_sample>0].values.tolist()
    logpdf_list = stats.norm.logpdf(y_sim, loc=y_mes, scale=std)
    return logpdf_list

def loop_db(XY, pred, unc, lbls, nonlbls):
    logpdf_df = pd.DataFrame()
    for idx, row in pred.iterrows():
        sim_idx = row['sim_idx']
        pred_idx = row['pred_idx']
        all_lbls = lbls + nonlbls
        
        test_sample = XY.loc[sim_idx].drop(all_lbls)
        train_row = XY.loc[pred_idx].drop(all_lbls)
        
        logpdf = logpdf_calc(train_row, test_sample, unc)
        mll = row['MaxLogLL']
        
        if round(mll, 4) != round(np.sum(logpdf), 4):
            print('mismatch')
            break
        
        logpdf = pd.Series(logpdf, index=train_row[test_sample>0].index, name=sim_idx)
        if logpdf_df.empty:
            logpdf_df = pd.DataFrame(columns = test_sample.index.to_list())
        logpdf_df = logpdf_df.append(logpdf)
    return logpdf_df

In [4]:
lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
nonlbls = ['AvgPowerDensity', 'ModDensity', 'UiWeight']

rdrive = '/mnt/researchdrive/BOX_INTERNAL/opotowsky/'
d1_train_pkl = rdrive + 'detector_response/d1_hpge_spectra_peaks_trainset.pkl'
d2_train_pkl = rdrive + 'detector_response/d2_hpge_spectra_peaks_trainset.pkl'

d1XY = format_XY(d1_train_pkl)
d2XY = format_XY(d2_train_pkl)

In [5]:
d1results = rdrive + 'mll/gam_spec/d1/'
d2results = rdrive + 'mll/gam_spec/d2/'

dfs = {}
#for i, job in enumerate(job_dirs):
job = job_dirs[0]
d1_preds = pd.read_csv(d1results + job + '/' + job + '.csv').drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
d2_preds = pd.read_csv(d2results + job + '/' + job + '.csv').drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])

In [6]:
# overlapping predictions in both sets
to_print = ['sim_idx', 'pred_idx', 'AvgPowerDensity', 
            'ReactorType', 'pred_ReactorType', 'ReactorType_Score', 
            'Enrichment', 'pred_Enrichment', 'Enrichment_Error', 
            'Burnup', 'pred_Burnup', 'Burnup_Error',
            'CoolingTime', 'pred_CoolingTime', 'CoolingTime_Error',
            'OrigenReactor', 'pred_OrigenReactor', 'MaxLogLL']
d1_preds = d1_preds.loc[:, to_print]
d2_preds = d2_preds.loc[:, to_print]

preds = d1_preds.join(d2_preds, on = 'sim_idx', how = 'inner', lsuffix = '_l', rsuffix = '_r')

In [8]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [9]:
preds.sample(20)

Unnamed: 0,sim_idx,sim_idx_l,pred_idx_l,AvgPowerDensity_l,ReactorType_l,pred_ReactorType_l,ReactorType_Score_l,Enrichment_l,pred_Enrichment_l,Enrichment_Error_l,Burnup_l,pred_Burnup_l,Burnup_Error_l,CoolingTime_l,pred_CoolingTime_l,CoolingTime_Error_l,OrigenReactor_l,pred_OrigenReactor_l,MaxLogLL_l,sim_idx_r,pred_idx_r,AvgPowerDensity_r,ReactorType_r,pred_ReactorType_r,ReactorType_Score_r,Enrichment_r,pred_Enrichment_r,Enrichment_Error_r,Burnup_r,pred_Burnup_r,Burnup_Error_r,CoolingTime_r,pred_CoolingTime_r,CoolingTime_Error_r,OrigenReactor_r,pred_OrigenReactor_r,MaxLogLL_r
24,1029,1029,1028,10.0,bwr,bwr,True,0.5,0.5,0.0,30660.96,30660.96,0.0,473.850856,471.388214,2.462642,ge7x7-0,ge7x7-0,-50.838399,39417,220856,25.0,bwr,bwr,True,2.74,2.73,0.01,17668.74,18096.51,427.77,6160.818841,6159.232422,1.586419,ge7x7-0,svea64-1,-35.197104
177,6732,6732,6733,20.0,bwr,bwr,True,0.5,0.5,0.0,965.63,965.63,0.0,1032.998451,1046.132758,13.134307,ge7x7-0,ge7x7-0,9.162696,257530,257531,25.0,pwr,pwr,True,3.76,3.76,0.0,23261.53,23261.53,0.0,750.085316,848.060234,97.974918,ce14x14,ce14x14,-47.250258
43,1584,1584,1585,10.0,bwr,bwr,True,0.5,0.5,0.0,63534.57,63534.57,0.0,2074.891758,2079.301841,4.410083,ge7x7-0,ge7x7-0,-56.287381,60724,60723,10.0,bwr,bwr,True,0.55,0.55,0.0,5533.4,5533.4,0.0,27.412098,7.797336,19.614762,abb8x8-1,abb8x8-1,-26.370728
168,6435,6435,6375,10.0,bwr,bwr,True,0.5,0.5,0.0,48949.3,48334.78,614.52,1206.81007,1206.81007,0.0,ge7x7-0,ge7x7-0,-54.629971,246110,246170,25.0,pwr,pwr,True,1.52,1.52,0.0,19111.96,19775.13,663.17,5333.9593,5333.9593,0.0,ce14x14,ce14x14,-43.313419
247,9448,9448,9449,25.0,bwr,bwr,True,0.5,0.5,0.0,30660.96,30660.96,0.0,2598.8066,2625.070937,26.264337,ge7x7-0,ge7x7-0,-47.992152,361436,361437,22.0,phwr,phwr,True,0.711,0.711,0.0,7263.4,7263.4,0.0,6483.944237,6678.915522,194.971285,candu19,candu19,-27.004577
78,3053,3053,3054,20.0,bwr,bwr,True,0.5,0.5,0.0,48334.78,48334.78,0.0,5128.931533,5158.023377,29.091844,ge7x7-0,ge7x7-0,-50.135744,116814,116815,10.0,bwr,bwr,True,5.23,5.23,0.0,21800.82,21800.82,0.0,6026.246281,6103.568798,77.322517,abb8x8-1,abb8x8-1,-35.866421
197,7576,7576,7577,20.0,bwr,bwr,True,0.5,0.5,0.0,21745.58,21745.58,0.0,1250.512881,1272.233919,21.721038,ge7x7-0,ge7x7-0,-45.368293,289817,289816,41.0,pwr,pwr,True,0.5,0.5,0.0,1967.36,1967.36,0.0,1397.95296,1351.325953,46.627007,s18x18,s18x18,-6.60846
189,7299,7299,67779,20.0,bwr,bwr,True,0.5,0.55,0.05,13478.85,13558.75,79.9,3828.695214,3869.273243,40.578029,ge7x7-0,abb8x8-1,-36.641836,279128,279068,41.0,pwr,pwr,True,2.92,2.92,0.0,30039.95,29851.6,188.35,455.237804,455.237804,0.0,w17x17,w17x17,-50.992094
128,4946,4946,4947,25.0,bwr,bwr,True,0.5,0.5,0.0,63534.57,63534.57,0.0,2311.579113,2355.494784,43.915671,ge7x7-0,ge7x7-0,-56.980939,189180,189239,20.0,bwr,bwr,True,0.5,0.5,0.0,30045.06,30045.06,0.0,0.000619,0.0,0.000619,svea64-1,svea64-1,-50.090386
159,5979,5979,187362,10.0,bwr,bwr,True,0.5,0.5,0.0,22690.61,22475.28,215.33,3828.695214,3758.52213,70.173084,ge7x7-0,svea64-1,-44.299913,228610,228609,20.0,bwr,bwr,True,4.07,4.07,0.0,2796.41,2796.41,0.0,565.158549,523.199045,41.959504,svea64-1,svea64-1,-0.112459


In [13]:
to_print_2 = ['MaxLogLL_l', 'MaxLogLL_r', 'Burnup_Error_l', 'Burnup_Error_r', 
              'Enrichment_Error_l', 'Enrichment_Error_r', 
              'CoolingTime_Error_r', 'CoolingTime_Error_r']
preds.loc[:, to_print_2].sample(20)

Unnamed: 0,MaxLogLL_l,MaxLogLL_r,Burnup_Error_l,Burnup_Error_r,Enrichment_Error_l,Enrichment_Error_r,CoolingTime_Error_r,CoolingTime_Error_r.1
31,-46.267675,-44.812435,0.0,0.0,0.0,0.0,53.146807,53.146807
152,-44.1046,-35.870123,0.0,0.0,0.0,0.0,3.454678,3.454678
155,-45.248006,-46.236946,0.0,0.0,0.0,0.0,30.859317,30.859317
169,-52.039704,-52.976269,0.0,0.0,0.0,0.0,38.884825,38.884825
5,-27.242643,-54.171732,0.0,0.0,0.0,0.0,15.938932,15.938932
248,-50.738206,-28.710187,0.0,0.0,0.0,0.0,40.605195,40.605195
37,-55.499709,-26.323204,0.0,0.0,0.0,0.0,21.721038,21.721038
81,-52.069627,-14.127972,0.0,0.0,0.0,0.0,49.744153,49.744153
116,-46.324208,-43.718984,0.0,0.0,0.0,0.0,50.938446,50.938446
40,-54.682785,-59.654566,0.0,0.0,0.0,0.0,0.32204,0.32204


In [10]:
#d1 preds
preds['MaxLogLL_l'].describe()

count    251.000000
mean     -38.513872
std       16.269817
min      -60.082480
25%      -50.470185
50%      -42.287828
75%      -31.687015
max       13.742486
Name: MaxLogLL_l, dtype: float64

In [11]:
#d2 preds
preds['MaxLogLL_r'].describe()

count    251.000000
mean     -38.129809
std       18.475049
min      -62.220978
25%      -51.340667
50%      -43.174071
75%      -30.749305
max       26.853743
Name: MaxLogLL_r, dtype: float64

In [None]:
unc = 0.05
#d1_logpdf = loop_db(d1XY, d1_preds, unc, lbls, nonlbls)
#d2_logpdf = loop_db(d2XY, d2_preds, unc, lbls, nonlbls)