In [1]:
import pandas as pd

## Aligning SFCOMPO test cases to training set design

### A. Reactors
1. Delete RBMK, MAGNOX, AGR from test set (did not simulate these)
2. Ensure ReactorType column matches options from training set, and preserve this information by adding that text to the ReactorName column

### B. Metadata and other information
1. Confirm units match training set (burnup)
2. Check script to allow for additional columns (sample ref, etc) as well as OrigenReactor vs. ReactorName

### C. Isotopes
1. Melt isotope concentrations from single column to their own columns. 
  - Using concentration instead of value for unit conformity. 
  - Need to double check, but I think uncertainties apply to 'values' and not 'concentration'
  - Keeping concentrations, but train set is in g-atoms (moles), so only ratios will be useable until new gram-output training set is simulated.
2. Keep only list of 15 nuclides

In [44]:
pklfile = '~/prep-pkls/nucmoles_opusupdate_aug2019/not-scaled_15nuc.pkl'
sfcompofile = '../clean/sfcompoDB_clean.csv'

In [45]:
train = pd.read_pickle(pklfile)
test = pd.read_csv(sfcompofile)

In [46]:
train.head()

Unnamed: 0,Burnup,CoolingTime,Enrichment,OrigenReactor,ReactorType,ba136,ba138,cs133,cs134,cs135,...,eu153,eu154,pu239,pu240,pu241,pu242,sm149,sm150,sm152,total
0.000e+00,0.0,0.0,0.53,ce14x14,pwr,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4202.0
5.733e+01.1,2006.54,0.000839,0.53,ce14x14,pwr,0.000593,0.6109,0.5142,0.002411,0.1187,...,0.0155,0.000451,2.575,0.1198,0.006937,0.000123,0.008141,0.08695,0.04212,4211.0
5.792e+01,2006.54,0.592485,0.53,ce14x14,pwr,0.0006,0.6114,0.5204,0.002411,0.1209,...,0.01568,0.000451,2.605,0.1198,0.006937,0.000123,0.009107,0.08695,0.04212,4211.0
5.844e+01,2006.54,1.109213,0.53,ce14x14,pwr,0.000607,0.6114,0.5256,0.00241,0.1224,...,0.0158,0.000451,2.627,0.1198,0.006936,0.000123,0.00982,0.08695,0.04212,4211.0
6.579e+01,2006.54,8.4604,0.53,ce14x14,pwr,0.000681,0.6114,0.5759,0.002394,0.1238,...,0.01637,0.00045,2.746,0.1198,0.006929,0.000123,0.01347,0.08695,0.04212,4211.0


In [75]:
test.head(5)

Unnamed: 0,ReactorName,ReactorType,Fuel type,Burnup,BurnupUnit,Enrichment,EnrichmentUnit,SampleRef,Measurement,Isotope,Value,Unit,Concentration,ConcentrationUnit,Sigma,Uncertainty,UncertaintyUnit
10,Balakovo-2_VVER-1000,pwr,UO2,45100.0,MW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,pu239,6.07,mg/gUi,6.07,mg/gUi,2.0,0.66,%
11,Balakovo-2_VVER-1000,pwr,UO2,45100.0,MW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,pu240,2.58,mg/gUi,2.58,mg/gUi,2.0,0.78,%
12,Balakovo-2_VVER-1000,pwr,UO2,45100.0,MW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,pu241,1.7,mg/gUi,1.7,mg/gUi,2.0,0.59,%
13,Balakovo-2_VVER-1000,pwr,UO2,45100.0,MW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,pu242,0.69,mg/gUi,0.69,mg/gUi,2.0,1.45,%
22,Balakovo-2_VVER-1000,pwr,UO2,45800.0,MW*d/tUi,4.4,wt%,BAL-2|1476|42|31,Isotopic Concentration,pu239,6.19,mg/gUi,6.19,mg/gUi,2.0,0.81,%


In [48]:
test.ReactorType.unique()

array(['VVER-1000', 'MAGNOX', 'PWR', 'BWR', 'AGR', 'VVER-440', 'RBMK',
       'CANDU'], dtype=object)

In [49]:
len(test.SampleRef.unique())

652

### A. Changes to ReactorName and ReactorType

In [50]:
test = test[(test.ReactorType != 'MAGNOX') & (test.ReactorType != 'AGR') & (test.ReactorType != 'RBMK')]

In [51]:
test.ReactorType.unique()

array(['VVER-1000', 'PWR', 'BWR', 'VVER-440', 'CANDU'], dtype=object)

In [52]:
len(test.SampleRef.unique())

544

In [53]:
test.ReactorName = test.ReactorName + '_' + test.ReactorType
test.ReactorName

0        Balakovo-2_VVER-1000
1        Balakovo-2_VVER-1000
2        Balakovo-2_VVER-1000
3        Balakovo-2_VVER-1000
4        Balakovo-2_VVER-1000
                 ...         
12210            Yankee-1_PWR
12211            Yankee-1_PWR
12212            Yankee-1_PWR
12213            Yankee-1_PWR
12214            Yankee-1_PWR
Name: ReactorName, Length: 9944, dtype: object

In [54]:
rtypes = {'PWR' : 'pwr', 'BWR' : 'bwr', 'CANDU' : 'phwr', 'VVER-1000' : 'pwr', 'VVER-440' : 'pwr'}
test.ReactorType.replace(rtypes, inplace=True)

In [55]:
test.rename(columns = {'OrigenReactor' : 'ReactorName'}, inplace = True)

### B. Changes to Burnup Units

In [56]:
# only run once you idiot!
test.Burnup = 1000 * test.Burnup
test.BurnupUnit.unique()

array(['GW*d/tUi'], dtype=object)

In [57]:
test.BurnupUnit.replace({'GW*d/tUi' : 'MW*d/tUi'}, inplace=True)

### C. Matching Iso Column Format

In [59]:
test.ConcentrationUnit.unique()

array(['mg/gUi'], dtype=object)

In [62]:
test.Isotope = test.Isotope.str.lower()

In [65]:
nuc15 = ['cs-133', 'cs134', 'cs135', 'cs137', 'eu153', 
         'eu154', 'ba136', 'ba138', 'sm149', 'sm150', 
         'sm152', 'pu239', 'pu240', 'pu241', 'pu242'
        ]
test = test[test.Isotope.isin(nuc15)]

In [87]:
df = test[['SampleRef', 'Isotope', 'Concentration']]

In [89]:
df = df.pivot(index=df.index, columns='Isotope', values='Concentration')

KeyError: "None of [Int64Index([   10,    11,    12,    13,    22,    23,    24,    25,    40,\n               41,\n            ...\n            12192, 12193, 12199, 12200, 12201, 12202, 12207, 12208, 12209,\n            12210],\n           dtype='int64', length=3180)] are in the [columns]"

In [81]:
df.head(50)

Isotope,cs134,cs135,cs137,eu153,eu154,pu239,pu240,pu241,pu242,sm149,sm150,sm152
10,,,,,,6.07,,,,,,
11,,,,,,,2.58,,,,,
12,,,,,,,,1.7,,,,
13,,,,,,,,,0.69,,,
22,,,,,,6.19,,,,,,
23,,,,,,,2.65,,,,,
24,,,,,,,,1.7,,,,
25,,,,,,,,,0.7,,,
40,,,,,,6.1,,,,,,
41,,,,,,,2.67,,,,,
