In [1]:
import pandas as pd

## Aligning SFCOMPO test cases to training set design

### Reactors
1. Delete RBMK, MAGNOX, AGR from test set (did not simulate these)
2. Ensure the ReactorType column matches the options used in the training set, and preserve the original SFCOMPO reactor-type label by appending that text to the ReactorName column
3. Probably easiest to rename ReactorName to OrigenReactor since that's the name in the training DB

### Isotopes
1. Pivot isotope concentrations from the single long-format column into one column per isotope
2. Keep only the 15 nuclides used in the training set

### Metadata and other information
1. Check script to allow for additional columns (sample ref, etc)
2. But for now, delete these
3. Also delete units after ensuring they match training set units (will also strive to keep this info intact in the future)

In [2]:
# Input locations for this notebook.
# NOTE(review): the pickle path is home-relative and looks machine-specific —
# confirm it resolves on the machine running this notebook.
# Pickled training-set DataFrame (read below with pd.read_pickle).
pklfile = '~/prep-pkls/nucmoles_opusupdate_aug2019/not-scaled_15nuc.pkl'
# Cleaned SFCOMPO measurement database as CSV (read below with pd.read_csv).
sfcompofile = '../clean/sfcompoDB_clean.csv'

In [4]:
# Training set that the SFCOMPO test cases are being aligned to.
# NOTE(review): unpickling can execute arbitrary code — only load pickle
# files that come from a trusted source.
train = pd.read_pickle(pklfile)
# SFCOMPO test cases read from the cleaned CSV export.
test = pd.read_csv(sfcompofile)

In [5]:
# Preview the first five rows of the training set to see its column layout.
train.head(n=5)

Unnamed: 0,Burnup,CoolingTime,Enrichment,OrigenReactor,ReactorType,ba136,ba138,cs133,cs134,cs135,...,eu153,eu154,pu239,pu240,pu241,pu242,sm149,sm150,sm152,total
0.000e+00,0.0,0.0,0.53,ce14x14,pwr,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4202.0
5.733e+01.1,2006.54,0.000839,0.53,ce14x14,pwr,0.000593,0.6109,0.5142,0.002411,0.1187,...,0.0155,0.000451,2.575,0.1198,0.006937,0.000123,0.008141,0.08695,0.04212,4211.0
5.792e+01,2006.54,0.592485,0.53,ce14x14,pwr,0.0006,0.6114,0.5204,0.002411,0.1209,...,0.01568,0.000451,2.605,0.1198,0.006937,0.000123,0.009107,0.08695,0.04212,4211.0
5.844e+01,2006.54,1.109213,0.53,ce14x14,pwr,0.000607,0.6114,0.5256,0.00241,0.1224,...,0.0158,0.000451,2.627,0.1198,0.006936,0.000123,0.00982,0.08695,0.04212,4211.0
6.579e+01,2006.54,8.4604,0.53,ce14x14,pwr,0.000681,0.6114,0.5759,0.002394,0.1238,...,0.01637,0.00045,2.746,0.1198,0.006929,0.000123,0.01347,0.08695,0.04212,4211.0


In [6]:
# Preview the first five rows of the SFCOMPO test set for comparison with the training layout.
test.head(n=5)

Unnamed: 0,ReactorName,ReactorType,Fuel type,Burnup,BurnupUnit,Enrichment,EnrichmentUnit,SampleRef,Measurement,Isotope,Value,Unit,Concentration,ConcentrationUnit,Sigma,Uncertainty,UncertaintyUnit
0,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Am241,0.048,mg/gUi,0.048,mg/gUi,2.0,4.17,%
1,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Am243,0.141,mg/gUi,0.141,mg/gUi,2.0,2.84,%
2,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Cm242,0.021,mg/gUi,0.021,mg/gUi,2.0,9.52,%
3,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Cm244,0.055,mg/gUi,0.055,mg/gUi,2.0,10.91,%
4,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Nd142,0.033,mg/gUi,0.033,mg/gUi,2.0,3.03,%


In [7]:
# List the distinct reactor types present in the SFCOMPO test set.
test['ReactorType'].unique()

array(['VVER-1000', 'MAGNOX', 'PWR', 'BWR', 'AGR', 'VVER-440', 'RBMK',
       'CANDU'], dtype=object)

In [8]:
# List the distinct reactor names present in the SFCOMPO test set.
test['ReactorName'].unique()

array(['Balakovo-2', 'Balakovo-3', 'Bradwell-1', 'Calvert Cliffs-1',
       'Cooper-1', 'Dodewaard-1', 'Forsmark-3', 'Fukushima-Daini-1',
       'Fukushima-Daini-2', 'Garigliano-1', 'Genkai-1', 'Gundremmingen-1',
       'Gösgen-1', 'H.B. Robinson-2', 'Hinkley Point B-1',
       'Hinkley Point B-2', 'Hunterston A-1', 'Hunterston B-2',
       'Japan Power Demonstration Reactor-1', 'Kalinin-1', 'Kola-3',
       'Leningrad-1', 'Mihama-3', 'Monticello-1', 'Neckarwestheim-2',
       'Novovoronezh-3', 'Novovoronezh-4', 'Novovoronezh-5',
       'Nuclear Power Demonstration Reactor-1', 'Obrigheim-1', 'Ohi-1',
       'Ohi-2', 'Pickering A-1', 'Quad Cities-1', 'Takahama-3',
       'Three Mile Island-1', 'Trino Vercellese-1', 'Tsuruga-1',
       'Turkey Point-3', 'Vandellos-2', 'Yankee-1'], dtype=object)