In [1]:
import pandas as pd

## Aligning SFCOMPO test cases to training set design

### A. Reactors
1. Delete RBMK, MAGNOX, AGR from test set (did not simulate these)
2. Ensure ReactorType column matches options from training set, and preserve this information by adding that text to the ReactorName column

### B. Metadata, other information, DUPLICATES!
1. Confirm units match training set (burnup)
2. Check script to allow for additional columns (sample ref, etc) as well as OrigenReactor vs. ReactorName (may decide to delete for now to get script running faster)
3. Discovered duplicates. How to handle? Average? Take value with lower uncertainty? Right now (10 March), keeping first duplicate entry.

### C. Isotopes
1. Melt isotope concentrations from single column to their own columns. 
  - Using concentration instead of value for unit conformity. 
  - Need to double check, but I think uncertainties apply to 'values' and not 'concentration'
  - Keeping concentrations, but train set is in g-atoms (moles), so only ratios will be useable until new gram-output training set is simulated.
2. Keep only list of 15 nuclides

In [2]:
sfcompofile = '../clean/sfcompoDB_clean.csv'

In [3]:
test = pd.read_csv(sfcompofile)

In [4]:
test.head(5)

Unnamed: 0,ReactorName,ReactorType,Fuel type,Burnup,BurnupUnit,Enrichment,EnrichmentUnit,SampleRef,Measurement,Isotope,Value,Unit,Concentration,ConcentrationUnit,Sigma,Uncertainty,UncertaintyUnit
0,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Am241,0.048,mg/gUi,0.048,mg/gUi,2.0,4.17,%
1,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Am243,0.141,mg/gUi,0.141,mg/gUi,2.0,2.84,%
2,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Cm242,0.021,mg/gUi,0.021,mg/gUi,2.0,9.52,%
3,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Cm244,0.055,mg/gUi,0.055,mg/gUi,2.0,10.91,%
4,Balakovo-2,VVER-1000,UO2,45.1,GW*d/tUi,4.4,wt%,BAL-2|1476|42|15,Isotopic Concentration,Nd142,0.033,mg/gUi,0.033,mg/gUi,2.0,3.03,%


In [5]:
test['Fuel type'].unique()

array(['UO2', 'U-metal', 'UO2-Gd2O3'], dtype=object)

In [6]:
test = test.loc[test['Fuel type'] != 'UO2-Gd2O3']

In [7]:
test.ReactorType.unique()

array(['VVER-1000', 'MAGNOX', 'PWR', 'BWR', 'AGR', 'VVER-440', 'RBMK',
       'CANDU'], dtype=object)

In [8]:
len(test.SampleRef.unique())

613

### A. Changes to ReactorName and ReactorType

In [9]:
test = test[(test.ReactorType != 'MAGNOX') & (test.ReactorType != 'AGR') & (test.ReactorType != 'RBMK')]

In [10]:
test.ReactorType.unique()

array(['VVER-1000', 'PWR', 'BWR', 'VVER-440', 'CANDU'], dtype=object)

In [11]:
len(test.SampleRef.unique())

505

In [12]:
test.ReactorName = test.ReactorName + '_' + test.ReactorType
test.ReactorName

0        Balakovo-2_VVER-1000
1        Balakovo-2_VVER-1000
2        Balakovo-2_VVER-1000
3        Balakovo-2_VVER-1000
4        Balakovo-2_VVER-1000
                 ...         
12210            Yankee-1_PWR
12211            Yankee-1_PWR
12212            Yankee-1_PWR
12213            Yankee-1_PWR
12214            Yankee-1_PWR
Name: ReactorName, Length: 9019, dtype: object

In [13]:
rtypes = {'PWR' : 'pwr', 'BWR' : 'bwr', 'CANDU' : 'phwr', 'VVER-1000' : 'pwr', 'VVER-440' : 'pwr'}
test.ReactorType.replace(rtypes, inplace=True)

### B. Changes to Burnup Units

In [14]:
# only run once you idiot!
test.Burnup = 1000 * test.Burnup
test.BurnupUnit.unique()

array(['GW*d/tUi'], dtype=object)

In [15]:
test.BurnupUnit.replace({'GW*d/tUi' : 'MW*d/tUi'}, inplace=True)

#### Get ranges of Burnup and Enrichment Values

In [16]:
test.loc[test.ReactorType == 'phwr'].Burnup.describe()

count      216.000000
mean      5505.555556
std       3089.718787
min        791.000000
25%       2733.000000
50%       5600.000000
75%       8050.000000
max      11501.000000
Name: Burnup, dtype: float64

In [17]:
test.loc[test.ReactorType == 'bwr'].Burnup.describe()

count     2977.000000
mean     34084.816258
std      23974.805981
min       2185.000000
25%       7040.000000
50%      33070.000000
75%      56320.000000
max      77600.000000
Name: Burnup, dtype: float64

In [18]:
test.loc[test.ReactorType == 'pwr'].Enrichment.describe()

count    5826.000000
mean        3.711071
std         0.601586
min         2.453000
25%         3.203000
50%         3.600000
75%         4.370000
max         4.660000
Name: Enrichment, dtype: float64

In [19]:
test.loc[test.ReactorType == 'bwr'].Enrichment.describe()

count    2977.000000
mean        3.009399
std         0.935613
min         0.710000
25%         2.530000
50%         2.600000
75%         3.800000
max         4.940000
Name: Enrichment, dtype: float64

### C. Matching Iso Column Format

In [20]:
test.ConcentrationUnit.unique()

array(['mg/gUi'], dtype=object)

In [21]:
test.Isotope = test.Isotope.str.lower()

#### Testing pivot

In [19]:
import numpy as np
sample = [1, 1, 1, 2, 2, 3]
iso = ['A', 'B', 'C', 'A', 'B', 'A']
val = np.random.rand(6)

df = pd.DataFrame({'SampleRef' : sample, 'Isotope' : iso, 'Concentration' : val})
df

Unnamed: 0,SampleRef,Isotope,Concentration
0,1,A,0.115135
1,1,B,0.380782
2,1,C,0.982882
3,2,A,0.534473
4,2,B,0.03849
5,3,A,0.674667


In [20]:
#df.duplicated(['SampleRef', 'Isotope'])
df.pivot(index='SampleRef', columns='Isotope', values='Concentration')

Isotope,A,B,C
SampleRef,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.115135,0.380782,0.982882
2,0.534473,0.03849,
3,0.674667,,


### Deleting Duplicate Entries

In [22]:
test.drop_duplicates(['SampleRef', 'Isotope'], inplace=True)

#### pivot attempt

In [33]:
df = test[['SampleRef', 'Isotope', 'Concentration']]
df.head()

Unnamed: 0,SampleRef,Isotope,Concentration
0,BAL-2|1476|42|15,am241,0.048
1,BAL-2|1476|42|15,am243,0.141
2,BAL-2|1476|42|15,cm242,0.021
3,BAL-2|1476|42|15,cm244,0.055
4,BAL-2|1476|42|15,nd142,0.033


#### pivot on full test db

In [34]:
features = df.pivot(index='SampleRef', columns='Isotope', values='Concentration')
features

Isotope,ag109,ag110m,am,am241,am242,am242m,am243,ce140,ce142,ce144,...,sm154,sr90,tc99,u,u232,u233,u234,u235,u236,u238
SampleRef,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BAL-2|1476|42|15,,,,0.048,,,0.141,,,,...,,,,,,,,10.64,6.170,926.55
BAL-2|1476|42|31,,,,,,,0.146,,,,...,,,,,,,0.226,10.66,6.000,925.00
BAL-2|1476|42|6,,,,0.066,,,0.157,,,,...,,,,,,,,10.27,6.110,925.18
BAL-3|1591|23|581,,,,0.071,,,0.192,,,,...,,,,,,,,8.18,6.340,925.66
BAL-3|1591|23|912,,,,0.054,,,0.151,,,,...,,,,,,,,10.18,6.150,925.48
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YNK-1|F5|C-F6|G-119,,,,,,,,,,,...,,,,,,,0.145,15.46,3.602,946.95
YNK-1|F5|C-F6|G-120,,,,,,,,,,,...,,,,,,,0.147,15.94,3.400,946.86
YNK-1|F5|C-F6|G-121,,,,,,,,,,,...,,,,,,,0.161,20.17,2.720,954.05
YNK-1|F5|SE-F5|G-122,,,,,,,,,,,...,,,,,,,0.142,11.83,4.119,941.99


In [35]:
# tried groupby functionality, but drop_duplicates worked faster!
to_remove = ['Isotope', 'Concentration', 'Value', 'Unit', 'Sigma', 'Uncertainty', 'UncertaintyUnit', 'Measurement']
labels = test.drop(columns=to_remove)
labels.drop_duplicates(labels.columns.tolist(), inplace=True)
labels.set_index('SampleRef', inplace = True)
labels

Unnamed: 0_level_0,ReactorName,ReactorType,Fuel type,Burnup,BurnupUnit,Enrichment,EnrichmentUnit,ConcentrationUnit
SampleRef,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BAL-2|1476|42|15,Balakovo-2_VVER-1000,pwr,UO2,45100.0,MW*d/tUi,4.4,wt%,mg/gUi
BAL-2|1476|42|31,Balakovo-2_VVER-1000,pwr,UO2,45800.0,MW*d/tUi,4.4,wt%,mg/gUi
BAL-2|1476|42|6,Balakovo-2_VVER-1000,pwr,UO2,46700.0,MW*d/tUi,4.4,wt%,mg/gUi
BAL-3|1591|23|912,Balakovo-3_VVER-1000,pwr,UO2,46200.0,MW*d/tUi,4.4,wt%,mg/gUi
BAL-3|1591|23|581,Balakovo-3_VVER-1000,pwr,UO2,47900.0,MW*d/tUi,4.4,wt%,mg/gUi
...,...,...,...,...,...,...,...,...
YNK-1|E6|NW-A1|G-127,Yankee-1_PWR,pwr,UO2,42500.0,MW*d/tUi,3.4,wt%,mg/gUi
YNK-1|E6|NW-A1|G-128,Yankee-1_PWR,pwr,UO2,42500.0,MW*d/tUi,3.4,wt%,mg/gUi
YNK-1|E6|NW-A1|N-21,Yankee-1_PWR,pwr,UO2,42600.0,MW*d/tUi,3.4,wt%,mg/gUi
YNK-1|E6|NW-A1|T-165,Yankee-1_PWR,pwr,UO2,43200.0,MW*d/tUi,3.4,wt%,mg/gUi


Ensure sample refs are the same

In [36]:
labels_idx = labels.index.tolist().sort()
features_idx = features.index.tolist().sort()
labels_idx == features_idx

True

In [37]:
formatted = pd.concat([labels, features], axis=1)
formatted

Unnamed: 0,ReactorName,ReactorType,Fuel type,Burnup,BurnupUnit,Enrichment,EnrichmentUnit,ConcentrationUnit,ag109,ag110m,...,sm154,sr90,tc99,u,u232,u233,u234,u235,u236,u238
BAL-2|1476|42|15,Balakovo-2_VVER-1000,pwr,UO2,45100.0,MW*d/tUi,4.4,wt%,mg/gUi,,,...,,,,,,,,10.640,6.170,926.55
BAL-2|1476|42|31,Balakovo-2_VVER-1000,pwr,UO2,45800.0,MW*d/tUi,4.4,wt%,mg/gUi,,,...,,,,,,,0.226,10.660,6.000,925.00
BAL-2|1476|42|6,Balakovo-2_VVER-1000,pwr,UO2,46700.0,MW*d/tUi,4.4,wt%,mg/gUi,,,...,,,,,,,,10.270,6.110,925.18
BAL-3|1591|23|912,Balakovo-3_VVER-1000,pwr,UO2,46200.0,MW*d/tUi,4.4,wt%,mg/gUi,,,...,,,,,,,,10.180,6.150,925.48
BAL-3|1591|23|581,Balakovo-3_VVER-1000,pwr,UO2,47900.0,MW*d/tUi,4.4,wt%,mg/gUi,,,...,,,,,,,,8.180,6.340,925.66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YNK-1|E6|NW-A1|G-127,Yankee-1_PWR,pwr,UO2,42500.0,MW*d/tUi,3.4,wt%,mg/gUi,,,...,,,,,,,0.111,7.249,4.770,932.06
YNK-1|E6|NW-A1|G-128,Yankee-1_PWR,pwr,UO2,42500.0,MW*d/tUi,3.4,wt%,mg/gUi,,,...,,,,,,,0.105,7.125,4.598,932.39
YNK-1|E6|NW-A1|N-21,Yankee-1_PWR,pwr,UO2,42600.0,MW*d/tUi,3.4,wt%,mg/gUi,,,...,,,,,,,0.102,7.373,4.569,931.37
YNK-1|E6|NW-A1|T-165,Yankee-1_PWR,pwr,UO2,43200.0,MW*d/tUi,3.4,wt%,mg/gUi,,,...,,,,,,,0.104,7.020,4.703,931.16


#### For now, formatting to match ORIGEN DB exactly

In [38]:
formatted.columns

Index(['ReactorName', 'ReactorType', 'Fuel type', 'Burnup', 'BurnupUnit',
       'Enrichment', 'EnrichmentUnit', 'ConcentrationUnit', 'ag109', 'ag110m',
       'am', 'am241', 'am242', 'am242m', 'am243', 'ce140', 'ce142', 'ce144',
       'cm', 'cm242', 'cm243', 'cm244', 'cm245', 'cm246', 'cm247', 'cs',
       'cs133', 'cs134', 'cs135', 'cs137', 'eu', 'eu151', 'eu152', 'eu153',
       'eu154', 'eu155', 'gd', 'gd152', 'gd154', 'gd155', 'gd156', 'gd157',
       'gd158', 'gd160', 'i129', 'la139', 'mo100', 'mo95', 'mo97', 'mo98',
       'nb95', 'nd', 'nd142', 'nd143', 'nd144', 'nd145', 'nd146', 'nd148',
       'nd150', 'np237', 'pd105', 'pd108', 'pm147', 'pu', 'pu236', 'pu238',
       'pu239', 'pu240', 'pu241', 'pu242', 'pu244', 'rh103', 'ru101', 'ru103',
       'ru106', 'sb125', 'sm', 'sm147', 'sm148', 'sm149', 'sm150', 'sm151',
       'sm152', 'sm154', 'sr90', 'tc99', 'u', 'u232', 'u233', 'u234', 'u235',
       'u236', 'u238'],
      dtype='object')

In [39]:
import numpy as np
formatted.rename(columns = {'Fuel type' : 'FuelType', 'ReactorName' : 'OrigenReactor'}, inplace = True)
to_remove = ['FuelType', 'BurnupUnit', 'EnrichmentUnit', 'ConcentrationUnit']
formatted = formatted.drop(columns=to_remove)
formatted['ba136'] = np.nan
formatted['ba138'] = np.nan
formatted['CoolingTime'] = 0.0
#formatted.fillna(0, inplace=True) #saving df w/ NaNs
formatted

Unnamed: 0,OrigenReactor,ReactorType,Burnup,Enrichment,ag109,ag110m,am,am241,am242,am242m,...,u,u232,u233,u234,u235,u236,u238,ba136,ba138,CoolingTime
BAL-2|1476|42|15,Balakovo-2_VVER-1000,pwr,45100.0,4.4,,,,0.048,,,...,,,,,10.640,6.170,926.55,,,0.0
BAL-2|1476|42|31,Balakovo-2_VVER-1000,pwr,45800.0,4.4,,,,,,,...,,,,0.226,10.660,6.000,925.00,,,0.0
BAL-2|1476|42|6,Balakovo-2_VVER-1000,pwr,46700.0,4.4,,,,0.066,,,...,,,,,10.270,6.110,925.18,,,0.0
BAL-3|1591|23|912,Balakovo-3_VVER-1000,pwr,46200.0,4.4,,,,0.054,,,...,,,,,10.180,6.150,925.48,,,0.0
BAL-3|1591|23|581,Balakovo-3_VVER-1000,pwr,47900.0,4.4,,,,0.071,,,...,,,,,8.180,6.340,925.66,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YNK-1|E6|NW-A1|G-127,Yankee-1_PWR,pwr,42500.0,3.4,,,,,,,...,,,,0.111,7.249,4.770,932.06,,,0.0
YNK-1|E6|NW-A1|G-128,Yankee-1_PWR,pwr,42500.0,3.4,,,,,,,...,,,,0.105,7.125,4.598,932.39,,,0.0
YNK-1|E6|NW-A1|N-21,Yankee-1_PWR,pwr,42600.0,3.4,,,,,,,...,,,,0.102,7.373,4.569,931.37,,,0.0
YNK-1|E6|NW-A1|T-165,Yankee-1_PWR,pwr,43200.0,3.4,,,,,,,...,,,,0.104,7.020,4.703,931.16,,,0.0


In [42]:
#hard code column order for now
labels = ['Burnup', 'CoolingTime', 'Enrichment', 'OrigenReactor', 'ReactorType'] 

In [40]:
nuc29 = ['am241', 'am242m', 'am243', 'cm242', 'cm244', 
         'cs134', 'cs137', 'eu154', 'nd143', 'nd144', 
         'nd145', 'nd146', 'nd148', 'nd150', 'np237', 
         'pu238', 'pu239', 'pu240', 'pu241', 'pu242', 
         'sm147', 'sm149', 'sm150', 'sm151', 'sm152',
         'u234', 'u235', 'u236', 'u238'
        ]
test29 = test[test.Isotope.isin(nuc29)]

In [43]:
formatted29 = formatted[labels + nuc29]
formatted29.to_pickle('sfcompo_nuc29_nulls.pkl')
formatted29

Unnamed: 0,Burnup,CoolingTime,Enrichment,OrigenReactor,ReactorType,am241,am242m,am243,cm242,cm244,...,pu242,sm147,sm149,sm150,sm151,sm152,u234,u235,u236,u238
BAL-2|1476|42|15,45100.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.048,,0.141,0.021,0.0550,...,0.690,,,,,,,10.640,6.170,926.55
BAL-2|1476|42|31,45800.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,,,0.146,,0.0539,...,0.700,,,,,,0.226,10.660,6.000,925.00
BAL-2|1476|42|6,46700.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.066,,0.157,0.024,0.0590,...,0.720,,,,,,,10.270,6.110,925.18
BAL-3|1591|23|912,46200.0,0.0,4.4,Balakovo-3_VVER-1000,pwr,0.054,,0.151,0.021,0.0590,...,0.750,,,,,,,10.180,6.150,925.48
BAL-3|1591|23|581,47900.0,0.0,4.4,Balakovo-3_VVER-1000,pwr,0.071,,0.192,0.023,0.0790,...,0.880,,,,,,,8.180,6.340,925.66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YNK-1|E6|NW-A1|G-127,42500.0,0.0,3.4,Yankee-1_PWR,pwr,,,,,,...,0.831,,,,,,0.111,7.249,4.770,932.06
YNK-1|E6|NW-A1|G-128,42500.0,0.0,3.4,Yankee-1_PWR,pwr,,,,,,...,0.803,,,,,,0.105,7.125,4.598,932.39
YNK-1|E6|NW-A1|N-21,42600.0,0.0,3.4,Yankee-1_PWR,pwr,,,,,,...,0.794,,,,,,0.102,7.373,4.569,931.37
YNK-1|E6|NW-A1|T-165,43200.0,0.0,3.4,Yankee-1_PWR,pwr,,,,,,...,0.823,,,,,,0.104,7.020,4.703,931.16


In [29]:
nuc15 = ['cs133', 'cs134', 'cs135', 'cs137', 'eu153', 
         'eu154', 'ba136', 'ba138', 'sm149', 'sm150', 
         'sm152', 'pu239', 'pu240', 'pu241', 'pu242'
        ]
test15 = test[test.Isotope.isin(nuc15)]

In [32]:
formatted15 = formatted[labels + nuc15]
formatted15.to_pickle('sfcompo_nuc15.pkl')
formatted15

Unnamed: 0,Burnup,CoolingTime,Enrichment,OrigenReactor,ReactorType,cs133,cs134,cs135,cs137,eu153,eu154,ba136,ba138,sm149,sm150,sm152,pu239,pu240,pu241,pu242
BAL-2|1476|42|15,45100.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.070,2.580,1.700,0.690
BAL-2|1476|42|31,45800.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.190,2.650,1.700,0.700
BAL-2|1476|42|6,46700.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.100,2.670,1.760,0.720
BAL-3|1591|23|912,46200.0,0.0,4.4,Balakovo-3_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.260,2.620,1.760,0.750
BAL-3|1591|23|581,47900.0,0.0,4.4,Balakovo-3_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.080,2.680,1.760,0.880
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YNK-1|E6|NW-A1|G-127,42500.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.937,2.533,1.751,0.831
YNK-1|E6|NW-A1|G-128,42500.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.849,2.552,1.761,0.803
YNK-1|E6|NW-A1|N-21,42600.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.461,2.718,1.839,0.794
YNK-1|E6|NW-A1|T-165,43200.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.199,2.679,1.810,0.823


In [31]:
full = ['ag109', 'ag110m', 'am', 'am241', 'am242', 'am242m', 'am243', 
        'ce140', 'ce142', 'ce144', 
        'cm', 'cm242', 'cm243', 'cm244', 'cm245', 'cm246', 'cm247', 
        'cs', 'cs133', 'cs134', 'cs135', 'cs137', 
        'eu', 'eu151', 'eu152', 'eu153', 'eu154', 'eu155', 
        'gd', 'gd152', 'gd154', 'gd155', 'gd156', 'gd157', 'gd158', 'gd160', 
        'i129', 'la139', 'mo100', 'mo95', 'mo97', 'mo98', 'nb95', 
        'nd', 'nd142', 'nd143', 'nd144', 'nd145', 'nd146', 'nd148', 'nd150', 
        'np237', 'pd105', 'pd108', 'pm147', 
        'pu', 'pu236', 'pu238', 'pu239', 'pu240', 'pu241', 'pu242', 'pu244', 
        'rh103', 'ru101', 'ru103', 'ru106', 'sb125', 
        'sm', 'sm147', 'sm148', 'sm149', 'sm150', 'sm151', 'sm152', 'sm154', 
        'sr90', 'tc99', 
        'u', 'u232', 'u233', 'u234', 'u235', 'u236', 'u238']

In [36]:
formatted_full = formatted[labels + full]
formatted_full.to_pickle('sfcompo_all_isos.pkl')
formatted_full

Unnamed: 0,Burnup,CoolingTime,Enrichment,OrigenReactor,ReactorType,ag109,ag110m,am,am241,am242,...,sm154,sr90,tc99,u,u232,u233,u234,u235,u236,u238
BAL-2|1476|42|15,45100.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.048,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000,10.640,6.170,926.55
BAL-2|1476|42|31,45800.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.226,10.660,6.000,925.00
BAL-2|1476|42|6,46700.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.066,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000,10.270,6.110,925.18
BAL-3|1591|23|912,46200.0,0.0,4.4,Balakovo-3_VVER-1000,pwr,0.0,0.0,0.0,0.054,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000,10.180,6.150,925.48
BAL-3|1591|23|581,47900.0,0.0,4.4,Balakovo-3_VVER-1000,pwr,0.0,0.0,0.0,0.071,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000,8.180,6.340,925.66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YNK-1|E6|NW-A1|G-127,42500.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.111,7.249,4.770,932.06
YNK-1|E6|NW-A1|G-128,42500.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.105,7.125,4.598,932.39
YNK-1|E6|NW-A1|N-21,42600.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.102,7.373,4.569,931.37
YNK-1|E6|NW-A1|T-165,43200.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.104,7.020,4.703,931.16


In [45]:
# Testing pandas functionality for different script
testme = formatted.sample(5)
lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']

In [47]:
for sim_idx, row in testme.iterrows():
    test_sample = testme.loc[testme.index == sim_idx].drop(lbls, axis=1)
    print(isinstance(test_sample, pd.DataFrame))
    test_answer = testme.loc[testme.index == sim_idx, lbls]
    #print(test_answer)
    test_test_sample = row.drop(lbls)
    print(isinstance(test_test_sample, pd.Series))
    test_test_answer = row[lbls]
    #print(test_test_answer)

True
True
True
True
True
True
True
True
True
True


In [36]:
sim_idx = 'BAL-2|1476|42|15'
row = formatted.loc[sim_idx] # series!
formatted.drop(sim_idx, inplace=True)
formatted

Unnamed: 0,Burnup,CoolingTime,Enrichment,OrigenReactor,ReactorType,ba136,ba138,cs133,cs134,cs135,cs137,eu153,eu154,pu239,pu240,pu241,pu242,sm149,sm150,sm152
BAL-2|1476|42|31,45800.0,0.0,4.40,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.190,2.650,1.7000,0.7000,0.0,0.0,0.0
BAL-2|1476|42|6,46700.0,0.0,4.40,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.100,2.670,1.7600,0.7200,0.0,0.0,0.0
BAL-3|1591|23|912,46200.0,0.0,4.40,Balakovo-3_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.260,2.620,1.7600,0.7500,0.0,0.0,0.0
BAL-3|1591|23|581,47900.0,0.0,4.40,Balakovo-3_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.080,2.680,1.7600,0.8800,0.0,0.0,0.0
CCL-1|D101|MLA098|JJ,18680.0,0.0,2.72,Calvert Cliffs-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.486,1.410,0.5154,0.1581,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YNK-1|E6|NW-A1|G-127,42500.0,0.0,3.40,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.937,2.533,1.7510,0.8310,0.0,0.0,0.0
YNK-1|E6|NW-A1|G-128,42500.0,0.0,3.40,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.849,2.552,1.7610,0.8030,0.0,0.0,0.0
YNK-1|E6|NW-A1|N-21,42600.0,0.0,3.40,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.461,2.718,1.8390,0.7940,0.0,0.0,0.0
YNK-1|E6|NW-A1|T-165,43200.0,0.0,3.40,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.199,2.679,1.8100,0.8230,0.0,0.0,0.0


In [38]:
# reappend sim row for future calculations
formatted.loc[sim_idx] = row
formatted.tail()

Unnamed: 0,Burnup,CoolingTime,Enrichment,OrigenReactor,ReactorType,ba136,ba138,cs133,cs134,cs135,cs137,eu153,eu154,pu239,pu240,pu241,pu242,sm149,sm150,sm152
YNK-1|E6|NW-A1|G-128,42500.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.849,2.552,1.761,0.803,0.0,0.0,0.0
YNK-1|E6|NW-A1|N-21,42600.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.461,2.718,1.839,0.794,0.0,0.0,0.0
YNK-1|E6|NW-A1|T-165,43200.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.199,2.679,1.81,0.823,0.0,0.0,0.0
YNK-1|E6|NW-A1|N-22,44400.0,0.0,3.4,Yankee-1_PWR,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.402,2.776,1.898,0.86,0.0,0.0,0.0
BAL-2|1476|42|15,45100.0,0.0,4.4,Balakovo-2_VVER-1000,pwr,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.07,2.58,1.7,0.69,0.0,0.0,0.0
