In [13]:
import pandas as pd
import numpy as np

from sklearn.decomposition import PCA, FactorAnalysis, FastICA
from sklearn.preprocessing import scale

In [14]:
def splitXY(dfXY):
    """Separate a labelled training frame into features and label columns.

    Parameters
    ----------
    dfXY : pandas.DataFrame
        Must contain the columns 'ReactorType', 'CoolingTime', 'Enrichment',
        'Burnup' and 'OrigenReactor'; a 'total' column is optional.

    Returns
    -------
    tuple
        ``(dfX, r_dfY, c_dfY, e_dfY, b_dfY)``: the frame without the five
        label columns (and without 'total' when present), followed by the
        'ReactorType', 'CoolingTime', 'Enrichment' and 'Burnup' columns.
        'OrigenReactor' is removed from the features but is not returned.
    """
    returned_labels = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup']
    not_features = returned_labels + ['OrigenReactor']
    # 'total' is optional in the input, so only drop it when it exists.
    if 'total' in dfXY.columns:
        not_features.append('total')
    dfX = dfXY.drop(columns=not_features)
    r_dfY, c_dfY, e_dfY, b_dfY = [dfXY[label] for label in returned_labels]
    return dfX, r_dfY, c_dfY, e_dfY, b_dfY

# Presumably the number of cross-validation folds used by later cells -- TODO confirm.
CV = 5
# Training set location, relative to the notebook's working directory.
trainset = '../pkl_trainsets/2jul2018/22jul2018_trainset3_nucs_fissact_not-scaled.pkl'
# NOTE: unpickling can execute arbitrary code; only load trainsets from a trusted source.
trainXY = pd.read_pickle(trainset)
# Uncomment to work on a random half of the rows for faster iteration.
#trainXY = trainXY.sample(frac=0.5)
X, rY, cY, eY, bY = splitXY(trainXY)
# scale() hands back a bare array, so X's index has to be passed in explicitly.
# Without it trainX gets a fresh 0..n-1 index and no longer lines up with
# rY/cY/eY/bY (for example after the optional .sample() above, which shuffles
# the index).
trainX = pd.DataFrame(scale(X), columns=X.columns, index=X.index)

# PCA

In [15]:
# Fit a 3-component PCA with whitening on the standardized features; `pca` is
# kept because the next cell reads its components_.
pca = PCA(n_components=3, whiten=True)
# The projected samples are only displayed here, not stored in a variable.
pca.fit_transform(trainX)

array([[-1.52268789, -0.12209472,  1.11506217],
       [-1.41352098, -0.15883125,  0.91789136],
       [-1.4129673 , -0.1594127 ,  0.91545403],
       ...,
       [-0.77566032,  6.25175067, -1.45741589],
       [-0.76512761,  5.04317696, -1.03613692],
       [-0.77184825,  5.03730189, -1.03431876]])

In [23]:
# Component table: one row per column of trainX, one column per fitted component.
# The 'PC-n' labels are generated from the number of fitted components, so this
# cell keeps working if n_components is changed in the PCA cell (a fixed list
# of three labels would raise a shape mismatch).
pca_components = pd.DataFrame(
    pca.components_.T,
    columns=['PC-{}'.format(k + 1) for k in range(pca.components_.shape[0])],
    index=trainX.columns,
)
pca_components

Unnamed: 0,PC-1,PC-2,PC-3
ba138,0.163513,0.021385,0.030964
ce140,0.163368,0.023822,0.039556
ce142,0.163646,0.020039,0.02759
ce144,-0.025167,0.548237,-0.147559
cs133,0.163737,0.00747,-0.003976
cs135,0.024738,-0.045275,-0.350779
cs137,0.161863,0.018446,0.018126
la139,0.163683,0.018771,0.020935
mo100,0.163399,0.02136,0.030769
mo95,0.162908,0.016343,0.020033


# Factor Analysis

In [20]:
# Fit a 3-factor FactorAnalysis model on the standardized features; `fa` is
# kept because the next cell reads its components_.
fa = FactorAnalysis(n_components=3)
# The per-sample factor scores are only displayed here, not stored in a variable.
fa.fit_transform(trainX)

array([[-1.27234134,  2.03591202, -0.48497343],
       [-1.20059417,  1.57481272, -0.19596514],
       [-1.20038941,  1.57161224, -0.20181028],
       ...,
       [-0.76374927, -0.7028066 ,  1.27830758],
       [-0.7637493 , -0.70280553,  1.27830621],
       [-0.76374937, -0.70280447,  1.27827978]])

In [24]:
# Component table: one row per column of trainX, one column per fitted factor.
# Labels are generated from the number of fitted factors, so this cell keeps
# working if n_components is changed in the FactorAnalysis cell.
# NOTE(review): these are factor-analysis components, yet the columns carry the
# 'PC-n' labels of the PCA table; the labels are left unchanged in case other
# cells select columns by name.
fa_components = pd.DataFrame(
    fa.components_.T,
    columns=['PC-{}'.format(k + 1) for k in range(fa.components_.shape[0])],
    index=trainX.columns,
)
fa_components

Unnamed: 0,PC-1,PC-2,PC-3
ba138,0.997423,-0.067541,0.024179
ce140,0.997428,-0.059988,0.036402
ce142,0.996282,-0.082523,0.024584
ce144,-0.133783,0.029595,0.097136
cs133,0.984982,-0.171567,0.000495
cs135,0.156359,0.025952,-0.21719
cs137,0.98703,-0.06839,0.012322
la139,0.996522,-0.082218,0.01291
mo100,0.998347,-0.05451,0.01822
mo95,0.98453,-0.132683,0.019981


# ICA

In [17]:
# FastICA draws its initial unmixing matrix at random when no seed is given,
# so repeated runs may return different components. Fix the seed so the values
# shown below can be reproduced on a fresh kernel.
ica = FastICA(n_components=3, whiten=True, random_state=0)
# The recovered sources are only displayed here, not stored in a variable.
ica.fit_transform(trainX)

array([[-0.00028448, -0.00442905,  0.01116088],
       [-0.000409  , -0.00334068,  0.01021062],
       [-0.00041041, -0.00332593,  0.01020382],
       ...,
       [ 0.04094845,  0.00120453, -0.00284122],
       [ 0.03303206,  0.00032461, -0.00121934],
       [ 0.0330016 ,  0.0003312 , -0.0011706 ]])

In [43]:
# Component table: one row per column of trainX, one column per fitted component.
# Labels are generated from the number of fitted components, so this cell keeps
# working if n_components is changed in the FastICA cell.
# NOTE(review): these are independent components, yet the columns carry the
# 'PC-n' labels of the PCA table; the labels are left unchanged in case other
# cells select columns by name.
ica_components = pd.DataFrame(
    ica.components_.T,
    columns=['PC-{}'.format(k + 1) for k in range(ica.components_.shape[0])],
    index=trainX.columns,
)
ica_components

Unnamed: 0,PC-1,PC-2,PC-3
ba138,2.243827e-05,-0.000201,-0.000136
ce140,2.401282e-05,-0.000243,-0.000126
ce142,2.033732e-05,-0.000184,-0.000139
ce144,0.002205198,0.00023,-0.00044
cs133,9.879319e-07,-2.5e-05,-0.000172
cs135,0.0001368016,0.001677,-0.000439
cs137,2.308598e-05,-0.000138,-0.000148
la139,2.146798e-05,-0.000152,-0.000147
mo100,2.253681e-05,-0.0002,-0.000136
mo95,1.325534e-05,-0.000145,-0.000146


# U/Pu only

In [40]:
# Uranium and plutonium isotope columns examined in this section.
upu = [
    'u234', 'u235', 'u236', 'u238',
    'pu239', 'pu240', 'pu241', 'pu242',
]
# Every column name of trainX as a plain Python list.
nucs = list(trainX.columns)

In [41]:
# Restrict the standardized features to the columns named in `upu`,
# then preview the first five rows.
trainX_upu = trainX.filter(items=upu, axis='columns')
trainX_upu.head(n=5)

Unnamed: 0,u234,u235,u236,u238,pu239,pu240,pu241,pu242
0,-0.177572,2.017822,-2.286353,1.208731,-2.619126,-1.971166,-1.09989,-0.780072
1,-0.177572,1.743173,-1.950815,1.164032,-2.151137,-1.915436,-1.09989,-0.780036
2,-0.177572,1.743173,-1.950815,1.164032,-2.145345,-1.915436,-1.09989,-0.780036
3,-0.177572,1.743173,-1.950815,1.164032,-2.140904,-1.915436,-1.09989,-0.780036
4,-0.177572,1.743173,-1.950815,1.164032,-2.121598,-1.915436,-1.09989,-0.780036
