In [1]:
import pandas as pd
import os 
from sklearn.metrics import mean_squared_error
from tqdm import tqdm_notebook as tqdm
import numpy as np
import joblib
# Show which files are present in the phase-2 data folder.
os.listdir('../data/phase_2')

['Aroma-1_Unit_000_Simulation_Results_Rerun_001_050_Yu Final.xlsx',
 'FCFC Import Naphtha Composition Lab_001~100_R2.xlsx',
 'train(add_split_factor).csv',
 'train.csv',
 'transform_matrix.csv']

In [2]:
def get_col(df,name):
    """Return, as a list, the column labels of ``df`` that contain ``name``.

    ``name`` is handed to ``Index.str.contains``, which treats it as a
    regular-expression pattern by default. The labels come back in the same
    order they appear in ``df.columns``; an empty list means nothing matched.
    """
    hit_mask = df.columns.str.contains(name)
    matching = df.columns[hit_mask]
    return matching.tolist()

# load data

In [3]:
# Read the phase-2 training table, using the first CSV column as the row index.
# Only the first five rows are kept in `df` for the rest of the notebook.
df = pd.read_csv(
    '../data/phase_2/train(add_split_factor).csv',
    index_col=0,
).head(5)
df.head()

Unnamed: 0,Case Conditions_Feed Rate to C013 (Input),Case Conditions_C7+ in Light End (Input),Case Conditions_Heart Cut Prod. Rate (Input),Case Conditions_C6P- in Heart Cut (Input),Case Conditions_Feed Rate to C013 (Results),Case Conditions_C7+ in Light End (Results),Case Conditions_Heart Cut Prod. Rate (Results),Case Conditions_C6P- in Heart Cut (Results),Case Conditions_C10+ in Heart Cut (Results),Naphtha Properties_Naphtha Feed Rate,...,"Individual Component to Heavy End Split Factor_2,2,3,3-Tetramethylhexane",Individual Component to Heavy End Split Factor_3-Methylnonane,Individual Component to Heavy End Split Factor_tert-Butylcyclohexane,Individual Component to Heavy End Split Factor_n-Decane,"Individual Component to Heavy End Split Factor_1,2-Diethylbenzene",Individual Component to Heavy End Split Factor_n-Undecane,Individual Component to Heavy End Split Factor_n-Pentylbenzene,Individual Component to Heavy End Split Factor_n-Dodecane,Individual Component to Heavy End Split Factor_n-Hexylbenzene,Individual Component to Heavy End Split Factor_n-Tridecane
0,10.0,2.5,100.0,1.0,10.000001,2.500057,100.000198,1.000427,6.138641,118.000084,...,0.348342,0.378793,0.381912,0.397334,0.409138,0.433925,0.440811,0.451332,0.454764,1.0
1,10.0,5.0,100.0,1.0,10.000001,4.999882,100.000122,1.000401,6.24905,118.0,...,0.336983,0.367462,0.370631,0.386031,0.397878,0.422758,0.429677,0.440263,0.443717,1.0
2,10.0,10.0,100.0,1.0,10.000001,10.000006,100.000008,0.999993,6.376052,118.0,...,0.323372,0.354403,0.357728,0.37326,0.385311,0.410579,0.417606,0.428374,0.431884,1.0
3,10.0,2.5,100.0,1.5,10.000001,2.50002,100.000275,1.500293,6.316332,118.0,...,0.334064,0.360351,0.362654,0.37668,0.387033,0.409416,0.41564,0.42523,0.428354,1.0
4,10.0,5.0,100.0,1.5,10.000001,5.000001,99.999817,1.499961,6.416863,118.000084,...,0.323809,0.350009,0.352356,0.366307,0.376672,0.399048,0.405285,0.414893,0.418027,1.0


# define columns

In [4]:
# Feed-side columns: every 'Naphtha Properties' column except the first five
# and the last one (presumably those are non-composition fields — TODO confirm).
xna_col = get_col(df, 'Naphtha Properties')[5:-1]

# Split-factor columns, one list per product stream (light end, heart cut, heavy end).
sp_le_col, sp_hc_col, sp_he_col = [
    get_col(df, pattern)
    for pattern in (
        'Light End Split Factor',
        'Heart Cut Split Factor',
        'Heavy End Split Factor',
    )
]

# All split-factor columns in the fixed order light end -> heart cut -> heavy end.
sp_col_162 = [*sp_le_col, *sp_hc_col, *sp_he_col]

# get xna and sp162

In [5]:
# Slice the loaded table into the two transformer inputs:
# the naphtha feed columns and the combined split-factor columns.
xna, sp162 = df[xna_col], df[sp_col_162]

# build transformer

In [6]:
class transformer2(object):
    """Turn a feed composition plus split factors into product-stream compositions.

    For each of the three streams (light end, heart cut, heavy end) the feed
    values are multiplied element-wise by that stream's split factors and every
    row is rescaled so that it sums to 100.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame whose column labels define the output and split-factor column
        names. When omitted, the notebook-level ``df`` is used, so
        ``transformer2()`` keeps working exactly as before.
    """

    def __init__(self, data=None):
        # Fall back to the notebook global only when no frame is supplied;
        # passing ``data`` removes the hidden dependency on ``df``.
        source = df if data is None else data

        # output columns (the slices drop leading/trailing matches that are
        # presumably not per-component values — TODO confirm against the data)
        self.le = get_col(source,'Light End Product Properties')[3:-1]
        self.hc = get_col(source,'Heart Cut Product Properties')[4:-1]
        self.he = get_col(source,'Heavy End Product Properties')[3:-1]

        # split factor columns
        self.le_sp = get_col(source,'Light End Split Factor')
        self.hc_sp = get_col(source,'Heart Cut Split Factor')
        self.he_sp = get_col(source,'Heavy End Split Factor')

    @staticmethod
    def _calculate_output(X,S,col_name):
        """Compute ``100 * X*S / rowsum(X*S)`` and label the result.

        Parameters
        ----------
        X, S : pandas.DataFrame
            Feed values and split factors. They are combined positionally
            (via ``.values``), so both must have the same shape.
        col_name : list of str
            Column labels for the returned frame.

        Returns
        -------
        pandas.DataFrame
            Same shape as ``X``; built from a bare array, so it carries a fresh
            default row index rather than the index of ``X``.

        Raises
        ------
        ValueError
            If ``X`` and ``S`` do not have identical shapes.

        Notes
        -----
        A row whose total is zero is divided by zero and yields NaN/inf;
        no guarding is applied here.
        """
        X, S = X.values, S.values
        if X.shape != S.shape:
            # Without this check a one-row operand would broadcast silently
            # and rows would be normalised by the wrong total.
            raise ValueError(
                'X and S must have the same shape, got %s and %s'
                % (X.shape, S.shape)
            )
        weighted = X*S
        # Row totals. Same values as diag(X @ S.T), but without materialising
        # an (n_rows x n_rows) matrix just to read its diagonal.
        F = weighted.sum(axis=1, keepdims=True)
        Y = 100*weighted/F
        return pd.DataFrame(Y,columns=col_name)

    def __call__(self,xna,sp162):
        """Return the light-end, heart-cut and heavy-end outputs side by side.

        Parameters
        ----------
        xna : pandas.DataFrame
            Feed values; the same frame is used for all three streams.
        sp162 : pandas.DataFrame
            Frame containing the split-factor columns of all three streams.

        Returns
        -------
        pandas.DataFrame
            Column-wise concatenation of the three per-stream results, in the
            order light end, heart cut, heavy end.
        """
        sle = sp162[self.le_sp] #SLE
        shc = sp162[self.hc_sp] #SHC
        she = sp162[self.he_sp] #SHE
        x_le = self._calculate_output(xna,sle,self.le) #XLE
        x_hc = self._calculate_output(xna,shc,self.hc) #XHC
        x_he = self._calculate_output(xna,she,self.he) #XHE
        return pd.concat([x_le,x_hc,x_he],axis=1)

# make transform

In [7]:
# Build the transformer and apply it to the feed columns and split factors;
# the last line displays the computed product table.
tr = transformer2()
y_pred = tr(xna=xna, sp162=sp162)
y_pred

Unnamed: 0,Light End Product Properties_Oxygen,Light End Product Properties_Methane,Light End Product Properties_Ethane,Light End Product Properties_Propane,Light End Product Properties_n-Butane,Light End Product Properties_i-Pentane,Light End Product Properties_n-Pentane,Light End Product Properties_tr2-Pentene,Light End Product Properties_Cyclopentane,Light End Product Properties_3-Methylpentane,...,"Heavy End Product Properties_2,2,3,3-Tetramethylhexane",Heavy End Product Properties_3-Methylnonane,Heavy End Product Properties_tert-Butylcyclohexane,Heavy End Product Properties_n-Decane,"Heavy End Product Properties_1,2-Diethylbenzene",Heavy End Product Properties_n-Undecane,Heavy End Product Properties_n-Pentylbenzene,Heavy End Product Properties_n-Dodecane,Heavy End Product Properties_n-Hexylbenzene,Heavy End Product Properties_n-Tridecane
0,0.0,0.0,0.0,0.031463,0.046894,0.40197,1.138108,0.01539,0.196111,19.81685,...,5.498116,13.956124,2.497176,5.281884,3.507997,1.090498,0.220558,0.020529,0.041371,0.0
1,0.0,0.0,0.0,0.029677,0.044222,0.378933,1.072705,0.014506,0.184733,18.650427,...,5.565874,14.167475,2.535975,5.369975,3.569902,1.111782,0.224973,0.020956,0.042241,0.0
2,0.0,0.0,0.0,0.027325,0.040709,0.348719,0.987031,0.013348,0.169893,17.142468,...,5.754648,14.72209,2.637227,5.594397,3.72486,1.163364,0.235584,0.021969,0.044298,0.0
3,0.0,0.0,0.0,0.034538,0.05142,0.43984,1.243597,0.01682,0.212587,20.744602,...,4.928049,12.40869,2.216235,4.679968,3.101525,0.96164,0.194369,0.018078,0.036421,0.0
4,0.0,0.0,0.0,0.032552,0.048452,0.41428,1.171055,0.015839,0.199986,19.467864,...,4.967569,12.534007,2.239314,4.732872,3.139065,0.974726,0.197096,0.018343,0.036962,0.0


In [8]:
# Take the recorded values for the same columns (and in the same order) as
# `y_pred`, so the two tables can be compared side by side.
y_real = df.loc[:, y_pred.columns]
y_real

Unnamed: 0,Light End Product Properties_Oxygen,Light End Product Properties_Methane,Light End Product Properties_Ethane,Light End Product Properties_Propane,Light End Product Properties_n-Butane,Light End Product Properties_i-Pentane,Light End Product Properties_n-Pentane,Light End Product Properties_tr2-Pentene,Light End Product Properties_Cyclopentane,Light End Product Properties_3-Methylpentane,...,"Heavy End Product Properties_2,2,3,3-Tetramethylhexane",Heavy End Product Properties_3-Methylnonane,Heavy End Product Properties_tert-Butylcyclohexane,Heavy End Product Properties_n-Decane,"Heavy End Product Properties_1,2-Diethylbenzene",Heavy End Product Properties_n-Undecane,Heavy End Product Properties_n-Pentylbenzene,Heavy End Product Properties_n-Dodecane,Heavy End Product Properties_n-Hexylbenzene,Heavy End Product Properties_n-Tridecane
0,0.0,0.0,0.0,0.031463,0.046894,0.40197,1.138108,0.01539,0.196111,19.816853,...,5.498116,13.956123,2.497176,5.281883,3.507996,1.090498,0.220558,0.020529,0.041371,0.0
1,0.0,0.0,0.0,0.029677,0.044222,0.378933,1.072704,0.014506,0.184733,18.650423,...,5.565875,14.167476,2.535975,5.369976,3.569902,1.111782,0.224973,0.020956,0.042241,0.0
2,0.0,0.0,0.0,0.027325,0.040709,0.348719,0.987031,0.013348,0.169893,17.142469,...,5.754647,14.722088,2.637226,5.594396,3.724859,1.163364,0.235584,0.021969,0.044298,0.0
3,0.0,0.0,0.0,0.034538,0.05142,0.43984,1.243597,0.01682,0.212587,20.7446,...,4.92805,12.40869,2.216235,4.679968,3.101526,0.96164,0.194369,0.018078,0.036421,0.0
4,0.0,0.0,0.0,0.032552,0.048452,0.41428,1.171054,0.015839,0.199986,19.467861,...,4.967569,12.534007,2.239314,4.732872,3.139065,0.974726,0.197096,0.018343,0.036962,0.0


# save

In [9]:
# Persist the transformer instance to disk with joblib.
# NOTE(review): `transformer2` is defined inside this notebook, so loading the
# file elsewhere presumably requires the same class definition to be available
# first — confirm before relying on the saved file in another process.
joblib.dump(tr,'../model/transformer(SP162_to_Y162).pkl')

['../model/transformer(SP162_to_Y162).pkl']