<a href="https://colab.research.google.com/github/skywalker0803r/ruby_research/blob/main/SFOPprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [220]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [221]:
#!pip install git+https://github.com/jonbarron/robust_loss_pytorch
import robust_loss_pytorch
from torch.utils.data import TensorDataset,DataLoader
from copy import deepcopy

In [222]:
import torch
from torch import nn
import torch.nn.functional as F
from torch import tensor
from torch.nn import Linear,ReLU,Sigmoid,Tanh
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.metrics import r2_score,mean_squared_error
from math import sqrt
import joblib
from torch.utils.tensorboard import SummaryWriter
import warnings;warnings.simplefilter('ignore')
from tqdm import tqdm_notebook as tqdm
import os
from sklearn.utils import shuffle
import random
random.seed(42)
torch.manual_seed(42)
root = '/gdrive/My Drive/for Ruby'
excel_list=os.listdir(root)
excel_list

['淡水河流域.csv', '淡水河流域final_0429.ipynb', '驗證淡水河流域03_15.ipynb', '2.xlsx']

In [223]:
with open('/gdrive/My Drive/foo.txt', 'w') as f:
  f.write('Hello Google Drive!')
!cat '/gdrive/My Drive/foo.txt'

Hello Google Drive!

# some function

In [224]:
def mape(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def get_group_col(df,name):
    condition = df.columns.str.contains(name)
    return df.columns[condition].tolist()

def split_data(df,x_col,y_col):
  df = shuffle(df).astype('float32')
  X,Y = df[x_col],df[y_col]
  sp1 = int(len(df)*0.8)
  sp2 = int(len(df)*0.9)
  data = {}
  data['X_train'],data['Y_train'] = X.iloc[:sp1,:],Y.iloc[:sp1,:]
  data['X_vaild'],data['Y_vaild'] = X.iloc[sp1:sp2,:],Y.iloc[sp1:sp2,:]
  data['X_test'],data['Y_test'] = X.iloc[sp2:,:],Y.iloc[sp2:,:]
  return data

def show_metrics(y_pred,y_real):
  res = pd.DataFrame(index=y_pred.columns,columns=['R2','MSE','MAPE'])
  for i in y_pred.columns:
    res.loc[i,'R2'] = r2_score(y_real[i],y_pred[i])
    res.loc[i,'MSE'] = mean_squared_error(y_real[i],y_pred[i])
    res.loc[i,'MAPE'] = mape(y_real[i],y_pred[i])
  res.loc['AVG'] = res.mean(axis=0)
  return res

def init_weights(m):
  if hasattr(m,'weight'):
    torch.nn.init.xavier_uniform(m.weight)
  if hasattr(m,'bias'):
    m.bias.data.fill_(0)

# Part1：預測塔頂塔底組成

# load data

In [225]:
df = pd.read_excel(root+'/2.xlsx') #讀取excel檔
df = df.drop(['Unnamed: 1', 'Unnamed: 2'], axis=1)
df = df.drop(index=1)
col_name = df.iloc[0,:]
df.columns = col_name
df = df.iloc[1:,:] 
df.index = df.iloc[:,0].values
df = df.drop(df.columns[0],axis=1)
print(df.shape)
for i in df.columns:
    df[i] = pd.to_numeric(df[i],errors='coerce')

df['Condenser Duty'] = df['Condenser Duty'].apply(lambda x: x*-1)
df.head()

(1458, 18)


Unnamed: 0,F.TEMP.MIXED,Feed Flow,F.FLOW.BENZENE,F.FLOW.TOLUENE,Total stage,Reflux ratio,D/F,Feed stage,Stage-2 Efficiencies,Stage-45 Efficiencies,Condenser Temperature,Condenser Duty,D stream BENZENE,D stream TOLUENE,Reboiler Temp,Reboiler Duty,W stream BENZENE,W stream TOLUENE
Case 1,103,50,40,60,46,2.5,0.33,14,0.7,0.7,82.961772,5712.749075,0.991645,0.008355,112.480976,5503.283448,0.146149,0.853851
Case 2,103,50,50,50,46,2.5,0.33,14,0.7,0.7,82.990896,5809.514269,0.989946,0.010054,106.774195,3042.56197,0.284634,0.715366
Case 3,103,50,60,40,46,2.5,0.33,14,0.7,0.7,83.029809,5906.674859,0.987682,0.012318,101.915,593.089652,0.425427,0.574573
Case 4,103,50,40,60,46,2.5,0.33,19,0.7,0.7,82.845927,5708.665107,0.998416,0.001584,112.594861,5500.205696,0.143621,0.856379
Case 5,103,50,50,50,46,2.5,0.33,19,0.7,0.7,82.851761,5804.515753,0.998074,0.001926,106.894313,3038.549282,0.281451,0.718549


In [226]:
class part(object):
  def __init__(self,df,x_col,y_col,hidden_size=256,lr=0.01,max_epochs=500,robust_loss=False):
    
    # config
    self.robust_loss = robust_loss
    self.x_col = x_col
    self.y_col = y_col
    self.hidden_size = hidden_size
    self.lr = lr
    self.max_epochs = max_epochs
    self.ss_x = MinMaxScaler().fit(df[x_col])
    self.ss_y = MinMaxScaler().fit(df[y_col])
    
    # model
    self.net = nn.Sequential(
        nn.Linear(len(self.x_col),self.hidden_size),nn.ReLU(),
        nn.Linear(self.hidden_size,self.hidden_size),nn.ReLU(),
        nn.Linear(self.hidden_size,len(self.y_col)),nn.Sigmoid(),
                  ).apply(init_weights)
    
    # loss_function
    if self.robust_loss == True:
      adaptive = robust_loss_pytorch.adaptive.AdaptiveLossFunction(
          num_dims = len(self.y_col),
          float_dtype = np.float32,
          device = 'cpu')
      params = list(self.net.parameters())+list(adaptive.parameters())
      self.loss_fn = lambda y_i,y:torch.mean(adaptive.lossfun((y_i - y)))
    else:
      params = list(self.net.parameters())
      self.loss_fn = lambda y_i,y:torch.mean((y_i-y)**2)
    
    # optimizer
    self.optimizer = torch.optim.Adam(params,lr=self.lr)
    
    # dataset
    self.data = split_data(df,self.x_col,self.y_col)
    
    self.train_data = TensorDataset(
        torch.FloatTensor(self.ss_x.transform(self.data['X_train'])),
        torch.FloatTensor(self.ss_y.transform(self.data['Y_train'])),
        )
    self.train_iter = DataLoader(self.train_data,batch_size=64)
    
    self.vaild_data = TensorDataset(
        torch.FloatTensor(self.ss_x.transform(self.data['X_vaild'])),
        torch.FloatTensor(self.ss_y.transform(self.data['Y_vaild'])),
        )
    self.vaild_iter = DataLoader(self.vaild_data,batch_size=64)

  def train_step(self):
    self.net.train()
    total_loss = 0
    for t,(x,y) in enumerate(self.train_iter):
      y_hat = self.net(x)
      loss = self.loss_fn(y_hat,y)
      loss.backward()
      self.optimizer.step()
      self.optimizer.zero_grad()
      total_loss += loss.item()
    return total_loss/t
  
  def valid_step(self):
    self.net.eval()
    total_loss = 0
    for t,(x,y) in enumerate(self.vaild_iter):
      y_hat = self.net(x)
      loss = self.loss_fn(y_hat,y)
      total_loss += loss.item()
    return total_loss/t

  
  def train(self):   
    history = {
        'train_loss':[],
        'valid_loss':[]
        }
    current_loss = np.inf
    best_model = None
    
    for i in range(self.max_epochs):
      history['train_loss'].append(self.train_step())
      history['valid_loss'].append(self.valid_step())
      print("epoch:{} train_loss:{} valid_loss:{}".format(
          i,
          history['train_loss'][-1],
          history['valid_loss'][-1]))
      
      if history['valid_loss'][-1] <= current_loss:
        best_model = deepcopy(self.net.eval())
        current_loss = history['valid_loss'][-1]
        print('save best model')
    
    self.net = best_model
    return best_model

  def test(self):
    predict = self.net(torch.FloatTensor(self.ss_x.transform(self.data['X_test'])))
    predict = self.ss_y.inverse_transform(predict.detach().numpy())
    predict = pd.DataFrame(predict,columns=self.y_col)
    res = show_metrics(predict,self.data['Y_test'])
    return res

# PART1

In [227]:
part1 = part(df,df.columns[:10],df.columns[[12,13,16,17]])
part1.train()

epoch:0 train_loss:0.04849706036556098 valid_loss:0.012787146028131247
save best model
epoch:1 train_loss:0.008403990120213065 valid_loss:0.008509476610925049
save best model
epoch:2 train_loss:0.007122678902103669 valid_loss:0.00478197552729398
save best model
epoch:3 train_loss:0.008374484411130348 valid_loss:0.004135293536819518
save best model
epoch:4 train_loss:0.007135403420155247 valid_loss:0.005118321627378464
epoch:5 train_loss:0.004087934522734334 valid_loss:0.0026826912944670767
save best model
epoch:6 train_loss:0.0033349245527966153 valid_loss:0.0023121022968553007
save best model
epoch:7 train_loss:0.0030016003972074636 valid_loss:0.0021124045306351036
save best model
epoch:8 train_loss:0.00290153186102139 valid_loss:0.002183698510634713
epoch:9 train_loss:0.002824062402295466 valid_loss:0.002160378237022087
epoch:10 train_loss:0.002747308557445649 valid_loss:0.0021355104399845004
epoch:11 train_loss:0.002669029859437918 valid_loss:0.0021063952444819734
save best model
ep

Sequential(
  (0): Linear(in_features=10, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=4, bias=True)
  (5): Sigmoid()
)

In [228]:
res = part1.test()
res

Unnamed: 0_level_0,R2,MSE,MAPE
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D stream BENZENE,0.999908,4.07981e-07,0.0448104
D stream TOLUENE,0.999923,3.42494e-07,92.3445
W stream BENZENE,0.999884,2.00349e-06,22429.0
W stream TOLUENE,0.999921,1.36085e-06,0.09871
AVG,0.999909,1.02871e-06,5630.38


# PART2

In [229]:
part2 = part(df,df.columns[:10],df.columns[[11,15]])
part2.train()

epoch:0 train_loss:0.026806106583939657 valid_loss:0.0117783488240093
save best model
epoch:1 train_loss:0.003927192605462753 valid_loss:0.004447267972864211
save best model
epoch:2 train_loss:0.0013055351737421006 valid_loss:0.0020785971428267658
save best model
epoch:3 train_loss:0.0007248069903451122 valid_loss:0.0012114523851778358
save best model
epoch:4 train_loss:0.0004338614364516818 valid_loss:0.0010358991858083755
save best model
epoch:5 train_loss:0.0003161315350250031 valid_loss:0.0008402751554967836
save best model
epoch:6 train_loss:0.00024670276818344265 valid_loss:0.0007117091881809756
save best model
epoch:7 train_loss:0.00020625937092214977 valid_loss:0.0006133210699772462
save best model
epoch:8 train_loss:0.00016020504598499328 valid_loss:0.0005056451045675203
save best model
epoch:9 train_loss:0.0001399035823447371 valid_loss:0.0004198568931315094
save best model
epoch:10 train_loss:0.00011736053278986624 valid_loss:0.000402075813326519
save best model
epoch:11 tra

Sequential(
  (0): Linear(in_features=10, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=2, bias=True)
  (5): Sigmoid()
)

In [230]:
part2.test()

Unnamed: 0_level_0,R2,MSE,MAPE
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Condenser Duty,0.99997,5779.15,0.25491
Reboiler Duty,0.999934,7487.57,1.45082
AVG,0.999952,6633.36,0.852863


# Part 3：預測操作條件(temp)

In [231]:
part3 = part(df,x_col,df.columns[[10,14]])
part3.train()

epoch:0 train_loss:0.04348126485840314 valid_loss:0.03231658227741718
save best model
epoch:1 train_loss:0.00984770212865745 valid_loss:0.011238415958359838
save best model
epoch:2 train_loss:0.010271703779128276 valid_loss:0.015181856928393245
epoch:3 train_loss:0.005565982978118377 valid_loss:0.00777910026954487
save best model
epoch:4 train_loss:0.004475884230083061 valid_loss:0.007220234663691372
save best model
epoch:5 train_loss:0.005950257589574903 valid_loss:0.011524595902301371
epoch:6 train_loss:0.004624112525359831 valid_loss:0.006045385845936835
save best model
epoch:7 train_loss:0.004316484769030164 valid_loss:0.004707692482043058
save best model
epoch:8 train_loss:0.0034891378244436863 valid_loss:0.005867962958291173
epoch:9 train_loss:0.0028869090165244415 valid_loss:0.004472822358366102
save best model
epoch:10 train_loss:0.0026382624281622055 valid_loss:0.0046858978748787194
epoch:11 train_loss:0.0025854513159073475 valid_loss:0.0044030751450918615
save best model
epoc

Sequential(
  (0): Linear(in_features=10, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=2, bias=True)
  (5): Sigmoid()
)

In [232]:
part3.test()

Unnamed: 0_level_0,R2,MSE,MAPE
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Condenser Temperature,0.999951,0.000109068,0.00828698
Reboiler Temp,0.999967,0.00123659,0.0214112
AVG,0.999959,0.00067283,0.0148491
