<a href="https://colab.research.google.com/github/skywalker0803r/Wastewater-Biological-Treatment/blob/main/%E8%A9%95%E4%BC%B0critic%E6%A8%A1%E5%9E%8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import torch
import torch.nn as nn


class R2D2(nn.Module):
  """Single-step recurrent network: linear encoder -> LSTM cell -> linear head.

  One call to ``forward`` advances the recurrence by exactly one time step;
  the caller owns the loop over time and threads ``hx``/``cx`` through it.

  Args:
    obs_size: number of input features per step.
    n_actions: number of output features per step.
    hidden_size: width of the encoder output and of the LSTM state.
  """

  def __init__(self,obs_size,n_actions,hidden_size):
    super().__init__()
    # Read by callers to size the initial (hx, cx) state.
    self.hidden_size = hidden_size
    # The attribute names l1/l2/l3 become the state_dict keys, so they must
    # stay stable for previously saved weights to load.
    self.l1 = nn.Linear(obs_size,hidden_size)
    self.l2 = nn.LSTMCell(hidden_size,hidden_size)
    self.l3 = nn.Linear(hidden_size,n_actions)

  def forward(self,x,hx,cx):
    """Advance one time step.

    Args:
      x: input features for this step.
      hx: LSTM hidden state carried over from the previous step.
      cx: LSTM cell state carried over from the previous step.

    Returns:
      Tuple ``(out, hx, cx)``: ``out`` is the head output squashed into
      (0, 1) by a sigmoid; ``hx``/``cx`` are the updated LSTM states.
    """
    encoded = self.l1(x).tanh()
    hx,cx = self.l2(encoded,(hx,cx))
    # The hidden state goes through one more tanh before the output head.
    out = self.l3(hx.tanh()).sigmoid()
    return out,hx,cx

In [8]:
class ActorCritic(nn.Module):
  """Couples a recurrent actor with a recurrent critic, each with its own Adam optimizer.

  The actor maps (state, request) to a sequence of actions; the critic maps
  (state, actions) to a predicted value taken at the final time step.

  Args:
    actor: module called as ``actor(x, hx, cx) -> (action, hx, cx)`` that
      exposes a ``hidden_size`` attribute.
    critic: module with the same call convention whose first output is the
      predicted value.
    time_step: number of time steps unrolled per sequence.
  """

  def __init__(self,actor,critic,time_step):
    super().__init__()
    self.time_step = time_step

    self.actor = actor
    # Fully qualified so the class does not depend on a bare `Adam` name
    # being imported somewhere else in the notebook.
    self.actor_optimizer = torch.optim.Adam(actor.parameters())

    self.critic = critic
    self.critic_optimizer = torch.optim.Adam(critic.parameters())

  @staticmethod
  def _initial_state(net,state):
    """Return zeroed (hx, cx) for `net`, one row per batch element, on `state`'s device."""
    shape = (state.size(0),net.hidden_size)
    hx = torch.zeros(shape,device=state.device)
    cx = torch.zeros(shape,device=state.device)
    return hx,cx

  def forward(self,state,request):
    """Run the actor, then score its actions with the critic.

    Returns:
      The critic's prediction at the last time step.
    """
    actions = self.actor_forward(state,request)
    values = self.critic_forward(state,actions)
    return values

  def actor_forward(self,state,request,eval=False):
    """Given the (uncontrollable) state and a request, produce a sequence of actions.

    Args:
      state: tensor indexed as ``state[:, t, :]``, i.e. (batch, time, features).
      request: 2-D tensor (batch, features); the same request is fed at every step.
      eval: if true the actor is put in eval mode, otherwise in train mode.

    Returns:
      Tensor of shape (batch, time_step, action_features).
    """
    self.actor.train(not eval)

    # Use the instance's own actor (not a module-level global) to size the state.
    hx,cx = self._initial_state(self.actor,state)

    steps = []
    for t in range(self.time_step):
      # At step t the actor sees the state at t, the request, and the carried
      # short-term (hx) / long-term (cx) memory; it returns the action and the
      # updated memories.
      a,hx,cx = self.actor(torch.cat((state[:,t,:],request),dim=1),hx,cx)
      steps.append(a)

    # Stack once along a new time axis instead of growing a tensor with
    # repeated torch.cat calls.
    return torch.stack(steps,dim=1)

  def critic_forward(self,state,action,eval=False):
    """Given the (uncontrollable) state and a sequence of (controllable) actions, predict the output.

    Args:
      state: tensor indexed as ``state[:, t, :]``, i.e. (batch, time, features).
      action: tensor indexed as ``action[:, t, :]``, i.e. (batch, time, features).
      eval: if true the critic is put in eval mode, otherwise in train mode.

    Returns:
      The critic output at the last time step, shape (batch, value_features).

    Raises:
      ValueError: if ``time_step`` is smaller than 1, since there is then no
        last step to return.
    """
    if self.time_step < 1:
      raise ValueError('time_step must be at least 1')

    self.critic.train(not eval)

    hx,cx = self._initial_state(self.critic,state)

    v = None
    for t in range(self.time_step):
      # Feed the state and action at step t together with the carried memory.
      v,hx,cx = self.critic(torch.cat((state[:,t,:],action[:,t,:]),dim=1),hx,cx)

    # Only the final step's prediction is used, so earlier ones are not kept.
    return v

  def train_critic(self,state,action,value):
    """One supervised update of the critic.

    x: [state, action], y: [value]; minimizes the mean squared error between
    the critic's prediction and the label.

    Returns:
      The loss as a Python float.
    """
    self.critic.train()
    self.actor.eval()

    # Start from clean gradients so nothing accumulated earlier (for example
    # by a backward pass through the critic) leaks into this update.
    self.critic_optimizer.zero_grad()

    value_hat = self.critic_forward(state,action)
    loss = ((value_hat-value)**2).mean()
    loss.backward()
    self.critic_optimizer.step()
    self.critic_optimizer.zero_grad()
    return loss.item()

  def train_actor(self,state,request):
    """One update of the actor so that the critic's predicted output matches the request.

    The loss is the mean squared error between request and predicted value,
    plus a smoothness penalty: the standard deviation of the actions over the
    time axis (summed over action features, averaged over the batch). The
    penalty discourages abrupt changes, since an operator cannot suddenly
    adjust the controls by a large amount.

    Returns:
      The loss as a Python float.
    """
    self.actor.train()
    self.actor_optimizer.zero_grad()

    actions = self.actor_forward(state,request)
    # eval=True keeps the critic in eval mode while it only serves as a
    # differentiable scorer for the actor.
    value_hat = self.critic_forward(state,actions,eval=True)

    loss = ((request-value_hat)**2).mean()

    # The unbiased std over a single time step is NaN; skip the penalty then.
    if self.time_step > 1:
      loss = loss + actions.std(dim=1).sum(dim=-1).mean(dim=0)

    loss.backward()
    self.actor_optimizer.step()

    # backward() also deposited gradients on the critic's parameters; clear
    # both networks so the next update of either starts clean.
    self.actor_optimizer.zero_grad()
    self.critic_optimizer.zero_grad()
    return loss.item()

  def save_weights(self):
    """Write both state dicts to 'actor_weights.pt' / 'critic_weights.pt' in the working directory."""
    torch.save(self.actor.state_dict(), 'actor_weights.pt')
    torch.save(self.critic.state_dict(), 'critic_weights.pt')
    print('保存模型')

  def load_weights(self,actor_dir,critic_dir):
    """Load actor and critic state dicts from the given file paths.

    NOTE: torch.load unpickles the file; only load checkpoints you trust.
    """
    # map_location='cpu' lets weights saved on a GPU load on a CPU-only
    # runtime; load_state_dict then copies them onto each parameter's device.
    self.actor.load_state_dict(torch.load(actor_dir,map_location='cpu'))
    self.critic.load_state_dict(torch.load(critic_dir,map_location='cpu'))

In [9]:
import torch
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error
from torch.optim import Adam
import numpy as np
import pandas as pd

# ---- Network dimensions ----
state_size = 1
request_size = 1
action_size = 4
value_size = 1
hidden_size = 256
time_step = 16

# The actor consumes [state, request]; the critic consumes [state, action].
actor = R2D2(state_size+request_size,action_size,hidden_size)
critic = R2D2(state_size+action_size,value_size,hidden_size)
model = ActorCritic(actor,critic,time_step)

# ---- Restore trained weights ----
actor_dir = '/content/drive/MyDrive/專案工作承攬契約書_廢水生物處理/actor_weights.pt'
critic_dir = '/content/drive/MyDrive/專案工作承攬契約書_廢水生物處理/critic_weights.pt'
model.load_weights(actor_dir=actor_dir,critic_dir=critic_dir)

# Put the critic in evaluation mode.
model.critic.eval()

# ---- Load the held-out batches ----
import pickle
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on a test_iter.pkl that you produced yourself.
test_iter_dir = '/content/drive/MyDrive/專案工作承攬契約書_廢水生物處理/test_iter.pkl'
with open(test_iter_dir, 'rb') as f:
    test_iter = pickle.load(f)

# ---- Score the critic, one batch at a time ----
# NOTE(review): the metrics are computed per batch and then averaged, which
# is not the same as computing them once over all test samples -- confirm
# this is the intended aggregation.
r2_list,mse_list,mape_list = [],[],[]
with torch.no_grad():
  # Each batch unpacks as (state, request, action, value); the request is
  # not needed to evaluate the critic.
  for bs,br,ba,bv in test_iter:
    v_hat = model.critic_forward(bs,ba, eval=True)
    r2_list.append(r2_score(bv, v_hat))
    mse_list.append(mean_squared_error(bv, v_hat))
    mape_list.append(mean_absolute_percentage_error(bv, v_hat))

# Collapse the per-batch scores into one row per metric and show the table.
eval_metrics = {
  "R2 score": np.mean(r2_list),
  "MSE": np.mean(mse_list),
  "MAPE": np.mean(mape_list),
}
eval_df = pd.DataFrame.from_dict(eval_metrics, orient="index", columns=["Value"])
display(eval_df)

Unnamed: 0,Value
R2 score,0.812797
MSE,0.002478
MAPE,0.063153
