<a href="https://colab.research.google.com/github/skywalker0803r/SCR/blob/main/Modeling20230917.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
# Mount Google Drive so the dataset pickles under /content/drive are readable (Colab only).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import torch
import torch.nn as nn
import numpy as np
from torch.optim import Adam
import pandas as pd
from torch.utils.data import TensorDataset,DataLoader
from torch import FloatTensor

# data

In [3]:
import joblib
# NOTE(security): joblib.load unpickles arbitrary Python objects, so only load
# files from a trusted source.
# Single place to change when the datasets move.
DATA_DIR = '/content/drive/MyDrive/SCR'
data = joblib.load(f'{DATA_DIR}/train_dataset.pkl')
test_data = joblib.load(f'{DATA_DIR}/test_dataset.pkl')

# B,T,F


In [4]:
# State windows are laid out as (batch, time, features).
data['state'].shape

(54046, 3, 35)

In [5]:
# Actions as loaded: one scalar per time step, i.e. (batch, time).
data['action'].shape

(54046, 3)

#  B,T,F


In [6]:
# Add a trailing feature axis: (batch, time) -> (batch, time, 1).
# The batch axis is pinned explicitly so that re-running this cell is a no-op;
# deriving the middle axis from shape[-1] would turn (B, T, 1) into (B*T, 1, 1)
# on a second run.
data['action'] = data['action'].reshape(len(data['action']),-1,1)
data['action'].shape

(54046, 3, 1)

#  B,F


In [7]:
# Target values are laid out as (batch, features).
data['value'].shape

(54046, 1)

In [8]:
# Hand-picked hyper-parameters.
batch_size = hidden_size = 128
request_size = 1

# Sizes read off the loaded arrays.
data_size = data['value'].shape[0]
time_step,state_size = data['state'].shape[1],data['state'].shape[-1]
action_size = data['action'].shape[-1]
value_size = data['value'].shape[-1]

In [9]:
# Seed the global torch RNG so the synthetic requests below (and any module
# initialised afterwards) are reproducible across kernel restarts.
torch.manual_seed(0)

# Each sample is the tuple (state, request, action, value). The request is not
# part of the recorded data: it is drawn uniformly from [0, 1) per sample.
d = TensorDataset(
    FloatTensor(data['state']),
    torch.rand(data_size,request_size),
    FloatTensor(data['action']),
    FloatTensor(data['value']))
# shuffle=False keeps batch order fixed, so next(iter(train_iter)) always yields
# the same first batch in the inspection cells.
train_iter = DataLoader(d,batch_size,shuffle=False)

In [10]:
class R2D2(nn.Module):
  """Single-step recurrent network: Linear -> LSTMCell -> Linear -> sigmoid.

  One call processes one time step; the caller carries the LSTM state
  (hx, cx) from step to step.
  """

  def __init__(self,obs_size,n_actions,hidden_size):
    """
    obs_size:    width of the per-step input vector.
    n_actions:   width of the per-step output vector.
    hidden_size: width of the embedding and of the LSTM state.
    """
    super().__init__()
    self.hidden_size = hidden_size
    # Layers are created in this order so seeded initialisation is unchanged.
    self.l1 = nn.Linear(in_features=obs_size,out_features=hidden_size)
    self.l2 = nn.LSTMCell(input_size=hidden_size,hidden_size=hidden_size)
    self.l3 = nn.Linear(in_features=hidden_size,out_features=n_actions)
    # Constructed for experimentation; forward() does not apply it.
    self.dropout = nn.Dropout(p=0.25)

  def forward(self,x,hx,cx):
    """Advance one time step.

    Returns (output, hx, cx) where output lies in [0, 1] because of the final
    sigmoid, and hx/cx are the updated LSTM hidden and cell states.
    """
    embedded = torch.tanh(self.l1(x))
    hx,cx = self.l2(embedded,(hx,cx))
    squashed = torch.tanh(hx)
    output = torch.sigmoid(self.l3(squashed))
    return output,hx,cx

In [11]:
class ActorCritic(nn.Module):
  """Actor (search model) and critic (surrogate model) unrolled over time.

  The critic maps a (state, action) sequence to a predicted value; the actor
  maps a (state, request) pair to an action sequence and is trained through
  the critic so that the predicted value matches the request.

  Both sub-modules must be callable as ``module(x, hx, cx) -> (out, hx, cx)``
  and expose a ``hidden_size`` attribute.
  """

  def __init__(self,actor,critic,time_step):
    """
    actor:     per-step module fed ``cat(state_t, request)``.
    critic:    per-step module fed ``cat(state_t, action_t)``.
    time_step: number of steps to unroll along axis 1 of ``state``.
    """
    super().__init__()

    # Number of time steps to unroll.
    self.time_step = time_step

    # Actor, i.e. the optimisation/search model.
    self.actor = actor
    self.actor_optimizer = Adam(actor.parameters(),lr=1e-3)

    # Critic, i.e. the surrogate model.
    self.critic = critic
    self.critic_optimizer = Adam(critic.parameters(),lr=1e-3)

  @staticmethod
  def _initial_hidden(cell,state):
    """Zero (hx, cx) of shape (batch, cell.hidden_size) on state's dtype and device."""
    shape = (state.size(0),cell.hidden_size)
    return state.new_zeros(shape),state.new_zeros(shape)

  def forward(self,state,request):
    """Predict the value obtained when the actor's own actions are applied."""
    actions = self.actor_forward(state,request)
    values = self.critic_forward(state,actions)
    return values

  def actor_forward(self,state,request,eval=False):
    """Given the (uncontrollable) state and a request, emit a sequence of actions.

    state:   (batch, time, state_features)
    request: (batch, request_features), repeated at every time step.
    eval:    put the actor in eval mode when true, train mode otherwise.

    Returns a tensor of shape (batch, time, action_features).
    """
    # Switch mode.
    if eval:
      self.actor.eval()
    else:
      self.actor.train()

    # Use the actor owned by this instance, not a module-level global, so the
    # class works regardless of how the caller named its variables.
    hx,cx = self._initial_hidden(self.actor,state)

    # Feed each time slice to the actor and collect the per-step actions.
    step_actions = []
    for t in range(self.time_step):
      a,hx,cx = self.actor(torch.cat((state[:,t,:],request),dim=1),hx,cx)
      step_actions.append(a)

    # Stack along a new time axis: (batch, time, features).
    return torch.stack(step_actions,dim=1)

  def critic_forward(self,state,action,eval=False):
    """Given the state and a sequence of (controllable) actions, predict the output.

    state:  (batch, time, state_features)
    action: (batch, time, action_features)
    eval:   put the critic in eval mode when true, train mode otherwise.

    Returns the critic output at the last time step, shape (batch, value_features).
    """
    # Switch mode.
    if eval:
      self.critic.eval()
    else:
      self.critic.train()

    hx,cx = self._initial_hidden(self.critic,state)

    # Run the critic over every step, carrying hx/cx forward.
    step_values = []
    for t in range(self.time_step):
      v,hx,cx = self.critic(torch.cat((state[:,t,:],action[:,t,:]),dim=1),hx,cx)
      step_values.append(v)

    # Only the prediction at the final time step is returned.
    return step_values[-1]

  def train_critic(self,state,action,value):
    """One supervised step for the critic.

    x: [state, action]
    y: [value]

    Returns the mean squared error of this batch as a Python float.
    """
    self.critic.train()
    # The actor is not used here; it is only switched to eval mode.
    self.actor.eval()

    # Drop stale gradients (e.g. left over from an actor step) before backprop.
    self.critic_optimizer.zero_grad()

    value_hat = self.critic_forward(state,action)

    # Minimise the squared error between prediction and label.
    loss = ((value_hat - value)**2).mean()
    loss.backward()

    self.critic_optimizer.step()
    self.critic_optimizer.zero_grad()
    return loss.item()

  def train_actor(self,state,request):
    """One step for the actor: minimise the gap between predicted output and request.

    x: [state, request]

    The critic acts as a fixed surrogate: gradients flow through it to the
    actor, but only the actor's parameters are updated.

    Returns the mean squared error of this batch as a Python float.
    """
    self.actor.train()
    self.critic.eval()
    self.actor_optimizer.zero_grad()

    actions = self.actor_forward(state,request)
    # eval=True keeps the critic in eval mode; the default would flip it back
    # to train mode and undo the critic.eval() call above.
    value_hat = self.critic_forward(state,actions,eval=True)

    # The predicted value should be as close to the request as possible.
    # (A temporal-smoothness penalty on the actions was prototyped but is disabled.)
    loss = ((request - value_hat)**2).mean()
    loss.backward()

    self.actor_optimizer.step()
    self.actor.zero_grad()
    # backward() also populated the critic's gradients; clear them so they do
    # not leak into a later critic update.
    self.critic.zero_grad()

    return loss.item()

# 實例化模型

In [12]:
# The actor consumes [state, request] at each step and outputs the suggested action.
actor = R2D2(obs_size=state_size+request_size,n_actions=action_size,hidden_size=hidden_size)
# The critic consumes [state, action] at each step and outputs the (estimated) final output.
critic = R2D2(obs_size=state_size+action_size,n_actions=value_size,hidden_size=hidden_size)
# Wire both into the ActorCritic wrapper.
model = ActorCritic(actor=actor,critic=critic,time_step=time_step)

# 隨機產生數據測試s:state,r:request

In [13]:
# Random single-sample inputs used only to smoke-test the forward pass.
s = torch.rand(1,time_step,state_size)# (batch, time, state_features)
r = torch.rand(1,request_size)# (batch, request_features)
s.shape,r.shape

(torch.Size([1, 3, 35]), torch.Size([1, 1]))

# 前向傳播測試

In [14]:
# Call the module itself rather than .forward so nn.Module.__call__ (and any
# registered hooks) runs.
model(s,r)

tensor([[0.5094]], grad_fn=<SliceBackward0>)

In [15]:
# Disabled TensorBoard graph export, kept as a string literal so the cell only
# echoes the text and executes none of it.
'''
%load_ext tensorboard
from datetime import datetime
import time
from torch.utils.tensorboard import SummaryWriter
logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
writer = SummaryWriter(log_dir=logdir)
writer.add_graph(model,[s,r])
writer.close()
%tensorboard --logdir logs
'''

'\n%load_ext tensorboard\nfrom datetime import datetime\nimport time\nfrom torch.utils.tensorboard import SummaryWriter\nlogdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")\nwriter = SummaryWriter(log_dir=logdir)\nwriter.add_graph(model,[s,r])\nwriter.close()\n%tensorboard --logdir logs\n'

# 定義訓練函數

In [16]:
def train_critic(net=None,loader=None):
  """Run one critic epoch and return the sum of the per-batch losses.

  net / loader default to the notebook-level `model` / `train_iter`, so the
  zero-argument call keeps working.
  """
  net = model if net is None else net
  loader = train_iter if loader is None else loader
  loss = 0
  for bs,br,ba,bv in loader:
    loss += net.train_critic(bs,ba,bv)
  return loss

def train_actor(net=None,loader=None):
  """Run one actor epoch and return the sum of the per-batch losses.

  net / loader default to the notebook-level `model` / `train_iter`, so the
  zero-argument call keeps working.
  """
  net = model if net is None else net
  loader = train_iter if loader is None else loader
  loss = 0
  for bs,br,ba,bv in loader:
    loss += net.train_actor(bs,br)
  return loss

def train(model,train_iter,epochs=100):
  """Train the critic for `epochs` epochs, then the actor for `epochs` epochs.

  Returns (model, train_history), where train_history['critic'] and
  train_history['actor'] hold one summed loss per epoch.
  """
  train_history = {'actor':[],'critic':[]}

  # Phase 1: fit the critic surrogate, hoping it ends up close to the real process.
  for epoch in range(epochs):
    # Pass the arguments through so the model/loader given to train() are the
    # ones actually trained, not whatever the globals happen to be.
    loss = train_critic(model,train_iter)
    train_history['critic'].append(loss)
    print('epoch:{} critic_loss:{}'.format(epoch,loss))

  # Phase 2: treat the trained critic as the real process and search it for
  # good actions. The search is carried out by the actor network: it starts
  # from arbitrary actions and is trained to reduce the gap between the
  # requested and the predicted output.
  for epoch in range(epochs):
    loss = train_actor(model,train_iter)
    train_history['actor'].append(loss)
    print('epoch:{} actor_loss:{}'.format(epoch,loss))

  return model,train_history

# 從train_iter 採些資料出來讓actor forward

In [17]:
# Take the first batch and let the actor (not yet trained at this point) propose actions.
s,r,a,v = next(iter(train_iter))
actions = model.actor_forward(s,r,eval=True)
print('以下是actor根據狀態和需求給出的操作建議')
# Display the proposed action sequence of the first sample in the batch.
pd.DataFrame(actions[0].detach().numpy()).style.background_gradient()

以下是actor根據狀態和需求給出的操作建議


Unnamed: 0,0
0,0.481272
1,0.481474
2,0.481508


# 執行訓練


In [18]:
# Runs 99 critic epochs followed by 99 actor epochs. train_history is only
# bound once train() returns, so interrupting this cell leaves it undefined.
model,train_history = train(model,train_iter,epochs=99)

epoch:0 critic_loss:3.197746816649783
epoch:1 critic_loss:4.243415304609982
epoch:2 critic_loss:4.042506417217169
epoch:3 critic_loss:4.092927486562985
epoch:4 critic_loss:4.02010541675611
epoch:5 critic_loss:4.016633069737736
epoch:6 critic_loss:4.0155665883823985
epoch:7 critic_loss:4.014931112252839
epoch:8 critic_loss:4.014527056116094
epoch:9 critic_loss:4.014273061869915
epoch:10 critic_loss:4.014133758114724
epoch:11 critic_loss:4.014108318378419
epoch:12 critic_loss:4.014121488492094
epoch:13 critic_loss:4.013812096688412
epoch:14 critic_loss:4.014649059362625
epoch:15 critic_loss:4.01310657817487
epoch:16 critic_loss:4.022615232971418
epoch:17 critic_loss:3.9412416233190015
epoch:18 critic_loss:4.065875594031752
epoch:19 critic_loss:4.053741900679029
epoch:20 critic_loss:4.047834011999953
epoch:21 critic_loss:4.05791655878852
epoch:22 critic_loss:4.013556881300701
epoch:23 critic_loss:4.0162574226125685
epoch:24 critic_loss:4.01086304678779
epoch:25 critic_loss:4.0185391527272

KeyboardInterrupt: ignored

# 繪製圖表

In [None]:
import matplotlib.pyplot as plt
# One labelled panel per training phase; each point is the per-epoch sum of
# batch losses recorded by train().
fig,(ax_critic,ax_actor) = plt.subplots(1,2,figsize=(12,4))
ax_critic.plot(train_history['critic'])
ax_critic.set(title='critic loss',xlabel='epoch',ylabel='summed batch loss')
ax_actor.plot(train_history['actor'])
ax_actor.set(title='actor loss',xlabel='epoch',ylabel='summed batch loss')
plt.show()

# 試用訓練好的模型

In [None]:
# First batch of the loader: state, request, recorded action, recorded value.
s,r,a,v = next(iter(train_iter))
# Actions proposed by the actor for that batch (actor switched to eval mode).
actions = model.actor_forward(s,r,eval=True)

# 訓練好的模型做推薦

In [None]:
# Actor's proposed action sequence for the first sample of the batch.
pd.DataFrame(actions[0].detach().numpy()).style.background_gradient()

# 歷史上的推薦

In [None]:
# Action sequence recorded in the dataset for the same sample, for comparison.
pd.DataFrame(a[0].numpy()).style.background_gradient()

# 使用訓練好的模型產生values

In [None]:
# Critic's predicted value for the first sample under the actor's proposed actions.
# Indexing with [[0]] keeps the batch axis.
values = model.critic_forward(s[[0]],actions[[0]],eval=True)
values

# 實際的Values

In [None]:
# Value recorded in the dataset for the first sample of the batch.
v[0]

假設廠區有個希望的wanna_ouput

假設state目前在常見情況下(0.5表示)

看看模型推薦的操作以及預估的產量

In [None]:
# Scenario inputs: the desired output and a constant fill value for every state feature.
wanna_ouput = 0.0
state_setting = 0.5

# Build a single-sample batch: constant state over all time steps plus the request.
s = torch.full(size=(1,time_step,state_size),fill_value=state_setting)
r = torch.full(size=(1,request_size),fill_value=wanna_ouput)

# Actor proposes actions; critic estimates the resulting value.
a = model.actor_forward(s,r,eval=True)
v = model.critic_forward(s,a,eval=True)

In [None]:
# Actions suggested by the model for the scenario above.
pd.DataFrame(a[0].detach().numpy()).style.background_gradient()

In [None]:
# Output the model predicts for the suggested actions above.
print(f'value:{v.detach().numpy()[0][0]}\n')

# r2 rmse mape

In [None]:
from sklearn.metrics import r2_score,mean_squared_error
def mape(y_true, y_pred):
  """Mean absolute percentage error, in percent.

  y_true: reference values (array-like); used as the denominator, so zeros
          produce inf/nan under NumPy's division rules.
  y_pred: predicted values (array-like), broadcastable against y_true.

  Returns 100 * mean(|y_true - y_pred| / |y_true|).
  """
  # The previous version had its return statement inside a bare `except:`
  # block, so it returned None whenever the conversion succeeded.
  y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
  return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# TRAIN KPI

In [None]:
# Critic predictions for the whole training set. no_grad avoids building an
# autograd graph for a pure evaluation pass.
with torch.no_grad():
  v_hat = model.critic_forward(
      FloatTensor(data['state']),
      FloatTensor(data['action']),eval=True).detach().numpy()
# Metrics take (y_true, y_pred). R² and MAPE are not symmetric, so the recorded
# values must come first.
r2 = r2_score(data['value'],v_hat)
rmse = np.sqrt(mean_squared_error(data['value'],v_hat))
mape_ = mape(data['value'],v_hat)
r2,rmse,mape_

# TEST KPI

In [None]:
# Critic predictions for the held-out set; actions get the same trailing
# feature axis as in training.
s = FloatTensor(test_data['state'])
a = FloatTensor(test_data['action'].reshape(-1,time_step,1))
with torch.no_grad():
  v_hat = model.critic_forward(s,a,eval=True).detach().numpy()

# Metrics take (y_true, y_pred). R² and MAPE are not symmetric, so the recorded
# values must come first.
r2 = r2_score(test_data['value'],v_hat)
rmse = np.sqrt(mean_squared_error(test_data['value'],v_hat))
mape_ = mape(test_data['value'].astype('float32'),v_hat)
r2,rmse,mape_

# TEST RESULT

In [None]:
# One-row summary of the test metrics. `mape_` is the computed score; the bare
# name `mape` is the metric function itself.
result = pd.DataFrame({'r2':[r2],'rmse':[rmse],'mape':[mape_]})
result