In [99]:
import os, sys

# Put the parent of the current working directory on the import path
# (presumably where the kube_sim_gym / kube_mm_scheduler packages live — confirm).
base_path = os.path.join(os.getcwd(), "..")
print(f"Base Path: {base_path}")
# Guard the append so re-running this cell does not pile up duplicate entries.
if base_path not in sys.path:
    sys.path.append(base_path)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [100]:
import gym
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

from stable_baselines3 import DQN, PPO
import torch

In [101]:
# Create the 'SimKubeEnv-v0' gym environment with its reward and scenario files.
env = gym.make(
    'SimKubeEnv-v0',
    reward_file='promes_static.py',
    scenario_file='scenario-5l-5m-1000p-10m.csv',
)
# Restore the saved PPO agent and bind it to the environment created above.
model = PPO.load(
    'training/model/PPO_Promes_Combined',
    env=env,
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [102]:
# Keep a handle on the loaded agent's policy module and print its structure
# (sub-module names and layer sizes).
model_policy = model.policy
print(model_policy)

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential()
    (policy_net): Sequential(
      (0): Linear(in_features=12, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=12, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=64, out_features=6, bias=True)
  (value_net): Linear(in_features=64, out_features=1, bias=True)
)


In [103]:
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from gym import spaces

from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.logger import configure

from kube_mm_scheduler.model.promes import Net5_
from kube_mm_scheduler.model.net3 import Model as Net3

from typing import Dict, List, Tuple, Type, Union


# Device shared by every module built in this notebook: GPU when one is
# available, CPU otherwise.
if th.cuda.is_available():
    device = th.device("cuda")
else:
    device = th.device("cpu")


class Net3_(Net3):
    """Net3 variant whose forward pass stops after ``fc3``.

    The ``fc4`` attribute is replaced by ``None`` and never used, so the module
    returns the ReLU-activated output of ``fc3`` instead of whatever ``Net3``
    produces (presumably ``fc4`` is Net3's output head — confirm against Net3).
    """

    def __init__(self):
        super(Net3_, self).__init__()
        # Drop the last layer; forward() below does not call it.
        self.fc4 = None

    def forward(self, x1, x2):
        """Encode two inputs separately, merge them, and return the fc3 activations.

        :param x1: tensor fed to ``fc1_1``
        :param x2: tensor fed to ``fc1_2``
        :return: ``relu(fc3(relu(fc2(cat(relu(fc1_1(x1)), relu(fc1_2(x2)))))))``
        """
        x1 = F.relu(self.fc1_1(x1))
        x2 = F.relu(self.fc1_2(x2))
        # Concatenate the two branches along dim 1. Uses the `th` alias imported
        # in this cell rather than relying on `torch` from an earlier cell.
        x = th.cat((x1, x2), dim=1)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return x


class PromesPPO(BaseFeaturesExtractor):
    """Features extractor that feeds observations through a pretrained ``Net5_``.

    :param observation_space: observation space handed to ``BaseFeaturesExtractor``
    :param features_dim: feature size declared to ``BaseFeaturesExtractor``
        (default 80). NOTE(review): this must match the width of ``Net5_``'s
        output; nothing here checks it.
    """

    def __init__(self, observation_space: spaces.Box, features_dim: int = 80):
        super(PromesPPO, self).__init__(observation_space, features_dim)
        self.net = Net5_().to(device)
        weight_path = os.path.join(base_path, 'kube_mm_scheduler/weight/net5.pt')
        # map_location remaps the stored tensors onto the active device, so a
        # checkpoint saved on a GPU still loads on a CPU-only machine.
        self.net.load_state_dict(th.load(weight_path, map_location=device))
        # NOTE(review): eval() only sets the initial mode; calling .train() on a
        # parent module later switches this sub-network back to training mode.
        self.net.eval()

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """Split each observation row at column 10 and run both parts through ``Net5_``.

        :param observations: 2-D batch of observations (indexed as ``[:, :10]`` / ``[:, 10:]``)
        :return: output of ``self.net(first_ten_columns, remaining_columns)``
        """
        input1 = observations[:, :10].to(device)
        input2 = observations[:, 10:].to(device)

        return self.net(input1, input2)

# Keyword arguments naming PromesPPO as the features extractor class, together
# with the constructor arguments it should receive.
policy_kwargs = {
    "features_extractor_class": PromesPPO,
    "features_extractor_kwargs": {"features_dim": 80},
}

class MlpExtractor(nn.Module):
    """Two independent MLPs that map features to a policy latent and a value latent.

    :param feature_dim: size of the incoming feature vector
    :param net_arch: either a list of layer sizes used for both networks, or a
        dict with optional ``"pi"`` / ``"vf"`` lists for the policy / value
        networks (a missing key means no hidden layers for that network)
    :param activation_fn: activation module class instantiated after every linear layer
    :param device: device the networks are moved to; ``"auto"`` picks CUDA when
        available and CPU otherwise
    """

    def __init__(
        self,
        feature_dim: int,
        net_arch: Union[List[int], Dict[str, List[int]]],
        activation_fn: Type[nn.Module],
        device: Union[th.device, str] = "auto",
    ) -> None:
        super().__init__()
        # "auto" is not a device string torch understands, so passing it to
        # `.to()` would raise; resolve it to a concrete device first.
        if isinstance(device, str) and device == "auto":
            device = th.device("cuda" if th.cuda.is_available() else "cpu")

        policy_net: List[nn.Module] = []
        value_net: List[nn.Module] = []
        last_layer_dim_pi = feature_dim
        last_layer_dim_vf = feature_dim

        # save dimensions of layers in policy and value nets
        if isinstance(net_arch, dict):
            # Note: if key is not specified, assume linear network
            pi_layers_dims = net_arch.get("pi", [])  # Layer sizes of the policy network
            vf_layers_dims = net_arch.get("vf", [])  # Layer sizes of the value network
        else:
            pi_layers_dims = vf_layers_dims = net_arch
        # Iterate through the policy layers and build the policy net
        for curr_layer_dim in pi_layers_dims:
            policy_net.append(nn.Linear(last_layer_dim_pi, curr_layer_dim))
            policy_net.append(activation_fn())
            last_layer_dim_pi = curr_layer_dim
        # Iterate through the value layers and build the value net
        for curr_layer_dim in vf_layers_dims:
            value_net.append(nn.Linear(last_layer_dim_vf, curr_layer_dim))
            value_net.append(activation_fn())
            last_layer_dim_vf = curr_layer_dim

        # Save dim, used to create the distributions
        self.latent_dim_pi = last_layer_dim_pi
        self.latent_dim_vf = last_layer_dim_vf

        # Create networks
        # If the list of layers is empty, the network will just act as an Identity module
        self.policy_net = nn.Sequential(*policy_net).to(device)
        self.value_net = nn.Sequential(*value_net).to(device)

    def forward(self, features: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        """
        :param features: feature batch shared by both networks
        :return: latent_policy, latent_value of the specified network.
        """
        return self.forward_actor(features), self.forward_critic(features)

    def forward_actor(self, features: th.Tensor) -> th.Tensor:
        """Return the policy latent for ``features``."""
        return self.policy_net(features)

    def forward_critic(self, features: th.Tensor) -> th.Tensor:
        """Return the value latent for ``features``."""
        return self.value_net(features)

In [104]:
import torch
import torch.nn as nn

class ActorCriticPolicy(nn.Module):
    """Plain-PyTorch actor-critic network built from PromesPPO and MlpExtractor.

    Sub-module names (``features_extractor``, ``mlp_extractor``, ``action_net``,
    ``value_net``) match those printed for the Stable-Baselines3 policy, so a
    state_dict from a policy with the same layout can be loaded by key.

    NOTE(review): the layout is fixed to an 80-feature PromesPPO extractor,
    two 64-unit Tanh layers and 6 action outputs; a checkpoint trained with a
    different extractor or feature size will not load into it.
    """

    def __init__(self):
        super().__init__()

        # Features extractor
        self.features_extractor = PromesPPO(observation_space=env.observation_space, features_dim=80)

        # MLP extractor
        self.mlp_extractor = MlpExtractor(feature_dim=80, net_arch=[64, 64], activation_fn=nn.Tanh, device=device)

        # Output heads. Their input sizes come from the MLP extractor instead of
        # a repeated literal, and they are moved to the same device as the
        # extractor and MLP so forward() does not mix devices on CUDA.
        # Action net
        self.action_net = nn.Linear(
            in_features=self.mlp_extractor.latent_dim_pi, out_features=6, bias=True
        ).to(device)

        # Value net
        self.value_net = nn.Linear(
            in_features=self.mlp_extractor.latent_dim_vf, out_features=1, bias=True
        ).to(device)

    def forward(self, state):
        """Run a batch of observations through the network.

        :param state: observation batch accepted by ``PromesPPO.forward``
        :return: tuple ``(action_net output, value_net output)``
        """
        # Extract features
        features = self.features_extractor(state)

        # Policy and value latents from the two MLP branches
        latent_pi, latent_vf = self.mlp_extractor(features)

        # Get action
        action = self.action_net(latent_pi)

        # Get value
        value = self.value_net(latent_vf)

        return action, value

In [105]:
# Instantiate the plain-PyTorch ActorCriticPolicy defined in this notebook
# (the no-argument constructor reads the global `env` and `device`).
pt_model = ActorCriticPolicy()

In [106]:
# Extract weights from model_policy: the state_dict of the loaded agent's policy.
weights = model_policy.state_dict()

In [107]:
# Load weights to pt_model
# NOTE(review): with the checkpoint loaded in this notebook this call raises
# RuntimeError. The saved policy has no features_extractor.net.* parameters and
# its first MLP layers take 12 inputs, whereas pt_model is built around
# PromesPPO with 80 features. Either load a checkpoint trained with the
# PromesPPO extractor or rebuild pt_model to match before relying on this cell.
pt_model.load_state_dict(weights)

RuntimeError: Error(s) in loading state_dict for ActorCriticPolicy:
	Missing key(s) in state_dict: "features_extractor.net.net3_.fc1_1.weight", "features_extractor.net.net3_.fc1_1.bias", "features_extractor.net.net3_.fc1_2.weight", "features_extractor.net.net3_.fc1_2.bias", "features_extractor.net.net3_.fc2.weight", "features_extractor.net.net3_.fc2.bias", "features_extractor.net.net3_.fc3.weight", "features_extractor.net.net3_.fc3.bias", "features_extractor.net.fc1_3_1.weight", "features_extractor.net.fc1_3_1.bias", "features_extractor.net.fc1_3_2.weight", "features_extractor.net.fc1_3_2.bias", "features_extractor.net.fc1_3_3.weight", "features_extractor.net.fc1_3_3.bias", "features_extractor.net.fc1_3_4.weight", "features_extractor.net.fc1_3_4.bias", "features_extractor.net.fc1_3_5.weight", "features_extractor.net.fc1_3_5.bias", "features_extractor.net.fc2_1.weight", "features_extractor.net.fc2_1.bias", "features_extractor.net.fc2_2.weight", "features_extractor.net.fc2_2.bias", "features_extractor.net.fc2_3.weight", "features_extractor.net.fc2_3.bias", "features_extractor.net.fc2_4.weight", "features_extractor.net.fc2_4.bias", "features_extractor.net.fc2_5.weight", "features_extractor.net.fc2_5.bias". 
	size mismatch for mlp_extractor.policy_net.0.weight: copying a param with shape torch.Size([64, 12]) from checkpoint, the shape in current model is torch.Size([64, 80]).
	size mismatch for mlp_extractor.value_net.0.weight: copying a param with shape torch.Size([64, 12]) from checkpoint, the shape in current model is torch.Size([64, 80]).

In [93]:
# Hand-written probe observations, one row of 12 values each. The first ten
# values and the last two sit on separate lines because PromesPPO.forward
# splits observations at column 10.
sample1 = torch.tensor([[
    0.99, 0.90, 0.80, 0.80, 0.95, 0.95, 0.90, 0.85, 0.0, 0.0,
    0.0, 0.0,
]])
sample2 = torch.tensor([[
    0.99, 0.90, 0.80, 0.80, 0.95, 0.95, 0.90, 0.85, 0.0, 0.0,
    0.6, 0.7,
]])
sample3 = torch.tensor([[
    0.99, 0.90, 0.40, 0.40, 0.15, 0.15, 0.90, 0.85, 0.8, 0.8,
    0.6, 0.7,
]])

In [94]:
# Forward pass on sample1; the result is the (action_net output, value_net output) pair.
# NOTE(review): this cell's execution count predates the cells above, so the
# displayed numbers may come from an earlier pt_model state — re-run to confirm.
pt_model(sample1)

(tensor([[-6.2938,  1.9472, -0.3218,  0.2534,  2.2748, -3.2760]],
        grad_fn=<AddmmBackward0>),
 tensor([[25.3541]], grad_fn=<AddmmBackward0>))

In [98]:
# Same forward pass, keeping only the first element of the pair (the action_net output).
pt_model(sample1)[0]

tensor([[-6.2938,  1.9472, -0.3218,  0.2534,  2.2748, -3.2760]],
       grad_fn=<AddmmBackward0>)

In [81]:
# Forward pass on sample2, which differs from sample1 only in its last two values.
pt_model(sample2)

(tensor([[-5.9904, -0.0553, -0.9415, -1.1002, -0.3881,  4.7698]],
        grad_fn=<AddmmBackward0>),
 tensor([[-61.6606]], grad_fn=<AddmmBackward0>))

In [83]:
# Forward pass on sample3; returns the (action_net output, value_net output) pair.
pt_model(sample3)

(tensor([[-6.6532, -0.6983,  1.0152,  0.3938,  1.4332, -0.1519]],
        grad_fn=<AddmmBackward0>),
 tensor([[5.3250]], grad_fn=<AddmmBackward0>))