In [1]:
import os, sys

base_path = os.path.join(os.getcwd(), "..")
print(f"Base Path: {base_path}")
sys.path.append(base_path)

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


# Data Generation

## 1. 1-label data generation

In [2]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [3]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [4]:
env = gym.make('SimKubeEnv-v0')
scheduler = SimHrScheduler(env, 'default.py')

In [None]:
# Data generation
# Data consists of state, scheduler decision
data_size = 1000000
data_path = os.path.join(base_path, "dataset", "data_1.csv")
with open(data_path, 'w') as f:
    for i in range(data_size):
        state = list(env.random_state_gen())
        action = scheduler.decision(env)
        data = state + [action]
        f.write(','.join(list(map(str,data))) + '\n')

## 2. 6-confidence data generation

In [7]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [8]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [9]:
env = gym.make('SimKubeEnv-v0')
scheduler = SimHrScheduler(env, 'default.py')

In [None]:
# Data generation
# Data consists of state, scheduler decision
data_size = 1000000
data_path = os.path.join(base_path, "dataset", "data_5.csv")
with open(data_path, 'w') as f:
    for i in range(data_size):
        state = list(env.random_state_gen())
        confidence = scheduler.model.get_confidence(env)
        data = state + confidence.tolist()
        f.write(','.join(list(map(str,data))) + '\n')

## 3. 1-confidence data generation

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from kube_mm_scheduler.model.net3 import Net3

net3 = Net3()

In [4]:
# predict the score for each action
def predict_score(data, model=net3):
    model.eval()
    if not torch.is_tensor(data):
        data = torch.tensor(data, dtype=torch.float32)
    data10 = data[:, :-2]
    data2 = data[:, -2:]
    output = model(data10, data2)
    # Round up at the 4th decimal point
    output = torch.round(output * 10000) / 10000
    # # Softmax
    # output = F.softmax(output, dim=1)
    # print(output)
    return output

In [5]:
# Read from data_1.csv and formulate data_3.csv which has additional selected node information
src_path = os.path.join(base_path, "dataset", "data_1.csv")
with open(src_path, 'r') as f:
    data_path = os.path.join(base_path, "dataset", "data_3.csv")
    with open(data_path, 'w') as g:
        for line in f.readlines():
            line = line.strip()
            line = line.split(',')
            node_state = line[:-3]
            pod_quota = line[-3:-1]

            # Confidence
            flt_state = list(map(float, node_state+pod_quota))
            # print(flt_state)
            confidence = predict_score([flt_state])

            for i in range(5):
                selected_node_state = node_state[i*2:i*2+2]
                new_state = node_state + pod_quota + selected_node_state
                conf = confidence[0].tolist()[i+1]
                # Takes the 4th decimal point
                conf = round(conf, 4)
                
                new_data = new_state + [conf]
                g.write(','.join(list(map(str,new_data))) + '\n')

KeyboardInterrupt: 

## 4. 6-reward data generation (Dynamic2)

In [2]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [3]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [4]:
env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic2.py')
scheduler = SimHrScheduler(env, 'default.py')

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [None]:
import tqdm

data_size = 1000000
data_path = os.path.join(base_path, "dataset", "data_5.csv")

with open(data_path, 'w') as f:
    for i in tqdm.tqdm(range(data_size)):
        env.reset()
        state = list(env.random_state_gen())
        # print(f"State: {state}")

        env0 = env.duplicate() # For action 0
        env1 = env.duplicate() # For action 1
        env2 = env.duplicate() # For action 2
        env3 = env.duplicate() # For action 3
        env4 = env.duplicate() # For action 4
        env5 = env.duplicate() # For action 5

        state0, rew0, _, _ = env0.step(0, True)
        # print(f"Action 0")
        # print(f"State0: {state0}")
        # print("====================")
        state1, rew1, _, _ = env1.step(1, True)
        # print(f"Action 1")
        # print(f"State1: {state1}") 
        # print("====================")
        state2, rew2, _, _ = env2.step(2, True)
        # print(f"Action 2")
        # print(f"State2: {state2}")
        # print("====================")
        state3, rew3, _, _ = env3.step(3, True)
        # print(f"Action 3")
        # print(f"State3: {state3}")
        # print("====================")
        state4, rew4, _, _ = env4.step(4, True)
        # print(f"Action 4")
        # print(f"State4: {state4}")
        # print("====================")
        state5, rew5, _, _ = env5.step(5, True)
        # print(f"Action 5")
        # print(f"State5: {state5}")

        reward = [rew0, rew1, rew2, rew3, rew4, rew5]
        # Round up at the 4th decimal point
        reward = list(map(lambda x: round(x, 4), reward))
        
        f.write(','.join(list(map(str,state))) + '  ,  ' + ','.join(list(map(str,reward))) + '\n')

## 5. 6-reward data generation (Static)

In [6]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [7]:
env = gym.make('SimKubeEnv-v0', reward_file='train_drs.py')

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [8]:
import tqdm

data_size = 1000000
data_path = os.path.join(base_path, "dataset", "data_drs.csv")

with open(data_path, 'w') as f:
    for i in tqdm.tqdm(range(data_size)):
        env.reset()
        state = list(env.random_state_gen())
        # print(f"State: {state}")

        env0 = env.duplicate() # For action 0
        env1 = env.duplicate() # For action 1
        env2 = env.duplicate() # For action 2
        env3 = env.duplicate() # For action 3
        env4 = env.duplicate() # For action 4
        env5 = env.duplicate() # For action 5

        state0, rew0, _, _ = env0.step(0, True)
        # print(f"Action 0")
        # print(f"State0: {state0}")
        # print("====================")
        state1, rew1, _, _ = env1.step(1, True)
        # print(f"Action 1")
        # print(f"State1: {state1}") 
        # print("====================")
        state2, rew2, _, _ = env2.step(2, True)
        # print(f"Action 2")
        # print(f"State2: {state2}")
        # print("====================")
        state3, rew3, _, _ = env3.step(3, True)
        # print(f"Action 3")
        # print(f"State3: {state3}")
        # print("====================")
        state4, rew4, _, _ = env4.step(4, True)
        # print(f"Action 4")
        # print(f"State4: {state4}")
        # print("====================")
        state5, rew5, _, _ = env5.step(5, True)
        # print(f"Action 5")
        # print(f"State5: {state5}")

        reward = [rew0, rew1, rew2, rew3, rew4, rew5]
        # Round up at the 4th decimal point
        reward = list(map(lambda x: round(x, 4), reward))
        
        f.write(','.join(list(map(str,state))) + '  ,  ' + ','.join(list(map(str,reward))) + '\n')

 24%|██▍       | 243035/1000000 [1:52:36<5:50:44, 35.97it/s] 


KeyboardInterrupt: 

## 6. 6-reward data generation (Default)

In [2]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [3]:
env = gym.make('SimKubeEnv-v0', reward_file='train_default.py')

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [None]:
import tqdm

data_size = 1000000
data_path = os.path.join(base_path, "dataset", "data_default.csv")

with open(data_path, 'w') as f:
    for i in tqdm.tqdm(range(data_size)):
        env.reset()
        state = list(env.random_state_gen())
        # print(f"State: {state}")

        env0 = env.duplicate() # For action 0
        env1 = env.duplicate() # For action 1
        env2 = env.duplicate() # For action 2
        env3 = env.duplicate() # For action 3
        env4 = env.duplicate() # For action 4
        env5 = env.duplicate() # For action 5

        state0, rew0, _, _ = env0.step(0, True)
        # print(f"Action 0")
        # print(f"State0: {state0}")
        # print("====================")
        state1, rew1, _, _ = env1.step(1, True)
        # print(f"Action 1")
        # print(f"State1: {state1}") 
        # print("====================")
        state2, rew2, _, _ = env2.step(2, True)
        # print(f"Action 2")
        # print(f"State2: {state2}")
        # print("====================")
        state3, rew3, _, _ = env3.step(3, True)
        # print(f"Action 3")
        # print(f"State3: {state3}")
        # print("====================")
        state4, rew4, _, _ = env4.step(4, True)
        # print(f"Action 4")
        # print(f"State4: {state4}")
        # print("====================")
        state5, rew5, _, _ = env5.step(5, True)
        # print(f"Action 5")
        # print(f"State5: {state5}")

        reward = [rew0, rew1, rew2, rew3, rew4, rew5]
        # Round up at the 4th decimal point
        reward = list(map(lambda x: round(x, 4), reward))
        
        f.write(','.join(list(map(str,state))) + '  ,  ' + ','.join(list(map(str,reward))) + '\n')

## 7. 6-reward data generation (Dynamic)

In [2]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [3]:
env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic.py')

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [5]:
import tqdm

data_size = 300000
data_path = os.path.join(base_path, "dataset", "data_dynamic.csv")

with open(data_path, 'a') as f:
    for i in tqdm.tqdm(range(data_size)):
        env.reset()
        state = list(env.random_state_gen())
        # print(f"State: {state}")

        env0 = env.duplicate() # For action 0
        env1 = env.duplicate() # For action 1
        env2 = env.duplicate() # For action 2
        env3 = env.duplicate() # For action 3
        env4 = env.duplicate() # For action 4
        env5 = env.duplicate() # For action 5

        state0, rew0, _, _ = env0.step(0, True)
        # print(f"Action 0")
        # print(f"State0: {state0}")
        # print("====================")
        state1, rew1, _, _ = env1.step(1, True)
        # print(f"Action 1")
        # print(f"State1: {state1}") 
        # print("====================")
        state2, rew2, _, _ = env2.step(2, True)
        # print(f"Action 2")
        # print(f"State2: {state2}")
        # print("====================")
        state3, rew3, _, _ = env3.step(3, True)
        # print(f"Action 3")
        # print(f"State3: {state3}")
        # print("====================")
        state4, rew4, _, _ = env4.step(4, True)
        # print(f"Action 4")
        # print(f"State4: {state4}")
        # print("====================")
        state5, rew5, _, _ = env5.step(5, True)
        # print(f"Action 5")
        # print(f"State5: {state5}")

        reward = [rew0, rew1, rew2, rew3, rew4, rew5]
        # Round up at the 4th decimal point
        reward = list(map(lambda x: round(x, 4), reward))
        
        f.write(','.join(list(map(str,state))) + '  ,  ' + ','.join(list(map(str,reward))) + '\n')

100%|██████████| 300000/300000 [3:15:14<00:00, 25.61it/s]   


## 8. 6-reward data generation (Default2)

In [10]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [11]:
env = gym.make('SimKubeEnv-v0', reward_file='train_default2.py')

In [None]:
import tqdm

data_size = 300000
data_path = os.path.join(base_path, "dataset", "data_default2.csv")

with open(data_path, 'a') as f:
    for i in tqdm.tqdm(range(data_size)):
        env.reset()
        state = list(env.random_state_gen())
        # print(f"State: {state}")

        env0 = env.duplicate() # For action 0
        env1 = env.duplicate() # For action 1
        env2 = env.duplicate() # For action 2
        env3 = env.duplicate() # For action 3
        env4 = env.duplicate() # For action 4
        env5 = env.duplicate() # For action 5

        state0, rew0, _, _ = env0.step(0, True)
        # print(f"Action 0")
        # print(f"State0: {state0}")
        # print("====================")
        state1, rew1, _, _ = env1.step(1, True)
        # print(f"Action 1")
        # print(f"State1: {state1}") 
        # print("====================")
        state2, rew2, _, _ = env2.step(2, True)
        # print(f"Action 2")
        # print(f"State2: {state2}")
        # print("====================")
        state3, rew3, _, _ = env3.step(3, True)
        # print(f"Action 3")
        # print(f"State3: {state3}")
        # print("====================")
        state4, rew4, _, _ = env4.step(4, True)
        # print(f"Action 4")
        # print(f"State4: {state4}")
        # print("====================")
        state5, rew5, _, _ = env5.step(5, True)
        # print(f"Action 5")
        # print(f"State5: {state5}")

        reward = [rew0, rew1, rew2, rew3, rew4, rew5]
        # Round up at the 4th decimal point
        reward = list(map(lambda x: round(x, 4), reward))
        
        f.write(','.join(list(map(str,state))) + '  ,  ' + ','.join(list(map(str,reward))) + '\n')

## 9. 6-reward data generation (Dynamic2_time)

In [2]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [3]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [4]:
env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic2_time.py')
scheduler = SimHrScheduler(env, 'default.py')

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [None]:
import tqdm

data_size = 300000
data_path = os.path.join(base_path, "dataset", "data_dynamic2_time.csv")

with open(data_path, 'a') as f:
    for i in tqdm.tqdm(range(data_size)):
        env.reset()
        state = list(env.random_state_gen())
        time = env.time
        # print(f"State: {state} / Time: {time}")
        # print(f"State: {state}")

        env0 = env.duplicate() # For action 0
        env1 = env.duplicate() # For action 1
        env2 = env.duplicate() # For action 2
        env3 = env.duplicate() # For action 3
        env4 = env.duplicate() # For action 4
        env5 = env.duplicate() # For action 5

        state0, rew0, _, _ = env0.step(0, True)
        # print(f"Action 0")
        # print(f"State0: {state0}")
        # print("====================")
        state1, rew1, _, _ = env1.step(1, True)
        # print(f"Action 1")
        # print(f"State1: {state1}") 
        # print("====================")
        state2, rew2, _, _ = env2.step(2, True)
        # print(f"Action 2")
        # print(f"State2: {state2}")
        # print("====================")
        state3, rew3, _, _ = env3.step(3, True)
        # print(f"Action 3")
        # print(f"State3: {state3}")
        # print("====================")
        state4, rew4, _, _ = env4.step(4, True)
        # print(f"Action 4")
        # print(f"State4: {state4}")
        # print("====================")
        state5, rew5, _, _ = env5.step(5, True)
        # print(f"Action 5")
        # print(f"State5: {state5}")

        reward = [rew0, rew1, rew2, rew3, rew4, rew5]
        # Round up at the 4th decimal point
        reward = list(map(lambda x: round(x, 4), reward))
        
        f.write(','.join(list(map(str,state))) + '  ,  ' + ','.join(list(map(str,reward))) + '  ,  ' + str(time) + '\n')

## 10. 6-reward data generation (Dynamic_time)

In [2]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [3]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [4]:
env = gym.make('SimKubeEnv-v0', reward_file='train_dynamic_time.py')
scheduler = SimHrScheduler(env, 'default.py')

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [None]:
import tqdm

data_size = 300000
data_path = os.path.join(base_path, "dataset", "data_dynamic_time.csv")

with open(data_path, 'a') as f:
    for i in tqdm.tqdm(range(data_size)):
        env.reset()
        state = list(env.random_state_gen())
        time = env.time
        # print(f"State: {state} / Time: {time}")
        # print(f"State: {state}")

        env0 = env.duplicate() # For action 0
        env1 = env.duplicate() # For action 1
        env2 = env.duplicate() # For action 2
        env3 = env.duplicate() # For action 3
        env4 = env.duplicate() # For action 4
        env5 = env.duplicate() # For action 5

        state0, rew0, _, _ = env0.step(0, True)
        # print(f"Action 0")
        # print(f"State0: {state0}")
        # print("====================")
        state1, rew1, _, _ = env1.step(1, True)
        # print(f"Action 1")
        # print(f"State1: {state1}") 
        # print("====================")
        state2, rew2, _, _ = env2.step(2, True)
        # print(f"Action 2")
        # print(f"State2: {state2}")
        # print("====================")
        state3, rew3, _, _ = env3.step(3, True)
        # print(f"Action 3")
        # print(f"State3: {state3}")
        # print("====================")
        state4, rew4, _, _ = env4.step(4, True)
        # print(f"Action 4")
        # print(f"State4: {state4}")
        # print("====================")
        state5, rew5, _, _ = env5.step(5, True)
        # print(f"Action 5")
        # print(f"State5: {state5}")

        reward = [rew0, rew1, rew2, rew3, rew4, rew5]
        # Round up at the 4th decimal point
        reward = list(map(lambda x: round(x, 4), reward))
        
        f.write(','.join(list(map(str,state))) + '  ,  ' + ','.join(list(map(str,reward))) + '  ,  ' + str(time) + '\n')

## 11. 6-reward data generation (step_3)

In [17]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [18]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [19]:
env = gym.make('SimKubeEnv-v0', reward_file='train_step_3.py')
scheduler = SimHrScheduler(env, 'default.py')

In [20]:
import tqdm

data_size = 1000000
data_path = os.path.join(base_path, "dataset", "data_step3.csv")

with open(data_path, 'a') as f:
    for i in tqdm.tqdm(range(data_size)):
        env.reset()
        state = list(env.random_state_gen())
        # print(f"State: {state} / Time: {time}")
        # print(f"State: {state}")

        env0 = env.duplicate() # For action 0
        env1 = env.duplicate() # For action 1
        env2 = env.duplicate() # For action 2
        env3 = env.duplicate() # For action 3
        env4 = env.duplicate() # For action 4
        env5 = env.duplicate() # For action 5

        state0, rew0, _, _ = env0.step(0, True)
        # print(f"Action 0")
        # print(f"State0: {state0}")
        # print("====================")
        state1, rew1, _, _ = env1.step(1, True)
        # print(f"Action 1")
        # print(f"State1: {state1}") 
        # print("====================")
        state2, rew2, _, _ = env2.step(2, True)
        # print(f"Action 2")
        # print(f"State2: {state2}")
        # print("====================")
        state3, rew3, _, _ = env3.step(3, True)
        # print(f"Action 3")
        # print(f"State3: {state3}")
        # print("====================")
        state4, rew4, _, _ = env4.step(4, True)
        # print(f"Action 4")
        # print(f"State4: {state4}")
        # print("====================")
        state5, rew5, _, _ = env5.step(5, True)
        # print(f"Action 5")
        # print(f"State5: {state5}")

        reward = [rew0, rew1, rew2, rew3, rew4, rew5]
        # Round up at the 4th decimal point
        reward = list(map(lambda x: round(x, 4), reward))
        
        f.write(','.join(list(map(str,state))) + '  ,  ' + ','.join(list(map(str,reward))) + '\n')

 48%|████▊     | 479625/1000000 [43:19<47:00, 184.48it/s]  


KeyboardInterrupt: 

## 12. Imitation Transitions generation w/ Default scheduler

In [13]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [14]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [15]:
env = gym.make('SimKubeEnv-v0', reward_file='train_step_3.py')
scheduler = SimHrScheduler(env, 'default.py')

In [None]:
import tqdm
import numpy as np

epochs = 100
data_path = os.path.join(base_path, "dataset", "data_expert.csv")

with open(data_path, 'a') as f:
    for i in tqdm.tqdm(range(epochs)):
        done = False
        state = env.reset()
        while not done:
            action = scheduler.decision(env)
            next_state, reward, done, _ = env.step(action)
        
            line = state.tolist() + [action] + next_state.tolist() + [int(done)]

            line = list(map(lambda x: round(float(x), 2), line))
            line = list(map(str, line))
            # print(line)
            if len(line) == 26:
                f.write(','.join(line) + '\n')

            state = next_state

## 13. DQfD data gen

In [4]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [5]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [6]:
env = gym.make('SimKubeEnv-v0', reward_file='train_step_3.py')
scheduler = SimHrScheduler(env, 'default.py')

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [12]:
import tqdm
import numpy as np

epochs = 5
data_path = os.path.join(base_path, "dataset", "data_dqfd.csv")

with open(data_path, 'a') as f:
    for i in tqdm.tqdm(range(epochs)):
        done = False
        state = env.reset()
        while not done:
            action = scheduler.decision(env)
            next_state, reward, done, _ = env.step(action)
        
            line = state.tolist() + [action, reward]

            line = list(map(lambda x: round(float(x), 2), line))
            line = list(map(str, line))
            # print(line)
            if len(line) == 14:
                f.write(','.join(line) + '\n')

            state = next_state

100%|██████████| 5/5 [00:04<00:00,  1.25it/s]


## 14. Selective Expert data generate

In [2]:
# Load gym environment
import gym
from kube_sim_gym import *
from kube_sim_gym.envs.sim_kube_env import SimKubeEnv

In [3]:
from kube_hr_scheduler.scheduler.sim_hr_scheduler import SimHrScheduler
from kube_hr_scheduler.strategies.model.default import Model

In [4]:
env = gym.make('SimKubeEnv-v0', reward_file='train_step_3.py')
scheduler = SimHrScheduler(env, 'default.py')

Base Path: /Users/swkim/Documents/coding/thesis/PROMES_colab/notebook/..


In [10]:
import tqdm
import numpy as np

# epochs = 100
data_size = 1000000
data_path = os.path.join(base_path, "dataset", "data_expert_selective.csv")

def cos_sim(a, b, thres):
    state_a = np.array(a[:12])
    action_a = np.array(a[12])
    state_b = np.array(b[:12])
    action_b = np.array(b[12])

    state_cos_sim = np.dot(state_a, state_b) / (np.linalg.norm(state_a) * np.linalg.norm(state_b))

    if abs(state_cos_sim - 1) < thres * 0.01 and action_a == action_b:
        return True
    else:
        return False

with open(data_path, 'a') as f:
    data_count = 0
    while data_count < data_size:
        done = False
        state = env.reset()
        prev = np.zeros(26)
        while not done:
            if data_count % 1000 == 0:
                print(f"Data Count: {data_count}")
            action = scheduler.decision(env)
            next_state, reward, done, _ = env.step(action)

            line = state.tolist() + [action] + next_state.tolist() + [int(done)]

            line = list(map(lambda x: round(float(x), 2), line))
            # Change action data type to int
            line[12] = int(line[12])

            if len(line) == 26 and not cos_sim(prev, line, 5):
                f.write(','.join(list(map(str, line))) + '\n')
                data_count += 1
                prev = line

            state = next_state

Data Count: 0
[1.   1.   1.   1.   1.   1.   1.   1.   1.   1.   0.01 0.19]


  state_cos_sim = np.dot(state_a, state_b) / (np.linalg.norm(state_a) * np.linalg.norm(state_b))


[1.   1.   0.99 0.81 1.   1.   1.   1.   1.   1.   0.16 0.1 ]
[1.   1.   0.99 0.81 0.84 0.9  1.   1.   1.   1.   0.3  0.24]
[1.   1.   0.99 0.81 0.84 0.9  0.7  0.76 1.   1.   0.03 0.15]
[0.97 0.85 0.99 0.81 0.84 0.9  0.7  0.76 1.   1.   0.09 0.19]
[0.97 0.85 0.99 0.81 0.84 0.9  0.7  0.76 0.91 0.81 0.29 0.12]
[0.97 0.85 0.99 0.81 0.55 0.78 0.7  0.76 0.91 0.81 0.24 0.16]
[0.73 0.69 0.99 0.81 0.55 0.78 0.7  0.76 0.91 0.81 0.05 0.07]
[0.73 0.69 0.99 0.81 0.55 0.78 0.7  0.76 0.86 0.74 0.   0.  ]
[0.73 0.69 0.99 0.81 0.55 0.78 0.7  0.76 0.86 0.74 0.2  0.08]
[0.73 0.69 0.79 0.73 0.55 0.78 0.7  0.76 0.86 0.74 0.18 0.11]
[0.73 0.69 0.61 0.62 0.55 0.78 0.7  0.76 0.86 0.74 0.03 0.21]


KeyboardInterrupt: 

In [7]:
a = np.array([1,2])
b = np.array([1,2])

np.dot(a, b)

5