In [None]:
import random
import json
import pandas as pd
import numpy as np

In [None]:

## State extraction function

# Builds the state vector of the environment from the number of critical items present at that state,
# the current week and the amount of time left.

def arrival_state_sim(n_critical,current_week, time_available):
    """Assemble the 3-element state list used by the simulator.

    Args:
        n_critical: number of critical items currently present.
        current_week: week index stored as the second state entry.
        time_available: remaining time budget stored as the third state entry.

    Returns:
        ``[n_critical * 150, current_week, time_available]``.
    """
    # Every critical item contributes 150 to the first state entry.
    critical_mitigation_time = n_critical * 150
    return [critical_mitigation_time, current_week, time_available]

In [None]:
## This function linearly rescales a value from one range to another.
def action_scalarization(oldactionmin,oldactionmax,newactionmin,newactionmax,action):
    """Linearly map ``action`` from [oldactionmin, oldactionmax] onto [newactionmin, newactionmax].

    Values outside the old range are extrapolated, not clipped.
    """
    source_span = oldactionmax - oldactionmin
    target_span = newactionmax - newactionmin
    offset_in_source = action - oldactionmin
    return ((offset_in_source * target_span) / source_span) + newactionmin

In [None]:
def step(state, action, n_crit_next,current_week):
    """Advance the simulator by one week.

    Args:
        state: ``[critical_minutes, week, remaining_time]`` for the current step.
        action: policy output; the training loop clips it to [-1, 1] before calling.
        n_crit_next: number of critical items arriving for the next step
            (each adds 150 minutes to the backlog).
        current_week: index of the current week; the next state stores
            ``current_week + 1``.

    Returns:
        ``(next_state, reward)`` where ``next_state`` has the same layout as
        ``state`` and ``reward = 0.8 * crit_reward + 0.2 * action_reward``.
    """
    # Rescale the action from [-1, 1] to [600*4, 3600*4] = [2400, 14400] minutes.
    action_minutes =  int(action_scalarization(-1,1,600*4,3600*4,action))

    # Small cost proportional to the requested minutes. It is computed on the
    # request *before* it is capped by the remaining budget below.
    action_reward = - action_minutes * 0.00001

    # Never spend more than the time that is left.
    if action_minutes > state[-1]:
        action_minutes = state[-1]

    ## Calculating the new remaining time for the next step
    remaining_time = state[-1] - action_minutes

    if state[0] > 0:
        # Fraction of the critical backlog covered this week, saturating at 1.
        crit_reward = min(1, action_minutes / state[0])
        # Whatever was not covered carries over; never negative.
        rem_crit_minute = max(0, state[0] - action_minutes)
    else:
        crit_reward = 0
        rem_crit_minute = 0

    ### Generating reward
    reward =  0.8 *crit_reward + 0.2 *action_reward

    ### Generating next state
    crit_mins_ns = rem_crit_minute + n_crit_next*150
    next_state = [crit_mins_ns, current_week+1, remaining_time ]

    return next_state, reward

## The PPO

In [None]:
############################### Import libraries ###############################


import os
import glob
import time
from datetime import datetime

import torch
import torch.nn as nn
from torch.distributions import MultivariateNormal
from torch.distributions import Categorical

import numpy as np

import gym
import pybullet_envs

In [None]:
################################## set device ##################################

print("============================================================================================")


# Use the first CUDA device when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    device = torch.device('cpu')
    print("Device set to : cpu")

print("============================================================================================")

In [None]:
################################## PPO Policy ##################################


class RolloutBuffer:
    """Plain container for the transitions collected between two policy updates."""

    def __init__(self):
        # Five parallel lists, one entry per stored timestep.
        self.actions, self.states, self.logprobs = [], [], []
        self.rewards, self.is_terminals = [], []

    def clear(self):
        """Empty every list in place (the list objects themselves are kept)."""
        for storage in (self.actions, self.states, self.logprobs,
                        self.rewards, self.is_terminals):
            storage.clear()


class ActorCritic(nn.Module):
    """Actor and critic networks for PPO.

    Both networks are MLPs with two hidden Tanh layers of width 64. The actor
    ends in Tanh for continuous action spaces (it outputs the action mean) and
    in Softmax for discrete ones (it outputs action probabilities). The critic
    outputs a single state value.
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super().__init__()

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_dim = action_dim
            # Fixed (non-learned) per-dimension variance of the Gaussian policy.
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init).to(device)

        # actor: only the output activation depends on the action-space type
        actor_head = nn.Tanh() if has_continuous_action_space else nn.Softmax(dim=-1)
        self.actor = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.Tanh(),
            nn.Linear(64, 64),
            nn.Tanh(),
            nn.Linear(64, action_dim),
            actor_head,
        )

        # critic
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.Tanh(),
            nn.Linear(64, 64),
            nn.Tanh(),
            nn.Linear(64, 1),
        )

    def set_action_std(self, new_action_std):
        """Replace the policy variance with ``new_action_std ** 2`` (continuous policies only)."""
        if not self.has_continuous_action_space:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")
            return

        self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)

    def forward(self):
        # The module is driven through act() / evaluate() instead.
        raise NotImplementedError

    def _discrete_distribution(self, state):
        """Categorical distribution over the actor's softmax output."""
        return Categorical(self.actor(state))

    def act(self, state):
        """Sample an action for ``state``.

        Returns:
            ``(action, log_prob)``, both detached from the autograd graph.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            covariance = torch.diag(self.action_var).unsqueeze(dim=0)
            policy = MultivariateNormal(mean, covariance)
        else:
            policy = self._discrete_distribution(state)

        sampled = policy.sample()
        sampled_logprob = policy.log_prob(sampled)

        return sampled.detach(), sampled_logprob.detach()

    def evaluate(self, state, action):
        """Score previously taken actions under the current parameters.

        Returns:
            ``(action_logprobs, state_values, dist_entropy)``.
        """
        if self.has_continuous_action_space:
            mean = self.actor(state)
            variance = self.action_var.expand_as(mean)
            covariance = torch.diag_embed(variance).to(device)
            policy = MultivariateNormal(mean, covariance)

            # for single action continuous environments
            if self.action_dim == 1:
                action = action.reshape(-1, self.action_dim)
        else:
            policy = self._discrete_distribution(state)

        action_logprobs = policy.log_prob(action)
        dist_entropy = policy.entropy()
        state_values = self.critic(state)

        return action_logprobs, state_values, dist_entropy


In [None]:
class PPO:
    """PPO agent with a clipped surrogate objective.

    Holds two copies of the network: ``policy`` (optimised in ``update``) and
    ``policy_old`` (used to collect rollouts and synchronised after each
    update), plus the rollout buffer that ``select_action`` fills.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        # Separate learning rates for the actor and the critic parameters.
        self.optimizer = torch.optim.Adam([
                        {'params': self.policy.actor.parameters(), 'lr': lr_actor},
                        {'params': self.policy.critic.parameters(), 'lr': lr_critic}
                    ])

        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()


    def set_action_std(self, new_action_std):
        """Set the action standard deviation on both policies (continuous only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)

        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")


    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Linearly decrease the action std by ``action_std_decay_rate``, floored at ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")

        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if (self.action_std <= min_action_std):
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PPO::decay_action_std() on discrete action space policy")

        print("--------------------------------------------------------------------------------------------")


    def select_action(self, state):
        """Sample an action from ``policy_old`` and record it in the rollout buffer.

        Note that every call appends to ``buffer.states``, ``buffer.actions``
        and ``buffer.logprobs``; the caller is responsible for appending the
        matching reward and terminal flag.

        Returns:
            A flattened numpy array for continuous policies, a Python scalar
            for discrete ones.
        """
        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state)

        self.buffer.states.append(state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        if self.has_continuous_action_space:
            return action.detach().cpu().numpy().flatten()
        return action.item()


    def update(self):
        """Run ``K_epochs`` optimisation steps on the buffered rollout, then clear the buffer."""

        # Monte Carlo estimate of returns, accumulated backwards through the
        # buffer and reset at every episode boundary. Appending and reversing
        # once avoids the quadratic cost of repeated insert(0, ...).
        rewards = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            rewards.append(discounted_reward)
        rewards.reverse()

        # Normalizing the rewards
        rewards = torch.tensor(rewards, dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)


        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)

            # match state_values tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            advantages = rewards - state_values.detach()
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages

            # final loss of clipped objective PPO:
            # policy term + 0.5 * value loss - 0.01 * entropy bonus
            loss = -torch.min(surr1, surr2) + 0.5*self.MseLoss(state_values, rewards) - 0.01*dist_entropy

            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()


    def save(self, checkpoint_path):
        """Write the weights of ``policy_old`` to ``checkpoint_path``."""
        torch.save(self.policy_old.state_dict(), checkpoint_path)


    def load(self, checkpoint_path):
        """Load weights from ``checkpoint_path`` into both policies.

        The checkpoint is deserialised once (mapped to CPU storage) and the
        same state dict is copied into ``policy_old`` and ``policy``.
        """
        state_dict = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
        self.policy_old.load_state_dict(state_dict)
        self.policy.load_state_dict(state_dict)

In [None]:
print("============================================================================================")


################################### Training ###################################


####### initialize environment hyperparameters ######

env_name = 'Dynamic-VI-PPO'
has_continuous_action_space = True

# Episode length and total training budget, both counted in environment timesteps.
max_ep_len = 4
max_training_timesteps = 80000

# Reporting cadence, in timesteps.
## Note : print/log frequencies should be greater than max_ep_len
print_freq = 4 * max_ep_len         # print avg reward in the interval
log_freq = 2 * max_ep_len           # log avg reward in the interval
save_model_freq = 2000              # save model frequency

# Exploration-noise schedule for the continuous (Multivariate Normal) policy.
action_std = 0.6                    # starting std
action_std_decay_rate = 0.05        # subtracted from action_std at every decay
min_action_std = 0.1                # decay stops once action_std <= min_action_std
action_std_decay_freq = 4000        # decay frequency (in num timesteps)

#####################################################


################ PPO hyperparameters ################

update_timestep = 4 * max_ep_len    # update policy every n timesteps
K_epochs = 40                       # update policy for K epochs
eps_clip = 0.2                      # clip parameter for PPO
gamma = 0.99                        # discount factor

lr_actor = 0.0002                   # learning rate for actor network
lr_critic = 0.001                   # learning rate for critic network

random_seed = 0                     # set random seed if required (0 = no random seed)

#####################################################


print("training environment name : " + env_name)

# No gym environment is instantiated here, so the dimensions are set by hand.

# state space dimension
state_dim = 3

# action space dimension (1 for both the continuous and the discrete setting)
action_dim = 1



###################### logging ######################

#### log files for multiple runs are NOT overwritten

# makedirs(..., exist_ok=True) creates the parent directory as well and does
# not fail when the directory already exists (no check-then-create race).
log_dir = "PPO_logs" + '/' + env_name + '/'
os.makedirs(log_dir, exist_ok=True)


#### get number of log files in log directory
# The run number is the count of regular files directly inside log_dir.
current_num_files = next(os.walk(log_dir))[2]
run_num = len(current_num_files)


#### create new log file for each run
# log_dir already ends with '/', so no extra separator is added here.
log_f_name = log_dir + 'PPO_' + env_name + "_log_" + str(run_num) + ".csv"

print("current logging run number for " + env_name + " : ", run_num)
print("logging at : " + log_f_name)

#####################################################


################### checkpointing ###################

run_num_pretrained = 0      #### change this to prevent overwriting weights in same env_name folder

directory = "PPO_preTrained" + '/' + env_name + '/'
os.makedirs(directory, exist_ok=True)


checkpoint_path = directory + "PPO_{}_{}_{}.pth".format(env_name, random_seed, run_num_pretrained)
print("save checkpoint path : " + checkpoint_path)

#####################################################


############# print all hyperparameters #############

print("--------------------------------------------------------------------------------------------")

print("max training timesteps : ", max_training_timesteps)
print("max timesteps per episode : ", max_ep_len)

print("model saving frequency : " + str(save_model_freq) + " timesteps")
print("log frequency : " + str(log_freq) + " timesteps")
print("printing average reward over episodes in last : " + str(print_freq) + " timesteps")

print("--------------------------------------------------------------------------------------------")

print("state space dimension : ", state_dim)
print("action space dimension : ", action_dim)

print("--------------------------------------------------------------------------------------------")

if has_continuous_action_space:
    print("Initializing a continuous action space policy")
    print("--------------------------------------------------------------------------------------------")
    print("starting std of action distribution : ", action_std)
    print("decay rate of std of action distribution : ", action_std_decay_rate)
    print("minimum std of action distribution : ", min_action_std)
    print("decay frequency of std of action distribution : " + str(action_std_decay_freq) + " timesteps")

else:
    print("Initializing a discrete action space policy")

print("--------------------------------------------------------------------------------------------")

print("PPO update frequency : " + str(update_timestep) + " timesteps")
print("PPO K epochs : ", K_epochs)
print("PPO epsilon clip : ", eps_clip)
print("discount factor (gamma) : ", gamma)

print("--------------------------------------------------------------------------------------------")

print("optimizer learning rate actor : ", lr_actor)
print("optimizer learning rate critic : ", lr_critic)

if random_seed:
    print("--------------------------------------------------------------------------------------------")
    print("setting random seed to ", random_seed)
    torch.manual_seed(random_seed)
    # No `env` object exists in this notebook (gym.make is commented out), so
    # calling env.seed() would raise NameError. The arrival patterns in the
    # training loop are drawn with the stdlib `random` module, so seed that.
    random.seed(random_seed)
    np.random.seed(random_seed)

#####################################################

print("============================================================================================")

################# training procedure ################

# initialize a PPO agent
ppo_agent = PPO(state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std)


# track total training time
start_time = datetime.now().replace(microsecond=0)
print("Started training at (GMT) : ", start_time)

print("============================================================================================")


# printing and logging variables
print_running_reward = 0
print_running_episodes = 0

log_running_reward = 0
log_running_episodes = 0

time_step = 0
i_episode = 0

reward_list = []
episode_list = []
total_expected_critical = 50
epi_no = 0

# logging file; try/finally guarantees the handle is closed even when training
# raises or the cell is interrupted.
log_f = open(log_f_name,"w+")
try:
    log_f.write('episode,timestep,reward\n')

    # training loop
    while time_step <= max_training_timesteps:

        # Four candidate arrival patterns. Each lists the number of critical
        # arrivals per week and ends with 0 so that index t+1 is valid on the
        # last step of the episode.
        crit_week1 = [random.randint(4,20), random.randint(40,60), random.randint(80,100), random.randint(4,20),0]
        crit_week2 = [ random.randint(40,60),random.randint(4,20),  random.randint(4,20),random.randint(80,100),0]
        crit_week3 = [random.randint(4,20), random.randint(4,20),  random.randint(80,100),random.randint(40,60),0]
        crit_week4 = [random.randint(80,100), random.randint(4,20), random.randint(40,60),  random.randint(4,20),0]

        crit_pat_sel = [crit_week1,crit_week2,crit_week3,crit_week4]

        crit_arrival_list = random.choice(crit_pat_sel)

        init_arrival_crit = crit_arrival_list[0]

        # Initial state: [critical minutes, week 0, full time budget].
        state_l = [init_arrival_crit*150 , 0, 9600*4]

        state = np.array(state_l)

        done = False
        current_ep_reward = 0
        print('EPISODE')
        print(epi_no)
        print('----------------------')
        for t in range(0, max_ep_len):
            print('State')
            print(state)
            print('----------------------')

            # select action with policy
            action = ppo_agent.select_action(state)
            action =  action.clip(-1,1)
            print('Action')
            print(action)
            print('in minutes', action_scalarization(-1,1,600*4,3600*4,action[0]))
            print('----------------------')

            n_critical_next = crit_arrival_list[t+1]

            next_state_l, reward = step(state_l, action[0], n_critical_next,t)
            next_state = np.array(next_state_l)
            state_l = next_state_l
            state = np.array(state_l)

            print('next_state')
            print(next_state)
            print('----------------------')

            print('Reward')
            print(reward)
            print('----------------------')

            if t == max_ep_len - 1:
                done = True
                epi_no += 1

            # saving reward and is_terminals
            ppo_agent.buffer.rewards.append(reward)
            ppo_agent.buffer.is_terminals.append(done)

            time_step +=1
            current_ep_reward += reward

            # update PPO agent
            if time_step % update_timestep == 0:
                ppo_agent.update()

            # if continuous action space; then decay action std of ouput action distribution
            if has_continuous_action_space and time_step % action_std_decay_freq == 0:
                ppo_agent.decay_action_std(action_std_decay_rate, min_action_std)

            # log in logging file
            if time_step % log_freq == 0:

                # log average reward till last episode; max(..., 1) avoids a
                # ZeroDivisionError when no episode finished in this interval
                log_avg_reward = log_running_reward / max(log_running_episodes, 1)
                log_avg_reward = round(log_avg_reward, 4)

                log_f.write('{},{},{}\n'.format(i_episode, time_step, log_avg_reward))
                log_f.flush()

                log_running_reward = 0
                log_running_episodes = 0

            # printing average reward
            if time_step % print_freq == 0:

                # print average reward till last episode (same zero guard as above)
                print_avg_reward = print_running_reward / max(print_running_episodes, 1)
                print_avg_reward = round(print_avg_reward, 2)

                print("Episode : {} \t\t Timestep : {} \t\t Average Reward : {}".format(i_episode, time_step, print_avg_reward))

                print_running_reward = 0
                print_running_episodes = 0

            # save model weights
            if time_step % save_model_freq == 0:
                print("--------------------------------------------------------------------------------------------")
                print("saving model at : " + checkpoint_path)
                ppo_agent.save(checkpoint_path)
                print("model saved")
                print("Elapsed Time  : ", datetime.now().replace(microsecond=0) - start_time)
                print("--------------------------------------------------------------------------------------------")

            # break; if the episode is over
            if done:
                break

        print_running_reward += current_ep_reward
        print_running_episodes += 1

        log_running_reward += current_ep_reward
        log_running_episodes += 1

        i_episode += 1
        reward_list.append(current_ep_reward)
        episode_list.append(i_episode)
finally:
    log_f.close()


# print total training time
print("============================================================================================")
end_time = datetime.now().replace(microsecond=0)
print("Started training at (GMT) : ", start_time)
print("Finished training at (GMT) : ", end_time)
print("Total training time  : ", end_time - start_time)
print("============================================================================================")

In [None]:
# Hand-picked probe state, laid out as [critical minutes, week, remaining time].
test_state1 = np.array([340 , 0, 9600*4])

In [None]:
def actionvscrit():
    """Query the agent over a sweep of critical-item counts at week 0.

    For every count in ``range(5, 300, 25)`` a state
    ``[count * 150, 0, 9600 * 4]`` is passed to ``ppo_agent.select_action``
    and the sampled action is converted to minutes.

    Returns:
        ``(crit_VIs, actions)``: the counts tried and the minutes chosen for each.
    """
    crit_VIs = []
    actions = []

    # select_action() records every query in the rollout buffer; remember the
    # current sizes so these evaluation-only entries can be removed afterwards
    # (they have no matching reward / terminal flag).
    buffer = ppo_agent.buffer
    n_states, n_actions, n_logprobs = len(buffer.states), len(buffer.actions), len(buffer.logprobs)

    for i in range(5,300,25):
        test_state1 = np.array([i*150 , 0, 9600*4])
        # Clip to [-1, 1] exactly as the training loop does before rescaling,
        # so the reported minutes stay inside the 600*4 .. 3600*4 range.
        action = ppo_agent.select_action(test_state1).clip(-1, 1)
        action_mins_ = action_scalarization(-1,1,600*4,3600*4,action[0])
        crit_VIs.append(i)
        actions.append(action_mins_)

    del buffer.states[n_states:]
    del buffer.actions[n_actions:]
    del buffer.logprobs[n_logprobs:]

    return crit_VIs,  actions

In [None]:
# Run the sweep: critical-item counts and the minutes the agent chose for each.
crit_VIs,  actions = actionvscrit()

In [None]:
import pandas as pd
# One row per swept count, holding the chosen minutes.
action_df = pd.DataFrame(actions)

In [None]:
# Persist the swept actions to a CSV file in the working directory.
action_df.to_csv('Actions_0.8-0.2_4.csv')

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Chosen minutes (y) against the number of critical items (x).
plt.plot(crit_VIs,actions)

In [None]:
# Sample one action for the probe state.
# NOTE: select_action() also appends this query to ppo_agent.buffer.
action_test = ppo_agent.select_action(test_state1)

In [None]:
# Display the action returned by select_action (not clipped to [-1, 1] here).
action_test

In [None]:
# Convert the sampled action to minutes using the same 600*4 .. 3600*4 range as training.
action_scalarization(-1,1,600*4,3600*4,action_test[0])

In [None]:
# Second probe state: [critical minutes, week, remaining time] with little time left.
test_state2 = np.array([5600, 0, 1600])

In [None]:
# Sample one action for the second probe state.
# NOTE: select_action() also appends this query to ppo_agent.buffer.
action_test = ppo_agent.select_action(test_state2)



In [None]:
# NOTE(review): this rescales to 600 .. 3600, whereas training and the other
# cells use 600*4 .. 3600*4 — confirm which range is intended here.
action_scalarization(-1,1,600,3600,action_test[0])

In [None]:
# Rebinds test_state2 to a plain list (previously a numpy array).
# NOTE(review): no later use is visible in this part of the notebook — confirm it is still needed.
test_state2 = [9600, 0, 9600]

In [None]:
## Test Results

In [None]:
def VI_arrival(n_vi, n_crit, cvss_high_no):
    """Generate a synthetic batch of items as rows of attributes.

    Each row is ``[hvas, protection_level, ports, ids_info, cvss,
    exposure_score, mit_time]`` where ``exposure_score`` is the equally
    weighted (0.2 each) sum of the first five attributes.

    Per pass, three groups are generated in this order:
      * ``n_crit`` rows with ``ids_info = 1`` and ``mit_time = 150``;
      * ``cvss_high_no`` rows with ``ids_info = 0``, ``cvss`` in {0.75, 1}
        and ``mit_time = 150``;
      * ``n_vi - (n_crit + cvss_high_no)`` rows with ``ids_info = 0`` and
        ``mit_time`` drawn uniformly from 25..100.
    Passes repeat until at least ``n_vi`` rows exist.

    Returns:
        ``np.array`` of the generated rows.
    """
    high_list = [0.75,1]
    low_list = [0.5,0.25,0.1]
    med_list = [0.25,0.5]
    cvss_ultra = [0.75,1]
    cvss_low = [0.25,0.1]
    spread_list = [0.5,0.25,0.75,0.1]
    threshold = 0.75

    def high_or_low(alpha):
        # One beta(alpha, 1) draw picks the pool, one choice draw picks the value.
        if np.random.beta(alpha,1) < threshold:
            return np.random.choice(high_list)
        return np.random.choice(low_list)

    def build_row(hvas, protection_level, ports, ids_info, cvss, mit_time):
        exposure_score = 0.2 *  hvas + 0.2 * protection_level + 0.2* ports  + 0.2*ids_info + 0.2*cvss
        return [hvas, protection_level, ports, ids_info, cvss, exposure_score, mit_time]

    # Size of the low-priority group; constant across passes.
    VI_low_no = n_vi - (n_crit + cvss_high_no)

    vi_list = []
    while len(vi_list) < n_vi:

        # Critical group. The draws are kept as separate statements so the
        # random stream is consumed in a fixed order.
        for _ in range(n_crit):
            hvas = high_or_low(3)
            protection_level = high_or_low(2)
            ports = high_or_low(0.5)
            if np.random.beta(1,1) > threshold:
                cvss = np.random.choice([0.25,0.75,0.1,0.5])
            else:
                cvss = np.random.choice(low_list)
            vi_list.append(build_row(hvas, protection_level, ports, 1, cvss, 150))

        # High-CVSS group.
        for _ in range(cvss_high_no):
            hvas = np.random.choice(spread_list)
            protection_level = np.random.choice(spread_list)
            ports = np.random.choice(spread_list)
            cvss = np.random.choice(cvss_ultra)
            vi_list.append(build_row(hvas, protection_level, ports, 0, cvss, 150))

        # Low-priority group.
        for _ in range(VI_low_no):
            hvas = np.random.choice(med_list)
            protection_level = np.random.choice(med_list)
            ports = np.random.choice(med_list)
            cvss = np.random.choice(cvss_low)
            vi_list.append(build_row(hvas, protection_level, ports, 0, cvss, random.randint(25,100)))

    return np.array(vi_list)

In [None]:
# Sample batch: 100 items in total, 25 critical and 26 high-CVSS.
t1 = VI_arrival(100, 25, 26)

In [None]:
def PPO_model(t1,allocated_time):
    """Choose which items to mitigate by solving a 0/1 knapsack with Gurobi.

    The objective maximises the summed value of column 5 of ``t1`` over the
    selected rows, subject to the summed last column (mitigation time) not
    exceeding ``allocated_time``.

    Args:
        t1: 2-D array with one row per item.
        allocated_time: time budget for the knapsack constraint. (Previously
            this argument was ignored in favour of a hard-coded 13972.085.)

    Returns:
        ``(mit_list, unmit_list)``: selected and unselected rows, each a list
        of the row's first seven values.
    """
    a = t1[:,5]
    u = t1[:,-1]

    length = len(a)
    model = Model('IT PROJECT')

    x = model.addVars(length, vtype= GRB.BINARY)

    # float() so numpy scalar budgets are passed to the solver as plain numbers.
    model.addConstr((quicksum((u[i] * x[i] )for i in range(length))<= float(allocated_time)))

    model.setObjective((quicksum((x[i]*a[i]) for i in range(length))),GRB.MAXIMIZE)

    model.optimize()

    # Binary solution values are only integral up to the solver's tolerance
    # (e.g. 0.999999), so round instead of truncating with int().
    x_values = [int(round(x[i].x)) for i in range(length)]

    mit_list = []
    unmit_list = []
    for i in range(length):
        row = np.append(t1[i], x_values[i])
        vi_inf = [row[j] for j in range(7)]
        if row[-1] == 1:
            mit_list.append(vi_inf)
        else:
            unmit_list.append(vi_inf)

    return mit_list, unmit_list
    
    

In [None]:
def action_implement_PPO(vi_list, action_, rem_action):
    """Cap the requested time at the remaining budget and delegate to PPO_model.

    Prints the (possibly capped) time and returns PPO_model's
    ``(mit_list, unmit_list)``.
    """
    capped_action = rem_action if action_ > rem_action else action_
    print(capped_action)

    return PPO_model(vi_list, capped_action)

In [None]:
def action_implement(vi_list, action, rem_action, scheme):
    """Greedily mitigate items in descending priority until the time is used up.

    Args:
        vi_list: 2-D numpy array, one row per item; the last column is the
            mitigation time.
        action: time requested for this step; capped at ``rem_action``.
        rem_action: time still available.
        scheme: ``'PPO'`` ranks rows by the second-to-last column; any other
            value (e.g. ``'CVSS'``) ranks by the third-to-last column.

    Returns:
        ``(mit_vi_list, sorted_list)``: the mitigated rows (a list of 1-D
        arrays, highest priority first) and an array with the remaining rows
        in priority order.

    Notes:
        An item is taken before the budget is checked, so at least one item is
        mitigated whenever ``vi_list`` is non-empty and the last item taken may
        overshoot the budget.
        Previously only ``scheme == 'CVSS'`` ran the greedy pass and every
        other scheme failed with UnboundLocalError; the pass now runs for all
        schemes, using the ranking column selected above.
    """
    if action > rem_action:
        action = rem_action

    sort_column = -2 if scheme == 'PPO' else -3
    sorted_list = vi_list[np.argsort(vi_list[:, sort_column])][::-1]

    # Count how many leading rows are taken instead of deleting rows one by one.
    taken = 0
    for row in sorted_list:
        action = action - row[-1]
        taken += 1
        if action <= 0:
            break

    mit_vi_list = [row for row in sorted_list[:taken]]
    return mit_vi_list, sorted_list[taken:]

In [None]:
def attribute_calc(mit_list):
    """Count, per attribute, how many rows have a value of at least 0.75.

    Only the first five columns of each row are inspected.

    Returns:
        ``(n_hvas, n_low_prot, n_port, n_ids, n_CVSS)`` — the counts for
        columns 0 to 4 respectively.
    """
    threshold = 0.75
    counts = [0, 0, 0, 0, 0]

    for row in mit_list:
        for column in range(5):
            if row[column] >= threshold:
                counts[column] += 1

    n_hvas, n_low_prot, n_port, n_ids, n_CVSS = counts
    return n_hvas,n_low_prot, n_port,n_ids, n_CVSS

In [None]:
from gurobipy import Model, quicksum, GRB

In [None]:
def VPSS_data_gen(t1):
    """Compute a VPSS score and collect the mitigation time for every row.

    The score is the equally weighted (0.25 each) sum of columns 0, 1, 2
    and 4; the time is the row's last column.

    Returns:
        ``(VPSS_list, time_list)``, two lists aligned with the rows of ``t1``.
    """
    VPSS_list = []
    time_list = []
    for row in t1:
        score = 0.25 * row[0] + 0.25 * row[1] + 0.25 * row[2] + 0.25 * row[4]
        VPSS_list.append(score)
        time_list.append(row[-1])

    return VPSS_list,time_list
    

In [None]:
def VULCON_data_gen(t1):
    """Compute a VULCON-style score and collect a time value for every row.

    The score is ``0.2 * row[-1] + 0.2 * row[-2] + 0.3 * row[0] + 0.1 * row[-4]``
    and the time is ``row[-3]``.
    NOTE(review): the negative indices imply a different column layout from
    the 7-column rows built by VI_arrival — confirm against the caller.

    Returns:
        ``(VPSS_list, time_list)``, two lists aligned with the rows of ``t1``.
    """
    VPSS_list = []
    time_list = []
    for row in t1:
        score = 0.2 * row[-1] + 0.2 * row[-2] + 0.3 * row[0] + 0.1 * row[-4]
        VPSS_list.append(score)
        time_list.append(row[-3])

    return VPSS_list,time_list

In [None]:
def VPSS_model(t1, time_budget=9600):
    """Select items by solving a 0/1 knapsack on VPSS scores with Gurobi.

    Scores and times come from ``VPSS_data_gen(t1)``. The objective maximises
    the summed score of the selected rows, subject to their summed time not
    exceeding ``time_budget``.

    Args:
        t1: 2-D array with one row per item.
        time_budget: knapsack capacity; defaults to the previously hard-coded 9600.

    Returns:
        ``(mit_list, unmit_list)``: selected and unselected rows, each a list
        of the row's first seven values.
    """
    a, u = VPSS_data_gen(t1)

    length = len(a)
    model = Model('IT PROJECT')

    x = model.addVars(length, vtype= GRB.BINARY)

    model.addConstr((quicksum((u[i] * x[i] )for i in range(length))<= time_budget))

    model.setObjective((quicksum((x[i]*a[i]) for i in range(length))),GRB.MAXIMIZE)

    model.optimize()

    # Binary solution values are only integral up to the solver's tolerance
    # (e.g. 0.999999), so round instead of truncating with int().
    x_values = [int(round(x[i].x)) for i in range(length)]

    mit_list = []
    unmit_list = []
    for i in range(length):
        row = np.append(t1[i], x_values[i])
        vi_inf = [row[j] for j in range(7)]
        if row[-1] == 1:
            mit_list.append(vi_inf)
        else:
            unmit_list.append(vi_inf)

    return mit_list, unmit_list
    
    

In [None]:
def VULCON_model(t1):
    """Pick the records to mitigate by solving a 0/1 knapsack on the VULCON score.

    A Gurobi model with one binary variable per record maximises the summed
    score from ``VULCON_data_gen`` subject to the summed time values being at
    most 9600.

    Parameters
    ----------
    t1 : sequence of indexable records
        Vulnerability records with at least nine fields each.

    Returns
    -------
    tuple(list, list)
        ``(mit_list, unmit_list)``: the first nine fields of every record the
        solver selected, and of every record it did not, in input order. Each
        entry is a plain ``list`` so callers can update fields in place.
    """
    a, u = VULCON_data_gen(t1)

    length = len(a)
    model = Model('IT PROJECT')

    x = model.addVars(length, vtype=GRB.BINARY)

    model.addConstr(quicksum(u[i] * x[i] for i in range(length)) <= 9600)

    model.setObjective(quicksum(x[i] * a[i] for i in range(length)), GRB.MAXIMIZE)

    # optimize() returns nothing; the solution is read back from the variables.
    model.optimize()

    # Solver values for binaries are only integral up to a tolerance (a chosen
    # item may come back as 0.9999999), so round instead of truncating.
    x_values = [int(round(x[i].X)) for i in range(length)]

    mit_list = []
    unmit_list = []
    for record, chosen in zip(t1, x_values):
        row = np.append(record, chosen)
        vi_inf = list(row[:9])
        if row[-1] == 1:
            mit_list.append(vi_inf)
        else:
            unmit_list.append(vi_inf)

    return mit_list, unmit_list
    
    
    
    

In [None]:
def _add_vulcon_fields(vi_batch):
    """Return every record of ``vi_batch`` extended with VULCON's two extra fields.

    A uniform random ``age`` in [0, 1) and a ``persistense`` of 0 are appended
    to each record, in that order. The input records are not modified.
    """
    extended = []
    for item in vi_batch:
        age = random.random()
        persistense = 0
        item = np.append(item, age)
        item = np.append(item, persistense)
        extended.append(item)
    return extended


# Per-episode totals (one entry per episode); only tracked for PPO and CVSS.
n_hvas_PPO_list = []
n_low_prot_PPO_list = []
n_port_PPO_list = []
n_ids_PPO_list = []
n_CVSS_PPO_list = []

n_hvas_CVSS_list = []
n_low_prot_CVSS_list = []
n_port_CVSS_list = []
n_ids_CVSS_list = []
n_CVSS_CVSS_list = []

# Per-week counts (one entry per simulated week, across all episodes).
n_hvas_PPO_list_week = []
n_low_prot_PPO_list_week = []
n_port_PPO_list_week = []
n_ids_PPO_list_week = []
n_CVSS_PPO_list_week = []

n_hvas_CVSS_list_week = []
n_low_prot_CVSS_list_week = []
n_port_CVSS_list_week = []
n_ids_CVSS_list_week = []
n_CVSS_CVSS_list_week = []

n_hvas_VPSS_list_week = []
n_low_prot_VPSS_list_week = []
n_port_VPSS_list_week = []
n_ids_VPSS_list_week = []
n_CVSS_VPSS_list_week = []

n_hvas_VULCON_list_week = []
n_low_prot_VULCON_list_week = []
n_port_VULCON_list_week = []
n_ids_VULCON_list_week = []
n_CVSS_VULCON_list_week = []

for epi in range(13):

    # Time budget shared by the four weeks of one episode.
    rem_action = 9600*4

    # Candidate arrival patterns: each list holds one value per week plus a
    # trailing 0; one pattern is drawn at random per episode below.
    total_vi_week1 = [np.random.poisson(400), np.random.poisson(500), np.random.poisson(500), np.random.poisson(600),0]
    total_vi_week2 = [np.random.poisson(450), np.random.poisson(550), np.random.poisson(650), np.random.poisson(350),0]
    total_vi_week3 = [np.random.poisson(450), np.random.poisson(500), np.random.poisson(600), np.random.poisson(575),0]
    total_vi_week4 = [np.random.poisson(400), np.random.poisson(500), np.random.poisson(650), np.random.poisson(500),0]

    total_pat_sel = [total_vi_week1,total_vi_week2,total_vi_week3,total_vi_week4]

    crit_week1 = [random.randint(140,200), random.randint(180,200), random.randint(140,200), random.randint(140,200),0]
    crit_week2 = [random.randint(180,200),random.randint(140,220),random.randint(180,200), random.randint(140,150),0]
    crit_week3 = [random.randint(140,200),random.randint(180,200), random.randint(140,160), random.randint(140,150), 0]
    crit_week4 = [random.randint(140,200), random.randint(240,260), random.randint(180,220), random.randint(140,150),0]

    crit_pat_sel = [crit_week1,crit_week2,crit_week3,crit_week4]

    crit_week = random.choice(crit_pat_sel)
    total_vi_week = random.choice(total_pat_sel)

    init_arrival = VI_arrival(total_vi_week[0], crit_week[0], crit_week[0])

    # Same layout as arrival_state_sim: [critical count * 150, week index, time left].
    state = [crit_week[0]*150 , 0, 9600*4]

    rem_action_PPO = rem_action
    rem_action_CVSS = rem_action

    total_n_hvas_PPO = 0
    total_n_low_prot_PPO = 0
    total_n_port_PPO = 0
    total_n_ids_PPO = 0
    total_n_CVSS_PPO = 0

    total_n_hvas_CVSS = 0
    total_n_low_prot_CVSS = 0
    total_n_port_CVSS = 0
    total_n_ids_CVSS = 0
    total_n_CVSS_CVSS = 0

    total_crit = crit_week[0]
    # NOTE(review): the three names below alias the same object; this is only
    # safe if the selection functions do not mutate their input - confirm.
    VI_list_PPO = init_arrival
    VI_list_CVSS = init_arrival
    VI_list_VPSS = init_arrival
    init_arrival_VULCON = _add_vulcon_fields(init_arrival)

    VI_list_VULCON = init_arrival_VULCON

    for week in range(4):

        action_PPO = ppo_agent.select_action(state)

        if week != 3:
            action_PPO_mins = action_scalarization(-1,1,600*4,3600*4,action_PPO[0])
        else:
            # Final week: spend whatever is left of the budget.
            action_PPO_mins = rem_action_PPO

        # Never report a negative backlog of critical items (step() also
        # floors the remaining critical workload at 0).
        rem_crit = max(0, total_crit - int(action_PPO_mins/150))

        action_CVSS = 2400*4

        # Compare the action in minutes (not the raw policy output in [-1, 1])
        # against the remaining budget so the budget cannot go negative.
        if action_PPO_mins < rem_action_PPO:
            rem_action_PPO -= action_PPO_mins
        else:
            rem_action_PPO = 0

        rem_action_CVSS -= action_CVSS

        print(action_PPO_mins)

        mit_vi_list_PPO, unmit_vi_list_PPO = action_implement_PPO(VI_list_PPO, action_PPO_mins, rem_action_PPO)

        mit_vi_list_CVSS, unmit_vi_list_CVSS = action_implement(VI_list_CVSS, action_CVSS, rem_action_CVSS, 'CVSS')

        mit_vi_list_VPSS, unmit_vi_list_VPSS = VPSS_model(VI_list_VPSS)

        mit_vi_list_VULCON, unmit_vi_list_VULCON = VULCON_model(VI_list_VULCON)

        # Every record VULCON left unmitigated gets its last field raised by 0.25.
        for vi in unmit_vi_list_VULCON:
            vi[-1] += 0.25

        n_hvas_PPO,n_low_prot_PPO, n_port_PPO,n_ids_PPO, n_CVSS_PPO = attribute_calc(mit_vi_list_PPO)

        n_hvas_CVSS,n_low_prot_CVSS, n_port_CVSS,n_ids_CVSS, n_CVSS_CVSS = attribute_calc(mit_vi_list_CVSS)

        n_hvas_VPSS,n_low_prot_VPSS, n_port_VPSS,n_ids_VPSS, n_CVSS_VPSS = attribute_calc(mit_vi_list_VPSS)

        n_hvas_VULCON,n_low_prot_VULCON, n_port_VULCON,n_ids_VULCON, n_CVSS_VULCON = attribute_calc(mit_vi_list_VULCON)

        n_hvas_PPO_list_week.append(n_hvas_PPO)
        n_low_prot_PPO_list_week.append(n_low_prot_PPO)
        n_port_PPO_list_week.append(n_port_PPO)
        n_ids_PPO_list_week.append(n_ids_PPO)
        n_CVSS_PPO_list_week.append(n_CVSS_PPO)

        n_hvas_CVSS_list_week.append(n_hvas_CVSS)
        n_low_prot_CVSS_list_week.append(n_low_prot_CVSS)
        n_port_CVSS_list_week.append(n_port_CVSS)
        n_ids_CVSS_list_week.append(n_ids_CVSS)
        n_CVSS_CVSS_list_week.append(n_CVSS_CVSS)

        n_hvas_VPSS_list_week.append(n_hvas_VPSS)
        n_low_prot_VPSS_list_week.append(n_low_prot_VPSS)
        n_port_VPSS_list_week.append(n_port_VPSS)
        n_ids_VPSS_list_week.append(n_ids_VPSS)
        n_CVSS_VPSS_list_week.append(n_CVSS_VPSS)

        n_hvas_VULCON_list_week.append(n_hvas_VULCON)
        n_low_prot_VULCON_list_week.append(n_low_prot_VULCON)
        n_port_VULCON_list_week.append(n_port_VULCON)
        n_ids_VULCON_list_week.append(n_ids_VULCON)
        n_CVSS_VULCON_list_week.append(n_CVSS_VULCON)

        total_n_hvas_PPO += n_hvas_PPO
        total_n_low_prot_PPO += n_low_prot_PPO
        total_n_port_PPO += n_port_PPO
        total_n_ids_PPO += n_ids_PPO
        total_n_CVSS_PPO += n_CVSS_PPO

        total_n_hvas_CVSS += n_hvas_CVSS
        total_n_low_prot_CVSS += n_low_prot_CVSS
        total_n_port_CVSS += n_port_CVSS
        total_n_ids_CVSS += n_ids_CVSS
        total_n_CVSS_CVSS += n_CVSS_CVSS

        # No arrivals are generated after the last week of the episode.
        if week == 3:
            break
        arrival = VI_arrival(total_vi_week[week + 1], crit_week[week + 1], crit_week[week + 1])

        # Next week's pool for each method = new arrivals + that method's own backlog.
        if len(unmit_vi_list_PPO) > 0:
            VI_list_PPO = np.vstack((arrival , unmit_vi_list_PPO))
        else:
            VI_list_PPO = arrival

        if len(unmit_vi_list_CVSS) > 0:
            VI_list_CVSS = np.vstack((arrival , unmit_vi_list_CVSS))
        else:
            VI_list_CVSS = arrival

        if len(unmit_vi_list_VPSS) > 0:
            VI_list_VPSS = np.vstack((arrival , unmit_vi_list_VPSS))
        else:
            VI_list_VPSS = arrival

        arrival_VULCON = _add_vulcon_fields(arrival)

        if len(unmit_vi_list_VULCON) > 0:
            VI_list_VULCON = np.vstack((arrival_VULCON , unmit_vi_list_VULCON))
        else:
            VI_list_VULCON = arrival_VULCON

        total_crit = crit_week[week + 1] + rem_crit
        state = [(total_crit)*150, week + 1, rem_action_PPO]

    n_hvas_PPO_list.append(total_n_hvas_PPO)
    n_low_prot_PPO_list.append(total_n_low_prot_PPO)
    n_port_PPO_list.append(total_n_port_PPO)
    n_ids_PPO_list.append(total_n_ids_PPO)
    n_CVSS_PPO_list.append(total_n_CVSS_PPO)

    n_hvas_CVSS_list.append(total_n_hvas_CVSS)
    n_low_prot_CVSS_list.append(total_n_low_prot_CVSS)
    n_port_CVSS_list.append(total_n_port_CVSS)
    n_ids_CVSS_list.append(total_n_ids_CVSS)
    n_CVSS_CVSS_list.append(total_n_CVSS_CVSS)

In [None]:
# Running totals of the weekly n_hvas counts, one series per method.
n_hvas_PPO_list_cum = np.asarray(n_hvas_PPO_list_week).cumsum()
n_hvas_CVSS_list_cum = np.asarray(n_hvas_CVSS_list_week).cumsum()
n_hvas_VPSS_list_cum = np.asarray(n_hvas_VPSS_list_week).cumsum()
n_hvas_VULCON_list_cum = np.asarray(n_hvas_VULCON_list_week).cumsum()

In [None]:
# Running totals of the weekly n_CVSS counts, one series per method.
n_CVSS_PPO_list_cum = np.asarray(n_CVSS_PPO_list_week).cumsum()
n_CVSS_CVSS_list_cum = np.asarray(n_CVSS_CVSS_list_week).cumsum()
n_CVSS_VPSS_list_cum = np.asarray(n_CVSS_VPSS_list_week).cumsum()
n_CVSS_VULCON_list_cum = np.asarray(n_CVSS_VULCON_list_week).cumsum()

In [None]:
# Running totals of the weekly n_low_prot counts, one series per method.
n_low_prot_PPO_list_cum = np.asarray(n_low_prot_PPO_list_week).cumsum()
n_low_prot_CVSS_list_cum = np.asarray(n_low_prot_CVSS_list_week).cumsum()
n_low_prot_VPSS_list_cum = np.asarray(n_low_prot_VPSS_list_week).cumsum()
n_low_prot_VULCON_list_cum = np.asarray(n_low_prot_VULCON_list_week).cumsum()

In [None]:
# Running totals of the weekly n_port counts, one series per method.
n_port_PPO_list_cum = np.cumsum(n_port_PPO_list_week)
n_port_CVSS_list_cum = np.cumsum(n_port_CVSS_list_week)
n_port_VPSS_list_cum = np.cumsum(n_port_VPSS_list_week)
# The VULCON series was missing here although every other attribute has one.
n_port_VULCON_list_cum = np.cumsum(n_port_VULCON_list_week)

In [None]:
# Running totals of the weekly n_ids counts, one series per method.
n_ids_PPO_list_cum = np.asarray(n_ids_PPO_list_week).cumsum()
n_ids_CVSS_list_cum = np.asarray(n_ids_CVSS_list_week).cumsum()
n_ids_VPSS_list_cum = np.asarray(n_ids_VPSS_list_week).cumsum()
n_ids_VULCON_list_cum = np.asarray(n_ids_VULCON_list_week).cumsum()

In [None]:
import matplotlib.pyplot as plt
# x-axis for the cumulative plots: one tick per recorded week. Derived from the
# data instead of hard-coding 52 (13 episodes x 4 weeks), so the plots keep
# working when the episode or week count of the evaluation loop changes.
Iteration = list(range(1, len(n_hvas_PPO_list_week) + 1))

In [None]:
# Legend entries, listed in the order the four series are drawn in each figure.
label = [
    'DRL-Agent',
    'CVSS value-based',
    'VPSS',
    'VULCON',
]

In [None]:
# Cumulative n_hvas count per method; draw order must match `label`.
for series, colour in (
    (n_hvas_PPO_list_cum, 'g'),
    (n_hvas_CVSS_list_cum, 'r'),
    (n_hvas_VPSS_list_cum, 'y'),
    (n_hvas_VULCON_list_cum, 'b'),
):
    plt.plot(Iteration, series, color=colour)
plt.legend(labels=label, loc='best')
plt.xlabel('Number of Iterations')
plt.ylabel('Number of Selected Vulnerabilities')

In [None]:
# Cumulative n_CVSS count per method; draw order must match `label`.
for series, colour in (
    (n_CVSS_PPO_list_cum, 'g'),
    (n_CVSS_CVSS_list_cum, 'r'),
    (n_CVSS_VPSS_list_cum, 'y'),
    (n_CVSS_VULCON_list_cum, 'b'),
):
    plt.plot(Iteration, series, color=colour)
plt.legend(labels=label, loc='best')
plt.xlabel('Number of Iterations')
plt.ylabel('Number of Selected Vulnerabilities')

In [None]:
# Cumulative n_low_prot count per method; draw order must match `label`.
for series, colour in (
    (n_low_prot_PPO_list_cum, 'g'),
    (n_low_prot_CVSS_list_cum, 'r'),
    (n_low_prot_VPSS_list_cum, 'y'),
    (n_low_prot_VULCON_list_cum, 'b'),
):
    plt.plot(Iteration, series, color=colour)
plt.legend(labels=label, loc='best')
plt.xlabel('Number of Iterations')
plt.ylabel('Number of Selected Vulnerabilities')

In [None]:
# Cumulative n_port count per method; draw order must match `label`.
# The VPSS and VULCON curves previously plotted the n_low_prot series by
# mistake. The VULCON running total is computed here because no
# n_port_VULCON_list_cum variable is defined by the cumsum cells.
plt.plot(Iteration, n_port_PPO_list_cum , color='g')
plt.plot(Iteration, n_port_CVSS_list_cum , color='r')
plt.plot(Iteration, n_port_VPSS_list_cum , color='y')
plt.plot(Iteration, np.cumsum(n_port_VULCON_list_week) , color='b')
plt.legend(labels = label, loc = 'best')
plt.xlabel('Number of Iterations')
plt.ylabel('Number of Selected Vulnerabilities')

In [None]:
# Cumulative n_ids count per method; draw order must match `label`.
for series, colour in (
    (n_ids_PPO_list_cum, 'g'),
    (n_ids_CVSS_list_cum, 'r'),
    (n_ids_VPSS_list_cum, 'y'),
    (n_ids_VULCON_list_cum, 'b'),
):
    plt.plot(Iteration, series, color=colour)
plt.legend(labels=label, loc='best')
plt.xlabel('Number of Iterations')
plt.ylabel('Number of Selected Vulnerabilities')