In [None]:
import numpy as np
import gym
from gym import spaces
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from collections import deque
import random
import shap


# Абстрактный роутер
class AbstractRouter:
    """Simplified router: a load level, a capacity limit and a forwarding speed.

    The current load is kept in ``traffic``. The router switches itself off
    (``is_active = False``) as soon as a received load exceeds
    ``max_capacity``; nothing in this class switches it back on. While it is
    off it ignores configuration, incoming traffic and forwarding requests.
    """

    # Attributes reported by get_status(), in the order they appear in the dict.
    _STATUS_FIELDS = (
        'router_id',
        'traffic',
        'max_capacity',
        'speed',
        'is_active',
        'neighbors',
    )

    def __init__(self, router_id, neighbors=None, speed=10):
        """Create an active router with no load.

        Args:
            router_id: Identifier of this router.
            neighbors: Identifiers of the routers this one forwards to. A
                missing or empty value results in a fresh empty list.
            speed: Largest load forwarded by one ``forward_traffic`` call.
        """
        # self.config = {'ip': f'192.168.1.{router_id}', 'subnet': '255.255.255.0'}
        self.router_id = router_id
        self.neighbors = neighbors if neighbors else []
        self.speed = speed
        self.max_capacity = 100  # Load above which the router goes down
        self.traffic = 0
        self.is_active = True

    def configure(self, config):
        """Apply ``config["max_capacity"]`` and ``config["speed"]`` if the router is up.

        A router that is down keeps its current settings.
        """
        if not self.is_active:
            return
        self.max_capacity = config["max_capacity"]
        self.speed = config["speed"]

    def receive_traffic(self, amount):
        """Set the current load to ``amount`` and return the accepted load.

        The previous load is replaced, not increased.
        NOTE(review): confirm that overwriting (rather than accumulating) is
        the intended queueing model.

        Returns:
            ``amount`` when the router is up (even if this very load brings
            it down), ``0`` when it was already down.
        """
        if self.is_active:
            self.traffic = amount
            # The comparison works on integer-truncated values.
            overloaded = int(amount) > int(self.max_capacity)
            if overloaded:
                self.is_active = False  # The router goes down under load
            return amount
        return 0

    def forward_traffic(self):
        """Hand out up to ``speed`` units of load, split evenly among neighbors.

        Returns:
            A list of ``(neighbor_id, amount)`` pairs, or an empty list when
            the router is down or has no neighbors. The forwarded amount is
            subtracted from ``traffic``.
        """
        if not (self.is_active and self.neighbors):
            return []
        outgoing = min(self.traffic, self.speed)
        self.traffic -= outgoing
        share = outgoing / len(self.neighbors)
        return [(neighbor_id, share) for neighbor_id in self.neighbors]

    def get_status(self):
        """Return a dict snapshot of the router's attributes.

        ``neighbors`` refers to the router's own list, not a copy.
        """
        return {field: getattr(self, field) for field in self._STATUS_FIELDS}


# Сеть маршрутизаторов
class Network:
    """Collection of routers addressed by ``router_id`` plus a target server id."""

    def __init__(self, routers=None, target_server=None):
        """Index the given routers by their ``router_id``.

        Args:
            routers: Optional iterable of router objects.
            target_server: Identifier of the node the traffic should reach.
        """
        self.routers = {}
        if routers:
            for router in routers:
                self.routers[router.router_id] = router
        self.target_server = target_server  # Target server

    def add_router(self, router):
        """Register ``router`` under its id, replacing any previous entry."""
        self.routers[router.router_id] = router

    def configure_router(self, router_id, config):
        """Forward ``config`` to the router with this id; unknown ids are ignored."""
        if router_id not in self.routers:
            return
        self.routers[router_id].configure(config)

    def send_traffic(self, router_id, amount):
        """Deliver ``amount`` to a router and return what the router reports back.

        For an unknown ``router_id`` the amount itself is returned.
        """
        if router_id not in self.routers:
            return amount
        return self.routers[router_id].receive_traffic(amount)

    def forward_traffic(self):
        """Let every router forward once, in insertion order.

        Each forwarded share is delivered immediately, so a router later in
        the iteration already sees what earlier routers sent it in this pass.
        Shares addressed to unknown ids are dropped.
        """
        for sender in self.routers.values():
            for neighbor_id, share in sender.forward_traffic():
                if neighbor_id in self.routers:
                    self.routers[neighbor_id].receive_traffic(share)

    def is_network_active(self):
        """Return True if at least one line towards the target server is intact.

        Routers are scanned in insertion order. A line is the run of routers
        up to and including one that lists the target server as a neighbor;
        it counts as intact when none of its routers is down. The target
        server itself is skipped. When no line is intact, the status of every
        router is printed before returning False.
        """
        line_states = []
        line_ok = True
        for router in self.routers.values():
            status = router.get_status()
            if status['router_id'] == self.target_server:
                continue
            if not status['is_active']:
                line_ok = False
            if self.target_server in status['neighbors']:
                # End of a line: record its state and start the next one.
                line_states.append(line_ok)
                line_ok = True
        network_up = any(line_states)
        if not network_up:
            print([r.get_status() for r in self.routers.values()])
        return network_up


    def get_router_status(self, router_id):
        """Return the status dict of one router, or None for an unknown id."""
        if router_id not in self.routers:
            return None
        return self.routers[router_id].get_status()

    def get_network_status(self):
        """Return ``{router_id: status dict}`` for every router."""
        snapshot = {}
        for router_id, router in self.routers.items():
            snapshot[router_id] = router.get_status()
        return snapshot


# Синий агент
class BlueAgent:
    """Defending agent: picks a ``(max_capacity, speed)`` pair for every router.

    Actions are chosen epsilon-greedily from a small dense network and the
    network is fitted on transitions stored in a bounded replay buffer.
    """

    # Exclusive upper bound of every configuration value the agent emits.
    ACTION_SCALE = 100
    # Maximum number of transitions kept in the replay buffer.
    MEMORY_SIZE = 2000

    def __init__(self, num_routers):
        """Build the model and start with full exploration (``epsilon = 1.0``).

        Args:
            num_routers: Number of routers; defines the model's input width
                and the number of rows of every action.
        """
        self.num_routers = num_routers
        self.model = self.build_model()
        self.memory = deque(maxlen=self.MEMORY_SIZE)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.mean_losses = []
        self.mean_rewards = []

    def build_model(self):
        """Return a compiled network: ``num_routers`` inputs, ``2 * num_routers`` sigmoid outputs."""
        model = keras.Sequential([
            # An explicit Input layer replaces the deprecated `input_dim`
            # argument of the first Dense layer.
            keras.Input(shape=(self.num_routers,)),
            layers.Dense(64, activation='relu'),
            layers.Dense(64, activation='relu'),
            # Two values (max_capacity, speed) per router.
            layers.Dense(self.num_routers * 2, activation='sigmoid'),
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def act(self, state):
        """Choose router configurations for the given state.

        Args:
            state: Model input holding a single sample, i.e. an array of
                shape ``(1, num_routers)``.

        Returns:
            Array of shape ``(num_routers, 2)`` with values in
            ``[0, ACTION_SCALE)``: random integers while exploring, scaled
            model outputs otherwise.
        """
        if np.random.rand() <= self.epsilon:
            return np.random.randint(0, self.ACTION_SCALE, size=(self.num_routers, 2))
        # The sigmoid outputs lie in [0, 1]. Scale them to the range used by
        # the exploration branch; applied unscaled, every max_capacity would
        # truncate to 0 and the routers would fail on any traffic.
        scaled = self.model.predict(state, verbose=0) * self.ACTION_SCALE
        return scaled.reshape((self.num_routers, 2))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def clean_memory(self):
        """Drop every stored transition."""
        self.memory = deque(maxlen=self.MEMORY_SIZE)

    def replay(self, batch_size):
        """Fit the model on a random sample of stored transitions.

        At most ``batch_size`` transitions are used, one ``fit`` call each.
        For every transition the post-fit squared error and the mean reward
        of the whole sample are appended to ``mean_losses`` and
        ``mean_rewards``. Epsilon is decayed once per call, also when the
        buffer is empty.
        """
        batch_size = min(batch_size, len(self.memory))
        minibatch = random.sample(self.memory, batch_size)
        if minibatch:
            # Identical for every transition of this sample, so compute it once.
            mean_reward = np.mean([sample[2] for sample in minibatch])
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                best_next = np.amax(self.model.predict(next_state, verbose=0)[0])
                target = reward + self.gamma * best_next
            target_f = self.model.predict(state, verbose=0)
            # NOTE(review): this assigns the scalar target to every output of
            # the sample, not only to the component of the taken action.
            target_f[0] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

            prediction = self.model.predict(state, verbose=0)
            self.mean_losses.append(np.mean(np.square(target_f - prediction)))
            self.mean_rewards.append(mean_reward)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def explain_with_shap(self, state):
        """Plot SHAP values of the model for the first sample in ``state``.

        An all-zero sample is used as the background data of the explainer.
        """
        explainer = shap.KernelExplainer(self.model.predict, np.zeros((1, self.num_routers)))
        shap_values = explainer.shap_values(state[0])

        # Visualisation
        print("SHAP values for Blue Agent:")
        shap.summary_plot(shap_values, feature_names=[f"Router {i}" for i in range(self.num_routers)])


# Красный агент
class RedAgent:
    """Attacking agent: decides how much traffic to inject into each entry router.

    Actions are chosen epsilon-greedily from a small dense network and the
    network is fitted on transitions stored in a bounded replay buffer.
    """

    # Number of routers the agent can inject traffic into.
    NUM_ENTRY_ROUTERS = 3
    # Maximum number of transitions kept in the replay buffer.
    MEMORY_SIZE = 2000

    def __init__(self, max_traffic=50):
        """Build the model and start with full exploration (``epsilon = 1.0``).

        Args:
            max_traffic: Upper bound of the traffic sent to a single router.
        """
        self.max_traffic = max_traffic
        self.model = self.build_model()
        self.memory = deque(maxlen=self.MEMORY_SIZE)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.mean_losses = []
        self.mean_rewards = []

    def build_model(self):
        """Return a compiled network with one input and one linear output per entry router."""
        model = keras.Sequential([
            # An explicit Input layer replaces the deprecated `input_dim`
            # argument of the first Dense layer.
            keras.Input(shape=(self.NUM_ENTRY_ROUTERS,)),
            layers.Dense(64, activation='relu'),
            layers.Dense(64, activation='relu'),
            # Traffic amount for every entry router.
            layers.Dense(self.NUM_ENTRY_ROUTERS, activation='linear'),
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def clean_memory(self):
        """Drop every stored transition."""
        self.memory = deque(maxlen=self.MEMORY_SIZE)

    def act(self, state):
        """Choose the traffic to inject for the given state.

        Args:
            state: Model input holding a single sample, i.e. an array of
                shape ``(1, 3)``.

        Returns:
            Array of shape ``(1, 3)``: random integers in
            ``[0, max_traffic)`` while exploring, otherwise the model output
            clipped to ``[0, max_traffic]``.
        """
        if np.random.rand() <= self.epsilon:
            # randint needs an integer bound of at least 1.
            upper = max(int(self.max_traffic), 1)
            return np.random.randint(0, upper, size=(1, self.NUM_ENTRY_ROUTERS))
        action = self.model.predict(state, verbose=0)
        # TODO: limit the total traffic instead of the traffic per router
        return np.clip(action, 0, self.max_traffic)

    def replay(self, batch_size):
        """Fit the model on a random sample of stored transitions.

        At most ``batch_size`` transitions are used, one ``fit`` call each.
        For every transition the post-fit squared error and the mean reward
        of the whole sample are appended to ``mean_losses`` and
        ``mean_rewards``. Epsilon is decayed once per call, also when the
        buffer is empty.
        """
        batch_size = min(batch_size, len(self.memory))
        minibatch = random.sample(self.memory, batch_size)
        if minibatch:
            # Identical for every transition of this sample, so compute it once.
            mean_reward = np.mean([sample[2] for sample in minibatch])
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                best_next = np.amax(self.model.predict(next_state, verbose=0)[0])
                target = reward + self.gamma * best_next
            target_f = self.model.predict(state, verbose=0)
            # NOTE(review): this assigns the scalar target to every output of
            # the sample, not only to the component of the taken action.
            target_f[0] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

            prediction = self.model.predict(state, verbose=0)
            self.mean_losses.append(np.mean(np.square(target_f - prediction)))
            self.mean_rewards.append(mean_reward)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def explain_with_shap(self, state):
        """Plot SHAP values of the model for the first sample in ``state``.

        An all-zero sample is used as the background data of the explainer.
        """
        explainer = shap.KernelExplainer(self.model.predict, np.zeros((1, 3)))  # 3 routers
        shap_values = explainer.shap_values(state[0])
        # Visualisation
        print("SHAP values for Red Agent:")

        shap.summary_plot(shap_values, feature_names=["Router A", "Router B", "Router C"])


# Среда для взаимодействия агентов
class NetworkEnv(gym.Env):
    """Two-player environment: a blue agent configures routers, a red agent floods them.

    The network consists of three router lines that all lead to the target
    server. Every step the blue action reconfigures the routers, the red
    action injects traffic into the first router of each line, and the
    traffic is forwarded once through the network.
    """

    # Router ids of the three lines; the last router of a line forwards to
    # the target server. The layout requires at least 9 routers.
    LINES = ((0, 1, 2), (3, 4, 5), (6, 7, 8))
    # First router of every line: where the red agent injects traffic.
    ENTRY_ROUTERS = (0, 3, 6)
    # Red-agent bonus (blue-agent penalty) per router that newly went down.
    FAILURE_BONUS = 50

    def __init__(self, num_routers=9, max_capacity=100):
        """Create the network and the observation / action spaces.

        Args:
            num_routers: Number of routers; the last one is the target server.
            max_capacity: Upper bound of the ``router_status`` observation.
        """
        super().__init__()
        self.num_routers = num_routers
        self.max_capacity = max_capacity
        self.target_server = num_routers - 1

        self._build_network()

        # Observation space
        self.observation_space = spaces.Dict({
            'router_status': spaces.Box(low=0, high=max_capacity, shape=(num_routers,), dtype=np.float32),
            'is_active': spaces.MultiBinary(num_routers),
        })

        # Action spaces
        self.blue_action_space = spaces.Box(low=0, high=255, shape=(num_routers, 2), dtype=np.float32)
        # The red agent picks a traffic amount for each of the 3 entry routers.
        self.red_action_space = spaces.Box(low=0, high=50, shape=(3,), dtype=np.float32)

    def _build_network(self):
        """Create fresh routers, wire the three lines and reset the failure counter."""
        self.routers = [
            AbstractRouter(i, neighbors=[], speed=np.random.randint(5, 15))
            for i in range(self.num_routers)
        ]
        for line in self.LINES:
            for current, following in zip(line, line[1:]):
                self.routers[current].neighbors = [following]
            # NOTE(review): with the default num_routers=9 the last router of
            # the third line is the target server itself, so it ends up
            # listing itself as its only neighbor.
            self.routers[line[-1]].neighbors = [self.target_server]

        self.network = Network(self.routers, target_server=self.target_server)
        # Number of active routers seen so far; used to detect new failures.
        self.is_active_sum = self.num_routers

    def step(self, blue_action, red_action):
        """Apply both agents' actions and advance the network by one forwarding pass.

        Args:
            blue_action: One ``(max_capacity, speed)`` pair per router, in
                router-id order.
            red_action: Array whose first row holds the traffic for the three
                entry routers.

        Returns:
            ``((observation, red_state), (blue_reward, red_reward), done, {})``
            where ``observation`` is the dict with ``router_status`` and
            ``is_active``, ``red_state`` is the list of loads left on the
            entry routers, and ``done`` is True once no line to the target
            server is intact.
        """
        for router_id, config in enumerate(blue_action):
            self.network.configure_router(
                router_id, {'max_capacity': config[0], 'speed': config[1]}
            )
        # Apply the red agent's action
        for router_id, amount in zip(self.ENTRY_ROUTERS, red_action[0]):
            self.network.send_traffic(router_id, amount)

        # Move the traffic through the network
        self.network.forward_traffic()

        # Inspect the resulting network state
        network_status = self.network.get_network_status()
        router_status = np.array([status['traffic'] for status in network_status.values()])
        is_active = np.array([1 if status['is_active'] else 0 for status in network_status.values()])

        bonus_reward = 0
        active_now = sum(is_active)
        if active_now < self.is_active_sum:
            bonus_reward = self.FAILURE_BONUS * (self.is_active_sum - active_now)
            self.is_active_sum = active_now
        done = not self.network.is_network_active()

        # Rewards
        self.root_server_traffic = self.routers[self.target_server].traffic
        self.routers[self.target_server].traffic = 0  # Clear the traffic delivered to the target server

        blue_reward = self.root_server_traffic - bonus_reward  # Rewarded for delivered traffic
        red_reward = -self.root_server_traffic + bonus_reward  # Penalised for delivered traffic

        # Network state as seen by the agents
        red_state = [self.routers[router_id].traffic for router_id in self.ENTRY_ROUTERS]
        observation = {'router_status': router_status, 'is_active': is_active}

        return (observation, red_state), (blue_reward, red_reward), done, {}

    def reset(self):
        """Rebuild the network and return the initial observation.

        The failure counter is reset together with the routers; otherwise the
        failure bonus of later episodes would be measured against the lowest
        active-router count of an earlier episode.

        Returns:
            Dict with an all-zero ``router_status`` and an all-one
            ``is_active`` array.
        """
        self._build_network()

        router_status = np.array([0 for _ in self.routers])
        is_active = np.array([1 for _ in self.routers])

        return {'router_status': router_status, 'is_active': is_active}




# Training script: runs the blue and red agents against each other and fits
# both of them on the transitions of every finished episode.
if __name__ == "__main__":
    env = NetworkEnv(num_routers=9, max_capacity=100)
    blue_agent = BlueAgent(num_routers=env.num_routers)
    red_agent = RedAgent()

    num_episodes = 100
    max_steps = 150

    for episode in range(num_episodes):
        observation = env.reset()
        # The red agent observes the load on the three entry routers; nothing
        # has been injected at the start of an episode.
        red_observation = [0, 0, 0]
        print(f"\nЭпизод {episode + 1}")

        for step in range(max_steps):
            # Add the batch dimension expected by the models.
            blue_state = np.expand_dims(observation['router_status'], axis=0)
            red_state = np.expand_dims(np.array(red_observation), axis=0)

            blue_action = blue_agent.act(blue_state)
            red_action = red_agent.act(red_state)

            states, rewards, done, _ = env.step(blue_action, red_action)

            # Carry the new observations into the next step so the agents act
            # on the current network state instead of the state after reset.
            observation, red_observation = states
            blue_next_state = np.expand_dims(observation['router_status'], axis=0)
            red_next_state = np.expand_dims(np.array(red_observation), axis=0)

            blue_agent.remember(blue_state, blue_action, rewards[0], blue_next_state, done)
            red_agent.remember(red_state, red_action, rewards[1], red_next_state, done)

            print(f"Шаг {step + 1}: Награды (Blue, Red): {rewards}")

            # Optional: explain the agents' actions with SHAP.
            # print(f"Объяснение действий синего агента на шаге {step + 1}:")
            # blue_agent.explain_with_shap(blue_state)
            #
            # print(f"Объяснение действий красного агента на шаге {step + 1}:")
            # red_agent.explain_with_shap(red_state)

            if done:
                print("Сеть упала! Завершаем эпизод.")
                break

        print("Обучение агентов...")
        blue_agent.replay(batch_size=32)
        red_agent.replay(batch_size=32)
        # Each episode is trained on its own transitions only.
        blue_agent.clean_memory()
        red_agent.clean_memory()

    print("Все эпизоды завершены.")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Эпизод 1
Шаг 1: Награды (Blue, Red): (-33.0, 33.0)
[{'router_id': 0, 'traffic': 0, 'max_capacity': 87, 'speed': 89, 'is_active': True, 'neighbors': [1]}, {'router_id': 1, 'traffic': 17.0, 'max_capacity': 62, 'speed': 32, 'is_active': True, 'neighbors': [2]}, {'router_id': 2, 'traffic': 32.0, 'max_capacity': 8, 'speed': 7, 'is_active': False, 'neighbors': [8]}, {'router_id': 3, 'traffic': 21, 'max_capacity': 19, 'speed': 27, 'is_active': False, 'neighbors': [4]}, {'router_id': 4, 'traffic': 0.0, 'max_capacity': 55, 'speed': 69, 'is_active': True, 'neighbors': [5]}, {'router_id': 5, 'traffic': 0.0, 'max_capacity': 60, 'speed': 17, 'is_active': True, 'neighbors': [8]}, {'router_id': 6, 'traffic': 23, 'max_capacity': 2, 'speed': 93, 'is_active': False, 'neighbors': [7]}, {'router_id': 7, 'traffic': 31.0, 'max_capacity': 27, 'speed': 12, 'is_active': False, 'neighbors': [8]}, {'router_id': 8, 'traffic': 0.0, 'max_capacity': 31, 'speed': 34, 'is_active': True, 'neighbors': [8]}]
Шаг 2: Нагр

KeyboardInterrupt: 