In [4]:
class InitialConditionsMissileInterception:
    """Sample one scenario: a target position and an attacker start position.

    The target is drawn near the origin; the attacker is drawn near one of the
    four edges of the [-0.95, 0.95] square, at least ``radius + 0.5`` away from
    the target along the axis of the chosen edge.

    Attributes:
        radius: Object radius used when computing the clearance (0.02).
        target: ``np.ndarray`` of shape (2,) with the target ``[x, y]``.
        attack: ``np.ndarray`` of shape (2,) with the attacker ``[x, y]``.
    """

    def __init__(self):
        self.radius = 0.02
        self.create_target()
        self.create_attack(self.target)

    def create_target(self):
        """Draw the target uniformly from [-0.3, 0.3] x [-0.3, 0.3]."""
        x = random.uniform(-0.3, 0.3)
        y = random.uniform(-0.3, 0.3)
        self.target = np.array([x, y])

    def create_attack(self, target):
        """Draw the attacker start position and store it in ``self.attack``.

        Four candidates are generated (below, above, left of and right of the
        target) and one of them is picked uniformly at random.

        Args:
            target: Indexable ``[x, y]`` position the attacker must keep its
                distance from.
        """
        clearance = self.radius + 0.5

        # Each band runs from the outer limit (+-0.95) to the clearance line
        # around the target, clamped at +-0.94 so the band is never empty.
        x_side_left = random.uniform(-0.95, max(target[0] - clearance, -0.94))
        x_side_right = random.uniform(min(target[0] + clearance, 0.94), 0.95)
        y_below = random.uniform(-0.95, max(target[1] - clearance, -0.94))
        # The upper band must mirror the right-hand band (min, not max);
        # with max() it collapsed to the thin strip [0.94, 0.95].
        y_above = random.uniform(min(target[1] + clearance, 0.94), 0.95)

        # Free coordinate along the chosen edge.
        x_inclusive = random.uniform(-0.95, 0.95)
        y_inclusive = random.uniform(-0.95, 0.95)

        candidates = [
            np.array([x_inclusive, y_below]),
            np.array([x_inclusive, y_above]),
            np.array([x_side_left, y_inclusive]),
            np.array([x_side_right, y_inclusive]),
        ]
        self.attack = random.choice(candidates)

    def get_initial_coords(self):
        """Return independent copies of ``(target, attack)``."""
        return self.target.copy(), self.attack.copy()

# Sample one scenario and keep independent copies of its target and attack
# coordinates as module-level values.
init_conditions = InitialConditionsMissileInterception()
init_coords = init_conditions.get_initial_coords()
target_coords, attack_coords = (coords.copy() for coords in init_coords)

In [48]:
import gymnasium as gym 
from gymnasium import Env
import numpy as np
import matplotlib.pyplot as plt
import random
import math
import tensorflow as tf
import datetime
import gymnasium as gym
import numpy as npm
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
import tensorflow as tf

# # Set seed for reproducibility
# seed = 42
# random.seed(seed)
# np.random.seed(seed)

class missile_interception(Env):
    """Two-dimensional missile-interception environment (gymnasium API).

    A defense missile, steered by the agent, must collide with an attack
    missile that flies in a straight line towards a fixed target. All objects
    are circles of radius ``self.radius`` and both missiles advance 0.02 units
    per step.

    Actions (``Discrete(5)``) adjust the defense heading before it moves:
        0: keep heading
        1: +0.174532925 rad (10 degrees)
        2: +0.523599 rad (30 degrees)
        3: -0.174532925 rad
        4: -0.523599 rad

    Observation (18 ``float32`` values, in this order):
        attack x, attack y, defense x, defense y, target x, target y,
        defense->attack bearing, attack->target bearing, defense heading,
        smallest angle between heading and defense->attack bearing,
        defense-attack gap, attack-target gap,
        sin and cos of the defense->attack bearing,
        sin and cos of the defense heading,
        difference of the two sines, difference of the two cosines.

    An episode terminates when the attacker reaches the target (reward
    -10000), the defense reaches the attacker (reward 10000), the defense
    leaves the [-1, 1] square (reward -1000) or ``max_steps_per_episode``
    steps have elapsed (reward -1000). Exactly one of these outcomes is
    recorded per episode.

    Args:
        target_coords: ``[x, y]`` of the target used at every reset.
        attack_coords: ``[x, y]`` attacker start position used at every reset.
    """

    # Lower bound on the heading error in the shaping reward 1/angle_diff, so
    # a perfectly aligned heading yields a large finite reward rather than a
    # division by zero.
    MIN_ANGLE_DIFF = 1e-6

    def __init__(self, target_coords, attack_coords):
        super().__init__()
        self.action_space = gym.spaces.Discrete(5)
        # Bounds are built as float32 so they match the declared dtype exactly.
        low = np.array(
            [-1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -2, -2],
            dtype=np.float32,
        )
        high = np.array(
            [1, 1, 1, 1, 1, 1, 2*math.pi, 2*math.pi, 2*math.pi, math.pi, 2.9, 2.9, 1, 1, 1, 1, 2, 2],
            dtype=np.float32,
        )
        self.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)
        self.radius = 0.02

        self.episode_count = 0
        self.distance_t_minus_one = 0
        self.distance_change = 0

        # Running outcome counters across episodes.
        self.out_of_bounds = 0
        self.interceptions = 0
        self.reached_max_steps = 0
        self.enemy_impacts = 0

        self.defense_positions = []
        self.attack_positions = []
        self.attack_starting_position = 0

        self.max_steps_per_episode = 150
        self.activate_value = 0

        # Keep private float copies of the scenario on the instance; reset()
        # restores from these instead of relying on module-level globals.
        # Float dtype matters because positions are updated in place.
        self.initial_target = np.array(target_coords, dtype=float)
        self.initial_attack = np.array(attack_coords, dtype=float)

        print("Initital Coords", target_coords, attack_coords)

    def reset(self, seed=None, options=None):
        """Start a new episode from the stored scenario.

        Args:
            seed: Forwarded to ``Env.reset``. The positions and heading drawn
                here use the global ``random`` / ``np.random`` generators, so
                this seed does not by itself make them reproducible.
            options: Accepted for gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)

        self.dict_state = {}
        self.activate_enemy_impact = False
        self.activate_interception = False
        self.defense_positions = []
        self.attack_positions = []
        self.reward = 0
        self.current_step = 0
        self.done = False
        self.target = self.initial_target.copy()
        self.attack = self.initial_attack.copy()
        self.create_defense(self.target)

        self.calc_defense_attack_theta(self.defense, self.attack)
        self.calc_attack_target_theta(self.attack, self.target)
        self.initial_defense_angle()
        self.calc_defense_attack_distance()
        self.calc_attack_target_distance()
        self.attack_starting_position = self.attack.copy()

        self.get_state()

        return self.state, {}

    def create_target(self):
        """Draw a target uniformly from [-0.3, 0.3] x [-0.3, 0.3]."""
        x = random.uniform(-0.3, 0.3)
        y = random.uniform(-0.3, 0.3)
        self.target = np.array([x, y])

    def create_defense(self, target):
        """Draw the defense start position within ``radius + 0.15`` of the target on each axis."""
        reach = self.radius + 0.15
        x = random.uniform(target[0] - reach, target[0] + reach)
        y = random.uniform(target[1] - reach, target[1] + reach)
        self.defense = np.array([x, y])

    def create_attack(self, target):
        """Draw an attacker start position near one edge and store it in ``self.attack``.

        Four candidates (below, above, left of, right of the target) are
        generated, each at least ``radius + 0.5`` away from the target along
        the relevant axis, and one is picked uniformly at random.
        """
        clearance = self.radius + 0.5

        x_side_left = random.uniform(-0.95, max(target[0] - clearance, -0.94))
        x_side_right = random.uniform(min(target[0] + clearance, 0.94), 0.95)
        y_below = random.uniform(-0.95, max(target[1] - clearance, -0.94))
        # Mirror of the right-hand band: min(), not max(), otherwise the band
        # collapses to [0.94, 0.95].
        y_above = random.uniform(min(target[1] + clearance, 0.94), 0.95)
        x_inclusive = random.uniform(-0.95, 0.95)
        y_inclusive = random.uniform(-0.95, 0.95)

        candidates = [
            np.array([x_inclusive, y_below]),
            np.array([x_inclusive, y_above]),
            np.array([x_side_left, y_inclusive]),
            np.array([x_side_right, y_inclusive]),
        ]
        self.attack = random.choice(candidates)

    @staticmethod
    def _bearing(origin, point):
        """Return the angle of the vector origin -> point in radians, within [0, 2*pi].

        atan2 handles all four quadrants and vertically/horizontally aligned
        points without dividing by zero.
        """
        return math.atan2(point[1] - origin[1], point[0] - origin[0]) % (2 * math.pi)

    @staticmethod
    def _wrap_angle(angle):
        """Map an angle in radians onto the range [0, 2*pi]."""
        return angle % (2 * math.pi)

    def calc_defense_attack_theta(self, defense, attack):
        """Store the bearing from the defense to the attacker in ``self.defense_attack_theta``."""
        self.defense_attack_theta = self._bearing(defense, attack)

    def calc_attack_target_theta(self, attack, target):
        """Store the bearing from the attacker to the target in ``self.attack_target_theta``."""
        self.attack_target_theta = self._bearing(attack, target)

    def initial_defense_angle(self):
        """Draw the initial heading within +-2.35619 rad (135 degrees) of the bearing to the attacker."""
        heading = np.random.uniform(
            self.defense_attack_theta - 2.35619,
            self.defense_attack_theta + 2.35619,
        )
        self.defense_angle = self._wrap_angle(heading)

    def calculate_distance(self, point1, point2):
        """Return the Euclidean distance between two ``[x, y]`` points."""
        return math.hypot(point1[0] - point2[0], point1[1] - point2[1])

    def calc_defense_attack_distance(self):
        """Store the gap between the defense and attacker circles (negative when they overlap)."""
        self.defense_attack_distance = (
            self.calculate_distance(self.defense, self.attack) - (2 * self.radius)
        )

    def calc_attack_target_distance(self):
        """Store the gap between the attacker and target circles (negative when they overlap)."""
        self.attack_target_distance = (
            self.calculate_distance(self.attack, self.target) - (2 * self.radius)
        )

    def calc_defense_angle(self, action):
        """Apply the steering action to ``self.defense_angle`` and wrap the result.

        Unknown action values leave the heading unchanged.
        """
        if action == 1:
            self.defense_angle += 0.174532925
        elif action == 2:
            self.defense_angle += 0.523599
        elif action == 3:
            self.defense_angle -= 0.174532925
        elif action == 4:
            self.defense_angle -= 0.523599

        self.defense_angle = self._wrap_angle(self.defense_angle)

    def update_coords(self):
        """Advance both missiles by 0.02 along their headings and record the new positions."""
        self.defense[0] += (0.02 * math.cos(self.defense_angle))
        self.defense[1] += (0.02 * math.sin(self.defense_angle))
        self.attack[0] += (0.02 * math.cos(self.attack_target_theta))
        self.attack[1] += (0.02 * math.sin(self.attack_target_theta))
        self.defense_positions.append(self.defense.copy())
        self.attack_positions.append(self.attack.copy())

    def calculate_reward(self):
        """Update distances, then set ``self.reward`` / ``self.done`` for the current step."""
        self.calc_defense_attack_distance()
        self.calc_attack_target_distance()

        if self.attack_target_distance < 0:
            print("ENEMY HIT!")
            self.activate_enemy_impact = True
            self.activate_value = 0
            self.dict_state = self.get_state_dict()
            self.reward = -10000
            self.done = True
            self.enemy_impacts += 1

        elif self.defense_attack_distance < 0:
            print("HIT!")
            self.activate_interception = True
            self.reward = 10000
            self.done = True
            self.interceptions += 1
        else:
            # Shaping reward: the better the heading points at the attacker,
            # the larger the reward.
            self.angle_diff = abs(self.defense_attack_theta - self.defense_angle)
            self.angle_diff = min(self.angle_diff, 2*math.pi - self.angle_diff)
            self.reward = 1 / max(self.angle_diff, self.MIN_ANGLE_DIFF)

        outside = (
            self.defense[0] < -1 or self.defense[0] > 1
            or self.defense[1] < -1 or self.defense[1] > 1
        )
        # Do not let the boundary check overwrite (or double count) an episode
        # that already ended with a hit on this step.
        if outside and not self.done:
            print("OUT OF BOUNDS")
            self.reward = -1000
            self.done = True
            self.out_of_bounds += 1

    def angle_conversion(self):
        """Cache sin/cos of the bearing and heading and their differences."""
        self.sin_defense_attack_theta, self.sin_defense_angle = np.sin(self.defense_attack_theta), np.sin(self.defense_angle)
        self.cos_defense_attack_theta, self.cos_defense_angle = np.cos(self.defense_attack_theta), np.cos(self.defense_angle)

        self.delta_sin = self.sin_defense_attack_theta - self.sin_defense_angle
        self.delta_cos = self.cos_defense_attack_theta - self.cos_defense_angle

    def get_state(self):
        """Build the 18-element observation in ``self.state`` (float32, matching the observation space)."""
        self.angle_conversion()

        heading_error = abs(self.defense_attack_theta - self.defense_angle)
        self.state = np.array([
            self.attack[0], self.attack[1],
            self.defense[0], self.defense[1],
            self.target[0], self.target[1],
            self.defense_attack_theta, self.attack_target_theta,
            self.defense_angle,
            min(heading_error, 2*math.pi - heading_error),
            self.defense_attack_distance,
            self.attack_target_distance,
            self.sin_defense_attack_theta, self.cos_defense_attack_theta,
            self.sin_defense_angle, self.cos_defense_angle,
            self.delta_sin, self.delta_cos
        ], dtype=np.float32)

    def get_state_dict(self):
        """Return the current quantities as a labelled dict (sin/cos values are those cached by the last ``get_state``)."""
        heading_error = abs(self.defense_attack_theta - self.defense_angle)
        return {
            "self.activate": self.activate_enemy_impact,
            "attack_x": self.attack[0],
            "attack_y": self.attack[1],
            "defense_x": self.defense[0],
            "defense_y": self.defense[1],
            "target_x": self.target[0],
            "target_y": self.target[1],
            "defense_attack_theta": self.defense_attack_theta,
            "attack_target_theta": self.attack_target_theta,
            "defense_angle": self.defense_angle,
            "angle_diff": min(heading_error, 2*math.pi - heading_error),
            "distance_attack_missile": self.defense_attack_distance,
            "distance_attack_target": self.attack_target_distance,
            "sin_defense_attack_theta": self.sin_defense_attack_theta,
            "cos_defense_attack_theta": self.cos_defense_attack_theta,
            "sin_defense_angle": self.sin_defense_angle,
            "cos_defense_angle": self.cos_defense_angle,
            "delta_sin": self.delta_sin,
            "delta_cos": self.delta_cos
        }

    def step(self, action):
        """Apply one steering action and advance the simulation by one step.

        Returns:
            ``(observation, reward, terminated, truncated, info)``;
            ``truncated`` is always ``False`` and ``info['activated']`` tells
            whether the attacker has hit the target.
        """
        self.distance_t_minus_one = self.defense_attack_distance
        self.calc_defense_angle(action)
        self.update_coords()
        self.calc_defense_attack_theta(self.defense, self.attack)
        self.calculate_reward()
        self.current_step += 1

        # The step limit only applies when the episode has not already ended,
        # so a hit on the final step keeps its own reward and counter.
        if not self.done and self.current_step >= self.max_steps_per_episode:
            print("MAX STEPS REACHED")
            self.done = True
            self.reward = -1000
            self.reached_max_steps += 1

        self.get_state()
        return self.state, self.reward, self.done, False, {'activated': self.activate_enemy_impact}

    def graph(self, defense, attack, target):
        """Plot the recorded trails and the given defense, attack and target positions."""
        fig, ax = plt.subplots()
        ax.set_xlim(-1, 1)
        ax.set_ylim(-1, 1)

        ax.axhline(0, color='black', linewidth=0.5)
        ax.axvline(0, color='black', linewidth=0.5)
        ax.grid(True)

        # Trails of the positions recorded since the last reset.
        if self.defense_positions:
            defense_xs, defense_ys = zip(*self.defense_positions)
            ax.plot(defense_xs, defense_ys, color='#858585', label='Defense Trail')  # grey

        if self.attack_positions:
            attack_xs, attack_ys = zip(*self.attack_positions)
            ax.plot(attack_xs, attack_ys, color='#FFA281', label='Attack Trail')  # light orange

        # Current positions; the target comes from the argument so callers
        # can plot any snapshot, not only the live episode.
        ax.scatter(defense[0], defense[1], color='#1C1C1C')
        ax.scatter(attack[0], attack[1], color='#FF5A1F')
        ax.scatter(target[0], target[1], color='#85A3FF')

        ax.set_aspect('equal')
        plt.show()


In [161]:
import numpy as np
from stable_baselines3 import DQN
from copy import deepcopy
import tensorflow as tf
import gymnasium as gym
import matplotlib.pyplot as plt
from stable_baselines3.common.env_util import make_vec_env

# Draw one scenario; both environments below are constructed from these same
# target and attack coordinates.
init_conditions = InitialConditionsMissileInterception()
init_coords = init_conditions.get_initial_coords()
target_coords, attack_coords = (coords.copy() for coords in init_coords)

# Trained policy to evaluate.
model_path = "dqn_missile_guidance_v(2.4.2.5)_PROD_11"
model = DQN.load(model_path)

# Two single-environment vectorized wrappers, each given the shared coordinates.
envs = [
    make_vec_env(lambda: missile_interception(target_coords, attack_coords), n_envs=1)
    for _ in range(2)
]
env0, env1 = envs

def run_episode(envs, model):
    """Roll out one episode in every environment in lockstep and log the trajectories.

    All environments are reset, then stepped together with the model's
    deterministic actions until at least one of them reports ``done``.

    Observations are indexed as ``[0:2]`` attack position, ``[2:4]`` defense
    position, ``[4:6]`` target position, ``[6]`` defense->attack bearing and
    ``[8]`` defense heading.

    Args:
        envs: Non-empty sequence of vectorized environments holding a single
            environment each. Every entry must expose ``envs[0]`` (with the
            counters ``out_of_bounds``, ``interceptions``,
            ``reached_max_steps`` and ``enemy_impacts``), ``reset()`` and
            ``step(action)`` returning ``(obs, reward, done, info)``.
        model: Policy exposing ``predict(obs, deterministic=True)`` that
            returns ``(action, state)``.

    Returns:
        A list with one dict per environment containing the logged
        ``past_defense_positions``, ``past_attack_positions``,
        ``attack_positions``, ``defense_positions``, ``target_position``,
        ``actions``, ``rewards``, ``defense_angle`` and
        ``defense_attack_theta``.

    Raises:
        ValueError: If ``envs`` is empty.
    """
    if not envs:
        raise ValueError("run_episode needs at least one environment")

    single_envs = [vec_env.envs[0] for vec_env in envs]
    observations = [vec_env.reset() for vec_env in envs]

    done_list = [False] * len(envs)
    total_rewards = [0] * len(envs)
    steps = [0] * len(envs)
    episode_data = [{
        'past_defense_positions': [],
        'past_attack_positions': [],
        'attack_positions': [],
        'defense_positions': [],
        'target_position': [],
        'actions': [],
        'rewards': [],
        'defense_angle': [],
        'defense_attack_theta': [],
    } for _ in envs]

    for log, obs in zip(episode_data, observations):
        log['target_position'] = np.array(obs[0][4:6])

    while not any(done_list):
        # Choose every action first so each policy call sees the observation
        # from the same time step, and record the pre-step positions.
        actions = []
        for log, obs in zip(episode_data, observations):
            action, _ = model.predict(obs, deterministic=True)
            actions.append(action)
            log['actions'].append(deepcopy(action))
            log['past_defense_positions'].append(np.array([obs[0][2], obs[0][3]]))
            log['past_attack_positions'].append(np.array([obs[0][0], obs[0][1]]))

        for idx, (vec_env, action) in enumerate(zip(envs, actions)):
            obs, reward, done, info = vec_env.step(action)
            observations[idx] = obs
            finished = bool(np.any(done))

            # A vectorized env resets itself when an episode ends, so the
            # returned observation already belongs to the next episode; the
            # true final observation is delivered in the info dict.
            current = obs[0]
            if finished:
                current = info[0].get('terminal_observation', current)

            log = episode_data[idx]
            log['rewards'].append(deepcopy(reward))
            log['defense_angle'].append(deepcopy(current[8]))
            log['defense_attack_theta'].append(deepcopy(current[6]))
            log['defense_positions'].append(np.array([current[2], current[3]]))
            log['attack_positions'].append(np.array([current[0], current[1]]))

            done_list[idx] = finished
            total_rewards[idx] += reward
            steps[idx] += 1

    # Outcome counters accumulated by the underlying environments.
    out_of_bounds = sum(env.out_of_bounds for env in single_envs)
    interceptions = sum(env.interceptions for env in single_envs)
    reached_max_steps = sum(env.reached_max_steps for env in single_envs)
    enemy_impacts = sum(env.enemy_impacts for env in single_envs)
    outcomes = out_of_bounds + interceptions + reached_max_steps + enemy_impacts
    out_of_bounds_pct = (out_of_bounds / outcomes * 100) if outcomes else 0.0

    print(
        f"Episode Finished in {min(steps)} steps with reward {sum(total_rewards)}. "
        f"Out of bounds: {out_of_bounds} ({out_of_bounds_pct:.1f}%), "
        f"Interceptions: {interceptions}, "
        f"Max steps reached: {reached_max_steps}, "
        f"Enemy impacts: {enemy_impacts}"
    )

    return episode_data




Initital Coords [-0.06384841 -0.13910434] [ 0.90716211 -0.7988906 ]
Initital Coords [-0.06384841 -0.13910434] [ 0.90716211 -0.7988906 ]
HIT!
Episode Finished in 29 steps with reward [11022.776]


In [57]:
import random
import numpy as np

class SharedCoordinates:
    """Hold one target/attacker pair that several environments can read.

    The pair is sampled once at construction; call ``generate_coordinates``
    to draw a new one.

    Attributes:
        radius: Object radius used when computing the clearance (0.02).
        target: ``np.ndarray`` of shape (2,) with the target ``[x, y]``.
        attack: ``np.ndarray`` of shape (2,) with the attacker ``[x, y]``.
    """

    def __init__(self):
        self.radius = 0.02
        self.generate_coordinates()

    def generate_coordinates(self):
        """Draw a fresh target and a matching attacker start position."""
        self.create_target()
        self.create_attack(self.target)

    def create_target(self):
        """Draw the target uniformly from [-0.3, 0.3] x [-0.3, 0.3]."""
        x = random.uniform(-0.3, 0.3)
        y = random.uniform(-0.3, 0.3)
        self.target = np.array([x, y])

    def create_attack(self, target):
        """Draw the attacker start position and store it in ``self.attack``.

        Four candidates are generated (below, above, left of and right of the
        target) and one of them is picked uniformly at random.

        Args:
            target: Indexable ``[x, y]`` position the attacker must keep its
                distance from.
        """
        clearance = self.radius + 0.5

        # Each band runs from the outer limit (+-0.95) to the clearance line
        # around the target, clamped at +-0.94 so the band is never empty.
        x_side_left = random.uniform(-0.95, max(target[0] - clearance, -0.94))
        x_side_right = random.uniform(min(target[0] + clearance, 0.94), 0.95)
        y_below = random.uniform(-0.95, max(target[1] - clearance, -0.94))
        # The upper band must mirror the right-hand band (min, not max);
        # with max() it collapsed to the thin strip [0.94, 0.95].
        y_above = random.uniform(min(target[1] + clearance, 0.94), 0.95)

        # Free coordinate along the chosen edge.
        x_inclusive = random.uniform(-0.95, 0.95)
        y_inclusive = random.uniform(-0.95, 0.95)

        candidates = [
            np.array([x_inclusive, y_below]),
            np.array([x_inclusive, y_above]),
            np.array([x_side_left, y_inclusive]),
            np.array([x_side_right, y_inclusive]),
        ]
        self.attack = random.choice(candidates)

    def get_coordinates(self):
        """Return independent copies of ``(target, attack)``."""
        return self.target.copy(), self.attack.copy()

# Create a single SharedCoordinates instance; the usage example further down
# passes this same object to both environments.
shared_coords = SharedCoordinates()

class missile_interception(Env):
    """Skeleton of the environment that reads its scenario from a shared coordinate provider.

    Only ``__init__`` and ``reset`` are written out; everything else is elided
    behind the placeholder comments. NOTE(review): ``reset`` calls helpers such
    as ``create_defense``, ``calc_defense_attack_theta`` and ``get_state`` that
    are not defined in this skeleton, so using the class as written raises
    ``AttributeError`` on the first ``reset()`` until those methods are added.
    """
    def __init__(self, shared_coordinates):
        # Object whose get_coordinates() supplies the (target, attack) pair
        # read by reset().
        self.shared_coordinates = shared_coordinates
        # ... (rest of your __init__ method)
        # NOTE(review): reset() also reads self.radius indirectly through the
        # elided helpers — presumably set in the elided part; confirm.

    def reset(self, seed=None):
        """Start a new episode from the shared coordinates and return ``(state, {})``."""
        # Clear the per-episode bookkeeping.
        self.dict_state = {}
        self.activate_enemy_impact = False
        self.defense_positions = []
        self.attack_positions = []
        self.reward = 0
        self.current_step = 0
        self.done = False

        # Get shared coordinates
        self.target, self.attack = self.shared_coordinates.get_coordinates()
        # Not defined in this skeleton; must come from the elided methods.
        self.create_defense(self.target)

        # Derived angles and distances for the initial observation; these
        # helpers are likewise expected from the elided methods.
        self.calc_defense_attack_theta(self.defense, self.attack)
        self.calc_attack_target_theta(self.attack, self.target)
        self.initial_defense_angle()
        self.calc_defense_attack_distance()
        self.calc_attack_target_distance()
        self.attack_starting_position = self.attack.copy()

        self.get_state()
        # Debug output showing the attacker position used for this episode.
        print("Resetting ------------------------------------")
        print("attack: ", self.attack)
        print("----------------------------------------------")
        
        return self.state, {}

    # ... (rest of your class methods)

# Usage: build two environments on the same shared coordinates, then reset and
# step each one once, printing a blank separator between the two runs.
env1, env2 = (missile_interception(shared_coords) for _ in range(2))

for position, env in enumerate((env1, env2)):
    if position:
        print("\n")
    env.reset()
    env.step(0)

AttributeError: 'missile_interception' object has no attribute 'create_defense'