# (2.4.2.6)
Tests the v2.4.2.5 model with multiple defense missiles, with the goal of reaching 100% accuracy.

In [None]:
class initial_conditions_missile_interception(Env):
    """Sample one scenario: a target position and an attack-missile spawn point.

    The instance samples both points once, in ``__init__``.  ``reset`` does not
    resample; it only hands back the stored pair, so the same scenario can be
    shared by several ``missile_interception`` environments.
    """

    def __init__(self):
        super().__init__()
        self.radius = 0.02
        self.create_target()
        self.create_attack(self.target)

    def create_target(self):
        """Place the target uniformly at random inside the square [-0.3, 0.3]^2."""
        x = random.uniform(-0.3, 0.3)
        y = random.uniform(-0.3, 0.3)
        self.target = np.array([x, y])

    def create_attack(self, target):
        """Pick the attack spawn point on one of four bands around ``target``.

        Each band keeps the attack at least ``radius + 0.5`` away from the
        target along one axis and inside [-0.95, 0.95] on both axes; the other
        coordinate is unconstrained within [-0.95, 0.95].  One of the four
        candidate points is then chosen at random and stored in ``self.attack``.

        Args:
            target: sequence ``(x, y)`` with the target coordinates.
        """
        clearance = self.radius + 0.5

        # The random draws are kept in the original order (left, right, below,
        # above, x, y) so a seeded run consumes the RNG stream the same way.
        x_side_left = random.uniform(-0.95, max(target[0] - clearance, -0.94))
        x_side_right = random.uniform(min(target[0] + clearance, 0.94), 0.95)
        y_below = random.uniform(-0.95, max(target[1] - clearance, -0.94))
        # The upper band mirrors the right-hand band, so it needs min(): with
        # max() the lower bound was always 0.94 and attacks from above could
        # only spawn in the sliver [0.94, 0.95].
        y_above = random.uniform(min(target[1] + clearance, 0.94), 0.95)
        x_inclusive = random.uniform(-0.95, 0.95)
        y_inclusive = random.uniform(-0.95, 0.95)

        candidates = [
            np.array([x_inclusive, y_below]),
            np.array([x_inclusive, y_above]),
            np.array([x_side_left, y_inclusive]),
            np.array([x_side_right, y_inclusive]),
        ]
        self.attack = random.choice(candidates)

    def reset(self):
        """Return the stored ``(target, attack)`` pair without resampling.

        NOTE: this does not follow the Gymnasium ``reset`` contract (it takes
        no seed and returns no info dict).  The arrays are returned by
        reference, so callers that move the missiles should copy them first.
        """
        return self.target, self.attack

# Draw one scenario and show its target position followed by its attack spawn point.
init = initial_conditions_missile_interception()
init_coords = init.reset()
print(*init_coords, sep="\n")

In [None]:
import gymnasium as gym 
from gymnasium import Env
import numpy as np
import matplotlib.pyplot as plt
import random
import math
import tensorflow as tf
import datetime
import gymnasium as gym
import numpy as npm
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
import tensorflow as tf

# # Set seed for reproducibility
# seed = 42
# random.seed(seed)
# np.random.seed(seed)

class missile_interception(Env):
    """Gymnasium environment: steer a defense missile into an attack missile.

    Every step the agent picks one of five heading changes (see
    ``calc_defense_angle``) and both missiles advance ``STEP_SIZE`` units.  The
    attack missile keeps the heading toward the target that was computed at
    reset.  An episode ends when the attack reaches the target, the defense
    reaches the attack, the defense leaves the [-1, 1] square, or
    ``max_steps_per_episode`` steps have been taken.

    The target and the attack start position are read from the module-level
    ``init_coords`` pair ``(target, attack)`` on every ``reset``; only the
    defense start position is sampled by this class.

    Observation vector (18 entries, in this order):
        0-1   attack x, y
        2-3   defense x, y
        4-5   target x, y
        6     heading from defense to attack
        7     heading from attack to target
        8     current defense heading
        9     smallest gap between entries 6 and 8
        10    defense-attack distance minus two radii
        11    attack-target distance minus two radii
        12-13 sin, cos of entry 6
        14-15 sin, cos of entry 8
        16-17 entry 12 minus entry 14, entry 13 minus entry 15
    """

    STEP_SIZE = 0.02            # distance each missile travels per step
    SMALL_TURN = 0.174532925    # about 10 degrees, in radians
    LARGE_TURN = 0.523599       # about 30 degrees, in radians
    MIN_ANGLE_DIFF = 1e-6       # floor that keeps the 1/angle reward finite

    def __init__(self):
        super().__init__()
        self.action_space = gym.spaces.discrete.Discrete(5)

        # The bounds are left exactly as they were: a saved model can only be
        # attached to an environment whose spaces match the ones it was trained on.
        low = np.array([-1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -2, -2])
        high = np.array([1, 1, 1, 1, 1, 1, 2*math.pi, 2*math.pi, 2*math.pi, math.pi, 2.9, 2.9, 1, 1, 1, 1, 2, 2])
        self.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)
        self.radius = 0.02

        self.episode_count = 0
        self.distance_t_minus_one = 0
        self.distance_change = 0

        # Outcome counters; they accumulate over the lifetime of the instance.
        self.out_of_bounds = 0
        self.interceptions = 0
        self.reached_max_steps = 0
        self.enemy_impacts = 0

        self.defense_positions = []
        self.attack_positions = []
        self.attack_starting_position = 0

        self.max_steps_per_episode = 150
        self.activate_value = 0

    def reset(self, seed=None, options=None):
        """Start a new episode from the scenario stored in ``init_coords``.

        Args:
            seed: forwarded to ``Env.reset``.  The positions themselves are
                drawn with the global ``random`` / ``np.random`` generators,
                which this seed does not control.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(state, info)`` where ``info`` is an empty dict.
        """
        global init_coords
        super().reset(seed=seed)

        self.dict_state = {}
        self.activate_enemy_impact = False
        self.activate_interception = False
        self.defense_positions = []
        self.attack_positions = []
        self.reward = 0
        self.current_step = 0
        self.done = False

        # Copy the shared scenario.  update_coords() moves the attack in place,
        # so keeping a reference would shift the start position seen by the
        # next reset and by every other environment reading init_coords.
        self.target = np.array(init_coords[0], dtype=float)
        self.create_defense(self.target)
        self.attack = np.array(init_coords[1], dtype=float)

        self.calc_defense_attack_theta(self.defense, self.attack)
        self.calc_attack_target_theta(self.attack, self.target)
        self.initial_defense_angle()
        self.calc_defense_attack_distance()
        self.calc_attack_target_distance()
        self.attack_starting_position = self.attack.copy()

        self.get_state()

        return self.state, {}

    def create_target(self):
        """Place the target uniformly at random inside the square [-0.3, 0.3]^2."""
        x = random.uniform(-0.3, 0.3)
        y = random.uniform(-0.3, 0.3)
        self.target = np.array([x, y])

    def create_defense(self, target):
        """Spawn the defense uniformly within ``radius + 0.15`` of ``target`` on each axis."""
        reach = self.radius + 0.15
        x = random.uniform(target[0] - reach, target[0] + reach)
        y = random.uniform(target[1] - reach, target[1] + reach)
        self.defense = np.array([x, y])

    def create_attack(self, target):
        """Pick the attack spawn point on one of four bands around ``target``.

        Each band keeps the attack at least ``radius + 0.5`` away from the
        target along one axis and inside [-0.95, 0.95] on both axes.  One of
        the four candidates is chosen at random and stored in ``self.attack``.
        """
        clearance = self.radius + 0.5

        x_side_left = random.uniform(-0.95, max(target[0] - clearance, -0.94))
        x_side_right = random.uniform(min(target[0] + clearance, 0.94), 0.95)
        y_below = random.uniform(-0.95, max(target[1] - clearance, -0.94))
        # min(), mirroring x_side_right: with max() the lower bound was always
        # 0.94, so attacks from above could only spawn in [0.94, 0.95].
        y_above = random.uniform(min(target[1] + clearance, 0.94), 0.95)
        x_inclusive = random.uniform(-0.95, 0.95)
        y_inclusive = random.uniform(-0.95, 0.95)

        candidates = [
            np.array([x_inclusive, y_below]),
            np.array([x_inclusive, y_above]),
            np.array([x_side_left, y_inclusive]),
            np.array([x_side_right, y_inclusive]),
        ]
        self.attack = random.choice(candidates)

    @staticmethod
    def _heading(origin, destination):
        """Return the direction from ``origin`` to ``destination`` in [0, 2*pi].

        arctan2 resolves the quadrant itself and is defined when the two points
        share an x coordinate, where the opposite/adjacent ratio would divide
        by zero.
        """
        dx = destination[0] - origin[0]
        dy = destination[1] - origin[1]
        return np.arctan2(dy, dx) % (2 * math.pi)

    @staticmethod
    def _wrap_angle(angle):
        """Fold an angle that overshot [0, 2*pi] by less than one turn back into range."""
        if angle > 2 * math.pi:
            return angle - 2 * math.pi
        if angle < 0:
            return 2 * math.pi + angle
        return angle

    def _angle_gap(self):
        """Return the smallest gap between the defense heading and the heading to the attack."""
        gap = abs(self.defense_attack_theta - self.defense_angle)
        return min(gap, 2 * math.pi - gap)

    def calc_defense_attack_theta(self, defense, attack):
        """Store the heading from ``defense`` to ``attack`` in ``self.defense_attack_theta``."""
        self.defense_attack_theta = self._heading(defense, attack)

    def calc_attack_target_theta(self, attack, target):
        """Store the heading from ``attack`` to ``target`` in ``self.attack_target_theta``."""
        self.attack_target_theta = self._heading(attack, target)

    def initial_defense_angle(self):
        """Draw the starting defense heading within +/-2.35619 rad of the heading to the attack."""
        drawn = np.random.uniform((self.defense_attack_theta - 2.35619), (self.defense_attack_theta + 2.35619))
        self.defense_angle = self._wrap_angle(drawn)

    def calculate_distance(self, point1, point2):
        """Return the Euclidean distance between two 2-D points."""
        return math.hypot(point1[0] - point2[0], point1[1] - point2[1])

    def calc_defense_attack_distance(self):
        """Store the defense-attack distance minus two radii; negative means contact."""
        self.defense_attack_distance = (self.calculate_distance(self.defense, self.attack) - (2 * self.radius))

    def calc_attack_target_distance(self):
        """Store the attack-target distance minus two radii; negative means contact."""
        self.attack_target_distance = (self.calculate_distance(self.attack, self.target) - (2 * self.radius))

    def calc_defense_angle(self, action):
        """Apply the chosen heading change to ``self.defense_angle``.

        Actions: 0 keeps the heading, 1 / 2 add ``SMALL_TURN`` / ``LARGE_TURN``,
        3 / 4 subtract them.  Any other value leaves the heading unchanged.
        """
        if action == 1:
            self.defense_angle += self.SMALL_TURN
        elif action == 2:
            self.defense_angle += self.LARGE_TURN
        elif action == 3:
            self.defense_angle -= self.SMALL_TURN
        elif action == 4:
            self.defense_angle -= self.LARGE_TURN

        self.defense_angle = self._wrap_angle(self.defense_angle)

    def update_coords(self):
        """Advance both missiles one step along their headings and record the new positions."""
        self.defense[0] += (self.STEP_SIZE * math.cos(self.defense_angle))
        self.defense[1] += (self.STEP_SIZE * math.sin(self.defense_angle))
        self.attack[0] += (self.STEP_SIZE * math.cos(self.attack_target_theta))
        self.attack[1] += (self.STEP_SIZE * math.sin(self.attack_target_theta))
        self.defense_positions.append(self.defense.copy())
        self.attack_positions.append(self.attack.copy())

    def calculate_reward(self):
        """Refresh both distances and set ``self.reward`` / ``self.done`` for this step.

        An enemy impact is checked before an interception.  The out-of-bounds
        check runs afterwards and overrides whatever reward was set above it.
        """
        self.calc_defense_attack_distance()
        self.calc_attack_target_distance()

        if self.attack_target_distance < 0:
            print("ENEMY HIT!")
            self.activate_enemy_impact = True
            self.activate_value = 0
            self.dict_state = self.get_state_dict()
            self.reward = -10000
            self.done = True
            self.enemy_impacts += 1

        elif self.defense_attack_distance < 0:
            print("HIT!")
            self.activate_interception = True
            self.reward = 10000
            self.done = True
            self.interceptions += 1
        else:
            self.angle_diff = self._angle_gap()
            # Pointing exactly at the attack would otherwise divide by zero.
            self.reward = 1 / max(self.angle_diff, self.MIN_ANGLE_DIFF)

        if self.defense[0] < -1 or self.defense[0] > 1 or self.defense[1] < -1 or self.defense[1] > 1:
            print("OUT OF BOUNDS")
            self.reward = -1000
            self.done = True
            self.out_of_bounds += 1

    def angle_conversion(self):
        """Cache sin/cos of both headings and their differences as attributes."""
        self.sin_defense_attack_theta, self.sin_defense_angle = np.sin(self.defense_attack_theta), np.sin(self.defense_angle)
        self.cos_defense_attack_theta, self.cos_defense_angle = np.cos(self.defense_attack_theta), np.cos(self.defense_angle)

        self.delta_sin = self.sin_defense_attack_theta - self.sin_defense_angle
        self.delta_cos = self.cos_defense_attack_theta - self.cos_defense_angle

    def get_state(self):
        """Rebuild ``self.state`` in the layout listed in the class docstring.

        The array is float32 so it matches the dtype declared by
        ``observation_space``.
        """
        self.angle_conversion()

        self.state = np.array([
            self.attack[0], self.attack[1],
            self.defense[0], self.defense[1],
            self.target[0], self.target[1],
            self.defense_attack_theta, self.attack_target_theta,
            self.defense_angle,
            self._angle_gap(),
            self.defense_attack_distance,
            self.attack_target_distance,
            self.sin_defense_attack_theta, self.cos_defense_attack_theta,
            self.sin_defense_angle, self.cos_defense_angle,
            self.delta_sin, self.delta_cos
        ], dtype=np.float32)

    def get_state_dict(self):
        """Return the current state as a name -> value dict, plus the impact flag."""
        # Refresh the cached sin/cos values so they agree with the current headings.
        self.angle_conversion()
        return {
            "self.activate": self.activate_enemy_impact,
            "attack_x": self.attack[0],
            "attack_y": self.attack[1],
            "defense_x": self.defense[0],
            "defense_y": self.defense[1],
            "target_x": self.target[0],
            "target_y": self.target[1],
            "defense_attack_theta": self.defense_attack_theta,
            "attack_target_theta": self.attack_target_theta,
            "defense_angle": self.defense_angle,
            "angle_diff": self._angle_gap(),
            "distance_attack_missile": self.defense_attack_distance,
            "distance_attack_target": self.attack_target_distance,
            "sin_defense_attack_theta": self.sin_defense_attack_theta,
            "cos_defense_attack_theta": self.cos_defense_attack_theta,
            "sin_defense_angle": self.sin_defense_angle,
            "cos_defense_angle": self.cos_defense_angle,
            "delta_sin": self.delta_sin,
            "delta_cos": self.delta_cos
        }

    def step(self, action):
        """Apply ``action``, move both missiles, and score the resulting position.

        Returns:
            ``(state, reward, done, False, info)``.  ``info['activated']`` is
            True once the attack has reached the target in this episode.  The
            step limit is reported through ``done``; the truncated slot is
            always False.
        """
        self.distance_t_minus_one = self.defense_attack_distance
        self.calc_defense_angle(action)
        self.update_coords()
        self.calc_defense_attack_theta(self.defense, self.attack)
        self.calculate_reward()
        self.current_step += 1

        # Only a still-running episode can time out; an episode that ended on
        # this very step keeps the reward and counter of its real outcome.
        if self.current_step >= self.max_steps_per_episode and not self.done:
            print("MAX STEPS REACHED")
            self.done = True
            self.reward = -1000
            self.reached_max_steps += 1

        self.get_state()
        return self.state, self.reward, self.done, False, {'activated': self.activate_enemy_impact}

    def graph(self, defense, attack, target):
        """Plot the recorded trails and the given positions on a [-1, 1] square.

        Args:
            defense, attack, target: ``(x, y)`` points drawn as markers.
        """
        fig, ax = plt.subplots()
        plt.xlim(-1, 1)
        plt.ylim(-1, 1)

        plt.axhline(0, color='black', linewidth=0.5)
        plt.axvline(0, color='black', linewidth=0.5)
        plt.grid(True)

        # Plot trails
        if self.defense_positions:
            defense_xs, defense_ys = zip(*self.defense_positions)
            ax.plot(defense_xs, defense_ys, color='#858585', label='Defense Trail')  # grey trail

        if self.attack_positions:
            attack_xs, attack_ys = zip(*self.attack_positions)
            ax.plot(attack_xs, attack_ys, color='#FFA281', label='Attack Trail')  # light orange trail

        # Plot current positions
        plt.scatter(defense[0], defense[1], color='#1C1C1C')
        plt.scatter(attack[0], attack[1], color='#FF5A1F')
        plt.scatter(target[0], target[1], color='#85A3FF')

        ax.set_aspect('equal')
        plt.show()

In [None]:
# Smoke test: build one environment, reset it, then print and plot the starting layout.
env = missile_interception()
env.reset()

print(env.target, env.defense, env.attack, sep="\n")
env.graph(env.defense, env.attack, env.target)

In [413]:
import numpy as np
from stable_baselines3 import DQN
from copy import deepcopy
import gymnasium as gym
import matplotlib.pyplot as plt
from stable_baselines3.common.env_util import make_vec_env

# Custom environment import placeholder
# from your_custom_environment import missile_interception, initial_conditions_missile_interception

model_path = "dqn_missile_guidance_v(2.4.2.5)_PROD_11"
model = DQN.load(model_path)

# Initialize environments
env1 = missile_interception()
env2 = missile_interception()
envs = [env1, env2]

# Initialize episode data
episode_data = [{
    'past_defense_positions': [],
    'past_attack_positions': [],
    'attack_positions': [],
    'defense_positions': [],
    'target_position': [],
    'actions': [],
    'rewards': [],
    'defense_angle': [],
    'defense_attack_theta': [],
} for _ in envs]

# Initialize conditions and reset environments
# NOTE(review): every env reads the same module-level init_coords in reset();
# if reset() keeps those arrays by reference, stepping one env also moves the
# attack missile of the other.
init = initial_conditions_missile_interception()
init_coords = init.reset()
obs_list = [env.reset()[0] for env in envs]

# Set initial target positions
for i, obs in enumerate(obs_list):
    episode_data[i]['target_position'] = obs[4:6]

done_list = [False] * len(envs)
total_rewards = [0] * len(envs)
steps = [0] * len(envs)

# Roll each environment out to the end of its episode.  Every env starts from
# its own observation and its own done flag: previously `done` was never
# defined here, env1 was driven with env2's observation, and the second loop
# could not run because `done` was already True.
for i, env in enumerate(envs):
    obs = obs_list[i]
    done = False
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        obs_list[i] = obs
        total_rewards[i] += reward
        steps[i] += 1
        print("Done", done)
    done_list[i] = done



# while not any(done_list):
#     for i, env in enumerate(envs):
#         print("Env: ", i)
#         print("Attack Coords", env.attack)
#         print("Defense Coords", env.defense)
#         obs = np.array([obs_list[i]]) 
#         action, _ = model.predict(obs, deterministic=True)
#         obs, reward, done, truncated, info = env.step(action)
#         obs_list[i] = obs
#         total_rewards[i] += reward
#         steps[i] += 1
#         done_list[i] = done



In [None]:
import numpy as np
from stable_baselines3 import DQN
from copy import deepcopy
import tensorflow as tf
import gymnasium as gym
import matplotlib.pyplot as plt
from stable_baselines3.common.env_util import make_vec_env

# Path to the saved model
model_path = "dqn_missile_guidance_v(2.4.2.5)_PROD_11"
model = DQN.load(model_path)

# Create environments
env1 = make_vec_env(lambda: missile_interception(), n_envs=1)
env2 = make_vec_env(lambda: missile_interception(), n_envs=1)
envs = [env1, env2]

# Initialize episode data
episode_data = [{
    'past_defense_positions': [],
    'past_attack_positions': [],
    'attack_positions': [],
    'defense_positions': [],
    'target_position': [],
    'actions': [],
    'rewards': [],
    'defense_angle': [],
    'defense_attack_theta': [],
} for _ in envs]

# Reset environments
init = initial_conditions_missile_interception()
init_coords = init.reset()
print(init_coords[0])
print(init_coords[1])
obs_list = [env.reset() for env in envs]

for i, obs in enumerate(obs_list):
    episode_data[i]['target_position'] = obs[0][4:6]

# Run environments step-by-step
done_list = [False] * len(envs)
total_rewards = [0] * len(envs)
steps = [0] * len(envs)

# The loop stops after the first pass in which any environment finishes.
while not any(done_list):
    for i, env in enumerate(envs):
        if done_list[i]:
            continue

        # Observation layout: [0:2] attack x/y, [2:4] defense x/y,
        # [6] defense->attack heading, [8] defense heading.
        current_obs = obs_list[i]
        action, _ = model.predict(current_obs, deterministic=True)
        episode_data[i]['actions'].append(deepcopy(action))
        episode_data[i]['past_defense_positions'].append(current_obs[0][2:4].copy())
        episode_data[i]['past_attack_positions'].append(current_obs[0][0:2].copy())

        next_obs, reward, done, info = env.step(action)
        finished = bool(done[0])

        # A vectorized env resets itself when an episode ends, so next_obs is
        # then the first observation of the following episode; the real final
        # observation is kept in the info dict.
        recorded = info[0]['terminal_observation'] if finished else next_obs[0]

        episode_data[i]['rewards'].append(deepcopy(reward))
        episode_data[i]['defense_angle'].append(deepcopy(recorded[8]))
        episode_data[i]['defense_attack_theta'].append(deepcopy(recorded[6]))
        episode_data[i]['attack_positions'].append(recorded[0:2].copy())
        episode_data[i]['defense_positions'].append(recorded[2:4].copy())

        print("----------------------------------------------------------------------")
        print(f"Environment {i} step: {steps[i]}")
        print(f"Environment {i} done: {done}")
        print(f"Environment {i} attack position: {recorded[0], recorded[1]}")
        print("----------------------------------------------------------------------")

        total_rewards[i] += reward
        steps[i] += 1
        # Feed the fresh observation to the next prediction; without this the
        # model kept acting on the observation returned by reset().
        obs_list[i] = next_obs
        done_list[i] = finished

# Print results
for i, total_reward in enumerate(total_rewards):
    print(f"Environment {i} finished with total reward: {total_reward}")

In [None]:
import numpy as np
from stable_baselines3 import DQN
from copy import deepcopy
import tensorflow as tf
import gymnasium as gym
import matplotlib.pyplot as plt
from stable_baselines3.common.env_util import make_vec_env

# so we're gonna need two environments 

# Function to run an episode
def run_episode(env, model):
    """Run one deterministic episode and record its trajectory.

    Args:
        env: vectorized environment holding a single ``missile_interception``
            instance (``env.envs[0]``).
        model: policy exposing ``predict(obs, deterministic=True)``.

    Returns:
        Tuple ``(step, total_reward, out_of_bounds, interceptions,
        reached_max_steps, enemy_impacts, info, episode_data)``.  The four
        counters are the environment's running totals, not per-episode values;
        ``info`` is the info list from the final step.
    """
    # Read the counters from the base environment rather than through
    # whatever wrapper the vectorized env put around it.
    single_env = env.envs[0].unwrapped
    obs = env.reset()
    done = False
    total_reward = 0
    step = 0
    episode_data = {
        'past_defense_positions': [],
        'past_attack_positions': [],
        'attack_positions': [],
        'defense_positions': [],
        'target_position': [],
        'actions': [],
        'rewards': [],
        'defense_angle': [],
        'defense_attack_theta': [],
    }

    # Observation layout: [0:2] attack x/y, [2:4] defense x/y, [4:6] target x/y,
    # [6] defense->attack heading, [8] defense heading.
    episode_data['target_position'] = obs[0][4:6]

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        episode_data['actions'].append(deepcopy(action))
        episode_data['past_defense_positions'].append(deepcopy(np.array([obs[0][2], obs[0][3]])))
        episode_data['past_attack_positions'].append(deepcopy(np.array([obs[0][0], obs[0][1]])))
        obs, reward, done, info = env.step(action)

        # A vectorized env resets itself when the episode ends, so `obs` is
        # then the first observation of the next episode; the true final
        # observation is kept in the info dict.
        latest = info[0]['terminal_observation'] if done[0] else obs[0]

        episode_data['rewards'].append(deepcopy(reward))
        episode_data['defense_angle'].append(deepcopy(latest[8]))
        episode_data['defense_attack_theta'].append(deepcopy(latest[6]))
        # Attack is stored in slots 0-1 and defense in slots 2-3; the two
        # slices were previously swapped.
        episode_data['attack_positions'].append(deepcopy(np.array([latest[0], latest[1]])))
        episode_data['defense_positions'].append(deepcopy(np.array([latest[2], latest[3]])))
        total_reward += reward
        step += 1

    finished_episodes = (
        single_env.out_of_bounds
        + single_env.interceptions
        + single_env.reached_max_steps
        + single_env.enemy_impacts
    )

    def share(count):
        # Percentage of all finished episodes; 0.0 when nothing was counted yet.
        return ((count / finished_episodes) * 100) if finished_episodes else 0.0

    print(
        f"Episode finished in {step} steps with reward {total_reward}. "
        f"Out of bounds: {single_env.out_of_bounds} ({share(single_env.out_of_bounds)}), "
        f"Interceptions: {single_env.interceptions} ({share(single_env.interceptions)}), "
        f"Reached max steps: {single_env.reached_max_steps} ({share(single_env.reached_max_steps)}), "
        f"Enemy impacts: {single_env.enemy_impacts} ({share(single_env.enemy_impacts)})"
    )

    return step, total_reward, single_env.out_of_bounds, single_env.interceptions, single_env.reached_max_steps, single_env.enemy_impacts, info, episode_data

# Function to evaluate the model
def evaluate_model(model_path, n_episodes=100):
    """Run ``n_episodes`` episodes and collect those in which the target was hit.

    Args:
        model_path: path of the saved DQN model to load.
        n_episodes: number of evaluation episodes to run.

    Returns:
        List with the ``episode_data`` dict of every episode whose final
        ``info[0]['activated']`` flag was set.
    """
    # Create the environment
    env = make_vec_env(lambda: missile_interception(), n_envs=1)

    # Load the trained model
    model = DQN.load(model_path)

    target_hit_episodes = []

    try:
        # Run multiple evaluation episodes
        for i in range(n_episodes):
            print(f"Running episode {i + 1}")
            *_, info, episode_data = run_episode(env, model)

            print(info[0]['activated'])
            print("........................................................................................")

            if info[0]['activated']:
                target_hit_episodes.append(episode_data)
    finally:
        # Release the environment even if an episode raises.
        env.close()

    return target_hit_episodes

# Function to graph the episode data
def graph_episode(defense_positions, attack_positions, target_position):
    """Plot one episode: both trails, their last positions, and the target.

    Args:
        defense_positions: sequence of ``(x, y)`` defense positions, oldest first.
        attack_positions: sequence of ``(x, y)`` attack positions, oldest first.
        target_position: ``(x, y)`` of the target.

    An empty position sequence is skipped instead of raising on ``[-1]``.
    """
    fig, ax = plt.subplots()
    plt.xlim(-1, 1)
    plt.ylim(-1, 1)

    plt.axhline(0, color='black', linewidth=0.5)
    plt.axvline(0, color='black', linewidth=0.5)
    plt.grid(True)

    # Trail first, then a marker on the most recent position of each missile.
    if len(defense_positions) > 0:
        defense_xs, defense_ys = zip(*defense_positions)
        ax.plot(defense_xs, defense_ys, color='#858585', label='Defense Trail')  # grey trail
        plt.scatter(defense_positions[-1][0], defense_positions[-1][1], color='#1C1C1C', label='Defense Position')

    if len(attack_positions) > 0:
        attack_xs, attack_ys = zip(*attack_positions)
        ax.plot(attack_xs, attack_ys, color='#FFA281', label='Attack Trail')  # light orange trail
        plt.scatter(attack_positions[-1][0], attack_positions[-1][1], color='#FF5A1F', label='Attack Position')

    plt.scatter(target_position[0], target_position[1], color='#85A3FF', label='Target Position')

    ax.set_aspect('equal')
    plt.legend()
    plt.show()

# Path to the saved model
model_path = "dqn_missile_guidance_v(2.4.2.5)_PROD_11"

# Evaluate the model and get episodes with target hits
target_hit_episodes = evaluate_model(model_path, n_episodes=50000)

# Animate the episodes with target hits
# NOTE(review): animate_episode is not defined in the visible part of this
# file - confirm it is provided elsewhere.  The name passed to it ends with a
# space after ".gif"; verify that is intended if it is used as a file name.
for i, episode_data in enumerate(target_hit_episodes):
    episode_name = f'Episode {i} collision.gif '
    animate_episode(episode_data, episode_name)
