In [219]:
import gymnasium as gym 
from gymnasium import Env
import numpy as np
import matplotlib.pyplot as plt
import random
import math
import tensorflow as tf
import datetime

# set a seed for reproducibility
# random.seed(0)

class line_system(Env):
    def __init__(self):
        self.action_space = gym.spaces.Box(low=0, high=2*np.pi, shape=(1,), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)

        self.max_steps = 100
        self.current_step = 0
        self.hit_threshold = 0.05

    def create_defense(self):
        x = random.uniform(-0.45, 0.45)
        y = random.uniform(-0.45, 0.45)
        self.defense = np.array([x, y])

    def create_attack(self, defense):
        x_side_left = random.uniform(-0.95, (defense[0] - 0.02) - 0.1)
        x_side_right = random.uniform((defense[0] + 0.02) + 0.1, 0.95)
        y_below = random.uniform((defense[1] - 0.02) - 0.1, -0.95)
        y_above = random.uniform((defense[1] + 0.02) + 0.1, 0.95)
        x_inclusive = random.uniform(-0.95, 0.95)
        y_inclusive = random.uniform(-0.95, 0.95)
        y_below_x_inclusive = np.array([x_inclusive, y_below])
        y_above_x_inclusive = np.array([x_inclusive, y_above])
        x_left_y_inclusive = np.array([x_side_left, y_inclusive])
        x_right_y_inclusive = np.array([x_side_right, y_inclusive])

        self.attack = random.choice([y_below_x_inclusive, y_above_x_inclusive, x_left_y_inclusive, x_right_y_inclusive])

    def calc_angle(self, defense, attack):

        # create an adjacent point of the form (attack_x, defense_y)
        adjacent_point = np.array([attack[0], defense[1]])

        # calculate the distance between the adjacent point and the defense, attack points
        adj_point_defense_len = abs(defense[0] - adjacent_point[0]) 
        adj_point_attack_len = abs(attack[1] - adjacent_point[1])

        # calculate the angle, using soh cah toa, where adj_point_defense_len is the adjacent side and adj_point_attack_len is the opposite side
        self.theta = np.arctan(adj_point_attack_len / adj_point_defense_len)
        
        if attack[0] > defense[0]:
            if attack[1] > defense[1]:
                self.theta = self.theta # 1st quadrant
            else: 
                self.theta = (2*math.pi) - self.theta # 360 - theta
        else:
            if attack[1] > defense[1]:
                self.theta = math.pi - self.theta # 180 - theta
            else:
                self.theta = math.pi + self.theta # 180 + theta
            
        return self.theta

    def graph(self, defense, attack):
        fig, ax = plt.subplots()
        plt.xlim(-1, 1)
        plt.ylim(-1, 1)

        plt.axhline(0, color='black',linewidth=0.5)
        plt.axvline(0, color='black',linewidth=0.5)
        plt.grid(True)

        plt.scatter(defense[0], defense[1], color='black')
        plt.scatter(attack[0], attack[1], color='red')

        ax.set_aspect('equal')
        plt.show()

    def calculate_hit(self, action):
        diff = np.abs(action - self.theta)
        diff = min(diff, 2*np.pi - diff)  # Proper angular difference
        if diff < 0.05:
            self.reward += 1
            self.done = True
        self.reward += -diff  

    def step(self, action):
        self.current_step += 1
        self.calculate_hit(action)
        done = self.current_step >= self.max_steps
        observation = np.concatenate([self.attack, self.defense])
        return observation, self.reward, done, False, {}

    def reset(self, seed=None):
        self.reward = 0
        self.current_step = 0
        self.done = False
        self.create_defense()
        self.create_attack(self.defense)
        self.theta = self.calc_angle(self.defense, self.attack)
        initial_state = np.concatenate((self.defense, self.attack))
        # self.graph(self.defense, self.attack)
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        return initial_state, {}

In [232]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Wrap it in a dummy vectorized environment (needed for some stable-baselines3 algorithms)
env = DummyVecEnv([lambda: line_system()])

model = PPO("MlpPolicy", env, tensorboard_log="./ppo_line_system_tensorboard/", verbose=1)
model.learn(total_timesteps=1000000)

# Save the model
model.save("ppo_line_system")

# Don't forget to close the environment
env.close()


Using cpu device
Logging to ./ppo_line_system_tensorboard/PPO_4
-----------------------------
| time/              |      |
|    fps             | 2403 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1519          |
|    iterations           | 2             |
|    time_elapsed         | 2             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 0.00026338213 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | 0.000128      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.08e+06      |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000675     |
|    std              

In [233]:
# Load the model
model = PPO.load("ppo_line_system")

# Test the environment
obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = env.step(action)
    if dones:
        obs = env.reset()


In [235]:
import numpy as np

num_episodes = 100
episode_rewards = []

for episode in range(num_episodes):
    observation = env.reset()
    done = False
    total_rewards = 0

    while not done:
        action, _states = model.predict(observation, deterministic=True)  # Use deterministic for evaluation
        observation, reward, done, info = env.step(action)
        total_rewards += reward
    
    episode_rewards.append(total_rewards)

average_reward = np.mean(episode_rewards)
print(f"Average Reward: {average_reward:.2f}")
print(f"Standard Deviation of Rewards: {np.std(episode_rewards):.2f}")


Average Reward: -5446.60
Standard Deviation of Rewards: 5054.62


In [244]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Assuming 'line_system' is your custom environment class
env = DummyVecEnv([lambda: line_system()])

# Load your model and specify the environment if not already set
model = PPO.load("ppo_line_system", env=env)

assert model.env is not None, "Environment not set properly"
model.learn(total_timesteps=2000000)

Logging to ./ppo_line_system_tensorboard/PPO_6
-----------------------------
| time/              |      |
|    fps             | 2459 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1516         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0001582843 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.11        |
|    explained_variance   | 0.465        |
|    learning_rate        | 0.0003       |
|    loss                 | 2.62e+05     |
|    n_updates            | 4900         |
|    policy_gradient_loss | -0.000289    |
|    std                  | 0.734        |
|    value_l

<stable_baselines3.ppo.ppo.PPO at 0x1971f9d0550>

In [263]:
def create_defense():
    x = random.uniform(-0.45, 0.45)
    y = random.uniform(-0.45, 0.45)
    defense = np.array([x, y])

    return defense

def create_attack(defense):
    x_side_left = random.uniform(-0.95, (defense[0] - 0.02) - 0.1)
    x_side_right = random.uniform((defense[0] + 0.02) + 0.1, 0.95)
    y_below = random.uniform((defense[1] - 0.02) - 0.1, -0.95)
    y_above = random.uniform((defense[1] + 0.02) + 0.1, 0.95)
    x_inclusive = random.uniform(-0.95, 0.95)
    y_inclusive = random.uniform(-0.95, 0.95)
    y_below_x_inclusive = np.array([x_inclusive, y_below])
    y_above_x_inclusive = np.array([x_inclusive, y_above])
    x_left_y_inclusive = np.array([x_side_left, y_inclusive])
    x_right_y_inclusive = np.array([x_side_right, y_inclusive])

    attack = random.choice([y_below_x_inclusive, y_above_x_inclusive, x_left_y_inclusive, x_right_y_inclusive])
    return attack 

def calc_angle(defense, attack):

    # create an adjacent point of the form (attack_x, defense_y)
    adjacent_point = np.array([attack[0], defense[1]])

    # calculate the distance between the adjacent point and the defense, attack points
    adj_point_defense_len = abs(defense[0] - adjacent_point[0]) 
    adj_point_attack_len = abs(attack[1] - adjacent_point[1])

    # calculate the angle, using soh cah toa, where adj_point_defense_len is the adjacent side and adj_point_attack_len is the opposite side
    theta = np.arctan(adj_point_attack_len / adj_point_defense_len)
    
    if attack[0] > defense[0]:
        if attack[1] > defense[1]:
            theta = theta # 1st quadrant
        else: 
            theta = (2*math.pi) - theta # 360 - theta
    else:
        if attack[1] > defense[1]:
            theta = math.pi - theta # 180 - theta
        else:
            theta = math.pi + theta # 180 + theta
        
    return theta

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

def generate_data(num_samples=100000):
    data = []
    labels = []
    for _ in range(num_samples):
        defense = create_defense()
        attack = create_attack(defense)
        
        # Use your existing calc_angle function
        theta = calc_angle(defense, attack)
        
        data.append(np.concatenate([defense, attack]))
        labels.append(theta)
    
    return np.array(data), np.array(labels)

# Generate data
X, y = generate_data()

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [264]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [262]:
history = model.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [272]:
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {test_mae}")

Test MAE: 3.1232969760894775


In [273]:
predictions = model.predict(X_test)

for i in range(10):  # Display first 10 predictions
    print(f"True angle: {y_test[i]:.4f}, Predicted angle: {predictions[i][0]:.4f}")

True angle: 3.7588, Predicted angle: -0.0040
True angle: 0.6062, Predicted angle: 0.0097
True angle: 3.8910, Predicted angle: -0.0383
True angle: 6.2153, Predicted angle: -0.0190
True angle: 5.0285, Predicted angle: -0.0016
True angle: 0.8476, Predicted angle: 0.2064
True angle: 5.1108, Predicted angle: 0.0091
True angle: 0.7043, Predicted angle: 0.1173
True angle: 4.4806, Predicted angle: 0.0247
True angle: 3.6809, Predicted angle: 0.0433
