<a href="https://colab.research.google.com/github/senushidinara/Neuron-Controlled/blob/main/Untitled27.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Create a custom environment for neuron-controlled simulation
class NeuronControlledEnv(gym.Env):
    """Toy 3D environment with a target, an enemy and a randomly moving agent.

    The observation is a 10-element vector in [0, 1]; its first three entries
    are treated as the agent's (x, y, z) position. The three discrete actions
    are 0 = move towards target, 1 = shoot, 2 = do nothing.

    The classic Gym API is used: ``reset()`` returns only the observation and
    ``step()`` returns ``(observation, reward, done, info)``.

    Args:
        max_steps: Number of steps after which an episode is ended
            (``done=True``). Pass ``None`` to never end an episode, which was
            the behaviour before this limit existed and makes any
            ``while not done`` loop run forever.
    """

    def __init__(self, max_steps=200):
        super().__init__()
        self.action_space = gym.spaces.Discrete(3)  # Actions: Move, Shoot, Do Nothing
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)  # Richer state space
        self.max_steps = max_steps
        self.steps_taken = 0
        self.reward_history = []
        # reset() also samples target_position and enemy_position, so they do
        # not need to be drawn a second time here.
        self.state = self.reset()

    def reset(self):
        """Start a new episode and return the initial observation.

        Re-samples the agent state, the 3D target and the 3D enemy uniformly
        in [0, 1) and clears the per-episode step counter.
        """
        self.steps_taken = 0
        # Cast to float32 so the observation matches observation_space.dtype.
        self.state = np.random.rand(10).astype(np.float32)  # Random initial state
        self.target_position = np.random.rand(3)
        self.enemy_position = np.random.rand(3)
        return self.state

    def step(self, action):
        """Apply ``action`` and advance the simulation by one step.

        Rewards:
            * action 0 (move): ``1 / distance_to_target`` (5 if the distance is 0).
              NOTE: this is unbounded as the distance approaches 0.
            * action 1 (shoot): +5 if the enemy is closer than 0.1, otherwise -1.
            * any other action: 0.

        Returns:
            ``(observation, reward, done, info)``. ``done`` becomes True once
            ``max_steps`` steps have been taken in the current episode; in that
            case ``info`` carries ``{"TimeLimit.truncated": True}`` (the key
            used by Gym's TimeLimit wrapper) to mark the cut-off as a time
            limit rather than a real terminal state.
        """
        agent_position = self.state[:3]
        distance_to_target = np.linalg.norm(self.target_position - agent_position)
        distance_to_enemy = np.linalg.norm(self.enemy_position - agent_position)

        # Reward scaling based on distances
        reward = 0.0
        if action == 0:  # Move towards target
            reward += (1 / distance_to_target) if distance_to_target > 0 else 5  # Avoid division by zero
        elif action == 1:  # Shoot
            if distance_to_enemy < 0.1:  # Hit
                reward += 5
            else:  # Miss
                reward -= 1
        reward = float(reward)

        # Update state with noise. The Gaussian term can push values outside
        # [0, 1], so clip and cast to stay inside the declared observation space.
        noisy_state = np.random.rand(10) + np.random.normal(0, 0.1, 10)
        self.state = np.clip(noisy_state, 0.0, 1.0).astype(np.float32)

        # End the episode on the step limit; without it `done` was never True.
        self.steps_taken += 1
        done = self.max_steps is not None and self.steps_taken >= self.max_steps
        info = {"TimeLimit.truncated": True} if done else {}

        self.reward_history.append(reward)
        return self.state, reward, done, info

    def render(self, save_image=False):
        """Draw the target, enemy and agent in a 3D scatter plot.

        Args:
            save_image: If True the figure is written to
                ``simulation_output.png``; otherwise it is shown interactively.
        """
        # 3D Visualization
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

        # Plot target and enemy positions
        ax.scatter(self.target_position[0], self.target_position[1], self.target_position[2], color='green', s=150, label='Target', alpha=0.8)
        ax.scatter(self.enemy_position[0], self.enemy_position[1], self.enemy_position[2], color='red', s=150, label='Enemy', alpha=0.8)

        # Plot the agent's position
        ax.scatter(self.state[0], self.state[1], self.state[2], color='blue', s=100, label='Agent', alpha=0.8)

        # Set limits and labels
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_zlim(0, 1)
        ax.set_title("Neuron-Controlled Simulation", fontsize=16, fontweight='bold', color='darkviolet')
        ax.set_xlabel("X Position", fontsize=12)
        ax.set_ylabel("Y Position", fontsize=12)
        ax.set_zlabel("Z Position", fontsize=12)
        ax.legend()
        plt.grid(True, linestyle='--', alpha=0.5)

        if save_image:
            plt.savefig("simulation_output.png")  # Save the figure as an image file
        else:
            plt.show()  # Show the plot interactively

        plt.close(fig)  # Close the figure to avoid displaying multiple images

# Train the reinforcement learning agent using PPO
def train_agent(num_episodes=10000):
    """Train a PPO agent with an MLP policy on NeuronControlledEnv.

    Args:
        num_episodes: Training budget. Despite the name, the value is passed
            to ``learn`` as ``total_timesteps``, i.e. it counts environment
            steps rather than episodes.

    Returns:
        The trained PPO model.
    """
    # DummyVecEnv takes a list of zero-argument factories; the class itself
    # serves as the factory for the single wrapped environment.
    vec_env = DummyVecEnv([NeuronControlledEnv])
    agent = PPO('MlpPolicy', vec_env, verbose=0)

    agent.learn(total_timesteps=num_episodes)
    return agent

# Evaluate the trained model
def evaluate_model(model, num_episodes=100, max_steps_per_episode=200):
    """Roll out ``model`` on a fresh NeuronControlledEnv and collect returns.

    Args:
        model: Object exposing ``predict(observation)`` that returns a tuple
            whose first element is the action.
        num_episodes: Number of evaluation episodes to run.
        max_steps_per_episode: Hard cap on the steps taken in one episode.
            An episode stops at the cap even if the environment never reports
            ``done``, so the evaluation always finishes.

    Returns:
        List with the summed reward of each episode, in run order.

    Side effects:
        Renders the environment's final state with ``save_image=True``.
    """
    env = NeuronControlledEnv()
    total_rewards = []

    for _ in range(num_episodes):
        state = env.reset()
        total_reward = 0

        # Bounded loop instead of `while not done`: the environment may never
        # set `done`, which would otherwise hang the evaluation.
        for _step in range(max_steps_per_episode):
            action, _policy_state = model.predict(state)
            state, reward, done, _info = env.step(action)
            total_reward += reward
            if done:
                break

        total_rewards.append(total_reward)

    env.render(save_image=True)  # Render and save the final state image
    return total_rewards

# Main function to run the training and evaluation
if __name__ == "__main__":
    # Stage 1: fit the PPO agent.
    print("Training the agent...")
    trained_model = train_agent(num_episodes=10000)

    # Stage 2: roll the trained agent out and gather per-episode returns.
    print("Evaluating the agent...")
    rewards = evaluate_model(trained_model)

    # Stage 3: line chart of the total reward obtained in each episode.
    plt.figure()
    plt.plot(rewards, color="purple", linewidth=2)
    plt.title(
        "Total Rewards from Evaluation",
        fontsize=16,
        fontweight="bold",
        color="darkviolet",
    )
    plt.xlabel("Episode", fontsize=12)
    plt.ylabel("Total Reward", fontsize=12)
    plt.grid(True)
    plt.show()

Training the agent...


  return datetime.utcnow().replace(tzinfo=utc)


Evaluating the agent...


  return datetime.utcnow().replace(tzinfo=utc)


In [2]:
!pip install stable_baselines3 gymnasium

Collecting stable_baselines3
  Downloading stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Downloading stable_baselines3-2.7.0-py3-none-any.whl (187 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m187.2/187.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: stable_baselines3
Successfully installed stable_baselines3-2.7.0


In [4]:
!pip install shimmy>=2.0