<a href="https://colab.research.google.com/github/thanuja-007/webmining-/blob/main/ROBOTICS_PATHCOVRAGE%20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# Simplified Grid World
class GridWorld:
    """Square grid environment with fixed obstacles, one moving agent and a goal.

    The agent starts in the top-left cell ``(0, 0)`` and must reach the
    bottom-right cell ``(size - 1, size - 1)``. Positions are ``(row, col)``
    tuples used directly as NumPy indices.

    Args:
        size: Side length of the grid.
        num_agents: Stored on the instance; ``step`` only ever moves the
            first agent, so values other than 1 have no effect here.
        num_obstacles: Maximum number of obstacles taken from the built-in
            layout. Obstacles that would fall outside the grid or sit on the
            start/target cell are dropped, so fewer may be placed.

    Attributes:
        agent_positions: List holding the current agent position.
        obstacle_positions: List of blocked cells.
        target: Goal cell.
        state_shape: Shape of the array returned by ``get_state``.
    """

    # Built-in obstacle layout, in priority order.
    _DEFAULT_OBSTACLES = ((1, 1), (2, 3), (3, 2))
    # Cell every episode starts from.
    _START = (0, 0)

    def __init__(self, size=5, num_agents=1, num_obstacles=3):
        self.size = size
        self.num_agents = num_agents
        self.num_obstacles = num_obstacles
        self.agent_positions = [self._START]  # Fixed start position
        self.target = (size - 1, size - 1)

        # Keep only obstacles that fit on this grid and do not block the
        # start or the target; otherwise get_state() would index out of
        # bounds on small grids or the target would be unreachable.
        reserved = (self._START, self.target)
        usable = [
            pos for pos in self._DEFAULT_OBSTACLES
            if all(0 <= coord < size for coord in pos) and pos not in reserved
        ]
        self.obstacle_positions = usable[:max(0, num_obstacles)]
        self.state_shape = (size, size, 1)

    def reset(self):
        """Put the agent back on the start cell and return the initial state."""
        self.agent_positions = [self._START]
        return self.get_state()

    def get_state(self):
        """Return the grid as a ``(size, size, 1)`` float array.

        Cell values: 0 = free, -1 = obstacle, 1 = agent, 2 = target. The
        target is written last, so it wins if the agent stands on it.
        """
        state = np.zeros((self.size, self.size))
        for pos in self.obstacle_positions:
            state[pos] = -1  # Obstacle
        for agent in self.agent_positions:
            state[agent] = 1  # Agents
        state[self.target] = 2  # Target
        return np.expand_dims(state, axis=-1)

    def step(self, action):
        """Apply one move for the first agent.

        Args:
            action: 0 = up, 1 = down, 2 = left, 3 = right. Any other value
                leaves the agent where it is.

        Returns:
            Tuple ``(state, reward, done)``. Reward is -10 when the move is
            blocked by an obstacle (the agent stays put), 100 when the
            target is reached (``done`` becomes True), and -1 otherwise.
        """
        x, y = self.agent_positions[0]
        limit = self.size - 1

        # Moves are clamped to the grid, so walking into a wall is a no-op
        # that still costs the per-step penalty.
        if action == 0:
            x = max(0, x - 1)
        elif action == 1:
            x = min(limit, x + 1)
        elif action == 2:
            y = max(0, y - 1)
        elif action == 3:
            y = min(limit, y + 1)

        new_pos = (x, y)

        if new_pos in self.obstacle_positions:
            # Blocked: position is not updated.
            return self.get_state(), -10, False

        self.agent_positions[0] = new_pos
        if new_pos == self.target:
            return self.get_state(), 100, True
        return self.get_state(), -1, False

# Simplified DQN Agent
class DQNAgent:
    """Epsilon-greedy Q-learning agent backed by a small dense Keras network.

    Transitions are kept in a bounded replay buffer; ``train`` samples a
    random minibatch from it and fits the network towards one-step
    bootstrapped targets.

    Args:
        state_shape: Shape of a single observation (without batch axis).
        action_size: Number of discrete actions.

    Attributes:
        memory: Replay buffer holding at most 1000 transitions.
        gamma: Discount factor applied to the best next-state Q-value.
        epsilon: Current probability of choosing a random action.
        epsilon_min: Lower bound below which epsilon is no longer decayed.
        epsilon_decay: Factor epsilon is multiplied by after each training
            pass.
        model: Compiled Keras model mapping states to Q-values.
    """

    def __init__(self, state_shape, action_size):
        self.state_shape = state_shape
        self.action_size = action_size
        self.memory = deque(maxlen=1000)  # Smaller memory
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (flatten -> 32 ReLU -> linear)."""
        model = Sequential([
            # An explicit Input layer replaces the deprecated
            # `input_shape=` argument on the first layer.
            tf.keras.Input(shape=self.state_shape),
            Flatten(),
            Dense(32, activation='relu'),  # Smaller network
            Dense(self.action_size, activation='linear'),
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
        return model

    def act(self, state):
        """Pick an action for ``state`` using the epsilon-greedy rule.

        Args:
            state: Observation with a leading batch axis of length 1.

        Returns:
            Action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        # int() so both branches return the same type.
        return int(np.argmax(q_values[0]))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def train(self, batch_size=32):
        """Fit the network on one random minibatch from the replay buffer.

        Does nothing until the buffer holds at least ``batch_size``
        transitions. After a training pass, epsilon is multiplied by
        ``epsilon_decay`` as long as it is still above ``epsilon_min``.

        Args:
            batch_size: Number of transitions sampled per pass.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        # Stored states carry a batch axis of 1; index [0] strips it.
        states = np.array([t[0][0] for t in minibatch])
        actions = np.array([t[1] for t in minibatch])
        rewards = np.array([t[2] for t in minibatch], dtype=np.float32)
        next_states = np.array([t[3][0] for t in minibatch])
        dones = np.array([t[4] for t in minibatch], dtype=bool)

        # Batch prediction for efficiency
        targets = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        # Terminal transitions use the raw reward; all others bootstrap
        # from the best predicted Q-value of the next state. Only the
        # column of the action actually taken is overwritten.
        bootstrapped = rewards + self.gamma * np.max(next_q_values, axis=1)
        targets[np.arange(batch_size), actions] = np.where(
            dones, rewards, bootstrapped
        )

        self.model.fit(states, targets, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Training
env = GridWorld(size=5, num_agents=1, num_obstacles=3)
agent = DQNAgent(env.state_shape, 4)
episodes = 200  # Fewer episodes
# Upper bound on steps per episode: without it, a policy that keeps
# walking into a wall or obstacle never reaches the target and the
# episode never ends.
max_steps_per_episode = 200

for episode in range(episodes):
    # Observations get a leading batch axis so they can be fed to the model.
    state = env.reset()
    state = np.reshape(state, (1, *env.state_shape))
    done = False
    total_reward = 0

    for _ in range(max_steps_per_episode):
        action = agent.act(state)
        next_state, reward, done = env.step(action)
        next_state = np.reshape(next_state, (1, *env.state_shape))
        # A transition cut off by the step cap is stored with done=False,
        # so it is still treated as non-terminal when training.
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        agent.train()
        if done:
            break

    print(f"Episode: {episode+1}, Total Reward: {total_reward}, Epsilon: {agent.epsilon:.2f}")

print("Training complete!")

  super().__init__(**kwargs)


Episode: 1, Total Reward: 82, Epsilon: 1.00
Episode: 2, Total Reward: -143, Epsilon: 0.56
Episode: 3, Total Reward: -121, Epsilon: 0.23
Episode: 4, Total Reward: -13, Epsilon: 0.20
Episode: 5, Total Reward: -189, Epsilon: 0.15
Episode: 6, Total Reward: 30, Epsilon: 0.11
Episode: 7, Total Reward: -376, Epsilon: 0.08
Episode: 8, Total Reward: 23, Epsilon: 0.05
Episode: 9, Total Reward: 90, Epsilon: 0.05
Episode: 10, Total Reward: 91, Epsilon: 0.05
Episode: 11, Total Reward: 93, Epsilon: 0.05
Episode: 12, Total Reward: 93, Epsilon: 0.04
Episode: 13, Total Reward: 91, Epsilon: 0.04
Episode: 14, Total Reward: 93, Epsilon: 0.04
Episode: 15, Total Reward: 91, Epsilon: 0.04
Episode: 16, Total Reward: 91, Epsilon: 0.04
Episode: 17, Total Reward: 86, Epsilon: 0.03
Episode: 18, Total Reward: -233, Epsilon: 0.01
Episode: 19, Total Reward: 93, Epsilon: 0.01
Episode: 20, Total Reward: -549, Epsilon: 0.01
Episode: 21, Total Reward: 40, Epsilon: 0.01
Episode: 22, Total Reward: 93, Epsilon: 0.01
Episod