In [2]:
import gym
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from collections import deque
import gymnasium as gym  # Use gymnasium (modern version of gym)

In [1]:

# Create the CarRacing environment. The id passed to gym.make below is
# 'CarRacing-v3', and render_mode='human' is requested.
# NOTE(review): this cell relies on `gym` being bound by the import cell
# (`import gymnasium as gym`). The notebook's recorded output
# "NameError: name 'gym' is not defined" is consistent with this cell having
# been executed before the imports -- run the import cell first.
# NOTE(review): 'human' rendering presumably draws every step, which would slow
# training down considerably -- confirm whether it is wanted here.
env = gym.make('CarRacing-v3', render_mode='human')

# Define the DQN model
def build_model(input_shape=(96, 96, 3), num_actions=3, learning_rate=0.001):
    """Build and compile the fully connected Q-network.

    Args:
        input_shape: Shape of a single observation fed to the network
            (defaults to the 96x96x3 frame the original model was built for).
        num_actions: Number of discrete actions, i.e. the number of Q-values
            produced by the linear output layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Sequential`` model using mean-squared-error loss.
    """
    model = Sequential([
        Flatten(input_shape=input_shape),  # Collapse the image into a vector
        # Scale 0-255 pixel intensities into [0, 1]; feeding raw byte values
        # into the Dense stack makes the MSE/Adam updates badly conditioned.
        tf.keras.layers.Rescaling(1.0 / 255.0),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(num_actions, activation='linear')  # One Q-value per discrete action
    ])
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate))
    return model

# Define the DQN agent
class DQNAgent:
    """Deep Q-learning agent with an online network, a target network and replay memory.

    Attributes:
        model: Online Q-network returned by ``build_model()``; fitted in ``replay``.
        target_model: Second network from ``build_model()``, used only to compute
            bootstrap targets and synchronised via ``update_target_model``.
        memory: Bounded deque (``maxlen=2000``) of
            ``(state, action, reward, next_state, done)`` tuples.
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Current probability of choosing a random action in ``act``.
        epsilon_decay: Factor ``epsilon`` is multiplied by after each training
            pass in ``replay``.
        epsilon_min: ``epsilon`` stops decaying once it is no longer above this.
    """

    def __init__(self):
        self.model = build_model()
        self.target_model = build_model()
        # Start both networks from identical weights.
        self.target_model.set_weights(self.model.get_weights())
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory (oldest entries fall off)."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action epsilon-greedily.

        Args:
            state: A single observation (no batch dimension).

        Returns:
            An ``int`` in ``{0, 1, 2}``: random with probability ``epsilon``,
            otherwise the index of the largest Q-value predicted by ``model``.
        """
        if np.random.rand() <= self.epsilon:
            return random.choice([0, 1, 2])  # Random action
        q_values = self.model.predict(np.expand_dims(state, axis=0), verbose=0)
        # Cast so both branches return a plain Python int.
        return int(np.argmax(q_values[0]))

    def replay(self, batch_size=32):
        """Train the online network on one random minibatch from memory.

        Does nothing while fewer than ``batch_size`` transitions are stored.
        Otherwise the whole minibatch is evaluated with two batched
        predictions and fitted with a single gradient step, instead of issuing
        three Keras calls per sampled transition. ``epsilon`` is decayed once
        per training pass.

        Args:
            batch_size: Number of transitions to sample.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)

        states = np.array([transition[0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch], dtype=np.int64)
        rewards = np.array([transition[2] for transition in minibatch], dtype=np.float32)
        next_states = np.array([transition[3] for transition in minibatch])
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        # Bootstrap value: best target-network Q-value of each next state,
        # dropped entirely for transitions flagged as done.
        next_q = np.asarray(self.target_model.predict(next_states, verbose=0))
        bootstrap = np.where(dones, 0.0, np.max(next_q, axis=1))

        # Start from the current predictions so only the taken action's
        # Q-value contributes to the loss; np.array() makes a writable copy.
        targets = np.array(self.model.predict(states, verbose=0))
        targets[np.arange(batch_size), actions] = rewards + self.gamma * bootstrap

        self.model.fit(states, targets, batch_size=batch_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

# Train the agent
def train_dqn(episodes=500):
    """Run the DQN training loop on the module-level ``env``.

    Each episode is played to completion while storing transitions, after
    which the agent performs one ``replay`` pass and the target network is
    synchronised with the online network.

    Args:
        episodes: Number of episodes to play.

    Returns:
        The trained ``DQNAgent``.
    """
    # Discrete action index -> continuous [steering, gas, brake] command.
    # The previous mapping ``[0, action - 1, 0.2]`` put the choice into the gas
    # slot (yielding -1/0/1, outside gas's [0, 1] range), never steered, and
    # held the brake at 0.2 on every step.
    action_table = np.array(
        [
            [-1.0, 0.2, 0.0],  # 0: steer left with gentle throttle
            [0.0, 0.2, 0.0],   # 1: keep straight with gentle throttle
            [1.0, 0.2, 0.0],   # 2: steer right with gentle throttle
        ],
        dtype=np.float32,
    )

    agent = DQNAgent()
    for episode in range(episodes):
        state, _ = env.reset()  # Updated reset method
        total_reward = 0
        done = False
        while not done:
            action = agent.act(state)
            # Pass an ndarray rather than a Python list: the continuous action
            # space is array-valued.
            next_state, reward, terminated, truncated, _ = env.step(action_table[action])  # Updated step
            done = terminated or truncated
            # Store only ``terminated`` as the terminal flag: a time-limit
            # truncation is not a real terminal state, so the next-state value
            # should still be bootstrapped for it.
            agent.remember(state, action, reward, next_state, terminated)
            state = next_state
            total_reward += reward
        # NOTE(review): training happens once per episode (one minibatch), not
        # once per step -- confirm this cadence is intended.
        agent.replay()
        agent.update_target_model()
        print(f"Episode {episode + 1}/{episodes}, Total Reward: {total_reward}, Epsilon: {agent.epsilon:.4f}")
    return agent

# Run training
if __name__ == "__main__":
    # Close the environment even if training raises or is interrupted
    # (e.g. via the notebook's "interrupt kernel"), so it is not left open.
    try:
        trained_agent = train_dqn(episodes=100)  # Reduce episodes for faster testing
    finally:
        env.close()

NameError: name 'gym' is not defined