# In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import gym
import random
import sumolib
import traci
from collections import deque
from sklearn.preprocessing import MinMaxScaler

# Load METR-LA Traffic Dataset
def load_data(path="metr-la.h5"):
    """Read the traffic dataset from an HDF5 file and fill missing values.

    Args:
        path: Location of the HDF5 file. Defaults to ``"metr-la.h5"`` in the
            current working directory.

    Returns:
        The DataFrame read from ``path`` with gaps filled.
    """
    df = pd.read_hdf(path)
    # ``fillna(method='ffill')`` is deprecated in pandas 2.x; ``ffill()`` is
    # the supported spelling. The trailing ``bfill()`` covers gaps at the very
    # start of a column, which a forward fill alone cannot reach.
    return df.ffill().bfill()

# Preprocess Data
def preprocess_data(df):
    """Rescale the values of ``df`` with a freshly fitted ``MinMaxScaler``.

    Args:
        df: DataFrame whose ``.values`` are fitted and transformed.

    Returns:
        A ``(scaled_values, scaler)`` tuple: the transformed array and the
        fitted scaler instance that produced it.
    """
    min_max = MinMaxScaler()
    raw_values = df.values
    return min_max.fit_transform(raw_values), min_max

# Prepare Data for LSTM
def create_sequences(data, seq_length):
    """Slice ``data`` into overlapping windows and their next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``.

    Args:
        data: Array-like indexed along its first axis by time step.
        seq_length: Number of consecutive steps in each input window; must be
            at least 1.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_windows, seq_length, *data.shape[1:])`` and ``y`` has shape
        ``(n_windows, *data.shape[1:])``. When ``data`` is too short to form a
        single window, both arrays are empty but keep their trailing
        dimensions so that callers can still inspect ``X.shape``.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row i of ``window_idx`` holds the indices i, i+1, ..., i+seq_length-1,
    # so one fancy-indexing call builds every window without a Python loop.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)
    X = data[window_idx]
    # Copy so the targets do not alias the caller's array.
    y = data[seq_length:seq_length + n_windows].copy()
    return X, y

# Define LSTM Model
class TrafficLSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step."""

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Build the recurrent stack and the linear output head.

        Args:
            input_size: Feature count of each time step fed to the LSTM.
            hidden_size: Width of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
            output_size: Width of the linear layer's output.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run ``x`` through the LSTM and project its final time step.

        The LSTM is built with ``batch_first=True``, so axis 1 of its output
        is the time axis and index ``-1`` selects the last step.
        """
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Train LSTM Model
def train_lstm(X_train, y_train, model, epochs=10, lr=0.001):
    """Fit ``model`` to ``(X_train, y_train)`` with full-batch Adam on MSE loss.

    Every epoch performs exactly one gradient step over the whole training
    set and prints the resulting loss.

    Args:
        X_train: Array-like of model inputs; converted to a float32 tensor.
        y_train: Array-like of targets; converted to a float32 tensor.
        model: ``nn.Module`` to optimise in place.
        epochs: Number of full-batch gradient steps to take.
        lr: Learning rate handed to Adam.

    Returns:
        A list holding the loss value of each epoch, in order.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # The training data does not change between epochs, so convert it once
    # instead of rebuilding both tensors on every iteration.
    inputs = torch.as_tensor(X_train, dtype=torch.float32)
    targets = torch.as_tensor(y_train, dtype=torch.float32)

    model.train()
    losses = []
    for epoch in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        print(f"Epoch {epoch+1}, Loss: {losses[-1]}")
    return losses

# Reinforcement Learning for Traffic Signal Optimization
class DQNAgent:
    """Epsilon-greedy deep Q-learning agent with a replay memory.

    The Q-function is a small fully connected network mapping a state vector
    of length ``state_size`` to one value per action.
    """

    def __init__(self, state_size, action_size):
        """Set hyper-parameters and build the Q-network and its optimizer.

        Args:
            state_size: Length of the state vector fed to the network.
            action_size: Number of discrete actions (network outputs).
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        # A single optimizer lives as long as the agent so that Adam's
        # running moment estimates persist across updates. It is bound to
        # the parameters of ``self.model`` as built here.
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def _build_model(self):
        """Return the Q-network: two hidden ReLU layers of width 24."""
        model = nn.Sequential(
            nn.Linear(self.state_size, 24),
            nn.ReLU(),
            nn.Linear(24, 24),
            nn.ReLU(),
            nn.Linear(24, self.action_size)
        )
        return model

    def act(self, state):
        """Pick an action for ``state`` using an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action index is
        returned; otherwise the index of the largest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # Action selection is pure inference, so skip autograd bookkeeping.
        with torch.no_grad():
            act_values = self.model(torch.as_tensor(state, dtype=torch.float32))
        return torch.argmax(act_values).item()

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        """Train on ``batch_size`` transitions sampled from memory.

        Each sampled transition triggers one optimizer step towards the
        target ``reward + gamma * max_a Q(next_state, a)`` (just ``reward``
        for terminal transitions). Afterwards ``epsilon`` is decayed once,
        as long as it is still above ``epsilon_min``.

        If the memory holds fewer than ``batch_size`` transitions the call
        does nothing, rather than letting ``random.sample`` raise.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            state_t = torch.as_tensor(state, dtype=torch.float32)

            # Targets are constants for the regression below; computing them
            # under no_grad keeps gradients from flowing through them.
            with torch.no_grad():
                target = reward
                if not done:
                    next_t = torch.as_tensor(next_state, dtype=torch.float32)
                    target = reward + self.gamma * torch.max(self.model(next_t)).item()
                target_f = self.model(state_t)
                # Only the taken action's value is moved; the other entries
                # equal the current prediction and contribute zero loss.
                target_f[action] = target

            self.optimizer.zero_grad()
            loss = self.criterion(self.model(state_t), target_f)
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Traffic Simulation with SUMO
def run_sumo(agent, episodes=100, batch_size=32, config_path='sumo_config.sumocfg'):
    """Run SUMO episodes, storing transitions and training ``agent``.

    For every episode a SUMO process is started through TraCI and stepped
    until no more vehicles are expected. After each step the negative sum of
    the waiting times of all vehicles currently in the simulation is used as
    the reward. Once the episode ends the agent replays one minibatch.

    NOTE(review): the state handed to the agent is still an all-zero
    placeholder and the chosen action is recorded but never applied to the
    simulation; both need to be wired to the actual network (e.g. detector
    readings and traffic-light phases) before the agent can learn anything.

    Args:
        agent: Object exposing ``state_size``, ``memory``, ``act``,
            ``remember`` and ``replay``.
        episodes: Number of simulation runs.
        batch_size: Minibatch size passed to ``agent.replay``.
        config_path: SUMO configuration file passed via ``-c``.
    """
    sumo_binary = sumolib.checkBinary('sumo')
    for _ in range(episodes):
        traci.start([sumo_binary, '-c', config_path])
        # Always close the TraCI connection, even if a step raises, so a
        # failed episode does not leave a SUMO process behind.
        try:
            state = np.zeros(agent.state_size)
            while traci.simulation.getMinExpectedNumber() > 0:
                action = agent.act(state)
                # Advance the simulation; without this the loop condition
                # never changes and no vehicle is ever inserted.
                traci.simulationStep()
                # Aggregate over the vehicles actually present instead of a
                # hard-coded id that may not exist at this time step.
                reward = -sum(
                    traci.vehicle.getWaitingTime(veh_id)
                    for veh_id in traci.vehicle.getIDList()
                )
                next_state = np.zeros(agent.state_size)
                done = traci.simulation.getMinExpectedNumber() <= 0
                agent.remember(state, action, reward, next_state, done)
                state = next_state
            # Sampling more transitions than are stored would raise.
            if len(agent.memory) >= batch_size:
                agent.replay(batch_size)
        finally:
            traci.close()

# Main Execution
if __name__ == "__main__":
    # --- Forecasting: load, scale and window the data, then train the LSTM.
    df = load_data()
    scaled_data, scaler = preprocess_data(df)
    seq_length = 12
    X, y = create_sequences(scaled_data, seq_length)
    # Chronological split: the first 80% of the windows are used for training.
    train_size = int(len(X) * 0.8)
    X_train, y_train = X[:train_size], y[:train_size]

    # Each target row carries one value per feature column, so the output
    # layer must be equally wide. With a single output unit MSELoss would
    # broadcast the prediction against every column instead of failing.
    lstm_model = TrafficLSTM(
        input_size=X.shape[2],
        hidden_size=64,
        num_layers=2,
        output_size=y.shape[1],
    )
    train_lstm(X_train, y_train, lstm_model)

    # --- Control: train the DQN agent against the SUMO simulation.
    agent = DQNAgent(state_size=10, action_size=4)
    run_sumo(agent)
