# Exercise: Smart Traffic Light Controller Using Q-Learning
## Objective: In this exercise, you'll design a Smart Traffic Light System that learns when to switch lights (Green/Red) based on real-time traffic conditions using Q-Learning. Your AI agent will balance reducing waiting time for vehicles, saving energy, and maintaining safety

In [83]:
import numpy as np
import random

In [84]:
states = [0, 1, 2, 3, 4]
actions = ['GREEN', 'RED']
print("Traffic States:")
for s in states:
    if s == 0:
        label = "Empty road"
    elif s == 1:
        label = "Light traffic"
    elif s == 2:
        label = "Moderate traffic"
    elif s == 3:
        label = "Heavy traffic"
    elif s == 4:
        label = "Very heavy traffic"
    print(f"  {s} = {label}")
print("\nActions:")
for a in actions:
    if a == 'GREEN':
        meaning = "Keep the light green (GO)"
    else:
        meaning = "Turn the light red (STOP)"
    print(f"  {a} = {meaning}")

Traffic States:
  0 = Empty road
  1 = Light traffic
  2 = Moderate traffic
  3 = Heavy traffic
  4 = Very heavy traffic

Actions:
  GREEN = Keep the light green (GO)
  RED = Turn the light red (STOP)


In [85]:
Q = np.zeros((len(states), len(actions)))
alpha = 0.1     # Learning rate
gamma = 0.9     # Discount factor
epsilon = 0.2   # Exploration rate
episodes = 300  # Number of training episodes

print('Q-table shape:', Q.shape)
print('Initial Q-table:\n', Q)

Q-table shape: (5, 2)
Initial Q-table:
 [[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [86]:
def get_reward(traffic, action):
    if traffic in [3, 4]:  # Heavy or very heavy
        if action == 'GREEN':
            return +10   # Good: clear congestion
        else:
            return -10   # Bad: jams  
    elif traffic == 0:
        if action == 'RED':
            return +5    # Good
        else:
            return -5    # Bad
    elif traffic == 2:
        return +1   
    elif traffic == 1:
        if action == 'RED':
            return +2
        else:
            return 0     # Slightly less efficient

    # Default (shouldn't happen)
    return 0

In [87]:
test_cases = [
    (3, 'GREEN'),  # Heavy + Green
    (3, 'RED'),    # Heavy + Red
    (0, 'RED'),    # Empty + Red
    (0, 'GREEN'),  # Empty + Green
    (2, 'GREEN'),  # Moderate + Green
    (1, 'RED')     # Light + Red
]

for traffic, action in test_cases:
    print(f"Traffic={traffic}, Action={action} -> Reward={get_reward(traffic, action)}")

Traffic=3, Action=GREEN -> Reward=10
Traffic=3, Action=RED -> Reward=-10
Traffic=0, Action=RED -> Reward=5
Traffic=0, Action=GREEN -> Reward=-5
Traffic=2, Action=GREEN -> Reward=1
Traffic=1, Action=RED -> Reward=2


In [88]:
def next_traffic(current):
    change = random.choice([-1, 0, 1])
    next_level = np.clip(current + change, 0, 4)

    return next_level
    traffic = 2  # Start with moderate traffic
print("Initial traffic:", traffic)

for step in range(10):
    traffic = next_traffic(traffic)
    print(f"Step {step+1}: Traffic level = {traffic}")

Initial traffic: 1
Step 1: Traffic level = 0
Step 2: Traffic level = 0
Step 3: Traffic level = 0
Step 4: Traffic level = 0
Step 5: Traffic level = 0
Step 6: Traffic level = 1
Step 7: Traffic level = 2
Step 8: Traffic level = 3
Step 9: Traffic level = 3
Step 10: Traffic level = 3


In [92]:
for episode in range(episodes):
    # Start from a random traffic level
    state = np.random.randint(0, 5)
    
    for step in range(40):  # Steps per episode
        if random.uniform(0, 1) < epsilon:
            action_idx = np.random.randint(0, len(actions))  # Explore
        else:
            action_idx = np.argmax(Q[state])  # Exploit best action
        
        action = actions[action_idx]
        
        next_state = next_traffic(state)
        reward = get_reward(state, action)
        
        Q[state, action_idx] = Q[state, action_idx] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[state, action_idx]
        )
        
        state = next_state

print("\nTraining completed successfully!")
print("Final Q-table:")
print(Q)


Training completed successfully!
Final Q-table:
[[35.28334674 46.4332045 ]
 [43.83576733 47.01886871]
 [56.61168645 47.93528592]
 [74.98489807 54.53235793]
 [83.31181435 59.42253221]]


In [93]:
traffic = level
for step in range(10):
    # Ensure traffic index is valid
    if traffic < 0 or traffic >= len(Q):
        print(f"Warning: Invalid traffic index {traffic}. Resetting to 0.")
        traffic = 0
    
    action_idx = np.argmax(Q[traffic])
    action = actions[action_idx]
    
    print(f"Step {step+1}: Traffic = {state_labels[traffic]}  →  Action = {action}")
    
    # Simulate environment
    traffic = next_traffic(traffic)
    traffic = max(0, min(traffic, len(state_labels) - 1))

Step 1: Traffic = Empty road  →  Action = RED
Step 2: Traffic = Light traffic  →  Action = RED
Step 3: Traffic = Light traffic  →  Action = RED
Step 4: Traffic = Empty road  →  Action = RED
Step 5: Traffic = Light traffic  →  Action = RED
Step 6: Traffic = Light traffic  →  Action = RED
Step 7: Traffic = Empty road  →  Action = RED
Step 8: Traffic = Light traffic  →  Action = RED
Step 9: Traffic = Empty road  →  Action = RED
Step 10: Traffic = Light traffic  →  Action = RED
