# Exercise: Smart Traffic Light Controller using Q-Learning

    # Objective: In this exercise, you’ll design a Smart Traffic Light System that learns when to switch lights (Green/Red) based on 
    # real-time traffic conditions using Q-Learning. Your AI agent will balance reducing waiting time for vehicles, saving energy, and maintaining safety

In [39]:
# Step 1:
import numpy as np
import random

print('Import successful')

Import successful


In [40]:
#Step 2:
states = np.arange(0,5,1)
actions = ['GREEN','RED']
print('States:', states)
print('Action:', actions)

States: [0 1 2 3 4]
Action: ['GREEN', 'RED']


In [41]:
# create Q table and set alpha, gamma, epsilon, episodes
#Q Table: [s,a]
Q = np.zeros((len(states), len(actions)))

alpha= 0.1
gamma= 0.9
epsilon= 0.2
episodes= 300

print('Q-table Shape:', Q.shape)

Q-table Shape: (5, 2)


In [43]:
def get_reward(states,action):
    if 3 <= states < 5:  
        if action == 'GREEN':
            reward = 10 
        else:
            reward = -10 
    elif states == 0:  
        if action == 'RED':
            reward =  5 
        else:
            reward = -5 
    else:
        reward = 1 
    return reward

print('Reward Example (Traffic= 3, red):', get_reward(3, 'RED'))

Reward Example (Traffic= 3, red): -10


In [44]:
def next_traffic(traffic,action):
    if action == 'GREEN':
        traffic += np.random.choice([-1, 0, 1])
    else:
        traffic -= np.random.choice([-1, 0, 1])
    return int(np.clip(traffic, 0, 4) )

print("Next Traffic Example:", next_traffic(1,'GREEN'))

Next Traffic Example: 1


In [47]:
for ep in range(episodes):
    traffic = random.choice(states)
    for _ in range(15): # steps per episode
        if random.uniform(0, 1) < epsilon:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(Q[traffic])]
 
        next_state = next_traffic(traffic, action)
        reward = get_reward(next_state, action)
 
        a = actions.index(action)
        best_next = np.max(Q[next_state])
        Q[traffic, a] += alpha * (reward + gamma * best_next - Q[traffic, a])
print("Training Completed")

Training Completed


In [50]:
try:
    state = int(input('Enter starting traffic state (0–4): '))
    if state < 0 or state > 4:
        raise ValueError('Traffic level out of range!')
except ValueError as e:
    print(e)
    state = 2
    print('Defaut level set to 2')

print(f'\nStarting with state: {state}')
print('Simulating for 10 steps:\n')

for step in range(10):
    action = actions[np.argmax(Q[state])]
    print(f'Step {step+1}: State={state} -> Action={action}')
    state= next_traffic(state, action)

print('\nSimulation complete. Traffic Control finished.')

Enter starting traffic state (0–4):  1



Starting with state: 1
Simulating for 10 steps:

Step 1: State=1 -> Action=RED
Step 2: State=0 -> Action=RED
Step 3: State=1 -> Action=RED
Step 4: State=2 -> Action=GREEN
Step 5: State=2 -> Action=GREEN
Step 6: State=2 -> Action=GREEN
Step 7: State=3 -> Action=GREEN
Step 8: State=3 -> Action=GREEN
Step 9: State=4 -> Action=GREEN
Step 10: State=4 -> Action=GREEN

Simulation complete. Traffic Control finished.
