In [1]:
import numpy as np
import random

In [2]:
#Define State and Actions
states=np.arange(0, 101, 10)
actions = ['FILL','STOP']
print('States:',states)
print('Actions:', actions)

States: [  0  10  20  30  40  50  60  70  80  90 100]
Actions: ['FILL', 'STOP']


In [25]:
#create Q table and set alpha, gamma, apsilon, episodes
# Q Table: [s,a]
Q= np.zeros((len(states), len(actions)))

alpha=0.1 #Learning rate
gamma=0.9 #Discount factor
epsilon=0.2 #Exploration rate
episodes=300 #Training runs

print('Q-table shape:', Q.shape)

Q-table shape: (11, 2)


In [26]:
def get_reward(level, action):
    if 40 <= level <= 70:
        reward = 10 #ideal range
    else:
        reward = -10 # too low/high
    if action == 'FILL' and level >= 90:
        reward -= 10 # overflow risk
    if action == 'STOP' and level <= 10:
        reward -= 10# empty risk
    return reward
print ('Reward Example (level=60, FILL):', get_reward(60, 'FILL'))
    

Reward Example (level=60, FILL): 10


In [27]:
def next_level(level, action):
    if action == 'Fill':
        level += random.choice([5, 10, 15])
    else:
        level -= random.choice([5, 10, 15])
    return int(np.clip(level, 0, 100))

print('Next Level Example:', next_level(50, 'FILL'))

Next Level Example: 35


In [28]:
for ep in range(episodes):
    level = random.choice(states)
    for _ in range(15): #steps per episode
        if random.uniform(0,1)< epsilon:
            action = random.choice(actions)
        else:
            action=actions[np.argmax(Q[level//10])]
            
        next_state = next_level(level, action)
        reward=get_reward(next_state, action)

        a= action.index(action)
        best_next=np.max(Q[next_state//10])
        Q[level// 10, a] += alpha * (reward + gamma * best_next - Q[level // 10, a])
print('Training Completed')

Training Completed


In [29]:
try:
    level = int(input('Enter starting water level (0-100):'))
    if level < 0 or level > 100:
        raise ValueError('Water level out of range!')
except ValueError as e:
    print(e)
    level=50
    print('Default level set to 50%')

print(f'\nStarting with level: {level}%')
print('Simulation for 10 steps:\n')

for step in range(10):
    action=actions[np.argmax(Q[level// 10])]
    print(f'Step {step+1}: Level={level}% ⮕ Action={action}')
    level = next_level(level, action)
      

print('\Simulation Complete. Water tank control finished.')

Enter starting water level (0-100): 50



Starting with level: 50%
Simulation for 10 steps:

Step 1: Level=50% ⮕ Action=FILL
Step 2: Level=40% ⮕ Action=STOP
Step 3: Level=35% ⮕ Action=STOP
Step 4: Level=30% ⮕ Action=STOP
Step 5: Level=20% ⮕ Action=STOP
Step 6: Level=15% ⮕ Action=STOP
Step 7: Level=10% ⮕ Action=STOP
Step 8: Level=0% ⮕ Action=STOP
Step 9: Level=0% ⮕ Action=STOP
Step 10: Level=0% ⮕ Action=STOP
\Simulation Complete. Water tank control finished.


# Exercise: Smart Traffic Light Controller using Q-Learning

In [31]:
import numpy as np
import random

In [33]:
#Define State and Actions
states=np.arange(0, 101,10)
actions = ['GREEN','RED']
print('States:',states)
print('Actions:', actions)

States: [  0  10  20  30  40  50  60  70  80  90 100]
Actions: ['GREEN', 'RED']


In [34]:
Q= np.zeros((len(states), len(actions)))

alpha=0.1 #Learning rate
gamma=0.9 #Discount factor
epsilon=0.2 #Exploration rate
episodes=300 #Training runs

print('Q-table shape:', Q.shape)

Q-table shape: (11, 2)


In [35]:
def get_reward(level, action):
    if 50 <= level <= 75:
        reward = 10 #ideal range
    else:
        reward = -10 # too low/high
    if action == 'GREEN' and level >= 60:
        reward -= 10 # overflow risk
    if action == 'RED' and level <= 50:
        reward -= 10# empty risk
    return reward
print ('Reward Example (level=60, GREEN):', get_reward(60, 'GREEN'))

Reward Example (level=51, GREEN): 0


In [36]:
def next_level(level, action):
    if action == 'GREEN':
        level += random.choice([5, 10, 15])
    else:
        level -= random.choice([5, 10, 15])
    return int(np.clip(level, 0, 100))

print('Next Level Example:', next_level(51, 'GREEN'))

Next Level Example: 66


In [37]:
for ep in range(episodes):
    level = random.choice(states)
    for _ in range(15): #steps per episode
        if random.uniform(0,1)< epsilon:
            action = random.choice(actions)
        else:
            action=actions[np.argmax(Q[level//10])]
            
        next_state = next_level(level, action)
        reward=get_reward(next_state, action)

        a= action.index(action)
        best_next=np.max(Q[next_state//10])
        Q[level// 10, a] += alpha * (reward + gamma * best_next - Q[level // 10, a])
print('Training Completed')

Training Completed


In [40]:
try:
    level = int(input('Enter Traffic Level (0-100):'))
    if level < 0 or level > 100:
        raise ValueError('Traffic Level is bad!')
except ValueError as e:
    print(e)
    level=50
    print('Default level set to 50%')

print(f'\nStarting with level: {level}%')
print('Simulation for 10 steps:\n')

for step in range(10):
    action=actions[np.argmax(Q[level// 10])]
    print(f'Step {step+1}: Level={level}% ⮕ Action={action}')
    level = next_level(level, action)
      

print('\Simulation Complete. Traffic level control finished.')

Enter Traffic Level (0-100): 80



Starting with level: 80%
Simulation for 10 steps:

Step 1: Level=80% ⮕ Action=RED
Step 2: Level=70% ⮕ Action=RED
Step 3: Level=55% ⮕ Action=RED
Step 4: Level=50% ⮕ Action=RED
Step 5: Level=45% ⮕ Action=RED
Step 6: Level=35% ⮕ Action=RED
Step 7: Level=30% ⮕ Action=RED
Step 8: Level=25% ⮕ Action=RED
Step 9: Level=10% ⮕ Action=RED
Step 10: Level=5% ⮕ Action=RED
\Simulation Complete. Traffic level control finished.
