In [2]:
import numpy as np
import random

In [3]:
states= np.arange(16,31)
actions=['ON','OFF']

In [4]:
print(states)
print(actions)

[16 17 18 19 20 21 22 23 24 25 26 27 28 29 30]
['ON', 'OFF']


In [5]:
# Q Tables is [s,a] state, action
Q = np.zeros((len(states), len(actions)))

In [17]:
# alpha: (0-1)
# gama= 1
# epsilon:probability
# episodes: independent training runs ()
alpha=0.1
gamma=0.9
epsilon=0.2
episodes=300

In [18]:
#Step 3: Reward Write Reward Function
def get_reward(temp, action):
    if 20 <= temp <= 24:
        reward = 10 # confortable
    else:
        reward = -5 # unconfortable
    if action == 'ON':
        reward -= 2 # cost of energy
    return reward

In [19]:
#Step 4: Environment Dynamic Change(e.g. Temperature Changes)
def next_temp(temp, action):
    if action == 'ON':
        temp = random.choice([1,2])
    else:
        temp += random.choice([0,1,2])
    return int(np.clip(temp,16,30))


In [20]:
# Step 5
for ep in range (episodes):
    temp = random.choice(states)
    done = False
    for _ in range(20):    #limit steps per episode
        # choosen action (epsilon-greedy)
        if random.uniform(0,1) < epsilon:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(Q[temp - 16])]

        next_state = next_temp(temp,action)
        reward = get_reward(next_state,action)
        a = actions.index(action)
        best_next = np.max(Q[next_state - 16])
        Q[temp -16,a] += alpha * (reward + gamma + best_next - Q[temp - 16, a])

        temp = next_state

print("Training complete")

Training complete


In [23]:
# Test learned policy
temp= 28
for step in range (10):
    action = actions[np.argmax(Q[temp - 16])]
    print(f"Step {step + 1}: Temp = {temp} Cel -> Action {action}")
    temp = next_temp(temp,action)

Step 1: Temp = 28 Cel -> Action ON
Step 2: Temp = 16 Cel -> Action OFF
Step 3: Temp = 16 Cel -> Action OFF
Step 4: Temp = 17 Cel -> Action OFF
Step 5: Temp = 18 Cel -> Action OFF
Step 6: Temp = 20 Cel -> Action OFF
Step 7: Temp = 22 Cel -> Action OFF
Step 8: Temp = 22 Cel -> Action OFF
Step 9: Temp = 24 Cel -> Action ON
Step 10: Temp = 16 Cel -> Action OFF


In [25]:
try:
    temp =int(input('Enter Starting Room Temp(16-30)'))
    if temp<16 or temp>30:
              raise ValueError('Temp out of range')
except ValueError as ve:
    print(ve)
    temp=25
    print('Set to default 25°C')
print('Starting temp: {temp} C')
for step in range (10):
    action = actions[np.argmax(Q[temp - 16])]
    print(f"Step {step+1}: Temp={temp}°C -> Action={action}")
    temp = next_temp(temp, action)
print('\n Done ')

Enter Starting Room Temp(16-30) 34


Temp out of range
Set to default 25°C
Starting temp: {temp} C
Step 1: Temp=25°C -> Action=ON
Step 2: Temp=16°C -> Action=OFF
Step 3: Temp=18°C -> Action=OFF
Step 4: Temp=18°C -> Action=OFF
Step 5: Temp=20°C -> Action=OFF
Step 6: Temp=20°C -> Action=OFF
Step 7: Temp=22°C -> Action=OFF
Step 8: Temp=22°C -> Action=OFF
Step 9: Temp=23°C -> Action=OFF
Step 10: Temp=23°C -> Action=OFF

 Done 
