In [1]:
import numpy as np
import random

In [2]:
# Quick sanity check: draw one value uniformly from 0, 1, 2 and show it.
print(random.choice(range(3)))

2


In [3]:
# Step 1: Define the states and the actions

# One state per whole-number temperature, 16 through 30 inclusive.
states = np.arange(16, 30 + 1)
# The two actions available to the agent in every state.
actions = ['ON', 'OFF']

In [4]:
# Show the state space and the action set, one per line.
print(states, actions, sep='\n')

[16 17 18 19 20 21 22 23 24 25 26 27 28 29 30]
['ON', 'OFF']


In [5]:
# Step 2: Create the Q-table and set alpha, gama, epsilon, episodes
#
# Q        : table of action values, one row per state, one column per action
# alpha    : learning rate, in the range 0-1
# gama     : discount factor applied to the next state's value
# epsilon  : probability of picking a random action instead of the greedy one
# episodes : number of independent training runs

Q = np.zeros((states.size, len(actions)))  # every entry starts at 0
alpha = 0.1
gama = 0.9
epsilon = 0.2
episodes = 300

In [6]:
# Step 3: Reward function

def get_reward(temp, action):
    """Return the reward for being at temperature `temp` after taking `action`.

    The reward is the sum of two parts:
      * comfort: +10 when 20 <= temp <= 24, otherwise -5;
      * energy:  -2 when `action` is 'ON', nothing otherwise.

    Parameters
    ----------
    temp : int
        Temperature to evaluate.
    action : str
        'ON' or 'OFF'.

    Returns
    -------
    int
        One of 10, 8, -5 or -7.
    """
    if 20 <= temp <= 24:
        reward = 10  # comfortable
    else:
        reward = -5  # uncomfortable
    if action == 'ON':
        # Subtract the energy cost from the comfort reward. Assigning -2 here
        # would discard the comfort term and make every 'ON' step worth -2
        # regardless of temperature.
        reward -= 2
    return reward

In [7]:
# Step 4: Environment dynamics (how the temperature changes)

def next_temp(temp, action):
    """Return the temperature that follows `temp` when `action` is taken.

    'ON' lowers the temperature by a random 1 or 2 degrees; any other action
    raises it by a random 0, 1 or 2 degrees. The result is clipped to the
    range 16-30 and returned as a plain int.
    """
    if action == 'ON':
        delta = -random.choice([1, 2])
    else:
        delta = random.choice([0, 1, 2])
    return int(np.clip(temp + delta, 16, 30))

In [8]:
# Step 5: Training loop (tabular Q-learning)
#
# Update applied after every step:
#   Q[s, a] += alpha * (reward + gama * max(Q[s_next]) - Q[s, a])
# The Q-table row for a temperature is `temp - 16`, since the lowest state is 16.

for ep in range(episodes):
    temp = random.choice(states)  # each episode starts at a random temperature
    for _ in range(20):  # limit steps per episode
        # Choose action (epsilon-greedy): explore with probability epsilon,
        # otherwise take the action with the highest current Q-value.
        if random.uniform(0, 1) < epsilon:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(Q[temp-16])]

        next_state = next_temp(temp, action)
        reward = get_reward(next_state, action)
        a = actions.index(action)
        # Bootstrap from the best Q-*value* of the next state. np.argmax would
        # give the index of the best action (0 or 1), not its estimated return,
        # which corrupts the TD target.
        best_next = np.max(Q[next_state-16])
        Q[temp-16, a] += alpha * (reward + gama * best_next - Q[temp-16, a])
        temp = next_state
print("Training completed")
        

Training completed


In [9]:
# Step 6: Test the learned policy

# Roll the greedy policy forward for 10 steps from a fixed start of 28,
# printing the temperature and the action chosen at each step.
temp = 28
for step in range(10):
    action = actions[Q[temp - 16].argmax()]  # greedy: highest Q-value in this row
    print(f'Step {step+1}: Temp={temp}°C -> Action={action}')
    temp = next_temp(temp, action)


Step 1: Temp=28°C -> Action=ON
Step 2: Temp=27°C -> Action=ON
Step 3: Temp=26°C -> Action=ON
Step 4: Temp=24°C -> Action=ON
Step 5: Temp=22°C -> Action=OFF
Step 6: Temp=23°C -> Action=OFF
Step 7: Temp=24°C -> Action=ON
Step 8: Temp=23°C -> Action=OFF
Step 9: Temp=25°C -> Action=ON
Step 10: Temp=23°C -> Action=OFF


In [16]:
# Ask for a starting temperature; fall back to 25 when the input is not an
# integer or lies outside 16-30.
try:
    temp = int(input('Enter Starting Room Temp (16-30)'))
    if not 16 <= temp <= 30:
        raise ValueError('Temp out of range')
except ValueError as ve:
    temp = 25
    print(ve, 'Set to default 25 Celcius', sep='\n')
print(f'\n Starting temp {temp} celcius')

# Follow the greedy policy for 10 steps from the chosen temperature.
for step in range(10):
    action = actions[Q[temp - 16].argmax()]
    print(f'Step {step+1}: Temp={temp}°C -> Action={action}')
    temp = next_temp(temp, action)
print("Done")
    


Enter Starting Room Temp (16-30) 30



 Starting temp 30 celcius
Step 1: Temp=30°C -> Action=ON
Step 2: Temp=28°C -> Action=ON
Step 3: Temp=26°C -> Action=ON
Step 4: Temp=25°C -> Action=ON
Step 5: Temp=24°C -> Action=ON
Step 6: Temp=23°C -> Action=OFF
Step 7: Temp=25°C -> Action=ON
Step 8: Temp=23°C -> Action=OFF
Step 9: Temp=24°C -> Action=ON
Step 10: Temp=22°C -> Action=OFF
Done
