# Adding Baselines to the already existant REINFORCE Algorithm

\begin{array}
A_t &= G_t - V(s_t) \\
\nabla J(\theta) &= \sum_{t=0}^{T-1} \nabla 
\log \pi(a_t/s_t) \times A_t
\end{array}


In [11]:
import gymnasium as gym
import torch
import torch.nn as nn
from torch.autograd import Variable
env = gym.make('CartPole-v1')

# Let's create a custom environment
https://www.youtube.com/watch?v=bD6V3rcr_54 is the reference for this.An RL Environment to adjust the temperature of your shower

In [1]:
from gymnasium import Env
from gymnasium.spaces import Discrete, Box
import numpy as np
import random

In [27]:
# creating placeholder functions
class ShowerEnv(Env):
    def __init__(self):
        # defining the actions
        self.action_space = Discrete(3) # 3 discrete actions - meddling with the shower handle
        # Temperature Array
        self.observation_space = Box(low=np.array([0]),high=np.array([100]))
        # Set start temperature
        self.state = 38+ random.randint(-3,3)
        # Set shower length
        self.shower_length = 60
        
    def step(self,action):
        self.state += action -1
        self.shower_length -= 1
        
        if self.state >=37 and self.state <=38:
            reward = 1
        else:
            reward = -1
        if self.shower_length<= 0:
            truncated = True
            terminated = True
        else:
            truncated = False
            terminated = False
        self.state += random.randint(-1,1)
        info = {}
        return self.state, reward, terminated, truncated, info
    def render(self):
        None
    def reset(self):
        self.state = 38+ random.randint(-3,3)
        # Set shower length
        self.shower_length = 60
        dummy='dum'
        info = {}
        return self.state, info

In [79]:
class PolicyNetwork():
    def __init__(self,n_state,n_action,n_hidden=50,lr=0.001):
        '''
        forward prop
        '''
        self.model = nn.Sequential(
        nn.Linear(n_state,n_hidden),
        nn.ReLU(),
        nn.Linear(n_hidden,n_action),
        nn.Softmax(),
        )
        self.optimizer = torch.optim.Adam(self.model.parameters(),lr)
        
    def predict(self,s):
        '''
        Compute the action probabilities of state s using
     the learning model
        @param s: input state
        @return: predicted policy
        '''
        return self.model(torch.tensor([float(s)]))
    
    def update(self, returns, log_probs):
        policy_gradient = []
        for log_prob, Gt in zip(log_probs,returns):
            policy_gradient.append(-log_prob*Gt)
        loss = torch.stack(policy_gradient).sum()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
    
    def get_action(self, s):
        '''
        @param s: input state
        @return: the selected action and log probability
        '''
        probs = self.predict(s)
        action = torch.multinomial(probs, 1).item()
        log_prob = torch.log(probs[action])
        return action,log_prob
        
    
def reinforce(env,estimator,n_episode,gamma=1.0):
    """
    REINFORCE algorithm
    @param env: Gym environment
    @param estimator: policy network
    @param n_episode: number of episodes
    @param gamma: the discount factor
    """
    for episode in range(n_episode):
        log_probs = []
        rewards = []
        state, info = env.reset()
        state = torch.tensor([float(state)])
        while True:
            action,log_prob = estimator.get_action(state)
            next_state,reward,terminated,truncated,info = env.step(action)
            total_reward_episode[episode] += reward
            log_probs.append(log_prob)
            rewards.append(reward)
            if terminated or truncated:
                returns = []
                Gt = 0 
                pw = 0 # this is essentially the power
                for reward in rewards[::-1]:
                    Gt += gamma ** pw * reward
                    pw += 1
                    returns.append(Gt)
                returns = returns[::-1]
                returns = torch.tensor(returns)
                returns = (returns - returns.mean()) / (returns.std() + 1e-9) # some sort of standardisation
                estimator.update(returns, log_probs)
                print('Episode: {}, total reward: {}'.format(
                    episode, total_reward_episode[episode]))
                break
            state = next_state

            

In [80]:
env=ShowerEnv()

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [81]:
torch.tensor(np.array(env.reset()[0]))

tensor(37)

In [82]:
torch.tensor([41])

tensor([41])

In [83]:

n_state = env.observation_space.shape[0]
n_action = env.action_space.n
n_hidden = 128
lr = 0.0007
gamma = 0.95
entropic_factor = 0.001
n_episode = 8000
total_reward_episode = [0]*n_episode

In [84]:
policy_net = PolicyNetwork(n_state, n_action, n_hidden, lr)

In [85]:
reinforce(env,estimator=policy_net,n_episode=n_episode,gamma=gamma)


Episode: 0, total reward: -60
Episode: 1, total reward: -52
Episode: 2, total reward: -58
Episode: 3, total reward: -54
Episode: 4, total reward: -58
Episode: 5, total reward: -50
Episode: 6, total reward: -60
Episode: 7, total reward: -54
Episode: 8, total reward: -56
Episode: 9, total reward: -60
Episode: 10, total reward: -60
Episode: 11, total reward: -60
Episode: 12, total reward: -56
Episode: 13, total reward: -54
Episode: 14, total reward: -56
Episode: 15, total reward: -52
Episode: 16, total reward: -60
Episode: 17, total reward: -58
Episode: 18, total reward: -60
Episode: 19, total reward: -56
Episode: 20, total reward: -52
Episode: 21, total reward: -54
Episode: 22, total reward: -56
Episode: 23, total reward: -56
Episode: 24, total reward: -60
Episode: 25, total reward: -60
Episode: 26, total reward: -56
Episode: 27, total reward: -54
Episode: 28, total reward: -56
Episode: 29, total reward: -58
Episode: 30, total reward: -54
Episode: 31, total reward: -54
Episode: 32, total

Episode: 271, total reward: -60
Episode: 272, total reward: -56
Episode: 273, total reward: -60
Episode: 274, total reward: -60
Episode: 275, total reward: -60
Episode: 276, total reward: -54
Episode: 277, total reward: -60
Episode: 278, total reward: -58
Episode: 279, total reward: -60
Episode: 280, total reward: -56
Episode: 281, total reward: -60
Episode: 282, total reward: -54
Episode: 283, total reward: -54
Episode: 284, total reward: -58
Episode: 285, total reward: -54
Episode: 286, total reward: -58
Episode: 287, total reward: -58
Episode: 288, total reward: -54
Episode: 289, total reward: -60
Episode: 290, total reward: -60
Episode: 291, total reward: -56
Episode: 292, total reward: -60
Episode: 293, total reward: -56
Episode: 294, total reward: -54
Episode: 295, total reward: -60
Episode: 296, total reward: -60
Episode: 297, total reward: -50
Episode: 298, total reward: -52
Episode: 299, total reward: -54
Episode: 300, total reward: -60
Episode: 301, total reward: -52
Episode:

Episode: 545, total reward: -56
Episode: 546, total reward: -60
Episode: 547, total reward: -58
Episode: 548, total reward: -60
Episode: 549, total reward: -58
Episode: 550, total reward: -56
Episode: 551, total reward: -60
Episode: 552, total reward: -60
Episode: 553, total reward: -58
Episode: 554, total reward: -58
Episode: 555, total reward: -56
Episode: 556, total reward: -60
Episode: 557, total reward: -58
Episode: 558, total reward: -60
Episode: 559, total reward: -58
Episode: 560, total reward: -54
Episode: 561, total reward: -60
Episode: 562, total reward: -56
Episode: 563, total reward: -58
Episode: 564, total reward: -60
Episode: 565, total reward: -58
Episode: 566, total reward: -58
Episode: 567, total reward: -60
Episode: 568, total reward: -52
Episode: 569, total reward: -60
Episode: 570, total reward: -58
Episode: 571, total reward: -58
Episode: 572, total reward: -60
Episode: 573, total reward: -58
Episode: 574, total reward: -56
Episode: 575, total reward: -58
Episode:

Episode: 807, total reward: -60
Episode: 808, total reward: -58
Episode: 809, total reward: -56
Episode: 810, total reward: -54
Episode: 811, total reward: -58
Episode: 812, total reward: -54
Episode: 813, total reward: -60
Episode: 814, total reward: -58
Episode: 815, total reward: -56
Episode: 816, total reward: -54
Episode: 817, total reward: -58
Episode: 818, total reward: -60
Episode: 819, total reward: -60
Episode: 820, total reward: -58
Episode: 821, total reward: -60
Episode: 822, total reward: -58
Episode: 823, total reward: -60
Episode: 824, total reward: -56
Episode: 825, total reward: -60
Episode: 826, total reward: -56
Episode: 827, total reward: -58
Episode: 828, total reward: -52
Episode: 829, total reward: -60
Episode: 830, total reward: -56
Episode: 831, total reward: -56
Episode: 832, total reward: -60
Episode: 833, total reward: -60
Episode: 834, total reward: -58
Episode: 835, total reward: -58
Episode: 836, total reward: -58
Episode: 837, total reward: -52
Episode:

Episode: 1070, total reward: -54
Episode: 1071, total reward: -56
Episode: 1072, total reward: -58
Episode: 1073, total reward: -60
Episode: 1074, total reward: -60
Episode: 1075, total reward: -60
Episode: 1076, total reward: -60
Episode: 1077, total reward: -60
Episode: 1078, total reward: -60
Episode: 1079, total reward: -50
Episode: 1080, total reward: -60
Episode: 1081, total reward: -60
Episode: 1082, total reward: -60
Episode: 1083, total reward: -58
Episode: 1084, total reward: -60
Episode: 1085, total reward: -58
Episode: 1086, total reward: -58
Episode: 1087, total reward: -58
Episode: 1088, total reward: -58
Episode: 1089, total reward: -58
Episode: 1090, total reward: -56
Episode: 1091, total reward: -56
Episode: 1092, total reward: -58
Episode: 1093, total reward: -52
Episode: 1094, total reward: -58
Episode: 1095, total reward: -58
Episode: 1096, total reward: -60
Episode: 1097, total reward: -52
Episode: 1098, total reward: -58
Episode: 1099, total reward: -58
Episode: 1

Episode: 1321, total reward: -60
Episode: 1322, total reward: -60
Episode: 1323, total reward: -58
Episode: 1324, total reward: -60
Episode: 1325, total reward: -60
Episode: 1326, total reward: -58
Episode: 1327, total reward: -60
Episode: 1328, total reward: -60
Episode: 1329, total reward: -58
Episode: 1330, total reward: -56
Episode: 1331, total reward: -52
Episode: 1332, total reward: -58
Episode: 1333, total reward: -58
Episode: 1334, total reward: -56
Episode: 1335, total reward: -60
Episode: 1336, total reward: -58
Episode: 1337, total reward: -60
Episode: 1338, total reward: -56
Episode: 1339, total reward: -58
Episode: 1340, total reward: -56
Episode: 1341, total reward: -58
Episode: 1342, total reward: -54
Episode: 1343, total reward: -56
Episode: 1344, total reward: -60
Episode: 1345, total reward: -60
Episode: 1346, total reward: -60
Episode: 1347, total reward: -58
Episode: 1348, total reward: -56
Episode: 1349, total reward: -60
Episode: 1350, total reward: -60
Episode: 1

Episode: 1573, total reward: -52
Episode: 1574, total reward: -54
Episode: 1575, total reward: -60
Episode: 1576, total reward: -58
Episode: 1577, total reward: -58
Episode: 1578, total reward: -60
Episode: 1579, total reward: -60
Episode: 1580, total reward: -52
Episode: 1581, total reward: -60
Episode: 1582, total reward: -60
Episode: 1583, total reward: -60
Episode: 1584, total reward: -56
Episode: 1585, total reward: -58
Episode: 1586, total reward: -54
Episode: 1587, total reward: -58
Episode: 1588, total reward: -60
Episode: 1589, total reward: -58
Episode: 1590, total reward: -58
Episode: 1591, total reward: -54
Episode: 1592, total reward: -60
Episode: 1593, total reward: -60
Episode: 1594, total reward: -56
Episode: 1595, total reward: -56
Episode: 1596, total reward: -58
Episode: 1597, total reward: -60
Episode: 1598, total reward: -54
Episode: 1599, total reward: -60
Episode: 1600, total reward: -60
Episode: 1601, total reward: -60
Episode: 1602, total reward: -60
Episode: 1

Episode: 1839, total reward: -56
Episode: 1840, total reward: -60
Episode: 1841, total reward: -58
Episode: 1842, total reward: -52
Episode: 1843, total reward: -60
Episode: 1844, total reward: -56
Episode: 1845, total reward: -60
Episode: 1846, total reward: -58
Episode: 1847, total reward: -60
Episode: 1848, total reward: -56
Episode: 1849, total reward: -58
Episode: 1850, total reward: -58
Episode: 1851, total reward: -58
Episode: 1852, total reward: -56
Episode: 1853, total reward: -60
Episode: 1854, total reward: -60
Episode: 1855, total reward: -58
Episode: 1856, total reward: -56
Episode: 1857, total reward: -54
Episode: 1858, total reward: -60
Episode: 1859, total reward: -58
Episode: 1860, total reward: -58
Episode: 1861, total reward: -60
Episode: 1862, total reward: -60
Episode: 1863, total reward: -54
Episode: 1864, total reward: -58
Episode: 1865, total reward: -56
Episode: 1866, total reward: -60
Episode: 1867, total reward: -54
Episode: 1868, total reward: -54
Episode: 1

Episode: 2094, total reward: -54
Episode: 2095, total reward: -60
Episode: 2096, total reward: -58
Episode: 2097, total reward: -60
Episode: 2098, total reward: -56
Episode: 2099, total reward: -56
Episode: 2100, total reward: -60
Episode: 2101, total reward: -60
Episode: 2102, total reward: -54
Episode: 2103, total reward: -60
Episode: 2104, total reward: -48
Episode: 2105, total reward: -60
Episode: 2106, total reward: -60
Episode: 2107, total reward: -54
Episode: 2108, total reward: -56
Episode: 2109, total reward: -56
Episode: 2110, total reward: -58
Episode: 2111, total reward: -60
Episode: 2112, total reward: -58
Episode: 2113, total reward: -58
Episode: 2114, total reward: -60
Episode: 2115, total reward: -60
Episode: 2116, total reward: -60
Episode: 2117, total reward: -60
Episode: 2118, total reward: -54
Episode: 2119, total reward: -58
Episode: 2120, total reward: -60
Episode: 2121, total reward: -58
Episode: 2122, total reward: -60
Episode: 2123, total reward: -60
Episode: 2

Episode: 2353, total reward: -60
Episode: 2354, total reward: -56
Episode: 2355, total reward: -56
Episode: 2356, total reward: -60
Episode: 2357, total reward: -60
Episode: 2358, total reward: -60
Episode: 2359, total reward: -60
Episode: 2360, total reward: -58
Episode: 2361, total reward: -60
Episode: 2362, total reward: -58
Episode: 2363, total reward: -60
Episode: 2364, total reward: -56
Episode: 2365, total reward: -54
Episode: 2366, total reward: -56
Episode: 2367, total reward: -58
Episode: 2368, total reward: -56
Episode: 2369, total reward: -60
Episode: 2370, total reward: -60
Episode: 2371, total reward: -58
Episode: 2372, total reward: -60
Episode: 2373, total reward: -56
Episode: 2374, total reward: -56
Episode: 2375, total reward: -58
Episode: 2376, total reward: -56
Episode: 2377, total reward: -60
Episode: 2378, total reward: -58
Episode: 2379, total reward: -60
Episode: 2380, total reward: -48
Episode: 2381, total reward: -60
Episode: 2382, total reward: -60
Episode: 2

Episode: 2604, total reward: -54
Episode: 2605, total reward: -60
Episode: 2606, total reward: -58
Episode: 2607, total reward: -56
Episode: 2608, total reward: -60
Episode: 2609, total reward: -60
Episode: 2610, total reward: -58
Episode: 2611, total reward: -58
Episode: 2612, total reward: -58
Episode: 2613, total reward: -60
Episode: 2614, total reward: -60
Episode: 2615, total reward: -58
Episode: 2616, total reward: -60
Episode: 2617, total reward: -50
Episode: 2618, total reward: -60
Episode: 2619, total reward: -58
Episode: 2620, total reward: -56
Episode: 2621, total reward: -56
Episode: 2622, total reward: -54
Episode: 2623, total reward: -58
Episode: 2624, total reward: -56
Episode: 2625, total reward: -52
Episode: 2626, total reward: -58
Episode: 2627, total reward: -60
Episode: 2628, total reward: -58
Episode: 2629, total reward: -54
Episode: 2630, total reward: -58
Episode: 2631, total reward: -56
Episode: 2632, total reward: -58
Episode: 2633, total reward: -60
Episode: 2

Episode: 2863, total reward: -60
Episode: 2864, total reward: -60
Episode: 2865, total reward: -60
Episode: 2866, total reward: -58
Episode: 2867, total reward: -60
Episode: 2868, total reward: -48
Episode: 2869, total reward: -60
Episode: 2870, total reward: -54
Episode: 2871, total reward: -60
Episode: 2872, total reward: -58
Episode: 2873, total reward: -60
Episode: 2874, total reward: -60
Episode: 2875, total reward: -58
Episode: 2876, total reward: -60
Episode: 2877, total reward: -60
Episode: 2878, total reward: -50
Episode: 2879, total reward: -60
Episode: 2880, total reward: -56
Episode: 2881, total reward: -58
Episode: 2882, total reward: -56
Episode: 2883, total reward: -60
Episode: 2884, total reward: -60
Episode: 2885, total reward: -58
Episode: 2886, total reward: -60
Episode: 2887, total reward: -58
Episode: 2888, total reward: -60
Episode: 2889, total reward: -60
Episode: 2890, total reward: -60
Episode: 2891, total reward: -50
Episode: 2892, total reward: -58
Episode: 2

Episode: 3126, total reward: -60
Episode: 3127, total reward: -60
Episode: 3128, total reward: -60
Episode: 3129, total reward: -60
Episode: 3130, total reward: -56
Episode: 3131, total reward: -58
Episode: 3132, total reward: -60
Episode: 3133, total reward: -54
Episode: 3134, total reward: -60
Episode: 3135, total reward: -60
Episode: 3136, total reward: -58
Episode: 3137, total reward: -54
Episode: 3138, total reward: -58
Episode: 3139, total reward: -52
Episode: 3140, total reward: -60
Episode: 3141, total reward: -58
Episode: 3142, total reward: -60
Episode: 3143, total reward: -60
Episode: 3144, total reward: -58
Episode: 3145, total reward: -46
Episode: 3146, total reward: -60
Episode: 3147, total reward: -60
Episode: 3148, total reward: -58
Episode: 3149, total reward: -56
Episode: 3150, total reward: -54
Episode: 3151, total reward: -54
Episode: 3152, total reward: -58
Episode: 3153, total reward: -60
Episode: 3154, total reward: -56
Episode: 3155, total reward: -54
Episode: 3

Episode: 3383, total reward: -60
Episode: 3384, total reward: -56
Episode: 3385, total reward: -58
Episode: 3386, total reward: -60
Episode: 3387, total reward: -54
Episode: 3388, total reward: -52
Episode: 3389, total reward: -60
Episode: 3390, total reward: -60
Episode: 3391, total reward: -60
Episode: 3392, total reward: -60
Episode: 3393, total reward: -60
Episode: 3394, total reward: -60
Episode: 3395, total reward: -58
Episode: 3396, total reward: -52
Episode: 3397, total reward: -60
Episode: 3398, total reward: -60
Episode: 3399, total reward: -60
Episode: 3400, total reward: -58
Episode: 3401, total reward: -56
Episode: 3402, total reward: -58
Episode: 3403, total reward: -56
Episode: 3404, total reward: -60
Episode: 3405, total reward: -60
Episode: 3406, total reward: -58
Episode: 3407, total reward: -60
Episode: 3408, total reward: -58
Episode: 3409, total reward: -60
Episode: 3410, total reward: -48
Episode: 3411, total reward: -56
Episode: 3412, total reward: -56
Episode: 3

Episode: 3652, total reward: -58
Episode: 3653, total reward: -58
Episode: 3654, total reward: -58
Episode: 3655, total reward: -60
Episode: 3656, total reward: -58
Episode: 3657, total reward: -60
Episode: 3658, total reward: -60
Episode: 3659, total reward: -58
Episode: 3660, total reward: -58
Episode: 3661, total reward: -56
Episode: 3662, total reward: -60
Episode: 3663, total reward: -56
Episode: 3664, total reward: -60
Episode: 3665, total reward: -58
Episode: 3666, total reward: -50
Episode: 3667, total reward: -58
Episode: 3668, total reward: -60
Episode: 3669, total reward: -60
Episode: 3670, total reward: -56
Episode: 3671, total reward: -60
Episode: 3672, total reward: -60
Episode: 3673, total reward: -60
Episode: 3674, total reward: -60
Episode: 3675, total reward: -58
Episode: 3676, total reward: -48
Episode: 3677, total reward: -58
Episode: 3678, total reward: -60
Episode: 3679, total reward: -60
Episode: 3680, total reward: -54
Episode: 3681, total reward: -60
Episode: 3

Episode: 3908, total reward: -58
Episode: 3909, total reward: -54
Episode: 3910, total reward: -58
Episode: 3911, total reward: -50
Episode: 3912, total reward: -60
Episode: 3913, total reward: -58
Episode: 3914, total reward: -56
Episode: 3915, total reward: -58
Episode: 3916, total reward: -56
Episode: 3917, total reward: -56
Episode: 3918, total reward: -60
Episode: 3919, total reward: -56
Episode: 3920, total reward: -58
Episode: 3921, total reward: -58
Episode: 3922, total reward: -60
Episode: 3923, total reward: -60
Episode: 3924, total reward: -60
Episode: 3925, total reward: -54
Episode: 3926, total reward: -60
Episode: 3927, total reward: -60
Episode: 3928, total reward: -58
Episode: 3929, total reward: -58
Episode: 3930, total reward: -60
Episode: 3931, total reward: -60
Episode: 3932, total reward: -58
Episode: 3933, total reward: -56
Episode: 3934, total reward: -58
Episode: 3935, total reward: -58
Episode: 3936, total reward: -60
Episode: 3937, total reward: -58
Episode: 3

Episode: 4168, total reward: -58
Episode: 4169, total reward: -58
Episode: 4170, total reward: -58
Episode: 4171, total reward: -60
Episode: 4172, total reward: -60
Episode: 4173, total reward: -58
Episode: 4174, total reward: -58
Episode: 4175, total reward: -56
Episode: 4176, total reward: -60
Episode: 4177, total reward: -60
Episode: 4178, total reward: -56
Episode: 4179, total reward: -58
Episode: 4180, total reward: -60
Episode: 4181, total reward: -60
Episode: 4182, total reward: -56
Episode: 4183, total reward: -60
Episode: 4184, total reward: -60
Episode: 4185, total reward: -54
Episode: 4186, total reward: -60
Episode: 4187, total reward: -60
Episode: 4188, total reward: -56
Episode: 4189, total reward: -60
Episode: 4190, total reward: -48
Episode: 4191, total reward: -60
Episode: 4192, total reward: -60
Episode: 4193, total reward: -50
Episode: 4194, total reward: -56
Episode: 4195, total reward: -60
Episode: 4196, total reward: -60
Episode: 4197, total reward: -60
Episode: 4

Episode: 4430, total reward: -56
Episode: 4431, total reward: -56
Episode: 4432, total reward: -56
Episode: 4433, total reward: -56
Episode: 4434, total reward: -58
Episode: 4435, total reward: -56
Episode: 4436, total reward: -58
Episode: 4437, total reward: -54
Episode: 4438, total reward: -60
Episode: 4439, total reward: -60
Episode: 4440, total reward: -58
Episode: 4441, total reward: -58
Episode: 4442, total reward: -54
Episode: 4443, total reward: -60
Episode: 4444, total reward: -60
Episode: 4445, total reward: -52
Episode: 4446, total reward: -58
Episode: 4447, total reward: -60
Episode: 4448, total reward: -56
Episode: 4449, total reward: -60
Episode: 4450, total reward: -60
Episode: 4451, total reward: -58
Episode: 4452, total reward: -60
Episode: 4453, total reward: -60
Episode: 4454, total reward: -56
Episode: 4455, total reward: -58
Episode: 4456, total reward: -60
Episode: 4457, total reward: -56
Episode: 4458, total reward: -58
Episode: 4459, total reward: -58
Episode: 4

Episode: 4696, total reward: -60
Episode: 4697, total reward: -56
Episode: 4698, total reward: -58
Episode: 4699, total reward: -60
Episode: 4700, total reward: -60
Episode: 4701, total reward: -60
Episode: 4702, total reward: -60
Episode: 4703, total reward: -58
Episode: 4704, total reward: -54
Episode: 4705, total reward: -60
Episode: 4706, total reward: -60
Episode: 4707, total reward: -60
Episode: 4708, total reward: -60
Episode: 4709, total reward: -56
Episode: 4710, total reward: -54
Episode: 4711, total reward: -56
Episode: 4712, total reward: -60
Episode: 4713, total reward: -58
Episode: 4714, total reward: -58
Episode: 4715, total reward: -54
Episode: 4716, total reward: -60
Episode: 4717, total reward: -56
Episode: 4718, total reward: -58
Episode: 4719, total reward: -56
Episode: 4720, total reward: -60
Episode: 4721, total reward: -58
Episode: 4722, total reward: -60
Episode: 4723, total reward: -60
Episode: 4724, total reward: -56
Episode: 4725, total reward: -58
Episode: 4

Episode: 4963, total reward: -58
Episode: 4964, total reward: -60
Episode: 4965, total reward: -60
Episode: 4966, total reward: -60
Episode: 4967, total reward: -52
Episode: 4968, total reward: -56
Episode: 4969, total reward: -58
Episode: 4970, total reward: -58
Episode: 4971, total reward: -56
Episode: 4972, total reward: -56
Episode: 4973, total reward: -60
Episode: 4974, total reward: -54
Episode: 4975, total reward: -60
Episode: 4976, total reward: -58
Episode: 4977, total reward: -42
Episode: 4978, total reward: -56
Episode: 4979, total reward: -58
Episode: 4980, total reward: -60
Episode: 4981, total reward: -60
Episode: 4982, total reward: -56
Episode: 4983, total reward: -58
Episode: 4984, total reward: -58
Episode: 4985, total reward: -58
Episode: 4986, total reward: -60
Episode: 4987, total reward: -58
Episode: 4988, total reward: -58
Episode: 4989, total reward: -58
Episode: 4990, total reward: -60
Episode: 4991, total reward: -60
Episode: 4992, total reward: -54
Episode: 4

Episode: 5227, total reward: -56
Episode: 5228, total reward: -60
Episode: 5229, total reward: -60
Episode: 5230, total reward: -60
Episode: 5231, total reward: -56
Episode: 5232, total reward: -60
Episode: 5233, total reward: -60
Episode: 5234, total reward: -56
Episode: 5235, total reward: -60
Episode: 5236, total reward: -58
Episode: 5237, total reward: -54
Episode: 5238, total reward: -60
Episode: 5239, total reward: -58
Episode: 5240, total reward: -58
Episode: 5241, total reward: -46
Episode: 5242, total reward: -60
Episode: 5243, total reward: -60
Episode: 5244, total reward: -60
Episode: 5245, total reward: -60
Episode: 5246, total reward: -60
Episode: 5247, total reward: -58
Episode: 5248, total reward: -54
Episode: 5249, total reward: -58
Episode: 5250, total reward: -60
Episode: 5251, total reward: -56
Episode: 5252, total reward: -54
Episode: 5253, total reward: -58
Episode: 5254, total reward: -60
Episode: 5255, total reward: -60
Episode: 5256, total reward: -60
Episode: 5

Episode: 5477, total reward: -60
Episode: 5478, total reward: -56
Episode: 5479, total reward: -56
Episode: 5480, total reward: -60
Episode: 5481, total reward: -54
Episode: 5482, total reward: -60
Episode: 5483, total reward: -60
Episode: 5484, total reward: -58
Episode: 5485, total reward: -58
Episode: 5486, total reward: -54
Episode: 5487, total reward: -56
Episode: 5488, total reward: -60
Episode: 5489, total reward: -60
Episode: 5490, total reward: -52
Episode: 5491, total reward: -56
Episode: 5492, total reward: -60
Episode: 5493, total reward: -52
Episode: 5494, total reward: -60
Episode: 5495, total reward: -60
Episode: 5496, total reward: -60
Episode: 5497, total reward: -58
Episode: 5498, total reward: -58
Episode: 5499, total reward: -52
Episode: 5500, total reward: -54
Episode: 5501, total reward: -60
Episode: 5502, total reward: -60
Episode: 5503, total reward: -60
Episode: 5504, total reward: -60
Episode: 5505, total reward: -58
Episode: 5506, total reward: -58
Episode: 5

Episode: 5744, total reward: -54
Episode: 5745, total reward: -56
Episode: 5746, total reward: -58
Episode: 5747, total reward: -56
Episode: 5748, total reward: -56
Episode: 5749, total reward: -60
Episode: 5750, total reward: -60
Episode: 5751, total reward: -58
Episode: 5752, total reward: -60
Episode: 5753, total reward: -58
Episode: 5754, total reward: -58
Episode: 5755, total reward: -42
Episode: 5756, total reward: -58
Episode: 5757, total reward: -60
Episode: 5758, total reward: -58
Episode: 5759, total reward: -54
Episode: 5760, total reward: -60
Episode: 5761, total reward: -60
Episode: 5762, total reward: -60
Episode: 5763, total reward: -58
Episode: 5764, total reward: -54
Episode: 5765, total reward: -58
Episode: 5766, total reward: -56
Episode: 5767, total reward: -60
Episode: 5768, total reward: -58
Episode: 5769, total reward: -52
Episode: 5770, total reward: -60
Episode: 5771, total reward: -58
Episode: 5772, total reward: -60
Episode: 5773, total reward: -60
Episode: 5

Episode: 6001, total reward: -54
Episode: 6002, total reward: -54
Episode: 6003, total reward: -60
Episode: 6004, total reward: -60
Episode: 6005, total reward: -60
Episode: 6006, total reward: -58
Episode: 6007, total reward: -58
Episode: 6008, total reward: -54
Episode: 6009, total reward: -58
Episode: 6010, total reward: -56
Episode: 6011, total reward: -60
Episode: 6012, total reward: -58
Episode: 6013, total reward: -60
Episode: 6014, total reward: -60
Episode: 6015, total reward: -60
Episode: 6016, total reward: -58
Episode: 6017, total reward: -60
Episode: 6018, total reward: -60
Episode: 6019, total reward: -60
Episode: 6020, total reward: -48
Episode: 6021, total reward: -58
Episode: 6022, total reward: -58
Episode: 6023, total reward: -58
Episode: 6024, total reward: -60
Episode: 6025, total reward: -60
Episode: 6026, total reward: -60
Episode: 6027, total reward: -58
Episode: 6028, total reward: -56
Episode: 6029, total reward: -58
Episode: 6030, total reward: -58
Episode: 6

Episode: 6266, total reward: -54
Episode: 6267, total reward: -60
Episode: 6268, total reward: -60
Episode: 6269, total reward: -60
Episode: 6270, total reward: -56
Episode: 6271, total reward: -58
Episode: 6272, total reward: -60
Episode: 6273, total reward: -58
Episode: 6274, total reward: -58
Episode: 6275, total reward: -54
Episode: 6276, total reward: -60
Episode: 6277, total reward: -60
Episode: 6278, total reward: -56
Episode: 6279, total reward: -56
Episode: 6280, total reward: -56
Episode: 6281, total reward: -60
Episode: 6282, total reward: -60
Episode: 6283, total reward: -50
Episode: 6284, total reward: -60
Episode: 6285, total reward: -60
Episode: 6286, total reward: -60
Episode: 6287, total reward: -60
Episode: 6288, total reward: -60
Episode: 6289, total reward: -60
Episode: 6290, total reward: -60
Episode: 6291, total reward: -54
Episode: 6292, total reward: -60
Episode: 6293, total reward: -46
Episode: 6294, total reward: -54
Episode: 6295, total reward: -60
Episode: 6

Episode: 6518, total reward: -56
Episode: 6519, total reward: -60
Episode: 6520, total reward: -60
Episode: 6521, total reward: -60
Episode: 6522, total reward: -60
Episode: 6523, total reward: -58
Episode: 6524, total reward: -60
Episode: 6525, total reward: -60
Episode: 6526, total reward: -60
Episode: 6527, total reward: -56
Episode: 6528, total reward: -60
Episode: 6529, total reward: -58
Episode: 6530, total reward: -60
Episode: 6531, total reward: -60
Episode: 6532, total reward: -56
Episode: 6533, total reward: -54
Episode: 6534, total reward: -58
Episode: 6535, total reward: -54
Episode: 6536, total reward: -60
Episode: 6537, total reward: -58
Episode: 6538, total reward: -56
Episode: 6539, total reward: -60
Episode: 6540, total reward: -58
Episode: 6541, total reward: -60
Episode: 6542, total reward: -60
Episode: 6543, total reward: -60
Episode: 6544, total reward: -60
Episode: 6545, total reward: -60
Episode: 6546, total reward: -58
Episode: 6547, total reward: -60
Episode: 6

Episode: 6769, total reward: -58
Episode: 6770, total reward: -60
Episode: 6771, total reward: -60
Episode: 6772, total reward: -56
Episode: 6773, total reward: -60
Episode: 6774, total reward: -56
Episode: 6775, total reward: -60
Episode: 6776, total reward: -58
Episode: 6777, total reward: -58
Episode: 6778, total reward: -60
Episode: 6779, total reward: -52
Episode: 6780, total reward: -60
Episode: 6781, total reward: -56
Episode: 6782, total reward: -56
Episode: 6783, total reward: -58
Episode: 6784, total reward: -58
Episode: 6785, total reward: -48
Episode: 6786, total reward: -60
Episode: 6787, total reward: -60
Episode: 6788, total reward: -58
Episode: 6789, total reward: -52
Episode: 6790, total reward: -58
Episode: 6791, total reward: -60
Episode: 6792, total reward: -60
Episode: 6793, total reward: -58
Episode: 6794, total reward: -60
Episode: 6795, total reward: -60
Episode: 6796, total reward: -60
Episode: 6797, total reward: -60
Episode: 6798, total reward: -54
Episode: 6

Episode: 7019, total reward: -60
Episode: 7020, total reward: -60
Episode: 7021, total reward: -58
Episode: 7022, total reward: -58
Episode: 7023, total reward: -58
Episode: 7024, total reward: -56
Episode: 7025, total reward: -58
Episode: 7026, total reward: -60
Episode: 7027, total reward: -46
Episode: 7028, total reward: -56
Episode: 7029, total reward: -60
Episode: 7030, total reward: -50
Episode: 7031, total reward: -58
Episode: 7032, total reward: -58
Episode: 7033, total reward: -54
Episode: 7034, total reward: -60
Episode: 7035, total reward: -60
Episode: 7036, total reward: -58
Episode: 7037, total reward: -58
Episode: 7038, total reward: -58
Episode: 7039, total reward: -60
Episode: 7040, total reward: -60
Episode: 7041, total reward: -56
Episode: 7042, total reward: -60
Episode: 7043, total reward: -60
Episode: 7044, total reward: -58
Episode: 7045, total reward: -60
Episode: 7046, total reward: -56
Episode: 7047, total reward: -58
Episode: 7048, total reward: -60
Episode: 7

Episode: 7268, total reward: -60
Episode: 7269, total reward: -56
Episode: 7270, total reward: -58
Episode: 7271, total reward: -56
Episode: 7272, total reward: -60
Episode: 7273, total reward: -56
Episode: 7274, total reward: -56
Episode: 7275, total reward: -60
Episode: 7276, total reward: -54
Episode: 7277, total reward: -60
Episode: 7278, total reward: -60
Episode: 7279, total reward: -60
Episode: 7280, total reward: -54
Episode: 7281, total reward: -58
Episode: 7282, total reward: -60
Episode: 7283, total reward: -60
Episode: 7284, total reward: -54
Episode: 7285, total reward: -58
Episode: 7286, total reward: -58
Episode: 7287, total reward: -60
Episode: 7288, total reward: -58
Episode: 7289, total reward: -60
Episode: 7290, total reward: -58
Episode: 7291, total reward: -60
Episode: 7292, total reward: -60
Episode: 7293, total reward: -52
Episode: 7294, total reward: -58
Episode: 7295, total reward: -58
Episode: 7296, total reward: -60
Episode: 7297, total reward: -54
Episode: 7

Episode: 7529, total reward: -60
Episode: 7530, total reward: -52
Episode: 7531, total reward: -58
Episode: 7532, total reward: -56
Episode: 7533, total reward: -56
Episode: 7534, total reward: -56
Episode: 7535, total reward: -60
Episode: 7536, total reward: -58
Episode: 7537, total reward: -60
Episode: 7538, total reward: -60
Episode: 7539, total reward: -60
Episode: 7540, total reward: -56
Episode: 7541, total reward: -52
Episode: 7542, total reward: -58
Episode: 7543, total reward: -58
Episode: 7544, total reward: -60
Episode: 7545, total reward: -54
Episode: 7546, total reward: -60
Episode: 7547, total reward: -60
Episode: 7548, total reward: -54
Episode: 7549, total reward: -58
Episode: 7550, total reward: -56
Episode: 7551, total reward: -60
Episode: 7552, total reward: -60
Episode: 7553, total reward: -56
Episode: 7554, total reward: -52
Episode: 7555, total reward: -56
Episode: 7556, total reward: -58
Episode: 7557, total reward: -54
Episode: 7558, total reward: -60
Episode: 7

Episode: 7794, total reward: -54
Episode: 7795, total reward: -56
Episode: 7796, total reward: -54
Episode: 7797, total reward: -60
Episode: 7798, total reward: -54
Episode: 7799, total reward: -60
Episode: 7800, total reward: -58
Episode: 7801, total reward: -58
Episode: 7802, total reward: -56
Episode: 7803, total reward: -60
Episode: 7804, total reward: -58
Episode: 7805, total reward: -60
Episode: 7806, total reward: -60
Episode: 7807, total reward: -54
Episode: 7808, total reward: -60
Episode: 7809, total reward: -60
Episode: 7810, total reward: -56
Episode: 7811, total reward: -56
Episode: 7812, total reward: -56
Episode: 7813, total reward: -56
Episode: 7814, total reward: -60
Episode: 7815, total reward: -60
Episode: 7816, total reward: -52
Episode: 7817, total reward: -60
Episode: 7818, total reward: -60
Episode: 7819, total reward: -60
Episode: 7820, total reward: -50
Episode: 7821, total reward: -60
Episode: 7822, total reward: -58
Episode: 7823, total reward: -56
Episode: 7

In [67]:
policy_net.predict(torch.tensor([1.9]))

  return self.model(torch.tensor(s))
  input = module(input)


tensor([0.2585, 0.3375, 0.4040], grad_fn=<SoftmaxBackward0>)

In [None]:
# creating placeholder functions
class ShowerEnvNew(Env):
    def __init__(self):
        # defining the actions
        self.action_space = Discrete(3) # 3 discrete actions - meddling with the shower handle
        # Temperature Array
        self.observation_space = Bo```x(low=np.array([0]),high=np.array([100]))
        # Set start temperature
        self.state = 38+ random.randint(-3,3)
        # Set shower length
        self.shower_length = 60
        
    def step(self,action):
        self.state += action -1
        self.shower_length -= 1
        
        if self.state >=37 and self.state <=38:
            reward = 1
        else:
            reward = -1
        if self.shower_length<= 0:
            truncated = True
            terminated = True
        else:
            truncated = False
            terminated = False
        self.state += random.randint(-1,1)
        info = {}
        return self.state, reward, terminated, truncated, info
    def render(self):
        None
    def reset(self):
        self.state = 38+ random.randint(-3,3)
        # Set shower length
        self.shower_length = 60
        info = {}
        return self.state, info 

In [8]:
env = ShowerEnv()

In [13]:
n_episode = 100

for episode in range(n_episode):

    action = env.action_space.sample()
    new_state, reward, terminated, truncated,info = env.step(action)
    print(new_state, reward, terminated, truncated,info )

38 1 True True {}
38 1 True True {}
39 1 True True {}
41 -1 True True {}
40 -1 True True {}
40 -1 True True {}
40 -1 True True {}
41 -1 True True {}
41 -1 True True {}
42 -1 True True {}
43 -1 True True {}
41 -1 True True {}
40 -1 True True {}
41 -1 True True {}
41 -1 True True {}
40 -1 True True {}
40 -1 True True {}
40 -1 True True {}
42 -1 True True {}
40 -1 True True {}
40 -1 True True {}
41 -1 True True {}
42 -1 True True {}
44 -1 True True {}
46 -1 True True {}
47 -1 True True {}
47 -1 True True {}
45 -1 True True {}
45 -1 True True {}
44 -1 True True {}
44 -1 True True {}
45 -1 True True {}
45 -1 True True {}
47 -1 True True {}
46 -1 True True {}
45 -1 True True {}
45 -1 True True {}
44 -1 True True {}
44 -1 True True {}
42 -1 True True {}
43 -1 True True {}
44 -1 True True {}
45 -1 True True {}
47 -1 True True {}
45 -1 True True {}
45 -1 True True {}
44 -1 True True {}
45 -1 True True {}
43 -1 True True {}
43 -1 True True {}
42 -1 True True {}
40 -1 True True {}
40 -1 True True