In [8]:
import numpy as np
from utils.test_env import EnvTest


class LinearSchedule(object):
    def __init__(self, eps_begin, eps_end, nsteps):
        """
        Args:
            eps_begin: initial exploration
            eps_end: end exploration
            nsteps: number of steps between the two values of eps
        """
        self.epsilon        = eps_begin
        self.eps_begin      = eps_begin
        self.eps_end        = eps_end
        self.nsteps         = nsteps


    def update(self, t):
        """
        Updates epsilon

        Args:
            t: int
                frame number
        """
        ##############################################################
        """
        TODO: modify self.epsilon such that
			  it is a linear interpolation from self.eps_begin to
			  self.eps_end as t goes from 0 to self.nsteps
			  For t > self.nsteps self.epsilon remains constant
        """
        ##############################################################
        ################ YOUR CODE HERE - 3-4 lines ##################
        if t<= self.nsteps:
            self.epsilon = self.eps_begin -t * (self.eps_begin-self.eps_end) / self.nsteps
        else:
            self.epsilon = self.eps_end
         
        ##############################################################
        ######################## END YOUR CODE ############## ########


class LinearExploration(LinearSchedule):
    def __init__(self, env, eps_begin, eps_end, nsteps):
        """
        Args:
            env: gym environment
            eps_begin: float
                initial exploration rate
            eps_end: float
                final exploration rate
            nsteps: int
                number of steps taken to linearly decay eps_begin to eps_end
        """
        self.env = env
        super(LinearExploration, self).__init__(eps_begin, eps_end, nsteps)


    def get_action(self, best_action):
        """
        Returns a random action with prob epsilon, otherwise returns the best_action

        Args:
            best_action: int
                best action according some policy
        Returns:
            an action
        """
        ##############################################################
        """
        TODO: with probability self.epsilon, return a random action
                else, return best_action

                you can access the environment via self.env

                you may use env.action_space.sample() to generate
                a random action
        """
        ##############################################################
        ################ YOUR CODE HERE - 4-5 lines ##################
		#1-epision  take the optimal policy
        #compute t and update epsilon based on t
        t= round((self.epsilon-self.eps_begin)/(self.eps_end-self.eps_begin) * self.nsteps +1)
        #update the eplison
        self.update(t)
        print("t is %d",t)
        print("epsilon is %f",self.epsilon)
        actions = [best_action,self.env.action_space.sample()]#return an random action
        act = np.random.choice(actions, 1, p=[1-self.epsilon ,self.epsilon])
       
        
        return act
        ##############################################################
        ######################## END YOUR CODE #######################





In [9]:
#test 1
env = EnvTest((5, 5, 1))
exp_strat = LinearExploration(env, 1, 0, 10)

found_diff = False
for i in range(10):
    rnd_act = exp_strat.get_action(0)
    print(rnd_act)
    if rnd_act != 0 and rnd_act is not None:
        found_diff = True

assert found_diff, "Test 1 failed."
print("Test1: ok")

t is %d 1
epsilon is %f 0.9
[0]
t is %d 2
epsilon is %f 0.8
[1]
t is %d 3
epsilon is %f 0.7
[0]
t is %d 4
epsilon is %f 0.6
[3]
t is %d 5
epsilon is %f 0.5
[1]
t is %d 6
epsilon is %f 0.4
[1]
t is %d 7
epsilon is %f 0.30000000000000004
[0]
t is %d 8
epsilon is %f 0.19999999999999996
[4]
t is %d 9
epsilon is %f 0.09999999999999998
[3]
t is %d 10
epsilon is %f 0.0
[0]
Test1: ok


In [12]:
#test2
env = EnvTest((5, 5, 1))
exp_strat = LinearExploration(env, 1, 0.5, 10)
exp_strat.update(5)
assert exp_strat.epsilon == 0.5, "Test 2 failed"
print("Test2: ok")

AssertionError: Test 2 failed

In [14]:
exp_strat.update(20)
print(exp_strat.epsilon)

0.5


In [16]:
import torch
torch.arange(0, 5)%3

tensor([0, 1, 2, 0, 1])

In [28]:
a = torch.randn(4, 3)
print(a)
torch.max(a,dim=1)[0]

tensor([[ 0.4446, -0.3754,  1.1079],
        [ 0.7860,  0.1047,  0.1031],
        [ 0.5682, -0.3683,  1.1609],
        [ 0.3840, -1.2969,  0.7715]])


tensor([1.1079, 0.7860, 1.1609, 0.7715])

In [34]:
a=torch.arange(0, 5) % 3
print(a)
b=torch.tensor([2, 2, 4, 4, 1, 3, 4, 1, 2, 4, 1, 2, 1, 1, 3, 2])
print(b)

tensor([0, 1, 2, 0, 1])
tensor([2, 2, 4, 4, 1, 3, 4, 1, 2, 4, 1, 2, 1, 1, 3, 2])


In [37]:
torch.nn.functional.one_hot(b, num_classes=10)

tensor([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]])

In [39]:
a.dtype

torch.int64

In [40]:
from utils.general import get_logger, Progbar, export_plot

In [42]:
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt

In [46]:
export_plot([1,2,3],'scores','output')

In [48]:
from platform import python_version
print(python_version())

3.8.8


In [50]:
 %pip install tensorboard
stris%run q5_linear_torch.py 

Collecting tensorboard
  Downloading tensorboard-2.9.0-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 4.9 MB/s eta 0:00:01
[?25hCollecting google-auth-oauthlib<0.5,>=0.4.1
  Using cached google_auth_oauthlib-0.4.6-py2.py3-none-any.whl (18 kB)
Collecting grpcio>=1.24.3
  Downloading grpcio-1.46.1-cp38-cp38-macosx_10_10_x86_64.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 5.9 MB/s eta 0:00:01
[?25hCollecting absl-py>=0.4
  Downloading absl_py-1.0.0-py3-none-any.whl (126 kB)
[K     |████████████████████████████████| 126 kB 5.7 MB/s eta 0:00:01
Collecting markdown>=2.6.8
  Downloading Markdown-3.3.7-py3-none-any.whl (97 kB)
[K     |████████████████████████████████| 97 kB 10.1 MB/s eta 0:00:01
[?25hCollecting tensorboard-data-server<0.7.0,>=0.6.0
  Using cached tensorboard_data_server-0.6.1-py3-none-macosx_10_9_x86_64.whl (3.5 MB)
Collecting google-auth<3,>=1.6.3
  Downloading google_auth-2.6.6-py2.py3-none-any.whl (156 kB)
[K     |███████

ModuleNotFoundError: No module named 'gym'

In [54]:
stride=4
img_height =4
filter_size=32
((stride - 1) * img_height - stride + filter_size) // 2

20

In [55]:
shape=(8,8,6)
state_0 = np.random.randint(0, 50, shape, dtype=np.uint16)

In [56]:
state_0

array([[[44, 18, 18, 31, 19, 20],
        [13, 28, 46, 32, 43, 26],
        [42,  1, 11, 33, 36, 40],
        [32, 46, 19, 28, 46, 10],
        [ 2,  1, 32, 30, 45, 28],
        [39, 30, 36,  4, 37,  9],
        [26, 33, 33,  2,  1, 19],
        [26, 30, 31, 31, 19,  9]],

       [[ 4, 10, 21, 17, 34, 35],
        [38, 40, 28, 42, 15, 36],
        [14,  2, 22,  6, 11, 38],
        [38, 12, 21,  1, 14,  8],
        [17,  5, 31, 38, 47, 28],
        [29, 32,  4, 14, 15,  8],
        [ 7, 39, 45, 45,  2, 35],
        [33, 35, 13, 48, 42, 42]],

       [[29, 20, 26, 25,  8, 41],
        [29, 40, 29, 26, 44,  1],
        [18, 10, 13, 35, 49, 21],
        [ 3, 49, 15, 23, 13, 26],
        [27, 29, 27,  9, 13,  0],
        [24, 30, 24, 11, 32, 11],
        [ 1,  5, 44, 42, 30, 37],
        [16, 20, 25, 21,  7, 38]],

       [[31, 37, 48, 24, 27, 32],
        [39, 32, 33, 22, 17, 20],
        [28, 42, 28, 36, 28,  3],
        [21, 12, 15, 47,  3, 35],
        [23, 28, 48,  2, 22,  1],
        

In [57]:
state_1 = np.random.randint(100, 150, shape, dtype=np.uint16)
state_1

array([[[116, 112, 111, 122, 121, 142],
        [131, 119, 106, 120, 130, 110],
        [132, 115, 148, 130, 110, 142],
        [130, 136, 109, 135, 104, 112],
        [148, 133, 117, 120, 104, 115],
        [147, 130, 119, 127, 137, 106],
        [112, 136, 145, 100, 106, 114],
        [108, 101, 106, 145, 149, 102]],

       [[109, 111, 141, 143, 122, 142],
        [128, 148, 132, 109, 124, 124],
        [139, 105, 139, 117, 118, 118],
        [102, 115, 134, 147, 110, 142],
        [119, 131, 131, 127, 103, 144],
        [110, 131, 136, 103, 104, 125],
        [106, 119, 132, 105, 107, 109],
        [131, 113, 104, 122, 102, 105]],

       [[112, 110, 121, 112, 125, 132],
        [131, 105, 104, 106, 132, 142],
        [139, 149, 103, 132, 130, 126],
        [107, 144, 102, 120, 106, 144],
        [103, 107, 122, 116, 121, 146],
        [140, 124, 132, 101, 138, 128],
        [137, 114, 137, 131, 107, 119],
        [122, 145, 127, 141, 104, 131]],

       [[125, 106, 124, 148, 114, 

In [58]:
np.random.rand()

0.757930882695492

In [64]:
349//100

3

In [94]:
import torch
x = torch.randn(4, 4,2)
print(x)
torch.movedim(x, 2, 0).shape

tensor([[[ 0.1864,  0.1465],
         [-2.1349, -0.0449],
         [ 0.6333,  0.3722],
         [ 0.9121,  0.7015]],

        [[ 0.0352, -1.7649],
         [-0.2103, -0.7273],
         [-1.7923, -0.1965],
         [-1.3804,  0.9932]],

        [[ 0.0996, -1.5308],
         [-0.6203, -1.2527],
         [-0.4741, -1.0620],
         [ 0.8359,  1.3252]],

        [[ 0.0321,  0.8137],
         [-1.2941, -0.5392],
         [-0.0583, -0.3762],
         [-0.5618, -1.0204]]])


torch.Size([2, 4, 4])

In [78]:
x.view(3,4)

tensor([[ 0.7146,  1.4473,  1.4733, -1.8084],
        [-1.1987,  0.0792, -0.5801,  2.5141],
        [ 0.0046,  0.2811, -0.3957, -0.1813]])

In [93]:
x = torch.reshape(4, 4,2)
print(x)


TypeError: reshape() takes 2 positional arguments but 3 were given

In [85]:
torch.movedim(x, 2, 0).shape

TypeError: transpose() received an invalid combination of arguments - got (int, int, int), but expected one of:
 * (int dim0, int dim1)
 * (name dim0, name dim1)


In [87]:
x = np.zeros((12, 12, 3))
x.shape
#yields: 
(12, 12, 3)

x = np.moveaxis(x, -1, 0)
x.shape
#yields: 
(3, 12, 12)

(3, 12, 12)

In [97]:
((4-1)*80-4+32)//2

134

In [99]:
import math
math.sqrt(6400)

80.0