In [1]:
import ale_py
# if using gymnasium
import shimmy
import gymnasium as gym
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split, Dataset
from torch.nn import functional as F
from torchvision.datasets import MNIST
from torchvision import datasets, transforms
import torchvision.transforms.functional as TF
import os
import random
import copy
import time
import pickle



# NN

In [2]:

class CNN(nn.Module):

    def __init__(self):
        super().__init__()
        
        
        self.Stack = torch.nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(start_dim = 0),
            nn.Linear(3136, 512),
            nn.ReLU(),
            nn.Linear(512, 5),
        )
        '''
        self.convStack = torch.nn.Sequential(
                   
            nn.Conv2d(1, 10, kernel_size=9, stride=1, padding = 4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(10, 50, kernel_size=5, stride=1, padding = 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=5, stride=5), 
            
            nn.Conv2d(50, 150, kernel_size=5, stride=1, padding = 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.fully_connected = nn.Sequential(
            nn.Flatten(start_dim = 0),
            nn.Linear(14400, 1000),
            nn.ReLU(),
            nn.Linear(1000, 100),
            nn.ReLU(),
            nn.Linear(100,5),
        )
        '''

    def forward(self, x):
        x = self.Stack(x)
        #x = self.convStack(x)
        #print(x.size())
        #x = self.fully_connected(x)
        #x = torch.softmax(x, dim = 0)
        return x


# Agent

In [3]:
class DQN_agent:

    def __init__(self):
        # Get cpu, gpu or mps device for training.
        device = (
            "cuda"
            if torch.cuda.is_available()
            else "mps"
            if torch.backends.mps.is_available()
            else "cpu"
        )
        print(f"Using {device} device")
        self.pred_NN = CNN().to(device)
        self.target_NN = copy.deepcopy(self.pred_NN)
    
    def predict(self, x):
        pred = self.NN_pred.forward(x)
        return pred
    
    def action(self, pred):
        if random.random() < epsilon:
            action = random.randint(0, pred.size(dim=0) - 1)
        else: 
            action = torch.argmax(pred)
        return action
    
    def train(self, experiences):
      
        data_set = ExperiencesDataset(experiences, self)
        data_loader = DataLoader(data_set, batch_size=samples_per_batch, shuffle=True)
        optimizer = torch.optim.Adam(self.pred_NN.parameters(), lr=1e-3)
        epoch_loss = 0
        
        for batch, (pred, y) in enumerate(data_loader):
            loss = F.mse_loss(pred, y)
            optimizer.zero_grad()
            loss.backward(retain_graph=True)
            epoch_loss += loss.item()
            optimizer.step()
            if batch == (len(data_loader) - 1):
                return epoch_loss
        
    def copy(self):
        self.target_NN = copy.deepcopy(self.pred_NN)
        

# Dataset

In [4]:
class ExperiencesDataset(Dataset):
    
    def __init__(self, experiences, agent):
    
        self.predictions = torch.empty(len(experiences), 5)
        self.labels = torch.empty(len(experiences), 5)
        
        for n, experience in enumerate(experiences):
            state, action, reward, next_state = experience
            
            pred = agent.pred_NN.forward(state)
            self.predictions[n] = pred.detach().clone()
             
            label = pred.detach().clone()
            label[action] = reward + gamma * max(agent.target_NN.forward(state))
            self.labels[n] = label    
            
    def __len__(self):
        return len(self.predictions)
    
    def __getitem__(self, idx):
        return self.predictions[idx], self.labels[idx]

# Run agent (main)

In [6]:
torch.set_printoptions(threshold=10_000)
import matplotlib.pyplot as plt
#initialize environment
env = gym.make("ALE/Pacman-v5",render_mode="rgb_array")
#preprocess environment
env = gym.wrappers.AtariPreprocessing(env, screen_size=84, grayscale_obs=True, frame_skip=1, noop_max=30, scale_obs = True)
#render_mode="rgb_array"
state, info = env.reset(seed=123, options={})


#hyperparams
epsilon = 0.5
epsilon_decay = 0.9
gamma = 0.75
experience_capacity = 10000
training_freq = 1000
copying_freq = 2000
samples_per_training_session = 500
samples_per_batch = 50

#initialize agent
agent = DQN_agent()

#load saved model to agent NNs
agent.pred_NN.load_state_dict(torch.load("model"))
agent.target_NN = copy.deepcopy(agent.pred_NN)

#select first action
action = random.randint(0, 4)

#get first states
state1, reward, terminated, truncated, info = env.step(action)
state2, reward, terminated, truncated, info = env.step(action)
state3, reward, terminated, truncated, info = env.step(action)
state4, reward, terminated, truncated, info = env.step(action)

#plot image, to se resolution
#img = state4.permute(1, 2, 0).numpy()
#plt.imshow(img)
#plt.show()

#convert first states to correct datatype and stack
state1 = TF.to_tensor(state1)     
state2 = TF.to_tensor(state2)
state3 = TF.to_tensor(state3)
state4 = TF.to_tensor(state4)
state_stack =torch.cat((state1,state2,state3,state4))

start_time = time.time()
epsilon_start_time = time.time()
elapsed_time = 0

max_time = 28800
training_time = 0
decay_freq = max_time/16

#for logging
training_session = 0
while_loop_nr = 0

while elapsed_time < max_time:
    experiences = []
    for i in range(1, experience_capacity):
        
        print('state stack: ', type(state_stack))
        #predict qvalues and select action
        pred = agent.pred_NN.forward(state_stack) 
        print('prediction: ', pred)
        action = agent.action(pred)
        print('choose action: ', action)
        
        #get next states
        next_state1, reward, terminated, truncated, info = env.step(action)
        next_state2, reward, terminated, truncated, info = env.step(action)
        next_state3, reward, terminated, truncated, info = env.step(action)
        next_state4, reward, terminated, truncated, info = env.step(action)
       
        
        #convert next state to correct type and stack
        next_state1 = TF.to_tensor(next_state1)
        next_state2 = TF.to_tensor(next_state2)
        next_state3 = TF.to_tensor(next_state3)
        next_state4 = TF.to_tensor(next_state4)
        next_state_stack = torch.cat((next_state1,next_state2,next_state3,next_state4))
        
        #add to experiences
        save_this = [state_stack, action, reward, next_state_stack] 
        experiences.append(save_this)

        if i % training_freq == 0:
            training_session += 1
            print(f'-----Training {training_session}-----')
            elapsed_time = time.time() - start_time
            #print(f'Elapsed time: {elapsed_time} seconds')
            #print("Epsilon: ", epsilon)
            experiences_train = random.choices(experiences, k=samples_per_training_session)
            epoch_loss = agent.train(experiences_train)
            print("Epoch loss: ", epoch_loss, "\n")  

        if i % copying_freq == 0:
            #print('-----Copying pred_NN to target_NN-----')
            #print(f'Elapsed time: {time.time() - start_time} seconds\n')
            agent.copy()
            elapsed_time = time.time() - start_time
            if elapsed_time > max_time:
                break
        
        #next state become current state and stack 
        state1 = next_state1
        state2 = next_state2
        state3 = next_state3
        state4 = next_state4
        state_stack = torch.cat((state1,state2,state3,state4))

    if(epsilon > 0.2 and (time.time() - epsilon_start_time) > decay_freq):
        print(f'------Epsilon decays after {(time.time() - epsilon_start_time)} seconds since last decay-----')
        print(f'old epsilon: {epsilon}')
        epsilon = epsilon * epsilon_decay 
        print(f'new epsilon: {epsilon}\n')
        epsilon_start_time = time.time()
    
        

print(f'*****stopped training after {elapsed_time} seconds*****\n')
print('*****Pickle Dumping model*****\n')

file = open('safetyPickleDump', 'wb')
pickle.dump(agent, file)
file.close()

print("*****Saving model*****")
torch.save(agent.pred_NN.state_dict(), "model")

env.close()

Using cpu device
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0179, -0.0353, -0.0107, -0.0451, -0.0285], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0179, -0.0353, -0.0107, -0.0451, -0.0286], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0179, -0.0353, -0.0107, -0.0451, -0.0286], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0179, -0.0352, -0.0107, -0.0451, -0.0285], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0179, -0.0353, -0.0107, -0.0451, -0.0285], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0179, -0.0353, -0.0107, -0.0451, -0.0286], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0183, -0.0346, -0.0114, -0.044

prediction:  tensor([-0.0176, -0.0351, -0.0124, -0.0440, -0.0280], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0169, -0.0350, -0.0114, -0.0436, -0.0277], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0172, -0.0353, -0.0116, -0.0431, -0.0279], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0177, -0.0353, -0.0112, -0.0433, -0.0277], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0177, -0.0354, -0.0113, -0.0435, -0.0279], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0175, -0.0353, -0.0110, -0.0441, -0.0279], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0176, -0.0350, -0.0109, -0.0433, -0.0281], grad_fn=<AddBackw

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose ac

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  t

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
c

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
c

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose ac

state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)

state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose ac

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose ac

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  0
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  3
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  4
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose ac

prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state s

state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  2
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  1
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
choose action:  tensor(2)
state stack:  <class 'torch.Tensor'>
prediction:  tensor([-0.0182, -0.0355, -0.0111, -0.0455, -0.0281], grad_fn=<AddBackward0>)
c

TypeError: object of type 'NoneType' has no len()