# Explaining Deep Q-Learning Experience Replay with SHapley Additive exPlanations


## Import Libraries

In [1]:
#https://gymnasium.farama.org/environments/atari/
#pip install gymnasium[atari]
#pip install gymnasium[accept-rom-license]
#pip install moviepy
#https://www.youtube.com/watch?v=hCeJeq8U0lo&t=447s
import datetime

#test environments
import hrl_gym #simulate addiction
import gymnasium as gym

#image preprocessing
from PIL import Image
from gymnasium.core import ObservationWrapper
from gymnasium.spaces.box import Box
import numpy as np
import matplotlib.pyplot as plt
plt.ioff() #prevent plots from being displayed automatically in the notebook

#n-step experience replay
from collections import namedtuple, deque

#deep learning
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from gymnasium.wrappers import RecordVideo #enviornment monitoring
import random

from sklearn.model_selection import train_test_split #for splitting memory into training and testing
import pandas as pd

import shap # explaining deep q learning model
from shap.plots._image import image as image_plotter
from shap.plots import colors
import math
import datetime #logging experiment time

from PIL import Image #converting state test images into shap inputs
import matplotlib.gridspec as gridspec #displaying shap graphs with states and q values

import seaborn as sns
import statsmodels.api as sm
import statsmodels.stats.multicomp as mc
from statsmodels.formula.api import ols

if torch.cuda.is_available():
  device = torch.device('cuda:0')
  print('Running on the GPU')
else:
  device = torch.device('cpu')
  print('Running on the CPU')

  from .autonotebook import tqdm as notebook_tqdm


Running on the GPU


## Deep Q-Learning Agent
A learning agent that can control from vector input

In [None]:
class Network(nn.Module):  
    def __init__(self, input_size, nb_action):
        #ref: https://discuss.pytorch.org/t/super-model-in-init/97426
        #super(Network, self).__init__()
        super().__init__() #pytorch's NN model
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 30)#arbitrarily chose 30 hidden layers
        self.fc2 = nn.Linear(30, nb_action)
    
    #base pytorch NN model runs and we override the
    #forward function with our own relu activation function
    def forward(self, state):
        x = F.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values
    
# This model is used for training our DQN model. It stores the transitions that the agent observes, 
# allowing us to reuse this data later. By sampling from it randomly, the transitions that build up a 
# batch are decorrelated. It has been shown that this greatly stabilizes and improves the DQN training procedure.
class ReplayMemory(): 
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
    
    def push(self, event):
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0] #forget first transition
    
    def sample(self, batch_size):
        samples = zip(*random.sample(self.memory, batch_size)) 
        return map(lambda x: Variable(torch.cat(x, 0)), samples) #tensor and gradient.
        #this contains sample of memory. get random sample from memory with given size
        # before list = [[state,action,reward], [state,action,reward]] zip*=> [[state], [action], [reward]]
        #the * operator unpacks a list and applies it to a function
        #a pytorch variable contains a tensor and a gradient. in order for pytorch to differenciate with respect to a tensor, we need a tensor and gradient
        # For each batch within a sample, we then have to concatenate it to the first dimension because 
        # everything needs to be aligned so state action and reward align to same time t
        # Lambda is used to do a short computation inline without def and if, etc.


#Comprised of a neural network model and a memory model. 
#* The NN takes in observation of sensor data (brain chemicals) and chooses actions based on the relu activation function. 
#* The agent will sample some of the sensor data and store in long term memory to be reused later for training. 
#* We also use the Adam Optimisation algorithm. This is an extension to stocastic gradient desent to update weights of the neural network. 
class Dqn():
    def __init__(self, input_size, nb_action, gamma, capacity=100000, learning=0.001, temperature=100, sample_rate=100, random_episodes):
        self.gamma = gamma
        self.reward_window = []
        self.cumulative_rewards = []
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(capacity) #100k
        self.sample_rate = sample_rate
        self.optimizer = optim.Adam(self.model.parameters(), lr = learning)
        self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_action = 0
        self.last_reward = 0
        self.temp = temperature
        self.random_episodes = random_episodes
        self.explainer = None #used to calculate shap values
        self.shap_values = [] #store collected shap values

        #create table for memory data collection
        self.df_shap = pd.DataFrame(columns=['batch_state', 'batch_next_state', 'batch_action', 'batch_reward'])

    # select action for x duration
    def select_action(self, state):
        #softmax converts numbers into probabilities
        #Q values are the output of the neural network
            #view q values
        q_value_tensor = self.model(Variable(state, volatile = True)) 
        q_values = [q_value.detach().numpy() for q_value in q_value_tensor]
            #print(q_values)
            #viz q value for each action, (T value by user choice)
            #pie chart 0/1 #seperate action
        # Temperature value = 100. closer to zero the less sure the NN will be to taking the action
        probs = F.softmax(self.model(Variable(state, volatile = True))*self.temp) # T=100
        
        action_prob = [prob.detach().numpy() for prob in probs]

        action = probs.multinomial(num_samples=1) # action taken
        #q_values[0][action] #quality of taking action in state
        #action_prob[0][action] #probability of taking action

        #return the action taken, q values and probabilities of taking action given state.
        #return action.data[0,0], q_values[0][action], action_prob[0][action]
        return action.data[0,0], q_values, action_prob
    
    #When ai reaches a new state we update everything
    #update action, last action becomes the new action but also the last state becomes the new state and last reward becomes the new state
    # we then get this new transition and update our reward window to track training progress and exploration
    def update(self, reward, new_signal, episode):
        new_state = torch.Tensor(new_signal).float().unsqueeze(0)
        self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
        action, q, p = self.select_action(new_state)

        # Initialize an empty list to store the sampled inputs for SHAP Explainer
        sampling_episode = []
        sampled_inputs = []
        sampled_targets = []
        episode_sampled = False
        if len(self.memory.memory) > self.sample_rate: #100
            batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(self.sample_rate)

            #first converting tensors to numpy arrays
            batch_state_np = batch_state.numpy()
            batch_next_state_np = batch_next_state.numpy()
            batch_action_np = batch_action.numpy()
            batch_reward_np = batch_reward.numpy()


            #%%
            #We take some training samples for shap.deepxplainer to create heatmap images or we use them for training. Not both!
            if episode in self.random_episodes and not episode_sampled:
                sampling_episode.append(episode) #record when sample was taken
                sampled_inputs.append(inputs)
                sampled_targets.append(targets)
                episode_sampled = True # Set the flag to True
            else:
                #we convert them to tensor variables
                inputs, targets = Variable(inputs), Variable(targets)
                #like during eligibility_trace we get predicted q values from the cnn model
                predictions = cnn(inputs)
                loss_error = loss(predictions, targets)
                optimizer.zero_grad()
                loss_error.backward()
                optimizer.step()
                
                episode_sampled = False # Reset the flag for the next iteration
            #%%
            # Split the data into training and testing sets
            state_train, state_test, next_state_train, next_state_test, action_train, action_test, reward_train, reward_test = train_test_split(
                batch_state_np, batch_next_state_np, batch_action_np, batch_reward_np, test_size=0.1, random_state=42)
            
            # Convert back to tensors
            state_train = torch.tensor(state_train)
            next_state_train = torch.tensor(next_state_train)
            reward_train = torch.tensor(reward_train)
            action_train = torch.tensor(action_train)

            state_test = torch.tensor(state_test)
            next_state_test = torch.tensor(next_state_test)
            reward_test = torch.tensor(reward_test)
            action_test = torch.tensor(action_test)

            #use test for SHAP
            self.df_shap.loc[len(self.df)] = [state_test, next_state_test, reward_test, action_test]

            #now train the DQN
            self.learn(state_train, next_state_train, reward_train, action_train)
            #self.learn(batch_state, batch_next_state, batch_reward, batch_action)
            #X=batch_state and y=batch_next_state
        self.last_action = action
        self.last_state = new_state
        self.last_reward = reward
        self.reward_window.append(reward)
        self.cumulative_rewards.append(sum(self.reward_window))
        #if len(self.reward_window) > 1000:
        #    del self.reward_window[0]
        return action, q, p
    
    #to train our AI
    #forward propagation then backproagation
    # get our output, target, compare our output to the target to compute the loss error
    # backproagate loss error into the nn and use stochastic gradient descent we update the weights according to how much they contributed to the loss error
    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        #when we pass in batch state the output will be all possible actions
        # we use gather passing in 1 and batch action because we only want the chosen action
        # however batch_state has a fake dimention from unsqueeze in __init__ of NN and batch action doesn't
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1) #pass state into neural network input layer., gather outputs a new tensor. input dimension
        #gets the q values for all the next states with respect to action (i.e 1) then get the max
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        target = self.gamma*next_outputs + batch_reward
        td_loss = F.smooth_l1_loss(outputs, target)
        self.optimizer.zero_grad()
        td_loss.backward(retain_graph = True)
        self.optimizer.step()
    
    def score(self):
        """ Current sum of all values in the reward window """
        return sum(self.reward_window)
    
    def cumulative_reward(self):
        """ Sum of all values in the reward window at step/time"""
        return self.cumulative_rewards
    
    def rewards(self):
        """ current value in the reward window at step/time"""
        return self.reward_window

## Deep Convelutional Q-Learning Agent
A learning agent that can control from pixel input

In [2]:
### Convelutional Neural Network
class CNN(nn.Module):
    def __init__(self, number_actions):
        super(CNN, self).__init__() # call nn module init
        #define what each layer in CNN is
        self.convolution1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.convolution2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.convolution3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=2)

        #pass image through convolution layers and get neurons in a flatten layer to pass into a neural network
        self.fc1 = nn.Linear(in_features=self.count_neurons((1, 80, 80)), out_features=40) #1 is number of channels so black and white images, 80 80 is width and height
        self.fc2 = nn.Linear(in_features=40, out_features=number_actions)

    def count_neurons(self, image_dim):#image_dim for example 80px x 80px in size
        """Will give us the number of neurons after convolutions are applied"""
        #we need to first create a fake image (1 batch, 80px x 80px in size), * allows image_dim to be passed as a list
        fake_image = Variable(torch.rand(1, *image_dim))

        #pass image into first layer and max pool result then activate all neurons in max pool layer
        x = F.relu(F.max_pool2d(self.convolution1(fake_image), 3, 2)) #kernal size is 3, #stride is 2

        #pass image into second layer and max pool result then activate all neurons in max pool layer
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2)) #kernal size is 3, #stride is 2

        #pass image into third layer and max pool result then activate all neurons in max pool layer
        x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2)) #kernal size is 3, #stride is 2
        
        #now we get all pixels in third layer and flatten it. we get the data, view what's inside it then we get all the pixels and put it into 1 dimension
        return x.data.view(1, -1).size(1)
    
    def forward(self, x):       
        x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2))
        #propagate data from convolutional layers to hidden layers by first flattening convolutional layers
        #flatten third layer by taking all pixels and all channels in third layer and arrange one after another
        x = x.view(x.size(0), -1) #RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x49 and 3136x40)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

### Softmax Policy
class SoftmaxPolicy(nn.Module):
    """data from the CNN is passed to softmax to play an action.
    Temperature (often denoted as τ or tau) is a hyperparameter that controls the level of randomness or exploration in the action selection process. 
    - High T values (e.g., > 5): A high temperature encourages a high level of exploration and randomness in action selection. This can be useful when you want the agent to explore a wide range of actions to discover their effects and learn about the environment.
    - Moderate T values (e.g., 1 - 5): A moderate temperature strikes a balance between exploration and exploitation. It allows the agent to favor actions with higher Q-values while still exploring other options. 
    - Low T values (e.g., < 1): A low temperature reduces the randomness in action selection, making the agent more deterministic and focused on exploiting actions with higher Q-values. This can be useful when the agent has learned a relatively good policy and you want to minimize unnecessary exploration.

    """
    def __init__(self, T=10):
        super(SoftmaxPolicy, self).__init__()
        self.T = T

    def forward(self, outputs, number_actions=1):
        probs = F.softmax(outputs * self.T, dim=1)
        actions = probs.multinomial(num_samples=number_actions)
        return actions

### Agent (DCQ Learning System )
class DCQ():
    def __init__(self, CNN, SoftmaxPolicy):
        self.cnn = CNN
        self.softmax = SoftmaxPolicy

    def __call__(self, inputs):#comes from NStepProgress -> np.array([state])
        """similar to init function but it allows this AI class 
        instance to be treated like a function, not modifying the initial instance"""
        #receive images from the game by converting image into a numpy array then into a torch tensor, then put a torch tensor into a torch variable with a gradient
        input = Variable(torch.from_numpy(np.array(inputs, dtype = np.float32)))
        output = self.cnn(input)
        actions = self.softmax(output)
        return actions.data.numpy()

### Experience Replay
#### N-Step
class NStepProgress:
    """This class allows the AI to progress on several (n_step) steps"""
    def __init__(self, env, ai, n_step):
        self.ai = ai
        self.rewards = []
        self.env = env
        self.n_step = n_step
        self.step = namedtuple('Step', ['state', 'action', 'reward', 'done']) #Defining one Step
    def __iter__(self):
        """Repeats but only incrementing parent loop when yield is called"""
        state, info = self.env.reset()
        history = deque()
        reward = 0.0
        while True: #go on forever until parent flag in ReplayMemory.runstep triggered
            #select an action
            action = self.ai(np.array([state]))[0][0] #agent.update
            #get reward and next state
            next_state, r, terminated, truncated, info = self.env.step(action)
            done = terminated or truncated #if  game has some kind of max_steps or timeout, read 'truncated' with 'terminated'
            reward += r #sum reward for every step
            #add to stacked frame
            history.append(self.step(state=state, action=action, reward=r, done=done))
            while len(history) > self.n_step +1: #Always keep it n-steps e.g 10
                history.popleft()
            if len(history) == self.n_step + 1:#create our stacked tuple when finished
                yield tuple(history)
            state = next_state
            if done: #either terminated or truncated signaling that the game has ended
                if len(history) > self.n_step + 1:
                    history.popleft()
                while len(history) >= 1:
                    yield tuple(history)
                    history.popleft()
                self.rewards.append(reward) #save accumulated reward per done
                reward = 0.0
                state, info  = self.env.reset()
                history.clear()
    
    def rewards_steps(self):
        """stores total reward accumulated from start to done trigger"""
        rewards_steps = self.rewards
        self.rewards = []
        return rewards_steps

#### Replay Memory
class ReplayMemory:
    """This class is modified to do n-step learning"""
    def __init__(self, n_steps, capacity = 10000):
        self.capacity = capacity # https://github.com/juliuskunze/nevermind/blob/master/nevermind/configurations.py
        self.n_steps = n_steps
        self.n_steps_iter = iter(n_steps) #creates an object that can be accessed one element at a time using __next__()
        self.buffer = deque()

    def sample_batch(self, batch_size): # creates an iterator that returns random batches
        ofs = 0 #we use an offset to keep track of starting index for each batch
        #we get samples from experience replay
        vals = list(self.buffer)
        #then randomly suffle them
        np.random.shuffle(vals)
        #now we check to see if we have enough samples in the buffer to make a batch if not we wait.
        while (ofs+1)*batch_size <= len(self.buffer):
            yield vals[ofs*batch_size:(ofs+1)*batch_size] #we slice from the offset position to the e.g 128 to 256
            ofs += 1

    def run_steps(self, steps):
        """Runs environment wait 10 consecutive steps of (state, action, reward, done) then save to buffer
        until n sample steps are saved in buffer. Does not iterate until n_steps_iter collects 10 steps"""

        while steps > 0:
            entry = next(self.n_steps_iter) # run subtask as many times as it takes to return 10 consecutive steps of (state, action, reward, done)
            self.buffer.append(entry) # we put e.g 200 n-step samples for the current episode, e.g 200 samples x 10 steps = 2,000 steps per episode
            steps -= 1

        while len(self.buffer) > self.capacity: # we accumulate no more than the capacity (e.g 10,000)
            self.buffer.popleft()
            
### N-Step Q-Learning
def eligibility_trace(batch, cnn, g=0.99):#batch is a sample of 128 10-steps where each step is ['state', 'action', 'reward', 'done'] so 1,280 transitions from memory
    """Asynchronous N-Step Q-Learning
    learns the cumulative rewards and cumulative targets
    on n-steps instead of one step like DQL"""
    gamma = g
    inputs = []
    targets = []
    for series in batch: #series of 10 transitions in our batch
        #get the first and last image as the input. Convert fron numpy to torch variable
        input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype=np.float32)))
        output = cnn(input) #this is the prediction from the ai

        #if the last transition of the series is not done we get the max q values
        cumulative_reward = 0.0 if series[-1].done else output[1].data.max()

        #start with the last step and go backwards to the first step
        for step in reversed(series[:-1]):#reversed goes backwards 
            state = series[0].state # first state we need. This is where we started
            target = output[0].data # this is the q value of the input state of the first step. This is what we thought we would get being in this state

            # what new reward did we actually get
            cumulative_reward = step.reward + gamma * cumulative_reward 
            target[series[0].action] = cumulative_reward # this is the q value we actually got

            inputs.append(state) # we append our first state
            targets.append(target) #we append the actual target q value for the first state

            #output the input and the target after being processed through eligibility_trace
            #we now have the first state and the target q values for the first state over 10 steps
            return torch.from_numpy(np.array(inputs, dtype=np.float32)), torch.stack(targets)#we stack targets together

### Image Preprocessing
class ImagePreprocessor(ObservationWrapper):
    """Custom Image Preprocessor similar to Atari standard in gymnasium"""
    def __init__(self, env, height = 64, width = 64, grayscale = True, crop = lambda img: img):
        super(ImagePreprocessor, self).__init__(env)
        self.img_size = (height, width)
        self.grayscale = grayscale
        self.crop = crop
        n_colors = 1 if self.grayscale else 3
        self.observation_space = Box(0.0, 1.0, [n_colors, height, width])

    def observation(self, img):
        img = self.crop(img)
        img = Image.fromarray(img)
        img = img.resize(self.img_size)
        if self.grayscale:
            img = img.convert('L')  # Convert to grayscale
        else:
            img = img.convert('RGB')  # Convert to RGB if necessary

        #view preprocessed image
        #plt.imshow(img)
        #plt.show()

        img = np.array(img)
        #adds a new dimension to the array to represent the single color channel, resulting in the desired shape
        img = np.expand_dims(img, axis=2)
        
        #if img.ndim == 2:  # Add channel dimension if missing
        #    img = np.expand_dims(img, axis=2)
        img = np.transpose(img, (2, 0, 1))
        img = img.astype('float32') / 255

        #check dim of new image
        #print("image was preprocessed to: " + str("greyscale" if self.grayscale else "RGB") + " with shape " + str(img.shape))
        return img

### Moving Average Reward (for evaluation) on n-steps
class MA:
    def __init__(self, size):
        self.list_of_rewards = []
        self.size = size
    def add(self, rewards):
        """adds step rewards until nth step then removes oldest rewardm leaving 100 steps of reward saved per episode"""
        if isinstance(rewards, list):
            self.list_of_rewards += rewards
        else:
            self.list_of_rewards.append(rewards)
        while len(self.list_of_rewards) > self.size:
            del self.list_of_rewards[0]
    def average(self):
        """gets the average reward per nth step"""
        if len(self.list_of_rewards) > 0:
            return np.mean(self.list_of_rewards)
        else:
            return 0.0

class CustomMask(Dataset): 
    """ Defines a custom dataset mask for SHAP Deep Explainer"""
    #ref: https://blog.paperspace.com/deep-learning-model-interpretability-with-shap/
    def __init__(self, data, transforms=None):
        self.data = data
        self.transforms = transforms
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        image = self.data[idx]
        if self.transforms!=None:
            image = self.transforms(image)
        return image

## Experiments

### Simulation

In [3]:
vector = False # run tests on vector or image data

if vector:
    sims = ['CartPole-v1', 
    'HRLSim-v0',
    'LunarLander-v2',]
    for sim in sims:
        sim_name = sim # obs_type="ram", frameskip=4, repeat_action_probability=0.25
        filename = sim_name.replace("/", "_").lower()
        # stella emulator for atari = https://stella-emu.github.io/
        # https://stella-emu.github.io/docs/index.html#Remapping:~:text=count%20and%20associated-,frames%20per%20second,-%2C%20bankswitch%20and%20display
        # assume 60 fpS

        capacity = [1000000, 500000, 100000, 50000, 10000, 5000, 1000, 500]
        labels = ['1M', '500k', '100k', '50k', '10k', '5k', '1k', '500']

        episodes = 200 #200
        n = 10 # n-steps
        samples = 200 #step samples to take per episode
        bat = 128 + 26 #training batches randomly sampled from experience replay + extra samples to accommodate shap training
        rw = 10 # reward window (e.g 100 steps)
        shap_test_sample_ratio = 0.20 # e.g 10%


        #create table for evaluating capacity against reward
        df_rewards = pd.DataFrame(columns=['sim','capacity', 'episodes', 'reward'])

        for i, cap in enumerate(capacity):
            #get current timestamp
            start_datetime = datetime.datetime.now()
            start_datetime = start_datetime.strftime("%Y-%m-%d %H:%M:%S")# Format the date and time as a string
            print(filename.upper() + " EXPERIMENT " + str(i) + ": Date-Time: " + str(start_datetime) + ", Capacity: " + str(labels[i]))

            #### Setup Preprocessed Environment
            print("Setting up environment")

            title = sim_name[4:-3]

            #environment
            env = gym.make(sim)#render_mode='human'
            state, info = env.reset(seed=42)
            print("actions: " + str(env.action_space.n))
            print(env.action_space)
            print("observation vector: " + str(env.observation_space.shape[0]))
            print(state)

            ##### Create DCQL Agent
            print("Creating Agent with capacity set to: " + str(cap))
            # Agent - a neural network that represents our Q-function
            s = env.observation_space.shape[0] # sensors
            a = env.action_space.n # actions
            g = 0.99 #gamma
            c = cap #1000000 #10000 #memory capacity
            b = bat #training batches #128
            l = 0.001 #learning rate
            t = 1.0 #softmax policy temperature rate (tau). T controls the level of randomness or exploration in the action selection process. T is temperature high, meaning other actions are more explored

            #define a random selection size such as 10%
            random_select_size = math.ceil(episodes * shap_test_sample_ratio) 


            # Create an array of random episode numbers
            random_episodes = np.random.choice(range(episodes), size=random_select_size, replace=False)

            agent = Dqn(s,a,g,c,l,t,b, random_episodes)

            ma = MA(rw) #used to get the average of the last n-step rewards
            
            #### Simulate the environment
            print("Simulating environment")
            for episode in range(episodes):
                state, info = env.reset()
                done = False
                reward = 0
                while not done:
                    env.render()       
                    action, qValues, transitionProbs = agent.update(reward, state, episode)
                    next_state, r, terminated, truncated, info = env.step(int(action))
                    done = terminated or truncated #if  game has some kind of max_steps or timeout, read 'truncated' with 'terminated'
                    reward += r #sum reward for every step
                    done = terminated or truncated #max duration should be 200 for episode length
                
                ma.add(reward) #100 rewards kept

                avg_reward = round(ma.average(),2) #average 100 rewards
                print("Episode: %s, Reward: %s" % (str(episode), str(avg_reward)))

                #save rewards per given nth step
                new_row = [title, labels[i], episode, avg_reward]
                df_rewards.loc[len(df_rewards)] = new_row
            env.close()
            
            #Current sum of all values in the reward window
            #print(agent.score())
            
            #get end timestamp
            end_datetime = datetime.datetime.now()
            end_datetime = end_datetime.strftime("%Y-%m-%d %H:%M:%S")# Format the date and time as a string
            print("END - Date-Time: " + str(end_datetime))
            #SHAP here...

else:
    """
    #split into different runs as full 20 difficult on limited hardware
    sims = ['ALE/AirRaid-v5', 
    'ALE/Asterix-v5', 
    'ALE/Asteroids-v5', 
    'ALE/Bowling-v5', 
    'ALE/Breakout-v5',
    'ALE/DemonAttack-v5',
    'ALE/Freeway-v5',
    'ALE/Gravitar-v5']

    sims = ['ALE/Jamesbond-v5',
    'ALE/MontezumaRevenge-v5',
    'ALE/MsPacman-v5',
    'ALE/Pong-v5',
    'ALE/PrivateEye-v5',
    'ALE/Qbert-v5',
    'ALE/Seaquest-v5',
    'ALE/SpaceInvaders-v5', 
    'ALE/Venture-v5',
    'ALE/WizardOfWor-v5',
    'ALE/YarsRevenge-v5',
    'ALE/Zaxxon-v5'
    ]

    """

    sims = ['ALE/SpaceInvaders-v5', 
    'ALE/Venture-v5',
    'ALE/WizardOfWor-v5',
    'ALE/YarsRevenge-v5',
    'ALE/Zaxxon-v5']


    for sim in sims:
        sim_name = sim # obs_type="ram", frameskip=4, repeat_action_probability=0.25
        filename = sim_name.replace("/", "_").lower()
        # stella emulator for atari = https://stella-emu.github.io/
        # https://stella-emu.github.io/docs/index.html#Remapping:~:text=count%20and%20associated-,frames%20per%20second,-%2C%20bankswitch%20and%20display
        # assume 60 fpS

        capacity = [1000000, 500000, 100000, 50000, 10000, 5000, 1000, 500]
        labels = ['1M', '500k', '100k', '50k', '10k', '5k', '1k', '500']

        episodes = 200 #200
        n = 10 # n-steps
        samples = 200 #step samples to take per episode
        bat = 128 + 26 #training batches randomly sampled from experience replay + extra samples to accommodate shap training
        rw = 10 # reward window (e.g 100 steps)
        shap_test_sample_ratio = 0.20 # e.g 10%

        #create table for evaluating capacity against reward
        df_rewards = pd.DataFrame(columns=['sim','capacity', 'episodes', 'reward'])


        for i, cap in enumerate(capacity):
            #get current timestamp
            start_datetime = datetime.datetime.now()
            start_datetime = start_datetime.strftime("%Y-%m-%d %H:%M:%S")# Format the date and time as a string
            print(filename.upper() + " EXPERIMENT " + str(i) + ": Date-Time: " + str(start_datetime) + ", Capacity: " + str(labels[i]))
            
            #### Setup Preprocessed Environment
            print("Setting up environment")
            #env = gym.make(sim)
            #We first preprocess the image by applying a greyscale and reducing the size to 80px.
            title = sim_name[4:-3]
            env = ImagePreprocessor(gym.make(sim_name), width=80, height=80, grayscale=True) #width=210, height=160 # , render_mode='human'
            state, info = env.reset(seed=42)
            
            ##### Create DCQL Agent
            print("Creating Agent with capacity set to: " + str(cap))
            a = env.action_space.n # actions
            g = 0.99 #gamma
            c = cap #1000000 #10000 #memory capacity
            b = bat #training batches #128
            l = 0.001 #learning rate
            t = 1.0 #softmax policy temperature rate (tau). T controls the level of randomness or exploration in the action selection process. T is temperature high, meaning other actions are more explored
        
            
            #Build the Agent
            cnn = CNN(a)
            softmax = SoftmaxPolicy(T=t)

            agent = DCQ(CNN=cnn, SoftmaxPolicy=softmax)

            # Set up Experience Replay
            n_steps = NStepProgress(env=env, ai=agent, n_step=n) #instead of learning every transition we learn every nth transition
            memory = ReplayMemory(n_steps=n_steps, capacity=c) #store the last c steps in memory e.g 1millm 500k etc.

            #set learning parameters
            loss = nn.MSELoss()#calculate mean squared error loss
            optimizer = optim.Adam(cnn.parameters(), lr=l) #use adams optimiser with a learning rate of 0.001

            ma = MA(rw) #used to get the average of the last n-step rewards

            # Initialize an empty list to store the sampled inputs for SHAP Explainer
            # set a sampling rate based on episodes as batches could be zero if not enough available at start

            #define a random selection size such as 10%
            random_select_size = math.ceil(episodes * shap_test_sample_ratio) 

            # Create an array of random episode numbers
            random_episodes = np.random.choice(range(episodes), size=random_select_size, replace=False)

            # Initialize an empty list to store the sampled inputs for SHAP Explainer
            sampling_episode = []
            sampled_inputs = []
            sampled_targets = []
            episode_sampled = False

            #### Simulate the environment
            print("Simulating environment")
            for episode in range(episodes):
                #run the game for 200 runs of 10 steps and push sample transitions into memory
                memory.run_steps(steps=samples) # e.g 200 steps sampled per episode
                #sample 128 x10['state', 'action', 'reward', 'done'] or 1,280 transitions from memory if there is enough in memory to sample otherwise skip
                for batch in memory.sample_batch(b): #b= e.g 128 batches
                    #agent training

                    #creates the training set for the agent, 
                    # we get target discounted q values for the first state in the batch over 10 steps
                    inputs, targets = eligibility_trace(batch, cnn, g)

                    #We take some training samples for shap.deepxplainer to create heatmap images or we use them for training. Not both!
                    if episode in random_episodes and not episode_sampled:
                        sampling_episode.append(episode) #record when sample was taken
                        sampled_inputs.append(inputs)
                        sampled_targets.append(targets)
                        episode_sampled = True # Set the flag to True
                    else:
                        #we convert them to tensor variables
                        inputs, targets = Variable(inputs), Variable(targets)
                        #like during eligibility_trace we get predicted q values from the cnn model
                        predictions = cnn(inputs)
                        loss_error = loss(predictions, targets)
                        optimizer.zero_grad()
                        loss_error.backward()
                        optimizer.step()
                
                episode_sampled = False # Reset the flag for the next iteration
                        
                rewards_steps = n_steps.rewards_steps() # accumulated reward per 200 steps
                ma.add(rewards_steps) #100 rewards kept

                avg_reward = round(ma.average(),2) #average 100 rewards
                print("Episode: %s, Reward: %s" % (str(episode), str(avg_reward)))
                
                #save rewards per given nth step
                new_row = [title, labels[i], episode, avg_reward]
                df_rewards.loc[len(df_rewards)] = new_row
            env.close()

            #get end timestamp
            end_datetime = datetime.datetime.now()
            end_datetime = end_datetime.strftime("%Y-%m-%d %H:%M:%S")# Format the date and time as a string
            print("END - Date-Time: " + str(end_datetime))

            print("Saving SHAP graphs")
            #### Shap graphs
            #took 10% of total episodes played as sample experience replay to be used as shap training/test data. So if 200 episodes we took 20 episodes. The agent has not seen these images yet
            print(str(len(sampled_inputs)) + " test images available:")

            print("Episodes samples were taken from:")
            print(sampling_episode)
            
            #convert sample states into PIL images
            sampled_inputs_reshaped = []
            for tensor in sampled_inputs:
                # Convert tensor to numpy array and reshape
                image_array = tensor.numpy().squeeze()  # Remove singleton dimensions
            
                # Convert the numpy array to a PIL Image
                pil_image = Image.fromarray((image_array * 255).astype(np.uint8), mode='L')  # Convert to grayscale
                sampled_inputs_reshaped.append(pil_image)
            
            #shap training set
            print("Shap training set: ")
            mask = sampled_inputs_reshaped[::2] #get all even images from index zero as training
            print(len(mask))

            #  turning mask to pytorch dataset then into tensors
            mask = CustomMask(mask, transforms=transforms.ToTensor())
            print("Masks created:")
            print(len(mask))
            
            print("Moving CNN to GPU and creating a mask loader")
            # Move the model to the same device as the input data
            cnn.to(device)

            # Convert the model weights to the same data type as the input data
            cnn.float()  # Or whatever appropriate data type

            #  creating dataloader for mask
            mask_loader = DataLoader(mask, batch_size=10)

            print("Training Deep SHAP Explainer please wait...")
            #  creating explainer for model behaviour
            for images in mask_loader:
                images = images.to(device)
                explainer = shap.DeepExplainer(cnn, images)
                break

            print("Generating SHAP images and saving...")
            #  converting image to tensor
            test_images = sampled_inputs_reshaped[1::2] # get all odd values from index 1 to use as test images
            test_episodes = sampling_episode[1::2]
            for t, test_image in enumerate(test_images):
                
                image = transforms.ToTensor()(test_image)
                image = image.to(device)

                #  deriving shap values for image of interest based on model behaviour
                shap_values = explainer.shap_values(image.view(-1, 1, 80, 80))

                #  preparing for visualization by changing channel arrangement
                shap_numpy = [np.swapaxes(np.swapaxes(x, 1, -1), 1, 2) for x in shap_values]
                image_numpy = np.swapaxes(np.swapaxes(image.view(-1, 1, 80, 80).cpu().numpy(), 1, -1), 1, 2)
                shap_fig = plt.figure()
                #  producing shap plots
                shap.image_plot(shap_numpy, image_numpy, show=False, labels=env.unwrapped.get_action_meanings())
                plt.savefig("./../plots/shap/" + filename +"_"+ str(labels[i]) + "_shap_ep_"+ str(test_episodes[t]) +".png")
                plt.close()

            print("Saving Reward Graphs...")
            #### reward/episode graph
            # Save and plot reward
            reward_fig = plt.figure()
            rewards_data = df_rewards[df_rewards['capacity'] == labels[i]]['reward'].values
            plt.title(title.capitalize() + ": " + str(labels[i]))
            plt.xlabel("Episodes")
            plt.ylabel("Average Reward/" + str(rw) + " steps")
            plt.plot(rewards_data)

            # Draw a vertical line at the optimal point
            plt.axhline(y=round(np.mean(rewards_data),2), color='r', linestyle='--', label='Optimal Point')

            # Draw red dots at the sampled experiences SHAP values will appear from
            plt.scatter(test_episodes, [rewards_data[exp] for exp in test_episodes],
                        marker='x', color='r', label='Experience Sampled', zorder=5)

            plt.tight_layout()
            reward_fig.savefig("./../plots/rewardplots/" + filename +"_"+ str(labels[i]) + "_reward.png") #must be before show to save correctly
            #plt.show()
            plt.close()

            print("Saving Q-value/action graphs...")
            #### Q-values/Actions Taken/Episode graph

            #save and plot q-values from sampled experience
            # Iterate through sampled_targets

            test_targets = sampled_targets[1::2]
            action_labels = env.unwrapped.get_action_meanings()

            for s in range(len(test_targets)):
                # Create a figure
                qvalue_fig = plt.figure(figsize=(10, 5))

                # Iterate through the actions
                for al in range(len(action_labels)):
                    # Initialize an empty array to store the y-values for this action
                    y_values = []

                    # Extract the y-values for the current action 'i' from all series
                    for st in range(len(test_targets)):
                        y_values.append(np.array(test_targets[st][0][al]))

                    # Create a line plot for this action using all series' y-values
                    plt.plot(test_episodes, y_values, label='Action ' + str(action_labels[al]))

                # Set the labels and title
                plt.xlabel('Episodes')
                plt.ylabel('Q-values')

                # Draw a vertical line at sample point point
                plt.axvline(x=test_episodes[s], color='r', linestyle='--', label='State: '+ str(test_episodes[s]))

                # Set x-axis ticks for each value in test_episodes
                plt.xticks(test_episodes,  rotation=90)

                # Add a legend
                plt.legend()

                plt.tight_layout()

                # Save the line chart to a file
                qvalue_fig.savefig("./../plots/qvalueplots/" + filename +"_"+ str(labels[i]) + "_qvalues_ep_"+ str(test_episodes[s]) +".png")
                
                # Display the line chart
                #plt.show()
                plt.close()

            #### reward graph showing sample points
            for s in range(len(test_targets)):
            
                # Save and plot reward
                reward_test_fig = plt.figure(figsize=(5, 5))
                plt.title(title.capitalize() + ": " + str(labels[i]))
                plt.xlabel("Episodes")
                plt.ylabel("Average Reward/" + str(rw) + " steps")
                plt.plot(rewards_data)

                # Draw a vertical line at sample point point
                plt.axvline(x=test_episodes[s], color='r', linestyle='--', label='State: '+ str(test_episodes[s]))

                # Draw red dots at the sampled experiences
                plt.scatter(test_episodes, [rewards_data[exp] for exp in test_episodes],
                                marker='x', color='r', label='Experience Sampled', zorder=5)
            
                plt.tight_layout()
                reward_test_fig.savefig("./../plots/rewardtest/" + filename +"_"+ str(labels[i]) + "_reward_ep_"+ str(test_episodes[s]) +".png") #must be before show to save correctly
                #plt.show()
                plt.close()

            #CREATE SHAP GIF
            # List to store frames for the GIF
            gif_frames = []

            for s in range(len(test_targets)):
                # Load images
                reward_sample = Image.open('./../plots/rewardtest/' + filename +'_' + str(labels[i]) +'_reward_ep_' + str(test_episodes[s]) +'.png') 
                q_values = Image.open('./../plots/qvalueplots/' + filename +'_' + str(labels[i]) + '_qvalues_ep_' + str(test_episodes[s]) +'.png')
                shap_values = Image.open('./../plots/shap/' + filename + '_' + str(labels[i]) +'_shap_ep_' + str(test_episodes[s]) + '.png')

                # Get dimensions of input images
                rs_width, rs_height = reward_sample.size  # Assuming the square images have the same dimensions
                qv_width, qv_height = q_values.size
                s_width, s_height = shap_values.size
                new_width = s_width
                new_height = (qv_height + s_height)

                # Create a new image with the calculated dimensions
                new_image = Image.new('RGB', (new_width, (new_height)), (255, 255, 255))


                # Paste the square images on top
                new_image.paste(reward_sample, (240, 0))
                new_image.paste(q_values, (310 + s_width - rs_width - qv_width, 1))

                # Paste the landscape image on the bottom
                new_image.paste(shap_values, (0, qv_height))

                # Save the new image
                new_image.save('./../plots/shapexplainer/'+ filename +'_'+ str(labels[i]) +'_ep_' + str(test_episodes[s]) +'.png')

                #pil_image = Image.open('images/thumbnail.webp')
                #display(new_image)

                #add image to gif array
                gif_frames.append(new_image)

            # Save the list of frames as a GIF
            gif_frames[0].save('./../plots/shapexplainer/'+ filename +'_'+ str(labels[i]) +'_shap.gif', save_all=True, append_images=gif_frames[1:], loop=0, duration=200)
            #gif_frames[0].show()
                
        print("Saving rewards to csv")
        x = datetime.datetime.now()
        file_name = str(x.year) + "_" + str(x.month) + "_" + str(x.day) + "_" + str(x.strftime("%H")) + "_" + str(x.strftime("%M")) + "_" + str(x.strftime("%S")) + "_" + filename + ".csv"
        df_rewards.to_csv(file_name, index=False, encoding='utf-8')

        # Create a box plots using Seaborn for Evaluation
        boxplot_fig = plt.figure()
        unique_capacities = df_rewards['capacity'].unique()

        print("Saving boxplot of results")
        #### Agent's final score to determine how it performed
        average_accumulated_reward = round(sum(df_rewards['reward'].values),0)
        print("Average Accumulated Reward: " + str(average_accumulated_reward))
        num_unique_capacities = len(unique_capacities)
        p = []
        for c in range(num_unique_capacities):
            if c<1:
                p.append('#FFF380')
            else:
                p.append('#8FD9F6')

        ax = sns.boxplot(x='capacity', y='reward', data=df_rewards, width=0.5,
                        palette=p)  # Specify colors for each box

        # Adding labels and title to the plot
        ax.set(ylabel='Average Reward/' + str(rw) + ' steps', xlabel='Experience Replay Capacity',
            title=title.capitalize() + ": " + str(episodes) + " episodes")  # Set labels and title for the axes

        # Calculate the average value for the first box plot
        average_value = df_rewards[df_rewards['capacity'] == df_rewards['capacity'].unique()[0]]['reward'].mean()

        plt.tight_layout()
        boxplot_fig.savefig("./../plots/boxplot/" + filename +".png") #must be before show to save correctly
        # Show the plot
        #plt.show()
        plt.close()

ALE_SPACEINVADERS-V5 EXPERIMENT 0: Date-Time: 2023-08-21 06:49:23, Capacity: 1M
Setting up environment
Creating Agent with capacity set to: 1000000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 135.0
Episode: 4, Reward: 90.0
Episode: 5, Reward: 90.0
Episode: 6, Reward: 90.0
Episode: 7, Reward: 110.0
Episode: 8, Reward: 110.0
Episode: 9, Reward: 110.0
Episode: 10, Reward: 110.0
Episode: 11, Reward: 197.5
Episode: 12, Reward: 197.5
Episode: 13, Reward: 197.5
Episode: 14, Reward: 201.0
Episode: 15, Reward: 201.0
Episode: 16, Reward: 201.0
Episode: 17, Reward: 201.0
Episode: 18, Reward: 208.33
Episode: 19, Reward: 208.33
Episode: 20, Reward: 208.33
Episode: 21, Reward: 212.14
Episode: 22, Reward: 212.14
Episode: 23, Reward: 212.14
Episode: 24, Reward: 204.38
Episode: 25, Reward: 204.38
Episode: 26, Reward: 204.38
Episode: 27, Reward: 197.78
Episode: 28, Reward: 197.78
Episode: 29, Reward: 197.78
Episode: 30, Reward: 197.7

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...




ALE_SPACEINVADERS-V5 EXPERIMENT 1: Date-Time: 2023-08-21 06:58:03, Capacity: 500k
Setting up environment
Creating Agent with capacity set to: 500000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 45.0
Episode: 2, Reward: 45.0
Episode: 3, Reward: 50.0
Episode: 4, Reward: 50.0
Episode: 5, Reward: 53.33
Episode: 6, Reward: 53.33
Episode: 7, Reward: 73.75
Episode: 8, Reward: 73.75
Episode: 9, Reward: 73.75
Episode: 10, Reward: 103.0
Episode: 11, Reward: 103.0
Episode: 12, Reward: 86.67
Episode: 13, Reward: 86.67
Episode: 14, Reward: 86.67
Episode: 15, Reward: 74.29
Episode: 16, Reward: 74.29
Episode: 17, Reward: 74.29
Episode: 18, Reward: 99.38
Episode: 19, Reward: 99.38
Episode: 20, Reward: 99.38
Episode: 21, Reward: 111.67
Episode: 22, Reward: 111.67
Episode: 23, Reward: 111.67
Episode: 24, Reward: 111.67
Episode: 25, Reward: 100.5
Episode: 26, Reward: 100.5
Episode: 27, Reward: 96.0
Episode: 28, Reward: 96.0
Episode: 29, Reward: 96.0
Episode: 30, Reward: 96.0
Episode

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_SPACEINVADERS-V5 EXPERIMENT 2: Date-Time: 2023-08-21 07:05:31, Capacity: 100k
Setting up environment
Creating Agent with capacity set to: 100000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 260.0
Episode: 4, Reward: 260.0
Episode: 5, Reward: 160.0
Episode: 6, Reward: 160.0
Episode: 7, Reward: 160.0
Episode: 8, Reward: 120.0
Episode: 9, Reward: 120.0
Episode: 10, Reward: 120.0
Episode: 11, Reward: 135.0
Episode: 12, Reward: 135.0
Episode: 13, Reward: 135.0
Episode: 14, Reward: 135.0
Episode: 15, Reward: 165.0
Episode: 16, Reward: 165.0
Episode: 17, Reward: 165.0
Episode: 18, Reward: 155.0
Episode: 19, Reward: 155.0
Episode: 20, Reward: 155.0
Episode: 21, Reward: 132.86
Episode: 22, Reward: 132.86
Episode: 23, Reward: 132.86
Episode: 24, Reward: 132.86
Episode: 25, Reward: 142.5
Episode: 26, Reward: 142.5
Episode: 27, Reward: 142.5
Episode: 28, Reward: 142.5
E

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_SPACEINVADERS-V5 EXPERIMENT 3: Date-Time: 2023-08-21 07:12:56, Capacity: 50k
Setting up environment
Creating Agent with capacity set to: 50000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 90.0
Episode: 2, Reward: 90.0
Episode: 3, Reward: 90.0
Episode: 4, Reward: 165.0
Episode: 5, Reward: 165.0
Episode: 6, Reward: 135.0
Episode: 7, Reward: 135.0
Episode: 8, Reward: 116.25
Episode: 9, Reward: 116.25
Episode: 10, Reward: 116.25
Episode: 11, Reward: 105.0
Episode: 12, Reward: 105.0
Episode: 13, Reward: 107.5
Episode: 14, Reward: 107.5
Episode: 15, Reward: 107.5
Episode: 16, Reward: 107.5
Episode: 17, Reward: 129.29
Episode: 18, Reward: 129.29
Episode: 19, Reward: 129.29
Episode: 20, Reward: 129.29
Episode: 21, Reward: 126.88
Episode: 22, Reward: 126.88
Episode: 23, Reward: 129.44
Episode: 24, Reward: 129.44
Episode: 25, Reward: 129.44
Episode: 26, Reward: 124.0
Episode: 27, Reward: 124.0
Episode: 28, Reward: 

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_SPACEINVADERS-V5 EXPERIMENT 4: Date-Time: 2023-08-21 07:20:30, Capacity: 10k
Setting up environment
Creating Agent with capacity set to: 10000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 110.0
Episode: 3, Reward: 110.0
Episode: 4, Reward: 70.0
Episode: 5, Reward: 70.0
Episode: 6, Reward: 108.33
Episode: 7, Reward: 108.33
Episode: 8, Reward: 108.33
Episode: 9, Reward: 133.75
Episode: 10, Reward: 133.75
Episode: 11, Reward: 133.75
Episode: 12, Reward: 133.75
Episode: 13, Reward: 107.0
Episode: 14, Reward: 107.0
Episode: 15, Reward: 91.67
Episode: 16, Reward: 91.67
Episode: 17, Reward: 91.67
Episode: 18, Reward: 91.67
Episode: 19, Reward: 137.86
Episode: 20, Reward: 137.86
Episode: 21, Reward: 137.86
Episode: 22, Reward: 137.86
Episode: 23, Reward: 137.86
Episode: 24, Reward: 137.86
Episode: 25, Reward: 159.38
Episode: 26, Reward: 159.38
Episode: 27, Reward: 150.0
Episode: 28, Reward

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_SPACEINVADERS-V5 EXPERIMENT 5: Date-Time: 2023-08-21 07:25:11, Capacity: 5k
Setting up environment
Creating Agent with capacity set to: 5000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 410.0
Episode: 4, Reward: 410.0
Episode: 5, Reward: 272.5
Episode: 6, Reward: 272.5
Episode: 7, Reward: 272.5
Episode: 8, Reward: 240.0
Episode: 9, Reward: 240.0
Episode: 10, Reward: 240.0
Episode: 11, Reward: 210.0
Episode: 12, Reward: 210.0
Episode: 13, Reward: 210.0
Episode: 14, Reward: 210.0
Episode: 15, Reward: 178.0
Episode: 16, Reward: 178.0
Episode: 17, Reward: 178.0
Episode: 18, Reward: 160.83
Episode: 19, Reward: 160.83
Episode: 20, Reward: 160.83
Episode: 21, Reward: 160.83
Episode: 22, Reward: 160.83
Episode: 23, Reward: 152.86
Episode: 24, Reward: 152.86
Episode: 25, Reward: 152.86
Episode: 26, Reward: 149.38
Episode: 27, Reward: 149.38
Episode: 28, Reward: 144.4

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_SPACEINVADERS-V5 EXPERIMENT 6: Date-Time: 2023-08-21 07:28:55, Capacity: 1k
Setting up environment
Creating Agent with capacity set to: 1000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 50.0
Episode: 2, Reward: 50.0
Episode: 3, Reward: 80.0
Episode: 4, Reward: 80.0
Episode: 5, Reward: 93.33
Episode: 6, Reward: 93.33
Episode: 7, Reward: 93.33
Episode: 8, Reward: 91.25
Episode: 9, Reward: 91.25
Episode: 10, Reward: 91.25
Episode: 11, Reward: 104.0
Episode: 12, Reward: 104.0
Episode: 13, Reward: 104.0
Episode: 14, Reward: 121.67
Episode: 15, Reward: 121.67
Episode: 16, Reward: 121.67
Episode: 17, Reward: 113.57
Episode: 18, Reward: 113.57
Episode: 19, Reward: 108.75
Episode: 20, Reward: 108.75
Episode: 21, Reward: 108.75
Episode: 22, Reward: 108.75
Episode: 23, Reward: 146.11
Episode: 24, Reward: 146.11
Episode: 25, Reward: 146.11
Episode: 26, Reward: 131.5
Episode: 27, Reward: 131.5
Episode: 28, Reward: 131

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_SPACEINVADERS-V5 EXPERIMENT 7: Date-Time: 2023-08-21 07:31:51, Capacity: 500
Setting up environment
Creating Agent with capacity set to: 500
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 110.0
Episode: 3, Reward: 110.0
Episode: 4, Reward: 110.0
Episode: 5, Reward: 160.0
Episode: 6, Reward: 160.0
Episode: 7, Reward: 160.0
Episode: 8, Reward: 153.33
Episode: 9, Reward: 153.33
Episode: 10, Reward: 142.5
Episode: 11, Reward: 142.5
Episode: 12, Reward: 142.5
Episode: 13, Reward: 127.0
Episode: 14, Reward: 127.0
Episode: 15, Reward: 109.17
Episode: 16, Reward: 109.17
Episode: 17, Reward: 109.17
Episode: 18, Reward: 109.17
Episode: 19, Reward: 121.43
Episode: 20, Reward: 121.43
Episode: 21, Reward: 120.62
Episode: 22, Reward: 120.62
Episode: 23, Reward: 111.11
Episode: 24, Reward: 111.11
Episode: 25, Reward: 111.11
Episode: 26, Reward: 111.11
Episode: 27, Reward: 146.5
Episode: 28, Reward:

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
Saving rewards to csv
Saving boxplot of results
Average Accumulated Reward: 237180.0
ALE_VENTURE-V5 EXPERIMENT 0: Date-Time: 2023-08-21 07:34:42, Capacity: 1M
Setting up environment
Creating Agent with capacity set to: 1000000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Epis

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_VENTURE-V5 EXPERIMENT 1: Date-Time: 2023-08-21 07:42:54, Capacity: 500k
Setting up environment
Creating Agent with capacity set to: 500000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, 

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_VENTURE-V5 EXPERIMENT 2: Date-Time: 2023-08-21 07:51:41, Capacity: 100k
Setting up environment
Creating Agent with capacity set to: 100000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, 

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_VENTURE-V5 EXPERIMENT 3: Date-Time: 2023-08-21 08:00:39, Capacity: 50k
Setting up environment
Creating Agent with capacity set to: 50000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Re

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_VENTURE-V5 EXPERIMENT 4: Date-Time: 2023-08-21 08:10:26, Capacity: 10k
Setting up environment
Creating Agent with capacity set to: 10000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Re

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_VENTURE-V5 EXPERIMENT 5: Date-Time: 2023-08-21 08:16:44, Capacity: 5k
Setting up environment
Creating Agent with capacity set to: 5000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Rewa

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_VENTURE-V5 EXPERIMENT 6: Date-Time: 2023-08-21 08:22:18, Capacity: 1k
Setting up environment
Creating Agent with capacity set to: 1000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Rewa

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_VENTURE-V5 EXPERIMENT 7: Date-Time: 2023-08-21 08:26:47, Capacity: 500
Setting up environment
Creating Agent with capacity set to: 500
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Rewa

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
Saving rewards to csv
Saving boxplot of results
Average Accumulated Reward: 0.0
ALE_WIZARDOFWOR-V5 EXPERIMENT 0: Date-Time: 2023-08-21 08:31:23, Capacity: 1M
Setting up environment
Creating Agent with capacity set to: 1000000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 400.0
Episode: 6, Reward: 400.0
Episode: 7, Reward: 400.0
Episode: 8, Reward: 250.0
Episode: 9, Reward: 250.0
Episode: 10, Reward: 166.67
Episode: 11, Reward: 125.0
Episode: 12, Reward: 125.0
Episode: 13, Reward: 125.0
Episode: 14, Reward: 100.0
Episode: 15, Reward: 100.0
Episode: 16, Reward: 100.0
Episode: 17, Reward: 100.0
Episode: 18, Reward: 100.0
Episode: 19, Reward: 133.33
Episode: 20, Reward: 133.33
Episode: 21, Reward: 133.33
Episode: 22, Reward: 133.33
Episode: 23, Reward: 128.57
Episode: 24, Reward: 128.57
Episode: 25, Reward: 112.5
Episod

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_WIZARDOFWOR-V5 EXPERIMENT 1: Date-Time: 2023-08-21 08:45:31, Capacity: 500k
Setting up environment
Creating Agent with capacity set to: 500000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 500.0
Episode: 12, Reward: 300.0
Episode: 13, Reward: 300.0
Episode: 14, Reward: 233.33
Episode: 15, Reward: 233.33
Episode: 16, Reward: 175.0
Episode: 17, Reward: 140.0
Episode: 18, Reward: 140.0
Episode: 19, Reward: 116.67
Episode: 20, Reward: 100.0
Episode: 21, Reward: 100.0
Episode: 22, Reward: 87.5
Episode: 23, Reward: 87.5
Episode: 24, Reward: 87.5
Episode: 25, Reward: 87.5
Episode: 26, Reward: 88.89
Episode: 27, Reward: 88.89
Episode: 28, Reward: 88.89
Episode: 29, Reward: 88.

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_WIZARDOFWOR-V5 EXPERIMENT 2: Date-Time: 2023-08-21 09:01:24, Capacity: 100k
Setting up environment
Creating Agent with capacity set to: 100000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 800.0
Episode: 8, Reward: 400.0
Episode: 9, Reward: 400.0
Episode: 10, Reward: 266.67
Episode: 11, Reward: 266.67
Episode: 12, Reward: 266.67
Episode: 13, Reward: 266.67
Episode: 14, Reward: 225.0
Episode: 15, Reward: 225.0
Episode: 16, Reward: 225.0
Episode: 17, Reward: 225.0
Episode: 18, Reward: 220.0
Episode: 19, Reward: 220.0
Episode: 20, Reward: 220.0
Episode: 21, Reward: 200.0
Episode: 22, Reward: 200.0
Episode: 23, Reward: 200.0
Episode: 24, Reward: 185.71
Episode: 25, Reward: 185.71
Episode: 26, Reward: 175.0
Episode: 27, Reward: 175.0
Episode: 28, Reward: 155.56
Episode:

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_WIZARDOFWOR-V5 EXPERIMENT 3: Date-Time: 2023-08-21 09:17:49, Capacity: 50k
Setting up environment
Creating Agent with capacity set to: 50000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 800.0
Episode: 9, Reward: 400.0
Episode: 10, Reward: 266.67
Episode: 11, Reward: 266.67
Episode: 12, Reward: 266.67
Episode: 13, Reward: 266.67
Episode: 14, Reward: 266.67
Episode: 15, Reward: 200.0
Episode: 16, Reward: 160.0
Episode: 17, Reward: 160.0
Episode: 18, Reward: 133.33
Episode: 19, Reward: 133.33
Episode: 20, Reward: 133.33
Episode: 21, Reward: 133.33
Episode: 22, Reward: 133.33
Episode: 23, Reward: 133.33
Episode: 24, Reward: 142.86
Episode: 25, Reward: 142.86
Episode: 26, Reward: 125.0
Episode: 27, Reward: 111.11
Episode: 28, Reward: 111.11
Epis

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_WIZARDOFWOR-V5 EXPERIMENT 4: Date-Time: 2023-08-21 09:34:08, Capacity: 10k
Setting up environment
Creating Agent with capacity set to: 10000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 200.0
Episode: 5, Reward: 200.0
Episode: 6, Reward: 200.0
Episode: 7, Reward: 200.0
Episode: 8, Reward: 200.0
Episode: 9, Reward: 200.0
Episode: 10, Reward: 300.0
Episode: 11, Reward: 300.0
Episode: 12, Reward: 300.0
Episode: 13, Reward: 300.0
Episode: 14, Reward: 300.0
Episode: 15, Reward: 300.0
Episode: 16, Reward: 250.0
Episode: 17, Reward: 200.0
Episode: 18, Reward: 200.0
Episode: 19, Reward: 200.0
Episode: 20, Reward: 200.0
Episode: 21, Reward: 200.0
Episode: 22, Reward: 200.0
Episode: 23, Reward: 200.0
Episode: 24, Reward: 200.0
Episode: 25, Reward: 171.43
Episode: 26, Reward: 171.43
Episode: 27, Reward: 171.43
Episode: 28, Reward: 171.43
Episode

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_WIZARDOFWOR-V5 EXPERIMENT 5: Date-Time: 2023-08-21 09:43:28, Capacity: 5k
Setting up environment
Creating Agent with capacity set to: 5000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 300.0
Episode: 6, Reward: 300.0
Episode: 7, Reward: 300.0
Episode: 8, Reward: 300.0
Episode: 9, Reward: 300.0
Episode: 10, Reward: 300.0
Episode: 11, Reward: 300.0
Episode: 12, Reward: 300.0
Episode: 13, Reward: 250.0
Episode: 14, Reward: 166.67
Episode: 15, Reward: 166.67
Episode: 16, Reward: 125.0
Episode: 17, Reward: 125.0
Episode: 18, Reward: 100.0
Episode: 19, Reward: 83.33
Episode: 20, Reward: 83.33
Episode: 21, Reward: 71.43
Episode: 22, Reward: 71.43
Episode: 23, Reward: 71.43
Episode: 24, Reward: 71.43
Episode: 25, Reward: 87.5
Episode: 26, Reward: 87.5
Episode: 27, Reward: 87.5
Episode: 28, Reward: 87.5
Episode: 29, Rewa

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_WIZARDOFWOR-V5 EXPERIMENT 6: Date-Time: 2023-08-21 09:50:58, Capacity: 1k
Setting up environment
Creating Agent with capacity set to: 1000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 200.0
Episode: 4, Reward: 200.0
Episode: 5, Reward: 100.0
Episode: 6, Reward: 100.0
Episode: 7, Reward: 100.0
Episode: 8, Reward: 100.0
Episode: 9, Reward: 100.0
Episode: 10, Reward: 233.33
Episode: 11, Reward: 233.33
Episode: 12, Reward: 233.33
Episode: 13, Reward: 233.33
Episode: 14, Reward: 233.33
Episode: 15, Reward: 325.0
Episode: 16, Reward: 325.0
Episode: 17, Reward: 325.0
Episode: 18, Reward: 325.0
Episode: 19, Reward: 325.0
Episode: 20, Reward: 280.0
Episode: 21, Reward: 280.0
Episode: 22, Reward: 280.0
Episode: 23, Reward: 280.0
Episode: 24, Reward: 233.33
Episode: 25, Reward: 200.0
Episode: 26, Reward: 200.0
Episode: 27, Reward: 200.0
Episode: 28, Reward: 200.0
Episo

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_WIZARDOFWOR-V5 EXPERIMENT 7: Date-Time: 2023-08-21 09:56:45, Capacity: 500
Setting up environment
Creating Agent with capacity set to: 500
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 500.0
Episode: 5, Reward: 500.0
Episode: 6, Reward: 500.0
Episode: 7, Reward: 500.0
Episode: 8, Reward: 500.0
Episode: 9, Reward: 500.0
Episode: 10, Reward: 500.0
Episode: 11, Reward: 500.0
Episode: 12, Reward: 500.0
Episode: 13, Reward: 400.0
Episode: 14, Reward: 400.0
Episode: 15, Reward: 325.0
Episode: 16, Reward: 325.0
Episode: 17, Reward: 325.0
Episode: 18, Reward: 325.0
Episode: 19, Reward: 300.0
Episode: 20, Reward: 300.0
Episode: 21, Reward: 300.0
Episode: 22, Reward: 300.0
Episode: 23, Reward: 283.33
Episode: 24, Reward: 283.33
Episode: 25, Reward: 283.33
Episode: 26, Reward: 283.33
Episode: 27, Reward: 283.33
Episode: 28, Reward: 283.33
Episode

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
Saving rewards to csv
Saving boxplot of results
Average Accumulated Reward: 203272.0
ALE_YARSREVENGE-V5 EXPERIMENT 0: Date-Time: 2023-08-21 10:02:20, Capacity: 1M
Setting up environment
Creating Agent with capacity set to: 1000000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 3726.0
Episode: 5, Reward: 3726.0
Episode: 6, Reward: 3726.0
Episode: 7, Reward: 3726.0
Episode: 8, Reward: 3726.0
Episode: 9, Reward: 3622.5
Episode: 10, Reward: 3622.5
Episode: 11, Reward: 3622.5
Episode: 12, Reward: 3622.5
Episode: 13, Reward: 3622.5
Episode: 14, Reward: 3622.5
Episode: 15, Reward: 3622.5
Episode: 16, Reward: 4140.0
Episode: 17, Reward: 4140.0
Episode: 18, Reward: 4140.0
Episode: 19, Reward: 4140.0
Episode: 20, Reward: 4140.0
Episode: 21, Reward: 3845.0
Episode: 22, Reward: 3845.0
Episode: 23, Reward: 3845.0
Episode: 24, Reward: 3845.0
Episode: 25,

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_YARSREVENGE-V5 EXPERIMENT 1: Date-Time: 2023-08-21 10:18:41, Capacity: 500k
Setting up environment
Creating Agent with capacity set to: 500000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 3657.0
Episode: 7, Reward: 3657.0
Episode: 8, Reward: 3657.0
Episode: 9, Reward: 3657.0
Episode: 10, Reward: 3795.0
Episode: 11, Reward: 3798.33
Episode: 12, Reward: 3798.33
Episode: 13, Reward: 3798.33
Episode: 14, Reward: 4158.5
Episode: 15, Reward: 4158.5
Episode: 16, Reward: 4158.5
Episode: 17, Reward: 4158.5
Episode: 18, Reward: 4070.4
Episode: 19, Reward: 4070.4
Episode: 20, Reward: 4070.4
Episode: 21, Reward: 3843.83
Episode: 22, Reward: 3843.83
Episode: 23, Reward: 3843.83
Episode: 24, Reward: 3843.83
Episode: 25, Reward: 4043.14
Episode: 26, Reward: 4043.14
Episode: 27, Reward: 4043.14
Episode:

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_YARSREVENGE-V5 EXPERIMENT 2: Date-Time: 2023-08-21 10:36:56, Capacity: 100k
Setting up environment
Creating Agent with capacity set to: 100000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 2691.0
Episode: 4, Reward: 2691.0
Episode: 5, Reward: 2691.0
Episode: 6, Reward: 2691.0
Episode: 7, Reward: 2691.0
Episode: 8, Reward: 2173.5
Episode: 9, Reward: 2173.5
Episode: 10, Reward: 2173.5
Episode: 11, Reward: 2173.5
Episode: 12, Reward: 2173.5
Episode: 13, Reward: 2173.5
Episode: 14, Reward: 3082.0
Episode: 15, Reward: 3082.0
Episode: 16, Reward: 3082.0
Episode: 17, Reward: 3082.0
Episode: 18, Reward: 3082.0
Episode: 19, Reward: 3172.25
Episode: 20, Reward: 3172.25
Episode: 21, Reward: 3172.25
Episode: 22, Reward: 2910.4
Episode: 23, Reward: 2910.4
Episode: 24, Reward: 2910.4
Episode: 25, Reward: 2910.4
Episode: 26, Reward: 2910.4
Episode: 27, Reward: 2910.4
Episod

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_YARSREVENGE-V5 EXPERIMENT 3: Date-Time: 2023-08-21 10:56:46, Capacity: 50k
Setting up environment
Creating Agent with capacity set to: 50000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 3243.0
Episode: 5, Reward: 3243.0
Episode: 6, Reward: 3243.0
Episode: 7, Reward: 2518.5
Episode: 8, Reward: 2518.5
Episode: 9, Reward: 2518.5
Episode: 10, Reward: 2941.67
Episode: 11, Reward: 2941.67
Episode: 12, Reward: 2941.67
Episode: 13, Reward: 2689.25
Episode: 14, Reward: 2689.25
Episode: 15, Reward: 2689.25
Episode: 16, Reward: 2689.25
Episode: 17, Reward: 2875.0
Episode: 18, Reward: 2875.0
Episode: 19, Reward: 2875.0
Episode: 20, Reward: 2875.0
Episode: 21, Reward: 2875.0
Episode: 22, Reward: 2875.0
Episode: 23, Reward: 2875.0
Episode: 24, Reward: 2875.0
Episode: 25, Reward: 2875.0
Episode: 26, Reward: 2875.0
Episode: 27, Reward: 2875.0
Episode

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_YARSREVENGE-V5 EXPERIMENT 4: Date-Time: 2023-08-21 11:16:57, Capacity: 10k
Setting up environment
Creating Agent with capacity set to: 10000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 2967.0
Episode: 8, Reward: 2967.0
Episode: 9, Reward: 2967.0
Episode: 10, Reward: 2967.0
Episode: 11, Reward: 2967.0
Episode: 12, Reward: 2967.0
Episode: 13, Reward: 2967.0
Episode: 14, Reward: 2967.0
Episode: 15, Reward: 2967.0
Episode: 16, Reward: 3691.5
Episode: 17, Reward: 3691.5
Episode: 18, Reward: 3691.5
Episode: 19, Reward: 3691.5
Episode: 20, Reward: 3691.5
Episode: 21, Reward: 3691.5
Episode: 22, Reward: 2461.0
Episode: 23, Reward: 2461.0
Episode: 24, Reward: 2461.0
Episode: 25, Reward: 2461.0
Episode: 26, Reward: 2461.0
Episode: 27, Reward: 2461.0
Episode: 28, Reward: 18

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_YARSREVENGE-V5 EXPERIMENT 5: Date-Time: 2023-08-21 11:30:39, Capacity: 5k
Setting up environment
Creating Agent with capacity set to: 5000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 4071.0
Episode: 7, Reward: 4071.0
Episode: 8, Reward: 4071.0
Episode: 9, Reward: 3829.5
Episode: 10, Reward: 3829.5
Episode: 11, Reward: 3880.0
Episode: 12, Reward: 3880.0
Episode: 13, Reward: 3880.0
Episode: 14, Reward: 5398.0
Episode: 15, Reward: 5335.4
Episode: 16, Reward: 5335.4
Episode: 17, Reward: 5335.4
Episode: 18, Reward: 5335.4
Episode: 19, Reward: 5202.17
Episode: 20, Reward: 5202.17
Episode: 21, Reward: 5202.17
Episode: 22, Reward: 5202.17
Episode: 23, Reward: 5202.17
Episode: 24, Reward: 5202.17
Episode: 25, Reward: 5202.17
Episode: 26, Reward: 5202.17
Episode: 27, Reward: 5202.17
Episode: 28, 

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_YARSREVENGE-V5 EXPERIMENT 6: Date-Time: 2023-08-21 11:42:27, Capacity: 1k
Setting up environment
Creating Agent with capacity set to: 1000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 3105.0
Episode: 6, Reward: 3105.0
Episode: 7, Reward: 3105.0
Episode: 8, Reward: 2982.5
Episode: 9, Reward: 2982.5
Episode: 10, Reward: 2982.5
Episode: 11, Reward: 2982.5
Episode: 12, Reward: 2931.33
Episode: 13, Reward: 2931.33
Episode: 14, Reward: 2931.33
Episode: 15, Reward: 2371.0
Episode: 16, Reward: 2371.0
Episode: 17, Reward: 2371.0
Episode: 18, Reward: 2371.0
Episode: 19, Reward: 2371.0
Episode: 20, Reward: 2371.0
Episode: 21, Reward: 2371.0
Episode: 22, Reward: 2371.0
Episode: 23, Reward: 2371.0
Episode: 24, Reward: 2371.0
Episode: 25, Reward: 2371.0
Episode: 26, Reward: 2371.0
Episode: 27, Reward: 2371.0
Episode: 28, Rew

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_YARSREVENGE-V5 EXPERIMENT 7: Date-Time: 2023-08-21 11:51:34, Capacity: 500
Setting up environment
Creating Agent with capacity set to: 500
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 2829.0
Episode: 5, Reward: 2829.0
Episode: 6, Reward: 2829.0
Episode: 7, Reward: 2829.0
Episode: 8, Reward: 2829.0
Episode: 9, Reward: 2829.0
Episode: 10, Reward: 2691.0
Episode: 11, Reward: 2691.0
Episode: 12, Reward: 2691.0
Episode: 13, Reward: 2691.0
Episode: 14, Reward: 2691.0
Episode: 15, Reward: 2369.0
Episode: 16, Reward: 2369.0
Episode: 17, Reward: 2369.0
Episode: 18, Reward: 2369.0
Episode: 19, Reward: 2369.0
Episode: 20, Reward: 2369.0
Episode: 21, Reward: 2369.0
Episode: 22, Reward: 2369.0
Episode: 23, Reward: 2018.25
Episode: 24, Reward: 2018.25
Episode: 25, Reward: 2018.25
Episode: 26, Reward: 2018.25
Episode: 27, Reward: 2018.25
Episode: 28

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
Saving rewards to csv
Saving boxplot of results
Average Accumulated Reward: 3213670.0
ALE_ZAXXON-V5 EXPERIMENT 0: Date-Time: 2023-08-21 12:00:08, Capacity: 1M
Setting up environment
Creating Agent with capacity set to: 1000000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Epis

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_ZAXXON-V5 EXPERIMENT 1: Date-Time: 2023-08-21 12:25:44, Capacity: 500k
Setting up environment
Creating Agent with capacity set to: 500000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, R

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_ZAXXON-V5 EXPERIMENT 2: Date-Time: 2023-08-21 12:50:27, Capacity: 100k
Setting up environment
Creating Agent with capacity set to: 100000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, R

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_ZAXXON-V5 EXPERIMENT 3: Date-Time: 2023-08-21 13:16:51, Capacity: 50k
Setting up environment
Creating Agent with capacity set to: 50000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Rew

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_ZAXXON-V5 EXPERIMENT 4: Date-Time: 2023-08-21 13:43:19, Capacity: 10k
Setting up environment
Creating Agent with capacity set to: 10000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Rew

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_ZAXXON-V5 EXPERIMENT 5: Date-Time: 2023-08-21 14:00:43, Capacity: 5k
Setting up environment
Creating Agent with capacity set to: 5000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Rewar

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_ZAXXON-V5 EXPERIMENT 6: Date-Time: 2023-08-21 14:15:03, Capacity: 1k
Setting up environment
Creating Agent with capacity set to: 1000
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 1000.0
Episode: 7, Reward: 1000.0
Episode: 8, Reward: 1000.0
Episode: 9, Reward: 1000.0
Episode: 10, Reward: 500.0
Episode: 11, Reward: 500.0
Episode: 12, Reward: 500.0
Episode: 13, Reward: 500.0
Episode: 14, Reward: 500.0
Episode: 15, Reward: 333.33
Episode: 16, Reward: 333.33
Episode: 17, Reward: 333.33
Episode: 18, Reward: 333.33
Episode: 19, Reward: 250.0
Episode: 20, Reward: 250.0
Episode: 21, Reward: 250.0
Episode: 22, Reward: 250.0
Episode: 23, Reward: 250.0
Episode: 24, Reward: 200.0
Episode: 25, Reward: 200.0
Episode: 26, Reward: 200.0
Episode: 27, Reward: 200.0
Episode: 28, Reward: 166.67
Episode: 29, 

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
ALE_ZAXXON-V5 EXPERIMENT 7: Date-Time: 2023-08-21 14:26:05, Capacity: 500
Setting up environment
Creating Agent with capacity set to: 500
Simulating environment
Episode: 0, Reward: 0.0
Episode: 1, Reward: 0.0
Episode: 2, Reward: 0.0
Episode: 3, Reward: 0.0
Episode: 4, Reward: 0.0
Episode: 5, Reward: 0.0
Episode: 6, Reward: 0.0
Episode: 7, Reward: 0.0
Episode: 8, Reward: 0.0
Episode: 9, Reward: 0.0
Episode: 10, Reward: 0.0
Episode: 11, Reward: 0.0
Episode: 12, Reward: 0.0
Episode: 13, Reward: 0.0
Episode: 14, Reward: 0.0
Episode: 15, Reward: 0.0
Episode: 16, Reward: 0.0
Episode: 17, Reward: 0.0
Episode: 18, Reward: 0.0
Episode: 19, Reward: 0.0
Episode: 20, Reward: 0.0
Episode: 21, Reward: 0.0
Episode: 22, Reward: 0.0
Episode: 23, Reward: 0.0
Episode: 24, Reward: 0.0
Episode: 25, Reward: 0.0
Episode: 26, Reward: 0.0
Episode: 27, Reward: 0.0
Episode: 28, Reward: 0.0
Episode: 29, Reward: 0.0
Episode: 30, Reward: 0.0
Episode: 31, Rewar

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Saving Reward Graphs...
Saving Q-value/action graphs...
Saving rewards to csv
Saving boxplot of results
Average Accumulated Reward: 13694.0


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>