In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import time
import datetime
import torch
import random
import numpy as np
import copy
import logging
import os
from pathlib import Path
import argparse
from collections import deque, Counter, namedtuple

import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.autograd as autograd

from minatar import Environment

import seaborn as sns
import matplotlib.pyplot as plt

from replay_buffer import ReplayBuffer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from my_dqn import AgentDQN, setup_logger

In [4]:
game = "breakout"
proj_dir = os.path.abspath(".")
default_checkpoint_folder = os.path.join(proj_dir, "checkpoints", game)

checkpoint_folder = default_checkpoint_folder

model_file_name = os.path.join(checkpoint_folder, game + "_model")
replay_buffer_file = os.path.join(checkpoint_folder, game + "_replay_buffer")
train_stats_file = os.path.join(checkpoint_folder, game + "_train_stats")
logs_path = os.path.join(checkpoint_folder, "logs")

Path(checkpoint_folder).mkdir(parents=True, exist_ok=True)
Path(logs_path).mkdir(parents=True, exist_ok=True)

env = Environment(game)

train_logger = setup_logger(game, logs_path)

# print("Cuda available?: " + str(torch.cuda.is_available()))
my_agent = AgentDQN(
        env=env,
        model_file=model_file_name,
        replay_buffer_file=replay_buffer_file,
        train_stats_file=train_stats_file,
        save_checkpoints=True,
        logger=train_logger,
    )


In [5]:
my_agent.train(1)

2023-01-30 01:41:04,959 - root - INFO - Starting/resuming training session at: 0
2023-01-30 01:41:04,960 - root - INFO - Starting training epoch at t = 0
2023-01-30 01:41:05,906 - root - INFO - Frames seen: 2000 | Episode: 173 | Max reward: 3.0 | Avg reward: 0.55 | Avg frames (episode): 11.560693641618498 | Avg max Q: 0.09245185119410355 | Epsilon: 1.0 | Train epoch time: 0:00:00.943135
2023-01-30 01:41:05,907 - root - INFO - Starting validation epoch at t = 2000


[0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 3.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.0, 3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 3.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0]
[6, 16, 6, 16, 6, 6, 16, 6, 6, 6, 16, 6, 38, 6, 16, 6, 6, 16, 6, 6, 16, 16, 16, 16, 6, 6, 16, 16, 6, 6, 6, 28, 26, 6, 6, 6, 6, 16, 6, 

2023-01-30 01:41:06,724 - root - INFO - Max reward: 1.0 | Avg reward: 0.49 | Avg frames (episode): 10.869565217391305 | Avg max Q: 0.09257919527590275 | Validation epoch time: 0:00:00.813331
2023-01-30 01:41:06,725 - root - INFO - Saving checkpoint at t = 2000 ...
  data = np.asarray(data, order="C", dtype=as_dtype)
  data = np.asarray(data, order="C", dtype=as_dtype)


TypeError: Object dtype dtype('O') has no native HDF5 equivalent

In [18]:
my_agent.replay_buffer.save(replay_buffer_file)

In [17]:
s = get_state(my_agent.env.state())
s.shape

torch.Size([1, 4, 10, 10])

In [23]:
s.data.dtype

torch.float32

In [24]:
action = my_agent.select_action(s, my_agent.t, my_agent.num_actions)
reward, is_terminated = my_agent.env.act(action)
reward = torch.tensor([[reward]], device="cpu").float()
is_terminated = torch.tensor([[is_terminated]], device="cpu")
s_prime = get_state(my_agent.env.state())

In [25]:
s_prime.dtype

torch.float32

In [26]:
sample = my_agent.replay_buffer.sample(1)
len(sample)

5

In [47]:
sample[0][0][0][0][0]
type(sample[0][0][0][0][0])

numpy.float64

In [29]:

state, action, reward, next_state, terminated = sample
state = torch.from_numpy(state)
next_state = torch.from_numpy(next_state)
action = torch.LongTensor(action)
reward = torch.FloatTensor(reward).unsqueeze(1)
terminated = torch.FloatTensor(terminated).unsqueeze(1)


In [49]:
state.shape

torch.Size([1, 4, 10, 10])

In [28]:
state.dtype

torch.float64

In [35]:
action

tensor([[1]])

In [71]:

my_agent.policy_model(state)


tensor([[0.2652, 0.1641, 0.2825, 0.3029, 0.2577, 0.2576]],
       grad_fn=<AddmmBackward0>)

In [94]:

is_terminated = torch.tensor([[None]], device="cpu")
is_terminated

if is_terminated:
    print("yes")

RuntimeError: Could not infer dtype of NoneType

In [48]:
my_agent.policy_model(state).max(1)[0].item()

0.30291375517845154

In [87]:
act = torch.tensor([[random.randrange(6)]], device="cpu")
act = act.squeeze(0)
act

tensor([1])

In [88]:

torch.index_select(my_agent.policy_model(state), 1, act).item()

0.16414308547973633

In [90]:
my_agent.policy_model(state).max(1)

AttributeError: 'torch.return_types.max' object has no attribute 'item'

In [74]:
my_agent.policy_model(state).max(1)[1].view(1, 1)

[autoreload of my_dqn failed: Traceback (most recent call last):
  File "c:\Users\Marius\anaconda3\envs\general\lib\site-packages\IPython\extensions\autoreload.py", line 261, in check
    superreload(m, reload, self.old_objects)
  File "c:\Users\Marius\anaconda3\envs\general\lib\site-packages\IPython\extensions\autoreload.py", line 459, in superreload
    module = reload(module)
  File "c:\Users\Marius\anaconda3\envs\general\lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 619, in _exec
  File "<frozen importlib._bootstrap_external>", line 879, in exec_module
  File "<frozen importlib._bootstrap_external>", line 1017, in get_code
  File "<frozen importlib._bootstrap_external>", line 947, in source_to_code
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "d:\Facultate\PhD\work\learning_stuff\dqn_clean\minatar_work\my_dqn.py", line 274
    def get_action_from_model(self, st

tensor([[3]])