# Model implementation

Notebook for the initial development of the DQN (Deep Q-Network) model.

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from dqn_implementation.data.generator import generate

In [2]:
# Pull one batch from the project's data generator and unpack its four parts.
# NOTE(review): the argument looks like the sample count (`states` is later
# shown to have a leading dimension of 100) — confirm against `generate`.
states, actions, rewards, new_states = generate(100)

In [3]:
# Prefer the GPU, but fall back to the CPU so that the `.to(device)` calls
# below still work on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
class DQN(nn.Module):
    """Convolutional Q-network: three conv layers followed by two dense layers.

    The dense head is sized for 4-channel 84 x 84 inputs: the conv stack
    reduces the spatial size 84 -> 20 -> 9 -> 7, giving 64 * 7 * 7 = 3136
    features per sample.

    Args:
        action_count: number of outputs of the final layer (one per action).
    """

    def __init__(self, action_count):
        super().__init__()
        self.conv_first = nn.Conv2d(4, 32, 8, stride=4)
        self.conv_second = nn.Conv2d(32, 64, 4, stride=2)
        self.conv_third = nn.Conv2d(64, 64, 3, stride=1)
        self.dense_first = nn.Linear(3136, 512)
        self.dense_second = nn.Linear(512, action_count)

    def forward(self, inp):
        """Compute one value per action for every sample in the batch.

        Args:
            inp: float tensor of shape (batch, 4, 84, 84).

        Returns:
            Tensor of shape (batch, action_count).
        """
        # inp = batch x channels x height x width (PyTorch's NCHW layout)
        # ReLU after every hidden layer: without a non-linearity the stacked
        # conv/linear layers collapse into a single affine map.
        x = F.relu(self.conv_first(inp))
        x = F.relu(self.conv_second(x))
        x = F.relu(self.conv_third(x))

        # Merge channel and spatial dims into one feature axis per sample.
        # `flatten` also copes with non-contiguous layouts, unlike `view`.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.dense_first(x))
        # No activation on the output layer: the outputs are unbounded.
        return self.dense_second(x)

In [5]:
# Toy 2 x 2 x 2 integer tensor holding 1..8, used to check how `view` merges dimensions.
a = torch.arange(1, 9).reshape(2, 2, 2); a

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [6]:
# Confirm the toy tensor's dimensions before reshaping it.
a.shape

torch.Size([2, 2, 2])

In [7]:
# Keep the first dimension and merge the remaining two into one row per entry,
# i.e. the same flattening that DQN.forward applies to the conv output.
a.view(2, 4)

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])

In [8]:
# Build the network with 16 outputs and move its parameters to `device`.
dqn = DQN(16).to(device)

In [11]:
# Inspect the layout of the generated states before feeding them to the network.
np.shape(states)

(100, 84, 84, 4)

In [43]:
# `states` has its size-4 axis last; Conv2d(4, ...) expects channels at dim 1,
# so permute to (batch, 4, 84, 84), cast to float32 and move to the model's device.
b = dqn(torch.from_numpy(states).permute(0, 3, 1, 2).float().to(device))

In [44]:
# Display the network output: one row per input state, one column per action.
b

tensor([[-0.3840, -3.7226,  1.0425,  ..., -5.6897, -0.4084,  2.1808],
        [-0.4237, -3.7925,  0.9448,  ..., -5.7304, -0.4430,  2.2022],
        [-0.3080, -3.8389,  0.9622,  ..., -5.6178, -0.5513,  2.3110],
        ...,
        [-0.3459, -3.8629,  0.9919,  ..., -5.6169, -0.5176,  2.2495],
        [-0.3744, -3.9073,  0.9746,  ..., -5.6197, -0.5454,  2.2847],
        [-0.3645, -3.7110,  1.0104,  ..., -5.6463, -0.4905,  2.2494]],
       device='cuda:0', grad_fn=<AddmmBackward>)