<a href="https://colab.research.google.com/github/razvancraciun/space-invaders-ai/blob/master/space.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
EPSILON = 1
EPSILON_MIN = 0.01
EPSILON_DEC = 0.9995
LEARNING_RATE = 0.001

## Imports

In [0]:
import numpy as np
import torch
import torch.nn as nn
import gym

## Buffer

In [0]:
class ReplayBuffer:
    def __init__(self, size, state_shape, n_actions):
        self.size = size
        self.count = 0
        self.from_states = np.zeros( (self.size, state_shape) ) 
        self.to_states = np.zeros( (self.size, state_shape) )
        self.actions = np.zeros(self.size, dtype=np.int8)
        self.rewards = np.zeros(self.size)
        self.terminals = np.zeros(self.size)

    def store(self, from_state, action, reward, to_state, done):
        index = self.count % self.size
        self.from_states[index] = from_state
        self.to_states[index] = to_state
        self.actions[index] = action
        self.rewards[index] = self.size
        self.terminals[index] = 1 - int(done)
        self.count += 1

    def sample(self, batch_size):
        size = min(self.count, self.size)
        batch = np.random.choice(size, batch_size)

        from_states = self.from_states[batch]
        to_states = self.to_states[batch]
        actions = self.actions[batch]
        rewards = self.rewards[batch]
        terminals = self.terminals[batch]

        return from_states, actions, rewards, to_states, terminals 

## Model

In [0]:
class Model(nn.Module):
    def __init__(self, input_channels, output_shape, learning_rate):
        super(Model, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(input_channels,32, 3, padding=1, stride=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        self.block2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1, stride=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, 3, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )

        self.block3 = nn.Sequential(
            nn.Conv2d(256, 256, 3, padding=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 512, 3, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU()
        )

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 6 * 5, 256),
            nn.Linear(256, output_shape)
        )
       

        self.optimizer = torch.optim.Adam(self.parameters(), learning_rate)
        self.loss = nn.MSELoss()

    def forward(self, x):
        y = self.block1(x)
        y = self.block2(y)
        y = self.block3(y)
        print(y.shape)
        y = self.fc(y)
        return y


## Frame handling

In [0]:
def stack_frames():
    pass


def init_stack():
    pass


def preprocess(state):
    state = np.moveaxis(state, 2, 0)
    r,g,b = state[0], state[1], state[2]
    state = 0.3 * r + 0.59 * g + 0.11 * b
    state = state[20:-14, 15:-15]
    state = torch.Tensor(state)
    return state

In [118]:




env = gym.make('SpaceInvaders-v0')

model = Model(1,1, LEARNING_RATE)
state = env.reset()
print(state.shape)
state = preprocess(state)
print(state.shape)

state.unsqueeze_(0)
state.unsqueeze_(0)

pred = model.forward(state)
print(model)

(210, 160, 3)
torch.Size([176, 130])
torch.Size([1, 512, 6, 5])
Model(
  (block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
  )
  (block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
  )
  (block3): Sequential(
    (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.

In [107]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
