In [5]:
import ale_py
# if using gymnasium
import shimmy
import gymnasium as gym
import torch
from torch import nn
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from torch.nn import functional as F
from torchvision.datasets import MNIST
from torchvision import datasets, transforms
import torchvision.transforms.functional as TF
import os
import copy
import random
from torch.utils.data import Dataset



In [6]:
! pip  install pytorch-lightning

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.0.2-py3-none-any.whl (719 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m719.0/719.0 KB[0m [31m137.8 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchmetrics>=0.7.0
  Downloading torchmetrics-0.11.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 KB[0m [31m616.6 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting fsspec[http]>2021.06.0
  Downloading fsspec-2023.4.0-py3-none-any.whl (153 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.0/154.0 KB[0m [31m418.6 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting lightning-utilities>=0.7.0
  Downloading lightning_utilities-0.8.0-py3-none-any.whl (20 kB)
Collecting PyYAML>=5.4
  Using cached PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (682 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1


# NN

In [9]:
#nn class

class CNN(pl.LightningModule):

    def __init__(self):
        super().__init__()
        self.convStack = torch.nn.Sequential(
            nn.Conv2d(3, 5, kernel_size=9, stride=1, padding = 4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(5, 15, kernel_size=5, stride=1, padding = 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=5, stride=5),   
        )

        self.fully_connected = nn.Sequential(
            nn.Flatten(start_dim = 0),
            nn.Linear(6000, 100),
            nn.ReLU(),
            nn.Linear(100,5),
        )

    def forward(self, x):
        x = self.convStack(x)
        #print(x.size())
        x = self.fully_connected(x)
        # x = torch.softmax(x, dim = 0)
        return x

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        pred = self.forward(x)
        loss = F.mse_loss(pred, y)
        self.log('train_loss', loss)
        return loss
    
    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch
        pred = self.forward(x)
        loss = F.mse_loss(x, y)
        self.log('val_loss', loss)
        return loss




In [18]:
class ExperienceDataset(Dataset):
    
    def __init__(self, experience_list, target_net):
        self.data = []
        for sample in experience_list:
            state, action, reward, next_state = sample
            next_q = torch.max(target_net.forward(next_state))
            target = reward + 0.7 * next_q
            # transformation on state rgb -> greyscale 
            state =  TF.to_tensor(state)
            self.data.append(state, target)
            
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,idx):
        # transformations
        return self.data[idx][0], self.data[idx][1]
    

# Algorithms 

### DQN

In [19]:
class DQN_agent:
    """ The agent maps X-states to Y-actions
    e.g. The neural network output is [.1, .7, .1, .3, .4]
    Where each element is a predicted value of (state, action) pair.
    """
    def __init__(self, epsilon):
        self.pred_NN = CNN()
        self.target_NN = copy.deepcopy(self.pred_NN)
        self.epsilon = epsilon
    
    def train(self,experiences):
        # Experience replay take only a part of it
        dataset = ExperienceDataset(experiences, self.target_NN)
        dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
        trainer = Trainer()
        trainer.fit(self.pred_NN, train_dataloader)
    
    def predict(self,input_):
        pred = self.pred_NN.forward(input_) 
        return pred
    
    def action(self, pred):
        #choose action using epsilon-greedy
        if random.random() < self.epsilon:
            action = random.randint(0, pred.size(dim=0) - 1)
        else: 
            action = torch.argmax(pred)
        return action
    
    def copy_to_target(self):
        self.target_NN = copy.deepcopy(self.pred_NN)
        
        

# Run agent

In [21]:
env = gym.make("ALE/Pacman-v5",render_mode="human")
state, info = env.reset(seed=123, options={})
state = TF.to_tensor(state) #transform to correct shape and datatype 
epsilon = 0.5
agent = DQN_agent(epsilon)
experience_list = []
for i in range(1,1000):
    
    pred = agent.pred_NN.forward(state) 
    action = agent.action(pred)
    print(action)
    if i % 10 == 0:
       agent.train(experience_list)
    
    if i % 100 == 0:
        agent.copy_to_target()
        # copy main model into target model
    
    next_state, reward, terminated, truncated, info = env.step(action)
    
    #save state, action, reward and next state
    experience_list.append([state, action, reward, next_state])
    
    state = next_state

    done = terminated or truncated

env.close()

2


TypeError: conv2d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, int)
