In [12]:
# imports
import gymnasium as gym
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import random
import os
from collections import deque


In [13]:

#hyperparameters
#epsilon greedy exploration
epsilon = 1
epsilon_decay = 0.995
epsilon_min = 0.01
# tetris from openai gym
env = gym.make("ALE/Tetris-v5")
#
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

batch_size = 32  # increase by powers of 2
num_episodes = 1000 # increase for more training


output_dir = 'results/tetris/'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)


In [14]:

# dqn agent for the atari problem openai gym
class Agent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = epsilon  # exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.model = self._build_model()

    def _build_model(self):
        model = nn.Sequential(
            nn.Linear(self.state_size, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, self.action_size)
        )
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # epsilon greedy exploration
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * torch.max(self.model(next_state))
            target_f = self.model(state)
            target_f[0][action] = target
            self.model.zero_grad()
            loss = F.mse_loss(self.model(state), target_f)
            loss.backward()
            optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        weights_path = f"{output_dir}weights_{e:04d}.pt"  # Make sure 'e' or the episode number is correctly specified
        state_dict = torch.load(weights_path)
        self.model.load_state_dict(state_dict)

    def save(self, name):
        torch.save(self.model.state_dict(), name)


In [18]:
import time

# Initialize necessary variables for logging
total_frames = 0
mean_epr = 0
run_loss = 0

# Set up
agent = Agent(state_size, action_size)
done = False
scores = []
for e in range(num_episodes):
    state = env.reset()
    # fix for Tetris
    state = torch.tensor(state, dtype=torch.uint8).unsqueeze(0)
    state = state.view(1, state_size)
    episode_frames = 0
    episode_rewards = []
    for t in range(500):  # Using 't' for time steps to avoid confusion with time module
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0)
        next_state = next_state.view(1, state_size)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        episode_frames += 1
        episode_rewards.append(reward)
        if done:
            total_frames += episode_frames
            mean_epr = sum(episode_rewards) / len(episode_rewards)
            # Assuming run_loss is calculated within agent.replay or similar
            run_loss = agent.replay(batch_size) if len(agent.memory) > batch_size else 0
            print(f"_{t//3600:02d}h {(t%3600)//60:02d}m {t%60:02d}s, episodes {e+1}/{num_episodes}, frames {total_frames/1e6:.1f}M, mean epr {mean_epr:.2f}, run loss {run_loss:.2f}\\")
            break
    if e % 100 == 0:
        torch.save(agent.state_dict(), f"{output_dir}weights_{e:04d}.pt")
        # agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")

ValueError: expected sequence of length 210 at dim 1 (got 3)

In [None]:
# Assuming 'Agent' is your model class and 'agent' is an instance of this class
agent = Agent(state_size, action_size)  # Initialize the agent with the appropriate parameters

# Load the weights
weights_path = f"{output_dir}weights_{e:04d}.pt"  # Make sure 'e' or the episode number is correctly specified
state_dict = torch.load(weights_path)

# Apply the loaded weights to the model
agent.load_state_dict(state_dict)

# If your model is going to be used for inference only, switch to evaluation mode
agent.eval()

In [None]:

#run an train
# python3 tetris.py
# tensorboard --logdir=results/tetris/
# tensorboard --logdir=results/tetris/ --host=

#eval



