In [None]:
import torch

# Sanity-check the PyTorch install: build a random 5x3 tensor and show it.
x = torch.rand(5, 3)
print(x)
# A bare expression is only echoed when it is the last line of a notebook
# cell; in the middle of the cell its result is silently dropped, so the
# CUDA availability check is printed explicitly.
print(torch.cuda.is_available())

print(type(x))

In [7]:
import asyncio
import websockets
import nest_asyncio
import ast

import random
import numpy as np
import torch
import time
from collections import deque
from typing import Tuple

import actionset
from holographic_transformer import HolographicTransformer

class ReplayBuffer:
    """Bounded FIFO store of (state, action, reward, next_state, done) tuples.

    Backed by a ``deque`` with ``maxlen=buffer_size``: once the buffer is
    full, adding a new transition drops the oldest one.
    """

    def __init__(self, buffer_size):
        self.buffer_size = buffer_size
        self.buffer = deque(maxlen=buffer_size)

    def add(self, state, action, reward, next_state, done):
        """Append one transition, evicting the oldest if the buffer is full."""
        self.buffer.append((state, action, reward, next_state, done))

    def sample(
        self, batch_size: int
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns:
            ``(states, actions, rewards, next_states, dones)``, each a NumPy
            array whose first axis has length ``batch_size``; row ``k`` of
            every array comes from the same stored transition.

        Raises:
            ValueError: from ``np.random.choice`` when ``batch_size`` exceeds
                the number of stored transitions (sampling is without
                replacement).
        """
        batch = np.random.choice(len(self.buffer), batch_size, replace=False)
        states, actions, rewards, next_states, dones = [], [], [], [], []
        for i in batch:
            s, a, r, s2, d = self.buffer[i]
            # np.asarray converts without copying when it can and copies when
            # it must. np.array(..., copy=False) raises ValueError on
            # NumPy >= 2.0 whenever a copy is required, which is always the
            # case for plain lists and Python scalars.
            states.append(np.asarray(s))
            actions.append(np.asarray(a))
            rewards.append(np.asarray(r))
            next_states.append(np.asarray(s2))
            dones.append(np.asarray(d))
        return (
            np.array(states),
            np.array(actions),
            np.array(rewards),
            np.array(next_states),
            np.array(dones),
        )

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)

# Training hyperparameters
BUFFER_SIZE = 100_000    # capacity handed to ReplayBuffer
BATCH_SIZE = 64          # not referenced in the visible code; presumably the minibatch size
GAMMA = 0.99             # not referenced in the visible code; presumably the discount factor
EPS_START, EPS_END = 0.9, 0.05  # bounds of the exploration threshold in select_action
EPS_DECAY = 200          # divisor of steps_done in the exponential epsilon decay
TARGET_UPDATE = 10       # not referenced in the visible code; presumably a target-sync interval
LEARNING_RATE = 2.5e-4   # lr passed to the Adam optimizer
NUM_TICKS = 4            # number of actions selected per call to tick()

# Action set plus the starting game state
action_set = actionset.ActionSet()
state = {
    "health": 100,
    "energy": 100,
    "location": [0] * 3,
    "inventory": [None for _ in range(28)],
    "abilities": [False for _ in range(10)],
    "tiles": dict(),  # coordinates -> tile objects
}

# Q-learning network (holographic transformer) and the optimizer that trains it
vocab_size, embedding_dim = 100, 64  # vocab_size: number of unique game states
# NOTE(review): the meaning of the positional arguments 2, 8, 128, 0.2 is
# defined by HolographicTransformer, which is not visible here — confirm.
transformer = HolographicTransformer(
    vocab_size, embedding_dim, 2, 8, 128, 0.2
)
optimizer = torch.optim.Adam(transformer.parameters(), lr=LEARNING_RATE)

# Replay memory holding up to BUFFER_SIZE transitions
memory = ReplayBuffer(BUFFER_SIZE)

# Bookkeeping for the training loop
steps_done, episode_durations = 0, []

# Epsilon-greedy action selection
def select_action(state):
    """Choose an action epsilon-greedily and advance the global step counter.

    The exploration threshold starts at EPS_START and decays exponentially
    towards EPS_END as ``steps_done`` grows (time constant EPS_DECAY).
    ``steps_done`` is incremented on every call.

    Args:
        state: input converted with ``torch.tensor(state, dtype=torch.float)``
            on the greedy path.
            NOTE(review): ``tick`` passes the game-state dict here, which
            ``torch.tensor`` presumably cannot convert — confirm the intended
            state encoding.

    Returns:
        ``action_set.actions[i]`` for the arg-max index ``i`` of the network
        output when exploiting, otherwise ``action_set.sample()``.
    """
    global steps_done
    roll = random.random()
    decay = np.exp(-1. * steps_done / EPS_DECAY)
    eps_threshold = EPS_END + (EPS_START - EPS_END) * decay
    steps_done += 1

    if roll > eps_threshold:
        # Exploit: take the highest-valued action; no gradients are needed.
        with torch.no_grad():
            batched_state = torch.tensor(state, dtype=torch.float).unsqueeze(0)
            best_index = transformer(batched_state).argmax().item()
            return action_set.actions[best_index]

    # Explore: let the action set pick for us.
    return action_set.sample()

# Run a single game tick
def tick(state, action_set):
    """Select NUM_TICKS actions for ``state`` and apply them in order.

    All actions are selected up front, before any of them is applied. A
    movement action proposes a new ``[x, y, z]`` location one step away and
    is committed only if that coordinate (as a tuple) is a key of
    ``state["tiles"]``. ``"attack"`` is not implemented yet and any other
    action is ignored.

    Args:
        state: game-state dict; ``state["location"]`` and ``state["tiles"]``
            are read, and ``state["location"]`` is replaced in place on a
            successful move.
        action_set: unused in this function body; actions come from
            ``select_action``.

    Returns:
        None.
    """
    # (action name, index into state["location"], step along that axis)
    moves = (
        ("move_north", 1, 1),
        ("move_south", 1, -1),
        ("move_east", 0, 1),
        ("move_west", 0, -1),
    )

    chosen = [select_action(state) for _ in range(NUM_TICKS)]

    for action in chosen:
        for name, axis, step in moves:
            if action == name:
                position = state["location"]
                candidate = [position[0], position[1], position[2]]
                candidate[axis] += step
                # Only step onto coordinates that have a known tile.
                if tuple(candidate) in state["tiles"]:
                    state["location"] = candidate
                break
        else:
            if action == "attack":
                # Update state to reflect attack
                pass  # TODO:


# Apply the nest_asyncio patch before asyncio.run() is called further down.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop and asyncio.run() would otherwise refuse to start — confirm.
nest_asyncio.apply()

async def echo(websocket):
    """Echo each incoming message back to the sender and show it on the console.

    Every message is sent back unchanged, then printed after a carriage
    return and without a trailing newline, so successive messages overwrite
    one another on a single console line.

    Args:
        websocket: connection object that is async-iterable over incoming
            messages and provides an awaitable ``send``.
    """
    async for message in websocket:
        await websocket.send(message)
        # Binary frames are delivered as bytes; '\r' + bytes would raise
        # TypeError and abort the handler. Decode for display only — the
        # payload echoed back above is left untouched.
        if isinstance(message, str):
            text = message
        else:
            text = message.decode("utf-8", errors="replace")
        print('\r' + text, end='')

async def main():
    """Serve the ``echo`` handler on localhost:8765 until cancelled."""
    server = websockets.serve(echo, "localhost", 8765)
    async with server:
        # A fresh Future that nobody resolves: awaiting it keeps the server
        # context open indefinitely (run forever).
        never_done = asyncio.Future()
        await never_done

# Start the echo server. main() awaits a Future that is never resolved, so
# this call blocks until it is interrupted.
asyncio.run(main())

{"health":99,"location":"[3235, 3224]","energy":5830}}