In [None]:
import torch as T # for the neural network
import torch.nn as nn # for the neural network
import torch.nn.functional as F # for the activation functions
import torch.optim as optim # for the optimizer

import numpy as np # for the replay buffer

import struct # for converting bytes to floats
import socket # for connecting to the server
import json # for parsing the server's response
import random # for generating random actions

In [None]:
# pytorch setup
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if T.cuda.is_available():
    device = T.device("cuda")
else:
    device = T.device("cpu")

# model definition
class DQN(nn.Module):
    """Fully connected Q-network: observation vector in, one Q-value per action out.

    The network is three linear layers with ReLU after the first two.

    Args:
        n_observations: Number of input features per observation.
        n_actions: Number of outputs (one per discrete action).
        hidden_size: Width of both hidden layers. Defaults to 128, the value
            that used to be hard-coded, so existing callers are unaffected.
    """

    def __init__(self, n_observations, n_actions, hidden_size=128):
        super().__init__()
        # Layers are created in fc1, fc2, fc3 order so that default parameter
        # initialisation consumes the RNG exactly as before.
        self.fc1 = nn.Linear(n_observations, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, n_actions)

    def forward(self, x):
        """Return the raw output of the last linear layer for input ``x``.

        ``x`` must have ``n_observations`` features in its last dimension.
        No activation is applied to the output.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [None]:
# Address this notebook listens on; the peer (Godot, per the comments below) connects to it.
TCP_IP = "127.0.0.1"
TCP_PORT = 9876

# start a server: create an IPv4 TCP (stream) socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) # allow the port to be reused immediately after the server is killed
sock.bind((TCP_IP, TCP_PORT))

# listen for incoming connections (backlog of 1)
sock.listen(1)

# accept() below blocks until a client connects; there is no retry loop or timeout here
print("Waiting for connection...")

# setup with godot: `conn` is the connected socket used for all later traffic in this
# notebook, `addr` is the peer address returned by accept()
conn, addr = sock.accept()
print("Connection established with: ", addr)

# this is the outline of the main observation-action-reward loop
# sends random actions as a smoke test of the protocol; set doTest to True to run it
doTest = False
while doTest:

    # send a ready message to signal the python script is ready
    # (sendall keeps writing until the whole message is sent; send() may stop early)
    conn.sendall("ready".encode())

    # first we wait for godot to send an observation
    # NOTE(review): assumes the whole JSON document arrives in a single recv of
    # at most 4096 bytes -- confirm against the Godot sender
    observation = conn.recv(4096)
    if not observation:
        # recv() returns b"" once the peer has closed the connection
        print("Connection closed by peer; stopping test loop")
        break
    observation = json.loads(observation.decode())
    print("Received observation: ", observation)

    # then we send an action
    action = random.choice([0, 1, 2])
    print("Sending action: ", action)
    conn.sendall(action.to_bytes(1, byteorder='big'))

    # finally we wait for the reward: exactly one float in struct format 'f'.
    # Reading exactly that many bytes avoids struct.error when recv returns a
    # different amount than unpack expects.
    # NOTE(review): 'f' uses this machine's native byte order -- confirm it matches Godot's
    reward_size = struct.calcsize('f')
    reward = conn.recv(reward_size, socket.MSG_WAITALL)
    if len(reward) != reward_size:
        print("Connection closed by peer before a full reward arrived; stopping test loop")
        break
    reward = struct.unpack('f', reward)[0]
    print("Received reward: ", reward)

In [None]:
# Train the Q-network online from the observation / action / reward stream using
# the one-step Q-learning update (the DQN rule, here without a replay buffer or
# target network).
#
# Per iteration the wire protocol is unchanged:
#   python -> "ready", godot -> observation (JSON), python -> action (1 byte),
#   godot -> reward (one float in struct format 'f').

num_iterations = 1000

# hyperparameters
n_observations = 49
n_actions = 3
learning_rate = 0.001
gamma = 0.99  # discount factor applied to the bootstrapped next-state value
false_observation_value = 1000.0  # substituted for observation entries that arrive as JSON `false`

# reward wire format
# NOTE(review): 'f' uses this machine's native byte order -- confirm it matches Godot's
reward_format = 'f'
reward_size = struct.calcsize(reward_format)

# model setup
model = DQN(n_observations, n_actions).to(device)

# optimizer setup
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# loss function
loss_function = nn.MSELoss()

# The TD target for a step is  reward + gamma * max_a Q(next_observation, a),
# so a transition can only be trained on once the NEXT observation has arrived.
# It is parked here in the meantime: (observation_tensor, action, reward_tensor).
pending_transition = None

# training loop
for i in range(num_iterations):

    # Send a ready message to signal the Python script is ready
    # (sendall keeps writing until the whole message is sent; send() may stop early)
    conn.sendall("ready".encode())

    # Wait for Godot to send an observation
    # NOTE(review): assumes the whole JSON document arrives in a single recv of
    # at most 4096 bytes -- confirm against the Godot sender
    observation_bytes = conn.recv(4096)
    if not observation_bytes:
        # recv() returns b"" once the peer has closed the connection
        raise ConnectionError("Godot closed the connection before sending an observation")
    observation = json.loads(observation_bytes.decode())
    print("Received observation:", observation)

    # Convert observation dictionary to a list of floats (dict order = JSON order)
    observation_list = [
        float(value) if value is not False else false_observation_value
        for value in observation.values()
    ]
    if len(observation_list) != n_observations:
        raise ValueError(
            f"Expected {n_observations} observation values, got {len(observation_list)}"
        )
    # Leading batch dimension of 1 for the model input
    observation_tensor = T.tensor([observation_list], dtype=T.float32, device=device)

    # Q-values of the current observation. Computed without gradients: they are
    # used to pick the action and as the bootstrap value for the previous step,
    # neither of which should be back-propagated through.
    with T.no_grad():
        q_values = model(observation_tensor)

    # Update the model on the previous transition now that its next state is known
    if pending_transition is not None:
        previous_observation, previous_action, previous_reward = pending_transition
        # NOTE(review): the protocol carries no episode-end flag, so the target
        # always bootstraps from the next observation.
        target = previous_reward + gamma * q_values.max()
        optimizer.zero_grad()
        # Both prediction and target are 0-dim, so MSELoss does not broadcast
        predicted_q = model(previous_observation)[0, previous_action]
        loss = loss_function(predicted_q, target)
        loss.backward()
        optimizer.step()
        # The loss belongs to the transition collected in the previous iteration;
        # the final transition has no successor and is therefore never trained on.
        print(f"Iteration {i - 1}, Loss: {loss.item()}")

    # Get action from the model
    # NOTE(review): selection is purely greedy, as before; there is no exploration.
    action = T.argmax(q_values).item()

    print("Sending action:", action)
    conn.sendall(action.to_bytes(1, byteorder='big'))

    # Wait for the reward: read exactly one packed float so struct.unpack never
    # sees a buffer of the wrong length
    reward_bytes = conn.recv(reward_size, socket.MSG_WAITALL)
    if len(reward_bytes) != reward_size:
        raise ConnectionError("Godot closed the connection before sending a full reward")
    reward = struct.unpack(reward_format, reward_bytes)[0]
    print("Received reward:", reward)

    # Convert reward to a 0-dim tensor and park the transition until the next observation
    reward_tensor = T.tensor(reward, dtype=T.float32, device=device)
    pending_transition = (observation_tensor, action, reward_tensor)
