In [21]:
import numpy as np
import math

In [22]:
class DroneEnv:
    """2-D drone environment with two thrusters and randomly placed checkpoints.

    The drone starts in the centre of a ``width`` x ``height`` arena. Each call
    to :meth:`step` adds gravity to ``vy`` (so gravity moves the drone towards
    larger ``y``), applies the requested thrusters, integrates the position and
    returns a reward based on the distance to the current checkpoint.

    Args:
        width: Arena size along x; leaving ``[0, width]`` ends the episode.
        height: Arena size along y; leaving ``[0, height]`` ends the episode.
        checkpoint_radius: Distance below which a checkpoint counts as reached.
        seed: Optional seed for a private ``numpy.random.Generator`` used to
            place checkpoints. When ``None`` (the default) checkpoints are
            drawn from NumPy's global ``np.random`` state.
    """

    def __init__(self, width=1000, height=1000, checkpoint_radius=30, seed=None):
        self.width = width
        self.height = height
        self.checkpoint_radius = checkpoint_radius

        # Physics constants.
        self.GRAVITY = 2.5            # added to vy every step
        self.THRUST_POWER = 5         # per-thruster push, scaled by cos(angle)
        self.ROTATION_SPEED = 10      # angular velocity change per active thruster
        self.TILT_SENSITIVITY = 0.1   # not read anywhere in this class
        self.X_AXIS_SENS = 50         # vx is X_AXIS_SENS * sin(angle)
        self.Y_AXIS_SENS = 1          # multiplier on the thrust applied to vy
        self.ROTATION_DRAG = 0.7      # angular velocity is multiplied by this each step
        self.DT = 0.5                 # time step used when integrating position

        # None means "use the global np.random state"; storing None rather than
        # the module keeps instances picklable / deep-copyable.
        self._rng = None if seed is None else np.random.default_rng(seed)

        self.reset()

    def reset(self, seed=None):
        """Put the drone back in the centre, at rest, and spawn a checkpoint.

        Args:
            seed: If given, checkpoint placement is re-seeded with a fresh
                ``numpy.random.Generator`` before the new checkpoint is drawn.

        Returns:
            The observation vector described in :meth:`get_state`.
        """
        if seed is not None:
            self._rng = np.random.default_rng(seed)

        self.x = self.width / 2
        self.y = self.height / 2
        self.vx = 0.0
        self.vy = 0.0
        self.angle = 0.0  # degrees
        self.angular_velocity = 0.0

        self._spawn_checkpoint()

        return self.get_state()

    def _spawn_checkpoint(self):
        """Place the checkpoint uniformly inside the central 10%-90% of the arena."""
        sampler = np.random if self._rng is None else self._rng
        # x is drawn before y so an unseeded env consumes the global stream in
        # a fixed order.
        self.checkpoint_x = sampler.uniform(0.1 * self.width, 0.9 * self.width)
        self.checkpoint_y = sampler.uniform(0.1 * self.height, 0.9 * self.height)

    def _checkpoint_offset(self):
        """Return ``(dx, dy)`` from the drone to the current checkpoint."""
        return self.checkpoint_x - self.x, self.checkpoint_y - self.y

    def step(self, action):
        """Advance the simulation by one step.

        Args:
            action: Indexable with at least two entries; ``action[0]`` and
                ``action[1]`` are tested for truthiness and switch the left
                and right thruster on.

        Returns:
            A tuple ``(state, reward, done, info)`` where ``state`` is the
            observation from :meth:`get_state`, ``reward`` is
            ``-0.01 * distance`` plus ``10`` when a checkpoint is reached and
            minus ``5`` when the drone leaves the arena, ``done`` is ``True``
            only when the drone is outside the arena, and ``info`` is
            ``{"checkpoint_reached": bool}``.
        """
        thrust_left = action[0]
        thrust_right = action[1]

        # physics:
        self.vy += self.GRAVITY

        # The angle only changes after both thrusters have been applied, so the
        # thrust component along y is identical for the two of them.
        thrust = self.THRUST_POWER * math.cos(math.radians(self.angle))

        if thrust_left:
            self.angular_velocity -= self.ROTATION_SPEED
            self.vy -= self.Y_AXIS_SENS * thrust

        if thrust_right:
            self.angular_velocity += self.ROTATION_SPEED
            self.vy -= self.Y_AXIS_SENS * thrust

        self.angular_velocity *= self.ROTATION_DRAG

        self.angle += self.angular_velocity
        self.angle %= 360.0

        # Horizontal velocity is set directly from the tilt; it is not accumulated.
        self.vx = self.X_AXIS_SENS * math.sin(math.radians(self.angle))

        self.x += self.DT * self.vx
        self.y += self.DT * self.vy

        dx, dy = self._checkpoint_offset()
        dist = math.hypot(dx, dy)

        reward = -dist * 0.01

        reached = dist < self.checkpoint_radius
        if reached:
            reward += 10.0
            # The returned state already points at the next checkpoint.
            self._spawn_checkpoint()

        done = False
        if self.x < 0 or self.x > self.width or self.y < 0 or self.y > self.height:
            done = True
            reward -= 5.0

        return self.get_state(), reward, done, {"checkpoint_reached": reached}

    def get_state(self):
        """Return the observation as a ``float32`` array of 9 values.

        Order: ``x / width``, ``y / height``, ``vx / X_AXIS_SENS``,
        ``vy / 100``, ``sin(angle)``, ``cos(angle)``,
        ``angular_velocity / ROTATION_SPEED``, ``dx / width``, ``dy / height``
        where ``(dx, dy)`` is the offset from the drone to the checkpoint.
        """
        dx, dy = self._checkpoint_offset()
        angle_rad = math.radians(self.angle)

        return np.array([
            self.x / self.width,
            self.y / self.height,
            self.vx / self.X_AXIS_SENS,
            self.vy / 100,
            math.sin(angle_rad),
            math.cos(angle_rad),
            self.angular_velocity / self.ROTATION_SPEED,
            dx / self.width,
            dy / self.height,
        ], dtype=np.float32)


In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Use the GPU when CUDA is available and fall back to the CPU otherwise, so
# the notebook also runs on machines without a CUDA device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.is_available()

#print(torch.version.cuda)


True