In [1]:
import gymnasium as gym
from gymnasium.spaces import Box
import numpy as np
import math
import random
import ctypes 
import subprocess
import time
import vector

In [3]:
# Caucasus terrain map extents:
#   minX -418619.187500, minZ 113728.156250, maxX 26382.500000, maxZ 943187.062500
# Observation layout: X, Z, Vx, Vz, course, state (0-stand, 1-crouch, 2-prone)
observation_space = Box(
    low=np.array([-418619.0, 113728.0, -3.6, -3.6, -math.pi, 0.0], dtype=np.float32),
    high=np.array([26382.0, 943187.0, 3.6, 3.6, math.pi, 2.0], dtype=np.float32),
    shape=(6,),
    dtype=np.float32,
)

# Action layout: b_move_to_view, course move, course view, speed,
#                state (0-stand, 1-crouch, 2-prone), action (0-idle, 1-climb)
action_space = Box(
    low=np.array([0.0, -math.pi, -math.pi, 0.0, 0.0, 0.0], dtype=np.float32),
    high=np.array([1.0, math.pi, math.pi, 3.6, 2.0, 1.0], dtype=np.float32),
    shape=(6,),
    dtype=np.float32,
)

In [4]:

# NOTE(review): both paths are absolute and specific to one developer machine /
# debug build; adjust them before running the notebook elsewhere.
# Simulator executable that is launched with subprocess.Popen.
env_path = 'r:/Projects/trunk/LockOnExe/bin/x86_64/vc143.debug-mt/dcs.exe'
# Connector library that HumanMoveActions.prepare() loads through ctypes.
path = 'r:/Projects/trunk/LockOnExe/bin/x86_64/vc143.debug-mt/RLConnector.dll'


In [5]:

# Start the simulator as a child process with the mission file as argument.
# The Popen handle is not stored, so this notebook never waits on or
# terminates the process itself.
subprocess.Popen([env_path, "-dofile", "rl_simple_move.miz"])


<Popen: returncode: None args: ['r:/Projects/trunk/LockOnExe/bin/x86_64/vc14...>

In [6]:
class C_RLState(ctypes.Structure):
    """Agent state returned by value from the connector's ``current_state`` call.

    Holds position (x, y, z), velocity (x, y, z) and course as C doubles.
    Presumably the field order mirrors the native struct exported by the
    connector library -- verify against its header before reordering.
    """

    _fields_ = [('pos_x', ctypes.c_double),
                ('pos_y', ctypes.c_double),
                ('pos_z', ctypes.c_double),
                ('v_x', ctypes.c_double),
                ('v_y', ctypes.c_double),
                ('v_z', ctypes.c_double),
                ('course', ctypes.c_double)]

    def to_numpy(self):
        """Return the observation vector ``[pos_x, pos_z, v_x, v_z, course, 0]``.

        The y components are dropped. The last slot is the stance state, which
        this struct does not carry, so it is always reported as 0.

        Returns:
            np.ndarray of shape (6,) and dtype float32. float32 matches the
            dtype of the declared observation space; a float64 array is
            rejected by ``Box.contains`` because it cannot be safely cast.
        """
        return np.array(
            [self.pos_x, self.pos_z, self.v_x, self.v_z, self.course, 0],
            dtype=np.float32,
        )

class C_RLAction(ctypes.Structure):
    """Action record passed by value to the connector's ``set_action`` call.

    Three C floats (speed, movement direction, view direction) followed by
    three C ints (move-to-view flag, stance state, special action).
    """

    _fields_ = [
        ('speed', ctypes.c_float),
        ('dir_move', ctypes.c_float),
        ('dir_view', ctypes.c_float),
        ('is_move_to_view', ctypes.c_int),
        ('state', ctypes.c_int),
        ('action', ctypes.c_int),
    ]

    def from_numpy(self, array):
        """Fill this structure in place from an action vector.

        Only indices 1 (move course), 2 (view course) and 3 (speed) of
        ``array`` are read. The move-to-view flag is forced to 1 and both
        ``state`` and ``action`` are forced to 0, whatever ``array`` holds
        in the remaining slots.
        """
        # Discrete components are pinned rather than taken from the vector.
        self.is_move_to_view = 1
        continuous_fields = (('dir_move', 1), ('dir_view', 2), ('speed', 3))
        for field_name, index in continuous_fields:
            setattr(self, field_name, float(array[index]))
        self.state = 0
        self.action = 0



In [7]:
class HumanMoveActions(gym.Env):
    """Continuous states, continuous actions.

    Gymnasium environment that controls a single agent through the connector
    library loaded by :meth:`prepare`. Call ``prepare()`` once before the
    first ``reset()`` / ``step()``; those methods use ``self.RLConnector``,
    which only exists after ``prepare()``.
    """

    uid = 0
    # Attempt budget restored by reset(); step() only counts it down.
    max_attempts = 60
    terminate_after_attempts = max_attempts
    reward = 0
    tick = 0
    # Class-level defaults kept for backward compatibility. __init__ gives
    # every instance its own lists so histories are not shared.
    predicted_actions = []
    true_actions = []

    def __init__(self, render_mode=None):
        """Attach the module-level observation/action spaces.

        Args:
            render_mode: stored on the instance; no rendering is implemented.
        """
        self.render_mode = render_mode
        self.observation_space = observation_space
        self.action_space = action_space
        # Per-instance histories (appending to the class-level lists would
        # leak entries between environments).
        self.predicted_actions = []
        self.true_actions = []

    def _get_info(self):
        """Return the info dict: the remaining attempt counter."""
        return {"terminate_after_attempts": self.terminate_after_attempts}

    def step(self, timecount, action=None):
        """Apply ``action``, run the simulator for about a second, then observe.

        Args:
            timecount: step counter supplied by the caller; not used here.
                When ``action`` is omitted this argument is taken as the
                action, so the Gymnasium-style call ``step(action)`` works.
            action: indexable action vector consumed by
                ``C_RLAction.from_numpy``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        if action is None:
            # Gymnasium calling convention: step(action).
            action = timecount

        action_to = C_RLAction()
        action_to.from_numpy(action)

        self.RLConnector.set_action(self.uid, action_to)

        # Poll set_start() until it reports success, let one second of wall
        # time pass, then poll set_pause() the same way.
        # NOTE(review): presumably these resume and pause the simulation.
        while not self.RLConnector.set_start():
            time.sleep(0.01)
        time.sleep(1)
        while not self.RLConnector.set_pause():
            time.sleep(0.01)

        observation = self.RLConnector.current_state(self.uid).to_numpy()

        self.predicted_actions.append(action[1])
        self.true_actions.append(observation)

        # Reward is computed on the connector side.
        self.reward = self.RLConnector.reward(self.uid)

        self.terminate_after_attempts -= 1

        terminated = self.RLConnector.is_terminated(self.uid)
        truncated = self.RLConnector.is_truncated(self.uid)
        info = self._get_info()
        return observation, self.reward, terminated, truncated, info

    def prepare(self, connector_lib):
        """Load the connector library and declare its function prototypes.

        Blocks, polling every 30 seconds and printing the status, until
        ``is_ready_to_start()`` returns true.

        Args:
            connector_lib: path of the shared library handed to ``ctypes.CDLL``.
        """
        # Load the library.
        self.RLConnector = ctypes.CDLL(connector_lib)
        connector = self.RLConnector

        # Declare return and argument types of the exported functions.
        connector.is_ready_to_start.restype = ctypes.c_bool
        connector.reward.restype = ctypes.c_float
        connector.agents_count.restype = ctypes.c_int
        connector.agent_id.restype = ctypes.c_int
        connector.agent_id.argtypes = [ctypes.c_int,]

        connector.set_restart.restype = ctypes.c_bool
        connector.set_pause.restype = ctypes.c_bool
        connector.set_start.restype = ctypes.c_bool

        connector.reward.argtypes = [ctypes.c_int,]
        connector.is_terminated.argtypes = [ctypes.c_int,]
        connector.is_truncated.argtypes = [ctypes.c_int,]
        # Without a restype ctypes reads the result as a C int.
        # NOTE(review): assumed to return C bool like the other boolean
        # connector calls -- confirm against the RLConnector header.
        connector.is_terminated.restype = ctypes.c_bool
        connector.is_truncated.restype = ctypes.c_bool

        connector.current_state.restype = C_RLState
        connector.current_state.argtypes = [ctypes.c_int,]
        connector.set_action.argtypes = [ctypes.c_int, C_RLAction,]

        while not connector.is_ready_to_start():
            print (connector.is_ready_to_start())
            time.sleep(30)

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Prints the previous episode's reward and remaining attempts. If the
        connector is not ready, requests a restart and polls every 5 seconds
        until it is. Then re-reads the id of agent 0, restores the attempt
        budget and zeroes the reward.

        Args:
            seed: forwarded to ``gym.Env.reset``.
            options: accepted for API compatibility; not used.
        """
        super().reset(seed=seed)
        print("reset", f"\tвознаграждение {self.reward:.2f}",
              f"\tпопыток {self.terminate_after_attempts}",
              "*" * 10)

        if not self.RLConnector.is_ready_to_start():
            self.RLConnector.set_restart()
            while not self.RLConnector.is_ready_to_start():
                time.sleep(5)

        self.uid = self.RLConnector.agent_id(0)

        self.terminate_after_attempts = self.max_attempts
        self.reward = 0
        observation = self.RLConnector.current_state(self.uid).to_numpy()

        info = self._get_info()
        return observation, info

    def close(self):
        """Drop this environment's reference to the connector and print a summary.

        Safe to call more than once, and before ``prepare()``.
        """
        # pop() instead of `del`: no AttributeError when the attribute is
        # already gone or was never set.
        self.__dict__.pop("RLConnector", None)
        print("close", f"\tвознаграждение {self.reward:.2f}",
              f"\tпопыток {self.terminate_after_attempts}",
              "*" * 10)
        

In [8]:
# Run this block to watch the environment in action: it loads the connector
# library, resets the environment and steps it 60 times.
env = HumanMoveActions()
env.action_space.seed(42)
env.prepare(path)

observation, info = env.reset(seed=42)
# The action is sampled once and reused for every step, including after a reset.
action = env.action_space.sample()
print(action)
for tick in range(60):


    observation, reward, terminated, truncated, info = env.step(tick, action)
    if terminated or truncated:
        observation, info = env.reset()

        


False
reset 	вознаграждение 0.00 	попыток 60 **********
[ 0.77395606 -0.3840381   2.2531374   2.5105247   0.1883547   0.97562236]
[-3.05689219e+05  6.25976562e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e+00  0.00000000e+00]
[-3.05691312e+05  6.25974000e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e+00  0.00000000e+00]
[-3.05693406e+05  6.25971375e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e+00  0.00000000e+00]
[-3.05695469e+05  6.25968875e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e+00  0.00000000e+00]
[-3.05697562e+05  6.25966312e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e+00  0.00000000e+00]
[-3.05699656e+05  6.25963688e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e+00  0.00000000e+00]
[-3.05701781e+05  6.25961125e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e+00  0.00000000e+00]
[-3.05703844e+05  6.25958562e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e+00  0.00000000e+00]
[-3.05705938e+05  6.25956000e+05 -2.06956363e+00 -2.54702806e+00
  2.25313735e

In [9]:
# Release the environment's connector reference; prints the final reward and
# the remaining attempt counter.
env.close()

close 	вознаграждение 0.00 	попыток 60 **********
