In [2]:
# Installazione delle dipendenze
!pip install stable-baselines3[extra] gymnasium pygame pyvirtualdisplay
!apt-get install -y xvfb
!pip install pyvirtualdisplay

# Import delle librerie
from pyvirtualdisplay import Display
import numpy as np
import gymnasium as gym
import torch
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import DummyVecEnv
from stable_baselines3.common.vec_env import VecNormalize
import pyvirtualdisplay
import cv2
from IPython.display import display, Image

# Start the virtual display.
# Bound to `virtual_display` (not `display`) so the `display` function imported
# from IPython.display above stays callable for rendering images later on.
virtual_display = Display(visible=0, size=(1920, 1080))
virtual_display.start()

Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl.metadata (943 bytes)
Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cudnn_cu12-9.1.0.7

<pyvirtualdisplay.display.Display at 0x78c60fd00110>

In [10]:
class MouseEnv(gym.Env):
    """Cursor-to-target toy environment on a 1920x1080 screen.

    Observation: the cursor position divided by the screen size, a float32
    vector of shape (2,) with values in [0, 1].
    Action: a float32 vector of shape (2,) with values in [-10, 10].

    NOTE(review): only the vertical action component (``action[1]``) affects
    the cursor. The horizontal displacement is computed by a fixed rule from
    the horizontal gap to the target, and the target position is not part of
    the observation. Both are kept as in the original design — confirm that
    this is intended.

    Args:
        max_steps: Maximum number of steps per episode (default 1, the value
            that was previously hard-coded).
    """

    # Screen width and height in pixels; also the upper clip bound for the cursor.
    SCREEN_SIZE = np.array([1920, 1080], dtype=np.float32)
    # Upper bound on the scripted horizontal displacement per step.
    MAX_SPEED_X = 20
    # Standard deviation of the Gaussian noise added to the vertical action.
    VERTICAL_NOISE_STD = 10
    # The episode ends once the cursor is closer than this to the target.
    TARGET_RADIUS = 10

    def __init__(self, max_steps=1):
        super().__init__()
        self.observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=(2,), dtype=np.float32)
        self.action_space = gym.spaces.Box(low=-10, high=10, shape=(2,), dtype=np.float32)

        self.max_steps = max_steps
        self.steps = 0
        self.cursor = self._screen_center()  # Start at the center of the screen
        self.target = self._sample_target()  # Random target

    def _screen_center(self):
        """Return the screen center as a float32 position."""
        return (self.SCREEN_SIZE / 2).astype(np.float32)

    def _sample_target(self):
        """Draw an integer target position from the environment's own RNG."""
        return self.np_random.integers(0, [1920, 1080])

    def _observation(self):
        """Return the cursor position scaled to [0, 1], as float32 to match observation_space."""
        return (self.cursor / self.SCREEN_SIZE).astype(np.float32)

    def reset(self, seed=None, options=None):
        """Start a new episode: cursor at the screen center, fresh random target.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` so that
                ``self.np_random`` (used for the target and the noise) is
                reproducible.
            options: Unused.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        # Seeds self.np_random when a seed is given (Gymnasium convention).
        super().reset(seed=seed)
        self.steps = 0
        self.cursor = self._screen_center()
        self.target = self._sample_target()
        return self._observation(), {}

    def step(self, action):
        """Advance the environment by one step.

        Args:
            action: Sequence of two values ``(dx, dy)``. ``dx`` is ignored
                (see the class docstring); ``dy`` is applied with added
                Gaussian noise.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``truncated`` is always False and ``info`` is always empty.
        """
        self.steps += 1
        _, dy = action

        # Vertical noise
        dy = float(dy) + self.np_random.normal(0, self.VERTICAL_NOISE_STD)

        # Distance before moving, needed to tell whether this step made progress.
        previous_distance = np.linalg.norm(self.cursor - self.target)

        # Horizontal speed shrinks with the horizontal gap and is capped.
        gap_x = self.target[0] - self.cursor[0]
        speed_x = min(self.MAX_SPEED_X, abs(gap_x) / 10)

        # Move toward the target along x.
        if gap_x > 0:
            dx = min(speed_x, gap_x)
        else:
            dx = max(-speed_x, gap_x)

        # Move the cursor, keeping it on screen and in float32.
        displacement = np.array([dx, dy], dtype=np.float32)
        self.cursor = np.clip(self.cursor + displacement, 0.0, self.SCREEN_SIZE).astype(np.float32)

        # Distance to the target after the move
        distance = np.linalg.norm(self.cursor - self.target)

        # The closer to the target, the higher the reward
        reward = -distance / 100

        # Bonus when the step reduced the distance. The original compared the
        # new distance with itself, so the bonus could never be awarded.
        if distance < previous_distance:
            reward += 10

        reward -= 0.1  # Per-step penalty

        # End condition: target reached or step budget exhausted.
        # NOTE(review): the step budget is reported through `terminated`, as in
        # the original code; Gymnasium describes time limits as `truncated`.
        done = bool(distance < self.TARGET_RADIUS or self.steps >= self.max_steps)
        return self._observation(), float(reward), done, False, {}

In [11]:
# Seed for the training run so results can be reproduced.
SEED = 42

# Build the environment. VecNormalize keeps running statistics of observations
# and rewards and normalizes both during training.
env = DummyVecEnv([MouseEnv])
env = VecNormalize(env, norm_reward=True)

# PPO network configuration
policy_kwargs = dict(
    net_arch=[256, 256],
    activation_fn=torch.nn.ReLU
)

# PPO model
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    batch_size=64,
    n_steps=1024,
    learning_rate=0.0005,
    ent_coef=0.01,
    policy_kwargs=policy_kwargs,
    seed=SEED,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

# Train the model
model.learn(total_timesteps=200000)

# Save the model together with the VecNormalize statistics. The policy was
# trained on normalized observations, so the statistics are needed to feed it
# identically scaled inputs at inference time.
model.save("mouse_movement_model.zip")
env.save("vec_normalize.pkl")

# Download the artifacts when running on Google Colab; elsewhere the files
# simply stay in the working directory.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download("mouse_movement_model.zip")
    files.download("vec_normalize.pkl")

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 889  |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 1024 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 591          |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0064644273 |
|    clip_fraction        | 0.0208       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.82        |
|    explained_variance   | -1.47e-05    |
|    learning_rate        | 0.0005       |
|    loss                 | 0.439        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00276     |
|    std                  | 0.982        |
|    value_loss           | 3.3          |

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>