### RNN Training Notebook  

This notebook trains a model to predict mouse cursor movement paths. The model is a Recurrent Neural Network (RNN) built from an LSTM layer followed by an attention mechanism.

In [22]:
import os
import json
import time
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Location of the cleaned dataset, resolved relative to the parent of the
# current working directory. Change the file name to match your own data file.
dataset_path = os.path.join(
    os.path.dirname(os.getcwd()),
    "data",
    "processed",
    "cleaned-data-39-steps-merged-prepared-data-2025-03-08-16:27:23.json",
)

# Sanity check: this must print "Yes" before running the following cells.
# The inner literals use single quotes because reusing the outer quote
# character inside an f-string is a SyntaxError on Python versions before 3.12.
print(f"{dataset_path} exists: {'Yes' if os.path.exists(dataset_path) else 'No'}")

In [24]:
# Read the whole dataset into memory. The context manager closes the file
# handle even when JSON parsing raises.
with open(dataset_path, "r") as dataset_file:
    dataset_json = json.load(dataset_file)

In [25]:
# Choose the compute backend in order of preference: CUDA, then Apple MPS,
# and finally the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

device

device(type='mps')

In [None]:
# Bounds used for scaling coordinates. The largest display resolution used is
# 1920x1080, so 1920 is taken as the upper bound.
min_coordinate = 0
max_coordinate = 1920


def normalize(data):
    """Linearly rescale `data` so [min_coordinate, max_coordinate] maps to [0, 1]."""
    span = max_coordinate - min_coordinate
    shifted = data - min_coordinate
    return shifted / span


def denormalize(data):
    """Undo `normalize`, mapping values from [0, 1] back to the coordinate range."""
    span = max_coordinate - min_coordinate
    scaled = data * span
    return scaled + min_coordinate

In [27]:
# Load the samples as float32 arrays and scale them with `normalize`.
input_data = np.array(dataset_json["input"], dtype=np.float32)
output_data = np.array(dataset_json["output"], dtype=np.float32)

input_data = normalize(input_data)
output_data = normalize(output_data)

# Axis 1 of the targets is used as the number of intermediate steps per sample
# (assumes the targets are shaped (samples, steps, 2) — TODO confirm against the dataset).
intermediate_steps_num = output_data.shape[1]

X_tensor = torch.tensor(input_data, dtype=torch.float, device=device)
y_tensor = torch.tensor(output_data, dtype=torch.float, device=device)

# Insert a length-1 axis at position 1 so the input is 3-D, which is what the
# batch_first LSTM consumes.
X_tensor = X_tensor.unsqueeze(1)

# Flatten every target into a single row of 2 * steps values; the factor 2 is
# there because only the x and y coordinates are predicted for each step.
y_tensor = y_tensor.view(-1, 2 * intermediate_steps_num)

# torch.tensor copied the data, so the NumPy arrays can be released to free memory.
del input_data, output_data

In [28]:
class Attention(nn.Module):
    """Pools a sequence of LSTM outputs into a single context vector.

    A bias-free linear layer produces one score per position along dim 1, the
    scores are soft-maxed along that dim, and the LSTM outputs are summed using
    the resulting weights.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # Maps each hidden vector to a single scalar score.
        self.attn = nn.Linear(hidden_dim, 1, bias=False)

    def forward(self, lstm_out):
        """Return `(context, attn_weights)` computed from `lstm_out`.

        Assumes `lstm_out` is laid out as (batch, seq, hidden_dim); dim 1 is
        the axis that gets normalized and summed over.
        """
        # Scores normalized so the weights along dim 1 sum to one.
        attn_weights = self.attn(lstm_out).softmax(dim=1)

        # Weighted sum over dim 1 yields one context vector per sample.
        weighted = attn_weights * lstm_out
        context = weighted.sum(dim=1)

        return context, attn_weights


class CursorRNN(nn.Module):
    """LSTM followed by attention pooling, a residual projection and a linear head.

    Args:
        input_dim: Size of the last dimension of the input tensor.
        hidden_dim: LSTM hidden size; also the width of the attention,
            residual and layer-norm layers.
        output_dim: Number of values produced per sample.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked LSTM layers.
            It has no effect when `num_layers` is 1.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1, dropout=0.2):
        super(CursorRNN, self).__init__()
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=False,
            # nn.LSTM applies dropout only between stacked layers and emits a
            # warning when a non-zero value is combined with a single layer,
            # so the value is zeroed in that case.
            dropout=dropout if num_layers > 1 else 0.0,
        )  # LSTM layer for sequence processing
        self.attention = Attention(
            hidden_dim
        )  # Attention mechanism to focus on important time steps
        self.residual_fc = nn.Linear(
            input_dim, hidden_dim
        )  # Residual connection to help with gradient flow
        self.layer_norm = nn.LayerNorm(
            hidden_dim
        )  # Layer normalization for training stability
        self.fc = nn.Linear(
            hidden_dim, output_dim
        )  # Output projection layer to generate final predictions

    def forward(self, x):
        """Run the model on `x` and return `(output, attn_weights)`.

        Assumes `x` is (batch, seq, input_dim), matching `batch_first=True`.
        """
        lstm_out, _ = self.lstm(x)  # Process sequence through LSTM
        context, attn_weights = self.attention(
            lstm_out
        )  # Pool the LSTM outputs into one context vector per sample
        residual = self.residual_fc(
            x[:, -1, :]
        )  # Project the last input step to hidden_dim for the residual path
        combined = self.layer_norm(
            context + residual
        )  # Combine attention output with residual and normalize
        output = self.fc(combined)  # Generate final trajectory prediction

        return output, attn_weights

In [35]:
# Width of the last input axis and length of each flattened target row.
input_size = X_tensor.shape[2]
output_size = y_tensor.shape[1]

# Training hyperparameters.
epochs = 500
lstm_layers = 2

# Hidden width: input_size squared, times the truncated square root of
# output_size, times 4.
hidden_size = 4 * (input_size**2) * int(output_size**0.5)

In [30]:
# Pair every input with its target and iterate over them in shuffled
# mini-batches of 64, loading in the main process (num_workers=0).
train_dataset = TensorDataset(X_tensor, y_tensor)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=0,
    pin_memory=False,
)

In [31]:
model = CursorRNN(input_size, hidden_size, output_size, num_layers=lstm_layers).to(
    device
)

criterion = nn.MSELoss()  # loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer

# Halve the learning rate after 5 epochs without improvement in the monitored
# loss. `verbose=True` is intentionally not passed: it is deprecated in recent
# PyTorch releases, and the current learning rate is printed every epoch below.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.5, patience=5
)

# for best model tracking
best_loss = float("inf")
best_model_state = None

for epoch in range(epochs):
    model.train()

    running_loss = 0.0

    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(
            device
        )  # moving batch to gpu or cpu based on availability
        optimizer.zero_grad()  # zeroing the gradients

        y_pred, _ = model(x_batch)  # forward pass

        loss = criterion(y_pred, y_batch)  # calculating loss
        loss.backward()  # backpropagation

        torch.nn.utils.clip_grad_norm_(
            model.parameters(), max_norm=1.0
        )  # gradient clipping to prevent exploding gradients

        optimizer.step()  # updating weights
        running_loss += loss.item()

    # Mean of the per-batch training losses for this epoch.
    avg_loss = running_loss / len(train_loader)
    scheduler.step(avg_loss)  # updating learning rate based on loss

    if avg_loss < best_loss:
        best_loss = avg_loss
        # Clone every tensor: state_dict() returns references to the live
        # parameters, so a shallow dict copy would be silently overwritten by
        # the following optimizer steps and the "best" snapshot would end up
        # holding the final weights instead.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    print(
        f'Epoch: {epoch+1}/{epochs}, Loss: {avg_loss:.10f}, LR: {optimizer.param_groups[0]["lr"]:.10f}'
    )

Epoch: 1/500, Loss: 0.0069492316, LR: 0.0010000000
Epoch: 2/500, Loss: 0.0039896650, LR: 0.0010000000
Epoch: 3/500, Loss: 0.0040888302, LR: 0.0010000000
Epoch: 4/500, Loss: 0.0040622482, LR: 0.0010000000
Epoch: 5/500, Loss: 0.0040255222, LR: 0.0010000000
Epoch: 6/500, Loss: 0.0040010492, LR: 0.0010000000
Epoch: 7/500, Loss: 0.0039247756, LR: 0.0010000000
Epoch: 8/500, Loss: 0.0039617283, LR: 0.0010000000
Epoch: 9/500, Loss: 0.0039001164, LR: 0.0010000000
Epoch: 10/500, Loss: 0.0038046737, LR: 0.0010000000
Epoch: 11/500, Loss: 0.0037908000, LR: 0.0010000000
Epoch: 12/500, Loss: 0.0037980298, LR: 0.0010000000
Epoch: 13/500, Loss: 0.0037094286, LR: 0.0010000000
Epoch: 14/500, Loss: 0.0037699338, LR: 0.0010000000
Epoch: 15/500, Loss: 0.0037692718, LR: 0.0010000000
Epoch: 16/500, Loss: 0.0037420403, LR: 0.0010000000
Epoch: 17/500, Loss: 0.0036581326, LR: 0.0010000000
Epoch: 18/500, Loss: 0.0036941812, LR: 0.0010000000
Epoch: 19/500, Loss: 0.0036428810, LR: 0.0010000000
Epoch: 20/500, Loss: 

In [None]:
# Save the best weights under a timestamped name in <parent of cwd>/data/models.
# NOTE(review): the timestamp contains ':' characters, which are not valid in
# Windows file names; the format is kept to match the dataset file naming.
model_name = f"cursor-rnn-model-{time.strftime('%Y-%m-%d-%H:%M:%S')}.pth"
models_dir = os.path.join(os.path.dirname(os.getcwd()), "data", "models")

# Create the target directory first so a missing folder cannot make the save
# fail after the whole training run has finished.
os.makedirs(models_dir, exist_ok=True)

model_path = os.path.join(models_dir, model_name)
torch.save(best_model_state, model_path)
print(f"Model saved at {model_path}")