In [1]:
import os

# Define paths
base_path = '/kaggle/working'
project_name = 'Double-Pendulum-Simulation'
project_root = os.path.join(base_path, project_name)

# Clone or pull repository
if not os.path.exists(project_root):
    os.chdir(base_path)
    !git clone https://github.com/nthday-jpg/Double-Pendulum-Simulation.git
    print("Repository cloned successfully!")
else:
    os.chdir(project_root)
    !git pull
    print("Repository updated successfully!")
%cd {base_path}

# Ensure project root is in PYTHONPATH for script's imports
os.environ['PYTHONPATH'] = project_root


Cloning into 'Double-Pendulum-Simulation'...
remote: Enumerating objects: 666, done.[K
remote: Counting objects: 100% (270/270), done.[K
remote: Compressing objects: 100% (181/181), done.[K
remote: Total 666 (delta 175), reused 178 (delta 89), pack-reused 396 (from 1)[K
Receiving objects: 100% (666/666), 245.87 KiB | 9.46 MiB/s, done.
Resolving deltas: 100% (409/409), done.
Repository cloned successfully!
/kaggle/working


In [2]:
# Run the repository's scripts/generate_data.py to write the raw dataset into
# {project_root}/data/raw (the same directory the training configuration uses as data_dir).
# Flags passed: 120 trajectories, --num_points 1500, time span 0.0 to 2.0,
# and --check_energy (the script's log reports an energy drift per trajectory).
!python {project_root}/scripts/generate_data.py \
    --output_dir {project_root}/data/raw \
    --num_trajectories 120 \
    --num_points 1500 \
    --t_start 0.0 \
    --t_end 2.0 \
    --check_energy

Generating 120 trajectories...
Deriving equations symbolically (this may take a moment)...
Symbolic derivation complete!
  Trajectory 000 (m1=1.00, m2=1.00, l1=1.00, l2=1.00): Energy drift = 0.000%
  Saved: /kaggle/working/Double-Pendulum-Simulation/data/raw/trajectory_000.npz and /kaggle/working/Double-Pendulum-Simulation/data/raw/parameters_000.json
  Trajectory 001 (m1=1.00, m2=1.00, l1=1.00, l2=1.00): Energy drift = 0.000%
  Saved: /kaggle/working/Double-Pendulum-Simulation/data/raw/trajectory_001.npz and /kaggle/working/Double-Pendulum-Simulation/data/raw/parameters_001.json
  Trajectory 002 (m1=1.00, m2=1.00, l1=1.00, l2=1.00): Energy drift = 0.000%
  Saved: /kaggle/working/Double-Pendulum-Simulation/data/raw/trajectory_002.npz and /kaggle/working/Double-Pendulum-Simulation/data/raw/parameters_002.json
  Trajectory 003 (m1=1.00, m2=1.00, l1=1.00, l2=1.00): Energy drift = 0.000%
  Saved: /kaggle/working/Double-Pendulum-Simulation/data/raw/trajectory_003.npz and /kaggle/w

In [3]:
# Remove old dataset
!rm -rf /kaggle/working/Double-Pendulum-Simulation/data/raw/

In [4]:
# Training Configuration
# Every tunable for the training run lives here; the second half of the cell
# turns these values into the command-line string `args` used by the launch cell.

# --- Experiment ---
seed = 42
run_name = None  # Auto-generated if None
checkpoint_path = "runs/run_20260115_154248/checkpoints/best_model.pth"

# --- Data ---
data_dir = f"{project_root}/data/raw"
val_split = 0.2
test_split = 0.1
normalize_time = True
normalize_state = False

# --- Model architecture ---
model = "pinn"  # mlp | neural_ode | hnn | pinn
hidden_dims = "124 124 124 124"  # Space-separated
activation = "tanh"  # tanh | relu | gelu | silu | softplus
use_batch_norm = False
dropout_rate = 0.0
final_activation = None  # None | tanh | sigmoid
input_dim = 5
output_dim = 2

# --- Training ---
lr = 0.001
batch_size = 2048
epochs = 200
optimizer = "adam"  # adam | adamw | sgd
weight_decay = 0.0
grad_clip = None
scheduler = True
scheduler_patience = 40

# --- PyTorch optimizations ---
use_compile = False
compile_mode = "default"  # default | reduce-overhead | max-autotune
mixed_precision = False
gradient_accumulation_steps = 1

# --- Regularization ---
l1_lambda = 0.0
l2_lambda = 0.001

# --- Physics / PINN ---
data_loss_ratio = 0.5
residual_type = "lagrangian"  # eom | lagrangian | hamiltonian

# --- Logging ---
log_interval = 10
print_interval = 10
save_checkpoints = True
checkpoint_interval = 50
test_interval = 100

# --- Early stopping ---
early_stopping_patience = 60  # None to disable

# Options that are always passed, each rendered as "--<flag> <value>".
# NOTE(review): `scheduler` is rendered as "--scheduler True"; confirm that is
# what train.py's argument parser expects.
args_list = [
    f"--{flag} {value}"
    for flag, value in (
        ("seed", seed),
        ("hidden_dims", hidden_dims),
        ("input_dim", input_dim),
        ("output_dim", output_dim),
        ("residual_type", residual_type),
        ("epochs", epochs),
        ("lr", lr),
        ("batch_size", batch_size),
        ("data_dir", data_dir),
        ("val_split", val_split),
        ("test_split", test_split),
        ("model", model),
        ("scheduler", scheduler),
        ("scheduler_patience", scheduler_patience),
        ("activation", activation),
        ("dropout_rate", dropout_rate),
        ("optimizer", optimizer),
        ("weight_decay", weight_decay),
        ("compile_mode", compile_mode),
        ("gradient_accumulation_steps", gradient_accumulation_steps),
        ("l1_lambda", l1_lambda),
        ("l2_lambda", l2_lambda),
        ("data_loss_ratio", data_loss_ratio),
        ("log_interval", log_interval),
        ("print_interval", print_interval),
        ("test_interval", test_interval),
        ("checkpoint_interval", checkpoint_interval),
    )
]

# Options that are passed only when their setting is truthy. The third field
# says whether the flag carries its value or is a bare switch.
args_list.extend(
    f"--{flag} {value}" if takes_value else f"--{flag}"
    for flag, value, takes_value in (
        ("run_name", run_name, True),
        ("checkpoint_path", checkpoint_path, True),
        ("use_compile", use_compile, False),
        ("normalize_time", normalize_time, False),
        ("normalize_state", normalize_state, False),
        ("use_batch_norm", use_batch_norm, False),
        ("final_activation", final_activation, True),
        ("grad_clip", grad_clip, True),
        ("mixed_precision", mixed_precision, False),
        ("save_checkpoints", save_checkpoints, False),
        ("early_stopping_patience", early_stopping_patience, True),
    )
    if value
)

# Single string interpolated into the launch command.
args = " ".join(args_list)

# Human-readable recap of the most important settings.
print(
    "\n".join(
        (
            "Training Configuration:",
            f"  Seed: {seed}",
            f"  Model: {model}, Hidden: {hidden_dims}, Activation: {activation}",
            f"  Optimizer: {optimizer}, LR: {lr}, Weight Decay: {weight_decay}",
            f"  Epochs: {epochs}, Early Stop: {early_stopping_patience or 'disabled'}",
            f"  Batch size: {batch_size}",
            f"  Data loss ratio = {data_loss_ratio}, type={residual_type}",
            f"  Checkpoint: {checkpoint_path or 'None (training from scratch)'}",
            f"\nCommand arguments generated with {len(args_list)} parameters",
        )
    )
)


Training Configuration:
  Seed: 42
  Model: pinn, Hidden: 124 124 124 124, Activation: tanh
  Optimizer: adam, LR: 0.001, Weight Decay: 0.0
  Epochs: 200, Early Stop: 60
  Batch size: 2048
  Data loss ratio = 0.5, type=lagrangian
  Checkpoint: runs/run_20260115_154248/checkpoints/best_model.pth

Command arguments generated with 31 parameters


In [5]:
# Launch distributed training with Accelerate
!accelerate launch --num_processes=2 {project_root}/scripts/train.py {args}

The following values were not passed to `accelerate launch` and had defaults used instead:
		More than one GPU was found, enabling multi-GPU training.
		If this was unintended please pass in `--num_processes=1`.
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--dynamo_backend` was set to a value of `'no'`
E0000 00:00:1768495101.468709      85 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768495101.468739      84 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768495101.543028      85 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
E0000 00:00:1768495101.543034      84 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to re

In [6]:
t_start = 0
t_end = 3
num_points = 1000
theta1 = 0.5
theta2 = 0.2
omega1 = 0.1
omega2 = -0.1
checkpoint = f"{base_path}/runs/run_20260115_154248/checkpoints/best_model.pth"
output_dir = f"{base_path}/runs/test_inference"

arg_list = [
    f"{checkpoint}",
    f"--output-dir {output_dir}",
    f"--t-start {t_start}",
    f"--t-end {t_end}",
    f"--omega1 {omega1}",
    f"--omega2 {omega2}",
    f"--theta1 {theta1}",
    f"--theta2 {theta2}",
]
args = " ".join(arg_list)
!python {project_root}/scripts/inference.py {args}

Loading model from: /kaggle/working/runs/run_20260115_154248/checkpoints/best_model.pth
Traceback (most recent call last):
  File "/kaggle/working/Double-Pendulum-Simulation/scripts/inference.py", line 403, in <module>
    main()
  File "/kaggle/working/Double-Pendulum-Simulation/scripts/inference.py", line 391, in main
    run_inference(
  File "/kaggle/working/Double-Pendulum-Simulation/scripts/inference.py", line 294, in run_inference
    model, cfg, norm_params = load_model(checkpoint_path, device)
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/kaggle/working/Double-Pendulum-Simulation/scripts/inference.py", line 22, in load_model
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/torch/serialization.py", line 1484, in load
    with _open_file_like(f, "rb") as opened_file:
 

In [7]:
# Archive the `runs` directory recursively into /kaggle/working/runs.zip.
# `runs` is a relative path, so it is resolved against the current working directory.
!zip -r /kaggle/working/runs.zip runs


zip error: Nothing to do! (try: zip -r /kaggle/working/runs.zip . -i runs)


In [8]:
# Delete the `runs` directory, resolved against the current working directory.
# NOTE(review): the checkpoint paths used by the training and inference cells
# live under `runs`, so this removes them when run from /kaggle/working.
!rm -rf runs