In [1]:
import os

# Define paths
base_path = '/kaggle/working'
project_name = 'Double-Pendulum-Simulation'
project_root = os.path.join(base_path, project_name)

# Clone or pull repository
if not os.path.exists(project_root):
    os.chdir(base_path)
    !git clone https://github.com/nthday-jpg/Double-Pendulum-Simulation.git
    print("Repository cloned successfully!")
else:
    os.chdir(project_root)
    !git pull
    print("Repository updated successfully!")
%cd {base_path}

Cloning into 'Double-Pendulum-Simulation'...
remote: Enumerating objects: 420, done.[K
remote: Counting objects: 100% (24/24), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 420 (delta 11), reused 16 (delta 7), pack-reused 396 (from 1)[K
Receiving objects: 100% (420/420), 187.37 KiB | 3.12 MiB/s, done.
Resolving deltas: 100% (245/245), done.
Repository cloned successfully!
/kaggle/working


In [2]:
# Generate training data
# Generate 10 trajectories with 5000 points each
!python {project_root}/generate_data.py \
    --output_dir {project_root}/data/raw \
    --num_trajectories 1 \
    --num_points 3000 \
    --t_start 0.0 \
    --t_end 5.0 \
    --check_energy

Generating 1 trajectories...
Deriving equations symbolically (this may take a moment)...
Symbolic derivation complete!
  Trajectory 000 (m1=1.00, m2=1.00, l1=1.00, l2=1.00): Energy drift = 0.000%
  Saved: /kaggle/working/Double-Pendulum-Simulation/data/raw/trajectory_000.npz and /kaggle/working/Double-Pendulum-Simulation/data/raw/parameters_000.json

Dataset complete! Saved to /kaggle/working/Double-Pendulum-Simulation/data/raw


In [3]:
# Remove old dataset (opt-in)
# This cell sits between data generation and training. Deleting data/raw
# unconditionally would, on a top-to-bottom run, wipe the trajectories that
# were just generated and that the training configuration reads via data_dir.
# The deletion is therefore guarded by a flag; enable it, run this cell, and
# then re-run the generation cell to rebuild the dataset from scratch.
import shutil

reset_dataset = False  # set to True to discard the existing data/raw directory

if reset_dataset:
    # Same target as before, but derived from project_root instead of a
    # hardcoded absolute path; a missing directory is not an error (like rm -rf).
    shutil.rmtree(os.path.join(project_root, "data", "raw"), ignore_errors=True)

In [None]:
# Training Configuration
# ----------------------
# Plain settings only; the second half of this cell converts them into
# command-line arguments for train.py.

# --- Experiment ---
seed = 42
run_name = None  # falsy -> the --run_name flag is omitted
checkpoint_path = "run_20260110_113642/checkpoints/best_model.pth"

# --- Data ---
data_dir = project_root + "/data/raw"
val_split, test_split = 0.2, 0.1
normalize_time = False
normalize_state = False

# --- Model architecture ---
model = "pinn"            # one of: mlp, neural_ode, hnn, pinn
hidden_dims = "64 64"     # layer widths, separated by spaces
activation = "tanh"       # one of: tanh, relu, gelu, silu, softplus
use_batch_norm = False
dropout_rate = 0.0
final_activation = None   # one of: None, tanh, sigmoid
input_dim, output_dim = 5, 2

# --- Training ---
lr = 1e-3
batch_size = 128
batch_size_collocation = 1024
epochs = 200
optimizer = "adam"        # one of: adam, adamw, sgd
weight_decay = 0.0
grad_clip = None
scheduler = None          # one of: None, cosine, step

# --- PyTorch optimizations ---
use_compile = False
compile_mode = "default"  # one of: default, reduce-overhead, max-autotune
mixed_precision = False
gradient_accumulation_steps = 1

# --- Regularization ---
l1_lambda = 0.0
l2_lambda = 0.0

# --- Physics / PINN ---
use_physics = True
n_collocation = 5000
data_fraction = 0.1
data_loss_ratio = 0.999
residual_type = "lagrangian"      # one of: eom, lagrangian, hamiltonian

# --- Time domain ---
t_min, t_max = 0.0, 5.0
collocation_sampling = "uniform"  # one of: uniform, random, latin_hypercube

# --- Rollout evaluation ---
rollout_T = 5.0
rollout_dt = 0.01

# --- Logging ---
log_interval = 10
print_interval = 10
save_checkpoints = True
checkpoint_interval = 50
test_interval = 50

# --- Early stopping ---
early_stopping_patience = 50  # use None to turn early stopping off

# --- Physical parameters ---
m1, m2 = 1.0, 1.0
l1, l2 = 1.0, 1.0
g = 9.81

# Build command arguments
# Each entry of args_list is one "--flag value" fragment (or a bare "--flag");
# the fragments are joined into `args`, which the launch cell interpolates
# into a shell command. Free-form string values (paths, run name) go through
# shlex.quote so that whitespace or shell metacharacters inside them cannot
# split or alter the command. For values without such characters the
# resulting text is unchanged.
import shlex

# hidden_dims is intentionally space-separated: every width must reach the
# script as its own token, so each token is quoted individually.
hidden_dims_tokens = " ".join(shlex.quote(width) for width in hidden_dims.split())

args_list = [
    f"--seed {seed}",
    f"--hidden_dims {hidden_dims_tokens}",
    f"--input_dim {input_dim}",
    f"--output_dim {output_dim}",
    f"--residual_type {residual_type}",
    f"--t_max {t_max}",
    f"--t_min {t_min}",
    f"--epochs {epochs}",
    f"--lr {lr}",
    f"--batch_size {batch_size}",
    f"--batch_size_collocation {batch_size_collocation}",
    f"--data_dir {shlex.quote(str(data_dir))}",
    f"--val_split {val_split}",
    f"--test_split {test_split}",
    f"--model {model}",
    f"--activation {activation}",
    f"--dropout_rate {dropout_rate}",
    f"--optimizer {optimizer}",
    f"--weight_decay {weight_decay}",
    f"--compile_mode {compile_mode}",
    f"--gradient_accumulation_steps {gradient_accumulation_steps}",
    f"--l1_lambda {l1_lambda}",
    f"--l2_lambda {l2_lambda}",
    f"--n_collocation {n_collocation}",
    f"--data_fraction {data_fraction}",
    f"--data_loss_ratio {data_loss_ratio}",
    f"--collocation_sampling {collocation_sampling}",
    f"--rollout_T {rollout_T}",
    f"--rollout_dt {rollout_dt}",
    f"--log_interval {log_interval}",
    f"--print_interval {print_interval}",
    f"--test_interval {test_interval}",
    f"--checkpoint_interval {checkpoint_interval}",
    f"--m1 {m1}",
    f"--m2 {m2}",
    f"--l1 {l1}",
    f"--l2 {l2}",
    f"--g {g}"
]

# Add optional flags
# A setting that is falsy (None, False, 0, empty string) leaves its flag out
# of the command entirely.
if run_name:
    args_list.append(f"--run_name {shlex.quote(str(run_name))}")
if checkpoint_path:
    args_list.append(f"--checkpoint_path {shlex.quote(str(checkpoint_path))}")
if use_compile:
    args_list.append("--use_compile")
if normalize_time:
    args_list.append("--normalize_time")
if normalize_state:
    args_list.append("--normalize_state")
if use_batch_norm:
    args_list.append("--use_batch_norm")
if final_activation:
    args_list.append(f"--final_activation {final_activation}")
if grad_clip:
    args_list.append(f"--grad_clip {grad_clip}")
if scheduler:
    args_list.append(f"--scheduler {scheduler}")
if mixed_precision:
    args_list.append("--mixed_precision")
if save_checkpoints:
    args_list.append("--save_checkpoints")
if early_stopping_patience:
    args_list.append(f"--early_stopping_patience {early_stopping_patience}")
if use_physics:
    args_list.append("--use_physics")

# Single string consumed by the launch cell via {args}.
args = " ".join(args_list)

# Human-readable recap of the most important settings, followed by the number
# of argument fragments that were assembled.
early_stop_label = early_stopping_patience if early_stopping_patience else 'disabled'
checkpoint_label = checkpoint_path if checkpoint_path else 'None (training from scratch)'

summary_lines = [
    "Training Configuration:",
    f"  Seed: {seed}",
    f"  Model: {model}, Hidden: {hidden_dims}, Activation: {activation}",
    f"  Optimizer: {optimizer}, LR: {lr}, Weight Decay: {weight_decay}",
    f"  Epochs: {epochs}, Early Stop: {early_stop_label}",
    f"  Batch sizes: data={batch_size}, colloc={batch_size_collocation}",
    f"  Data loss ratio = {data_loss_ratio}, type={residual_type}",
    f"  Checkpoint: {checkpoint_label}",
]
print("\n".join(summary_lines))
print(f"\nCommand arguments generated with {len(args_list)} parameters")


Training Configuration:
  Seed: 42
  Model: pinn, Hidden: 64 64, Activation: tanh
  Optimizer: adam, LR: 0.001, Weight Decay: 0.0
  Epochs: 200, Early Stop: 50
  Batch sizes: data=128, colloc=1024
  Data loss ratio = 0.999, type=lagrangian
  Checkpoint: run_20260110_113642/checkpoints/best_model.pth

Command arguments generated with 41 parameters


In [5]:
# Launch distributed training with Accelerate
!accelerate launch --num_processes=2 {project_root}/train.py {args}

The following values were not passed to `accelerate launch` and had defaults used instead:
		More than one GPU was found, enabling multi-GPU training.
		If this was unintended please pass in `--num_processes=1`.
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--dynamo_backend` was set to a value of `'no'`
E0000 00:00:1768048139.992601      84 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768048139.992581      83 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768048140.044396      83 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
E0000 00:00:1768048140.044401      84 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to re

In [6]:
!zip -r /kaggle/working/runs.zip runs
# !rm -rf runs


zip error: Nothing to do! (try: zip -r /kaggle/working/runs.zip . -i runs)
