-
-
Notifications
You must be signed in to change notification settings - Fork 656
Closed
Description
🐛 Bug description
import gc
import torch
import torch.nn as nn
from ignite.engine import create_supervised_trainer, Events
from ignite.handlers import ProgressBar
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
def do(device, model):
    """Run one epoch of supervised training on random data.

    Builds an in-memory dataset of 32 random samples, wraps it in a
    ``DataLoader`` with batch size 6, and trains ``model`` with Adam and
    ``BCEWithLogitsLoss`` through an Ignite supervised trainer that has a
    ``ProgressBar`` attached.

    Args:
        device: Device handed to ``create_supervised_trainer``.
        model: Module to train; its parameters are given to the optimizer.
    """
    # Inputs are drawn before targets so the RNG is consumed in a fixed order.
    inputs = torch.rand(32, 1, 64, 64, 32)
    targets = torch.rand(32, 1).round()  # random 0/1 labels

    dataset = TensorDataset(inputs, targets)
    loader = DataLoader(dataset, batch_size=6)

    optimizer = Adam(model.parameters(), lr=1e-4)
    criterion = nn.BCEWithLogitsLoss()
    engine = create_supervised_trainer(model, optimizer, criterion, device=device)

    progress = ProgressBar()
    progress.attach(engine)

    engine.run(loader, max_epochs=1)
    progress.close()
def main():
    """Repeatedly build, train and discard a model while reporting CUDA memory.

    Each of the 100 iterations creates a fresh model on the CUDA device,
    trains it via ``do``, then drops the reference, forces a garbage
    collection pass and empties the CUDA caching allocator before printing
    the current and peak allocated byte counts.
    """
    cuda = torch.device("cuda")
    in_features = 64 * 64 * 32  # flattened size of one (1, 64, 64, 32) sample

    for iteration in range(100):
        model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, 6500),
            nn.ReLU(),
            nn.Linear(6500, 1),
        ).to(cuda)

        do(cuda, model)

        # Explicitly release everything this iteration created before measuring.
        del model
        gc.collect()
        torch.cuda.empty_cache()

        print(
            "!!!",
            iteration,
            torch.cuda.memory_allocated(),
            torch.cuda.max_memory_allocated(),
        )
if __name__ == "__main__":
main()

This code runs out of memory (OOM) on Python 3.12, but works fine on Python 3.10.
Circular references do not seem to be handled properly.
The first iteration of the main loop runs fine, but its resources are not released, so the second iteration runs out of memory (OOM).
Environment
- PyTorch Version: 2.7.1+cu128
- Ignite Version: 0.5.2
- OS (e.g., Linux): ubuntu 24
- How you installed Ignite: uv
- Python version: 3.12
- Any other relevant information: RTX 4090 24G, NVIDIA-SMI 575.64.03 Driver Version: 575.64.03 CUDA Version: 12.9
Metadata
Metadata
Assignees
Labels
No labels