IndexError: map::at in Stable Diffusion Training #1775
🐛 Describe the bug
Hi,
I am trying to optimize this Stable Diffusion training script.
It previously raised RuntimeErrors, tracked in #1743, which were fixed by #87758.
Now the script fails with the error below.
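For reference, a minimal sketch of the driver (test.py), reconstructed from the traceback below; the checkpoint id, the prompt, and the exact optimize call are assumptions, not the original script:

# Minimal sketch of test.py, reconstructed from the traceback below.
# The checkpoint id and prompt are placeholders (assumptions).
import torch
import torch._dynamo as dynamo
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",  # assumed checkpoint
).to("cuda")

@dynamo.optimize("inductor")  # compile the pipeline call with TorchInductor
def apply(x):
    return pipe(x)

prompt = "a photo of an astronaut riding a horse"  # assumed prompt
image = apply(prompt).images[0]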
Error logs
"""
Traceback (most recent call last):
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/concurrent/futures/process.py", line 246, in _process_worker
r = call_item.fn(*call_item.args, **call_item.kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/codecache.py", line 245, in _worker_compile
kernel.precompile(warm_cache_only_with_cc=cc)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/triton_ops/autotune.py", line 58, in precompile
self.launchers = [
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/triton_ops/autotune.py", line 59, in <listcomp>
self._precompile_config(c, warm_cache_only_with_cc)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/triton_ops/autotune.py", line 73, in _precompile_config
triton.compile(
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/triton/compiler.py", line 1251, in compile
asm, shared, kernel_name = _compile(fn, signature, device, constants, configs[0], num_warps, num_stages,
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/triton/compiler.py", line 901, in _compile
name, asm, shared_mem = _triton.code_gen.compile_ttir(backend, module, device, num_warps, num_stages, extern_libs, cc)
IndexError: map::at
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/mreso/torchdynamo/test.py", line 34, in <module>
image = apply(prompt).images[0]
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py", line 157, in _fn
return fn(*args, **kwargs)
File "/home/mreso/torchdynamo/test.py", line 31, in apply
return pipe(x)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py", line 214, in __call__
text_inputs = self.tokenizer(
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py", line 334, in <graph break in __call__>
latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/diffusers/schedulers/scheduling_pndm.py", line 212, in step
return self.step_plms(model_output=model_output, timestep=timestep, sample=sample, return_dict=return_dict)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/diffusers/schedulers/scheduling_pndm.py", line 328, in step_plms
prev_sample = self._get_prev_sample(sample, timestep, prev_timestep, model_output)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/diffusers/schedulers/scheduling_pndm.py", line 362, in _get_prev_sample
alpha_prod_t = self.alphas_cumprod[timestep]
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/diffusers/schedulers/scheduling_pndm.py", line 362, in <graph break in _get_prev_sample>
alpha_prod_t = self.alphas_cumprod[timestep]
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/diffusers/schedulers/scheduling_pndm.py", line 363, in <graph break in _get_prev_sample>
alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/diffusers/schedulers/scheduling_pndm.py", line 363, in <graph break in _get_prev_sample>
alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_dynamo/eval_frame.py", line 157, in _fn
return fn(*args, **kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/functorch/_src/aot_autograd.py", line 870, in forward
return compiled_f(
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/functorch/_src/aot_autograd.py", line 861, in new_func
return compiled_fn(args)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/functorch/_src/aot_autograd.py", line 299, in new_fn
fw_outs = call_func_with_args(compiled_fw, args, disable_amp=disable_amp)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/functorch/_src/aot_autograd.py", line 255, in call_func_with_args
out = normalize_as_list(f(args))
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_dynamo/debug_utils.py", line 460, in deferred_for_real_inputs
raise e
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_dynamo/debug_utils.py", line 444, in deferred_for_real_inputs
inner_compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/compile_fx.py", line 123, in compile_fx_inner
compiled_fn = graph.compile_to_fn()
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/graph.py", line 348, in compile_to_fn
return self.compile_to_module().call
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/graph.py", line 338, in compile_to_module
mod = PyCodeCache.load(code)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/codecache.py", line 216, in load
exec(code, mod.__dict__, mod.__dict__)
File "/tmp/torchinductor_mreso/tw/ctwoc6kqxh4pfpdnw5d5l4uyylyvhkkkuy4vgjktcusswnmwx6wr.py", line 58, in <module>
async_compile.wait(globals())
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/codecache.py", line 386, in wait
scope[key] = result.result()
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/codecache.py", line 263, in result
self.future.result()
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/concurrent/futures/_base.py", line 446, in result
return self.__get_result()
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/concurrent/futures/_base.py", line 391, in __get_result
raise self._exception
IndexError: map::at
Minified repro
The minifier produces this repro:
import torch._inductor.overrides
import torch
from torch import tensor, device
import torch.fx as fx
from torch._dynamo.testing import rand_strided
from math import inf
from torch.fx.experimental.proxy_tensor import make_fx
# torch version: 1.14.0.dev20221027+cu117
# torch cuda version: 11.7
# torch git version: 7ecffceed4e7108cb527d75273936abd7a8c3716
# CUDA Info:
# nvcc: NVIDIA (R) Cuda compiler driver
# Copyright (c) 2005-2022 NVIDIA Corporation
# Built on Wed_Jun__8_16:49:14_PDT_2022
# Cuda compilation tools, release 11.7, V11.7.99
# Build cuda_11.7.r11.7/compiler.31442593_0
# GPU Hardware Info:
# NVIDIA A100-SXM4-80GB : 1
from torch.nn import *
class Repro(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer('_tensor_constant0', torch.randn([], dtype=torch.float32))

    def forward(self, arg0_1, arg3_1):
        _tensor_constant0 = self._tensor_constant0
        lift_fresh_copy = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
        sub = torch.ops.aten.sub.Tensor(lift_fresh_copy, arg3_1); lift_fresh_copy = None
        mul_1 = torch.ops.aten.mul.Tensor(arg3_1, sub); arg3_1 = sub = None
        mul_2 = torch.ops.aten.mul.Tensor(mul_1, arg0_1); mul_1 = arg0_1 = None
        pow_3 = torch.ops.aten.pow.Tensor_Scalar(mul_2, 0.5); mul_2 = None
        return (pow_3,)
args = [((), (), torch.float32, 'cpu'), ((), (), torch.float32, 'cpu')]
args = [rand_strided(sh, st, dt, dev) for (sh, st, dt, dev) in args]
mod = make_fx(Repro().to(device="cuda"))(*args)
from torch._inductor.compile_fx import compile_fx_inner
from torch._dynamo.debug_utils import same_two_models
compiled = compile_fx_inner(mod, args)
compiled(args)
The repro fails with a different error though:
Operand is null
%12 = call float @__nv_powf(<null operand!>, <null operand!>)
in function kernel_0d1d2d3
LLVM ERROR: Broken function found, compilation aborted!
Traceback (most recent call last):
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/repro.py", line 50, in <module>
compiled = compile_fx_inner(mod, args)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_dynamo/debug_utils.py", line 466, in debug_wrapper
compiled_fn = compiler_fn(gm, example_inputs, **kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/debug.py", line 177, in inner
return fn(*args, **kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/contextlib.py", line 79, in inner
return func(*args, **kwds)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/compile_fx.py", line 123, in compile_fx_inner
compiled_fn = graph.compile_to_fn()
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/graph.py", line 348, in compile_to_fn
return self.compile_to_module().call
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_dynamo/utils.py", line 87, in time_wrapper
r = func(*args, **kwargs)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/graph.py", line 338, in compile_to_module
mod = PyCodeCache.load(code)
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/codecache.py", line 216, in load
exec(code, mod.__dict__, mod.__dict__)
File "/tmp/torchinductor_mreso/3q/c3qyrouw2l7jyhl3aexebbk7qeiz22526d53taazcb4ngmzkhdyu.py", line 44, in <module>
async_compile.wait(globals())
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/codecache.py", line 386, in wait
scope[key] = result.result()
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/site-packages/torch/_inductor/codecache.py", line 263, in result
self.future.result()
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/concurrent/futures/_base.py", line 446, in result
return self.__get_result()
File "/home/mreso/miniconda3/envs/torchdynamo2/lib/python3.9/concurrent/futures/_base.py", line 391, in __get_result
raise self._exception
concurrent.futures.process.BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.
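For reference, the minified graph is equivalent to the following eager-mode computation on zero-dimensional float32 tensors; the __nv_powf call in the broken kernel appears to correspond to the final pow with exponent 0.5 (a sketch for clarity, not part of the repro itself):

import torch

# Zero-dimensional float32 tensors, matching the args spec in the repro above
c = torch.randn([])        # the _tensor_constant0 buffer
arg0_1 = torch.randn([])
arg3_1 = torch.randn([])

# sub -> mul_1 -> mul_2 -> pow_3 from the traced graph
out = ((c - arg3_1) * arg3_1 * arg0_1) ** 0.5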