From 012b88d7a2d80de99b2b580a3a67b052799cfccc Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu
Date: Mon, 10 Nov 2025 12:27:54 -0800
Subject: [PATCH] Fix links to sources for 2.9.0

---
 2.9/amp.html | 20 +- 2.9/autograd.html | 26 +- 2.9/backends.html | 76 +-- 2.9/benchmark_utils.html | 44 +- 2.9/checkpoint.html | 12 +- 2.9/cond.html | 2 +- 2.9/config_mod.html | 4 +- 2.9/cpp_extension.html | 20 +- 2.9/cuda._sanitizer.html | 2 +- 2.9/cuda.html | 2 +- 2.9/cuda.tunable.html | 42 +- 2.9/data.html | 40 +- 2.9/ddp_comm_hooks.html | 24 +- 2.9/distributed._dist2.html | 10 +- 2.9/distributed.algorithms.join.html | 14 +- 2.9/distributed.checkpoint.html | 176 +++--- 2.9/distributed.fsdp.fully_shard.html | 42 +- 2.9/distributed.html | 104 ++-- 2.9/distributed.optim.html | 26 +- 2.9/distributed.pipelining.html | 40 +- 2.9/distributed.tensor.html | 62 +-- 2.9/distributed.tensor.parallel.html | 16 +- 2.9/distributions.html | 518 +++++++++--------- 2.9/dlpack.html | 2 +- 2.9/elastic/agent.html | 50 +- 2.9/elastic/control_plane.html | 2 +- 2.9/elastic/errors.html | 8 +- 2.9/elastic/events.html | 10 +- 2.9/elastic/metrics.html | 12 +- 2.9/elastic/multiprocessing.html | 20 +- 2.9/elastic/numa.html | 6 +- 2.9/elastic/rendezvous.html | 90 +-- 2.9/elastic/subprocess_handler.html | 4 +- 2.9/elastic/timer.html | 30 +- 2.9/export/api_reference.html | 136 ++--- 2.9/export/joint_with_descriptors.html | 86 +-- 2.9/fsdp.html | 70 +-- 2.9/future_mod.html | 8 +- 2.9/futures.html | 18 +- 2.9/fx.html | 162 +++--- 2.9/generated/torch.Tensor.backward.html | 2 +- 2.9/generated/torch.Tensor.dim_order.html | 2 +- 2.9/generated/torch.Tensor.is_shared.html | 2 +- 2.9/generated/torch.Tensor.istft.html | 2 +- 2.9/generated/torch.Tensor.lu.html | 2 +- 2.9/generated/torch.Tensor.module_load.html | 2 +- 2.9/generated/torch.Tensor.norm.html | 2 +- 2.9/generated/torch.Tensor.register_hook.html | 2 +- ...or.register_post_accumulate_grad_hook.html | 2 +- 2.9/generated/torch.Tensor.share_memory_.html | 2 +- 2.9/generated/torch.Tensor.split.html | 2 +- 2.9/generated/torch.Tensor.stft.html | 2 +- 2.9/generated/torch.Tensor.storage.html | 2 +- 2.9/generated/torch.Tensor.storage_type.html | 2 +- 2.9/generated/torch.Tensor.to_sparse_coo.html | 2 +- 2.9/generated/torch.Tensor.unflatten.html | 2 +- 2.9/generated/torch.Tensor.unique.html | 2 +- .../torch.Tensor.unique_consecutive.html | 2 +- 2.9/generated/torch._assert.html | 2 +- 2.9/generated/torch._logging.set_logs.html | 2 +- ...torch.accelerator.current_accelerator.html | 2 +- .../torch.accelerator.current_device_idx.html | 2 +- ...orch.accelerator.current_device_index.html | 2 +- .../torch.accelerator.current_stream.html | 2 +- .../torch.accelerator.device_count.html | 2 +- .../torch.accelerator.device_index.html | 2 +- .../torch.accelerator.is_available.html | 2 +- .../torch.accelerator.memory.empty_cache.html | 2 +- ...celerator.memory.max_memory_allocated.html | 2 +- ...ccelerator.memory.max_memory_reserved.html | 2 +- ...h.accelerator.memory.memory_allocated.html | 2 +- ...ch.accelerator.memory.memory_reserved.html | 2 +- ...torch.accelerator.memory.memory_stats.html | 2 +- ...memory.reset_accumulated_memory_stats.html | 2 +- ...erator.memory.reset_peak_memory_stats.html | 2 +- .../torch.accelerator.set_device_idx.html | 2 +- .../torch.accelerator.set_device_index.html | 2 +- .../torch.accelerator.set_stream.html | 2 +- .../torch.accelerator.synchronize.html | 2 +- .../torch.ao.nn.intrinsic.BNReLU2d.html | 2 +- .../torch.ao.nn.intrinsic.BNReLU3d.html | 2 +-
.../torch.ao.nn.intrinsic.ConvBn1d.html | 2 +- .../torch.ao.nn.intrinsic.ConvBn2d.html | 2 +- .../torch.ao.nn.intrinsic.ConvBn3d.html | 2 +- .../torch.ao.nn.intrinsic.ConvBnReLU1d.html | 2 +- .../torch.ao.nn.intrinsic.ConvBnReLU2d.html | 2 +- .../torch.ao.nn.intrinsic.ConvBnReLU3d.html | 2 +- .../torch.ao.nn.intrinsic.ConvReLU1d.html | 2 +- .../torch.ao.nn.intrinsic.ConvReLU2d.html | 2 +- .../torch.ao.nn.intrinsic.ConvReLU3d.html | 2 +- .../torch.ao.nn.intrinsic.LinearReLU.html | 2 +- .../torch.ao.nn.intrinsic.qat.ConvBn1d.html | 2 +- .../torch.ao.nn.intrinsic.qat.ConvBn2d.html | 2 +- .../torch.ao.nn.intrinsic.qat.ConvBn3d.html | 2 +- ...orch.ao.nn.intrinsic.qat.ConvBnReLU1d.html | 2 +- ...orch.ao.nn.intrinsic.qat.ConvBnReLU2d.html | 2 +- ...orch.ao.nn.intrinsic.qat.ConvBnReLU3d.html | 2 +- .../torch.ao.nn.intrinsic.qat.ConvReLU2d.html | 2 +- .../torch.ao.nn.intrinsic.qat.ConvReLU3d.html | 2 +- .../torch.ao.nn.intrinsic.qat.LinearReLU.html | 2 +- ...h.ao.nn.intrinsic.qat.freeze_bn_stats.html | 2 +- ...h.ao.nn.intrinsic.qat.update_bn_stats.html | 2 +- ...ch.ao.nn.intrinsic.quantized.BNReLU2d.html | 2 +- ...ch.ao.nn.intrinsic.quantized.BNReLU3d.html | 2 +- ....ao.nn.intrinsic.quantized.ConvReLU1d.html | 2 +- ....ao.nn.intrinsic.quantized.ConvReLU2d.html | 2 +- ....ao.nn.intrinsic.quantized.ConvReLU3d.html | 2 +- ....ao.nn.intrinsic.quantized.LinearReLU.html | 2 +- ...ntrinsic.quantized.dynamic.LinearReLU.html | 2 +- 2.9/generated/torch.ao.nn.qat.Conv2d.html | 2 +- 2.9/generated/torch.ao.nn.qat.Conv3d.html | 2 +- 2.9/generated/torch.ao.nn.qat.Linear.html | 4 +- .../torch.ao.nn.qat.dynamic.Linear.html | 2 +- .../torch.ao.nn.quantizable.LSTM.html | 2 +- ....ao.nn.quantizable.MultiheadAttention.html | 6 +- .../torch.ao.nn.quantized.BatchNorm2d.html | 2 +- .../torch.ao.nn.quantized.BatchNorm3d.html | 2 +- .../torch.ao.nn.quantized.Conv1d.html | 4 +- .../torch.ao.nn.quantized.Conv2d.html | 4 +- .../torch.ao.nn.quantized.Conv3d.html | 4 +- ...torch.ao.nn.quantized.ConvTranspose1d.html | 2 +- ...torch.ao.nn.quantized.ConvTranspose2d.html | 2 +- ...torch.ao.nn.quantized.ConvTranspose3d.html | 2 +- 2.9/generated/torch.ao.nn.quantized.ELU.html | 2 +- .../torch.ao.nn.quantized.Embedding.html | 4 +- .../torch.ao.nn.quantized.EmbeddingBag.html | 4 +- ...rch.ao.nn.quantized.FXFloatFunctional.html | 2 +- ...torch.ao.nn.quantized.FloatFunctional.html | 2 +- .../torch.ao.nn.quantized.GroupNorm.html | 2 +- .../torch.ao.nn.quantized.Hardswish.html | 2 +- .../torch.ao.nn.quantized.InstanceNorm1d.html | 2 +- .../torch.ao.nn.quantized.InstanceNorm2d.html | 2 +- .../torch.ao.nn.quantized.InstanceNorm3d.html | 2 +- .../torch.ao.nn.quantized.LayerNorm.html | 2 +- .../torch.ao.nn.quantized.LeakyReLU.html | 2 +- .../torch.ao.nn.quantized.Linear.html | 6 +- .../torch.ao.nn.quantized.QFunctional.html | 2 +- .../torch.ao.nn.quantized.ReLU6.html | 2 +- .../torch.ao.nn.quantized.Sigmoid.html | 2 +- .../torch.ao.nn.quantized.dynamic.GRU.html | 2 +- ...torch.ao.nn.quantized.dynamic.GRUCell.html | 2 +- .../torch.ao.nn.quantized.dynamic.LSTM.html | 2 +- ...orch.ao.nn.quantized.dynamic.LSTMCell.html | 2 +- .../torch.ao.nn.quantized.dynamic.Linear.html | 6 +- ...torch.ao.nn.quantized.dynamic.RNNCell.html | 2 +- ...ntized.functional.adaptive_avg_pool2d.html | 2 +- ...ntized.functional.adaptive_avg_pool3d.html | 2 +- ...ao.nn.quantized.functional.avg_pool2d.html | 2 +- ...ao.nn.quantized.functional.avg_pool3d.html | 2 +- ...torch.ao.nn.quantized.functional.celu.html | 2 +- ...orch.ao.nn.quantized.functional.clamp.html | 2 +- 
...rch.ao.nn.quantized.functional.conv1d.html | 2 +- ...rch.ao.nn.quantized.functional.conv2d.html | 2 +- ...rch.ao.nn.quantized.functional.conv3d.html | 2 +- .../torch.ao.nn.quantized.functional.elu.html | 2 +- ...o.nn.quantized.functional.hardsigmoid.html | 2 +- ....ao.nn.quantized.functional.hardswish.html | 2 +- ...h.ao.nn.quantized.functional.hardtanh.html | 2 +- ...o.nn.quantized.functional.interpolate.html | 2 +- ...ao.nn.quantized.functional.leaky_relu.html | 2 +- ...rch.ao.nn.quantized.functional.linear.html | 2 +- ...ao.nn.quantized.functional.max_pool1d.html | 2 +- ...ao.nn.quantized.functional.max_pool2d.html | 2 +- ....ao.nn.quantized.functional.threshold.html | 2 +- ...h.ao.nn.quantized.functional.upsample.html | 2 +- ...uantized.functional.upsample_bilinear.html | 2 +- ...quantized.functional.upsample_nearest.html | 2 +- .../torch.ao.quantization.DeQuantStub.html | 2 +- .../torch.ao.quantization.QuantStub.html | 2 +- .../torch.ao.quantization.QuantWrapper.html | 2 +- ...rch.ao.quantization.add_quant_dequant.html | 2 +- ...tization.backend_config.BackendConfig.html | 12 +- ...n.backend_config.BackendPatternConfig.html | 24 +- ...antization.backend_config.DTypeConfig.html | 6 +- ...n.backend_config.DTypeWithConstraints.html | 2 +- ...zation.backend_config.ObservationType.html | 2 +- ...torch.ao.quantization.compare_results.html | 2 +- .../torch.ao.quantization.convert.html | 2 +- ...torch.ao.quantization.default_eval_fn.html | 2 +- ...tization.extract_results_from_loggers.html | 2 +- ...antization.fake_quantize.FakeQuantize.html | 2 +- ...zation.fake_quantize.FakeQuantizeBase.html | 2 +- ...ake_quantize.FixedQParamsFakeQuantize.html | 4 +- ...uantize.FusedMovingAvgObsFakeQuantize.html | 2 +- ...tion.fake_quantize.disable_fake_quant.html | 2 +- ...zation.fake_quantize.disable_observer.html | 2 +- ...ation.fake_quantize.enable_fake_quant.html | 2 +- ...ization.fake_quantize.enable_observer.html | 2 +- ...uantization.fuse_modules.fuse_modules.html | 2 +- ....fx.custom_config.ConvertCustomConfig.html | 10 +- ...ion.fx.custom_config.FuseCustomConfig.html | 8 +- ....fx.custom_config.PrepareCustomConfig.html | 22 +- ...om_config.StandaloneModuleConfigEntry.html | 2 +- ...ization.generate_numeric_debug_handle.html | 2 +- ....observer.AffineQuantizedObserverBase.html | 10 +- ....ao.quantization.observer.Granularity.html | 2 +- ...antization.observer.HistogramObserver.html | 2 +- ....ao.quantization.observer.MappingType.html | 2 +- ....quantization.observer.MinMaxObserver.html | 8 +- ....observer.MovingAverageMinMaxObserver.html | 2 +- ...MovingAveragePerChannelMinMaxObserver.html | 2 +- ...ao.quantization.observer.NoopObserver.html | 2 +- ...ao.quantization.observer.ObserverBase.html | 6 +- ...orch.ao.quantization.observer.PerAxis.html | 2 +- ...rch.ao.quantization.observer.PerBlock.html | 2 +- ...ion.observer.PerChannelMinMaxObserver.html | 4 +- ...rch.ao.quantization.observer.PerGroup.html | 2 +- ...torch.ao.quantization.observer.PerRow.html | 2 +- ...ch.ao.quantization.observer.PerTensor.html | 2 +- ...rch.ao.quantization.observer.PerToken.html | 2 +- ...tization.observer.PlaceholderObserver.html | 2 +- ...antization.observer.RecordingObserver.html | 2 +- ...ao.quantization.observer.TorchAODType.html | 2 +- ...quantization.observer.ZeroPointDomain.html | 2 +- ...ation.observer.default_debug_observer.html | 2 +- ...observer.default_placeholder_observer.html | 2 +- ....quantization.observer.get_block_size.html | 2 +- ...tion.observer.get_observer_state_dict.html | 2 +- 
...ion.observer.load_observer_state_dict.html | 2 +- .../torch.ao.quantization.prepare.html | 2 +- ...on.prepare_for_propagation_comparison.html | 2 +- .../torch.ao.quantization.prepare_qat.html | 2 +- ...ch.ao.quantization.propagate_qconfig_.html | 2 +- ...n.pt2e.export_utils.model_is_exported.html | 2 +- ....lowering.lower_pt2e_quantized_to_x86.html | 2 +- ...torch.ao.quantization.qconfig.QConfig.html | 2 +- ...zation.qconfig_mapping.QConfigMapping.html | 16 +- ...pping.get_default_qat_qconfig_mapping.html | 2 +- ...g_mapping.get_default_qconfig_mapping.html | 2 +- .../torch.ao.quantization.quantize.html | 2 +- ...orch.ao.quantization.quantize_dynamic.html | 2 +- ...o.quantization.quantize_fx.convert_fx.html | 2 +- ...h.ao.quantization.quantize_fx.fuse_fx.html | 2 +- ...o.quantization.quantize_fx.prepare_fx.html | 2 +- ...antization.quantize_fx.prepare_qat_fx.html | 2 +- .../torch.ao.quantization.quantize_qat.html | 2 +- .../torch.ao.quantization.swap_module.html | 2 +- ....are_deterministic_algorithms_enabled.html | 2 +- 2.9/generated/torch.atleast_1d.html | 2 +- 2.9/generated/torch.atleast_2d.html | 2 +- 2.9/generated/torch.atleast_3d.html | 2 +- .../torch.autograd.Function.backward.html | 2 +- .../torch.autograd.Function.forward.html | 2 +- .../torch.autograd.Function.jvp.html | 2 +- .../torch.autograd.Function.vmap.html | 2 +- 2.9/generated/torch.autograd.backward.html | 2 +- ...utograd.forward_ad.UnpackedDualTensor.html | 2 +- .../torch.autograd.forward_ad.dual_level.html | 2 +- ....autograd.forward_ad.enter_dual_level.html | 2 +- ...h.autograd.forward_ad.exit_dual_level.html | 2 +- .../torch.autograd.forward_ad.make_dual.html | 2 +- ...torch.autograd.forward_ad.unpack_dual.html | 2 +- ...h.autograd.function.BackwardCFunction.html | 16 +- ...ograd.function.FunctionCtx.mark_dirty.html | 2 +- ...n.FunctionCtx.mark_non_differentiable.html | 2 +- ...unction.FunctionCtx.save_for_backward.html | 2 +- ...ion.FunctionCtx.set_materialize_grads.html | 2 +- ...rch.autograd.function.InplaceFunction.html | 24 +- ...ch.autograd.function.NestedIOFunction.html | 28 +- ...autograd.function.once_differentiable.html | 2 +- .../torch.autograd.functional.hessian.html | 2 +- .../torch.autograd.functional.hvp.html | 2 +- .../torch.autograd.functional.jacobian.html | 2 +- .../torch.autograd.functional.jvp.html | 2 +- .../torch.autograd.functional.vhp.html | 2 +- .../torch.autograd.functional.vjp.html | 2 +- 2.9/generated/torch.autograd.grad.html | 2 +- ...rch.autograd.grad_mode.inference_mode.html | 4 +- ...h.autograd.grad_mode.set_grad_enabled.html | 4 +- ....grad_mode.set_multithreading_enabled.html | 4 +- ...rch.autograd.gradcheck.GradcheckError.html | 2 +- .../torch.autograd.gradcheck.gradcheck.html | 2 +- ...orch.autograd.gradcheck.gradgradcheck.html | 2 +- .../torch.autograd.graph.Node.metadata.html | 2 +- .../torch.autograd.graph.Node.name.html | 2 +- ...rch.autograd.graph.Node.register_hook.html | 2 +- ....autograd.graph.Node.register_prehook.html | 2 +- ...orch.autograd.graph.increment_version.html | 2 +- ...torch.autograd.profiler.EnforceUnique.html | 4 +- ...h.autograd.profiler.KinetoStepTracker.html | 10 +- .../torch.autograd.profiler.load_nvprof.html | 2 +- ....autograd.profiler.parse_nvprof_trace.html | 2 +- ....profiler.profile.export_chrome_trace.html | 2 +- ...utograd.profiler.profile.key_averages.html | 2 +- ...tograd.profiler.profile.total_average.html | 2 +- ...rch.autograd.profiler.record_function.html | 2 +- ...torch.autograd.profiler_util.Interval.html | 4 +- 
....autograd.profiler_util.MemRecordsAcc.html | 4 +- ...ch.autograd.profiler_util.StringTable.html | 2 +- 2.9/generated/torch.block_diag.html | 2 +- 2.9/generated/torch.broadcast_shapes.html | 2 +- 2.9/generated/torch.broadcast_tensors.html | 2 +- 2.9/generated/torch.cartesian_prod.html | 2 +- 2.9/generated/torch.cdist.html | 2 +- 2.9/generated/torch.chain_matmul.html | 2 +- 2.9/generated/torch.compile.html | 2 +- .../torch.compiled_with_cxx11_abi.html | 2 +- .../torch.compiler.allow_in_graph.html | 2 +- ...torch.compiler.assume_constant_result.html | 2 +- 2.9/generated/torch.compiler.compile.html | 2 +- ...ch.compiler.cudagraph_mark_step_begin.html | 2 +- 2.9/generated/torch.compiler.disable.html | 2 +- .../torch.compiler.is_compiling.html | 2 +- .../torch.compiler.is_dynamo_compiling.html | 2 +- .../torch.compiler.is_exporting.html | 2 +- ...ch.compiler.keep_tensor_guards_unsafe.html | 2 +- .../torch.compiler.list_backends.html | 2 +- .../torch.compiler.nested_compile_region.html | 2 +- 2.9/generated/torch.compiler.reset.html | 2 +- ...compiler.set_enable_guard_collectives.html | 2 +- 2.9/generated/torch.compiler.set_stance.html | 2 +- ...r.skip_guard_on_all_nn_modules_unsafe.html | 2 +- ...compiler.skip_guard_on_globals_unsafe.html | 2 +- ...ip_guard_on_inbuilt_nn_modules_unsafe.html | 2 +- .../torch.compiler.substitute_in_graph.html | 2 +- 2.9/generated/torch.cond.html | 2 +- 2.9/generated/torch.cpu.Stream.html | 2 +- 2.9/generated/torch.cpu.StreamContext.html | 2 +- 2.9/generated/torch.cpu.current_device.html | 2 +- 2.9/generated/torch.cpu.current_stream.html | 2 +- 2.9/generated/torch.cpu.device_count.html | 2 +- 2.9/generated/torch.cpu.is_available.html | 2 +- 2.9/generated/torch.cpu.set_device.html | 2 +- 2.9/generated/torch.cpu.stream.html | 2 +- 2.9/generated/torch.cpu.synchronize.html | 2 +- 2.9/generated/torch.cuda.CUDAGraph.html | 22 +- 2.9/generated/torch.cuda.Event.html | 16 +- 2.9/generated/torch.cuda.ExternalStream.html | 12 +- 2.9/generated/torch.cuda.Stream.html | 12 +- 2.9/generated/torch.cuda.StreamContext.html | 2 +- .../torch.cuda.can_device_access_peer.html | 2 +- 2.9/generated/torch.cuda.clock_rate.html | 2 +- 2.9/generated/torch.cuda.comm.broadcast.html | 2 +- .../torch.cuda.comm.broadcast_coalesced.html | 2 +- 2.9/generated/torch.cuda.comm.gather.html | 2 +- 2.9/generated/torch.cuda.comm.reduce_add.html | 2 +- .../torch.cuda.comm.reduce_add_coalesced.html | 2 +- 2.9/generated/torch.cuda.comm.scatter.html | 2 +- 2.9/generated/torch.cuda.cudart.html | 2 +- .../torch.cuda.current_blas_handle.html | 2 +- 2.9/generated/torch.cuda.current_device.html | 2 +- 2.9/generated/torch.cuda.current_stream.html | 2 +- 2.9/generated/torch.cuda.default_stream.html | 2 +- 2.9/generated/torch.cuda.device.html | 2 +- 2.9/generated/torch.cuda.device_count.html | 2 +- .../torch.cuda.device_memory_used.html | 2 +- 2.9/generated/torch.cuda.device_of.html | 2 +- 2.9/generated/torch.cuda.gds.GdsFile.html | 10 +- .../torch.cuda.gds.gds_deregister_buffer.html | 2 +- .../torch.cuda.gds.gds_register_buffer.html | 2 +- 2.9/generated/torch.cuda.get_arch_list.html | 2 +- .../torch.cuda.get_device_capability.html | 2 +- 2.9/generated/torch.cuda.get_device_name.html | 2 +- .../torch.cuda.get_device_properties.html | 2 +- .../torch.cuda.get_gencode_flags.html | 2 +- 2.9/generated/torch.cuda.get_rng_state.html | 2 +- .../torch.cuda.get_rng_state_all.html | 2 +- .../torch.cuda.get_stream_from_external.html | 2 +- .../torch.cuda.get_sync_debug_mode.html | 2 +- 2.9/generated/torch.cuda.graph.html | 2 +- 
.../torch.cuda.graph_pool_handle.html | 2 +- 2.9/generated/torch.cuda.init.html | 2 +- 2.9/generated/torch.cuda.initial_seed.html | 2 +- 2.9/generated/torch.cuda.ipc_collect.html | 2 +- 2.9/generated/torch.cuda.is_available.html | 2 +- ...orch.cuda.is_current_stream_capturing.html | 2 +- 2.9/generated/torch.cuda.is_initialized.html | 2 +- .../torch.cuda.is_tf32_supported.html | 2 +- .../torch.cuda.jiterator._create_jit_fn.html | 2 +- ...jiterator._create_multi_output_jit_fn.html | 2 +- .../torch.cuda.make_graphed_callables.html | 2 +- 2.9/generated/torch.cuda.manual_seed.html | 2 +- 2.9/generated/torch.cuda.manual_seed_all.html | 2 +- ...ch.cuda.memory.CUDAPluggableAllocator.html | 2 +- 2.9/generated/torch.cuda.memory.MemPool.html | 6 +- ...h.cuda.memory.caching_allocator_alloc.html | 2 +- ....cuda.memory.caching_allocator_delete.html | 2 +- ....cuda.memory.caching_allocator_enable.html | 2 +- ....cuda.memory.change_current_allocator.html | 2 +- .../torch.cuda.memory.empty_cache.html | 2 +- ...rch.cuda.memory.get_allocator_backend.html | 2 +- ...emory.get_per_process_memory_fraction.html | 2 +- .../torch.cuda.memory.host_memory_stats.html | 2 +- ...mory.host_memory_stats_as_nested_dict.html | 2 +- .../torch.cuda.memory.list_gpu_processes.html | 2 +- ...orch.cuda.memory.max_memory_allocated.html | 2 +- .../torch.cuda.memory.max_memory_cached.html | 2 +- ...torch.cuda.memory.max_memory_reserved.html | 2 +- .../torch.cuda.memory.mem_get_info.html | 2 +- .../torch.cuda.memory.memory_allocated.html | 2 +- .../torch.cuda.memory.memory_cached.html | 2 +- .../torch.cuda.memory.memory_reserved.html | 2 +- .../torch.cuda.memory.memory_snapshot.html | 2 +- .../torch.cuda.memory.memory_stats.html | 2 +- ...da.memory.memory_stats_as_nested_dict.html | 2 +- .../torch.cuda.memory.memory_summary.html | 2 +- ...y.reset_accumulated_host_memory_stats.html | 2 +- ...memory.reset_accumulated_memory_stats.html | 2 +- ...uda.memory.reset_max_memory_allocated.html | 2 +- ...h.cuda.memory.reset_max_memory_cached.html | 2 +- ...a.memory.reset_peak_host_memory_stats.html | 2 +- ...h.cuda.memory.reset_peak_memory_stats.html | 2 +- ...emory.set_per_process_memory_fraction.html | 2 +- 2.9/generated/torch.cuda.memory_usage.html | 2 +- 2.9/generated/torch.cuda.nvtx.mark.html | 2 +- 2.9/generated/torch.cuda.nvtx.range.html | 2 +- 2.9/generated/torch.cuda.nvtx.range_pop.html | 2 +- 2.9/generated/torch.cuda.nvtx.range_push.html | 2 +- 2.9/generated/torch.cuda.power_draw.html | 2 +- 2.9/generated/torch.cuda.seed.html | 2 +- 2.9/generated/torch.cuda.seed_all.html | 2 +- 2.9/generated/torch.cuda.set_device.html | 2 +- 2.9/generated/torch.cuda.set_rng_state.html | 2 +- .../torch.cuda.set_rng_state_all.html | 2 +- 2.9/generated/torch.cuda.set_stream.html | 2 +- .../torch.cuda.set_sync_debug_mode.html | 2 +- 2.9/generated/torch.cuda.stream.html | 2 +- 2.9/generated/torch.cuda.synchronize.html | 2 +- 2.9/generated/torch.cuda.temperature.html | 2 +- 2.9/generated/torch.cuda.utilization.html | 2 +- 2.9/generated/torch.einsum.html | 2 +- 2.9/generated/torch.enable_grad.html | 2 +- 2.9/generated/torch.from_dlpack.html | 2 +- 2.9/generated/torch.func.debug_unwrap.html | 2 +- 2.9/generated/torch.func.functional_call.html | 2 +- 2.9/generated/torch.func.functionalize.html | 2 +- 2.9/generated/torch.func.grad.html | 2 +- 2.9/generated/torch.func.grad_and_value.html | 2 +- 2.9/generated/torch.func.hessian.html | 2 +- 2.9/generated/torch.func.jacfwd.html | 2 +- 2.9/generated/torch.func.jacrev.html | 2 +- 2.9/generated/torch.func.jvp.html | 
2 +- 2.9/generated/torch.func.linearize.html | 2 +- ....func.replace_all_batch_norm_modules_.html | 2 +- .../torch.func.stack_module_state.html | 2 +- 2.9/generated/torch.func.vjp.html | 2 +- 2.9/generated/torch.func.vmap.html | 2 +- .../torch.functional.align_tensors.html | 2 +- .../torch.functional.atleast_1d.html | 2 +- .../torch.functional.atleast_2d.html | 2 +- .../torch.functional.atleast_3d.html | 2 +- .../torch.functional.block_diag.html | 2 +- .../torch.functional.broadcast_shapes.html | 2 +- .../torch.functional.broadcast_tensors.html | 2 +- .../torch.functional.cartesian_prod.html | 2 +- 2.9/generated/torch.functional.cdist.html | 2 +- .../torch.functional.chain_matmul.html | 2 +- 2.9/generated/torch.functional.einsum.html | 2 +- 2.9/generated/torch.functional.lu.html | 2 +- 2.9/generated/torch.functional.meshgrid.html | 2 +- 2.9/generated/torch.functional.norm.html | 2 +- 2.9/generated/torch.functional.split.html | 2 +- 2.9/generated/torch.functional.stft.html | 2 +- 2.9/generated/torch.functional.tensordot.html | 2 +- 2.9/generated/torch.functional.unique.html | 2 +- .../torch.functional.unique_consecutive.html | 2 +- .../torch.functional.unravel_index.html | 2 +- ...erimental.proxy_tensor.get_proxy_mode.html | 2 +- ...ntal.proxy_tensor.handle_sym_dispatch.html | 2 +- ....fx.experimental.proxy_tensor.make_fx.html | 2 +- ...l.proxy_tensor.maybe_disable_thunkify.html | 2 +- ...al.proxy_tensor.maybe_enable_thunkify.html | 2 +- ...imental.symbolic_shapes.CallMethodKey.html | 4 +- ...imental.symbolic_shapes.ConvertIntKey.html | 4 +- ...mental.symbolic_shapes.DimConstraints.html | 14 +- ...perimental.symbolic_shapes.DimDynamic.html | 2 +- ...erimental.symbolic_shapes.DivideByKey.html | 4 +- ...al.symbolic_shapes.EqualityConstraint.html | 2 +- ...mental.symbolic_shapes.InnerTensorKey.html | 4 +- ...bolic_shapes.PropagateUnbackedSymInts.html | 26 +- ...mbolic_shapes.RelaxedUnspecConstraint.html | 2 +- ...experimental.symbolic_shapes.ShapeEnv.html | 82 +-- ...ntal.symbolic_shapes.ShapeEnvSettings.html | 2 +- ...mental.symbolic_shapes.Specialization.html | 2 +- ...mbolic_shapes.StatefulSymbolicContext.html | 2 +- ...bolic_shapes.StatelessSymbolicContext.html | 2 +- ...ymbolic_shapes.StrictMinMaxConstraint.html | 4 +- ...mbolic_shapes.SubclassSymbolicContext.html | 2 +- ...ental.symbolic_shapes.SymbolicContext.html | 2 +- ...ymbolic_shapes.canonicalize_bool_expr.html | 2 +- ...ntal.symbolic_shapes.check_consistent.html | 2 +- ...olic_shapes.compute_unbacked_bindings.html | 2 +- ...ental.symbolic_shapes.constrain_range.html | 2 +- ...ental.symbolic_shapes.constrain_unify.html | 2 +- ...mental.symbolic_shapes.guard_or_false.html | 2 +- ...imental.symbolic_shapes.guard_or_true.html | 2 +- ....symbolic_shapes.guard_size_oblivious.html | 2 +- ...ntal.symbolic_shapes.has_free_symbols.html | 2 +- ...olic_shapes.has_free_unbacked_symbols.html | 2 +- ...ntal.symbolic_shapes.has_static_value.html | 2 +- ...experimental.symbolic_shapes.hint_int.html | 2 +- ...ntal.symbolic_shapes.is_accessor_node.html | 2 +- ...ntal.symbolic_shapes.is_concrete_bool.html | 2 +- ...tal.symbolic_shapes.is_concrete_float.html | 2 +- ...ental.symbolic_shapes.is_concrete_int.html | 2 +- ...xperimental.symbolic_shapes.lru_cache.html | 2 +- ...ental.symbolic_shapes.rebind_unbacked.html | 2 +- ...olic_shapes.resolve_unbacked_bindings.html | 2 +- ...ymbolic_shapes.statically_known_false.html | 2 +- ...symbolic_shapes.statically_known_true.html | 2 +- ....experimental.symbolic_shapes.sym_and.html | 2 +- 
...x.experimental.symbolic_shapes.sym_eq.html | 2 +- ...x.experimental.symbolic_shapes.sym_or.html | 2 +- 2.9/generated/torch.get_default_device.html | 2 +- .../torch.get_deterministic_debug_mode.html | 2 +- 2.9/generated/torch.get_device_module.html | 2 +- .../torch.get_float32_matmul_precision.html | 2 +- 2.9/generated/torch.get_rng_state.html | 2 +- 2.9/generated/torch.initial_seed.html | 2 +- ...ministic_algorithms_warn_only_enabled.html | 2 +- 2.9/generated/torch.is_storage.html | 2 +- 2.9/generated/torch.is_tensor.html | 2 +- .../torch.is_warn_always_enabled.html | 2 +- 2.9/generated/torch.jit.ScriptModule.html | 110 ++-- 2.9/generated/torch.jit.annotate.html | 2 +- .../torch.jit.enable_onednn_fusion.html | 2 +- 2.9/generated/torch.jit.fork.html | 2 +- 2.9/generated/torch.jit.freeze.html | 2 +- 2.9/generated/torch.jit.ignore.html | 2 +- 2.9/generated/torch.jit.interface.html | 2 +- 2.9/generated/torch.jit.isinstance.html | 2 +- 2.9/generated/torch.jit.load.html | 2 +- .../torch.jit.onednn_fusion_enabled.html | 2 +- .../torch.jit.optimize_for_inference.html | 2 +- 2.9/generated/torch.jit.save.html | 2 +- 2.9/generated/torch.jit.script.html | 2 +- .../torch.jit.script_if_tracing.html | 2 +- .../torch.jit.set_fusion_strategy.html | 2 +- 2.9/generated/torch.jit.strict_fusion.html | 2 +- 2.9/generated/torch.jit.trace.html | 2 +- 2.9/generated/torch.jit.trace_module.html | 2 +- 2.9/generated/torch.jit.unused.html | 2 +- 2.9/generated/torch.jit.wait.html | 2 +- 2.9/generated/torch.load.html | 2 +- 2.9/generated/torch.lobpcg.html | 2 +- 2.9/generated/torch.lu.html | 2 +- 2.9/generated/torch.manual_seed.html | 2 +- 2.9/generated/torch.meshgrid.html | 2 +- 2.9/generated/torch.mps.compile_shader.html | 2 +- .../torch.mps.current_allocated_memory.html | 2 +- 2.9/generated/torch.mps.device_count.html | 2 +- .../torch.mps.driver_allocated_memory.html | 2 +- 2.9/generated/torch.mps.empty_cache.html | 2 +- 2.9/generated/torch.mps.event.Event.html | 12 +- 2.9/generated/torch.mps.get_rng_state.html | 2 +- 2.9/generated/torch.mps.manual_seed.html | 2 +- ...torch.mps.profiler.is_capturing_metal.html | 2 +- ...mps.profiler.is_metal_capture_enabled.html | 2 +- .../torch.mps.profiler.metal_capture.html | 2 +- 2.9/generated/torch.mps.profiler.profile.html | 2 +- 2.9/generated/torch.mps.profiler.start.html | 2 +- 2.9/generated/torch.mps.profiler.stop.html | 2 +- .../torch.mps.recommended_max_memory.html | 2 +- 2.9/generated/torch.mps.seed.html | 2 +- ...h.mps.set_per_process_memory_fraction.html | 2 +- 2.9/generated/torch.mps.set_rng_state.html | 2 +- 2.9/generated/torch.mps.synchronize.html | 2 +- .../torch.mtia.DeferredMtiaCallError.html | 2 +- 2.9/generated/torch.mtia.StreamContext.html | 2 +- ...ch.mtia.attach_out_of_memory_observer.html | 2 +- 2.9/generated/torch.mtia.current_device.html | 2 +- 2.9/generated/torch.mtia.current_stream.html | 2 +- 2.9/generated/torch.mtia.default_stream.html | 2 +- 2.9/generated/torch.mtia.device.html | 2 +- 2.9/generated/torch.mtia.device_count.html | 2 +- 2.9/generated/torch.mtia.empty_cache.html | 2 +- .../torch.mtia.get_device_capability.html | 2 +- 2.9/generated/torch.mtia.get_rng_state.html | 2 +- 2.9/generated/torch.mtia.init.html | 2 +- 2.9/generated/torch.mtia.is_available.html | 2 +- 2.9/generated/torch.mtia.is_initialized.html | 2 +- .../torch.mtia.memory.memory_allocated.html | 2 +- .../torch.mtia.memory.memory_stats.html | 2 +- 2.9/generated/torch.mtia.memory_stats.html | 2 +- .../torch.mtia.record_memory_history.html | 2 +- 
2.9/generated/torch.mtia.set_device.html | 2 +- 2.9/generated/torch.mtia.set_rng_state.html | 2 +- 2.9/generated/torch.mtia.set_stream.html | 2 +- 2.9/generated/torch.mtia.snapshot.html | 2 +- 2.9/generated/torch.mtia.stream.html | 2 +- 2.9/generated/torch.mtia.synchronize.html | 2 +- 2.9/generated/torch.nn.AdaptiveAvgPool1d.html | 4 +- 2.9/generated/torch.nn.AdaptiveAvgPool2d.html | 4 +- 2.9/generated/torch.nn.AdaptiveAvgPool3d.html | 4 +- .../torch.nn.AdaptiveLogSoftmaxWithLoss.html | 10 +- 2.9/generated/torch.nn.AdaptiveMaxPool1d.html | 4 +- 2.9/generated/torch.nn.AdaptiveMaxPool2d.html | 4 +- 2.9/generated/torch.nn.AdaptiveMaxPool3d.html | 4 +- 2.9/generated/torch.nn.AlphaDropout.html | 4 +- 2.9/generated/torch.nn.AvgPool1d.html | 4 +- 2.9/generated/torch.nn.AvgPool2d.html | 4 +- 2.9/generated/torch.nn.AvgPool3d.html | 4 +- 2.9/generated/torch.nn.BCELoss.html | 4 +- 2.9/generated/torch.nn.BCEWithLogitsLoss.html | 4 +- 2.9/generated/torch.nn.BatchNorm1d.html | 2 +- 2.9/generated/torch.nn.BatchNorm2d.html | 2 +- 2.9/generated/torch.nn.BatchNorm3d.html | 2 +- 2.9/generated/torch.nn.Bilinear.html | 8 +- 2.9/generated/torch.nn.CELU.html | 6 +- 2.9/generated/torch.nn.CTCLoss.html | 4 +- 2.9/generated/torch.nn.ChannelShuffle.html | 6 +- 2.9/generated/torch.nn.CircularPad1d.html | 2 +- 2.9/generated/torch.nn.CircularPad2d.html | 2 +- 2.9/generated/torch.nn.CircularPad3d.html | 2 +- 2.9/generated/torch.nn.ConstantPad1d.html | 2 +- 2.9/generated/torch.nn.ConstantPad2d.html | 2 +- 2.9/generated/torch.nn.ConstantPad3d.html | 2 +- 2.9/generated/torch.nn.Conv1d.html | 2 +- 2.9/generated/torch.nn.Conv2d.html | 2 +- 2.9/generated/torch.nn.Conv3d.html | 2 +- 2.9/generated/torch.nn.ConvTranspose1d.html | 2 +- 2.9/generated/torch.nn.ConvTranspose2d.html | 4 +- 2.9/generated/torch.nn.ConvTranspose3d.html | 2 +- .../torch.nn.CosineEmbeddingLoss.html | 4 +- 2.9/generated/torch.nn.CosineSimilarity.html | 4 +- 2.9/generated/torch.nn.CrossEntropyLoss.html | 4 +- 2.9/generated/torch.nn.DataParallel.html | 2 +- 2.9/generated/torch.nn.Dropout.html | 4 +- 2.9/generated/torch.nn.Dropout1d.html | 4 +- 2.9/generated/torch.nn.Dropout2d.html | 4 +- 2.9/generated/torch.nn.Dropout3d.html | 4 +- 2.9/generated/torch.nn.ELU.html | 6 +- 2.9/generated/torch.nn.Embedding.html | 4 +- 2.9/generated/torch.nn.EmbeddingBag.html | 6 +- .../torch.nn.FeatureAlphaDropout.html | 4 +- 2.9/generated/torch.nn.Fold.html | 6 +- .../torch.nn.FractionalMaxPool2d.html | 2 +- .../torch.nn.FractionalMaxPool3d.html | 2 +- 2.9/generated/torch.nn.GELU.html | 6 +- 2.9/generated/torch.nn.GLU.html | 6 +- 2.9/generated/torch.nn.GRU.html | 2 +- 2.9/generated/torch.nn.GRUCell.html | 2 +- 2.9/generated/torch.nn.GaussianNLLLoss.html | 4 +- 2.9/generated/torch.nn.GroupNorm.html | 2 +- 2.9/generated/torch.nn.Hardshrink.html | 6 +- 2.9/generated/torch.nn.Hardsigmoid.html | 4 +- 2.9/generated/torch.nn.Hardswish.html | 4 +- 2.9/generated/torch.nn.Hardtanh.html | 6 +- .../torch.nn.HingeEmbeddingLoss.html | 4 +- 2.9/generated/torch.nn.HuberLoss.html | 4 +- 2.9/generated/torch.nn.Identity.html | 4 +- 2.9/generated/torch.nn.InstanceNorm1d.html | 2 +- 2.9/generated/torch.nn.InstanceNorm2d.html | 2 +- 2.9/generated/torch.nn.InstanceNorm3d.html | 2 +- 2.9/generated/torch.nn.KLDivLoss.html | 4 +- 2.9/generated/torch.nn.L1Loss.html | 4 +- 2.9/generated/torch.nn.LPPool1d.html | 4 +- 2.9/generated/torch.nn.LPPool2d.html | 4 +- 2.9/generated/torch.nn.LPPool3d.html | 4 +- 2.9/generated/torch.nn.LSTM.html | 2 +- 2.9/generated/torch.nn.LSTMCell.html | 2 +- 
2.9/generated/torch.nn.LayerNorm.html | 2 +- 2.9/generated/torch.nn.LazyBatchNorm1d.html | 4 +- 2.9/generated/torch.nn.LazyBatchNorm2d.html | 4 +- 2.9/generated/torch.nn.LazyBatchNorm3d.html | 4 +- 2.9/generated/torch.nn.LazyConv1d.html | 4 +- 2.9/generated/torch.nn.LazyConv2d.html | 4 +- 2.9/generated/torch.nn.LazyConv3d.html | 4 +- .../torch.nn.LazyConvTranspose1d.html | 4 +- .../torch.nn.LazyConvTranspose2d.html | 4 +- .../torch.nn.LazyConvTranspose3d.html | 4 +- .../torch.nn.LazyInstanceNorm1d.html | 4 +- .../torch.nn.LazyInstanceNorm2d.html | 4 +- .../torch.nn.LazyInstanceNorm3d.html | 4 +- 2.9/generated/torch.nn.LazyLinear.html | 8 +- 2.9/generated/torch.nn.LeakyReLU.html | 6 +- 2.9/generated/torch.nn.Linear.html | 8 +- 2.9/generated/torch.nn.LocalResponseNorm.html | 6 +- 2.9/generated/torch.nn.LogSigmoid.html | 4 +- 2.9/generated/torch.nn.LogSoftmax.html | 6 +- 2.9/generated/torch.nn.MSELoss.html | 4 +- 2.9/generated/torch.nn.MarginRankingLoss.html | 4 +- 2.9/generated/torch.nn.MaxPool1d.html | 4 +- 2.9/generated/torch.nn.MaxPool2d.html | 4 +- 2.9/generated/torch.nn.MaxPool3d.html | 4 +- 2.9/generated/torch.nn.MaxUnpool1d.html | 4 +- 2.9/generated/torch.nn.MaxUnpool2d.html | 4 +- 2.9/generated/torch.nn.MaxUnpool3d.html | 4 +- 2.9/generated/torch.nn.Mish.html | 6 +- 2.9/generated/torch.nn.Module.html | 110 ++-- 2.9/generated/torch.nn.ModuleDict.html | 14 +- 2.9/generated/torch.nn.ModuleList.html | 8 +- .../torch.nn.MultiLabelMarginLoss.html | 4 +- .../torch.nn.MultiLabelSoftMarginLoss.html | 4 +- 2.9/generated/torch.nn.MultiMarginLoss.html | 4 +- .../torch.nn.MultiheadAttention.html | 6 +- 2.9/generated/torch.nn.NLLLoss.html | 4 +- 2.9/generated/torch.nn.PReLU.html | 8 +- 2.9/generated/torch.nn.PairwiseDistance.html | 4 +- 2.9/generated/torch.nn.ParameterDict.html | 24 +- 2.9/generated/torch.nn.ParameterList.html | 8 +- 2.9/generated/torch.nn.PixelShuffle.html | 6 +- 2.9/generated/torch.nn.PixelUnshuffle.html | 6 +- 2.9/generated/torch.nn.PoissonNLLLoss.html | 4 +- 2.9/generated/torch.nn.RMSNorm.html | 8 +- 2.9/generated/torch.nn.RNN.html | 4 +- 2.9/generated/torch.nn.RNNBase.html | 4 +- 2.9/generated/torch.nn.RNNCell.html | 2 +- 2.9/generated/torch.nn.RReLU.html | 6 +- 2.9/generated/torch.nn.ReLU.html | 6 +- 2.9/generated/torch.nn.ReLU6.html | 4 +- 2.9/generated/torch.nn.ReflectionPad1d.html | 2 +- 2.9/generated/torch.nn.ReflectionPad2d.html | 2 +- 2.9/generated/torch.nn.ReflectionPad3d.html | 2 +- 2.9/generated/torch.nn.ReplicationPad1d.html | 2 +- 2.9/generated/torch.nn.ReplicationPad2d.html | 2 +- 2.9/generated/torch.nn.ReplicationPad3d.html | 2 +- 2.9/generated/torch.nn.SELU.html | 6 +- 2.9/generated/torch.nn.Sequential.html | 12 +- 2.9/generated/torch.nn.SiLU.html | 6 +- 2.9/generated/torch.nn.Sigmoid.html | 4 +- 2.9/generated/torch.nn.SmoothL1Loss.html | 4 +- 2.9/generated/torch.nn.SoftMarginLoss.html | 4 +- 2.9/generated/torch.nn.Softmax.html | 6 +- 2.9/generated/torch.nn.Softmax2d.html | 4 +- 2.9/generated/torch.nn.Softmin.html | 6 +- 2.9/generated/torch.nn.Softplus.html | 6 +- 2.9/generated/torch.nn.Softshrink.html | 6 +- 2.9/generated/torch.nn.Softsign.html | 4 +- 2.9/generated/torch.nn.SyncBatchNorm.html | 6 +- 2.9/generated/torch.nn.Tanh.html | 4 +- 2.9/generated/torch.nn.Tanhshrink.html | 4 +- 2.9/generated/torch.nn.Threshold.html | 6 +- 2.9/generated/torch.nn.Transformer.html | 6 +- .../torch.nn.TransformerDecoder.html | 4 +- .../torch.nn.TransformerDecoderLayer.html | 4 +- .../torch.nn.TransformerEncoder.html | 4 +- .../torch.nn.TransformerEncoderLayer.html | 4 +- 
2.9/generated/torch.nn.TripletMarginLoss.html | 4 +- ...orch.nn.TripletMarginWithDistanceLoss.html | 4 +- 2.9/generated/torch.nn.Unfold.html | 6 +- 2.9/generated/torch.nn.Upsample.html | 6 +- .../torch.nn.UpsamplingBilinear2d.html | 2 +- .../torch.nn.UpsamplingNearest2d.html | 2 +- 2.9/generated/torch.nn.ZeroPad1d.html | 4 +- 2.9/generated/torch.nn.ZeroPad2d.html | 4 +- 2.9/generated/torch.nn.ZeroPad3d.html | 4 +- .../torch.nn.attention.bias.CausalBias.html | 2 +- ...torch.nn.attention.bias.CausalVariant.html | 2 +- ....nn.attention.bias.causal_lower_right.html | 2 +- ...h.nn.attention.bias.causal_upper_left.html | 2 +- .../torch.nn.attention.sdpa_kernel.html | 2 +- 2.9/generated/torch.nn.factory_kwargs.html | 2 +- ...rch.nn.functional.adaptive_avg_pool2d.html | 2 +- ...rch.nn.functional.adaptive_avg_pool3d.html | 2 +- ...rch.nn.functional.adaptive_max_pool1d.html | 2 +- ...rch.nn.functional.adaptive_max_pool2d.html | 2 +- ...rch.nn.functional.adaptive_max_pool3d.html | 2 +- .../torch.nn.functional.affine_grid.html | 2 +- .../torch.nn.functional.alpha_dropout.html | 2 +- .../torch.nn.functional.batch_norm.html | 2 +- ...ch.nn.functional.binary_cross_entropy.html | 2 +- ...onal.binary_cross_entropy_with_logits.html | 2 +- 2.9/generated/torch.nn.functional.celu.html | 2 +- ...h.nn.functional.cosine_embedding_loss.html | 2 +- .../torch.nn.functional.cross_entropy.html | 2 +- .../torch.nn.functional.ctc_loss.html | 2 +- .../torch.nn.functional.dropout.html | 2 +- .../torch.nn.functional.dropout1d.html | 2 +- .../torch.nn.functional.dropout2d.html | 2 +- .../torch.nn.functional.dropout3d.html | 2 +- 2.9/generated/torch.nn.functional.elu.html | 2 +- .../torch.nn.functional.embedding.html | 2 +- .../torch.nn.functional.embedding_bag.html | 2 +- ...h.nn.functional.feature_alpha_dropout.html | 2 +- 2.9/generated/torch.nn.functional.fold.html | 2 +- ...h.nn.functional.fractional_max_pool2d.html | 2 +- ...h.nn.functional.fractional_max_pool3d.html | 2 +- ...torch.nn.functional.gaussian_nll_loss.html | 2 +- 2.9/generated/torch.nn.functional.glu.html | 2 +- .../torch.nn.functional.grid_sample.html | 2 +- .../torch.nn.functional.group_norm.html | 2 +- .../torch.nn.functional.gumbel_softmax.html | 2 +- .../torch.nn.functional.hardsigmoid.html | 2 +- .../torch.nn.functional.hardswish.html | 2 +- .../torch.nn.functional.hardtanh.html | 2 +- ...ch.nn.functional.hinge_embedding_loss.html | 2 +- .../torch.nn.functional.huber_loss.html | 2 +- .../torch.nn.functional.instance_norm.html | 2 +- .../torch.nn.functional.interpolate.html | 2 +- 2.9/generated/torch.nn.functional.kl_div.html | 2 +- .../torch.nn.functional.l1_loss.html | 2 +- .../torch.nn.functional.layer_norm.html | 2 +- .../torch.nn.functional.leaky_relu.html | 2 +- ...rch.nn.functional.local_response_norm.html | 2 +- .../torch.nn.functional.log_softmax.html | 2 +- .../torch.nn.functional.lp_pool1d.html | 2 +- .../torch.nn.functional.lp_pool2d.html | 2 +- .../torch.nn.functional.lp_pool3d.html | 2 +- ...rch.nn.functional.margin_ranking_loss.html | 2 +- .../torch.nn.functional.max_pool1d.html | 2 +- .../torch.nn.functional.max_pool2d.html | 2 +- .../torch.nn.functional.max_pool3d.html | 2 +- .../torch.nn.functional.max_unpool1d.html | 2 +- .../torch.nn.functional.max_unpool2d.html | 2 +- .../torch.nn.functional.max_unpool3d.html | 2 +- 2.9/generated/torch.nn.functional.mish.html | 2 +- .../torch.nn.functional.mse_loss.html | 2 +- ...torch.nn.functional.multi_margin_loss.html | 2 +- ....nn.functional.multilabel_margin_loss.html | 2 +- 
...unctional.multilabel_soft_margin_loss.html | 2 +- .../torch.nn.functional.nll_loss.html | 2 +- .../torch.nn.functional.normalize.html | 2 +- 2.9/generated/torch.nn.functional.pad.html | 2 +- .../torch.nn.functional.poisson_nll_loss.html | 2 +- 2.9/generated/torch.nn.functional.relu.html | 2 +- 2.9/generated/torch.nn.functional.relu6.html | 2 +- .../torch.nn.functional.rms_norm.html | 2 +- 2.9/generated/torch.nn.functional.rrelu.html | 2 +- 2.9/generated/torch.nn.functional.selu.html | 2 +- .../torch.nn.functional.sigmoid.html | 2 +- 2.9/generated/torch.nn.functional.silu.html | 2 +- .../torch.nn.functional.smooth_l1_loss.html | 2 +- .../torch.nn.functional.soft_margin_loss.html | 2 +- .../torch.nn.functional.softmax.html | 2 +- .../torch.nn.functional.softmin.html | 2 +- .../torch.nn.functional.softsign.html | 2 +- 2.9/generated/torch.nn.functional.tanh.html | 2 +- .../torch.nn.functional.tanhshrink.html | 2 +- .../torch.nn.functional.threshold.html | 2 +- ...ional.torch.nn.parallel.data_parallel.html | 2 +- ...rch.nn.functional.triplet_margin_loss.html | 2 +- ...nal.triplet_margin_with_distance_loss.html | 2 +- 2.9/generated/torch.nn.functional.unfold.html | 2 +- .../torch.nn.functional.upsample.html | 2 +- ...torch.nn.functional.upsample_bilinear.html | 2 +- .../torch.nn.functional.upsample_nearest.html | 2 +- .../torch.nn.modules.activation.CELU.html | 6 +- .../torch.nn.modules.activation.ELU.html | 6 +- .../torch.nn.modules.activation.GELU.html | 6 +- .../torch.nn.modules.activation.GLU.html | 6 +- ...orch.nn.modules.activation.Hardshrink.html | 6 +- ...rch.nn.modules.activation.Hardsigmoid.html | 4 +- ...torch.nn.modules.activation.Hardswish.html | 4 +- .../torch.nn.modules.activation.Hardtanh.html | 6 +- ...torch.nn.modules.activation.LeakyReLU.html | 6 +- ...orch.nn.modules.activation.LogSigmoid.html | 4 +- ...orch.nn.modules.activation.LogSoftmax.html | 6 +- .../torch.nn.modules.activation.Mish.html | 6 +- ...modules.activation.MultiheadAttention.html | 6 +- .../torch.nn.modules.activation.PReLU.html | 8 +- .../torch.nn.modules.activation.RReLU.html | 6 +- .../torch.nn.modules.activation.ReLU.html | 6 +- .../torch.nn.modules.activation.ReLU6.html | 4 +- .../torch.nn.modules.activation.SELU.html | 6 +- .../torch.nn.modules.activation.SiLU.html | 6 +- .../torch.nn.modules.activation.Sigmoid.html | 4 +- .../torch.nn.modules.activation.Softmax.html | 6 +- ...torch.nn.modules.activation.Softmax2d.html | 4 +- .../torch.nn.modules.activation.Softmin.html | 6 +- .../torch.nn.modules.activation.Softplus.html | 6 +- ...orch.nn.modules.activation.Softshrink.html | 6 +- .../torch.nn.modules.activation.Softsign.html | 4 +- .../torch.nn.modules.activation.Tanh.html | 4 +- ...orch.nn.modules.activation.Tanhshrink.html | 4 +- ...torch.nn.modules.activation.Threshold.html | 6 +- ...s.adaptive.AdaptiveLogSoftmaxWithLoss.html | 10 +- ...orch.nn.modules.batchnorm.BatchNorm1d.html | 2 +- ...orch.nn.modules.batchnorm.BatchNorm2d.html | 2 +- ...orch.nn.modules.batchnorm.BatchNorm3d.html | 2 +- ....nn.modules.batchnorm.LazyBatchNorm1d.html | 4 +- ....nn.modules.batchnorm.LazyBatchNorm2d.html | 4 +- ....nn.modules.batchnorm.LazyBatchNorm3d.html | 4 +- ...ch.nn.modules.batchnorm.SyncBatchNorm.html | 6 +- ...modules.channelshuffle.ChannelShuffle.html | 6 +- ...torch.nn.modules.container.ModuleDict.html | 14 +- ...torch.nn.modules.container.ModuleList.html | 8 +- ...ch.nn.modules.container.ParameterDict.html | 24 +- ...ch.nn.modules.container.ParameterList.html | 8 +- 
...torch.nn.modules.container.Sequential.html | 12 +- .../torch.nn.modules.conv.Conv1d.html | 2 +- .../torch.nn.modules.conv.Conv2d.html | 2 +- .../torch.nn.modules.conv.Conv3d.html | 2 +- ...torch.nn.modules.conv.ConvTranspose1d.html | 2 +- ...torch.nn.modules.conv.ConvTranspose2d.html | 4 +- ...torch.nn.modules.conv.ConvTranspose3d.html | 2 +- .../torch.nn.modules.conv.LazyConv1d.html | 4 +- .../torch.nn.modules.conv.LazyConv2d.html | 4 +- .../torch.nn.modules.conv.LazyConv3d.html | 4 +- ...h.nn.modules.conv.LazyConvTranspose1d.html | 4 +- ...h.nn.modules.conv.LazyConvTranspose2d.html | 4 +- ...h.nn.modules.conv.LazyConvTranspose3d.html | 4 +- ....nn.modules.distance.CosineSimilarity.html | 4 +- ....nn.modules.distance.PairwiseDistance.html | 4 +- ...torch.nn.modules.dropout.AlphaDropout.html | 4 +- .../torch.nn.modules.dropout.Dropout.html | 4 +- .../torch.nn.modules.dropout.Dropout1d.html | 4 +- .../torch.nn.modules.dropout.Dropout2d.html | 4 +- .../torch.nn.modules.dropout.Dropout3d.html | 4 +- ...n.modules.dropout.FeatureAlphaDropout.html | 4 +- .../torch.nn.modules.flatten.Flatten.html | 6 +- .../torch.nn.modules.flatten.Unflatten.html | 6 +- 2.9/generated/torch.nn.modules.fold.Fold.html | 6 +- .../torch.nn.modules.fold.Unfold.html | 6 +- ...n.modules.instancenorm.InstanceNorm1d.html | 2 +- ...n.modules.instancenorm.InstanceNorm2d.html | 2 +- ...n.modules.instancenorm.InstanceNorm3d.html | 2 +- ...dules.instancenorm.LazyInstanceNorm1d.html | 4 +- ...dules.instancenorm.LazyInstanceNorm2d.html | 4 +- ...dules.instancenorm.LazyInstanceNorm3d.html | 4 +- ...torch.nn.modules.lazy.LazyModuleMixin.html | 6 +- .../torch.nn.modules.linear.Bilinear.html | 8 +- .../torch.nn.modules.linear.Identity.html | 4 +- .../torch.nn.modules.linear.LazyLinear.html | 8 +- .../torch.nn.modules.linear.Linear.html | 8 +- .../torch.nn.modules.loss.BCELoss.html | 4 +- ...rch.nn.modules.loss.BCEWithLogitsLoss.html | 4 +- .../torch.nn.modules.loss.CTCLoss.html | 4 +- ...h.nn.modules.loss.CosineEmbeddingLoss.html | 4 +- ...orch.nn.modules.loss.CrossEntropyLoss.html | 4 +- ...torch.nn.modules.loss.GaussianNLLLoss.html | 4 +- ...ch.nn.modules.loss.HingeEmbeddingLoss.html | 4 +- .../torch.nn.modules.loss.HuberLoss.html | 4 +- .../torch.nn.modules.loss.KLDivLoss.html | 4 +- .../torch.nn.modules.loss.L1Loss.html | 4 +- .../torch.nn.modules.loss.MSELoss.html | 4 +- ...rch.nn.modules.loss.MarginRankingLoss.html | 4 +- ....nn.modules.loss.MultiLabelMarginLoss.html | 4 +- ...modules.loss.MultiLabelSoftMarginLoss.html | 4 +- ...torch.nn.modules.loss.MultiMarginLoss.html | 4 +- .../torch.nn.modules.loss.NLLLoss.html | 4 +- .../torch.nn.modules.loss.PoissonNLLLoss.html | 4 +- .../torch.nn.modules.loss.SmoothL1Loss.html | 4 +- .../torch.nn.modules.loss.SoftMarginLoss.html | 4 +- ...rch.nn.modules.loss.TripletMarginLoss.html | 4 +- ...es.loss.TripletMarginWithDistanceLoss.html | 4 +- ....module.register_module_backward_hook.html | 2 +- ...ister_module_buffer_registration_hook.html | 2 +- ...s.module.register_module_forward_hook.html | 2 +- ...dule.register_module_forward_pre_hook.html | 2 +- ...le.register_module_full_backward_hook.html | 2 +- ...egister_module_full_backward_pre_hook.html | 2 +- ...ister_module_module_registration_hook.html | 2 +- ...er_module_parameter_registration_hook.html | 2 +- ...ch.nn.modules.normalization.GroupNorm.html | 2 +- ...ch.nn.modules.normalization.LayerNorm.html | 2 +- ...dules.normalization.LocalResponseNorm.html | 6 +- ...orch.nn.modules.normalization.RMSNorm.html | 8 +- 
...orch.nn.modules.padding.CircularPad1d.html | 2 +- ...orch.nn.modules.padding.CircularPad2d.html | 2 +- ...orch.nn.modules.padding.CircularPad3d.html | 2 +- ...orch.nn.modules.padding.ConstantPad1d.html | 2 +- ...orch.nn.modules.padding.ConstantPad2d.html | 2 +- ...orch.nn.modules.padding.ConstantPad3d.html | 2 +- ...ch.nn.modules.padding.ReflectionPad1d.html | 2 +- ...ch.nn.modules.padding.ReflectionPad2d.html | 2 +- ...ch.nn.modules.padding.ReflectionPad3d.html | 2 +- ...h.nn.modules.padding.ReplicationPad1d.html | 2 +- ...h.nn.modules.padding.ReplicationPad2d.html | 2 +- ...h.nn.modules.padding.ReplicationPad3d.html | 2 +- .../torch.nn.modules.padding.ZeroPad1d.html | 4 +- .../torch.nn.modules.padding.ZeroPad2d.html | 4 +- .../torch.nn.modules.padding.ZeroPad3d.html | 4 +- ....nn.modules.pixelshuffle.PixelShuffle.html | 6 +- ...n.modules.pixelshuffle.PixelUnshuffle.html | 6 +- ....nn.modules.pooling.AdaptiveAvgPool1d.html | 4 +- ....nn.modules.pooling.AdaptiveAvgPool2d.html | 4 +- ....nn.modules.pooling.AdaptiveAvgPool3d.html | 4 +- ....nn.modules.pooling.AdaptiveMaxPool1d.html | 4 +- ....nn.modules.pooling.AdaptiveMaxPool2d.html | 4 +- ....nn.modules.pooling.AdaptiveMaxPool3d.html | 4 +- .../torch.nn.modules.pooling.AvgPool1d.html | 4 +- .../torch.nn.modules.pooling.AvgPool2d.html | 4 +- .../torch.nn.modules.pooling.AvgPool3d.html | 4 +- ...n.modules.pooling.FractionalMaxPool2d.html | 2 +- ...n.modules.pooling.FractionalMaxPool3d.html | 2 +- .../torch.nn.modules.pooling.LPPool1d.html | 4 +- .../torch.nn.modules.pooling.LPPool2d.html | 4 +- .../torch.nn.modules.pooling.LPPool3d.html | 4 +- .../torch.nn.modules.pooling.MaxPool1d.html | 4 +- .../torch.nn.modules.pooling.MaxPool2d.html | 4 +- .../torch.nn.modules.pooling.MaxPool3d.html | 4 +- .../torch.nn.modules.pooling.MaxUnpool1d.html | 4 +- .../torch.nn.modules.pooling.MaxUnpool2d.html | 4 +- .../torch.nn.modules.pooling.MaxUnpool3d.html | 4 +- 2.9/generated/torch.nn.modules.rnn.GRU.html | 2 +- .../torch.nn.modules.rnn.GRUCell.html | 2 +- 2.9/generated/torch.nn.modules.rnn.LSTM.html | 2 +- .../torch.nn.modules.rnn.LSTMCell.html | 2 +- 2.9/generated/torch.nn.modules.rnn.RNN.html | 4 +- .../torch.nn.modules.rnn.RNNBase.html | 4 +- .../torch.nn.modules.rnn.RNNCell.html | 2 +- .../torch.nn.modules.sparse.Embedding.html | 4 +- .../torch.nn.modules.sparse.EmbeddingBag.html | 6 +- ...ch.nn.modules.transformer.Transformer.html | 6 +- ...odules.transformer.TransformerDecoder.html | 4 +- ...s.transformer.TransformerDecoderLayer.html | 4 +- ...odules.transformer.TransformerEncoder.html | 4 +- ...s.transformer.TransformerEncoderLayer.html | 4 +- .../torch.nn.modules.upsampling.Upsample.html | 6 +- ...dules.upsampling.UpsamplingBilinear2d.html | 2 +- ...odules.upsampling.UpsamplingNearest2d.html | 2 +- ...h.nn.parallel.DistributedDataParallel.html | 10 +- 2.9/generated/torch.nn.parameter.Buffer.html | 2 +- .../torch.nn.parameter.Parameter.html | 2 +- ...orch.nn.parameter.UninitializedBuffer.html | 2 +- ...h.nn.parameter.UninitializedParameter.html | 4 +- 2.9/generated/torch.nn.parameter.is_lazy.html | 2 +- ...rch.nn.utils.clip_grad.clip_grad_norm.html | 2 +- ...ch.nn.utils.clip_grad.clip_grad_norm_.html | 2 +- ...h.nn.utils.clip_grad.clip_grad_value_.html | 2 +- .../torch.nn.utils.clip_grad_norm.html | 2 +- .../torch.nn.utils.clip_grad_norm_.html | 2 +- .../torch.nn.utils.clip_grad_value_.html | 2 +- .../torch.nn.utils.clip_grads_with_norm_.html | 2 +- ...s.convert_conv2d_weight_memory_format.html | 2 +- 
...s.convert_conv3d_weight_memory_format.html | 2 +- ...nvert_parameters.parameters_to_vector.html | 2 +- ...nvert_parameters.vector_to_parameters.html | 2 +- .../torch.nn.utils.fuse_conv_bn_eval.html | 2 +- .../torch.nn.utils.fuse_conv_bn_weights.html | 2 +- .../torch.nn.utils.fuse_linear_bn_eval.html | 2 +- ...torch.nn.utils.fuse_linear_bn_weights.html | 2 +- ...rch.nn.utils.fusion.fuse_conv_bn_eval.html | 2 +- ....nn.utils.fusion.fuse_conv_bn_weights.html | 2 +- ...h.nn.utils.fusion.fuse_linear_bn_eval.html | 2 +- ...n.utils.fusion.fuse_linear_bn_weights.html | 2 +- .../torch.nn.utils.get_total_norm.html | 2 +- .../torch.nn.utils.init.skip_init.html | 2 +- ...t.convert_conv2d_weight_memory_format.html | 2 +- ...t.convert_conv3d_weight_memory_format.html | 2 +- .../torch.nn.utils.parameters_to_vector.html | 2 +- ....nn.utils.parametrizations.orthogonal.html | 2 +- ....utils.parametrizations.spectral_norm.html | 2 +- ...nn.utils.parametrizations.weight_norm.html | 2 +- ...utils.parametrize.ParametrizationList.html | 4 +- .../torch.nn.utils.parametrize.cached.html | 2 +- ....nn.utils.parametrize.is_parametrized.html | 2 +- ....parametrize.register_parametrization.html | 2 +- ...s.parametrize.remove_parametrizations.html | 2 +- ....transfer_parametrizations_and_params.html | 2 +- ...ametrize.type_before_parametrizations.html | 2 +- ...orch.nn.utils.prune.BasePruningMethod.html | 12 +- .../torch.nn.utils.prune.CustomFromMask.html | 10 +- .../torch.nn.utils.prune.Identity.html | 10 +- .../torch.nn.utils.prune.L1Unstructured.html | 10 +- .../torch.nn.utils.prune.LnStructured.html | 12 +- ...torch.nn.utils.prune.PruningContainer.html | 14 +- ...torch.nn.utils.prune.RandomStructured.html | 12 +- ...rch.nn.utils.prune.RandomUnstructured.html | 10 +- ...torch.nn.utils.prune.custom_from_mask.html | 2 +- ...ch.nn.utils.prune.global_unstructured.html | 2 +- .../torch.nn.utils.prune.identity.html | 2 +- .../torch.nn.utils.prune.is_pruned.html | 2 +- .../torch.nn.utils.prune.l1_unstructured.html | 2 +- .../torch.nn.utils.prune.ln_structured.html | 2 +- ...orch.nn.utils.prune.random_structured.html | 2 +- ...ch.nn.utils.prune.random_unstructured.html | 2 +- .../torch.nn.utils.prune.remove.html | 2 +- .../torch.nn.utils.remove_spectral_norm.html | 2 +- .../torch.nn.utils.remove_weight_norm.html | 2 +- .../torch.nn.utils.rnn.PackedSequence.html | 6 +- ...torch.nn.utils.rnn.invert_permutation.html | 2 +- ...rch.nn.utils.rnn.pack_padded_sequence.html | 2 +- .../torch.nn.utils.rnn.pack_sequence.html | 2 +- ...orch.nn.utils.rnn.pad_packed_sequence.html | 2 +- .../torch.nn.utils.rnn.pad_sequence.html | 2 +- .../torch.nn.utils.rnn.unpack_sequence.html | 2 +- .../torch.nn.utils.rnn.unpad_sequence.html | 2 +- 2.9/generated/torch.nn.utils.skip_init.html | 2 +- .../torch.nn.utils.spectral_norm.html | 2 +- ...ls.spectral_norm.remove_spectral_norm.html | 2 +- ....nn.utils.spectral_norm.spectral_norm.html | 2 +- ...ch.nn.utils.stateless.functional_call.html | 2 +- .../torch.nn.utils.vector_to_parameters.html | 2 +- 2.9/generated/torch.nn.utils.weight_norm.html | 2 +- ....utils.weight_norm.remove_weight_norm.html | 2 +- ...orch.nn.utils.weight_norm.weight_norm.html | 2 +- 2.9/generated/torch.no_grad.html | 2 +- 2.9/generated/torch.norm.html | 2 +- 2.9/generated/torch.optim.ASGD.html | 24 +- 2.9/generated/torch.optim.Adadelta.html | 24 +- 2.9/generated/torch.optim.Adafactor.html | 22 +- 2.9/generated/torch.optim.Adagrad.html | 26 +- 2.9/generated/torch.optim.Adam.html | 24 +- 2.9/generated/torch.optim.AdamW.html | 24 +- 
2.9/generated/torch.optim.Adamax.html | 24 +- 2.9/generated/torch.optim.LBFGS.html | 24 +- 2.9/generated/torch.optim.Muon.html | 22 +- 2.9/generated/torch.optim.NAdam.html | 24 +- ...torch.optim.Optimizer.add_param_group.html | 2 +- ...torch.optim.Optimizer.load_state_dict.html | 2 +- ...er.register_load_state_dict_post_hook.html | 2 +- ...zer.register_load_state_dict_pre_hook.html | 2 +- ...timizer.register_state_dict_post_hook.html | 2 +- ...ptimizer.register_state_dict_pre_hook.html | 2 +- ...tim.Optimizer.register_step_post_hook.html | 2 +- ...ptim.Optimizer.register_step_pre_hook.html | 2 +- .../torch.optim.Optimizer.state_dict.html | 2 +- 2.9/generated/torch.optim.Optimizer.step.html | 2 +- .../torch.optim.Optimizer.zero_grad.html | 2 +- 2.9/generated/torch.optim.RAdam.html | 24 +- 2.9/generated/torch.optim.RMSprop.html | 24 +- 2.9/generated/torch.optim.Rprop.html | 24 +- 2.9/generated/torch.optim.SGD.html | 24 +- 2.9/generated/torch.optim.SparseAdam.html | 24 +- .../torch.optim.adadelta.Adadelta.html | 24 +- .../torch.optim.adadelta.adadelta.html | 2 +- .../torch.optim.adagrad.Adagrad.html | 26 +- .../torch.optim.adagrad.adagrad.html | 2 +- 2.9/generated/torch.optim.adam.Adam.html | 24 +- 2.9/generated/torch.optim.adam.adam.html | 2 +- 2.9/generated/torch.optim.adamax.Adamax.html | 24 +- 2.9/generated/torch.optim.adamax.adamax.html | 2 +- 2.9/generated/torch.optim.adamw.AdamW.html | 24 +- 2.9/generated/torch.optim.adamw.adamw.html | 2 +- 2.9/generated/torch.optim.asgd.ASGD.html | 24 +- 2.9/generated/torch.optim.asgd.asgd.html | 2 +- 2.9/generated/torch.optim.lbfgs.LBFGS.html | 24 +- ...h.optim.lr_scheduler.ChainedScheduler.html | 12 +- .../torch.optim.lr_scheduler.ConstantLR.html | 12 +- ....optim.lr_scheduler.CosineAnnealingLR.html | 12 +- ...scheduler.CosineAnnealingWarmRestarts.html | 12 +- .../torch.optim.lr_scheduler.CyclicLR.html | 14 +- ...orch.optim.lr_scheduler.ExponentialLR.html | 12 +- .../torch.optim.lr_scheduler.LRScheduler.html | 12 +- .../torch.optim.lr_scheduler.LambdaLR.html | 12 +- .../torch.optim.lr_scheduler.LinearLR.html | 12 +- .../torch.optim.lr_scheduler.MultiStepLR.html | 12 +- ...h.optim.lr_scheduler.MultiplicativeLR.html | 12 +- .../torch.optim.lr_scheduler.OneCycleLR.html | 12 +- ...torch.optim.lr_scheduler.PolynomialLR.html | 12 +- ....optim.lr_scheduler.ReduceLROnPlateau.html | 12 +- ...torch.optim.lr_scheduler.SequentialLR.html | 14 +- .../torch.optim.lr_scheduler.StepLR.html | 12 +- 2.9/generated/torch.optim.nadam.NAdam.html | 24 +- 2.9/generated/torch.optim.nadam.nadam.html | 2 +- 2.9/generated/torch.optim.radam.RAdam.html | 24 +- 2.9/generated/torch.optim.radam.radam.html | 2 +- .../torch.optim.rmsprop.RMSprop.html | 24 +- .../torch.optim.rmsprop.rmsprop.html | 2 +- 2.9/generated/torch.optim.rprop.Rprop.html | 24 +- 2.9/generated/torch.optim.rprop.rprop.html | 2 +- 2.9/generated/torch.optim.sgd.SGD.html | 24 +- 2.9/generated/torch.optim.sgd.sgd.html | 2 +- .../torch.optim.sparse_adam.SparseAdam.html | 24 +- .../torch.optim.swa_utils.AveragedModel.html | 112 ++-- .../torch.optim.swa_utils.SWALR.html | 12 +- 2.9/generated/torch.pca_lowrank.html | 2 +- .../torch.quasirandom.SobolEngine.html | 10 +- 2.9/generated/torch.save.html | 2 +- 2.9/generated/torch.seed.html | 2 +- 2.9/generated/torch.set_default_device.html | 2 +- 2.9/generated/torch.set_default_dtype.html | 2 +- .../torch.set_default_tensor_type.html | 2 +- .../torch.set_deterministic_debug_mode.html | 2 +- .../torch.set_float32_matmul_precision.html | 2 +- 
 2.9/generated/torch.set_printoptions.html | 2 +-
 2.9/generated/torch.set_rng_state.html | 2 +-
 2.9/generated/torch.set_warn_always.html | 2 +-
 .../torch.signal.windows.bartlett.html | 2 +-
 .../torch.signal.windows.blackman.html | 2 +-
 .../torch.signal.windows.cosine.html | 2 +-
 .../torch.signal.windows.exponential.html | 2 +-
 .../torch.signal.windows.gaussian.html | 2 +-
 .../torch.signal.windows.general_cosine.html | 2 +-
 .../torch.signal.windows.general_hamming.html | 2 +-
 .../torch.signal.windows.hamming.html | 2 +-
 2.9/generated/torch.signal.windows.hann.html | 2 +-
 .../torch.signal.windows.kaiser.html | 2 +-
 .../torch.signal.windows.nuttall.html | 2 +-
 .../torch.sparse.as_sparse_gradcheck.html | 2 +-
 ...sparse.check_sparse_tensor_invariants.html | 8 +-
 2.9/generated/torch.sparse.sum.html | 2 +-
 2.9/generated/torch.split.html | 2 +-
 2.9/generated/torch.stft.html | 2 +-
 2.9/generated/torch.svd_lowrank.html | 2 +-
 2.9/generated/torch.sym_float.html | 2 +-
 2.9/generated/torch.sym_fresh_size.html | 2 +-
 2.9/generated/torch.sym_int.html | 2 +-
 2.9/generated/torch.sym_ite.html | 2 +-
 2.9/generated/torch.sym_max.html | 2 +-
 2.9/generated/torch.sym_min.html | 2 +-
 2.9/generated/torch.sym_not.html | 2 +-
 2.9/generated/torch.sym_sum.html | 2 +-
 2.9/generated/torch.tensordot.html | 2 +-
 2.9/generated/torch.unique.html | 2 +-
 2.9/generated/torch.unique_consecutive.html | 2 +-
 2.9/generated/torch.unravel_index.html | 2 +-
 .../torch.use_deterministic_algorithms.html | 2 +-
 ...erate_methods_for_privateuse1_backend.html | 2 +-
 .../torch.utils.get_cpp_backtrace.html | 2 +-
 ...orch.utils.rename_privateuse1_backend.html | 2 +-
 2.9/generated/torch.utils.set_module.html | 2 +-
 2.9/generated/torch.utils.swap_tensors.html | 2 +-
 2.9/generated/torch.vmap.html | 2 +-
 2.9/generated/torch.xpu.Event.html | 12 +-
 2.9/generated/torch.xpu.Stream.html | 12 +-
 2.9/generated/torch.xpu.StreamContext.html | 2 +-
 2.9/generated/torch.xpu.current_device.html | 2 +-
 2.9/generated/torch.xpu.current_stream.html | 2 +-
 2.9/generated/torch.xpu.device.html | 2 +-
 2.9/generated/torch.xpu.device_count.html | 2 +-
 2.9/generated/torch.xpu.device_of.html | 2 +-
 2.9/generated/torch.xpu.get_arch_list.html | 2 +-
 .../torch.xpu.get_device_capability.html | 2 +-
 2.9/generated/torch.xpu.get_device_name.html | 2 +-
 .../torch.xpu.get_device_properties.html | 2 +-
 .../torch.xpu.get_gencode_flags.html | 2 +-
 2.9/generated/torch.xpu.get_rng_state.html | 2 +-
 .../torch.xpu.get_rng_state_all.html | 2 +-
 .../torch.xpu.get_stream_from_external.html | 2 +-
 2.9/generated/torch.xpu.init.html | 2 +-
 2.9/generated/torch.xpu.initial_seed.html | 2 +-
 2.9/generated/torch.xpu.is_available.html | 2 +-
 2.9/generated/torch.xpu.is_initialized.html | 2 +-
 2.9/generated/torch.xpu.manual_seed.html | 2 +-
 2.9/generated/torch.xpu.manual_seed_all.html | 2 +-
 .../torch.xpu.memory.empty_cache.html | 2 +-
 ...torch.xpu.memory.max_memory_allocated.html | 2 +-
 .../torch.xpu.memory.max_memory_reserved.html | 2 +-
 .../torch.xpu.memory.mem_get_info.html | 2 +-
 .../torch.xpu.memory.memory_allocated.html | 2 +-
 .../torch.xpu.memory.memory_reserved.html | 2 +-
 .../torch.xpu.memory.memory_stats.html | 2 +-
 ...pu.memory.memory_stats_as_nested_dict.html | 2 +-
 ...memory.reset_accumulated_memory_stats.html | 2 +-
 ...ch.xpu.memory.reset_peak_memory_stats.html | 2 +-
 2.9/generated/torch.xpu.seed.html | 2 +-
 2.9/generated/torch.xpu.seed_all.html | 2 +-
 2.9/generated/torch.xpu.set_device.html | 2 +-
 2.9/generated/torch.xpu.set_rng_state.html | 2 +-
 .../torch.xpu.set_rng_state_all.html | 2 +-
 2.9/generated/torch.xpu.set_stream.html | 2 +-
 2.9/generated/torch.xpu.stream.html | 2 +-
 2.9/generated/torch.xpu.synchronize.html | 2 +-
 2.9/hub.html | 14 +-
 2.9/library.html | 46 +-
 2.9/mobile_optimizer.html | 2 +-
 2.9/model_zoo.html | 2 +-
 2.9/module_tracker.html | 2 +-
 2.9/monitor.html | 4 +-
 2.9/multiprocessing.html | 12 +-
 2.9/named_tensor.html | 8 +-
 2.9/nested.html | 10 +-
 2.9/nn.attention.flex_attention.html | 36 +-
 2.9/nn.init.html | 30 +-
 2.9/notes/serialization.html | 26 +-
 2.9/onnx.html | 10 +-
 2.9/onnx_export.html | 18 +-
 2.9/onnx_ops.html | 10 +-
 2.9/onnx_verification.html | 6 +-
 2.9/optim.html | 6 +-
 2.9/package.html | 76 +--
 2.9/profiler.html | 42 +-
 2.9/quantization.html | 6 +-
 2.9/random.html | 12 +-
 2.9/rpc.html | 28 +-
 2.9/storage.html | 180 +++---
 2.9/tensorboard.html | 38 +-
 2.9/testing.html | 6 +-
 2.9/torch.compiler_aot_inductor.html | 4 +-
 2.9/torch.html | 16 +-
 2.9/torch.overrides.html | 16 +-
 2.9/torch_cuda_memory.html | 6 +-
 replace_github_links.sh | 9 +-
 1256 files changed, 3747 insertions(+), 3748 deletions(-)

diff --git a/2.9/amp.html b/2.9/amp.html
index 9c69740ae1f..48f699d237c 100644
--- a/2.9/amp.html
+++ b/2.9/amp.html
@@ -4448,7 +4448,7 @@

Automatic Mixed Precision package - torch.amp

Autocasting#

-torch.amp.autocast_mode.is_autocast_available(device_type)[source]#
+torch.amp.autocast_mode.is_autocast_available(device_type)[source]#

Return a bool indicating if autocast is available on device_type.

Parameters
@@ -4464,7 +4464,7 @@

Automatic Mixed Precision package - torch.amp
-class torch.autocast(device_type, dtype=None, enabled=True, cache_enabled=None)[source]#
+class torch.autocast(device_type, dtype=None, enabled=True, cache_enabled=None)[source]#

Instances of autocast serve as context managers or decorators that allow regions of your script to run in mixed precision.

In these regions, ops run in an op-specific dtype chosen by autocast @@ -4632,7 +4632,7 @@
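For example, a minimal training-loop sketch (model, optimizer, loss_fn, and data are assumed placeholders):

for input, target in data:
    optimizer.zero_grad()
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        output = model(input)            # ops here run in an autocast-chosen dtype
        loss = loss_fn(output, target)
    loss.backward()                      # backward runs outside the autocast region
    optimizer.step()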

Automatic Mixed Precision package - torch.amp
-torch.amp.custom_fwd(fwd=None, *, device_type, cast_inputs=None)[source]#
+torch.amp.custom_fwd(fwd=None, *, device_type, cast_inputs=None)[source]#

Create a helper decorator for forward methods of custom autograd functions.

Autograd functions are subclasses of torch.autograd.Function. See the example page for more detail.

@@ -4659,7 +4659,7 @@

Automatic Mixed Precision package - torch.amp
-torch.amp.custom_bwd(bwd=None, *, device_type)[source]#
+torch.amp.custom_bwd(bwd=None, *, device_type)[source]#

Create a helper decorator for backward methods of custom autograd functions.

Autograd functions are subclasses of torch.autograd.Function. Ensures that backward executes with the same autocast state as forward. @@ -4675,7 +4675,7 @@
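A minimal sketch of both decorators on a custom autograd function (MyMM is an illustrative name):

import torch

class MyMM(torch.autograd.Function):
    @staticmethod
    @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float32)
    def forward(ctx, a, b):
        ctx.save_for_backward(a, b)
        return a.mm(b)                   # executes in float32 regardless of autocast

    @staticmethod
    @torch.amp.custom_bwd(device_type="cuda")
    def backward(ctx, grad):
        a, b = ctx.saved_tensors
        return grad.mm(b.t()), a.t().mm(grad)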

Automatic Mixed Precision package - torch.amp
-class torch.cuda.amp.autocast(enabled=True, dtype=torch.float16, cache_enabled=True)[source]#
+class torch.cuda.amp.autocast(enabled=True, dtype=torch.float16, cache_enabled=True)[source]#

See torch.autocast.

torch.cuda.amp.autocast(args...) is deprecated. Please use torch.amp.autocast("cuda", args...) instead.

@@ -4684,21 +4684,21 @@

Automatic Mixed Precision package - torch.amp
-torch.cuda.amp.custom_fwd(fwd=None, *, cast_inputs=None)[source]#
+torch.cuda.amp.custom_fwd(fwd=None, *, cast_inputs=None)[source]#

torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.

-torch.cuda.amp.custom_bwd(bwd)[source]#
+torch.cuda.amp.custom_bwd(bwd)[source]#

torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.

-class torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16, cache_enabled=True)[source]#
+class torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16, cache_enabled=True)[source]#

See torch.autocast. torch.cpu.amp.autocast(args...) is deprecated. Please use torch.amp.autocast("cpu", args...) instead.

@@ -4729,7 +4729,7 @@

Automatic Mixed Precision package - torch.amp
-class torch.cuda.amp.GradScaler(init_scale=65536.0, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000, enabled=True)[source]#
+class torch.cuda.amp.GradScaler(init_scale=65536.0, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000, enabled=True)[source]#

See torch.amp.GradScaler. torch.cuda.amp.GradScaler(args...) is deprecated. Please use torch.amp.GradScaler("cuda", args...) instead.

@@ -4738,7 +4738,7 @@

Automatic Mixed Precision package - torch.amp
-class torch.cpu.amp.GradScaler(init_scale=65536.0, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000, enabled=True)[source]#
+class torch.cpu.amp.GradScaler(init_scale=65536.0, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000, enabled=True)[source]#

See torch.amp.GradScaler. torch.cpu.amp.GradScaler(args...) is deprecated. Please use torch.amp.GradScaler("cpu", args...) instead.
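A typical loop with the non-deprecated entry point (model, optimizer, loss_fn, and data are assumed placeholders):

scaler = torch.amp.GradScaler("cuda")
for input, target in data:
    optimizer.zero_grad()
    with torch.autocast("cuda"):
        loss = loss_fn(model(input), target)
    scaler.scale(loss).backward()   # scale the loss to avoid float16 gradient underflow
    scaler.step(optimizer)          # unscales gradients and skips the step on inf/NaN
    scaler.update()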

diff --git a/2.9/autograd.html b/2.9/autograd.html
index 05fb3a8eeff..e23293911a9 100644
--- a/2.9/autograd.html
+++ b/2.9/autograd.html
@@ -4629,7 +4629,7 @@

Tensor autograd functions
Function#

-class torch.autograd.Function(*args, **kwargs)[source]#
+class torch.autograd.Function(*args, **kwargs)[source]#

Base class to create custom autograd.Function.

To create a custom autograd.Function, subclass this class and implement the forward() and backward() static methods. Then, to use your custom @@ -4772,7 +4772,7 @@
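For example, a minimal exponential op with a hand-written backward:

import torch

class Exp(torch.autograd.Function):
    @staticmethod
    def forward(ctx, i):
        result = i.exp()
        ctx.save_for_backward(result)    # stash what backward will need
        return result

    @staticmethod
    def backward(ctx, grad_output):
        (result,) = ctx.saved_tensors
        return grad_output * result      # d/dx exp(x) = exp(x)

out = Exp.apply(torch.randn(3, requires_grad=True))  # call via apply, not directly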

Profiler
emit_itt.

-class torch.autograd.profiler.profile(enabled=True, *, use_cuda=False, use_device=None, record_shapes=False, with_flops=False, profile_memory=False, with_stack=False, with_modules=False, use_kineto=False, use_cpu=True, experimental_config=None, acc_events=False, custom_trace_id_callback=None)[source]#
+class torch.autograd.profiler.profile(enabled=True, *, use_cuda=False, use_device=None, record_shapes=False, with_flops=False, profile_memory=False, with_stack=False, with_modules=False, use_kineto=False, use_cpu=True, experimental_config=None, acc_events=False, custom_trace_id_callback=None)[source]#

Context manager that manages autograd profiler state and holds a summary of results.

Note

@@ -4907,7 +4907,7 @@

Profiler
-class torch.autograd.profiler.emit_nvtx(enabled=True, record_shapes=False)[source]#
+class torch.autograd.profiler.emit_nvtx(enabled=True, record_shapes=False)[source]#

Context manager that makes every autograd operation emit an NVTX range.

It is useful when running the program under nvprof:

nvprof --profile-from-start off -o trace_name.prof -- <regular command here>
@@ -4976,7 +4976,7 @@ 

Profiler
-class torch.autograd.profiler.emit_itt(enabled=True, record_shapes=False)[source]#
+class torch.autograd.profiler.emit_itt(enabled=True, record_shapes=False)[source]#

Context manager that makes every autograd operation emit an ITT range.

It is useful when running the program under Intel(R) VTune Profiler:

vtune <--vtune-flags> <regular command here>
@@ -5027,7 +5027,7 @@ 

Profiler#

-class torch.autograd.detect_anomaly(check_nan=True)[source]#
+class torch.autograd.detect_anomaly(check_nan=True)[source]#

Context-manager that enable anomaly detection for the autograd engine.

This does two things:

    @@ -5098,7 +5098,7 @@

    Debugging and anomaly detection
    -class torch.autograd.set_detect_anomaly(mode, check_nan=True)[source]#
    +class torch.autograd.set_detect_anomaly(mode, check_nan=True)[source]#

    Context-manager that sets the anomaly detection for the autograd engine on or off.

    set_detect_anomaly will enable or disable the autograd anomaly detection based on its argument mode. @@ -5184,7 +5184,7 @@

Autograd graph
Hooks for saved tensors.

    -class torch.autograd.graph.saved_tensors_hooks(pack_hook, unpack_hook)[source]#
    +class torch.autograd.graph.saved_tensors_hooks(pack_hook, unpack_hook)[source]#

    Context-manager that sets a pair of pack / unpack hooks for saved tensors.

    Use this context-manager to define how intermediary results of an operation should be packed before saving, and unpacked on retrieval.
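A minimal sketch that offloads saved activations to CPU and restores them for backward (assuming a CUDA device; save_on_cpu below packages the same idea):

import torch

def pack(t):
    return t.cpu()       # move the saved tensor off the GPU

def unpack(t):
    return t.cuda()      # bring it back when backward needs it

x = torch.randn(4, device="cuda", requires_grad=True)
with torch.autograd.graph.saved_tensors_hooks(pack, unpack):
    y = (x * x).sum()
y.backward()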

    @@ -5251,7 +5251,7 @@

    Autograd graph
    -class torch.autograd.graph.save_on_cpu(pin_memory=False, device_type='cuda')[source]#
    +class torch.autograd.graph.save_on_cpu(pin_memory=False, device_type='cuda')[source]#

    Context manager under which tensors saved by the forward pass will be stored on cpu, then retrieved for backward.

    When performing operations within this context manager, intermediary results saved in the graph during the forward pass will be moved to CPU, @@ -5291,7 +5291,7 @@

    Autograd graph
    -class torch.autograd.graph.disable_saved_tensors_hooks(error_message)[source]#
    +class torch.autograd.graph.disable_saved_tensors_hooks(error_message)[source]#

    Context-manager that disables the saved tensors default hooks feature.

    Useful for if you are creating a feature that does not work with saved tensors default hooks.

    @@ -5317,7 +5317,7 @@

    Autograd graph
    -class torch.autograd.graph.register_multi_grad_hook(tensors, fn, *, mode='all')[source]#
    +class torch.autograd.graph.register_multi_grad_hook(tensors, fn, *, mode='all')[source]#

    Register a multi-grad backward hook.

    There are two supported modes: "all" and "any".

    Under the "all" mode, the hook will be called after gradients with respect to every tensor in @@ -5368,7 +5368,7 @@

    Autograd graph
    -class torch.autograd.graph.allow_mutation_on_saved_tensors[source]#
    +class torch.autograd.graph.allow_mutation_on_saved_tensors[source]#

    Context manager under which mutating tensors saved for backward is allowed.

    Under this context manager, tensors saved for backward are cloned on mutation, so the original version can still be used during backward. Normally, mutating a tensor @@ -5404,7 +5404,7 @@

    Autograd graph
    -class torch.autograd.graph.GradientEdge(node, output_nr, ownership_token=None)[source]#
    +class torch.autograd.graph.GradientEdge(node, output_nr, ownership_token=None)[source]#

    Object representing a given gradient edge within the autograd graph.

    To get the gradient edge where a given Tensor gradient will be computed, you can do edge = autograd.graph.get_gradient_edge(tensor).

    @@ -5414,7 +5414,7 @@

    Autograd graph
    -torch.autograd.graph.get_gradient_edge(tensor)[source]#
    +torch.autograd.graph.get_gradient_edge(tensor)[source]#

    Get the gradient edge for computing the gradient of the given Tensor.

    In particular, it is equivalent to call g = autograd.grad(loss, input) and g = autograd.grad(loss, get_gradient_edge(input)).

diff --git a/2.9/backends.html b/2.9/backends.html
index 165e4a1d583..60b84c60e45 100644
--- a/2.9/backends.html
+++ b/2.9/backends.html
@@ -4414,7 +4414,7 @@

    torch.backends.cpu#

    -torch.backends.cpu.get_cpu_capability()[source]#
    +torch.backends.cpu.get_cpu_capability()[source]#

    Return cpu capability as a string value.

    Possible values: - “DEFAULT” @@ -4436,7 +4436,7 @@

    torch.backends.cuda#

    -torch.backends.cuda.is_built()[source]#
    +torch.backends.cuda.is_built()[source]#

    Return whether PyTorch is built with CUDA support.

    Note that this doesn’t necessarily mean CUDA is available; just that if this PyTorch binary were run on a machine with working CUDA drivers and devices, we would be able to use it.

    @@ -4488,7 +4488,7 @@
    -torch.backends.cuda.preferred_blas_library(backend=None)[source]#
    +torch.backends.cuda.preferred_blas_library(backend=None)[source]#

    Override the library PyTorch uses for BLAS operations. Choose between cuBLAS, cuBLASLt, and CK [ROCm-only].

    Warning

    @@ -4521,7 +4521,7 @@
    -torch.backends.cuda.preferred_rocm_fa_library(backend=None)[source]#
    +torch.backends.cuda.preferred_rocm_fa_library(backend=None)[source]#

    [ROCm-only] Override the backend PyTorch uses in ROCm environments for Flash Attention. Choose between AOTriton and CK

    @@ -4551,7 +4551,7 @@
    -torch.backends.cuda.preferred_linalg_library(backend=None)[source]#
    +torch.backends.cuda.preferred_linalg_library(backend=None)[source]#

    Override the heuristic PyTorch uses to choose between cuSOLVER and MAGMA for CUDA linear algebra operations.

    Warning

    @@ -4606,7 +4606,7 @@
    -torch.backends.cuda.flash_sdp_enabled()[source]#
    +torch.backends.cuda.flash_sdp_enabled()[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4616,7 +4616,7 @@
    -torch.backends.cuda.enable_mem_efficient_sdp(enabled)[source]#
    +torch.backends.cuda.enable_mem_efficient_sdp(enabled)[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4628,7 +4628,7 @@
    -torch.backends.cuda.mem_efficient_sdp_enabled()[source]#
    +torch.backends.cuda.mem_efficient_sdp_enabled()[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4638,7 +4638,7 @@
    -torch.backends.cuda.enable_flash_sdp(enabled)[source]#
    +torch.backends.cuda.enable_flash_sdp(enabled)[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4650,7 +4650,7 @@
    -torch.backends.cuda.math_sdp_enabled()[source]#
    +torch.backends.cuda.math_sdp_enabled()[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4660,7 +4660,7 @@
    -torch.backends.cuda.enable_math_sdp(enabled)[source]#
    +torch.backends.cuda.enable_math_sdp(enabled)[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4672,7 +4672,7 @@
    -torch.backends.cuda.fp16_bf16_reduction_math_sdp_allowed()[source]#
    +torch.backends.cuda.fp16_bf16_reduction_math_sdp_allowed()[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4682,7 +4682,7 @@
    -torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(enabled)[source]#
    +torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(enabled)[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4694,7 +4694,7 @@
    -torch.backends.cuda.cudnn_sdp_enabled()[source]#
    +torch.backends.cuda.cudnn_sdp_enabled()[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4704,7 +4704,7 @@
    -torch.backends.cuda.enable_cudnn_sdp(enabled)[source]#
    +torch.backends.cuda.enable_cudnn_sdp(enabled)[source]#

    Warning

    This flag is beta and subject to change.
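Taken together, these toggles choose which scaled_dot_product_attention backends may be dispatched to; a minimal sketch that restricts dispatch to the flash kernel:

import torch.backends.cuda as cuda_backends

cuda_backends.enable_flash_sdp(True)
cuda_backends.enable_mem_efficient_sdp(False)
cuda_backends.enable_math_sdp(False)
cuda_backends.enable_cudnn_sdp(False)
print(cuda_backends.flash_sdp_enabled())  # True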

    @@ -4716,7 +4716,7 @@
    -torch.backends.cuda.is_flash_attention_available()[source]#
    +torch.backends.cuda.is_flash_attention_available()[source]#

    Check if PyTorch was built with FlashAttention for scaled_dot_product_attention.

    Returns
    @@ -4735,7 +4735,7 @@
    -torch.backends.cuda.can_use_flash_attention(params, debug=False)[source]#
    +torch.backends.cuda.can_use_flash_attention(params, debug=False)[source]#

    Check if FlashAttention can be utilized in scaled_dot_product_attention.

    Parameters
    @@ -4763,7 +4763,7 @@
    -torch.backends.cuda.can_use_efficient_attention(params, debug=False)[source]#
    +torch.backends.cuda.can_use_efficient_attention(params, debug=False)[source]#

    Check if efficient_attention can be utilized in scaled_dot_product_attention.

    Parameters
    @@ -4791,7 +4791,7 @@
    -torch.backends.cuda.can_use_cudnn_attention(params, debug=False)[source]#
    +torch.backends.cuda.can_use_cudnn_attention(params, debug=False)[source]#

    Check if cudnn_attention can be utilized in scaled_dot_product_attention.

    Parameters
    @@ -4819,7 +4819,7 @@
    -torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=True, enable_cudnn=True)[source]#
    +torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=True, enable_mem_efficient=True, enable_cudnn=True)[source]#

    Warning

    This flag is beta and subject to change.

    @@ -4835,13 +4835,13 @@

    torch.backends.cudnn#

    -torch.backends.cudnn.version()[source]#
    +torch.backends.cudnn.version()[source]#

    Return the version of cuDNN.

    -torch.backends.cudnn.is_available()[source]#
    +torch.backends.cudnn.is_available()[source]#

    Return a bool indicating if CUDNN is currently available.

    @@ -4887,7 +4887,7 @@

    torch.backends.cusparselt#

    -torch.backends.cusparselt.version()[source]#
    +torch.backends.cusparselt.version()[source]#

    Return the version of cuSPARSELt

    Return type
    @@ -4898,7 +4898,7 @@
    -torch.backends.cusparselt.is_available()[source]#
    +torch.backends.cusparselt.is_available()[source]#

    Return a bool indicating if cuSPARSELt is currently available.

    Return type
    @@ -4912,7 +4912,7 @@

    torch.backends.mha#

    -torch.backends.mha.get_fastpath_enabled()[source]#
    +torch.backends.mha.get_fastpath_enabled()[source]#

Returns whether the fast path for TransformerEncoder and MultiHeadAttention is enabled, or True if jit is scripting.

    @@ -4929,7 +4929,7 @@
    -torch.backends.mha.set_fastpath_enabled(value)[source]#
    +torch.backends.mha.set_fastpath_enabled(value)[source]#

    Sets whether fast path is enabled

    @@ -4950,7 +4950,7 @@

    torch.backends.mps#

    -torch.backends.mps.is_available()[source]#
    +torch.backends.mps.is_available()[source]#

    Return a bool indicating if MPS is currently available.

    Return type
    @@ -4961,7 +4961,7 @@
    -torch.backends.mps.is_built()[source]#
    +torch.backends.mps.is_built()[source]#

    Return whether PyTorch is built with MPS support.

Note that this doesn’t necessarily mean MPS is available; just that if this PyTorch binary were run on a machine with working MPS drivers @@ -4978,13 +4978,13 @@

    torch.backends.mkl#

    -torch.backends.mkl.is_available()[source]#
    +torch.backends.mkl.is_available()[source]#

    Return whether PyTorch is built with MKL support.

    -class torch.backends.mkl.verbose(enable)[source]#
    +class torch.backends.mkl.verbose(enable)[source]#

    On-demand oneMKL verbosing functionality.

    To make it easier to debug performance issues, oneMKL can dump verbose messages containing execution information like duration while executing @@ -5017,12 +5017,12 @@
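A minimal sketch, assuming model and data are defined elsewhere (VERBOSE_ON is the module's verbose level constant):

import torch.backends.mkl as mkl

with mkl.verbose(mkl.VERBOSE_ON):
    model(data)   # oneMKL verbose messages are dumped while this runs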

    torch.backends.mkldnn#

    -torch.backends.mkldnn.is_available()[source]#
    +torch.backends.mkldnn.is_available()[source]#
    -class torch.backends.mkldnn.verbose(level)[source]#
    +class torch.backends.mkldnn.verbose(level)[source]#

    On-demand oneDNN (former MKL-DNN) verbosing functionality.

    To make it easier to debug performance issues, oneDNN can dump verbose messages containing information like kernel size, input data size and @@ -5056,19 +5056,19 @@

    torch.backends.nnpack#

    -torch.backends.nnpack.is_available()[source]#
    +torch.backends.nnpack.is_available()[source]#

    Return whether PyTorch is built with NNPACK support.

    -torch.backends.nnpack.flags(enabled=False)[source]#
    +torch.backends.nnpack.flags(enabled=False)[source]#

Context manager for setting whether NNPACK is enabled globally.

    -torch.backends.nnpack.set_flags(_enabled)[source]#
    +torch.backends.nnpack.set_flags(_enabled)[source]#

Set whether NNPACK is enabled globally.

    @@ -5077,7 +5077,7 @@

    torch.backends.openmp#

    -torch.backends.openmp.is_available()[source]#
    +torch.backends.openmp.is_available()[source]#

    Return whether PyTorch is built with OpenMP support.

    @@ -5086,7 +5086,7 @@

    torch.backends.opt_einsum#

    -torch.backends.opt_einsum.is_available()[source]#
    +torch.backends.opt_einsum.is_available()[source]#

    Return a bool indicating if opt_einsum is currently available.

    You must install opt-einsum in order for torch to automatically optimize einsum. To make opt-einsum available, you can install it along with torch: pip install torch[opt-einsum] @@ -5102,7 +5102,7 @@

    -torch.backends.opt_einsum.get_opt_einsum()[source]#
    +torch.backends.opt_einsum.get_opt_einsum()[source]#

    Return the opt_einsum package if opt_einsum is currently available, else None.

    Return type
diff --git a/2.9/benchmark_utils.html b/2.9/benchmark_utils.html
index 7be1a72b03d..ff49ed1613b 100644
--- a/2.9/benchmark_utils.html
+++ b/2.9/benchmark_utils.html
@@ -4396,7 +4396,7 @@

    Created On: Nov 02, 2020 | Last Updated On: Jun 12, 2025

    -class torch.utils.benchmark.Timer(stmt='pass', setup='pass', global_setup='', timer=<built-in function perf_counter>, globals=None, label=None, sub_label=None, description=None, env=None, num_threads=1, language=Language.PYTHON)[source]#
    +class torch.utils.benchmark.Timer(stmt='pass', setup='pass', global_setup='', timer=<built-in function perf_counter>, globals=None, label=None, sub_label=None, description=None, env=None, num_threads=1, language=Language.PYTHON)[source]#

    Helper class for measuring execution time of PyTorch statements.

    For a full tutorial on how to use this class, see: https://pytorch.org/tutorials/recipes/recipes/benchmark.html

    @@ -4500,7 +4500,7 @@
    -adaptive_autorange(threshold=0.1, *, min_run_time=0.01, max_run_time=10.0, callback=None)[source]#
    +adaptive_autorange(threshold=0.1, *, min_run_time=0.01, max_run_time=10.0, callback=None)[source]#

Similar to blocked_autorange but also checks for variability in measurements and repeats until iqr/median is smaller than threshold or max_run_time is reached.

    At a high level, adaptive_autorange executes the following pseudo-code:

    @@ -4541,7 +4541,7 @@
    -blocked_autorange(callback=None, min_run_time=0.2)[source]#
    +blocked_autorange(callback=None, min_run_time=0.2)[source]#

    Measure many replicates while keeping timer overhead to a minimum.

    At a high level, blocked_autorange executes the following pseudo-code:

    `setup`
    @@ -4586,7 +4586,7 @@
     
     
    -collect_callgrind(number: int, *, repeats: None, collect_baseline: bool, retain_out_file: bool) CallgrindStats[source]#
    +collect_callgrind(number: int, *, repeats: None, collect_baseline: bool, retain_out_file: bool) CallgrindStats[source]#
    collect_callgrind(number: int, *, repeats: int, collect_baseline: bool, retain_out_file: bool) tuple[torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats, ...]

    Collect instruction counts using Callgrind.

    @@ -4621,7 +4621,7 @@
    -timeit(number=1000000)[source]#
    +timeit(number=1000000)[source]#

    Mirrors the semantics of timeit.Timer.timeit().

    Execute the main statement (stmt) number times. https://docs.python.org/3/library/timeit.html#timeit.Timer.timeit

    @@ -4636,7 +4636,7 @@
    -class torch.utils.benchmark.Measurement(number_per_run, raw_times, task_spec, metadata=None)[source]#
    +class torch.utils.benchmark.Measurement(number_per_run, raw_times, task_spec, metadata=None)[source]#

    The result of a Timer measurement.

    This class stores one or more measurements of a given statement. It is serializable and provides several convenience methods @@ -4645,7 +4645,7 @@

    -static merge(measurements)[source]#
    +static merge(measurements)[source]#

    Convenience method for merging replicates.

    Merge will extrapolate times to number_per_run=1 and will not transfer any metadata. (Since it might differ between replicates)

    @@ -4675,7 +4675,7 @@
    -class torch.utils.benchmark.CallgrindStats(task_spec, number_per_run, built_with_debug_symbols, baseline_inclusive_stats, baseline_exclusive_stats, stmt_inclusive_stats, stmt_exclusive_stats, stmt_callgrind_out)[source]#
    +class torch.utils.benchmark.CallgrindStats(task_spec, number_per_run, built_with_debug_symbols, baseline_inclusive_stats, baseline_exclusive_stats, stmt_inclusive_stats, stmt_exclusive_stats, stmt_callgrind_out)[source]#

    Top level container for Callgrind results collected by Timer.

    Manipulation is generally done using the FunctionCounts class, which is obtained by calling CallgrindStats.stats(…). Several convenience @@ -4685,7 +4685,7 @@

    -as_standardized()[source]#
    +as_standardized()[source]#

    Strip library names and some prefixes from function strings.

When comparing two different sets of instruction counts, one stumbling block can be path prefixes. Callgrind includes the full filepath @@ -4714,7 +4714,7 @@

    -counts(*, denoise=False)[source]#
    +counts(*, denoise=False)[source]#

    Returns the total number of instructions executed.

    See FunctionCounts.denoise() for an explanation of the denoise arg.

    @@ -4726,7 +4726,7 @@
    -delta(other, inclusive=False)[source]#
    +delta(other, inclusive=False)[source]#

    Diff two sets of counts.

One common reason to collect instruction counts is to determine the effect that a particular change will have on the number of instructions @@ -4744,7 +4744,7 @@

    -stats(inclusive=False)[source]#
    +stats(inclusive=False)[source]#

    Returns detailed function counts.

    Conceptually, the FunctionCounts returned can be thought of as a tuple of (count, path_and_function_name) tuples.

    @@ -4764,7 +4764,7 @@
    -class torch.utils.benchmark.FunctionCounts(_data, inclusive, truncate_rows=True, _linewidth=None)[source]#
    +class torch.utils.benchmark.FunctionCounts(_data, inclusive, truncate_rows=True, _linewidth=None)[source]#

    Container for manipulating Callgrind results.

    It supports:
      @@ -4781,7 +4781,7 @@
    -denoise()[source]#
    +denoise()[source]#

    Remove known noisy instructions.

    Several instructions in the CPython interpreter are rather noisy. These instructions involve unicode to dictionary lookups which Python uses to @@ -4797,7 +4797,7 @@

    -filter(filter_fn)[source]#
    +filter(filter_fn)[source]#

    Keep only the elements where filter_fn applied to function name returns True.

    Return type
    @@ -4808,7 +4808,7 @@
    -transform(map_fn)[source]#
    +transform(map_fn)[source]#

    Apply map_fn to all of the function names.

    This can be used to regularize function names (e.g. stripping irrelevant parts of the file path), coalesce entries by mapping multiple functions @@ -4824,7 +4824,7 @@

    -class torch.utils.benchmark.Compare(results)[source]#
    +class torch.utils.benchmark.Compare(results)[source]#

    Helper class for displaying the results of many measurements in a formatted table.

    The table format is based on the information fields provided in @@ -4840,33 +4840,33 @@

    -colorize(rowwise=False)[source]#
    +colorize(rowwise=False)[source]#

    Colorize formatted table.

    Colorize columnwise by default.

    -extend_results(results)[source]#
    +extend_results(results)[source]#

    Append results to already stored ones.

    All added results must be instances of Measurement.

    -highlight_warnings()[source]#
    +highlight_warnings()[source]#

    Enables warning highlighting when building formatted table.

    -print()[source]#
    +print()[source]#

    Print formatted table

    -trim_significant_figures()[source]#
    +trim_significant_figures()[source]#

    Enables trimming of significant figures when building the formatted table.

diff --git a/2.9/checkpoint.html b/2.9/checkpoint.html
index fb5536ae459..8fe2d746f6c 100644
--- a/2.9/checkpoint.html
+++ b/2.9/checkpoint.html
@@ -4426,7 +4426,7 @@

    torch.utils.checkpoint
    -torch.utils.checkpoint.checkpoint(function, *args, use_reentrant=None, context_fn=<function noop_context_fn>, determinism_check='default', debug=False, early_stop=True, **kwargs)[source]#
    +torch.utils.checkpoint.checkpoint(function, *args, use_reentrant=None, context_fn=<function noop_context_fn>, determinism_check='default', debug=False, early_stop=True, **kwargs)[source]#

    Checkpoint a model or part of the model.

    Activation checkpointing is a technique that trades compute for memory. Instead of keeping tensors needed for backward alive until they are used in @@ -4547,7 +4547,7 @@
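A minimal sketch with a two-layer block (layer sizes are arbitrary):

import torch
from torch.utils.checkpoint import checkpoint

layer1, layer2 = torch.nn.Linear(16, 16), torch.nn.Linear(16, 16)

def block(x):
    return torch.relu(layer2(torch.relu(layer1(x))))

x = torch.randn(4, 16, requires_grad=True)
out = checkpoint(block, x, use_reentrant=False)  # intermediates recomputed in backward
out.sum().backward()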

    torch.utils.checkpoint
    -torch.utils.checkpoint.checkpoint_sequential(functions, segments, input, use_reentrant=None, **kwargs)[source]#
    +torch.utils.checkpoint.checkpoint_sequential(functions, segments, input, use_reentrant=None, **kwargs)[source]#

    Checkpoint a sequential model to save memory.

    Sequential models execute a list of modules/functions in order (sequentially). Therefore, we can divide such a model in various segments @@ -4597,7 +4597,7 @@
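A minimal sketch splitting a small Sequential into two checkpointed segments:

import torch

model = torch.nn.Sequential(
    torch.nn.Linear(16, 16), torch.nn.ReLU(),
    torch.nn.Linear(16, 16), torch.nn.ReLU(),
)
x = torch.randn(4, 16, requires_grad=True)
out = torch.utils.checkpoint.checkpoint_sequential(model, 2, x, use_reentrant=False)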

    torch.utils.checkpoint
    -torch.utils.checkpoint.set_checkpoint_debug_enabled(enabled)[source]#
    +torch.utils.checkpoint.set_checkpoint_debug_enabled(enabled)[source]#

    Context manager that sets whether checkpoint should print additional debug information when running. See the debug flag for checkpoint() for more information. Note that @@ -4613,7 +4613,7 @@

    torch.utils.checkpoint
    -class torch.utils.checkpoint.CheckpointPolicy(value)[source]#
    +class torch.utils.checkpoint.CheckpointPolicy(value)[source]#

    Enum for specifying the policy for checkpointing during backpropagation.

    The following policies are supported:

      @@ -4637,7 +4637,7 @@

      torch.utils.checkpoint
      -class torch.utils.checkpoint.SelectiveCheckpointContext(*, is_recompute)[source]#
      +class torch.utils.checkpoint.SelectiveCheckpointContext(*, is_recompute)[source]#

      Context passed to policy function during selective checkpointing.

      This class is used to pass relevant metadata to the policy function during selective checkpointing. The metadata includes whether the current invocation @@ -4660,7 +4660,7 @@

      torch.utils.checkpoint
      -torch.utils.checkpoint.create_selective_checkpoint_contexts(policy_fn_or_list, allow_cache_entry_mutation=False)[source]#
      +torch.utils.checkpoint.create_selective_checkpoint_contexts(policy_fn_or_list, allow_cache_entry_mutation=False)[source]#

      Helper to avoid recomputing certain ops during activation checkpointing.

      Use this with torch.utils.checkpoint.checkpoint to control which operations are recomputed during the backward pass.

diff --git a/2.9/cond.html b/2.9/cond.html
index bc6ea654c39..32f0d5759ea 100644
--- a/2.9/cond.html
+++ b/2.9/cond.html
@@ -4564,7 +4564,7 @@

      Invariants of torch.ops.higher_order.cond#

      -torch._higher_order_ops.cond.cond(pred, true_fn, false_fn, operands=())[source]#
      +torch._higher_order_ops.cond.cond(pred, true_fn, false_fn, operands=())[source]#

      Conditionally applies true_fn or false_fn.

      Warning

diff --git a/2.9/config_mod.html b/2.9/config_mod.html
index 57eae9ff84e..cd6cbd0e499 100644
--- a/2.9/config_mod.html
+++ b/2.9/config_mod.html
@@ -4396,7 +4396,7 @@

      Created On: Apr 09, 2019 | Last Updated On: Jun 13, 2025

      -torch.__config__.show()[source]#
      +torch.__config__.show()[source]#

      Return a human-readable string with descriptions of the configuration of PyTorch.

      @@ -4408,7 +4408,7 @@
      -torch.__config__.parallel_info()[source]#
      +torch.__config__.parallel_info()[source]#

      Returns detailed string with parallelization settings

      Return type
diff --git a/2.9/cpp_extension.html b/2.9/cpp_extension.html
index 044271d1e08..38b719bb8e1 100644
--- a/2.9/cpp_extension.html
+++ b/2.9/cpp_extension.html
@@ -4396,7 +4396,7 @@

torch.utils.cpp_extension
Created On: Mar 07, 2018 | Last Updated On: Feb 16, 2025

      -torch.utils.cpp_extension.CppExtension(name, sources, *args, **kwargs)[source]#
      +torch.utils.cpp_extension.CppExtension(name, sources, *args, **kwargs)[source]#

      Create a setuptools.Extension for C++.

      Convenience method that creates a setuptools.Extension with the bare minimum (but often sufficient) arguments to build a C++ extension.

      @@ -4441,7 +4441,7 @@

      torch.utils.cpp_extension
      -torch.utils.cpp_extension.CUDAExtension(name, sources, *args, **kwargs)[source]#
      +torch.utils.cpp_extension.CUDAExtension(name, sources, *args, **kwargs)[source]#

      Create a setuptools.Extension for CUDA/C++.

      Convenience method that creates a setuptools.Extension with the bare minimum (but often sufficient) arguments to build a CUDA/C++ @@ -4546,7 +4546,7 @@

      torch.utils.cpp_extension
      -torch.utils.cpp_extension.SyclExtension(name, sources, *args, **kwargs)[source]#
      +torch.utils.cpp_extension.SyclExtension(name, sources, *args, **kwargs)[source]#

      Creates a setuptools.Extension for SYCL/C++.

      Convenience method that creates a setuptools.Extension with the bare minimum (but often sufficient) arguments to build a SYCL/C++ @@ -4598,7 +4598,7 @@

      torch.utils.cpp_extension
      -torch.utils.cpp_extension.BuildExtension(*args, **kwargs)[source]#
      +torch.utils.cpp_extension.BuildExtension(*args, **kwargs)[source]#

      A custom setuptools build extension .

      This setuptools.build_ext subclass takes care of passing the minimum required compiler flags (e.g. -std=c++17) as well as mixed @@ -4626,7 +4626,7 @@

      torch.utils.cpp_extension
      -torch.utils.cpp_extension.load(name, sources, extra_cflags=None, extra_cuda_cflags=None, extra_sycl_cflags=None, extra_ldflags=None, extra_include_paths=None, build_directory=None, verbose=False, with_cuda=None, with_sycl=None, is_python_module=True, is_standalone=False, keep_intermediates=True)[source]#
      +torch.utils.cpp_extension.load(name, sources, extra_cflags=None, extra_cuda_cflags=None, extra_sycl_cflags=None, extra_ldflags=None, extra_include_paths=None, build_directory=None, verbose=False, with_cuda=None, with_sycl=None, is_python_module=True, is_standalone=False, keep_intermediates=True)[source]#

      Load a PyTorch C++ extension just-in-time (JIT).

      To load an extension, a Ninja build file is emitted, which is used to compile the given sources into a dynamic library. This library is @@ -4726,7 +4726,7 @@

      torch.utils.cpp_extension
      -torch.utils.cpp_extension.load_inline(name, cpp_sources, cuda_sources=None, sycl_sources=None, functions=None, extra_cflags=None, extra_cuda_cflags=None, extra_sycl_cflags=None, extra_ldflags=None, extra_include_paths=None, build_directory=None, verbose=False, with_cuda=None, with_sycl=None, is_python_module=True, with_pytorch_error_handling=True, keep_intermediates=True, use_pch=False, no_implicit_headers=False)[source]#
      +torch.utils.cpp_extension.load_inline(name, cpp_sources, cuda_sources=None, sycl_sources=None, functions=None, extra_cflags=None, extra_cuda_cflags=None, extra_sycl_cflags=None, extra_ldflags=None, extra_include_paths=None, build_directory=None, verbose=False, with_cuda=None, with_sycl=None, is_python_module=True, with_pytorch_error_handling=True, keep_intermediates=True, use_pch=False, no_implicit_headers=False)[source]#

      Load a PyTorch C++ extension just-in-time (JIT) from string sources.

      This function behaves exactly like load(), but takes its sources as strings rather than filenames. These strings are stored to files in the @@ -4823,7 +4823,7 @@
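A minimal sketch that JIT-compiles and binds a single C++ function:

from torch.utils.cpp_extension import load_inline

source = """
at::Tensor sin_add(at::Tensor x, at::Tensor y) {
  return x.sin() + y.sin();
}
"""

module = load_inline(name="inline_extension",
                     cpp_sources=[source],
                     functions=["sin_add"])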

      torch.utils.cpp_extension
      -torch.utils.cpp_extension.include_paths(device_type='cpu')[source]#
      +torch.utils.cpp_extension.include_paths(device_type='cpu')[source]#

      Get the include paths required to build a C++ or CUDA or SYCL extension.

      Parameters
      @@ -4840,7 +4840,7 @@

      torch.utils.cpp_extension
      -torch.utils.cpp_extension.get_compiler_abi_compatibility_and_version(compiler)[source]#
      +torch.utils.cpp_extension.get_compiler_abi_compatibility_and_version(compiler)[source]#

      Determine if the given compiler is ABI-compatible with PyTorch alongside its version.

      Parameters
      @@ -4859,13 +4859,13 @@

      torch.utils.cpp_extension
      -torch.utils.cpp_extension.verify_ninja_availability()[source]#
      +torch.utils.cpp_extension.verify_ninja_availability()[source]#

      Raise RuntimeError if ninja build system is not available on the system, does nothing otherwise.

      -torch.utils.cpp_extension.is_ninja_available()[source]#
      +torch.utils.cpp_extension.is_ninja_available()[source]#

      Return True if the ninja build system is available on the system, False otherwise.

diff --git a/2.9/cuda._sanitizer.html b/2.9/cuda._sanitizer.html
index f2228bbbb4b..a5e19903027 100644
--- a/2.9/cuda._sanitizer.html
+++ b/2.9/cuda._sanitizer.html
@@ -4505,7 +4505,7 @@

      Usage#

      API Reference#

      -torch.cuda._sanitizer.enable_cuda_sanitizer()[source]#
      +torch.cuda._sanitizer.enable_cuda_sanitizer()[source]#

      Enable CUDA Sanitizer.

The sanitizer will begin to analyze low-level CUDA calls invoked by torch functions for synchronization errors. All data races found will be printed to the standard
diff --git a/2.9/cuda.html b/2.9/cuda.html
index c2f4e111b33..fabd59f6df2 100644
--- a/2.9/cuda.html
+++ b/2.9/cuda.html
@@ -4744,7 +4744,7 @@

      Memory management
      -class torch.cuda.use_mem_pool(pool, device=None)[source]#
      +class torch.cuda.use_mem_pool(pool, device=None)[source]#

      A context manager that routes allocations to a given pool.

      Parameters
diff --git a/2.9/cuda.tunable.html b/2.9/cuda.tunable.html
index 1827dd066d2..74534480b08 100644
--- a/2.9/cuda.tunable.html
+++ b/2.9/cuda.tunable.html
@@ -4570,7 +4570,7 @@

      Environment Variable Interface#

      -torch.cuda.tunable.enable(val=True)[source]#
      +torch.cuda.tunable.enable(val=True)[source]#

      This is the big on/off switch for all TunableOp implementations.
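A minimal tuning-session sketch combining the switches documented below:

import torch.cuda.tunable as tunable

tunable.enable(True)                   # master switch
tunable.tuning_enable(True)            # tune and record entries that are missing
tunable.set_max_tuning_iterations(10)
# ... run the GEMM-heavy workload to be tuned ...
tunable.write_file()                   # flush results; the filename defaults to get_filename()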

      @@ -4578,7 +4578,7 @@

      API Reference
      -torch.cuda.tunable.is_enabled()[source]#
      +torch.cuda.tunable.is_enabled()[source]#

      Returns whether the TunableOp feature is enabled.

      Return type
      @@ -4589,7 +4589,7 @@

      API Reference
      -torch.cuda.tunable.tuning_enable(val=True)[source]#
      +torch.cuda.tunable.tuning_enable(val=True)[source]#

      Enable tuning of TunableOp implementations.

      When enabled, if a tuned entry isn’t found, run the tuning step and record the entry.

      @@ -4599,7 +4599,7 @@

      API Reference
      -torch.cuda.tunable.tuning_is_enabled()[source]#
      +torch.cuda.tunable.tuning_is_enabled()[source]#

      Returns whether TunableOp implementations can be tuned.

      Return type
      @@ -4610,7 +4610,7 @@

      API Reference
      -torch.cuda.tunable.record_untuned_enable(val=True)[source]#
      +torch.cuda.tunable.record_untuned_enable(val=True)[source]#

Enable recording of untuned TunableOp operations for offline tuning.

      When enabled, if a tuned entry isn’t found, write it to the untuned file.

      @@ -4619,7 +4619,7 @@

      API Reference
      -torch.cuda.tunable.record_untuned_is_enabled()[source]#
      +torch.cuda.tunable.record_untuned_is_enabled()[source]#

      Returns whether TunableOp operations are recorded for offline tuning.

      Return type
      @@ -4630,7 +4630,7 @@

      API Reference
      -torch.cuda.tunable.set_max_tuning_duration(duration)[source]#
      +torch.cuda.tunable.set_max_tuning_duration(duration)[source]#

      Set max time in milliseconds to spend tuning a given solution.

      If both max tuning duration and iterations are set, the smaller of the two will be honored. At minimum 1 tuning iteration will always be run.

      @@ -4640,7 +4640,7 @@

      API Reference
      -torch.cuda.tunable.get_max_tuning_duration()[source]#
      +torch.cuda.tunable.get_max_tuning_duration()[source]#

      Get max time to spend tuning a given solution.

      Return type
      @@ -4651,7 +4651,7 @@

      API Reference
      -torch.cuda.tunable.set_max_tuning_iterations(iterations)[source]#
      +torch.cuda.tunable.set_max_tuning_iterations(iterations)[source]#

      Set max number of iterations to spend tuning a given solution.

      If both max tuning duration and iterations are set, the smaller of the two will be honored. At minimum 1 tuning iteration will always be run.

      @@ -4661,7 +4661,7 @@

      API Reference
      -torch.cuda.tunable.get_max_tuning_iterations()[source]#
      +torch.cuda.tunable.get_max_tuning_iterations()[source]#

      Get max iterations to spend tuning a given solution.

      Return type
      @@ -4672,7 +4672,7 @@

      API Reference
      -torch.cuda.tunable.set_filename(filename, insert_device_ordinal=False)[source]#
      +torch.cuda.tunable.set_filename(filename, insert_device_ordinal=False)[source]#

      Set the filename to use for input/output of tuning results.

      If insert_device_ordinal is True then the current device ordinal will be added to the given filename automatically. This can be used in a @@ -4683,7 +4683,7 @@

      API Reference
      -torch.cuda.tunable.get_filename()[source]#
      +torch.cuda.tunable.get_filename()[source]#

      Get the results filename.

      Return type
      @@ -4694,7 +4694,7 @@

      API Reference
      -torch.cuda.tunable.get_results()[source]#
      +torch.cuda.tunable.get_results()[source]#

      Return all TunableOp results.

      Return type
      @@ -4705,7 +4705,7 @@

      API Reference
      -torch.cuda.tunable.get_validators()[source]#
      +torch.cuda.tunable.get_validators()[source]#

      Return the TunableOp validators.

      Return type
      @@ -4716,7 +4716,7 @@

      API Reference
      -torch.cuda.tunable.write_file_on_exit(val)[source]#
      +torch.cuda.tunable.write_file_on_exit(val)[source]#

      During Tuning Context destruction, write file to disk.

This is useful as a final flush of your results to disk if your application terminates as a result of normal operation or an error. Manual flushing of @@ -4727,7 +4727,7 @@

      API Reference
      -torch.cuda.tunable.write_file(filename=None)[source]#
      +torch.cuda.tunable.write_file(filename=None)[source]#

      Write results to a CSV file.

      If filename is not given, get_filename() is called.

      @@ -4739,7 +4739,7 @@

      API Reference
      -torch.cuda.tunable.read_file(filename=None)[source]#
      +torch.cuda.tunable.read_file(filename=None)[source]#

      Read results from a TunableOp CSV file.

      If filename is not given, get_filename() is called.

      @@ -4751,7 +4751,7 @@

      API Reference
      -torch.cuda.tunable.tune_gemm_in_file(filename)[source]#
      +torch.cuda.tunable.tune_gemm_in_file(filename)[source]#

Tune the GEMMs recorded in the given file.

      @@ -4759,7 +4759,7 @@

      API Reference
      -torch.cuda.tunable.mgpu_tune_gemm_in_file(filename_pattern, num_gpus)[source]#
      +torch.cuda.tunable.mgpu_tune_gemm_in_file(filename_pattern, num_gpus)[source]#

      Process one or more files and distribute work over one or more GPUs.

      @@ -4767,7 +4767,7 @@

      API Reference
      -torch.cuda.tunable.set_rotating_buffer_size(buffer_size)[source]#
      +torch.cuda.tunable.set_rotating_buffer_size(buffer_size)[source]#

Set the rotating buffer size to this value in MB if the value is greater than zero.

If less than zero, the L2 cache size is queried and used. If equal to zero, the rotating buffer is deactivated.

      @@ -4776,7 +4776,7 @@

      API Reference
      -torch.cuda.tunable.get_rotating_buffer_size()[source]#
      +torch.cuda.tunable.get_rotating_buffer_size()[source]#

      Get the rotating buffer size in kilobytes.

      Return type
diff --git a/2.9/data.html b/2.9/data.html
index 3e118bb3e52..1aa79155692 100644
--- a/2.9/data.html
+++ b/2.9/data.html
@@ -4762,7 +4762,7 @@

      -class torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=None, sampler=None, batch_sampler=None, num_workers=0, collate_fn=None, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None, multiprocessing_context=None, generator=None, *, prefetch_factor=None, persistent_workers=False, pin_memory_device='', in_order=True)[source]#
      +class torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=None, sampler=None, batch_sampler=None, num_workers=0, collate_fn=None, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None, multiprocessing_context=None, generator=None, *, prefetch_factor=None, persistent_workers=False, pin_memory_device='', in_order=True)[source]#

      Data loader combines a dataset and a sampler, and provides an iterable over the given dataset.

      The DataLoader supports both map-style and iterable-style datasets with single- or multi-process loading, customizing @@ -4864,7 +4864,7 @@
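A minimal sketch with an in-memory dataset:

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2)
for features, labels in loader:
    pass   # each iteration yields one shuffled batch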

      -class torch.utils.data.Dataset[source]#
      +class torch.utils.data.Dataset[source]#

      An abstract class representing a Dataset.

      All datasets that represent a map from keys to data samples should subclass it. All subclasses should overwrite __getitem__(), supporting fetching a @@ -4885,7 +4885,7 @@

      -class torch.utils.data.IterableDataset[source]#
      +class torch.utils.data.IterableDataset[source]#

      An iterable Dataset.

      All datasets that represent an iterable of data samples should subclass it. Such form of datasets is particularly useful when data come from a stream.

      @@ -4987,7 +4987,7 @@
      -class torch.utils.data.TensorDataset(*tensors)[source]#
      +class torch.utils.data.TensorDataset(*tensors)[source]#

      Dataset wrapping tensors.

      Each sample will be retrieved by indexing tensors along the first dimension.

      @@ -4999,7 +4999,7 @@
      -class torch.utils.data.StackDataset(*args, **kwargs)[source]#
      +class torch.utils.data.StackDataset(*args, **kwargs)[source]#

      Dataset as a stacking of multiple datasets.

      This class is useful to assemble different parts of complex input data, given as datasets.

      Example

      @@ -5023,7 +5023,7 @@
      -class torch.utils.data.ConcatDataset(datasets)[source]#
      +class torch.utils.data.ConcatDataset(datasets)[source]#

      Dataset as a concatenation of multiple datasets.

      This class is useful to assemble different existing datasets.

      @@ -5035,7 +5035,7 @@
      -class torch.utils.data.ChainDataset(datasets)[source]#
      +class torch.utils.data.ChainDataset(datasets)[source]#

      Dataset for chaining multiple IterableDataset s.

      This class is useful to assemble different existing dataset streams. The chaining operation is done on-the-fly, so concatenating large-scale @@ -5049,7 +5049,7 @@

      -class torch.utils.data.Subset(dataset, indices)[source]#
      +class torch.utils.data.Subset(dataset, indices)[source]#

      Subset of a dataset at specified indices.

      Parameters
      @@ -5063,7 +5063,7 @@
      -torch.utils.data._utils.collate.collate(batch, *, collate_fn_map=None)[source]#
      +torch.utils.data._utils.collate.collate(batch, *, collate_fn_map=None)[source]#

      General collate function that handles collection type of element within each batch.

      The function also opens function registry to deal with specific element types. default_collate_fn_map provides default collate functions for tensors, numpy arrays, numbers and strings.

      @@ -5098,7 +5098,7 @@
      -torch.utils.data.default_collate(batch)[source]#
      +torch.utils.data.default_collate(batch)[source]#

      Take in a batch of data and put the elements within the batch into a tensor with an additional outer dimension - batch size.

      The exact output type can be a torch.Tensor, a Sequence of torch.Tensor, a Collection of torch.Tensor, or left unchanged, depending on the input type. @@ -5164,7 +5164,7 @@

      -torch.utils.data.default_convert(data)[source]#
      +torch.utils.data.default_convert(data)[source]#

      Convert each NumPy array element into a torch.Tensor.

      If the input is a Sequence, Collection, or Mapping, it tries to convert each element inside to a torch.Tensor. If the input is not an NumPy array, it is left unchanged. @@ -5199,7 +5199,7 @@

      -torch.utils.data.get_worker_info()[source]#
      +torch.utils.data.get_worker_info()[source]#

      Returns the information about the current DataLoader iterator worker process.

      When called in a worker, this returns an object guaranteed to have the @@ -5233,7 +5233,7 @@

      -torch.utils.data.random_split(dataset, lengths, generator=<torch._C.Generator object>)[source]#
      +torch.utils.data.random_split(dataset, lengths, generator=<torch._C.Generator object>)[source]#

      Randomly split a dataset into non-overlapping new datasets of given lengths.

      If a list of fractions that sum up to 1 is given, the lengths will be computed automatically as @@ -5265,7 +5265,7 @@
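For example, a deterministic 70/30 split using the fraction form:

import torch

generator = torch.Generator().manual_seed(42)
train, val = torch.utils.data.random_split(range(10), [0.7, 0.3], generator=generator)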

      -class torch.utils.data.Sampler(data_source=None)[source]#
      +class torch.utils.data.Sampler(data_source=None)[source]#

      Base class for all Samplers.

      Every Sampler subclass has to provide an __iter__() method, providing a way to iterate over indices or lists of indices (batches) of dataset elements, @@ -5312,7 +5312,7 @@

      -class torch.utils.data.SequentialSampler(data_source)[source]#
      +class torch.utils.data.SequentialSampler(data_source)[source]#

      Samples elements sequentially, always in the same order.

      Parameters
      @@ -5323,7 +5323,7 @@
      -class torch.utils.data.RandomSampler(data_source, replacement=False, num_samples=None, generator=None)[source]#
      +class torch.utils.data.RandomSampler(data_source, replacement=False, num_samples=None, generator=None)[source]#

      Samples elements randomly. If without replacement, then sample from a shuffled dataset.

      If with replacement, then user can specify num_samples to draw.

      @@ -5340,7 +5340,7 @@
      -class torch.utils.data.SubsetRandomSampler(indices, generator=None)[source]#
      +class torch.utils.data.SubsetRandomSampler(indices, generator=None)[source]#

      Samples elements randomly from a given list of indices, without replacement.

      Parameters
      @@ -5354,7 +5354,7 @@
      -class torch.utils.data.WeightedRandomSampler(weights, num_samples, replacement=True, generator=None)[source]#
      +class torch.utils.data.WeightedRandomSampler(weights, num_samples, replacement=True, generator=None)[source]#

      Samples elements from [0,..,len(weights)-1] with given probabilities (weights).

      Parameters
      @@ -5387,7 +5387,7 @@
      -class torch.utils.data.BatchSampler(sampler, batch_size, drop_last)[source]#
      +class torch.utils.data.BatchSampler(sampler, batch_size, drop_last)[source]#

      Wraps another sampler to yield a mini-batch of indices.

      Parameters
      @@ -5416,7 +5416,7 @@
      -class torch.utils.data.distributed.DistributedSampler(dataset, num_replicas=None, rank=None, shuffle=True, seed=0, drop_last=False)[source]#
      +class torch.utils.data.distributed.DistributedSampler(dataset, num_replicas=None, rank=None, shuffle=True, seed=0, drop_last=False)[source]#

      Sampler that restricts data loading to a subset of the dataset.

It is especially useful in conjunction with torch.nn.parallel.DistributedDataParallel. In such a case, each
diff --git a/2.9/ddp_comm_hooks.html b/2.9/ddp_comm_hooks.html
index fe89b45b874..b2c5645d833 100644
--- a/2.9/ddp_comm_hooks.html
+++ b/2.9/ddp_comm_hooks.html
@@ -4496,7 +4496,7 @@

Default Communication Hooks
bucket is a torch.distributed.GradBucket object.

      -torch.distributed.algorithms.ddp_comm_hooks.default_hooks.allreduce_hook(process_group, bucket)[source]#
      +torch.distributed.algorithms.ddp_comm_hooks.default_hooks.allreduce_hook(process_group, bucket)[source]#

      Call allreduce using GradBucket tensors.

Once gradient tensors are aggregated across all workers, the callback then takes the mean and returns the result.

      @@ -4520,7 +4520,7 @@

      Default Communication Hooks
      -torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook(process_group, bucket)[source]#
      +torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook(process_group, bucket)[source]#

      Compress by casting GradBucket to torch.float16 divided by process group size.

      This DDP communication hook implements a simple gradient compression approach that casts GradBucket tensor to half-precision floating-point format (torch.float16) @@ -4542,7 +4542,7 @@
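A minimal registration sketch (ddp_model is assumed to be an initialized DistributedDataParallel instance):

from torch.distributed.algorithms.ddp_comm_hooks import default_hooks

ddp_model.register_comm_hook(state=None, hook=default_hooks.fp16_compress_hook)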

      Default Communication Hooks
      -torch.distributed.algorithms.ddp_comm_hooks.default_hooks.bf16_compress_hook(process_group, bucket)[source]#
      +torch.distributed.algorithms.ddp_comm_hooks.default_hooks.bf16_compress_hook(process_group, bucket)[source]#

      Warning: This API is experimental, and it requires NCCL version later than 2.9.6.

      This DDP communication hook implements a simple gradient compression approach that casts GradBucket tensor to half-precision @@ -4567,7 +4567,7 @@

      Default Communication Hooks
      -torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_wrapper(hook)[source]#
      +torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_wrapper(hook)[source]#

      Cast input tensor to torch.float16, cast result of hook back to input dtype.

      This wrapper casts the input gradient tensor of a given DDP communication hook to half-precision floating point format (torch.float16), and casts the resulting tensor of the given hook back to @@ -4589,7 +4589,7 @@

      Default Communication Hooks
      -torch.distributed.algorithms.ddp_comm_hooks.default_hooks.bf16_compress_wrapper(hook)[source]#
      +torch.distributed.algorithms.ddp_comm_hooks.default_hooks.bf16_compress_wrapper(hook)[source]#

      Warning: This API is experimental, and it requires NCCL version later than 2.9.6.

      This wrapper casts the input gradient tensor of a given DDP communication hook to half-precision Brain floating point format (torch.bfloat16), @@ -4622,7 +4622,7 @@

      PowerSGD Communication Hook#

      -class torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook.PowerSGDState(process_group, matrix_approximation_rank=1, start_powerSGD_iter=1000, min_compression_rate=2, use_error_feedback=True, warm_start=True, orthogonalization_epsilon=0, random_seed=0, compression_stats_logging_frequency=10000, batch_tensors_with_same_shape=False)[source]#
      +class torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook.PowerSGDState(process_group, matrix_approximation_rank=1, start_powerSGD_iter=1000, min_compression_rate=2, use_error_feedback=True, warm_start=True, orthogonalization_epsilon=0, random_seed=0, compression_stats_logging_frequency=10000, batch_tensors_with_same_shape=False)[source]#

      Store both the algorithm’s hyperparameters and internal state for all gradients during training.

      Particularly, matrix_approximation_rank and start_powerSGD_iter are the main hyperparameters that should be tuned by the user. For performance, we suggest to keep binary hyperparameters use_error_feedback and warm_start on.

      @@ -4674,7 +4674,7 @@

      PowerSGD Hooks
      -torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook.powerSGD_hook(state, bucket)[source]#
      +torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook.powerSGD_hook(state, bucket)[source]#

      Implement PowerSGD algorithm.

      This DDP communication hook implements PowerSGD gradient compression algorithm described in the paper. @@ -4739,7 +4739,7 @@

      PowerSGD Hooks
      -torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook.batched_powerSGD_hook(state, bucket)[source]#
      +torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook.batched_powerSGD_hook(state, bucket)[source]#

      Implement simplified PowerSGD algorithm.

      This DDP communication hook implements a simplified PowerSGD gradient compression algorithm described in the paper. @@ -4807,7 +4807,7 @@

      Debugging Communication Hooks
      -torch.distributed.algorithms.ddp_comm_hooks.debugging_hooks.noop_hook(_, bucket)[source]#
      +torch.distributed.algorithms.ddp_comm_hooks.debugging_hooks.noop_hook(_, bucket)[source]#

      Return a future that wraps the input, so it is a no-op that does not incur any communication overheads.

      This hook should only be used for headroom analysis of allreduce optimization, instead of the normal gradient synchronization. @@ -4845,12 +4845,12 @@

Checkpointing of Communication Hooks
PowerSGDState has __setstate__ and __getstate__ implemented and can be used as a reference.

      -class torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook.PowerSGDState[source]
      +class torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook.PowerSGDState[source]
      -__getstate__()[source]#
      +__getstate__()[source]#

      Return a Dict[str, Any] which will be pickled and saved.

      process_group is not serializable and excluded from a returned state.

      @@ -4858,7 +4858,7 @@

      Checkpointing of Communication Hooks
      -__setstate__(state)[source]#
      +__setstate__(state)[source]#

      Take a provided state and set to this PowerSGDState instance.

      process_group is set to default.
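
A minimal sketch of the round trip this enables, assuming state is an existing PowerSGDState instance:

    import pickle

    blob = pickle.dumps(state)      # __getstate__ drops the non-serializable process_group
    restored = pickle.loads(blob)   # __setstate__ reattaches the default process group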

      diff --git a/2.9/distributed._dist2.html b/2.9/distributed._dist2.html index b186db024c6..019d87dc917 100644 --- a/2.9/distributed._dist2.html +++ b/2.9/distributed._dist2.html @@ -4865,14 +4865,14 @@
      -class torch.distributed._dist2.ProcessGroupFactory(*args, **kwargs)[source]#
      +class torch.distributed._dist2.ProcessGroupFactory(*args, **kwargs)[source]#

      Bases: Protocol

      Protocol for process group factories.

      -torch.distributed._dist2.current_process_group()[source]#
      +torch.distributed._dist2.current_process_group()[source]#

      Get the current process group. Thread local method.

      Returns
      @@ -4886,7 +4886,7 @@
      -torch.distributed._dist2.new_group(backend, timeout, device, **kwargs)[source]#
      +torch.distributed._dist2.new_group(backend, timeout, device, **kwargs)[source]#

      Create a new process group with the given backend and options. This group is independent and will not be globally registered and thus not usable via the standard torch.distributed.* APIs.
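
Since this is a private, experimental API, the following is only a sketch of the documented signature; the backend and device choices, and any extra kwargs a backend may require, are assumptions:

    from datetime import timedelta

    import torch
    from torch.distributed import _dist2

    pg = _dist2.new_group("gloo", timeout=timedelta(seconds=60),
                          device=torch.device("cpu"))
    with _dist2.process_group(pg):                  # thread-local override
        assert _dist2.current_process_group() is pg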

      @@ -4911,7 +4911,7 @@
      -torch.distributed._dist2.process_group(pg)[source]#
      +torch.distributed._dist2.process_group(pg)[source]#

      Context manager for process groups. Thread local method.

      Parameters
      @@ -4925,7 +4925,7 @@
      -torch.distributed._dist2.register_backend(name, func)[source]#
      +torch.distributed._dist2.register_backend(name, func)[source]#

      Register a new process group backend.

      Parameters
      diff --git a/2.9/distributed.algorithms.join.html b/2.9/distributed.algorithms.join.html index 80c7d887f4c..dabce633a70 100644 --- a/2.9/distributed.algorithms.join.html +++ b/2.9/distributed.algorithms.join.html @@ -4400,7 +4400,7 @@

Generic Join Context Manager: Distributed Training with Uneven Inputs Using the Join Context Manager.

      -class torch.distributed.algorithms.Join(joinables, enable=True, throw_on_early_termination=False, **kwargs)[source]#
      +class torch.distributed.algorithms.Join(joinables, enable=True, throw_on_early_termination=False, **kwargs)[source]#

      This class defines the generic join context manager, which allows custom hooks to be called after a process joins.

      These hooks should shadow the collective communications of non-joined processes to prevent hanging and @@ -4463,7 +4463,7 @@
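
A sketch of the intended usage, assuming model is a DDP-wrapped module and each rank may have a different number of inputs:

    from torch.distributed.algorithms import Join

    # Ranks that run out of inputs early have their collectives shadowed,
    # so the remaining ranks do not hang.
    with Join([model]):
        for inp in inputs:
            loss = model(inp).sum()
            loss.backward()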

      Generic Join Context Manager
      -static notify_join_context(joinable)[source]#
      +static notify_join_context(joinable)[source]#

      Notifies the join context manager that the calling process has not yet joined.

      Then, if throw_on_early_termination=True, checks if uneven inputs have been detected (i.e. if one process has already joined) and throws an exception if so.

      @@ -4491,7 +4491,7 @@

      Generic Join Context Manager
      -class torch.distributed.algorithms.Joinable[source]#
      +class torch.distributed.algorithms.Joinable[source]#

      This defines an abstract base class for joinable classes.

      A joinable class (inheriting from Joinable) should implement join_hook(), @@ -4508,7 +4508,7 @@

      Generic Join Context Manager
      -abstract join_hook(**kwargs)[source]#
      +abstract join_hook(**kwargs)[source]#

      Return a JoinHook instance for the given Joinable.

      Parameters
      @@ -4533,7 +4533,7 @@

      Generic Join Context Manager
      -class torch.distributed.algorithms.JoinHook[source]#
      +class torch.distributed.algorithms.JoinHook[source]#

      This defines a join hook, which provides two entry points in the join context manager.

Entry points: a main hook, which is called repeatedly while there exists a non-joined process, and a post-hook, which is called once all processes have joined.

      @@ -4542,7 +4542,7 @@

Generic Join Context Manager: post_hook() as appropriate.

      -main_hook()[source]#
      +main_hook()[source]#

      Call this hook while there exists a non-joined process to shadow collective communications in a training iteration.

Training iteration, i.e., one forward pass, backward pass, and optimizer step.

      @@ -4551,7 +4551,7 @@

      Generic Join Context Manager
      -post_hook(is_last_joiner)[source]#
      +post_hook(is_last_joiner)[source]#

      Call hook after all processes have joined.

      It is passed an additional bool argument is_last_joiner, which indicates if the rank is one of the last to join.

      diff --git a/2.9/distributed.checkpoint.html b/2.9/distributed.checkpoint.html index f32e1d61299..d7e831d11c3 100644 --- a/2.9/distributed.checkpoint.html +++ b/2.9/distributed.checkpoint.html @@ -4412,13 +4412,13 @@

      Additional resources:
      -class torch.distributed.checkpoint.state_dict_saver.AsyncCheckpointerType(value)[source]#
      +class torch.distributed.checkpoint.state_dict_saver.AsyncCheckpointerType(value)[source]#

      Enum for async checkpointer type.

      -class torch.distributed.checkpoint.state_dict_saver.AsyncSaveResponse(staging_completion, upload_completion)[source]#
      +class torch.distributed.checkpoint.state_dict_saver.AsyncSaveResponse(staging_completion, upload_completion)[source]#

      This class contains futures for staging and upload completion. It is returned by async_save(). staging_completion is a future that indicates when local copy @@ -4431,7 +4431,7 @@

      Additional resources:
      -torch.distributed.checkpoint.state_dict_saver.save(state_dict, *, checkpoint_id=None, storage_writer=None, planner=None, process_group=None, no_dist=False, use_collectives=True)[source]#
      +torch.distributed.checkpoint.state_dict_saver.save(state_dict, *, checkpoint_id=None, storage_writer=None, planner=None, process_group=None, no_dist=False, use_collectives=True)[source]#

      Save a distributed model in SPMD style.

This function is different from torch.save() as it handles ShardedTensor and DTensor by having each rank save only their local shards.
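
A minimal sketch of the call (the checkpoint path and the nesting of the model under a "model" key are illustrative assumptions):

    import torch.distributed.checkpoint as dcp

    state_dict = {"model": model.state_dict()}   # same keys on every rank
    dcp.save(state_dict, checkpoint_id="/shared/ckpt/step_100")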

      @@ -4522,7 +4522,7 @@

      Additional resources:
      -torch.distributed.checkpoint.state_dict_saver.async_save(state_dict, *, checkpoint_id=None, storage_writer=None, planner=None, process_group=None, async_checkpointer_type=AsyncCheckpointerType.THREAD, async_stager=None, no_dist=False, use_collectives=True)[source]#
      +torch.distributed.checkpoint.state_dict_saver.async_save(state_dict, *, checkpoint_id=None, storage_writer=None, planner=None, process_group=None, async_checkpointer_type=AsyncCheckpointerType.THREAD, async_stager=None, no_dist=False, use_collectives=True)[source]#

Asynchronous version of save. This code first stages the state_dict onto the staging storage (defaults to CPU memory), and then calls save in a separate thread.
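
A sketch under the same assumptions as the save example above; per the AsyncSaveResponse description, its two futures can be waited on separately:

    resp = dcp.async_save(state_dict, checkpoint_id="/shared/ckpt/step_200")
    # ... training continues while the save runs in a background thread ...
    resp.staging_completion.result()   # safe to mutate state_dict again
    resp.upload_completion.result()    # checkpoint fully persisted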

      @@ -4589,7 +4589,7 @@

      Additional resources:
      -torch.distributed.checkpoint.state_dict_saver.save_state_dict(state_dict, storage_writer, process_group=None, coordinator_rank=0, no_dist=False, planner=None)[source]#
      +torch.distributed.checkpoint.state_dict_saver.save_state_dict(state_dict, storage_writer, process_group=None, coordinator_rank=0, no_dist=False, planner=None)[source]#

      This method is deprecated. Please switch to ‘save’.

      Return type
      @@ -4600,7 +4600,7 @@

      Additional resources:
      -torch.distributed.checkpoint.state_dict_loader.load(state_dict, *, checkpoint_id=None, storage_reader=None, planner=None, process_group=None, no_dist=False)[source]#
      +torch.distributed.checkpoint.state_dict_loader.load(state_dict, *, checkpoint_id=None, storage_reader=None, planner=None, process_group=None, no_dist=False)[source]#

      Load a checkpoint into a distributed state dict in SPMD style.

      Each rank must have the same keys in their state_dict provided to this API. Mismatched keys may result in hangs or errors. If unsure, you can use @@ -4689,7 +4689,7 @@
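
A sketch of the in-place load pattern this implies (path assumed as in the save example):

    state_dict = {"model": model.state_dict()}   # pre-allocated destination
    dcp.load(state_dict, checkpoint_id="/shared/ckpt/step_100")
    model.load_state_dict(state_dict["model"])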

      Additional resources:
      -torch.distributed.checkpoint.state_dict_loader.load_state_dict(state_dict, storage_reader, process_group=None, coordinator_rank=0, no_dist=False, planner=None)[source]#
      +torch.distributed.checkpoint.state_dict_loader.load_state_dict(state_dict, storage_reader, process_group=None, coordinator_rank=0, no_dist=False, planner=None)[source]#

      This method is deprecated. Please switch to ‘load’.

      @@ -4698,7 +4698,7 @@

Additional resources: The following module is also useful for additional customization of the staging mechanisms used for asynchronous checkpointing (torch.distributed.checkpoint.async_save):

      -class torch.distributed.checkpoint.staging.AsyncStager(*args, **kwargs)[source]#
      +class torch.distributed.checkpoint.staging.AsyncStager(*args, **kwargs)[source]#

This protocol is meant to provide customization and extensibility for dcp.async_save, allowing users to customize how data is staged prior to executing the usual dcp.save path in parallel. The expected order of operations (concretely defined in torch.distributed.state_dict_saver.async_save) @@ -4731,7 +4731,7 @@

      Additional resources:
      -close()[source]#
      +close()[source]#

      Clean up all resources used by the stager.

      @@ -4745,7 +4745,7 @@

      Additional resources:
      -stage(state_dict)[source]#
      +stage(state_dict)[source]#

Returns a “staged” copy of state_dict. The expectation of the staged copy is that it is insulated from any updates incurred after the stage call is complete.

      @@ -4757,7 +4757,7 @@

      Additional resources:
      -synchronize_staging()[source]#
      +synchronize_staging()[source]#

If stage is asynchronous in some way, this method should be called to ensure staging is complete and that it is safe to begin modifying the original state_dict

      @@ -4768,7 +4768,7 @@

      Additional resources:
      -class torch.distributed.checkpoint.staging.DefaultStager(config=StagingOptions(use_pinned_memory=True, use_shared_memory=True, use_async_staging=True, use_non_blocking_copy=True))[source]#
      +class torch.distributed.checkpoint.staging.DefaultStager(config=StagingOptions(use_pinned_memory=True, use_shared_memory=True, use_async_staging=True, use_non_blocking_copy=True))[source]#

      DefaultStager provides a full-featured staging implementation that combines multiple optimization techniques for efficient checkpoint preparation.

      The staging process works as follows: @@ -4808,7 +4808,7 @@

      Additional resources:
      -close()[source]#
      +close()[source]#

      Clean up all resources used by the DefaultStager. Shuts down the ThreadPoolExecutor used for async staging operations and cleans up the underlying StateDictStager’s cached storages. Should be called when the stager is no longer needed to prevent @@ -4827,7 +4827,7 @@

      Additional resources:
      -stage(state_dict, **kwargs)[source]#
      +stage(state_dict, **kwargs)[source]#

This function is responsible for staging the state_dict. See class docstring for more details on staging. If use_async_staging is True, it will return a Future object that will be @@ -4845,7 +4845,7 @@

      Additional resources:
      -synchronize_staging()[source]#
      +synchronize_staging()[source]#

      When use_async_staging is True, this method will wait until staging is complete. If use_async_staging is False, this method is a no-op.

      @@ -4856,7 +4856,7 @@

      Additional resources:
      -class torch.distributed.checkpoint.staging.StagingOptions(use_pinned_memory=True, use_shared_memory=True, use_async_staging=True, use_non_blocking_copy=True)[source]#
      +class torch.distributed.checkpoint.staging.StagingOptions(use_pinned_memory=True, use_shared_memory=True, use_async_staging=True, use_non_blocking_copy=True)[source]#

      Configuration options for checkpoint staging behavior.

      Variables
      @@ -4883,7 +4883,7 @@

      Additional resources:
      -class torch.distributed.checkpoint.staging.BlockingAsyncStager(cache_staged_state_dict=False, type_check=False)[source]#
      +class torch.distributed.checkpoint.staging.BlockingAsyncStager(cache_staged_state_dict=False, type_check=False)[source]#

      An implementation of AsyncStager which stages the state_dict on CPU RAM and blocks until the copy is complete. This implementation also provides an option to optimize stage latency using pinned memory.

      N.B. synchronize_staging is a no-op in this case.

      @@ -4891,7 +4891,7 @@

      Additional resources:
      -stage(state_dict)[source]#
      +stage(state_dict)[source]#

      Returns a copy of state_dict on the CPU.

      Return type
      @@ -4902,7 +4902,7 @@

      Additional resources:
      -synchronize_staging()[source]#
      +synchronize_staging()[source]#

      No-op function, since staging is blocking.

      @@ -4913,11 +4913,11 @@

Additional resources: In addition to the above entrypoints, Stateful objects, as described below, provide additional customization during saving/loading

      -class torch.distributed.checkpoint.stateful.Stateful(*args, **kwargs)[source]#
      +class torch.distributed.checkpoint.stateful.Stateful(*args, **kwargs)[source]#

      Stateful protocol for objects that can be checkpointed and restored.

      -load_state_dict(state_dict)[source]#
      +load_state_dict(state_dict)[source]#

      Restore the object’s state from the provided state_dict.

      Parameters
      @@ -4928,7 +4928,7 @@

      Additional resources:
      -state_dict()[source]#
      +state_dict()[source]#

      Objects should return their state_dict representation as a dictionary. The output of this function will be checkpointed, and later restored in load_state_dict().
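
A minimal sketch of a custom Stateful; the TrainerState class and its single field are hypothetical:

    from torch.distributed.checkpoint.stateful import Stateful

    class TrainerState(Stateful):
        """Hypothetical app-level state checkpointed alongside the model."""

        def __init__(self):
            self.step = 0

        def state_dict(self):
            return {"step": self.step}

        def load_state_dict(self, state_dict):
            self.step = state_dict["step"]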

      @@ -4953,7 +4953,7 @@

Additional resources: The following types define the IO interface used during checkpoint:

      -class torch.distributed.checkpoint.StorageReader[source]#
      +class torch.distributed.checkpoint.StorageReader[source]#

      Interface used by load_state_dict to read from storage.

      One StorageReader instance acts as both the coordinator and the follower in a distributed checkpoint. As part of initialization, each instance @@ -4969,7 +4969,7 @@

      Additional resources:
      -abstract prepare_global_plan(plans)[source]#
      +abstract prepare_global_plan(plans)[source]#

      Perform centralized planning of storage loading.

      This method is only called on the coordinator instance.

      While this method can produce a completely different plan, the preferred @@ -4989,7 +4989,7 @@

      Additional resources:
      -abstract prepare_local_plan(plan)[source]#
      +abstract prepare_local_plan(plan)[source]#

      Perform storage-specific local planning.

While this method can produce a completely different plan, the recommended way is to store storage-specific data in LoadPlan::storage_data.

      @@ -5008,7 +5008,7 @@

      Additional resources:
      -abstract read_data(plan, planner)[source]#
      +abstract read_data(plan, planner)[source]#

      Read all items from plan using planner to resolve the data.

      A subclass should call LoadPlanner::load_bytes to deserialize a BytesIO object into the right place.

      @@ -5034,7 +5034,7 @@

      Additional resources:
      -abstract read_metadata(*args, **kwargs)[source]#
      +abstract read_metadata(*args, **kwargs)[source]#

      Read the checkpoint metadata.

      Returns
      @@ -5048,7 +5048,7 @@

      Additional resources:
      -abstract reset(checkpoint_id=None)[source]#
      +abstract reset(checkpoint_id=None)[source]#

Called to indicate that a brand new checkpoint read is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint read. The meaning of the checkpoint_id is @@ -5066,7 +5066,7 @@

      Additional resources:
      -abstract set_up_storage_reader(metadata, is_coordinator, *args, **kwargs)[source]#
      +abstract set_up_storage_reader(metadata, is_coordinator, *args, **kwargs)[source]#

      Initialize this instance.

      Parameters
      @@ -5081,7 +5081,7 @@

      Additional resources:
      -abstract classmethod validate_checkpoint_id(checkpoint_id)[source]#
      +abstract classmethod validate_checkpoint_id(checkpoint_id)[source]#

Check if the given checkpoint_id is supported by the storage. This allows us to enable automatic storage selection.

      @@ -5095,7 +5095,7 @@

      Additional resources:
      -class torch.distributed.checkpoint.StorageWriter[source]#
      +class torch.distributed.checkpoint.StorageWriter[source]#

      Interface used by save_state_dict to write to storage.

      One StorageWriter instance acts as both the coordinator and the follower in a distributed checkpoint. As part of initialization, each instance @@ -5111,7 +5111,7 @@

      Additional resources:
      -abstract finish(metadata, results)[source]#
      +abstract finish(metadata, results)[source]#

Write the metadata and mark the current checkpoint as successful.

      The actual format/schema used for serializing metadata is an implementation detail. The only requirement is that it’s recoverable @@ -5134,7 +5134,7 @@

      Additional resources:
      -abstract prepare_global_plan(plans)[source]#
      +abstract prepare_global_plan(plans)[source]#

      Perform centralized planning of storage.

      This method is only called on the coordinator instance.

      While this method can produce a completely different plan, the preferred @@ -5154,7 +5154,7 @@

      Additional resources:
      -abstract prepare_local_plan(plan)[source]#
      +abstract prepare_local_plan(plan)[source]#

      Perform storage-specific local planning.

While this method can produce a completely different plan, the recommended way is to store storage-specific data in SavePlan::storage_data.

      @@ -5173,7 +5173,7 @@

      Additional resources:
      -abstract reset(checkpoint_id=None)[source]#
      +abstract reset(checkpoint_id=None)[source]#

Called to indicate that a brand new checkpoint write is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint write. The meaning of the checkpoint_id is @@ -5191,7 +5191,7 @@

      Additional resources:
      -abstract set_up_storage_writer(is_coordinator, *args, **kwargs)[source]#
      +abstract set_up_storage_writer(is_coordinator, *args, **kwargs)[source]#

      Initialize this instance.

      Parameters
      @@ -5203,7 +5203,7 @@

      Additional resources:
      -storage_meta()[source]#
      +storage_meta()[source]#

      Return the storage-specific metadata. This is used to store additional information in a checkpoint that can be useful for providing request-level observability. StorageMeta is passed to the SavePlanner during save calls. Returns None by default.

      @@ -5217,7 +5217,7 @@

      Additional resources:
      -abstract classmethod validate_checkpoint_id(checkpoint_id)[source]#
      +abstract classmethod validate_checkpoint_id(checkpoint_id)[source]#

Check if the given checkpoint_id is supported by the storage. This allows us to enable automatic storage selection.

      @@ -5229,7 +5229,7 @@

      Additional resources:
      -abstract write_data(plan, planner)[source]#
      +abstract write_data(plan, planner)[source]#

      Write all items from plan using planner to resolve the data.

      A subclass should call SavePlanner::resolve_data on each item from the plan to get access to the underlying object to write.

      @@ -5260,7 +5260,7 @@

Additional resources: The following types define the planner interface used during checkpoint:

      -class torch.distributed.checkpoint.LoadPlanner[source]#
      +class torch.distributed.checkpoint.LoadPlanner[source]#

      Abstract class defining the protocol used by load_state_dict to plan the load process.

LoadPlanners are stateful objects that can be used to customize the whole load process.

      LoadPlanner acts as an access proxy to the state_dict, so any transformation done to it @@ -5337,7 +5337,7 @@

      Additional resources:
      -abstract commit_tensor(read_item, tensor)[source]#
      +abstract commit_tensor(read_item, tensor)[source]#

Called once the StorageReader has finished loading data into tensor.

      The provided tensor is the same one returned by the call to resolve_tensor. This method is only needed if this LoadPlanner needs to post process tensor prior to @@ -5349,7 +5349,7 @@

      Additional resources:
      -abstract create_global_plan(global_plan)[source]#
      +abstract create_global_plan(global_plan)[source]#

      Compute the global load plan and return plans for each rank.

N.B. This is called on the coordinator rank only

      @@ -5361,7 +5361,7 @@

      Additional resources:
      -abstract create_local_plan()[source]#
      +abstract create_local_plan()[source]#

      Create a LoadPlan based on state_dict and metadata provided by set_up_planner.

N.B. This is called on every rank.

      @@ -5373,7 +5373,7 @@

      Additional resources:
      -abstract finish_plan(central_plan)[source]#
      +abstract finish_plan(central_plan)[source]#

      Accept the plan from coordinator and return final LoadPlan.

      Return type
      @@ -5384,7 +5384,7 @@

      Additional resources:
      -abstract load_bytes(read_item, value)[source]#
      +abstract load_bytes(read_item, value)[source]#

Load the item described by read_item and value.

      This method is expected to modify in-place the underlying state_dict.

      The contents of value are defined by the SavePlanner used to produce @@ -5395,7 +5395,7 @@

      Additional resources:
      -resolve_bytes(read_item)[source]#
      +resolve_bytes(read_item)[source]#

      Return the BytesIO to be used by the StorageReader to load read_item.

      The BytesIO should alias with one on the underlying state_dict as StorageReader will replace its contents.

      @@ -5407,7 +5407,7 @@

      Additional resources:
      -abstract resolve_tensor(read_item)[source]#
      +abstract resolve_tensor(read_item)[source]#

      Return the tensor described by read_item to be used by the StorageReader to load read_item.

      The tensor should alias with one on the underlying state_dict as StorageReader will replace its contents. If, for any reason, that’s not possible, the planner can use the commit_tensor method to copy the data @@ -5421,7 +5421,7 @@

      Additional resources:
      -abstract set_up_planner(state_dict, metadata=None, is_coordinator=False)[source]#
      +abstract set_up_planner(state_dict, metadata=None, is_coordinator=False)[source]#

      Initialize this instance to load data into state_dict.

N.B. This is called on every rank.

      @@ -5432,21 +5432,21 @@

      Additional resources:
      -class torch.distributed.checkpoint.LoadPlan(items: list[torch.distributed.checkpoint.planner.ReadItem], storage_data: Any = None, planner_data: Any = None)[source]#
      +class torch.distributed.checkpoint.LoadPlan(items: list[torch.distributed.checkpoint.planner.ReadItem], storage_data: Any = None, planner_data: Any = None)[source]#

      -class torch.distributed.checkpoint.ReadItem(type: torch.distributed.checkpoint.planner.LoadItemType, dest_index: torch.distributed.checkpoint.metadata.MetadataIndex, dest_offsets: torch.Size, storage_index: torch.distributed.checkpoint.metadata.MetadataIndex, storage_offsets: torch.Size, lengths: torch.Size)[source]#
      +class torch.distributed.checkpoint.ReadItem(type: torch.distributed.checkpoint.planner.LoadItemType, dest_index: torch.distributed.checkpoint.metadata.MetadataIndex, dest_offsets: torch.Size, storage_index: torch.distributed.checkpoint.metadata.MetadataIndex, storage_offsets: torch.Size, lengths: torch.Size)[source]#
      -class torch.distributed.checkpoint.SavePlanner[source]#
      +class torch.distributed.checkpoint.SavePlanner[source]#

      Abstract class defining the protocol used by save_state_dict to plan the save process.

      SavePlanners are stateful objects that can be used to customize the whole save process.

      SavePlanner acts as an access proxy to the state_dict, so any transformation done to it @@ -5545,7 +5545,7 @@

      Additional resources:
      -abstract create_global_plan(all_plans)[source]#
      +abstract create_global_plan(all_plans)[source]#

      Compute the global checkpoint plan and return the local plan of each rank.

      This is called on the coordinator rank only.

      @@ -5557,7 +5557,7 @@

      Additional resources:
      -abstract create_local_plan()[source]#
      +abstract create_local_plan()[source]#

      Compute the save plan for the current rank.

      This will be aggregated and passed to create_global_plan. Planner specific data can be passed through SavePlan::planner_data.

      @@ -5571,7 +5571,7 @@

      Additional resources:
      -abstract finish_plan(new_plan)[source]#
      +abstract finish_plan(new_plan)[source]#

      Merge the plan created by create_local_plan and the result of create_global_plan.

      This is called on all ranks.

      @@ -5583,7 +5583,7 @@

      Additional resources:
      -abstract resolve_data(write_item)[source]#
      +abstract resolve_data(write_item)[source]#

      Transform and prepare write_item from state_dict for storage, ensuring idempotency and thread-safety.

Look up the object associated with write_item in state_dict and apply any transformation (such as serialization) prior to the storage layer consuming it.

      @@ -5603,7 +5603,7 @@

      Additional resources:
      -abstract set_up_planner(state_dict, storage_meta=None, is_coordinator=False)[source]#
      +abstract set_up_planner(state_dict, storage_meta=None, is_coordinator=False)[source]#

      Initialize this planner to save state_dict.

Implementations should save those values as they won’t be provided later in the save process.

      This is called on all ranks.

      @@ -5615,20 +5615,20 @@

      Additional resources:
      -class torch.distributed.checkpoint.SavePlan(items: list[torch.distributed.checkpoint.planner.WriteItem], storage_data: Any = None, planner_data: Any = None, usable: bool = True)[source]#
      +class torch.distributed.checkpoint.SavePlan(items: list[torch.distributed.checkpoint.planner.WriteItem], storage_data: Any = None, planner_data: Any = None, usable: bool = True)[source]#

      -class torch.distributed.checkpoint.planner.WriteItem(index, type, bytes_io_data=None, tensor_data=None)[source]#
      +class torch.distributed.checkpoint.planner.WriteItem(index, type, bytes_io_data=None, tensor_data=None)[source]#

      Dataclass which holds information about what needs to be written to storage.

      -tensor_storage_size()[source]#
      +tensor_storage_size()[source]#

      Calculates the storage size of the underlying tensor, or None if this is not a tensor write.

      Returns
      @@ -5644,7 +5644,7 @@

      Additional resources:
      -class torch.distributed.checkpoint.planner.BytesIOWriteData(nbytes: int)[source]#
      +class torch.distributed.checkpoint.planner.BytesIOWriteData(nbytes: int)[source]#

      @@ -5652,7 +5652,7 @@

Additional resources: We provide a filesystem based storage layer:

      -class torch.distributed.checkpoint.FileSystemReader(path, _extension_registry=None)[source]#
      +class torch.distributed.checkpoint.FileSystemReader(path, _extension_registry=None)[source]#
      @@ -5665,7 +5665,7 @@

      Additional resources:
      -class torch.distributed.checkpoint.FileSystemWriter(path, single_file_per_rank=True, sync_files=True, thread_count=1, per_thread_copy_ahead=10000000, cache_staged_state_dict=False, overwrite=True, _extensions=None, serialization_format=SerializationFormat.TORCH_SAVE)[source]#
      +class torch.distributed.checkpoint.FileSystemWriter(path, single_file_per_rank=True, sync_files=True, thread_count=1, per_thread_copy_ahead=10000000, cache_staged_state_dict=False, overwrite=True, _extensions=None, serialization_format=SerializationFormat.TORCH_SAVE)[source]#

      Basic implementation of StorageWriter using file IO.

      This implementation makes the following assumptions and simplifications:

        @@ -5679,7 +5679,7 @@

        Additional resources:
        -stage(state_dict)[source]#
        +stage(state_dict)[source]#

        Override of AsyncStager.stage

        Return type
        @@ -5701,12 +5701,12 @@

        Additional resources:
        -class torch.distributed.checkpoint.DefaultSavePlanner(flatten_state_dict=True, flatten_sharded_tensors=True, dedup_replicated_tensors=None, dedup_save_to_lowest_rank=False, enable_plan_caching=False)[source]#
        +class torch.distributed.checkpoint.DefaultSavePlanner(flatten_state_dict=True, flatten_sharded_tensors=True, dedup_replicated_tensors=None, dedup_save_to_lowest_rank=False, enable_plan_caching=False)[source]#
        -lookup_object(index)[source]#
        +lookup_object(index)[source]#

        Extension from the planner interface to make it easy to extend the default planner.

        Return type
        @@ -5717,7 +5717,7 @@

        Additional resources:
        -transform_object(write_item, object)[source]#
        +transform_object(write_item, object)[source]#

        Extension from the planner interface to make it easy to extend the default planner.

        @@ -5727,7 +5727,7 @@

        Additional resources:
        -class torch.distributed.checkpoint.DefaultLoadPlanner(flatten_state_dict=True, flatten_sharded_tensors=True, allow_partial_load=False)[source]#
        +class torch.distributed.checkpoint.DefaultLoadPlanner(flatten_state_dict=True, flatten_sharded_tensors=True, allow_partial_load=False)[source]#

        DefaultLoadPlanner that adds multiple features on top of LoadPlanner.

        In particular it adds the following:

        flatten_state_dict: Handle state_dict with nested dicts @@ -5737,7 +5737,7 @@

        Additional resources:
        -lookup_tensor(index)[source]#
        +lookup_tensor(index)[source]#

        Extension from the planner interface to make it easy to extend the default planner.

        Return type
        @@ -5748,7 +5748,7 @@

        Additional resources:
        -transform_tensor(read_item, tensor)[source]#
        +transform_tensor(read_item, tensor)[source]#

        Extension from the planner interface to make it easy to extend the default planner.

        @@ -5764,7 +5764,7 @@

Additional resources: Note that this feature is experimental, and API signatures might change in the future.

        -torch.distributed.checkpoint.state_dict.get_state_dict(model, optimizers, *, submodules=None, options=None)[source]#
        +torch.distributed.checkpoint.state_dict.get_state_dict(model, optimizers, *, submodules=None, options=None)[source]#

        Return the model state_dict and optimizers state_dict.

        get_state_dict can process any module that is parallelized by PyTorch FSDP/fully_shard, DDP/replicate, tensor_parallel/parallelize_module, and any @@ -5833,7 +5833,7 @@
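
A sketch of the basic call, assuming model and optimizer were set up with one of the parallelisms above:

    from torch.distributed.checkpoint.state_dict import get_state_dict

    model_state_dict, optim_state_dict = get_state_dict(model, optimizer)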

        Additional resources:
        -torch.distributed.checkpoint.state_dict.get_model_state_dict(model, *, submodules=None, options=None)[source]#
        +torch.distributed.checkpoint.state_dict.get_model_state_dict(model, *, submodules=None, options=None)[source]#

        Return the model state_dict of model.

See get_state_dict for detailed usage.

        @@ -5858,7 +5858,7 @@

        Additional resources:
        -torch.distributed.checkpoint.state_dict.get_optimizer_state_dict(model, optimizers, *, submodules=None, options=None)[source]#
        +torch.distributed.checkpoint.state_dict.get_optimizer_state_dict(model, optimizers, *, submodules=None, options=None)[source]#

        Return the combined state_dict for optimizers.

See get_state_dict for detailed usage.

        @@ -5884,7 +5884,7 @@

        Additional resources:
        -torch.distributed.checkpoint.state_dict.set_state_dict(model, optimizers, *, model_state_dict, optim_state_dict, options=None)[source]#
        +torch.distributed.checkpoint.state_dict.set_state_dict(model, optimizers, *, model_state_dict, optim_state_dict, options=None)[source]#

        Load the model state_dict and optimizers state_dict.

        The counterpart of get_state_dict to set the state_dict to the model and optimizers. The given model_state_dict and optim_state_dict do not @@ -5930,7 +5930,7 @@
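
A sketch of the restore side, assuming the two dicts were produced by get_state_dict (or filled in by dcp.load):

    from torch.distributed.checkpoint.state_dict import set_state_dict

    set_state_dict(
        model,
        optimizer,
        model_state_dict=model_state_dict,
        optim_state_dict=optim_state_dict,
    )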

        Additional resources:
        -torch.distributed.checkpoint.state_dict.set_model_state_dict(model, model_state_dict, *, options=None)[source]#
        +torch.distributed.checkpoint.state_dict.set_model_state_dict(model, model_state_dict, *, options=None)[source]#

        Load the model state_dict.

The counterpart of get_model_state_dict to set the state_dict to the model. See set_state_dict for detailed usage.

        @@ -5963,7 +5963,7 @@

        Additional resources:
        -torch.distributed.checkpoint.state_dict.set_optimizer_state_dict(model, optimizers, optim_state_dict, *, options=None)[source]#
        +torch.distributed.checkpoint.state_dict.set_optimizer_state_dict(model, optimizers, optim_state_dict, *, options=None)[source]#

        Load the optimizers state_dict.

The counterpart of get_optimizer_state_dict to set the state_dict to the optimizers. See set_state_dict for detailed usage.

        @@ -5995,7 +5995,7 @@

        Additional resources:
        -class torch.distributed.checkpoint.state_dict.StateDictOptions(full_state_dict=False, cpu_offload=False, ignore_frozen_params=False, keep_submodule_prefixes=True, strict=True, broadcast_from_rank0=False, flatten_optimizer_state_dict=False, dsd_fqn_modifiers='_fqn_modifiers')[source]#
        +class torch.distributed.checkpoint.state_dict.StateDictOptions(full_state_dict=False, cpu_offload=False, ignore_frozen_params=False, keep_submodule_prefixes=True, strict=True, broadcast_from_rank0=False, flatten_optimizer_state_dict=False, dsd_fqn_modifiers='_fqn_modifiers')[source]#

        This dataclass specifies how get_state_dict/set_state_dict will work.

        • full_state_dict: if this is set to True, all the tensors in the @@ -6035,7 +6035,7 @@

Additional resources: For users who are used to using and sharing models in the torch.save format, the following methods provide offline utilities for converting between formats.

          -torch.distributed.checkpoint.format_utils.dcp_to_torch_save(dcp_checkpoint_dir, torch_save_path)[source]#
          +torch.distributed.checkpoint.format_utils.dcp_to_torch_save(dcp_checkpoint_dir, torch_save_path)[source]#

          Given a directory containing a DCP checkpoint, this function will convert it into a Torch save file.
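
A sketch of both conversion directions (all paths are hypothetical):

    from torch.distributed.checkpoint.format_utils import (
        dcp_to_torch_save,
        torch_save_to_dcp,
    )

    dcp_to_torch_save("/shared/ckpt/step_100", "/tmp/model.pt")
    torch_save_to_dcp("/tmp/model.pt", "/shared/ckpt/imported")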

          @@ -6054,7 +6054,7 @@

          Additional resources:
          -torch.distributed.checkpoint.format_utils.torch_save_to_dcp(torch_save_path, dcp_checkpoint_dir)[source]#
          +torch.distributed.checkpoint.format_utils.torch_save_to_dcp(torch_save_path, dcp_checkpoint_dir)[source]#

          Given the location of a torch save file, converts it into a DCP checkpoint.

          Parameters
          @@ -6073,7 +6073,7 @@

Additional resources: The following classes can also be utilized for online loading and resharding of models from the torch.save format.

          -class torch.distributed.checkpoint.format_utils.BroadcastingTorchSaveReader(checkpoint_id=None, coordinator_rank=0)[source]#
          +class torch.distributed.checkpoint.format_utils.BroadcastingTorchSaveReader(checkpoint_id=None, coordinator_rank=0)[source]#

          StorageReader for reading a Torch Save file. This reader will read the entire checkpoint on the coordinator rank, and then broadcast and shard each tensor to all ranks.

N.B. Intended to be used with DynamicMetaLoadPlanner

          @@ -6094,7 +6094,7 @@

          Additional resources:
          -prepare_global_plan(global_plan)[source]#
          +prepare_global_plan(global_plan)[source]#

          Implementation of the StorageReader method

          Return type
          @@ -6105,7 +6105,7 @@

          Additional resources:
          -prepare_local_plan(plan)[source]#
          +prepare_local_plan(plan)[source]#

          Implementation of the StorageReader method

          Return type
          @@ -6116,7 +6116,7 @@

          Additional resources:
          -read_data(plan, planner)[source]#
          +read_data(plan, planner)[source]#

Reads torch save data on the coordinator rank and broadcasts it afterwards. This incurs a communication cost, but avoids having to load the entire checkpoint on each rank, hopefully preventing OOM issues

          @@ -6129,7 +6129,7 @@

          Additional resources:
          -read_metadata()[source]#
          +read_metadata()[source]#

          Extends the default StorageReader to support building the metadata file

          Return type
          @@ -6140,7 +6140,7 @@

          Additional resources:
          -reset(checkpoint_id=None)[source]#
          +reset(checkpoint_id=None)[source]#

          Implementation of the StorageReader method

          @@ -6148,7 +6148,7 @@

          Additional resources:
          -set_up_storage_reader(metadata, is_coordinator)[source]#
          +set_up_storage_reader(metadata, is_coordinator)[source]#

          Implementation of the StorageReader method

          @@ -6156,7 +6156,7 @@

          Additional resources:
          -classmethod validate_checkpoint_id(checkpoint_id)[source]#
          +classmethod validate_checkpoint_id(checkpoint_id)[source]#

          Implementation of the StorageReader method

          Return type
          @@ -6169,7 +6169,7 @@

          Additional resources:
          -class torch.distributed.checkpoint.format_utils.DynamicMetaLoadPlanner(flatten_state_dict=True, flatten_sharded_tensors=True, allow_partial_load=False)[source]#
          +class torch.distributed.checkpoint.format_utils.DynamicMetaLoadPlanner(flatten_state_dict=True, flatten_sharded_tensors=True, allow_partial_load=False)[source]#

          Extension of DefaultLoadPlanner, which creates a new Metadata object based on the passed in state dict, avoiding the need to read metadata from disk. This is useful when reading formats which don’t have a metadata file, like Torch Save files.

          @@ -6191,7 +6191,7 @@

          Additional resources:
          -set_up_planner(state_dict, metadata=None, is_coordinator=False)[source]#
          +set_up_planner(state_dict, metadata=None, is_coordinator=False)[source]#

Sets up the planner, extending default behavior by creating the Metadata object from the state dict

          diff --git a/2.9/distributed.fsdp.fully_shard.html b/2.9/distributed.fsdp.fully_shard.html index 415ecd40e18..482c0aaae44 100644 --- a/2.9/distributed.fsdp.fully_shard.html +++ b/2.9/distributed.fsdp.fully_shard.html @@ -4470,7 +4470,7 @@

          PyTorch FSDP2 (

          The frontend API is fully_shard that can be called on a module:

          -torch.distributed.fsdp.fully_shard(module, *, mesh=None, reshard_after_forward=None, shard_placement_fn=None, mp_policy=MixedPrecisionPolicy(param_dtype=None, reduce_dtype=None, output_dtype=None, cast_forward_inputs=True), offload_policy=OffloadPolicy(), ignored_params=None)[source]#
          +torch.distributed.fsdp.fully_shard(module, *, mesh=None, reshard_after_forward=None, shard_placement_fn=None, mp_policy=MixedPrecisionPolicy(param_dtype=None, reduce_dtype=None, output_dtype=None, cast_forward_inputs=True), offload_policy=OffloadPolicy(), ignored_params=None)[source]#

          Apply fully sharded data parallelism (FSDP) to module, where FSDP shards module parameters, gradients, and optimizer states across data parallel workers to save memory at the cost of communication.
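
A sketch of the common bottom-up application pattern; the model.layers attribute is an assumption about the module structure:

    from torch.distributed.fsdp import fully_shard

    # Shard each block as its own FSDP unit, then the root module.
    for layer in model.layers:
        fully_shard(layer)
    fully_shard(model)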

          @@ -4573,7 +4573,7 @@

PyTorch FSDP2 (
class torch.distributed.fsdp.FSDPModule(*args, **kwargs)#
          -reshard()[source]#
          +reshard()[source]#

          Reshards the module’s parameters, freeing the unsharded parameters if they are allocated and registering the sharded parameters to the module. This method is not recursive.

          @@ -4583,7 +4583,7 @@

          PyTorch FSDP2 (
          -set_all_reduce_hook(hook, *, stream=None)[source]#
          +set_all_reduce_hook(hook, *, stream=None)[source]#
          Parameters
            @@ -4602,7 +4602,7 @@

            PyTorch FSDP2 (
            -set_allocate_memory_from_process_group_for_comm(enable)[source]#
            +set_allocate_memory_from_process_group_for_comm(enable)[source]#

            Sets whether the temporary staging buffers used to send and receive data over collective communications should be allocated using the custom optimized allocator provided by the ProcessGroup itself (if any). This @@ -4622,7 +4622,7 @@

            PyTorch FSDP2 (
            -set_custom_all_gather(comm)[source]#
            +set_custom_all_gather(comm)[source]#

            Overrides the default all_gather communication behavior, to have better control over the communication and memory usage. See Comm and ReduceScatter for details.

            @@ -4635,7 +4635,7 @@

            PyTorch FSDP2 (
            -set_custom_reduce_scatter(comm)[source]#
            +set_custom_reduce_scatter(comm)[source]#

            Overrides the default reduce_scatter communication behavior, to have better control over the communication and memory usage. See Comm and ReduceScatter for details.

            @@ -4648,7 +4648,7 @@

            PyTorch FSDP2 (
            -set_force_sum_reduction_for_comms(enable)[source]#
            +set_force_sum_reduction_for_comms(enable)[source]#

            Sets whether to require the low-level collective communication primitives to exclusively use “sum”-type reductions, even if it comes at the cost of separate additional pre- or post-scaling operations. @@ -4667,7 +4667,7 @@

            PyTorch FSDP2 (
            -set_gradient_divide_factor(factor)[source]#
            +set_gradient_divide_factor(factor)[source]#

            Sets a custom divide factor for the gradient reduction. This might use a custom reduce op using NCCL’s PreMulSum, which allows multiplying by the factor before reduction.

            @@ -4680,7 +4680,7 @@

            PyTorch FSDP2 (
            -set_is_last_backward(is_last_backward)[source]#
            +set_is_last_backward(is_last_backward)[source]#

Sets whether the next backward is the last one. On the last backward, FSDP waits on pending gradient reduction and clears internal data structures for backward prefetching. This can be useful for @@ -4691,7 +4691,7 @@

            PyTorch FSDP2 (
            -set_modules_to_backward_prefetch(modules)[source]#
            +set_modules_to_backward_prefetch(modules)[source]#

Sets the FSDP modules for which this FSDP module should explicitly prefetch all-gathers in backward. This overrides the default backward prefetching implementation that prefetches the next FSDP module based on @@ -4709,7 +4709,7 @@

            PyTorch FSDP2 (
            -set_modules_to_forward_prefetch(modules)[source]#
            +set_modules_to_forward_prefetch(modules)[source]#

            Sets the FSDP modules for which this FSDP module should explicitly prefetch all-gathers in forward. The prefetching runs after this module’s all-gather copy-out.

            @@ -4727,7 +4727,7 @@

            PyTorch FSDP2 (
            -set_post_optim_event(event)[source]#
            +set_post_optim_event(event)[source]#

            Sets a post-optimizer-step event for the root FSDP module to wait the all-gather streams on.

            By default, the root FSDP module waits the all-gather streams on the @@ -4747,7 +4747,7 @@

            PyTorch FSDP2 (
            -set_reduce_scatter_divide_factor(factor)[source]#
            +set_reduce_scatter_divide_factor(factor)[source]#

            Use set_gradient_divide_factor() instead

            @@ -4755,7 +4755,7 @@

            PyTorch FSDP2 (
            -set_requires_all_reduce(requires_all_reduce, *, recurse=True)[source]#
            +set_requires_all_reduce(requires_all_reduce, *, recurse=True)[source]#

            Sets if the module should all-reduce gradients. This can be used to implement gradient accumulation with only reduce-scatter but not all-reduce for HSDP.

            @@ -4765,7 +4765,7 @@

            PyTorch FSDP2 (
            -set_requires_gradient_sync(requires_gradient_sync, *, recurse=True)[source]#
            +set_requires_gradient_sync(requires_gradient_sync, *, recurse=True)[source]#

Sets if the module should sync gradients. This can be used to implement gradient accumulation without communication. For HSDP, this controls both reduce-scatter and all-reduce together. This is the equivalent of @@ -4784,7 +4784,7 @@

            PyTorch FSDP2 (
            -set_reshard_after_backward(reshard_after_backward, *, recurse=True)[source]#
            +set_reshard_after_backward(reshard_after_backward, *, recurse=True)[source]#

            Sets if the module should reshard parameters after backward. This can be used during gradient accumulation to trade off higher memory for reduced communication since the unsharded parameters do not need to be @@ -4803,7 +4803,7 @@

            PyTorch FSDP2 (
            -set_reshard_after_forward(reshard_after_forward, recurse=True)[source]#
            +set_reshard_after_forward(reshard_after_forward, recurse=True)[source]#

            Sets if the module should reshard parameters after forward. This can be used to change the reshard_after_forward FSDP arg at runtime. For example, this can be used to set the FSDP root module’s value to @@ -4824,7 +4824,7 @@

            PyTorch FSDP2 (
            -set_unshard_in_backward(unshard_in_backward)[source]#
            +set_unshard_in_backward(unshard_in_backward)[source]#

            Sets whether the FSDP module’s parameters need to be unsharded in backward. This can be used in expert cases when the user knows that all parameters in this FSDP module’s parameter group are not needed for @@ -4835,7 +4835,7 @@

            PyTorch FSDP2 (
            -unshard(async_op=False)[source]#
            +unshard(async_op=False)[source]#

            Unshards the module’s parameters by allocating memory and all-gathering the parameters. This method is not recursive. The unshard follows the MixedPrecisionPolicy, so it will all-gather following @@ -4868,7 +4868,7 @@

            PyTorch FSDP2 (

            A handle to wait on a FSDPModule.unshard() op.

            -wait()[source]#
            +wait()[source]#

            Waits on the unshard op. This ensures that the current stream can use the unsharded parameters, which are now registered to the module.

            @@ -4879,7 +4879,7 @@

            PyTorch FSDP2 (
            -torch.distributed.fsdp.register_fsdp_forward_method(module, method_name)[source]#
            +torch.distributed.fsdp.register_fsdp_forward_method(module, method_name)[source]#

            Registers a method on module to be considered a forward method for FSDP.

            FSDP all-gathers parameters pre-forward and optionally frees parameters diff --git a/2.9/distributed.html b/2.9/distributed.html index 07f88c4c66c..eba0d2e9bab 100644 --- a/2.9/distributed.html +++ b/2.9/distributed.html @@ -4688,7 +4688,7 @@

            Initialization
            -torch.distributed.is_available()[source]#
            +torch.distributed.is_available()[source]#

            Return True if the distributed package is available.

            Otherwise, torch.distributed does not expose any other APIs. Currently, @@ -4705,7 +4705,7 @@

            Initialization
            -torch.distributed.init_process_group(backend=None, init_method=None, timeout=None, world_size=-1, rank=-1, store=None, group_name='', pg_options=None, device_id=None)[source]#
            +torch.distributed.init_process_group(backend=None, init_method=None, timeout=None, world_size=-1, rank=-1, store=None, group_name='', pg_options=None, device_id=None)[source]#

            Initialize the default distributed process group.

            This will also initialize the distributed package.
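
A minimal sketch, assuming the process was launched by torchrun (which sets RANK, WORLD_SIZE, MASTER_ADDR, and MASTER_PORT in the environment):

    import torch.distributed as dist

    dist.init_process_group(backend="nccl", init_method="env://")
    # ... collectives ...
    dist.destroy_process_group()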

            @@ -4799,7 +4799,7 @@

            Initialization
            -torch.distributed.device_mesh.init_device_mesh(device_type, mesh_shape, *, mesh_dim_names=None, backend_override=None)[source]#
            +torch.distributed.device_mesh.init_device_mesh(device_type, mesh_shape, *, mesh_dim_names=None, backend_override=None)[source]#

            Initializes a DeviceMesh based on device_type, mesh_shape, and mesh_dim_names parameters.

            This creates a DeviceMesh with an n-dimensional array layout, where n is the length of mesh_shape. If mesh_dim_names is provided, each dimension is labeled as mesh_dim_names[i].
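
For example, a sketch of a 2x4 mesh over 8 GPUs with illustrative dimension names:

    from torch.distributed.device_mesh import init_device_mesh

    mesh_2d = init_device_mesh("cuda", (2, 4), mesh_dim_names=("dp", "tp"))
    dp_group = mesh_2d.get_group("dp")   # ProcessGroup for the "dp" dimension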

            @@ -4849,7 +4849,7 @@

            Initialization
            -torch.distributed.is_initialized()[source]#
            +torch.distributed.is_initialized()[source]#

            Check if the default process group has been initialized.

            Return type
            @@ -4860,7 +4860,7 @@

            Initialization
            -torch.distributed.is_mpi_available()[source]#
            +torch.distributed.is_mpi_available()[source]#

            Check if the MPI backend is available.

            Return type
            @@ -4871,7 +4871,7 @@

            Initialization
            -torch.distributed.is_nccl_available()[source]#
            +torch.distributed.is_nccl_available()[source]#

            Check if the NCCL backend is available.

            Return type
            @@ -4882,7 +4882,7 @@

            Initialization
            -torch.distributed.is_gloo_available()[source]#
            +torch.distributed.is_gloo_available()[source]#

            Check if the Gloo backend is available.

            Return type
            @@ -4893,7 +4893,7 @@

            Initialization
            -torch.distributed.distributed_c10d.is_xccl_available()[source]#
            +torch.distributed.distributed_c10d.is_xccl_available()[source]#

            Check if the XCCL backend is available.

            Return type
            @@ -4904,7 +4904,7 @@

            Initialization
            -torch.distributed.is_torchelastic_launched()[source]#
            +torch.distributed.is_torchelastic_launched()[source]#

            Check whether this process was launched with torch.distributed.elastic (aka torchelastic).

            The existence of TORCHELASTIC_RUN_ID environment variable is used as a proxy to determine whether the current process @@ -4920,7 +4920,7 @@

            Initialization
            -torch.distributed.get_default_backend_for_device(device)[source]#
            +torch.distributed.get_default_backend_for_device(device)[source]#

            Return the default backend for the given device.

            Parameters
            @@ -5024,7 +5024,7 @@

Post-Initialization: torch.distributed.is_initialized().

            -class torch.distributed.Backend(name)[source]#
            +class torch.distributed.Backend(name)[source]#

            An enum-like class for backends.

            Available backends: GLOO, NCCL, UCC, MPI, XCCL, and other registered backends.

            The values of this class are lowercase strings, e.g., "gloo". They can @@ -5043,7 +5043,7 @@

            Post-Initialization
            -classmethod register_backend(name, func, extended_api=False, devices=None)[source]#
            +classmethod register_backend(name, func, extended_api=False, devices=None)[source]#

            Register a new backend with the given name and instantiating function.

This class method is used by third-party ProcessGroup extensions to register new backends.

            @@ -5076,7 +5076,7 @@

            Post-Initialization
            -torch.distributed.get_backend(group=None)[source]#
            +torch.distributed.get_backend(group=None)[source]#

            Return the backend of the given process group.

            Parameters
            @@ -5095,7 +5095,7 @@

            Post-Initialization
            -torch.distributed.get_rank(group=None)[source]#
            +torch.distributed.get_rank(group=None)[source]#

Return the rank of the current process in the provided group, or in the default group otherwise.

            Rank is a unique identifier assigned to each process within a distributed process group. They are always consecutive integers ranging from 0 to @@ -5117,7 +5117,7 @@

            Post-Initialization
            -torch.distributed.get_world_size(group=None)[source]#
            +torch.distributed.get_world_size(group=None)[source]#

            Return the number of processes in the current process group.

            Parameters
            @@ -5173,7 +5173,7 @@

            Groups
            -torch.distributed.new_group(ranks=None, timeout=None, backend=None, pg_options=None, use_local_synchronization=False, group_desc=None, device_id=None)[source]#
            +torch.distributed.new_group(ranks=None, timeout=None, backend=None, pg_options=None, use_local_synchronization=False, group_desc=None, device_id=None)[source]#

            Create a new distributed group.

            This function requires that all processes in the main group (i.e. all processes that are part of the distributed job) enter this function, even @@ -5242,7 +5242,7 @@
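
A sketch of the required calling pattern; every rank executes the call, not only the members of the new group:

    import torch.distributed as dist

    subgroup = dist.new_group(ranks=[0, 1])   # all ranks call this line
    if dist.get_rank() in (0, 1):
        dist.barrier(group=subgroup)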

            Groups
            -torch.distributed.get_group_rank(group, global_rank)[source]#
            +torch.distributed.get_group_rank(group, global_rank)[source]#

            Translate a global rank into a group rank.

            global_rank must be part of group otherwise this raises RuntimeError.

            @@ -5264,7 +5264,7 @@

            Groups
            -torch.distributed.get_global_rank(group, group_rank)[source]#
            +torch.distributed.get_global_rank(group, group_rank)[source]#

            Translate a group rank into a global rank.

            group_rank must be part of group otherwise this raises RuntimeError.

            @@ -5286,7 +5286,7 @@

            Groups
            -torch.distributed.get_process_group_ranks(group)[source]#
            +torch.distributed.get_process_group_ranks(group)[source]#

            Get all ranks associated with group.

            Parameters
            @@ -5312,7 +5312,7 @@

            DeviceMesh
            -class torch.distributed.device_mesh.DeviceMesh(device_type, mesh, *, mesh_dim_names=None, backend_override=None, _init_backend=True)[source]#
            +class torch.distributed.device_mesh.DeviceMesh(device_type, mesh, *, mesh_dim_names=None, backend_override=None, _init_backend=True)[source]#

DeviceMesh represents a mesh of devices, where the layout of devices can be represented as an n-dimensional array, and each value of that array is the global rank of the corresponding device in the default process group.

            @@ -5360,7 +5360,7 @@

            DeviceMesh
            -static from_group(group, device_type, mesh=None, *, mesh_dim_names=None)[source]#
            +static from_group(group, device_type, mesh=None, *, mesh_dim_names=None)[source]#

            Constructs a DeviceMesh with device_type from an existing ProcessGroup or a list of existing ProcessGroup.

            The constructed device mesh has number of dimensions equal to the @@ -5401,7 +5401,7 @@

            DeviceMesh
            -get_all_groups()[source]#
            +get_all_groups()[source]#

            Returns a list of ProcessGroups for all mesh dimensions.

            Returns
            @@ -5415,7 +5415,7 @@

            DeviceMesh
            -get_coordinate()[source]#
            +get_coordinate()[source]#

Return the coordinates of this rank relative to all dimensions of the mesh. If this rank is not part of the mesh, return None.

            @@ -5427,7 +5427,7 @@

            DeviceMesh
            -get_group(mesh_dim=None)[source]#
            +get_group(mesh_dim=None)[source]#

            Returns the single ProcessGroup specified by mesh_dim, or, if mesh_dim is not specified and the DeviceMesh is 1-dimensional, returns the only ProcessGroup in the mesh.

            @@ -5448,7 +5448,7 @@

            DeviceMesh
            -get_local_rank(mesh_dim=None)[source]#
            +get_local_rank(mesh_dim=None)[source]#

            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Parameters
            @@ -5484,7 +5484,7 @@

            DeviceMesh
            -get_rank()[source]#
            +get_rank()[source]#

            Returns the current global rank.

            Return type
            @@ -5500,7 +5500,7 @@

            DeviceMesh#

            -torch.distributed.send(tensor, dst=None, group=None, tag=0, group_dst=None)[source]#
            +torch.distributed.send(tensor, dst=None, group=None, tag=0, group_dst=None)[source]#

            Send a tensor synchronously.

            Warning

            @@ -5523,7 +5523,7 @@

            Point-to-point communication
            -torch.distributed.recv(tensor, src=None, group=None, tag=0, group_src=None)[source]#
            +torch.distributed.recv(tensor, src=None, group=None, tag=0, group_src=None)[source]#

            Receives a tensor synchronously.

            Warning

            @@ -5561,7 +5561,7 @@

            Point-to-point communication
            -torch.distributed.isend(tensor, dst=None, group=None, tag=0, group_dst=None)[source]#
            +torch.distributed.isend(tensor, dst=None, group=None, tag=0, group_dst=None)[source]#

            Send a tensor asynchronously.

            Warning

            @@ -5596,7 +5596,7 @@

            Point-to-point communication
            -torch.distributed.irecv(tensor, src=None, group=None, tag=0, group_src=None)[source]#
            +torch.distributed.irecv(tensor, src=None, group=None, tag=0, group_src=None)[source]#

            Receives a tensor asynchronously.

            Warning

            @@ -5627,7 +5627,7 @@

            Point-to-point communication
            -torch.distributed.send_object_list(object_list, dst=None, group=None, device=None, group_dst=None, use_batch=False)[source]#
            +torch.distributed.send_object_list(object_list, dst=None, group=None, device=None, group_dst=None, use_batch=False)[source]#

            Sends picklable objects in object_list synchronously.

Similar to send(), but Python objects can be passed in. Note that all objects in object_list must be picklable in order to be sent.
@@ -5705,7 +5705,7 @@

            Point-to-point communication
            -torch.distributed.recv_object_list(object_list, src=None, group=None, device=None, group_src=None, use_batch=False)[source]#
            +torch.distributed.recv_object_list(object_list, src=None, group=None, device=None, group_src=None, use_batch=False)[source]#

            Receives picklable objects in object_list synchronously.

            Similar to recv(), but can receive Python objects.

            @@ -5781,7 +5781,7 @@

            Point-to-point communication
            -torch.distributed.batch_isend_irecv(p2p_op_list)[source]#
            +torch.distributed.batch_isend_irecv(p2p_op_list)[source]#

            Send or Receive a batch of tensors asynchronously and return a list of requests.

            Process each of the operations in p2p_op_list and return the corresponding requests. NCCL, Gloo, and UCC backend are currently supported.
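A two-rank sketch pairing one isend with one irecv through P2POp; it assumes the default process group is initialized with world_size=2.

import torch
import torch.distributed as dist

rank = dist.get_rank()
peer = 1 - rank                       # the other rank in a 2-rank job
send_buf = torch.full((2,), float(rank))
recv_buf = torch.zeros(2)
ops = [
    dist.P2POp(dist.isend, send_buf, peer),
    dist.P2POp(dist.irecv, recv_buf, peer),
]
for req in dist.batch_isend_irecv(ops):
    req.wait()                        # recv_buf now holds the peer's values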

            @@ -5830,7 +5830,7 @@

            Point-to-point communication
            -class torch.distributed.P2POp(op, tensor, peer=None, group=None, tag=0, group_peer=None)[source]#
            +class torch.distributed.P2POp(op, tensor, peer=None, group=None, tag=0, group_peer=None)[source]#

            A class to build point-to-point operations for batch_isend_irecv.

This class builds the type of P2P operation, communication buffer, peer rank, Process Group, and tag. Instances of this class will be passed to batch_isend_irecv for point-to-point communication.
@@ -5905,7 +5905,7 @@

Synchronous and asynchronous collective operations

Collective functions#

            -torch.distributed.broadcast(tensor, src=None, group=None, async_op=False, group_src=None)[source]#
            +torch.distributed.broadcast(tensor, src=None, group=None, async_op=False, group_src=None)[source]#

            Broadcasts the tensor to the whole group.

            tensor must have the same number of elements in all processes participating in the collective.

            @@ -5931,7 +5931,7 @@

            Collective functions
            -torch.distributed.broadcast_object_list(object_list, src=None, group=None, device=None, group_src=None)[source]#
            +torch.distributed.broadcast_object_list(object_list, src=None, group=None, device=None, group_src=None)[source]#

            Broadcasts picklable objects in object_list to the whole group.

Similar to broadcast(), but Python objects can be passed in. Note that all objects in object_list must be picklable in order to be broadcasted.
@@ -6012,7 +6012,7 @@

            Collective functions
            -torch.distributed.all_reduce(tensor, op=<RedOpType.SUM: 0>, group=None, async_op=False)[source]#
            +torch.distributed.all_reduce(tensor, op=<RedOpType.SUM: 0>, group=None, async_op=False)[source]#

            Reduces the tensor data across all machines in a way that all get the final result.

            After the call tensor is going to be bitwise identical in all processes.

            Complex tensors are supported.
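A minimal sketch, assuming an initialized default process group: each rank contributes one value and every rank ends up with the sum.

import torch
import torch.distributed as dist

t = torch.tensor([float(dist.get_rank() + 1)])
dist.all_reduce(t)  # default op is SUM
# With world_size=2, t == tensor([3.]) on both ranks.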

            @@ -6066,7 +6066,7 @@

            Collective functions
            -torch.distributed.reduce(tensor, dst=None, op=<RedOpType.SUM: 0>, group=None, async_op=False, group_dst=None)[source]#
            +torch.distributed.reduce(tensor, dst=None, op=<RedOpType.SUM: 0>, group=None, async_op=False, group_dst=None)[source]#

            Reduces the tensor data across all machines.

            Only the process with rank dst is going to receive the final result.

            @@ -6094,7 +6094,7 @@

            Collective functions
            -torch.distributed.all_gather(tensor_list, tensor, group=None, async_op=False)[source]#
            +torch.distributed.all_gather(tensor_list, tensor, group=None, async_op=False)[source]#

            Gathers tensors from the whole group in a list.

            Complex and uneven sized tensors are supported.
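A short sketch under the same assumption of an initialized default process group:

import torch
import torch.distributed as dist

world_size = dist.get_world_size()
t = torch.tensor([float(dist.get_rank())])
out = [torch.zeros(1) for _ in range(world_size)]
dist.all_gather(out, t)  # out holds every rank's tensor, in rank order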

            @@ -6158,7 +6158,7 @@

            Collective functions
            -torch.distributed.all_gather_into_tensor(output_tensor, input_tensor, group=None, async_op=False)[source]#
            +torch.distributed.all_gather_into_tensor(output_tensor, input_tensor, group=None, async_op=False)[source]#

            Gather tensors from all ranks and put them in a single output tensor.

            This function requires all tensors to be the same size on each process.

            @@ -6213,7 +6213,7 @@

            Collective functions
            -torch.distributed.all_gather_object(object_list, obj, group=None)[source]#
            +torch.distributed.all_gather_object(object_list, obj, group=None)[source]#

            Gathers picklable objects from the whole group into a list.

            Similar to all_gather(), but Python objects can be passed in. Note that the object must be picklable in order to be gathered.

            @@ -6284,7 +6284,7 @@

            Collective functions
            -torch.distributed.gather(tensor, gather_list=None, dst=None, group=None, async_op=False, group_dst=None)[source]#
            +torch.distributed.gather(tensor, gather_list=None, dst=None, group=None, async_op=False, group_dst=None)[source]#

            Gathers a list of tensors in a single process.

            This function requires all tensors to be the same size on each process.

            @@ -6333,7 +6333,7 @@

            Collective functions
            -torch.distributed.gather_object(obj, object_gather_list=None, dst=None, group=None, group_dst=None)[source]#
            +torch.distributed.gather_object(obj, object_gather_list=None, dst=None, group=None, group_dst=None)[source]#

            Gathers picklable objects from the whole group in a single process.

            Similar to gather(), but Python objects can be passed in. Note that the object must be picklable in order to be gathered.

            @@ -6412,7 +6412,7 @@

            Collective functions
            -torch.distributed.scatter(tensor, scatter_list=None, src=None, group=None, async_op=False, group_src=None)[source]#
            +torch.distributed.scatter(tensor, scatter_list=None, src=None, group=None, async_op=False, group_src=None)[source]#

            Scatters a list of tensors to all processes in a group.

            Each process will receive exactly one tensor and store its data in the tensor argument.

            @@ -6467,7 +6467,7 @@

            Collective functions
            -torch.distributed.scatter_object_list(scatter_object_output_list, scatter_object_input_list=None, src=None, group=None, group_src=None)[source]#
            +torch.distributed.scatter_object_list(scatter_object_output_list, scatter_object_input_list=None, src=None, group=None, group_src=None)[source]#

            Scatters picklable objects in scatter_object_input_list to the whole group.

Similar to scatter(), but Python objects can be passed in. On each rank, the scattered object will be stored as the first element of scatter_object_output_list.
@@ -6540,7 +6540,7 @@

            Collective functions
            -torch.distributed.reduce_scatter(output, input_list, op=<RedOpType.SUM: 0>, group=None, async_op=False)[source]#
            +torch.distributed.reduce_scatter(output, input_list, op=<RedOpType.SUM: 0>, group=None, async_op=False)[source]#

            Reduces, then scatters a list of tensors to all processes in a group.

            Parameters
            @@ -6564,7 +6564,7 @@

            Collective functions
            -torch.distributed.reduce_scatter_tensor(output, input, op=<RedOpType.SUM: 0>, group=None, async_op=False)[source]#
            +torch.distributed.reduce_scatter_tensor(output, input, op=<RedOpType.SUM: 0>, group=None, async_op=False)[source]#

            Reduces, then scatters a tensor to all ranks in a group.
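As a rough sketch (initialized default process group assumed), the input carries one chunk per rank and each rank keeps only its own chunk of the reduction:

import torch
import torch.distributed as dist

world_size = dist.get_world_size()
inp = torch.ones(world_size * 2)  # one 2-element chunk per rank
out = torch.zeros(2)
dist.reduce_scatter_tensor(out, inp)
# out == tensor([world_size, world_size]) on every rank.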

            Parameters
            @@ -6620,7 +6620,7 @@

            Collective functions
            -torch.distributed.all_to_all_single(output, input, output_split_sizes=None, input_split_sizes=None, group=None, async_op=False)[source]#
            +torch.distributed.all_to_all_single(output, input, output_split_sizes=None, input_split_sizes=None, group=None, async_op=False)[source]#

            Split input tensor and then scatter the split list to all processes in a group.

            Later the received tensors are concatenated from all the processes in the group and returned as a single output tensor.
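A minimal equal-split sketch (initialized default process group assumed): rank r sends its j-th chunk to rank j and receives the r-th chunk from every rank.

import torch
import torch.distributed as dist

world_size = dist.get_world_size()
rank = dist.get_rank()
inp = torch.arange(world_size) + rank * world_size
out = torch.empty(world_size, dtype=inp.dtype)
dist.all_to_all_single(out, inp)  # equal split sizes when the specs are omitted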

            @@ -6720,7 +6720,7 @@

            Collective functions
            -torch.distributed.all_to_all(output_tensor_list, input_tensor_list, group=None, async_op=False)[source]#
            +torch.distributed.all_to_all(output_tensor_list, input_tensor_list, group=None, async_op=False)[source]#

Scatters a list of input tensors to all processes in a group and returns a gathered list of tensors in the output list.

            Complex tensors are supported.

            @@ -6820,7 +6820,7 @@

            Collective functions
            -torch.distributed.barrier(group=None, async_op=False, device_ids=None)[source]#
            +torch.distributed.barrier(group=None, async_op=False, device_ids=None)[source]#

            Synchronize all processes.

            This collective blocks processes until the whole group enters this function, if async_op is False, or if async work handle is called on wait().

            @@ -6854,7 +6854,7 @@

            Collective functions
            -torch.distributed.monitored_barrier(group=None, timeout=None, wait_all_ranks=False)[source]#
            +torch.distributed.monitored_barrier(group=None, timeout=None, wait_all_ranks=False)[source]#

Synchronize processes similar to torch.distributed.barrier, but with a configurable timeout.

It is able to report ranks that did not pass this barrier within the provided timeout. Specifically, for non-zero ranks, will block until a send/recv is processed from rank 0.
@@ -8206,7 +8206,7 @@

            Logging
            -torch.distributed.breakpoint(rank=0, skip=0, timeout_s=3600)[source]#
            +torch.distributed.breakpoint(rank=0, skip=0, timeout_s=3600)[source]#

            Set a breakpoint, but only on a single rank. All other ranks will wait for you to be done with the breakpoint before continuing.

diff --git a/2.9/distributed.optim.html b/2.9/distributed.optim.html
index 93dd97fd50b..03af88cac8e 100644
--- a/2.9/distributed.optim.html
+++ b/2.9/distributed.optim.html
@@ -4405,7 +4405,7 @@

            Distributed Optimizers
            -class torch.distributed.optim.DistributedOptimizer(optimizer_class, params_rref, *args, **kwargs)[source]#
            +class torch.distributed.optim.DistributedOptimizer(optimizer_class, params_rref, *args, **kwargs)[source]#

            DistributedOptimizer takes remote references to parameters scattered across workers and applies the given optimizer locally for each parameter.

This class uses get_gradients() in order to retrieve the gradients for specific parameters.
@@ -4465,7 +4465,7 @@

            Distributed Optimizers
            -step(context_id)[source]#
            +step(context_id)[source]#

            Performs a single optimization step.

This will call torch.optim.Optimizer.step() on each worker containing parameters to be optimized, and will block until all workers return.
@@ -4484,7 +4484,7 @@

            Distributed Optimizers
            -class torch.distributed.optim.PostLocalSGDOptimizer(optim, averager)[source]#
            +class torch.distributed.optim.PostLocalSGDOptimizer(optim, averager)[source]#

Wraps an arbitrary torch.optim.Optimizer and runs post-local SGD. This optimizer runs the local optimizer at every step. After the warm-up stage, it averages parameters periodically after the local optimizer is applied.

            @@ -4536,7 +4536,7 @@

            Distributed Optimizers
            -load_state_dict(state_dict)[source]#
            +load_state_dict(state_dict)[source]#

            This is the same as torch.optim.Optimizer load_state_dict(), but also restores model averager’s step value to the one saved in the provided state_dict.

            @@ -4546,7 +4546,7 @@

            Distributed Optimizers
            -state_dict()[source]#
            +state_dict()[source]#

            This is the same as torch.optim.Optimizer state_dict(), but adds an extra entry to record model averager’s step to the checkpoint to ensure reload does not cause unnecessary warm up again.

            @@ -4554,7 +4554,7 @@

            Distributed Optimizers
            -step()[source]#
            +step()[source]#

            Performs a single optimization step (parameter update).

            @@ -4562,7 +4562,7 @@

            Distributed Optimizers
            -class torch.distributed.optim.ZeroRedundancyOptimizer(params, optimizer_class, process_group=None, parameters_as_bucket_view=False, overlap_with_ddp=False, **defaults)[source]#
            +class torch.distributed.optim.ZeroRedundancyOptimizer(params, optimizer_class, process_group=None, parameters_as_bucket_view=False, overlap_with_ddp=False, **defaults)[source]#

Wraps an arbitrary optim.Optimizer and shards its states across ranks in the group.

The sharding is done as described by ZeRO.

The local optimizer instance in each rank is only responsible for updating approximately 1 / world_size parameters and hence only needs to keep 1 / world_size optimizer states.
@@ -4652,7 +4652,7 @@
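A hedged construction sketch, assuming one GPU per rank and an initialized process group; the model and hyperparameters are illustrative.

import torch
from torch.distributed.optim import ZeroRedundancyOptimizer
from torch.nn.parallel import DistributedDataParallel as DDP

model = DDP(torch.nn.Linear(32, 32).cuda())
opt = ZeroRedundancyOptimizer(
    model.parameters(),
    optimizer_class=torch.optim.Adam,
    lr=1e-3,  # forwarded to the wrapped Adam via **defaults
)
model(torch.randn(8, 32).cuda()).sum().backward()
opt.step()  # updates the local shard, then syncs parameters across ranks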

            Distributed Optimizers
            -add_param_group(param_group)[source]#
            +add_param_group(param_group)[source]#

            Add a parameter group to the Optimizer ‘s param_groups.

This can be useful when fine tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.
@@ -4675,7 +4675,7 @@

            Distributed Optimizers
            -consolidate_state_dict(to=0)[source]#
            +consolidate_state_dict(to=0)[source]#

            Consolidate a list of state_dict s (one per rank) on the target rank.

            Parameters
            @@ -4703,7 +4703,7 @@

            Distributed Optimizers
            -join_hook(**kwargs)[source]#
            +join_hook(**kwargs)[source]#

            Return the ZeRO join hook.

            It enables training on uneven inputs by shadowing the collective communications in the optimizer step.

            @@ -4728,7 +4728,7 @@

            Distributed Optimizers
            -load_state_dict(state_dict)[source]#
            +load_state_dict(state_dict)[source]#

            Load the state pertaining to the given rank from the input state_dict, updating the local optimizer as needed.

            Parameters
            @@ -4747,7 +4747,7 @@

            Distributed Optimizers
            -state_dict()[source]#
            +state_dict()[source]#

            Return the last global optimizer state known to this rank.

            Raises
            @@ -4766,7 +4766,7 @@

            Distributed Optimizers
            -step(closure=None, **kwargs)[source]#
            +step(closure=None, **kwargs)[source]#

Performs a single optimizer step and syncs parameters across all ranks.

            Parameters
diff --git a/2.9/distributed.pipelining.html b/2.9/distributed.pipelining.html
index 95cd84655c4..cc4708e3979 100644
--- a/2.9/distributed.pipelining.html
+++ b/2.9/distributed.pipelining.html
@@ -4762,7 +4762,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.SplitPoint(value)[source]#
            +class torch.distributed.pipelining.SplitPoint(value)[source]#

Enum representing the points at which a split can occur in the execution of a submodule.

BEGINNING: Represents adding a split point before the execution of a certain submodule in the forward function.

END: Represents adding a split point after the execution of a certain submodule in the forward function.

            @@ -4770,7 +4770,7 @@

            Model Split APIs
            -torch.distributed.pipelining.pipeline(module, mb_args, mb_kwargs=None, split_spec=None, split_policy=None)[source]#
            +torch.distributed.pipelining.pipeline(module, mb_args, mb_kwargs=None, split_spec=None, split_policy=None)[source]#

            Split a module based on a specification.

            See Pipe for more details.

            @@ -4791,14 +4791,14 @@

            Model Split APIs
            -class torch.distributed.pipelining.Pipe(split_gm, num_stages, has_loss_and_backward, loss_spec)[source]#
            +class torch.distributed.pipelining.Pipe(split_gm, num_stages, has_loss_and_backward, loss_spec)[source]#

            -torch.distributed.pipelining.pipe_split()[source]#
            +torch.distributed.pipelining.pipe_split()[source]#

            pipe_split is a special operator that is used to mark the boundary between stages in a module. It is used to split the module into stages. It is a no-op if your annotated module is run eagerly.

            @@ -4819,7 +4819,7 @@

            Model Split APIs

            Microbatch Utilities#

            -class torch.distributed.pipelining.microbatch.TensorChunkSpec(split_dim)[source]#
            +class torch.distributed.pipelining.microbatch.TensorChunkSpec(split_dim)[source]#

            Class used to specify chunking of inputs

            @@ -4827,7 +4827,7 @@

            Model Split APIs
            -torch.distributed.pipelining.microbatch.split_args_kwargs_into_chunks(args, kwargs, chunks, args_chunk_spec=None, kwargs_chunk_spec=None)[source]#
            +torch.distributed.pipelining.microbatch.split_args_kwargs_into_chunks(args, kwargs, chunks, args_chunk_spec=None, kwargs_chunk_spec=None)[source]#

            Given a sequence of args and kwargs, split them into a number of chunks according to their respective chunking specs.

            @@ -4852,7 +4852,7 @@

            Model Split APIs
            -torch.distributed.pipelining.microbatch.merge_chunks(chunks, chunk_spec)[source]#
            +torch.distributed.pipelining.microbatch.merge_chunks(chunks, chunk_spec)[source]#

            Given a list of chunks, merge them into a single value according to the chunk spec.

            @@ -4876,7 +4876,7 @@

            Model Split APIs

            Pipeline Stages#

            -class torch.distributed.pipelining.stage.PipelineStage(submodule, stage_index, num_stages, device, input_args=None, output_args=None, group=None, dw_builder=None)[source]#
            +class torch.distributed.pipelining.stage.PipelineStage(submodule, stage_index, num_stages, device, input_args=None, output_args=None, group=None, dw_builder=None)[source]#

            A class representing a pipeline stage in a pipeline parallelism setup.

            PipelineStage assumes sequential partitioning of the model, i.e. the model is split into chunks where outputs from one chunk feed into inputs of the next chunk, with no skip connections.

            @@ -4902,7 +4902,7 @@

            Model Split APIs
            -torch.distributed.pipelining.stage.build_stage(stage_module, stage_index, pipe_info, device, group=None)[source]#
            +torch.distributed.pipelining.stage.build_stage(stage_module, stage_index, pipe_info, device, group=None)[source]#

            Create a pipeline stage given a stage_module to be wrapped by this stage and pipeline information.

            @@ -4929,7 +4929,7 @@

            Model Split APIs

            Pipeline Schedules#

            -class torch.distributed.pipelining.schedules.ScheduleGPipe(stage, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.ScheduleGPipe(stage, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#

            The GPipe schedule. Will go through all the microbatches in a fill-drain manner.
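A first-stage-rank sketch, assuming a 2-rank job with an initialized process group; the module and shapes are illustrative placeholders.

import torch
from torch.distributed.pipelining.stage import PipelineStage
from torch.distributed.pipelining.schedules import ScheduleGPipe

stage_mod = torch.nn.Linear(16, 16)  # this rank's chunk of the model
stage = PipelineStage(stage_mod, stage_index=0, num_stages=2,
                      device=torch.device("cuda"))
schedule = ScheduleGPipe(stage, n_microbatches=4)
schedule.step(torch.randn(8, 16, device="cuda"))  # split into 4 microbatches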

            @@ -4938,7 +4938,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.schedules.Schedule1F1B(stage, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.Schedule1F1B(stage, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#

            The 1F1B schedule. Will perform one forward and one backward on the microbatches in steady state.

            @@ -4947,7 +4947,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.schedules.ScheduleInterleaved1F1B(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.ScheduleInterleaved1F1B(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#

The Interleaved 1F1B schedule. See https://arxiv.org/pdf/2104.04473 for details. Will perform one forward and one backward on the microbatches in steady state.
@@ -4968,7 +4968,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.schedules.ScheduleLoopedBFS(stages, n_microbatches, loss_fn=None, output_merge_spec=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.ScheduleLoopedBFS(stages, n_microbatches, loss_fn=None, output_merge_spec=None, scale_grads=True)[source]#

Breadth-First Pipeline Parallelism. See https://arxiv.org/abs/2211.05953 for details. Similar to Interleaved 1F1B, Looped BFS supports multiple stages per rank.
@@ -4981,7 +4981,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.schedules.ScheduleInterleavedZeroBubble(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.ScheduleInterleavedZeroBubble(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#

The Interleaved Zero Bubble schedule. See https://arxiv.org/pdf/2401.10241 for details. Will perform one forward and one backward on inputs for the microbatches in steady state.
@@ -4994,7 +4994,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.schedules.ScheduleZBVZeroBubble(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.ScheduleZBVZeroBubble(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#

            The Zero Bubble schedule (ZBV variant). See https://arxiv.org/pdf/2401.10241 Section 6 for details.

This schedule requires exactly two stages per rank.

            @@ -5010,7 +5010,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.schedules.ScheduleDualPipeV(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.ScheduleDualPipeV(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#

            The DualPipeV schedule. A more efficient schedule variant based on the DualPipe schedule introduced by DeepSeek in https://arxiv.org/pdf/2412.19437

            Based on the open sourced code from deepseek-ai/DualPipe

            @@ -5020,7 +5020,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.schedules.PipelineScheduleSingle(stage, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.PipelineScheduleSingle(stage, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, scale_grads=True)[source]#

            Base class for single-stage schedules. Implements the step method. Derived classes should implement _step_microbatches.

            @@ -5031,7 +5031,7 @@

            Model Split APIs
            -step(*args, target=None, losses=None, **kwargs)[source]#
            +step(*args, target=None, losses=None, **kwargs)[source]#

            Run one iteration of the pipeline schedule with whole-batch input. Will chunk the input into microbatches automatically, and go through the microbatches according to the schedule implementation.

            @@ -5047,7 +5047,7 @@

            Model Split APIs
            -class torch.distributed.pipelining.schedules.PipelineScheduleMulti(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, use_full_backward=None, scale_grads=True)[source]#
            +class torch.distributed.pipelining.schedules.PipelineScheduleMulti(stages, n_microbatches, loss_fn=None, args_chunk_spec=None, kwargs_chunk_spec=None, output_merge_spec=None, use_full_backward=None, scale_grads=True)[source]#

            Base class for multi-stage schedules. Implements the step method.

Gradients are scaled by num_microbatches depending on the scale_grads argument, defaulting to True. This setting
@@ -5057,7 +5057,7 @@

            Model Split APIs
            -step(*args, target=None, losses=None, **kwargs)[source]#
            +step(*args, target=None, losses=None, **kwargs)[source]#

            Run one iteration of the pipeline schedule with whole-batch input. Will chunk the input into microbatches automatically, and go through the microbatches according to the schedule implementation.

diff --git a/2.9/distributed.tensor.html b/2.9/distributed.tensor.html
index e8f376a07f9..8ffe312515a 100644
--- a/2.9/distributed.tensor.html
+++ b/2.9/distributed.tensor.html
@@ -4458,7 +4458,7 @@

            DTensor Class APIs
            -__create_chunk_list__()[source]#
            +__create_chunk_list__()[source]#

            Return a list of ChunkStorageMetadata, which is a dataclass that describes the size/offset of the local shard/replica on current rank. For DTensor, each rank will have a single local shard/replica, so the returned list usually only has one element.

            @@ -4472,7 +4472,7 @@

            DTensor Class APIs
            -static from_local(local_tensor, device_mesh=None, placements=None, *, run_check=False, shape=None, stride=None)[source]#
            +static from_local(local_tensor, device_mesh=None, placements=None, *, run_check=False, shape=None, stride=None)[source]#

            Create a DTensor from a local torch.Tensor on each rank according to the device_mesh and placements specified.
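A rough sketch, assuming a 4-rank job with one GPU per rank; the shapes are illustrative.

import torch
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.tensor import DTensor, Shard

mesh = init_device_mesh("cuda", (4,))
local = torch.randn(2, 8, device="cuda")          # this rank's shard
dt = DTensor.from_local(local, mesh, [Shard(0)])  # global shape (8, 8)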

            @@ -4527,7 +4527,7 @@

            DTensor Class APIs
            -full_tensor(*, grad_placements=None)[source]#
            +full_tensor(*, grad_placements=None)[source]#

            Return the full tensor of this DTensor. It will perform necessary collectives to gather the local tensors from other ranks in its DeviceMesh and concatenate them together. It’s a syntactic sugar of the following code:

            @@ -4558,7 +4558,7 @@

            DTensor Class APIs
            -redistribute(device_mesh=None, placements=None, *, async_op=False, forward_dtype=None, backward_dtype=None)[source]#
            +redistribute(device_mesh=None, placements=None, *, async_op=False, forward_dtype=None, backward_dtype=None)[source]#

redistribute performs necessary collective operations that redistribute the current DTensor from its current placements to a new placements, or from its current DeviceMesh to a new DeviceMesh, i.e. we can turn a Sharded DTensor into a Replicated DTensor by specifying a Replicate placement for each DeviceMesh dimension.
@@ -4619,7 +4619,7 @@

            DTensor Class APIs
            -to_local(*, grad_placements=None)[source]#
            +to_local(*, grad_placements=None)[source]#

            Get the local tensor of this DTensor on its current rank. For sharding it returns a local shard of the logical tensor view, for replication it returns the replica on its current rank.

            @@ -4687,7 +4687,7 @@

DeviceMesh as the distributed communicator

Placement on each DeviceMesh dimension:

            -class torch.distributed.tensor.placement_types.Shard(dim)[source]#
            +class torch.distributed.tensor.placement_types.Shard(dim)[source]#

The Shard(dim) placement describes the DTensor sharding on tensor dimension dim over a corresponding DeviceMesh dimension, where each rank on the DeviceMesh dimension only holds a shard/piece of the global Tensor. The Shard placement follows the torch.chunk() semantic, where the last few shards on the DeviceMesh dimension may be empty when the tensor dimension is not evenly divisible.
@@ -4715,7 +4715,7 @@

            DeviceMesh as the distributed communicator
            -class torch.distributed.tensor.placement_types.Replicate[source]#
            +class torch.distributed.tensor.placement_types.Replicate[source]#

The Replicate() placement describes the DTensor replicating on a corresponding DeviceMesh dimension, where each rank on the DeviceMesh dimension holds a replica of the global Tensor. The Replicate placement can be used by all DTensor APIs (i.e. distribute_tensor, DTensor.from_local, etc.).
@@ -4726,7 +4726,7 @@

            DeviceMesh as the distributed communicator
            -class torch.distributed.tensor.placement_types.Partial(reduce_op='sum')[source]#
            +class torch.distributed.tensor.placement_types.Partial(reduce_op='sum')[source]#

The Partial(reduce_op) placement describes the DTensor that is pending reduction on a specified DeviceMesh dimension, where each rank on the DeviceMesh dimension holds the partial value of the global Tensor. User can
@@ -4755,7 +4755,7 @@

            DeviceMesh as the distributed communicator
            -class torch.distributed.tensor.placement_types.Placement[source]#
            +class torch.distributed.tensor.placement_types.Placement[source]#

The base class for the Placement type, where it describes how a DTensor is placed onto the DeviceMesh. Placement and DeviceMesh together could describe the DTensor Layout. It is the base class of the three main DTensor Placement types: Shard, Replicate, and Partial.
@@ -4763,7 +4763,7 @@

            DeviceMesh as the distributed communicator
            -is_partial(reduce_op=None)[source]#
            +is_partial(reduce_op=None)[source]#
            Return type

            bool

            @@ -4773,7 +4773,7 @@

            DeviceMesh as the distributed communicator
            -is_replicate()[source]#
            +is_replicate()[source]#
            Return type

            bool

            @@ -4783,7 +4783,7 @@

            DeviceMesh as the distributed communicator
            -is_shard(dim=None)[source]#
            +is_shard(dim=None)[source]#
            Return type

            bool

            @@ -4823,7 +4823,7 @@

Create DTensor from a logical torch.Tensor

DTensor s to comply with the single device semantic, which is critical for numerical correctness.

            -torch.distributed.tensor.distribute_tensor(tensor, device_mesh=None, placements=None, *, src_data_rank=0)[source]#
            +torch.distributed.tensor.distribute_tensor(tensor, device_mesh=None, placements=None, *, src_data_rank=0)[source]#

Distribute a leaf torch.Tensor (i.e. nn.Parameter/buffers) to the device_mesh according to the placements specified. The rank of device_mesh and placements must be the same. The tensor to distribute is the logical or “global” tensor, and the API would use
@@ -4875,7 +4875,7 @@
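A minimal sketch under the same 4-rank, one-GPU-per-rank assumption:

import torch
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.tensor import Shard, distribute_tensor

mesh = init_device_mesh("cuda", (4,))
global_t = torch.randn(8, 8)                        # logical "global" tensor
dt = distribute_tensor(global_t, mesh, [Shard(0)])  # each rank keeps a 2x8 shard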

Create DTensor from a logical torch.Tensor

nn.Module level

            -torch.distributed.tensor.distribute_module(module, device_mesh=None, partition_fn=None, input_fn=None, output_fn=None)[source]#
            +torch.distributed.tensor.distribute_module(module, device_mesh=None, partition_fn=None, input_fn=None, output_fn=None)[source]#

This function exposes three functions to control the parameters/inputs/outputs of the module:

1. To perform sharding on the module before runtime execution by specifying the partition_fn (i.e. allow user to convert Module parameters to DTensor parameters according to the partition_fn specified).
@@ -4923,7 +4923,7 @@

DTensor Factory Functions

DeviceMesh and Placement for the DTensor created:

            -torch.distributed.tensor.zeros(*size, requires_grad=False, dtype=None, layout=torch.strided, device_mesh=None, placements=None)[source]#
            +torch.distributed.tensor.zeros(*size, requires_grad=False, dtype=None, layout=torch.strided, device_mesh=None, placements=None)[source]#

            Returns a DTensor filled with the scalar value 0.

            Parameters
            @@ -4954,7 +4954,7 @@

            DTensor Factory Functions
            -torch.distributed.tensor.ones(*size, dtype=None, layout=torch.strided, requires_grad=False, device_mesh=None, placements=None)[source]#
            +torch.distributed.tensor.ones(*size, dtype=None, layout=torch.strided, requires_grad=False, device_mesh=None, placements=None)[source]#

            Returns a DTensor filled with the scalar value 1, with the shape defined by the variable argument size.

            @@ -4986,7 +4986,7 @@

            DTensor Factory Functions
            -torch.distributed.tensor.empty(*size, dtype=None, layout=torch.strided, requires_grad=False, device_mesh=None, placements=None)[source]#
            +torch.distributed.tensor.empty(*size, dtype=None, layout=torch.strided, requires_grad=False, device_mesh=None, placements=None)[source]#

            Returns a DTensor filled with uninitialized data. The shape of the DTensor is defined by the variable argument size.

            @@ -5017,7 +5017,7 @@

            DTensor Factory Functions
            -torch.distributed.tensor.full(size, fill_value, *, dtype=None, layout=torch.strided, requires_grad=False, device_mesh=None, placements=None)[source]#
            +torch.distributed.tensor.full(size, fill_value, *, dtype=None, layout=torch.strided, requires_grad=False, device_mesh=None, placements=None)[source]#

            Returns a DTensor filled with fill_value according to device_mesh and placements, with the shape defined by the argument size.

            @@ -5052,7 +5052,7 @@

            DTensor Factory Functions
            -torch.distributed.tensor.rand(*size, requires_grad=False, dtype=None, layout=torch.strided, device_mesh=None, placements=None)[source]#
            +torch.distributed.tensor.rand(*size, requires_grad=False, dtype=None, layout=torch.strided, device_mesh=None, placements=None)[source]#

            Returns a DTensor filled with random numbers from a uniform distribution on the interval [0, 1). The shape of the tensor is defined by the variable argument size.

            @@ -5085,7 +5085,7 @@

            DTensor Factory Functions
            -torch.distributed.tensor.randn(*size, requires_grad=False, dtype=None, layout=torch.strided, device_mesh=None, placements=None)[source]#
            +torch.distributed.tensor.randn(*size, requires_grad=False, dtype=None, layout=torch.strided, device_mesh=None, placements=None)[source]#

            Returns a DTensor filled with random numbers from a normal distribution with mean 0 and variance 1. The shape of the tensor is defined by the variable argument size.

            @@ -5163,7 +5163,7 @@

            Debugging Tools
            -generate_comm_debug_tracing_table(noise_level=3)[source]#
            +generate_comm_debug_tracing_table(noise_level=3)[source]#

Generates a detailed table displaying operations and collective tracing information at the module level. The amount of information depends on noise_level.

              @@ -5176,7 +5176,7 @@

              Debugging Tools
              -generate_json_dump(file_name='comm_mode_log.json', noise_level=3)[source]#
              +generate_json_dump(file_name='comm_mode_log.json', noise_level=3)[source]#

Creates a JSON file used to build the browser visual.

0. prints module-level collective counts
1. prints DTensor operations not included in trivial operations

@@ -5186,7 +5186,7 @@

              Debugging Tools
              -get_comm_counts()[source]#
              +get_comm_counts()[source]#

              Returns the communication counts as a dictionary.

              Returns
              @@ -5200,7 +5200,7 @@

              Debugging Tools
              -get_parameter_info()[source]#
              +get_parameter_info()[source]#
              Return type

              dict[str, dict[str, Any]]

              @@ -5210,7 +5210,7 @@

              Debugging Tools
              -get_sharding_info()[source]#
              +get_sharding_info()[source]#
              Return type

              dict[str, dict[str, Any]]

              @@ -5220,7 +5220,7 @@

              Debugging Tools
              -get_total_counts()[source]#
              +get_total_counts()[source]#
              Return type

              int

              @@ -5230,7 +5230,7 @@

              Debugging Tools
              -log_comm_debug_tracing_table_to_file(file_name='comm_mode_log.txt', noise_level=3)[source]#
              +log_comm_debug_tracing_table_to_file(file_name='comm_mode_log.txt', noise_level=3)[source]#

Alternative to console CommDebugMode output; writes to the file specified by the user.

              @@ -5239,7 +5239,7 @@

Debugging Tools

visualize_sharding():

              -torch.distributed.tensor.debug.visualize_sharding(dtensor, header='', use_rich=False)[source]#
              +torch.distributed.tensor.debug.visualize_sharding(dtensor, header='', use_rich=False)[source]#

Visualizes sharding in the terminal for DTensors that are 1D or 2D.

              Note

              @@ -5259,7 +5259,7 @@

              Experimental Features
              -torch.distributed.tensor.experimental.context_parallel(mesh, *, buffers=None, buffer_seq_dims=None, no_restore_buffers=None)[source]#
              +torch.distributed.tensor.experimental.context_parallel(mesh, *, buffers=None, buffer_seq_dims=None, no_restore_buffers=None)[source]#

context_parallel is an experimental API to enable context parallelism (CP). This API performs two actions: 1) patch the SDPA (torch.nn.functional.scaled_dot_product_attention) with the CP-enabled one, and 2) shard the buffers along the sequence dimension.
@@ -5298,7 +5298,7 @@

              Experimental Features
              -torch.distributed.tensor.experimental.local_map(func=None, out_placements=None, in_placements=None, in_grad_placements=None, device_mesh=None, *, redistribute_inputs=False)[source]#
              +torch.distributed.tensor.experimental.local_map(func=None, out_placements=None, in_placements=None, in_grad_placements=None, device_mesh=None, *, redistribute_inputs=False)[source]#

local_map() is an experimental API that allows users to pass DTensor s to a function that is written to be applied on torch.Tensor s. It is done by extracting the local components of DTensor, calling the function, and wrapping the outputs to DTensor according to the out_placements.
@@ -5403,7 +5403,7 @@

              Experimental Features
              -torch.distributed.tensor.experimental.register_sharding(op)[source]#
              +torch.distributed.tensor.experimental.register_sharding(op)[source]#

register_sharding() is an experimental API that allows users to register sharding strategies for an operator when the tensor inputs and outputs are DTensor. It can be useful when: (1) there doesn’t exist a default sharding strategy for op,
diff --git a/2.9/distributed.tensor.parallel.html b/2.9/distributed.tensor.parallel.html
index 13cafd69648..f4fa92db8d7 100644
--- a/2.9/distributed.tensor.parallel.html
+++ b/2.9/distributed.tensor.parallel.html
@@ -4404,7 +4404,7 @@

Tensor Parallelism - torch.distributed.tensor.parallel

The entrypoint to parallelize your nn.Module using Tensor Parallelism is:

              -torch.distributed.tensor.parallel.parallelize_module(module, device_mesh=None, parallelize_plan=None, *, src_data_rank=0)[source]#
              +torch.distributed.tensor.parallel.parallelize_module(module, device_mesh=None, parallelize_plan=None, *, src_data_rank=0)[source]#

              Apply Tensor Parallelism in PyTorch by parallelizing modules or sub-modules based on a user-specified plan.

We parallelize module or sub_modules based on a parallelize_plan. The parallelize_plan contains ParallelStyle, which indicates how the user wants the module or sub_module
@@ -4463,7 +4463,7 @@

Tensor Parallelism - torch.distributed.tensor.parallel

Tensor Parallelism supports the following parallel styles:

              -class torch.distributed.tensor.parallel.ColwiseParallel(*, input_layouts=None, output_layouts=None, use_local_output=True)[source]#
              +class torch.distributed.tensor.parallel.ColwiseParallel(*, input_layouts=None, output_layouts=None, use_local_output=True)[source]#

              Partition a compatible nn.Module in a column-wise fashion. Currently supports nn.Linear and nn.Embedding. Users can compose it together with RowwiseParallel to achieve the sharding of more complicated modules. (i.e. MLP, Attention)
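A hedged MLP sketch, assuming a 4-rank job; the classic pairing shards the first linear column-wise and the second row-wise so a collective is only needed at the output.

import torch.nn as nn
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.tensor.parallel import (
    ColwiseParallel, RowwiseParallel, parallelize_module,
)

mesh = init_device_mesh("cuda", (4,))
mlp = nn.Sequential(nn.Linear(32, 128), nn.ReLU(), nn.Linear(128, 32))
parallelize_module(mlp, mesh, {"0": ColwiseParallel(), "2": RowwiseParallel()})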

              @@ -4507,7 +4507,7 @@

              Tensor Parallelism - torch.distributed.tensor.parallel
              -class torch.distributed.tensor.parallel.RowwiseParallel(*, input_layouts=None, output_layouts=None, use_local_output=True)[source]#
              +class torch.distributed.tensor.parallel.RowwiseParallel(*, input_layouts=None, output_layouts=None, use_local_output=True)[source]#

              Partition a compatible nn.Module in a row-wise fashion. Currently supports nn.Linear and nn.Embedding. Users can compose it with ColwiseParallel to achieve the sharding of more complicated modules. (i.e. MLP, Attention)

              @@ -4545,7 +4545,7 @@

              Tensor Parallelism - torch.distributed.tensor.parallel
              -class torch.distributed.tensor.parallel.SequenceParallel(*, sequence_dim=1, use_local_output=False)[source]#
              +class torch.distributed.tensor.parallel.SequenceParallel(*, sequence_dim=1, use_local_output=False)[source]#

SequenceParallel replicates the parameters of a compatible nn.Module and runs the sharded computation with input sharded on the sequence dimension. This currently supports nn.LayerNorm, nn.Dropout, and the RMSNorm Python implementation.

              @@ -4599,7 +4599,7 @@

Tensor Parallelism - torch.distributed.tensor.parallel

parallelize_plan when calling parallelize_module:

              -class torch.distributed.tensor.parallel.PrepareModuleInput(*, input_layouts=None, desired_input_layouts=None, input_kwarg_layouts=None, desired_input_kwarg_layouts=None, use_local_output=False)[source]#
              +class torch.distributed.tensor.parallel.PrepareModuleInput(*, input_layouts=None, desired_input_layouts=None, input_kwarg_layouts=None, desired_input_kwarg_layouts=None, use_local_output=False)[source]#

              Configure the nn.Module’s inputs to convert the input tensors of the nn.Module to DTensors at runtime according to input_layouts, and perform layout redistribution according to the desired_input_layouts.

              @@ -4649,7 +4649,7 @@

              Tensor Parallelism - torch.distributed.tensor.parallel
              -class torch.distributed.tensor.parallel.PrepareModuleOutput(*, output_layouts, desired_output_layouts, use_local_output=True)[source]#
              +class torch.distributed.tensor.parallel.PrepareModuleOutput(*, output_layouts, desired_output_layouts, use_local_output=True)[source]#

              Configure the nn.Module’s outputs to convert the output tensors of the nn.Module to DTensors at runtime according to output_layouts, and perform layout redistribution according to the desired_output_layouts.

              @@ -4692,7 +4692,7 @@

              Tensor Parallelism - torch.distributed.tensor.parallel
              -class torch.distributed.tensor.parallel.PrepareModuleInputOutput(*, input_layouts=None, desired_input_layouts=None, input_kwarg_layouts=None, desired_input_kwarg_layouts=None, use_local_input=False, output_layouts, desired_output_layouts, use_local_output=True)[source]#
              +class torch.distributed.tensor.parallel.PrepareModuleInputOutput(*, input_layouts=None, desired_input_layouts=None, input_kwarg_layouts=None, desired_input_kwarg_layouts=None, use_local_input=False, output_layouts, desired_output_layouts, use_local_output=True)[source]#

Configure the nn.Module’s inputs (and outputs) to convert the input tensors (and output tensors, respectively) of the nn.Module to DTensors at runtime according to input_layouts (and output_layouts, respectively), and perform layout redistribution according to the desired_input_layouts (and desired_output_layouts, respectively). This is a combination of PrepareModuleInput and PrepareModuleOutput.
@@ -4765,7 +4765,7 @@

Tensor Parallelism - torch.distributed.tensor.parallel

Parallelized cross-entropy loss computation (loss parallelism) is supported via the following context manager:

              -torch.distributed.tensor.parallel.loss_parallel()[source]#
              +torch.distributed.tensor.parallel.loss_parallel()[source]#

              A context manager that enables loss parallelism, where efficient parallelized loss computation can be performed when the input is sharded on the class dimension. Currently only the cross-entropy loss is supported.
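A rough usage sketch, assuming a 4-rank job; the logits are sharded on the class dimension and the labels replicated, both illustrative.

import torch
import torch.nn.functional as F
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.tensor import Replicate, Shard, distribute_tensor
from torch.distributed.tensor.parallel import loss_parallel

mesh = init_device_mesh("cuda", (4,))
logits = distribute_tensor(
    torch.randn(8, 16, device="cuda", requires_grad=True), mesh, [Shard(1)])
labels = distribute_tensor(
    torch.randint(16, (8,), device="cuda"), mesh, [Replicate()])
with loss_parallel():
    loss = F.cross_entropy(logits, labels)
    loss.backward()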

diff --git a/2.9/distributions.html b/2.9/distributions.html
index 762c096201c..4c2beb1b710 100644
--- a/2.9/distributions.html
+++ b/2.9/distributions.html
@@ -4456,7 +4456,7 @@

Pathwise derivative

Distribution#

              -class torch.distributions.distribution.Distribution(batch_shape=torch.Size([]), event_shape=torch.Size([]), validate_args=None)[source]#
              +class torch.distributions.distribution.Distribution(batch_shape=torch.Size([]), event_shape=torch.Size([]), validate_args=None)[source]#

              Bases: object

              Distribution is the abstract base class for probability distributions.

              @@ -4485,7 +4485,7 @@

              Distribution
              -cdf(value)[source]#
              +cdf(value)[source]#

              Returns the cumulative density/mass function evaluated at value.

              @@ -4500,7 +4500,7 @@

              Distribution
              -entropy()[source]#
              +entropy()[source]#

              Returns entropy of distribution, batched over batch_shape.

              Returns
              @@ -4514,7 +4514,7 @@

              Distribution
              -enumerate_support(expand=True)[source]#
              +enumerate_support(expand=True)[source]#

Returns tensor containing all values supported by a discrete distribution. The result will enumerate over dimension 0, so the shape of the result will be (cardinality,) + batch_shape + event_shape (where event_shape = () for univariate distributions).
@@ -4547,7 +4547,7 @@

              Distribution
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

Returns a new distribution instance (or populates an existing instance provided by a derived class) with batch dimensions expanded to batch_shape. This method calls expand on the distribution’s parameters.
@@ -4572,7 +4572,7 @@

              Distribution
              -icdf(value)[source]#
              +icdf(value)[source]#

              Returns the inverse cumulative density/mass function evaluated at value.

              @@ -4587,7 +4587,7 @@

              Distribution
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              Returns the log of the probability density/mass function evaluated at value.

              @@ -4614,7 +4614,7 @@

              Distribution
              -perplexity()[source]#
              +perplexity()[source]#

              Returns perplexity of distribution, batched over batch_shape.

              Returns
              @@ -4628,7 +4628,7 @@

              Distribution
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#

              Generates a sample_shape shaped reparameterized sample or sample_shape shaped batch of reparameterized samples if the distribution parameters are batched.

              @@ -4641,7 +4641,7 @@

              Distribution
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

              @@ -4653,7 +4653,7 @@

              Distribution
              -sample_n(n)[source]#
              +sample_n(n)[source]#

              Generates n samples or n batches of samples if the distribution parameters are batched.

              @@ -4665,7 +4665,7 @@

              Distribution
              -static set_default_validate_args(value)[source]#
              +static set_default_validate_args(value)[source]#

              Sets whether validation is enabled or disabled.

The default behavior mimics Python’s assert statement: validation is on by default, but is disabled if Python is run in optimized mode (via python -O).
@@ -4704,7 +4704,7 @@

Distribution

ExponentialFamily#

              -class torch.distributions.exp_family.ExponentialFamily(batch_shape=torch.Size([]), event_shape=torch.Size([]), validate_args=None)[source]#
              +class torch.distributions.exp_family.ExponentialFamily(batch_shape=torch.Size([]), event_shape=torch.Size([]), validate_args=None)[source]#

              Bases: Distribution

ExponentialFamily is the abstract base class for probability distributions belonging to an exponential family, whose probability mass/density function has the form defined below

              @@ -4724,7 +4724,7 @@

              ExponentialFamily
              -entropy()[source]#
              +entropy()[source]#

              Method to compute the entropy using Bregman divergence of the log normalizer.

              @@ -4735,7 +4735,7 @@

ExponentialFamily

Bernoulli#

              -class torch.distributions.bernoulli.Bernoulli(probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.bernoulli.Bernoulli(probs=None, logits=None, validate_args=None)[source]#

              Bases: ExponentialFamily

              Creates a Bernoulli distribution parameterized by probs or logits (but not both).

              @@ -4763,17 +4763,17 @@

              Bernoulli
              -entropy()[source]#
              +entropy()[source]#

              -enumerate_support(expand=True)[source]#
              +enumerate_support(expand=True)[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -4783,7 +4783,7 @@

              Bernoulli
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -4813,7 +4813,7 @@

              Bernoulli
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -4833,7 +4833,7 @@

Bernoulli

Beta#

              -class torch.distributions.beta.Beta(concentration1, concentration0, validate_args=None)[source]#
              +class torch.distributions.beta.Beta(concentration1, concentration0, validate_args=None)[source]#

              Bases: ExponentialFamily

              Beta distribution parameterized by concentration1 and concentration0.

              Example:
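A minimal usage sketch (the sampled value is random):

>>> import torch
>>> from torch.distributions import Beta
>>> m = Beta(torch.tensor([0.5]), torch.tensor([0.5]))
>>> m.sample()
tensor([0.1046])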

              @@ -4869,12 +4869,12 @@

              Beta
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -4884,7 +4884,7 @@

              Beta
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -4899,7 +4899,7 @@

              Beta
              -rsample(sample_shape=())[source]#
              +rsample(sample_shape=())[source]#
              Return type

              Tensor

              @@ -4924,7 +4924,7 @@

Beta

Binomial#

              -class torch.distributions.binomial.Binomial(total_count=1, probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.binomial.Binomial(total_count=1, probs=None, logits=None, validate_args=None)[source]#

              Bases: Distribution

Creates a Binomial distribution parameterized by total_count and either probs or logits (but not both). total_count must be broadcastable with probs/logits.
@@ -4956,17 +4956,17 @@

              Binomial
              -entropy()[source]#
              +entropy()[source]#

              -enumerate_support(expand=True)[source]#
              +enumerate_support(expand=True)[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -4976,7 +4976,7 @@

              Binomial
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -5006,7 +5006,7 @@

              Binomial
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -5031,7 +5031,7 @@

Binomial

Categorical#

              -class torch.distributions.categorical.Categorical(probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.categorical.Categorical(probs=None, logits=None, validate_args=None)[source]#

              Bases: Distribution

              Creates a categorical distribution parameterized by either probs or logits (but not both).

              @@ -5077,17 +5077,17 @@

              Categorical
              -entropy()[source]#
              +entropy()[source]#

              -enumerate_support(expand=True)[source]#
              +enumerate_support(expand=True)[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -5097,7 +5097,7 @@

              Categorical
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -5127,7 +5127,7 @@

              Categorical
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -5152,7 +5152,7 @@

Categorical

Cauchy#

              -class torch.distributions.cauchy.Cauchy(loc, scale, validate_args=None)[source]#
              +class torch.distributions.cauchy.Cauchy(loc, scale, validate_args=None)[source]#

              Bases: Distribution

Samples from a Cauchy (Lorentz) distribution. The distribution of the ratio of independent normally distributed random variables with means 0 follows a Cauchy distribution.

@@ -5178,17 +5178,17 @@

              Cauchy
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -5198,12 +5198,12 @@

              Cauchy
              -icdf(value)[source]#
              +icdf(value)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -5218,7 +5218,7 @@

              Cauchy
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -5243,7 +5243,7 @@

Chi2#

              -class torch.distributions.chi2.Chi2(df, validate_args=None)[source]#
              +class torch.distributions.chi2.Chi2(df, validate_args=None)[source]#

              Bases: Gamma

              Creates a Chi-squared distribution parameterized by shape parameter df. This is exactly equivalent to Gamma(alpha=0.5*df, beta=0.5)
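A quick sketch of the API (df value arbitrary):

>>> import torch
>>> from torch.distributions import Chi2
>>> m = Chi2(torch.tensor([1.0]))
>>> m.sample()  # Chi-squared draw with df=1, equivalent to Gamma(0.5, 0.5)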

              @@ -5270,7 +5270,7 @@

              Chi2
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              @@ -5280,7 +5280,7 @@

ContinuousBernoulli#

              -class torch.distributions.continuous_bernoulli.ContinuousBernoulli(probs=None, logits=None, lims=(0.499, 0.501), validate_args=None)[source]#
              +class torch.distributions.continuous_bernoulli.ContinuousBernoulli(probs=None, logits=None, lims=(0.499, 0.501), validate_args=None)[source]#

              Bases: ExponentialFamily

              Creates a continuous Bernoulli distribution parameterized by probs or logits (but not both).
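A minimal usage sketch (probability value arbitrary):

>>> import torch
>>> from torch.distributions import ContinuousBernoulli
>>> m = ContinuousBernoulli(torch.tensor([0.3]))
>>> m.sample()  # value in (0, 1), unlike the discrete Bernoulli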

              @@ -5313,17 +5313,17 @@

              ContinuousBernoulli
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -5333,12 +5333,12 @@

              ContinuousBernoulli
              -icdf(value)[source]#
              +icdf(value)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -5363,7 +5363,7 @@

              ContinuousBernoulli
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -5373,7 +5373,7 @@

              ContinuousBernoulli
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -5398,7 +5398,7 @@

Dirichlet#

              -class torch.distributions.dirichlet.Dirichlet(concentration, validate_args=None)[source]#
              +class torch.distributions.dirichlet.Dirichlet(concentration, validate_args=None)[source]#

              Bases: ExponentialFamily

              Creates a Dirichlet distribution parameterized by concentration concentration.

              Example:
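A minimal usage sketch (concentration values arbitrary):

>>> import torch
>>> from torch.distributions import Dirichlet
>>> m = Dirichlet(torch.tensor([0.5, 0.5]))
>>> m.sample()  # nonnegative entries summing to 1 (a point on the simplex)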

              @@ -5420,12 +5420,12 @@

              Dirichlet
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -5435,7 +5435,7 @@

              Dirichlet
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -5450,7 +5450,7 @@

              Dirichlet
              -rsample(sample_shape=())[source]#
              +rsample(sample_shape=())[source]#
              Return type

              Tensor

              @@ -5475,7 +5475,7 @@

Exponential#

              -class torch.distributions.exponential.Exponential(rate, validate_args=None)[source]#
              +class torch.distributions.exponential.Exponential(rate, validate_args=None)[source]#

              Bases: ExponentialFamily

Creates an Exponential distribution parameterized by rate.

              Example:
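A minimal usage sketch (rate value arbitrary):

>>> import torch
>>> from torch.distributions import Exponential
>>> m = Exponential(torch.tensor([1.0]))
>>> m.sample()  # nonnegative draw with rate=1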

              @@ -5496,17 +5496,17 @@

              Exponential
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -5516,12 +5516,12 @@

              Exponential
              -icdf(value)[source]#
              +icdf(value)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -5536,7 +5536,7 @@

              Exponential
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -5566,7 +5566,7 @@

FisherSnedecor#

              -class torch.distributions.fishersnedecor.FisherSnedecor(df1, df2, validate_args=None)[source]#
              +class torch.distributions.fishersnedecor.FisherSnedecor(df1, df2, validate_args=None)[source]#

              Bases: Distribution

              Creates a Fisher-Snedecor distribution parameterized by df1 and df2.

              Example:
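A minimal usage sketch (degrees of freedom arbitrary):

>>> import torch
>>> from torch.distributions import FisherSnedecor
>>> m = FisherSnedecor(torch.tensor([1.0]), torch.tensor([2.0]))
>>> m.sample()  # F-distributed with df1=1, df2=2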

              @@ -5590,7 +5590,7 @@

              FisherSnedecor
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              @@ -5600,7 +5600,7 @@

              FisherSnedecor
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -5615,7 +5615,7 @@

              FisherSnedecor
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -5640,7 +5640,7 @@

Gamma#

              -class torch.distributions.gamma.Gamma(concentration, rate, validate_args=None)[source]#
              +class torch.distributions.gamma.Gamma(concentration, rate, validate_args=None)[source]#

              Bases: ExponentialFamily

              Creates a Gamma distribution parameterized by shape concentration and rate.

              Example:
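A minimal usage sketch (parameter values arbitrary):

>>> import torch
>>> from torch.distributions import Gamma
>>> m = Gamma(torch.tensor([1.0]), torch.tensor([1.0]))
>>> m.sample()  # Gamma distributed with concentration=1, rate=1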

              @@ -5666,17 +5666,17 @@

              Gamma
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -5686,7 +5686,7 @@

              Gamma
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -5701,7 +5701,7 @@

              Gamma
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -5726,7 +5726,7 @@

GeneralizedPareto#

              -class torch.distributions.generalized_pareto.GeneralizedPareto(loc, scale, concentration, validate_args=None)[source]#
              +class torch.distributions.generalized_pareto.GeneralizedPareto(loc, scale, concentration, validate_args=None)[source]#

              Bases: Distribution

              Creates a Generalized Pareto distribution parameterized by loc, scale, and concentration.

The Generalized Pareto distribution is a family of continuous probability distributions on the real line.

@@ -5756,17 +5756,17 @@

              GeneralizedPareto
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -5776,22 +5776,22 @@

              GeneralizedPareto
              -icdf(value)[source]#
              +icdf(value)[source]#

              -log_cdf(value)[source]#
              +log_cdf(value)[source]#
              -log_prob(value)[source]#
              +log_prob(value)[source]#
              -log_survival_function(value)[source]#
              +log_survival_function(value)[source]#
              @@ -5806,7 +5806,7 @@

              GeneralizedPareto
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#

              @@ -5831,7 +5831,7 @@

Geometric#

              -class torch.distributions.geometric.Geometric(probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.geometric.Geometric(probs=None, logits=None, validate_args=None)[source]#

              Bases: Distribution

              Creates a Geometric distribution parameterized by probs, where probs is the probability of success of Bernoulli trials.
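A minimal usage sketch (probability value arbitrary):

>>> import torch
>>> from torch.distributions import Geometric
>>> m = Geometric(torch.tensor([0.3]))
>>> m.sample()  # nonnegative integer: failures before the first success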

              @@ -5863,17 +5863,17 @@

              Geometric
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -5898,7 +5898,7 @@

              Geometric
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -5918,7 +5918,7 @@

Gumbel#

              -class torch.distributions.gumbel.Gumbel(loc, scale, validate_args=None)[source]#
              +class torch.distributions.gumbel.Gumbel(loc, scale, validate_args=None)[source]#

              Bases: TransformedDistribution

              Samples from a Gumbel Distribution.

              Examples:
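A minimal usage sketch (parameter values arbitrary):

>>> import torch
>>> from torch.distributions import Gumbel
>>> m = Gumbel(torch.tensor([1.0]), torch.tensor([2.0]))
>>> m.sample()  # Gumbel distributed with loc=1, scale=2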

              @@ -5942,17 +5942,17 @@

              Gumbel
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -5987,7 +5987,7 @@

HalfCauchy#

              -class torch.distributions.half_cauchy.HalfCauchy(scale, validate_args=None)[source]#
              +class torch.distributions.half_cauchy.HalfCauchy(scale, validate_args=None)[source]#

              Bases: TransformedDistribution

              Creates a half-Cauchy distribution parameterized by scale where:

X ~ Cauchy(0, scale)
Y = |X| ~ HalfCauchy(scale)
              @@ -6017,17 +6017,17 @@ 

              HalfCauchy
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -6037,12 +6037,12 @@

              HalfCauchy
              -icdf(prob)[source]#
              +icdf(prob)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -6077,7 +6077,7 @@

HalfNormal#

              -class torch.distributions.half_normal.HalfNormal(scale, validate_args=None)[source]#
              +class torch.distributions.half_normal.HalfNormal(scale, validate_args=None)[source]#

              Bases: TransformedDistribution

              Creates a half-normal distribution parameterized by scale where:

X ~ Normal(0, scale)
Y = |X| ~ HalfNormal(scale)
              @@ -6107,17 +6107,17 @@ 

              HalfNormal
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -6127,12 +6127,12 @@

              HalfNormal
              -icdf(prob)[source]#
              +icdf(prob)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -6167,7 +6167,7 @@

Independent#

              -class torch.distributions.independent.Independent(base_distribution, reinterpreted_batch_ndims, validate_args=None)[source]#
              +class torch.distributions.independent.Independent(base_distribution, reinterpreted_batch_ndims, validate_args=None)[source]#

              Bases: Distribution, Generic[D]

              Reinterprets some of the batch dims of a distribution as event dims.

This is mainly useful for changing the shape of the result of log_prob().

@@ -6211,17 +6211,17 @@

              Independent
              -entropy()[source]#
              +entropy()[source]#

              -enumerate_support(expand=True)[source]#
              +enumerate_support(expand=True)[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -6236,7 +6236,7 @@

              Independent
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -6251,7 +6251,7 @@

              Independent
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -6261,7 +6261,7 @@

              Independent
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -6291,7 +6291,7 @@

InverseGamma#

              -class torch.distributions.inverse_gamma.InverseGamma(concentration, rate, validate_args=None)[source]#
              +class torch.distributions.inverse_gamma.InverseGamma(concentration, rate, validate_args=None)[source]#

              Bases: TransformedDistribution

Creates an inverse gamma distribution parameterized by concentration and rate where:

X ~ Gamma(concentration, rate)
Y = 1 / X ~ InverseGamma(concentration, rate)

              @@ -6332,12 +6332,12 @@

              InverseGamma
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -6377,7 +6377,7 @@

Kumaraswamy#

              -class torch.distributions.kumaraswamy.Kumaraswamy(concentration1, concentration0, validate_args=None)[source]#
              +class torch.distributions.kumaraswamy.Kumaraswamy(concentration1, concentration0, validate_args=None)[source]#

              Bases: TransformedDistribution

              Samples from a Kumaraswamy distribution.

              Example:
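A minimal usage sketch (concentration values arbitrary):

>>> import torch
>>> from torch.distributions import Kumaraswamy
>>> m = Kumaraswamy(torch.tensor([1.0]), torch.tensor([1.0]))
>>> m.sample()  # value in (0, 1); reduces to Uniform(0, 1) for unit concentrations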

              @@ -6403,12 +6403,12 @@

              Kumaraswamy
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -6443,7 +6443,7 @@

LKJCholesky#

              -class torch.distributions.lkj_cholesky.LKJCholesky(dim, concentration=1.0, validate_args=None)[source]#
              +class torch.distributions.lkj_cholesky.LKJCholesky(dim, concentration=1.0, validate_args=None)[source]#

              Bases: Distribution

LKJ distribution for lower Cholesky factor of correlation matrices. The distribution is controlled by concentration parameter \eta

@@ -6488,17 +6488,17 @@

              LKJCholesky
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#
              @@ -6513,7 +6513,7 @@

Laplace#

              -class torch.distributions.laplace.Laplace(loc, scale, validate_args=None)[source]#
              +class torch.distributions.laplace.Laplace(loc, scale, validate_args=None)[source]#

              Bases: Distribution

              Creates a Laplace distribution parameterized by loc and scale.

              Example:
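A minimal usage sketch (parameter values arbitrary):

>>> import torch
>>> from torch.distributions import Laplace
>>> m = Laplace(torch.tensor([0.0]), torch.tensor([1.0]))
>>> m.sample()  # Laplace distributed with loc=0, scale=1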

              @@ -6537,17 +6537,17 @@

              Laplace
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -6557,12 +6557,12 @@

              Laplace
              -icdf(value)[source]#
              +icdf(value)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -6577,7 +6577,7 @@

              Laplace
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -6607,7 +6607,7 @@

LogNormal#

              -class torch.distributions.log_normal.LogNormal(loc, scale, validate_args=None)[source]#
              +class torch.distributions.log_normal.LogNormal(loc, scale, validate_args=None)[source]#

              Bases: TransformedDistribution

Creates a log-normal distribution parameterized by loc and scale where:

X ~ Normal(loc, scale)
Y = exp(X) ~ LogNormal(loc, scale)

              @@ -6641,12 +6641,12 @@

              LogNormal
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -6691,7 +6691,7 @@

LowRankMultivariateNormal#

              -class torch.distributions.lowrank_multivariate_normal.LowRankMultivariateNormal(loc, cov_factor, cov_diag, validate_args=None)[source]#
              +class torch.distributions.lowrank_multivariate_normal.LowRankMultivariateNormal(loc, cov_factor, cov_diag, validate_args=None)[source]#

              Bases: Distribution

Creates a multivariate normal distribution with covariance matrix having a low-rank form parameterized by cov_factor and cov_diag:

covariance_matrix = cov_factor @ cov_factor.T + cov_diag

              @@ -6740,12 +6740,12 @@

              LowRankMultivariateNormal
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -6755,7 +6755,7 @@

              LowRankMultivariateNormal
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -6775,7 +6775,7 @@

              LowRankMultivariateNormal
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -6805,7 +6805,7 @@

MixtureSameFamily#

              -class torch.distributions.mixture_same_family.MixtureSameFamily(mixture_distribution, component_distribution, validate_args=None)[source]#
              +class torch.distributions.mixture_same_family.MixtureSameFamily(mixture_distribution, component_distribution, validate_args=None)[source]#

              Bases: Distribution

The MixtureSameFamily distribution implements a (batch of) mixture distribution where all components are from different parameterizations of the same distribution type.

@@ -6856,7 +6856,7 @@

              MixtureSameFamily
              -cdf(x)[source]#
              +cdf(x)[source]#

              @@ -6866,7 +6866,7 @@

              MixtureSameFamily
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              @@ -6876,7 +6876,7 @@

              MixtureSameFamily
              -log_prob(x)[source]#
              +log_prob(x)[source]#

              @@ -6891,7 +6891,7 @@

              MixtureSameFamily
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -6916,7 +6916,7 @@

Multinomial#

              -class torch.distributions.multinomial.Multinomial(total_count=1, probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.multinomial.Multinomial(total_count=1, probs=None, logits=None, validate_args=None)[source]#

              Bases: Distribution

Creates a Multinomial distribution parameterized by total_count and either probs or logits (but not both). The innermost dimension of probs indexes over categories. All other dimensions index over batches.

@@ -6964,17 +6964,17 @@

              Multinomial
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -6999,7 +6999,7 @@

              Multinomial
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -7029,7 +7029,7 @@

MultivariateNormal#

              -class torch.distributions.multivariate_normal.MultivariateNormal(loc, covariance_matrix=None, precision_matrix=None, scale_tril=None, validate_args=None)[source]#
              +class torch.distributions.multivariate_normal.MultivariateNormal(loc, covariance_matrix=None, precision_matrix=None, scale_tril=None, validate_args=None)[source]#

              Bases: Distribution

              Creates a multivariate normal (also called Gaussian) distribution parameterized by a mean vector and a covariance matrix.
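A minimal usage sketch (mean and covariance arbitrary):

>>> import torch
>>> from torch.distributions import MultivariateNormal
>>> m = MultivariateNormal(torch.zeros(2), torch.eye(2))
>>> m.sample()  # 2-dimensional draw with zero mean and identity covariance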

              @@ -7077,12 +7077,12 @@

              MultivariateNormal
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -7092,7 +7092,7 @@

              MultivariateNormal
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -7112,7 +7112,7 @@

              MultivariateNormal
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -7142,7 +7142,7 @@

NegativeBinomial#

              -class torch.distributions.negative_binomial.NegativeBinomial(total_count, probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.negative_binomial.NegativeBinomial(total_count, probs=None, logits=None, validate_args=None)[source]#

              Bases: Distribution

Creates a Negative Binomial distribution, i.e. distribution of the number of successful independent and identical Bernoulli trials before total_count failures are achieved.

@@ -7166,12 +7166,12 @@

              NegativeBinomial
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -7201,7 +7201,7 @@

              NegativeBinomial
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -7221,7 +7221,7 @@

Normal#

              -class torch.distributions.normal.Normal(loc, scale, validate_args=None)[source]#
              +class torch.distributions.normal.Normal(loc, scale, validate_args=None)[source]#

              Bases: ExponentialFamily

              Creates a normal (also called Gaussian) distribution parameterized by loc and scale.
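A minimal usage sketch (parameter values arbitrary):

>>> import torch
>>> from torch.distributions import Normal
>>> m = Normal(torch.tensor([0.0]), torch.tensor([1.0]))
>>> m.sample()                        # standard normal draw
>>> m.log_prob(torch.tensor([0.0]))   # log density at 0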

              @@ -7247,17 +7247,17 @@

              Normal
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -7267,12 +7267,12 @@

              Normal
              -icdf(value)[source]#
              +icdf(value)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -7287,7 +7287,7 @@

              Normal
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -7297,7 +7297,7 @@

              Normal
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -7322,7 +7322,7 @@

OneHotCategorical#

              -class torch.distributions.one_hot_categorical.OneHotCategorical(probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.one_hot_categorical.OneHotCategorical(probs=None, logits=None, validate_args=None)[source]#

              Bases: Distribution

              Creates a one-hot categorical distribution parameterized by probs or logits.
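A minimal usage sketch (probabilities arbitrary):

>>> import torch
>>> from torch.distributions import OneHotCategorical
>>> m = OneHotCategorical(torch.tensor([0.25, 0.25, 0.25, 0.25]))
>>> m.sample()  # a one-hot vector of length 4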

              @@ -7360,17 +7360,17 @@

              OneHotCategorical
              -entropy()[source]#
              +entropy()[source]#

              -enumerate_support(expand=True)[source]#
              +enumerate_support(expand=True)[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -7380,7 +7380,7 @@

              OneHotCategorical
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -7410,7 +7410,7 @@

              OneHotCategorical
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -7430,7 +7430,7 @@

Pareto#

              -class torch.distributions.pareto.Pareto(scale, alpha, validate_args=None)[source]#
              +class torch.distributions.pareto.Pareto(scale, alpha, validate_args=None)[source]#

              Bases: TransformedDistribution

              Samples from a Pareto Type 1 distribution.

              Example:
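A minimal usage sketch (parameter values arbitrary):

>>> import torch
>>> from torch.distributions import Pareto
>>> m = Pareto(torch.tensor([1.0]), torch.tensor([1.0]))
>>> m.sample()  # draw >= scale, with tail index alpha=1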

              @@ -7454,7 +7454,7 @@

              Pareto
              -entropy()[source]#
              +entropy()[source]#
              Return type

              Tensor

              @@ -7464,7 +7464,7 @@

              Pareto
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              Return type

              Pareto

              @@ -7504,7 +7504,7 @@

Poisson#

              -class torch.distributions.poisson.Poisson(rate, validate_args=None)[source]#
              +class torch.distributions.poisson.Poisson(rate, validate_args=None)[source]#

              Bases: ExponentialFamily

              Creates a Poisson distribution parameterized by rate, the rate parameter.

Samples are nonnegative integers, with a pmf given by

\mathrm{rate}^k \frac{e^{-\mathrm{rate}}}{k!}
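A minimal usage sketch (rate value arbitrary):

>>> import torch
>>> from torch.distributions import Poisson
>>> m = Poisson(torch.tensor([4.0]))
>>> m.sample()  # nonnegative integer with mean rate=4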

              @@ -7529,12 +7529,12 @@

              Poisson
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -7549,7 +7549,7 @@

              Poisson
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              @@ -7569,7 +7569,7 @@

RelaxedBernoulli#

              -class torch.distributions.relaxed_bernoulli.RelaxedBernoulli(temperature, probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.relaxed_bernoulli.RelaxedBernoulli(temperature, probs=None, logits=None, validate_args=None)[source]#

              Bases: TransformedDistribution

Creates a RelaxedBernoulli distribution, parametrized by temperature, and either probs or logits (but not both).

@@ -7603,7 +7603,7 @@

              RelaxedBernoulli
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              @@ -7638,7 +7638,7 @@

LogitRelaxedBernoulli#

              -class torch.distributions.relaxed_bernoulli.LogitRelaxedBernoulli(temperature, probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.relaxed_bernoulli.LogitRelaxedBernoulli(temperature, probs=None, logits=None, validate_args=None)[source]#

              Bases: Distribution

Creates a LogitRelaxedBernoulli distribution parameterized by probs or logits (but not both), which is the logit of a RelaxedBernoulli distribution.

@@ -7664,12 +7664,12 @@

              LogitRelaxedBernoulli
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -7689,7 +7689,7 @@

              LogitRelaxedBernoulli
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -7709,7 +7709,7 @@

RelaxedOneHotCategorical#

              -class torch.distributions.relaxed_categorical.RelaxedOneHotCategorical(temperature, probs=None, logits=None, validate_args=None)[source]#
              +class torch.distributions.relaxed_categorical.RelaxedOneHotCategorical(temperature, probs=None, logits=None, validate_args=None)[source]#

              Bases: TransformedDistribution

Creates a RelaxedOneHotCategorical distribution parametrized by temperature, and either probs or logits.

@@ -7743,7 +7743,7 @@

              RelaxedOneHotCategorical
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              @@ -7778,7 +7778,7 @@

StudentT#

              -class torch.distributions.studentT.StudentT(df, loc=0.0, scale=1.0, validate_args=None)[source]#
              +class torch.distributions.studentT.StudentT(df, loc=0.0, scale=1.0, validate_args=None)[source]#

              Bases: Distribution

              Creates a Student’s t-distribution parameterized by degree of freedom df, mean loc and scale scale.
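A minimal usage sketch (degrees of freedom arbitrary):

>>> import torch
>>> from torch.distributions import StudentT
>>> m = StudentT(torch.tensor([2.0]))
>>> m.sample()  # heavy-tailed draw with df=2, loc=0, scale=1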

              @@ -7804,12 +7804,12 @@

              StudentT
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -7819,7 +7819,7 @@

              StudentT
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -7834,7 +7834,7 @@

              StudentT
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -7859,7 +7859,7 @@

TransformedDistribution#

              -class torch.distributions.transformed_distribution.TransformedDistribution(base_distribution, transforms, validate_args=None)[source]#
              +class torch.distributions.transformed_distribution.TransformedDistribution(base_distribution, transforms, validate_args=None)[source]#

              Bases: Distribution

              Extension of the Distribution class, which applies a sequence of Transforms to a base distribution. Let f be the composition of transforms applied:

              @@ -7899,14 +7899,14 @@

              TransformedDistribution
              -cdf(value)[source]#
              +cdf(value)[source]#

              Computes the cumulative distribution function by inverting the transform(s) and computing the score of the base distribution.

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -7916,21 +7916,21 @@

              TransformedDistribution
              -icdf(value)[source]#
              +icdf(value)[source]#

              Computes the inverse cumulative distribution function using transform(s) and computing the score of the base distribution.

              -log_prob(value)[source]#
              +log_prob(value)[source]#

              Scores the sample by inverting the transform(s) and computing the score using the score of the base distribution and the log abs det jacobian.

              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#

Generates a sample_shape shaped reparameterized sample or sample_shape shaped batch of reparameterized samples if the distribution parameters are batched. Samples first from base distribution and applies transform() for every transform in the list.

@@ -7944,7 +7944,7 @@

              TransformedDistribution
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched. Samples first from base distribution and applies transform() for every transform in the list.

@@ -7968,7 +7968,7 @@

Uniform#

              -class torch.distributions.uniform.Uniform(low, high, validate_args=None)[source]#
              +class torch.distributions.uniform.Uniform(low, high, validate_args=None)[source]#

              Bases: Distribution

              Generates uniformly distributed random samples from the half-open interval [low, high).
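A minimal usage sketch (bounds arbitrary):

>>> import torch
>>> from torch.distributions import Uniform
>>> m = Uniform(torch.tensor([0.0]), torch.tensor([5.0]))
>>> m.sample()  # uniformly distributed in [0.0, 5.0)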

              @@ -7993,17 +7993,17 @@

              Uniform
              -cdf(value)[source]#
              +cdf(value)[source]#

              -entropy()[source]#
              +entropy()[source]#
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -8013,12 +8013,12 @@

              Uniform
              -icdf(value)[source]#
              +icdf(value)[source]#

              -log_prob(value)[source]#
              +log_prob(value)[source]#
              @@ -8033,7 +8033,7 @@

              Uniform
              -rsample(sample_shape=torch.Size([]))[source]#
              +rsample(sample_shape=torch.Size([]))[source]#
              Return type

              Tensor

              @@ -8068,7 +8068,7 @@

VonMises#

              -class torch.distributions.von_mises.VonMises(loc, concentration, validate_args=None)[source]#
              +class torch.distributions.von_mises.VonMises(loc, concentration, validate_args=None)[source]#

              Bases: Distribution

              A circular von Mises distribution.

This implementation uses polar coordinates. The loc and value args can be any real number (to facilitate unconstrained optimization), but are interpreted as angles modulo 2π.

@@ -8097,7 +8097,7 @@

              VonMises
              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#

              @@ -8107,7 +8107,7 @@

              VonMises
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -8123,7 +8123,7 @@

              VonMises
              -sample(sample_shape=torch.Size([]))[source]#
              +sample(sample_shape=torch.Size([]))[source]#

              The sampling algorithm for the von Mises distribution is based on the following paper: D.J. Best and N.I. Fisher, “Efficient simulation of the von Mises distribution.” Applied Statistics (1979): 152-157.

              @@ -8150,7 +8150,7 @@

Weibull#

              -class torch.distributions.weibull.Weibull(scale, concentration, validate_args=None)[source]#
              +class torch.distributions.weibull.Weibull(scale, concentration, validate_args=None)[source]#

              Bases: TransformedDistribution

              Samples from a two-parameter Weibull distribution.

Example:
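A minimal usage sketch (parameter values arbitrary):

>>> import torch
>>> from torch.distributions import Weibull
>>> m = Weibull(torch.tensor([1.0]), torch.tensor([1.0]))
>>> m.sample()  # Weibull distributed with scale=1, concentration=1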

              @@ -8175,12 +8175,12 @@

              Weibull
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -8210,7 +8210,7 @@

Wishart#

              -class torch.distributions.wishart.Wishart(df, covariance_matrix=None, precision_matrix=None, scale_tril=None, validate_args=None)[source]#
              +class torch.distributions.wishart.Wishart(df, covariance_matrix=None, precision_matrix=None, scale_tril=None, validate_args=None)[source]#

              Bases: ExponentialFamily

Creates a Wishart distribution parameterized by a symmetric positive definite matrix \Sigma, or its Cholesky decomposition \mathbf{\Sigma} = \mathbf{L}\mathbf{L}^\top

              @@ -8258,12 +8258,12 @@

              Wishart
              -entropy()[source]#
              +entropy()[source]#

              -expand(batch_shape, _instance=None)[source]#
              +expand(batch_shape, _instance=None)[source]#
              @@ -8273,7 +8273,7 @@

              Wishart
              -log_prob(value)[source]#
              +log_prob(value)[source]#

              @@ -8293,7 +8293,7 @@

              Wishart
              -rsample(sample_shape=torch.Size([]), max_try_correction=None)[source]#
              +rsample(sample_shape=torch.Size([]), max_try_correction=None)[source]#

              Warning

In some cases, the sampling algorithm based on Bartlett decomposition may return singular matrix samples.

@@ -8331,7 +8331,7 @@


              KL Divergence#

              -torch.distributions.kl.kl_divergence(p, q)[source]#
              +torch.distributions.kl.kl_divergence(p, q)[source]#

Compute Kullback-Leibler divergence KL(p \| q) between two distributions.

KL(p \| q) = \int p(x) \log\frac{p(x)}{q(x)} \,dx
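For instance, a closed-form KL between two univariate normals (values arbitrary):

>>> from torch.distributions import Normal, kl_divergence
>>> kl_divergence(Normal(0.0, 1.0), Normal(1.0, 2.0))  # tensor holding KL(p || q)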
              @@ -8448,7 +8448,7 @@

              -torch.distributions.kl.register_kl(type_p, type_q)[source]#
              +torch.distributions.kl.register_kl(type_p, type_q)[source]#

              Decorator to register a pairwise function with kl_divergence(). Usage:

              @register_kl(Normal, Normal)
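def kl_normal_normal(p, q):
    # illustrative continuation only: the registered function receives the
    # two distribution instances and returns KL(p || q) as a Tensor
    ...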
              @@ -8484,7 +8484,7 @@ 


              Transforms#

              -class torch.distributions.transforms.AbsTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.AbsTransform(cache_size=0)[source]#

Transform via the mapping y = |x|.

              @@ -8492,7 +8492,7 @@

              -class torch.distributions.transforms.AffineTransform(loc, scale, event_dim=0, cache_size=0)[source]#
              +class torch.distributions.transforms.AffineTransform(loc, scale, event_dim=0, cache_size=0)[source]#

Transform via the pointwise affine mapping y = \text{loc} + \text{scale} \times x.

              Parameters
              @@ -8509,7 +8509,7 @@

              -class torch.distributions.transforms.CatTransform(tseq, dim=0, lengths=None, cache_size=0)[source]#
              +class torch.distributions.transforms.CatTransform(tseq, dim=0, lengths=None, cache_size=0)[source]#

              Transform functor that applies a sequence of transforms tseq component-wise to each submatrix at dim, of length lengths[dim], in a way compatible with torch.cat().

              @@ -8527,7 +8527,7 @@

              -class torch.distributions.transforms.ComposeTransform(parts, cache_size=0)[source]#
              +class torch.distributions.transforms.ComposeTransform(parts, cache_size=0)[source]#

              Composes multiple transforms in a chain. The transforms being composed are responsible for caching.

              @@ -8543,7 +8543,7 @@

              -class torch.distributions.transforms.CorrCholeskyTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.CorrCholeskyTransform(cache_size=0)[source]#

Transforms an unconstrained real vector x with length D*(D-1)/2 into the Cholesky factor of a D-dimensional correlation matrix. This Cholesky factor is a lower triangular matrix with positive diagonals and unit Euclidean norm for each row.

@@ -8577,7 +8577,7 @@

              -class torch.distributions.transforms.CumulativeDistributionTransform(distribution, cache_size=0)[source]#
              +class torch.distributions.transforms.CumulativeDistributionTransform(distribution, cache_size=0)[source]#

              Transform via the cumulative distribution function of a probability distribution.

              Parameters
              @@ -8599,7 +8599,7 @@

              -class torch.distributions.transforms.ExpTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.ExpTransform(cache_size=0)[source]#

Transform via the mapping y = \exp(x).

              @@ -8607,7 +8607,7 @@

              -class torch.distributions.transforms.IndependentTransform(base_transform, reinterpreted_batch_ndims, cache_size=0)[source]#
              +class torch.distributions.transforms.IndependentTransform(base_transform, reinterpreted_batch_ndims, cache_size=0)[source]#

Wrapper around another transform to treat reinterpreted_batch_ndims-many extra of the rightmost dimensions as dependent. This has no effect on the forward or backward transforms, but does sum out reinterpreted_batch_ndims-many of the rightmost dimensions in log_abs_det_jacobian.

@@ -8626,7 +8626,7 @@

              -class torch.distributions.transforms.LowerCholeskyTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.LowerCholeskyTransform(cache_size=0)[source]#

              Transform from unconstrained matrices to lower-triangular matrices with nonnegative diagonal entries.

This is useful for parameterizing positive definite matrices in terms of their Cholesky factorization.

@@ -8637,7 +8637,7 @@

              -class torch.distributions.transforms.PositiveDefiniteTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.PositiveDefiniteTransform(cache_size=0)[source]#

              Transform from unconstrained matrices to positive-definite matrices.

              @@ -8645,7 +8645,7 @@

              -class torch.distributions.transforms.PowerTransform(exponent, cache_size=0)[source]#
              +class torch.distributions.transforms.PowerTransform(exponent, cache_size=0)[source]#

Transform via the mapping y = x^{\text{exponent}}.

              @@ -8653,7 +8653,7 @@

              -class torch.distributions.transforms.ReshapeTransform(in_shape, out_shape, cache_size=0)[source]#
              +class torch.distributions.transforms.ReshapeTransform(in_shape, out_shape, cache_size=0)[source]#

              Unit Jacobian transform to reshape the rightmost part of a tensor.

              Note that in_shape and out_shape must have the same number of elements, just as for torch.Tensor.reshape().

              @@ -8671,7 +8671,7 @@

              -class torch.distributions.transforms.SigmoidTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.SigmoidTransform(cache_size=0)[source]#

Transform via the mapping y = \frac{1}{1 + \exp(-x)} and x = \text{logit}(y).

              @@ -8679,7 +8679,7 @@

              -class torch.distributions.transforms.SoftplusTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.SoftplusTransform(cache_size=0)[source]#

Transform via the mapping \text{Softplus}(x) = \log(1 + \exp(x)). The implementation reverts to the linear function when x > 20.

              @@ -8688,7 +8688,7 @@

              -class torch.distributions.transforms.TanhTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.TanhTransform(cache_size=0)[source]#

Transform via the mapping y = \tanh(x).

              It is equivalent to

              ComposeTransform(
              @@ -8709,7 +8709,7 @@ 

              -class torch.distributions.transforms.SoftmaxTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.SoftmaxTransform(cache_size=0)[source]#

Transform from unconstrained space to the simplex via y = \exp(x) then normalizing.

This is not bijective and cannot be used for HMC. However this acts mostly coordinate-wise (except for the final normalization), and thus is appropriate for coordinate-wise optimization algorithms.

@@ -8721,7 +8721,7 @@

              -class torch.distributions.transforms.StackTransform(tseq, dim=0, cache_size=0)[source]#
              +class torch.distributions.transforms.StackTransform(tseq, dim=0, cache_size=0)[source]#

              Transform functor that applies a sequence of transforms tseq component-wise to each submatrix at dim in a way compatible with torch.stack().

              @@ -8737,7 +8737,7 @@

              -class torch.distributions.transforms.StickBreakingTransform(cache_size=0)[source]#
              +class torch.distributions.transforms.StickBreakingTransform(cache_size=0)[source]#

              Transform from unconstrained space to the simplex of one additional dimension via a stick-breaking process.

This transform arises as an iterated sigmoid transform in a stick-breaking construction of the Dirichlet distribution.

@@ -8752,7 +8752,7 @@

              -class torch.distributions.transforms.Transform(cache_size=0)[source]#
              +class torch.distributions.transforms.Transform(cache_size=0)[source]#

Abstract class for invertible transformations with computable log det jacobians. They are primarily used in torch.distributions.TransformedDistribution.

              @@ -8811,20 +8811,20 @@

              -log_abs_det_jacobian(x, y)[source]#
              +log_abs_det_jacobian(x, y)[source]#

              Computes the log det jacobian log |dy/dx| given input and output.

              -forward_shape(shape)[source]#
              +forward_shape(shape)[source]#

              Infers the shape of the forward computation, given the input shape. Defaults to preserving shape.

              -inverse_shape(shape)[source]#
              +inverse_shape(shape)[source]#

              Infers the shapes of the inverse computation, given the output shape. Defaults to preserving shape.

              @@ -8836,7 +8836,7 @@


              Constraints#

              -class torch.distributions.constraints.Constraint[source]#
              +class torch.distributions.constraints.Constraint[source]#

              Abstract base class for constraints.

              A constraint object represents a region over which a variable is valid, e.g. within which a variable can be optimized.
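For illustration, the prebuilt constraints expose the same check interface (constraint choice arbitrary):

>>> import torch
>>> from torch.distributions import constraints
>>> constraints.positive.check(torch.tensor([1.0, -2.0]))  # per-element validity mask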

              @@ -8853,7 +8853,7 @@

              -check(value)[source]#
              +check(value)[source]#

              Returns a byte tensor of sample_shape + batch_shape indicating whether each event in value satisfies this constraint.

              @@ -8862,55 +8862,55 @@

              -torch.distributions.constraints.cat[source]#
              +torch.distributions.constraints.cat[source]#

              alias of _Cat

              -torch.distributions.constraints.dependent_property[source]#
              +torch.distributions.constraints.dependent_property[source]#

              alias of _DependentProperty

              -torch.distributions.constraints.greater_than[source]#
              +torch.distributions.constraints.greater_than[source]#

              alias of _GreaterThan

              -torch.distributions.constraints.greater_than_eq[source]#
              +torch.distributions.constraints.greater_than_eq[source]#

              alias of _GreaterThanEq

              -torch.distributions.constraints.independent[source]#
              +torch.distributions.constraints.independent[source]#

              alias of _IndependentConstraint

              -torch.distributions.constraints.integer_interval[source]#
              +torch.distributions.constraints.integer_interval[source]#

              alias of _IntegerInterval

              -torch.distributions.constraints.interval[source]#
              +torch.distributions.constraints.interval[source]#

              alias of _Interval

              -torch.distributions.constraints.half_open_interval[source]#
              +torch.distributions.constraints.half_open_interval[source]#

              alias of _HalfOpenInterval

              -torch.distributions.constraints.is_dependent(constraint)[source]#
              +torch.distributions.constraints.is_dependent(constraint)[source]#

              Checks if constraint is a _Dependent object.

              Parameters
              @@ -8943,13 +8943,13 @@

              -torch.distributions.constraints.less_than[source]#
              +torch.distributions.constraints.less_than[source]#

              alias of _LessThan

              -class torch.distributions.constraints.MixtureSameFamilyConstraint(base_constraint)[source]#
              +class torch.distributions.constraints.MixtureSameFamilyConstraint(base_constraint)[source]#

Constraint for the MixtureSameFamily distribution that adds back the rightmost batch dimension before performing the validity check with the component distribution constraint.

@@ -8963,7 +8963,7 @@

              -check(value)[source]#
              +check(value)[source]#

              Check validity of value as a possible outcome of sampling the MixtureSameFamily distribution.

              @@ -8972,13 +8972,13 @@

              -torch.distributions.constraints.multinomial[source]#
              +torch.distributions.constraints.multinomial[source]#

              alias of _Multinomial

              -torch.distributions.constraints.stack[source]#
              +torch.distributions.constraints.stack[source]#

              alias of _Stack

              @@ -9051,11 +9051,11 @@

              -class torch.distributions.constraint_registry.ConstraintRegistry[source]#
              +class torch.distributions.constraint_registry.ConstraintRegistry[source]#

              Registry to link constraints to transforms.

              -register(constraint, factory=None)[source]#
              +register(constraint, factory=None)[source]#

              Registers a Constraint subclass in this registry. Usage:

              @my_registry.register(MyConstraintClass)
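def construct_transform(constraint):
    # illustrative continuation only: MyConstraintClass and MyTransform are
    # hypothetical names; a registered factory takes a constraint instance
    # and returns a Transform whose codomain satisfies that constraint
    return MyTransform()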
              diff --git a/2.9/dlpack.html b/2.9/dlpack.html
              index 87f4391352e..37bac8eed1f 100644
              --- a/2.9/dlpack.html
              +++ b/2.9/dlpack.html
              @@ -4396,7 +4396,7 @@ 

              torch.utils.dlpackCreated On: Jul 11, 2018 | Last Updated On: Jun 13, 2025

              -torch.utils.dlpack.from_dlpack(ext_tensor) Tensor[source]#
              +torch.utils.dlpack.from_dlpack(ext_tensor) Tensor[source]#

              Converts a tensor from an external library into a torch.Tensor.

The returned PyTorch tensor will share the memory with the input tensor (which may have come from another library). Note that in-place operations will therefore also affect the data of the input tensor.

diff --git a/2.9/elastic/agent.html b/2.9/elastic/agent.html
index 6251aea2128..24ff4ba44f1 100644
--- a/2.9/elastic/agent.html
+++ b/2.9/elastic/agent.html
@@ -4431,7 +4431,7 @@

Concepts

This section describes the high-level classes and concepts that are relevant to understanding the role of the agent in torchelastic.

              -class torch.distributed.elastic.agent.server.ElasticAgent[source]#
              +class torch.distributed.elastic.agent.server.ElasticAgent[source]#

              An agent process responsible for managing one or more worker processes.

The worker processes are assumed to be regular distributed PyTorch scripts. When the worker process is created by the agent, the agent provides the necessary information for the worker processes to properly initialize a torch process group.

@@ -4463,7 +4463,7 @@

              Concepts
              -abstract get_worker_group(role='default')[source]#
              +abstract get_worker_group(role='default')[source]#

              Return the WorkerGroup for the given role.

Note that the worker group is a mutable object and hence in a multi-threaded/process environment it may change state.

@@ -4478,7 +4478,7 @@

              Concepts
              -abstract run(role='default')[source]#
              +abstract run(role='default')[source]#

              Run the agent.

              Supports retrying the worker group on failures up to max_restarts.

              @@ -4499,7 +4499,7 @@

              Concepts
              -class torch.distributed.elastic.agent.server.WorkerSpec(role, local_world_size, rdzv_handler, fn=None, entrypoint=None, args=(), max_restarts=3, monitor_interval=0.1, master_port=None, master_addr=None, local_addr=None, event_log_handler='null', numa_options=None)[source]#
              +class torch.distributed.elastic.agent.server.WorkerSpec(role, local_world_size, rdzv_handler, fn=None, entrypoint=None, args=(), max_restarts=3, monitor_interval=0.1, master_port=None, master_addr=None, local_addr=None, event_log_handler='null', numa_options=None)[source]#

              Blueprint information about a particular type of worker.

For a given role, there must only exist a single worker spec. Worker spec is expected to be homogeneous across all nodes (machines), i.e. each node runs the same number of workers for a particular spec.

@@ -4532,7 +4532,7 @@

              Concepts
              -get_entrypoint_name()[source]#
              +get_entrypoint_name()[source]#

              Get the entry point name.

If the entrypoint is a function (e.g. Callable), returns its __qualname__; if the entrypoint is a binary (e.g. str), returns the binary name.

              @@ -4542,7 +4542,7 @@

              Concepts
              -class torch.distributed.elastic.agent.server.WorkerState(value)[source]#
              +class torch.distributed.elastic.agent.server.WorkerState(value)[source]#

              A state of the WorkerGroup.

Workers in a worker group change state as a unit. If a single worker in a worker group fails, the entire set is considered failed:

              @@ -4574,7 +4574,7 @@

              Concepts
              -static is_running(state)[source]#
              +static is_running(state)[source]#

              Return the state of the Worker.

              Returns
              @@ -4591,7 +4591,7 @@

              Concepts
              -class torch.distributed.elastic.agent.server.Worker(local_rank, global_rank=-1, role_rank=-1, world_size=-1, role_world_size=-1)[source]#
              +class torch.distributed.elastic.agent.server.Worker(local_rank, global_rank=-1, role_rank=-1, world_size=-1, role_world_size=-1)[source]#

              A worker instance.

Contrast this with WorkerSpec that represents the specifications of a worker. A Worker is created from a WorkerSpec. A Worker is to a WorkerSpec as an object is to a class.

@@ -4616,7 +4616,7 @@

              Concepts
              -class torch.distributed.elastic.agent.server.WorkerGroup(spec)[source]#
              +class torch.distributed.elastic.agent.server.WorkerGroup(spec)[source]#

              A set of Worker instances.

              The class defines a set of Worker instances for the given WorkerSpec managed by ElasticAgent. Whether the worker group contains cross instance workers or not depends on the implementation of the agent.

              @@ -4630,7 +4630,7 @@

              Implementations
              -class torch.distributed.elastic.agent.server.local_elastic_agent.LocalElasticAgent(spec, logs_specs, start_method='spawn', exit_barrier_timeout=300, log_line_prefix_template=None)[source]#
              +class torch.distributed.elastic.agent.server.local_elastic_agent.LocalElasticAgent(spec, logs_specs, start_method='spawn', exit_barrier_timeout=300, log_line_prefix_template=None)[source]#

              An implementation of torchelastic.agent.server.ElasticAgent that handles host-local workers.

              This agent is deployed per host and is configured to spawn n workers. When using GPUs, n maps to the number of GPUs available on the host.

              @@ -4716,7 +4716,7 @@
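A minimal sketch of driving the agent, assuming the spec from the WorkerSpec sketch above; the log directory is illustrative:

from torch.distributed.elastic.agent.server.local_elastic_agent import LocalElasticAgent
from torch.distributed.elastic.multiprocessing import DefaultLogsSpecs

agent = LocalElasticAgent(spec, logs_specs=DefaultLogsSpecs(log_dir="/tmp/elastic_logs"))
result = agent.run()   # retries the worker group up to spec.max_restarts
if result.is_failed():
    raise RuntimeError(f"workers failed: {result.failures}")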

              Extending the Agent
              -class torch.distributed.elastic.agent.server.SimpleElasticAgent(spec, exit_barrier_timeout=300)[source]#
              +class torch.distributed.elastic.agent.server.SimpleElasticAgent(spec, exit_barrier_timeout=300)[source]#

              An ElasticAgent that manages one particular type of worker role.

              An ElasticAgent that manages workers (WorkerGroup) for a single WorkerSpec such as one particular type of worker role.

              @@ -4724,7 +4724,7 @@

              Extending the Agent
              -_assign_worker_ranks(store, group_rank, group_world_size, spec)[source]#
              +_assign_worker_ranks(store, group_rank, group_world_size, spec)[source]#

              Determine proper ranks for worker processes.

Fast Path: when all workers have the same role and world size. We calculate the global rank to be group_rank * group_world_size + local_rank. And the
@@ -4760,7 +4760,7 @@

              Extending the Agent
              -_exit_barrier()[source]#
              +_exit_barrier()[source]#

              Define a barrier that keeps the agent process alive until all workers finish.

Wait for exit_barrier_timeout seconds for all agents to finish executing their local workers (either successfully or not). This
@@ -4770,7 +4770,7 @@

              Extending the Agent
              -_initialize_workers(worker_group)[source]#
              +_initialize_workers(worker_group)[source]#

              Start a fresh set of workers for the worker_group.

Essentially, a rendezvous followed by a start_workers. The caller should first call _stop_workers() to stop running workers
@@ -4784,7 +4784,7 @@

              Extending the Agent
              -abstract _monitor_workers(worker_group)[source]#
              +abstract _monitor_workers(worker_group)[source]#

              Check on the workers for the worker_group.

              This function also returns the new state of the worker group.

              @@ -4796,7 +4796,7 @@

              Extending the Agent
              -_rendezvous(worker_group)[source]#
              +_rendezvous(worker_group)[source]#

              Run rendezvous for the workers specified by the worker spec.

              Assigns workers a new global rank and world size. Updates the rendezvous store for the worker group.

              @@ -4806,7 +4806,7 @@

              Extending the Agent
              -_restart_workers(worker_group)[source]#
              +_restart_workers(worker_group)[source]#

              Restart (stops, rendezvous, starts) all local workers in the group.

              @@ -4814,7 +4814,7 @@

              Extending the Agent
              -abstract _shutdown(death_sig=Signals.SIGTERM)[source]#
              +abstract _shutdown(death_sig=Signals.SIGTERM)[source]#

              Clean up any resources that were allocated during the agent’s work.

              Parameters
              @@ -4825,7 +4825,7 @@

              Extending the Agent
              -abstract _start_workers(worker_group)[source]#
              +abstract _start_workers(worker_group)[source]#

              Start worker_group.spec.local_world_size number of workers.

This is done according to the worker spec for the worker group. Returns a map of local_rank to worker id.

              @@ -4838,7 +4838,7 @@

              Extending the Agent
              -abstract _stop_workers(worker_group)[source]#
              +abstract _stop_workers(worker_group)[source]#

              Stop all workers in the given worker group.

Implementers must deal with workers in all states defined by WorkerState. That is, it must gracefully handle stopping
@@ -4851,7 +4851,7 @@

              Extending the Agent
              -class torch.distributed.elastic.agent.server.api.RunResult(state, return_values=<factory>, failures=<factory>)[source]#
              +class torch.distributed.elastic.agent.server.api.RunResult(state, return_values=<factory>, failures=<factory>)[source]#

              Return results of the worker executions.

              Run results follow an “all-or-nothing” policy where the run is successful if and only if ALL local workers managed by this agent complete successfully.

              @@ -4900,7 +4900,7 @@

              Health Check Server
              -class torch.distributed.elastic.agent.server.health_check_server.HealthCheckServer(alive_callback, port, timeout)[source]#
              +class torch.distributed.elastic.agent.server.health_check_server.HealthCheckServer(alive_callback, port, timeout)[source]#

              Interface for health check monitoring server, which can be extended by starting tcp/http server on the specified port.

              @@ -4914,7 +4914,7 @@

              Health Check Server
              -start()[source]#
              +start()[source]#

Unsupported functionality for PyTorch; does not start any health check server

              @@ -4922,7 +4922,7 @@

              Health Check Server
              -stop()[source]#
              +stop()[source]#

Stops the health check server

              @@ -4932,7 +4932,7 @@

              Health Check Server
              -torch.distributed.elastic.agent.server.health_check_server.create_healthcheck_server(alive_callback, port, timeout)[source]#
              +torch.distributed.elastic.agent.server.health_check_server.create_healthcheck_server(alive_callback, port, timeout)[source]#

Creates a health check server object

              Return type
diff --git a/2.9/elastic/control_plane.html b/2.9/elastic/control_plane.html
index 0b59593aced..78a9015f867 100644
--- a/2.9/elastic/control_plane.html
+++ b/2.9/elastic/control_plane.html
@@ -4407,7 +4407,7 @@ into your application.

              -torch.distributed.elastic.control_plane.worker_main()[source]#
              +torch.distributed.elastic.control_plane.worker_main()[source]#

This is a context manager that wraps your main entry function. This combines the existing errors.record logic as well as a new _WorkerServer that exposes handlers via a unix socket specified by
diff --git a/2.9/elastic/errors.html b/2.9/elastic/errors.html
index 38b51cbd661..930853c8c55 100644
--- a/2.9/elastic/errors.html
+++ b/2.9/elastic/errors.html
@@ -4460,7 +4460,7 @@

              Methods and Classes#

              -torch.distributed.elastic.multiprocessing.errors.record(fn, error_handler=None)[source]#
              +torch.distributed.elastic.multiprocessing.errors.record(fn, error_handler=None)[source]#

              Syntactic sugar to record errors/exceptions that happened in the decorated function using the provided error_handler.

              Using this decorator is equivalent to:

              @@ -4501,7 +4501,7 @@
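For orientation, a minimal usage sketch of the @record decorator (the main entrypoint name is illustrative):

from torch.distributed.elastic.multiprocessing.errors import record

@record
def main():
    # any uncaught exception here is written, with metadata, to the
    # file named by TORCHELASTIC_ERROR_FILE before being re-raised
    ...

if __name__ == "__main__":
    main()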

              Methods and Classes
              -class torch.distributed.elastic.multiprocessing.errors.ChildFailedError(name, failures)[source]#
              +class torch.distributed.elastic.multiprocessing.errors.ChildFailedError(name, failures)[source]#

              Special exception type that can be raised from a function annotated with the @record decorator to have the child process’ (root exception) propagate up the stack as-is (e.g. without being wrapped in the parent’s traceback).

              @@ -4538,7 +4538,7 @@

              Methods and Classes
              -class torch.distributed.elastic.multiprocessing.errors.ErrorHandler[source]#
              +class torch.distributed.elastic.multiprocessing.errors.ErrorHandler[source]#

Write the provided exception object along with some other metadata about the error in a structured way in JSON format to an error file specified by the environment variable: TORCHELASTIC_ERROR_FILE. If this environment
@@ -4550,7 +4550,7 @@

              Methods and Classes
              -class torch.distributed.elastic.multiprocessing.errors.ProcessFailure(local_rank, pid, exitcode, error_file)[source]#
              +class torch.distributed.elastic.multiprocessing.errors.ProcessFailure(local_rank, pid, exitcode, error_file)[source]#

              Represent the failed process result. When the worker process fails, it may record failure root cause into the file.

Tries to read the failure timestamp from the provided error_file; if the error_file does not exist, the timestamp is the current
diff --git a/2.9/elastic/events.html b/2.9/elastic/events.html
index 115248aa1a3..2ee702e8f74 100644
--- a/2.9/elastic/events.html
+++ b/2.9/elastic/events.html
@@ -4417,14 +4417,14 @@

              API Methods#

              -torch.distributed.elastic.events.record(event, destination='null')[source]#
              +torch.distributed.elastic.events.record(event, destination='null')[source]#
              -torch.distributed.elastic.events.construct_and_record_rdzv_event(run_id, message, node_state, name='', hostname='', pid=None, master_endpoint='', local_id=None, rank=None)[source]#
              +torch.distributed.elastic.events.construct_and_record_rdzv_event(run_id, message, node_state, name='', hostname='', pid=None, master_endpoint='', local_id=None, rank=None)[source]#

              Initialize rendezvous event object and record its operations.

              Parameters
              @@ -4471,7 +4471,7 @@

              API Methods
              -torch.distributed.elastic.events.get_logging_handler(destination='null')[source]#
              +torch.distributed.elastic.events.get_logging_handler(destination='null')[source]#
              Return type

              Handler

              @@ -4484,7 +4484,7 @@

              API Methods#

              -class torch.distributed.elastic.events.api.Event(name, source, timestamp=0, metadata=<factory>)[source]#
              +class torch.distributed.elastic.events.api.Event(name, source, timestamp=0, metadata=<factory>)[source]#

              The class represents the generic event that occurs during the torchelastic job execution.

              The event can be any kind of meaningful action.

              @@ -4501,7 +4501,7 @@

              Event Objects
              -class torch.distributed.elastic.events.api.EventSource(value)[source]#
              +class torch.distributed.elastic.events.api.EventSource(value)[source]#

              Known identifiers of the event producers.

diff --git a/2.9/elastic/metrics.html b/2.9/elastic/metrics.html
index 5a6ec2ed009..43e832b89f7 100644
--- a/2.9/elastic/metrics.html
+++ b/2.9/elastic/metrics.html
@@ -4518,17 +4518,17 @@

              Metric Handlers
              -class torch.distributed.elastic.metrics.api.MetricHandler[source]#
              +class torch.distributed.elastic.metrics.api.MetricHandler[source]#

              -class torch.distributed.elastic.metrics.api.ConsoleMetricHandler[source]#
              +class torch.distributed.elastic.metrics.api.ConsoleMetricHandler[source]#
              -class torch.distributed.elastic.metrics.api.NullMetricHandler[source]#
              +class torch.distributed.elastic.metrics.api.NullMetricHandler[source]#

              @@ -4536,14 +4536,14 @@

              Metric Handlers#

              -torch.distributed.elastic.metrics.configure(handler, group=None)[source]#
              +torch.distributed.elastic.metrics.configure(handler, group=None)[source]#
              -torch.distributed.elastic.metrics.prof(fn=None, group='torchelastic')[source]#
              +torch.distributed.elastic.metrics.prof(fn=None, group='torchelastic')[source]#

              @profile decorator publishes duration.ms, count, success, failure metrics for the function that it decorates.

              The metric name defaults to the qualified name (class_name.def_name) of the function. If the function does not belong to a class, it uses the leaf module name instead.

              @@ -4564,7 +4564,7 @@

              Methods
              -torch.distributed.elastic.metrics.put_metric(metric_name, metric_value, metric_group='torchelastic')[source]#
              +torch.distributed.elastic.metrics.put_metric(metric_name, metric_value, metric_group='torchelastic')[source]#

              Publish a metric data point.

              Usage

              put_metric("metric_name", 1)
              diff --git a/2.9/elastic/multiprocessing.html b/2.9/elastic/multiprocessing.html
              index 021b90bc342..a4fa37e223a 100644
              --- a/2.9/elastic/multiprocessing.html
              +++ b/2.9/elastic/multiprocessing.html
              @@ -4454,7 +4454,7 @@
               

              Starting Multiple Workers#

              -torch.distributed.elastic.multiprocessing.start_processes(name, entrypoint, args, envs, logs_specs, log_line_prefixes=None, start_method='spawn', numa_options=None)[source]#
              +torch.distributed.elastic.multiprocessing.start_processes(name, entrypoint, args, envs, logs_specs, log_line_prefixes=None, start_method='spawn', numa_options=None)[source]#

              Start n copies of entrypoint processes with the provided options.

entrypoint is either a Callable (function) or a str (binary). The number of copies is determined by the number of entries for args and
@@ -4558,7 +4558,7 @@
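A minimal sketch for the Callable case, assuming the DefaultLogsSpecs documented below is used for log routing; paths and env values are illustrative:

from torch.distributed.elastic.multiprocessing import DefaultLogsSpecs, start_processes

def trainer(a, b):
    print(a + b)

ctx = start_processes(
    name="trainer",
    entrypoint=trainer,                  # Callable; a str would launch a binary
    args={0: (1, 2), 1: (3, 4)},         # one entry per local rank -> 2 copies
    envs={0: {"RANK": "0"}, 1: {"RANK": "1"}},
    logs_specs=DefaultLogsSpecs(log_dir="/tmp/run_logs"),
)
result = ctx.wait()                      # blocks until all copies finish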

              Starting Multiple Workers#

              -class torch.distributed.elastic.multiprocessing.api.PContext(name, entrypoint, args, envs, logs_specs, log_line_prefixes=None)[source]#
              +class torch.distributed.elastic.multiprocessing.api.PContext(name, entrypoint, args, envs, logs_specs, log_line_prefixes=None)[source]#

              The base class that standardizes operations over a set of processes that are launched via different mechanisms.

              The name PContext is intentional to disambiguate with torch.multiprocessing.ProcessContext.

              @@ -4573,7 +4573,7 @@

              Process Context
              -class torch.distributed.elastic.multiprocessing.api.MultiprocessContext(name, entrypoint, args, envs, start_method, logs_specs, log_line_prefixes=None, numa_options=None)[source]#
              +class torch.distributed.elastic.multiprocessing.api.MultiprocessContext(name, entrypoint, args, envs, start_method, logs_specs, log_line_prefixes=None, numa_options=None)[source]#

              PContext holding worker processes invoked as a function.

              @@ -4581,7 +4581,7 @@

              Process Context
              -class torch.distributed.elastic.multiprocessing.api.SubprocessContext(name, entrypoint, args, envs, logs_specs, log_line_prefixes=None, numa_options=None)[source]#
              +class torch.distributed.elastic.multiprocessing.api.SubprocessContext(name, entrypoint, args, envs, logs_specs, log_line_prefixes=None, numa_options=None)[source]#

              PContext holding worker processes invoked as a binary.

              @@ -4589,7 +4589,7 @@

              Process Context
              -class torch.distributed.elastic.multiprocessing.api.RunProcsResult(return_values=<factory>, failures=<factory>, stdouts=<factory>, stderrs=<factory>)[source]#
              +class torch.distributed.elastic.multiprocessing.api.RunProcsResult(return_values=<factory>, failures=<factory>, stdouts=<factory>, stderrs=<factory>)[source]#

              Results of a completed run of processes started with start_processes(). Returned by PContext.

              Note the following:

                @@ -4604,7 +4604,7 @@

                Process Context
                -class torch.distributed.elastic.multiprocessing.api.DefaultLogsSpecs(log_dir=None, redirects=Std.NONE, tee=Std.NONE, local_ranks_filter=None)[source]#
                +class torch.distributed.elastic.multiprocessing.api.DefaultLogsSpecs(log_dir=None, redirects=Std.NONE, tee=Std.NONE, local_ranks_filter=None)[source]#

                Default LogsSpecs implementation:

                • log_dir will be created if it doesn’t exist

@@ -4614,7 +4614,7 @@

                  Process Context
                  -reify(envs)[source]#
                  +reify(envs)[source]#

                  Uses following scheme to build log destination paths:

                  • <log_dir>/<rdzv_run_id>/attempt_<attempt>/<rank>/stdout.log

@@ -4632,7 +4632,7 @@

                    Process Context
                    -class torch.distributed.elastic.multiprocessing.api.LogsDest(stdouts=<factory>, stderrs=<factory>, tee_stdouts=<factory>, tee_stderrs=<factory>, error_files=<factory>)[source]#
                    +class torch.distributed.elastic.multiprocessing.api.LogsDest(stdouts=<factory>, stderrs=<factory>, tee_stdouts=<factory>, tee_stderrs=<factory>, error_files=<factory>)[source]#

                    For each log type, holds mapping of local rank ids to file paths.

                    @@ -4640,7 +4640,7 @@

                    Process Context
                    -class torch.distributed.elastic.multiprocessing.api.LogsSpecs(log_dir=None, redirects=Std.NONE, tee=Std.NONE, local_ranks_filter=None)[source]#
                    +class torch.distributed.elastic.multiprocessing.api.LogsSpecs(log_dir=None, redirects=Std.NONE, tee=Std.NONE, local_ranks_filter=None)[source]#

                    Defines logs processing and redirection for each worker process.

                    Parameters
                    @@ -4657,7 +4657,7 @@

                    Process Context
                    -abstract reify(envs)[source]#
                    +abstract reify(envs)[source]#

                    Given the environment variables, builds destination of log files for each of the local ranks.

                    Envs parameter contains env variables dict for each of the local ranks, where entries are defined in: _start_workers().

diff --git a/2.9/elastic/numa.html b/2.9/elastic/numa.html
index 61e8c023b97..29adf83b577 100644
--- a/2.9/elastic/numa.html
+++ b/2.9/elastic/numa.html
@@ -4405,14 +4405,14 @@

                    Created On: Jul 25, 2025 | Last Updated On: Aug 12, 2025

                    -class torch.numa.binding.AffinityMode(value)[source]#
                    +class torch.numa.binding.AffinityMode(value)[source]#

                    See behavior description for each affinity mode in torch.distributed.run.

                    -class torch.numa.binding.NumaOptions(affinity_mode: torch.numa.binding.AffinityMode, should_fall_back_if_binding_fails: bool = False)[source]#
                    +class torch.numa.binding.NumaOptions(affinity_mode: torch.numa.binding.AffinityMode, should_fall_back_if_binding_fails: bool = False)[source]#
                    @@ -4428,7 +4428,7 @@
                    -torch.numa.binding.maybe_temporarily_apply_numa_binding_to_current_thread(*, gpu_index, numa_options)[source]#
                    +torch.numa.binding.maybe_temporarily_apply_numa_binding_to_current_thread(*, gpu_index, numa_options)[source]#

1. Applies NUMA binding to the current thread, suitable for the thread that will be interacting with GPU gpu_index.
2. Resets to the original CPU affinity before exiting the context manager.

diff --git a/2.9/elastic/rendezvous.html b/2.9/elastic/rendezvous.html
index f532f34e106..c580324b370 100644
--- a/2.9/elastic/rendezvous.html
+++ b/2.9/elastic/rendezvous.html
@@ -4502,7 +4502,7 @@

                    Registry#

                    -class torch.distributed.elastic.rendezvous.RendezvousParameters(backend, endpoint, run_id, min_nodes, max_nodes, local_addr=None, **kwargs)[source]#
                    +class torch.distributed.elastic.rendezvous.RendezvousParameters(backend, endpoint, run_id, min_nodes, max_nodes, local_addr=None, **kwargs)[source]#

                    Hold the parameters to construct a RendezvousHandler.

                    Parameters
                    @@ -4519,7 +4519,7 @@

                    Registry
                    -get(key, default=None)[source]#
                    +get(key, default=None)[source]#

                    Return the value for key if key exists, else default.

                    Return type
                    @@ -4530,7 +4530,7 @@

                    Registry
                    -get_as_bool(key, default=None)[source]#
                    +get_as_bool(key, default=None)[source]#

                    Return the value for key as a bool.

                    Return type
                    @@ -4541,7 +4541,7 @@

                    Registry
                    -get_as_int(key, default=None)[source]#
                    +get_as_int(key, default=None)[source]#

                    Return the value for key as an int.

                    Return type
                    @@ -4554,7 +4554,7 @@

                    Registry
                    -class torch.distributed.elastic.rendezvous.RendezvousHandlerRegistry[source]#
                    +class torch.distributed.elastic.rendezvous.RendezvousHandlerRegistry[source]#

                    Represent a registry of RendezvousHandler backends.

                    @@ -4565,7 +4565,7 @@

                    Registry#

                    -class torch.distributed.elastic.rendezvous.RendezvousHandler[source]#
                    +class torch.distributed.elastic.rendezvous.RendezvousHandler[source]#

                    Main rendezvous interface.

                    Note

                    @@ -4575,7 +4575,7 @@

                    Handler
                    -abstract get_backend()[source]#
                    +abstract get_backend()[source]#

                    Return the name of the rendezvous backend.

                    Return type
                    @@ -4586,7 +4586,7 @@

                    Handler
                    -abstract get_run_id()[source]#
                    +abstract get_run_id()[source]#

                    Return the run id of the rendezvous.

The run id is a user-defined id that uniquely identifies an instance of a distributed application. It typically maps to a job id and is used to
@@ -4600,7 +4600,7 @@

                    Handler
                    -abstract is_closed()[source]#
                    +abstract is_closed()[source]#

                    Check whether the rendezvous has been closed.

                    A closed rendezvous means all future attempts to re-rendezvous within same job will fail.

                    @@ -4618,7 +4618,7 @@

                    Handler
                    -abstract next_rendezvous()[source]#
                    +abstract next_rendezvous()[source]#

                    Main entry-point into the rendezvous barrier.

Blocks until the rendezvous is complete and the current process is included in the formed worker group, or a timeout occurs, or the
@@ -4643,7 +4643,7 @@

                    Handler
                    -abstract num_nodes_waiting()[source]#
                    +abstract num_nodes_waiting()[source]#

Return the number of nodes that arrived late at the rendezvous barrier and hence were not included in the current worker group.

Callers should periodically call this method to check whether new
@@ -4658,13 +4658,13 @@

                    Handler
                    -abstract set_closed()[source]#
                    +abstract set_closed()[source]#

                    Mark the rendezvous as closed.

                    -abstract shutdown()[source]#
                    +abstract shutdown()[source]#

                    Close all resources that were open for the rendezvous.

                    Example:

                    rdzv_handler = ...
                    @@ -4697,7 +4697,7 @@ 
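The example above is cut off by the diff context; a hedged completion sketch, assuming next_rendezvous() returns the RendezvousInfo documented below:

rdzv_handler = ...   # obtained from the rendezvous registry (elided above)
try:
    rdzv_info = rdzv_handler.next_rendezvous()
    store, rank, world_size = (rdzv_info.store, rdzv_info.rank,
                               rdzv_info.world_size)
finally:
    rdzv_handler.shutdown()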

                    Handler#

                    -class torch.distributed.elastic.rendezvous.RendezvousInfo(store, rank, world_size, bootstrap_store_info)[source]#
                    +class torch.distributed.elastic.rendezvous.RendezvousInfo(store, rank, world_size, bootstrap_store_info)[source]#

                    Holds the information about the rendezvous.

                    @@ -4705,13 +4705,13 @@

                    Dataclasses
                    -class torch.distributed.elastic.rendezvous.api.RendezvousStoreInfo(master_addr, master_port)[source]#
                    +class torch.distributed.elastic.rendezvous.api.RendezvousStoreInfo(master_addr, master_port)[source]#

Store address and port that can be used to bootstrap trainer distributed comms.

                    -static build(rank, store)[source]#
                    +static build(rank, store)[source]#

Factory method; finds an unused new port on the rank0 host and shares addr/port info with all ranks.

If master_addr/master_port is known (useful when sharing an existing TCP store server), use the constructor.

                    @@ -4736,37 +4736,37 @@

                    Dataclasses#

                    -class torch.distributed.elastic.rendezvous.api.RendezvousError[source]#
                    +class torch.distributed.elastic.rendezvous.api.RendezvousError[source]#

                    Represents the base type for rendezvous errors.

                    -class torch.distributed.elastic.rendezvous.api.RendezvousClosedError[source]#
                    +class torch.distributed.elastic.rendezvous.api.RendezvousClosedError[source]#

                    Raised when a rendezvous is closed.

                    -class torch.distributed.elastic.rendezvous.api.RendezvousTimeoutError[source]#
                    +class torch.distributed.elastic.rendezvous.api.RendezvousTimeoutError[source]#

                    Raised when a rendezvous did not complete on time.

                    -class torch.distributed.elastic.rendezvous.api.RendezvousConnectionError[source]#
                    +class torch.distributed.elastic.rendezvous.api.RendezvousConnectionError[source]#

                    Raised when the connection to a rendezvous backend has failed.

                    -class torch.distributed.elastic.rendezvous.api.RendezvousStateError[source]#
                    +class torch.distributed.elastic.rendezvous.api.RendezvousStateError[source]#

                    Raised when the state of a rendezvous is corrupt.

                    -class torch.distributed.elastic.rendezvous.api.RendezvousGracefulExitError[source]#
                    +class torch.distributed.elastic.rendezvous.api.RendezvousGracefulExitError[source]#

Raised when a node was not included in the rendezvous and exits gracefully.

The exception is a mechanism to exit the stack; however, it does not mean a failure.

                    @@ -4778,7 +4778,7 @@

                    Implementations#

                    -torch.distributed.elastic.rendezvous.dynamic_rendezvous.create_handler(store, backend, params)[source]#
                    +torch.distributed.elastic.rendezvous.dynamic_rendezvous.create_handler(store, backend, params)[source]#

                    Create a new DynamicRendezvousHandler from the specified parameters.

                    Parameters
                    @@ -4830,13 +4830,13 @@

                    Dynamic Rendezvous
                    -class torch.distributed.elastic.rendezvous.dynamic_rendezvous.DynamicRendezvousHandler[source]#
                    +class torch.distributed.elastic.rendezvous.dynamic_rendezvous.DynamicRendezvousHandler[source]#

                    Represent a handler that sets up a rendezvous among a set of nodes.

                    -classmethod from_backend(run_id, store, backend, min_nodes, max_nodes, local_addr=None, timeout=None, keep_alive_interval=5, keep_alive_max_attempt=3)[source]#
                    +classmethod from_backend(run_id, store, backend, min_nodes, max_nodes, local_addr=None, timeout=None, keep_alive_interval=5, keep_alive_max_attempt=3)[source]#

                    Create a new DynamicRendezvousHandler.

                    Parameters
                    @@ -4861,11 +4861,11 @@
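A minimal construction sketch, assuming the C10d backend documented later on this page; host, port, and run id values are illustrative:

from torch.distributed import TCPStore
from torch.distributed.elastic.rendezvous.c10d_rendezvous_backend import C10dRendezvousBackend
from torch.distributed.elastic.rendezvous.dynamic_rendezvous import DynamicRendezvousHandler

store = TCPStore("localhost", 29400, is_master=True)
backend = C10dRendezvousBackend(store, "my_run_id")
handler = DynamicRendezvousHandler.from_backend(
    run_id="my_run_id",
    store=store,
    backend=backend,
    min_nodes=2,
    max_nodes=4,
)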

                    Dynamic Rendezvous
                    -class torch.distributed.elastic.rendezvous.dynamic_rendezvous.RendezvousBackend[source]#
                    +class torch.distributed.elastic.rendezvous.dynamic_rendezvous.RendezvousBackend[source]#

                    Represent a backend that holds the rendezvous state.

                    -abstract get_state()[source]#
                    +abstract get_state()[source]#

                    Get the rendezvous state.

                    Returns
                    @@ -4892,7 +4892,7 @@

                    Dynamic Rendezvous
                    -abstract set_state(state, token=None)[source]#
                    +abstract set_state(state, token=None)[source]#

                    Set the rendezvous state.

                    The new rendezvous state is set conditionally:

                    @@ -4937,7 +4937,7 @@

                    Dynamic Rendezvous
                    -class torch.distributed.elastic.rendezvous.dynamic_rendezvous.RendezvousTimeout(join=None, last_call=None, close=None, heartbeat=None)[source]#
                    +class torch.distributed.elastic.rendezvous.dynamic_rendezvous.RendezvousTimeout(join=None, last_call=None, close=None, heartbeat=None)[source]#

                    Hold the timeout configuration of a rendezvous.

                    Parameters
                    @@ -4983,7 +4983,7 @@

                    Dynamic Rendezvous#

                    -torch.distributed.elastic.rendezvous.c10d_rendezvous_backend.create_backend(params)[source]#
                    +torch.distributed.elastic.rendezvous.c10d_rendezvous_backend.create_backend(params)[source]#

                    Create a new C10dRendezvousBackend from the specified parameters.

                    @@ -5038,7 +5038,7 @@

                    C10d Backend
                    -class torch.distributed.elastic.rendezvous.c10d_rendezvous_backend.C10dRendezvousBackend(store, run_id)[source]#
                    +class torch.distributed.elastic.rendezvous.c10d_rendezvous_backend.C10dRendezvousBackend(store, run_id)[source]#

                    Represents a C10d-backed rendezvous backend.

                    Parameters
                    @@ -5051,7 +5051,7 @@

                    C10d Backend
                    -get_state()[source]#
                    +get_state()[source]#

                    See base class.

                    Return type
                    @@ -5068,7 +5068,7 @@

                    C10d Backend
                    -set_state(state, token=None)[source]#
                    +set_state(state, token=None)[source]#

                    See base class.

                    Return type
                    @@ -5084,7 +5084,7 @@

                    C10d Backend#

                    -torch.distributed.elastic.rendezvous.etcd_rendezvous_backend.create_backend(params)[source]#
                    +torch.distributed.elastic.rendezvous.etcd_rendezvous_backend.create_backend(params)[source]#

                    Create a new EtcdRendezvousBackend from the specified parameters.

                    @@ -5129,7 +5129,7 @@

                    Etcd Backend
                    -class torch.distributed.elastic.rendezvous.etcd_rendezvous_backend.EtcdRendezvousBackend(client, run_id, key_prefix=None, ttl=None)[source]#
                    +class torch.distributed.elastic.rendezvous.etcd_rendezvous_backend.EtcdRendezvousBackend(client, run_id, key_prefix=None, ttl=None)[source]#

                    Represents an etcd-based rendezvous backend.

                    Parameters
                    @@ -5143,7 +5143,7 @@

                    Etcd Backend
                    -get_state()[source]#
                    +get_state()[source]#

                    See base class.

                    Return type
                    @@ -5160,7 +5160,7 @@

                    Etcd Backend
                    -set_state(state, token=None)[source]#
                    +set_state(state, token=None)[source]#

                    See base class.

                    Return type
                    @@ -5183,7 +5183,7 @@

                    Etcd Rendezvous (Legacy)
                    -class torch.distributed.elastic.rendezvous.etcd_rendezvous.EtcdRendezvousHandler(rdzv_impl, local_addr)[source]#
                    +class torch.distributed.elastic.rendezvous.etcd_rendezvous.EtcdRendezvousHandler(rdzv_impl, local_addr)[source]#

Implements a torch.distributed.elastic.rendezvous.RendezvousHandler interface backed by
@@ -5269,14 +5269,14 @@

Etcd Store
next_rendezvous() when etcd is used as the rendezvous backend.

                    -class torch.distributed.elastic.rendezvous.etcd_store.EtcdStore(etcd_client, etcd_store_prefix, timeout=None)[source]#
                    +class torch.distributed.elastic.rendezvous.etcd_store.EtcdStore(etcd_client, etcd_store_prefix, timeout=None)[source]#

                    Implement a c10 Store interface by piggybacking on the rendezvous etcd instance.

                    This is the store object returned by EtcdRendezvous.

                    -add(key, num)[source]#
                    +add(key, num)[source]#

                    Atomically increment a value by an integer amount.

                    The integer is represented as a string using base 10. If key is not present, a default value of 0 will be assumed.

                    @@ -5292,7 +5292,7 @@

                    Etcd Store
                    -check(keys)[source]#
                    +check(keys)[source]#

                    Check if all of the keys are immediately present (without waiting).

                    Return type
                    @@ -5303,7 +5303,7 @@

                    Etcd Store
                    -get(key)[source]#
                    +get(key)[source]#

                    Get a value by key, possibly doing a blocking wait.

                    If key is not immediately present, will do a blocking wait for at most timeout duration or until the key is published.

                    @@ -5322,14 +5322,14 @@

                    Etcd Store
                    -set(key, value)[source]#
                    +set(key, value)[source]#

                    Write a key/value pair into EtcdStore.

                    Both key and value may be either Python str or bytes.

                    -wait(keys, override_timeout=None)[source]#
                    +wait(keys, override_timeout=None)[source]#

                    Wait until all of the keys are published, or until timeout.

                    Raises
                    @@ -5355,7 +5355,7 @@

                    Etcd Server
                    -class torch.distributed.elastic.rendezvous.etcd_server.EtcdServer(data_dir=None)[source]#
                    +class torch.distributed.elastic.rendezvous.etcd_server.EtcdServer(data_dir=None)[source]#

                    Note

Tested on etcd server v3.4.3.

diff --git a/2.9/elastic/subprocess_handler.html b/2.9/elastic/subprocess_handler.html
index 711742f4e09..acff148ca7f 100644
--- a/2.9/elastic/subprocess_handler.html
+++ b/2.9/elastic/subprocess_handler.html
@@ -4407,7 +4407,7 @@

                    Retrieve SubprocessHandler#

                    -torch.distributed.elastic.multiprocessing.subprocess_handler.handlers.get_subprocess_handler(entrypoint, args, env, stdout, stderr, local_rank_id, numa_options=None)[source]#
                    +torch.distributed.elastic.multiprocessing.subprocess_handler.handlers.get_subprocess_handler(entrypoint, args, env, stdout, stderr, local_rank_id, numa_options=None)[source]#
                    Return type

                    SubprocessHandler

                    @@ -4420,7 +4420,7 @@

                    Retrieve SubprocessHandler#

                    -class torch.distributed.elastic.multiprocessing.subprocess_handler.subprocess_handler.SubprocessHandler(entrypoint, args, env, stdout, stderr, local_rank_id, numa_options)[source]#
                    +class torch.distributed.elastic.multiprocessing.subprocess_handler.subprocess_handler.SubprocessHandler(entrypoint, args, env, stdout, stderr, local_rank_id, numa_options)[source]#

Convenience wrapper around Python's subprocess.Popen. Keeps track of meta-objects associated with the process (e.g. stdout and stderr redirect fds).

diff --git a/2.9/elastic/timer.html b/2.9/elastic/timer.html
index 89c1017442e..1ed29bf1d7c 100644
--- a/2.9/elastic/timer.html
+++ b/2.9/elastic/timer.html
@@ -4438,7 +4438,7 @@

                    Client Methods#

                    -torch.distributed.elastic.timer.configure(timer_client)[source]#
                    +torch.distributed.elastic.timer.configure(timer_client)[source]#

                    Configures a timer client. Must be called before using expires.

                    @@ -4446,7 +4446,7 @@

                    Client Methods
                    -torch.distributed.elastic.timer.expires(after, scope=None, client=None)[source]#
                    +torch.distributed.elastic.timer.expires(after, scope=None, client=None)[source]#

Acquires a countdown timer that expires in after seconds from now, unless the code-block that it wraps is finished within the timeframe. When the timer expires, this worker is eligible to be reaped. The
@@ -4480,7 +4480,7 @@
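A minimal sketch of the client-side pattern, assuming a multiprocessing queue shared with a LocalTimerServer on the same host; mp_queue and train_step are placeholders:

import torch.distributed.elastic.timer as timer

timer.configure(timer.LocalTimerClient(mp_queue))   # mp_queue: queue shared with the server

with timer.expires(after=60):
    # if this block takes longer than 60 seconds, the worker becomes
    # eligible to be reaped by the timer server
    train_step()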

Server/Client Implementations
multiprocess.Queue.

                    -class torch.distributed.elastic.timer.LocalTimerServer(mp_queue, max_interval=60, daemon=True)[source]#
                    +class torch.distributed.elastic.timer.LocalTimerServer(mp_queue, max_interval=60, daemon=True)[source]#

Server that works with LocalTimerClient. Clients are expected to be subprocesses to the parent process that is running this server. Each host in the job is expected to start its own timer server locally and each
@@ -4492,7 +4492,7 @@

                    Server/Client Implementations
                    -class torch.distributed.elastic.timer.LocalTimerClient(mp_queue)[source]#
                    +class torch.distributed.elastic.timer.LocalTimerClient(mp_queue)[source]#

Client side of LocalTimerServer. This client is meant to be used on the same host that the LocalTimerServer is running on and uses pid to uniquely identify a worker. This is particularly useful in situations
@@ -4504,7 +4504,7 @@

                    Server/Client Implementations
                    -class torch.distributed.elastic.timer.FileTimerServer(file_path, run_id, max_interval=10, daemon=True, log_event=None)[source]#
                    +class torch.distributed.elastic.timer.FileTimerServer(file_path, run_id, max_interval=10, daemon=True, log_event=None)[source]#

Server that works with FileTimerClient. Clients are expected to be running on the same host as the process that is running this server. Each host in the job is expected to start its own timer server locally
@@ -4526,7 +4526,7 @@

                    Server/Client Implementations
                    -class torch.distributed.elastic.timer.FileTimerClient(file_path, signal=Signals.SIGKILL)[source]#
                    +class torch.distributed.elastic.timer.FileTimerClient(file_path, signal=Signals.SIGKILL)[source]#

Client side of FileTimerServer. This client is meant to be used on the same host that the FileTimerServer is running on and uses pid to uniquely identify a worker.
@@ -4556,7 +4556,7 @@

                    Writing a custom timer server/client
                    -class torch.distributed.elastic.timer.TimerRequest(worker_id, scope_id, expiration_time)[source]#
                    +class torch.distributed.elastic.timer.TimerRequest(worker_id, scope_id, expiration_time)[source]#

Data object representing a countdown timer acquisition and release that is used between the TimerClient and TimerServer. A negative expiration_time should be interpreted as a “release”
@@ -4573,7 +4573,7 @@

                    Writing a custom timer server/client
                    -class torch.distributed.elastic.timer.TimerServer(request_queue, max_interval, daemon=True)[source]#
                    +class torch.distributed.elastic.timer.TimerServer(request_queue, max_interval, daemon=True)[source]#

                    Entity that monitors active timers and expires them in a timely fashion. This server is responsible for reaping workers that have expired timers.

                    @@ -4581,7 +4581,7 @@

                    Writing a custom timer server/client
                    -abstract clear_timers(worker_ids)[source]#
                    +abstract clear_timers(worker_ids)[source]#

                    Clears all timers for the given worker_ids.

                    @@ -4589,7 +4589,7 @@

                    Writing a custom timer server/client
                    -abstract get_expired_timers(deadline)[source]#
                    +abstract get_expired_timers(deadline)[source]#

                    Returns all expired timers for each worker_id. An expired timer is a timer for which the expiration_time is less than or equal to the provided deadline.

                    @@ -4602,7 +4602,7 @@

                    Writing a custom timer server/client
                    -abstract register_timers(timer_requests)[source]#
                    +abstract register_timers(timer_requests)[source]#

Processes the incoming timer requests and registers them with the server. The timer request can either be an acquire-timer or a release-timer request. Timer requests with a negative expiration_time should be interpreted
@@ -4615,12 +4615,12 @@

                    Writing a custom timer server/client
                    -class torch.distributed.elastic.timer.TimerClient[source]#
                    +class torch.distributed.elastic.timer.TimerClient[source]#

                    Client library to acquire and release countdown timers by communicating with the TimerServer.

                    -abstract acquire(scope_id, expiration_time)[source]#
                    +abstract acquire(scope_id, expiration_time)[source]#

                    Acquires a timer for the worker that holds this client object given the scope_id and expiration_time. Typically registers the timer with the TimerServer.

                    @@ -4630,7 +4630,7 @@

                    Writing a custom timer server/client
                    -abstract release(scope_id)[source]#
                    +abstract release(scope_id)[source]#

                    Releases the timer for the scope_id on the worker this client represents. After this method is called, the countdown timer on the scope is no longer in effect.

                    @@ -4645,7 +4645,7 @@

                    Writing a custom timer server/client

                    Debug info logging#

                    -torch.distributed.elastic.timer.debug_info_logging.log_debug_info_for_expired_timers(run_id, expired_timers)[source]#
                    +torch.distributed.elastic.timer.debug_info_logging.log_debug_info_for_expired_timers(run_id, expired_timers)[source]#
diff --git a/2.9/export/api_reference.html b/2.9/export/api_reference.html
index 48ec22a5d6a..c838ac24260 100644
--- a/2.9/export/api_reference.html
+++ b/2.9/export/api_reference.html
@@ -4405,7 +4405,7 @@

                    Created On: Jul 17, 2025 | Last Updated On: Jul 17, 2025

                    -torch.export.export(mod, args, kwargs=None, *, dynamic_shapes=None, strict=False, preserve_module_call_signature=(), prefer_deferred_runtime_asserts_over_guards=False)[source]#
                    +torch.export.export(mod, args, kwargs=None, *, dynamic_shapes=None, strict=False, preserve_module_call_signature=(), prefer_deferred_runtime_asserts_over_guards=False)[source]#

export() takes any nn.Module along with example inputs, and produces a traced graph representing only the Tensor computation of the function in an Ahead-of-Time (AOT) fashion, which can subsequently be executed with different inputs or serialized. The
@@ -4492,7 +4492,7 @@
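A minimal end-to-end sketch of the call; the module and inputs are illustrative:

import torch
from torch.export import export

class M(torch.nn.Module):
    def forward(self, x):
        return x.relu() + 1

ep = export(M(), (torch.randn(4, 4),))   # ExportedProgram
print(ep.graph)                          # the captured Tensor computation
out = ep.module()(torch.randn(4, 4))     # run the inlined GraphModule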

                    -class torch.export.ExportedProgram(root, graph, graph_signature, state_dict, range_constraints, module_call_graph, example_inputs=None, constants=None, *, verifiers=None)[source]#
                    +class torch.export.ExportedProgram(root, graph, graph_signature, state_dict, range_constraints, module_call_graph, example_inputs=None, constants=None, *, verifiers=None)[source]#

                    Package of a program from export(). It contains an torch.fx.Graph that represents Tensor computation, a state_dict containing tensor values of all lifted parameters and buffers, and various metadata.

                    @@ -4507,7 +4507,7 @@
                    -buffers()[source]#
                    +buffers()[source]#

                    Returns an iterator over original module buffers.

                    Warning

                    @@ -4585,7 +4585,7 @@
                    -module(check_guards=True)[source]#
                    +module(check_guards=True)[source]#

Returns a self-contained GraphModule with all the parameters/buffers inlined.

• When check_guards=True (default), a _guards_fn submodule is generated
@@ -4612,7 +4612,7 @@

                      -named_buffers()[source]#
                      +named_buffers()[source]#

                      Returns an iterator over original module buffers, yielding both the name of the buffer as well as the buffer itself.

                      @@ -4628,7 +4628,7 @@
                      -named_parameters()[source]#
                      +named_parameters()[source]#

                      Returns an iterator over original module parameters, yielding both the name of the parameter as well as the parameter itself.

                      @@ -4644,7 +4644,7 @@
                      -parameters()[source]#
                      +parameters()[source]#

                      Returns an iterator over original module’s parameters.

                      Warning

                      @@ -4668,7 +4668,7 @@
                      -run_decompositions(decomp_table=None, decompose_custom_triton_ops=False)[source]#
                      +run_decompositions(decomp_table=None, decompose_custom_triton_ops=False)[source]#

Runs a set of decompositions on the exported program and returns a new exported program. By default we will run the Core ATen decompositions to get operators in the
@@ -4719,7 +4719,7 @@

                      -validate()[source]#
                      +validate()[source]#

                      Warning

                      This API is experimental and is NOT backward-compatible.

                      @@ -4748,7 +4748,7 @@
                      -class torch.export.dynamic_shapes.AdditionalInputs[source]#
                      +class torch.export.dynamic_shapes.AdditionalInputs[source]#

                      Infers dynamic_shapes based on additional inputs.

This is particularly useful for deployment engineers who, on the one hand, may have access to ample testing or profiling data that can provide a fair sense of
@@ -4772,13 +4772,13 @@

                      -add(args, kwargs=None)[source]#
                      +add(args, kwargs=None)[source]#

                      Additional input args() and kwargs().

                      -dynamic_shapes(m, args, kwargs=None)[source]#
                      +dynamic_shapes(m, args, kwargs=None)[source]#

                      Infers a dynamic_shapes() pytree structure by merging shapes of the original input args() and kwargs() and of each additional input args and kwargs.

                      @@ -4786,7 +4786,7 @@
                      -verify(ep)[source]#
                      +verify(ep)[source]#

                      Verifies that an exported program is valid for each additional input.

                      @@ -4794,7 +4794,7 @@
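A minimal sketch of the intended workflow, reusing the M module from the export() sketch above; the extra input shapes are illustrative:

import torch
from torch.export import export
from torch.export.dynamic_shapes import AdditionalInputs

inputs = AdditionalInputs()
inputs.add((torch.randn(8, 4),))    # extra profiled input shapes
inputs.add((torch.randn(16, 4),))

# dynamic_shapes is inferred by diffing the original and additional inputs
ep = export(M(), (torch.randn(4, 4),), dynamic_shapes=inputs)
inputs.verify(ep)                   # check ep accepts each additional input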
                      -class torch.export.dynamic_shapes.Dim(name, *, min=None, max=None)[source]#
                      +class torch.export.dynamic_shapes.Dim(name, *, min=None, max=None)[source]#

                      The Dim class allows users to specify dynamism in their exported programs. By marking a dimension with a Dim, the compiler associates the dimension with a symbolic integer containing a dynamic range.

                      @@ -4855,7 +4855,7 @@
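A minimal sketch marking one dimension dynamic, again reusing the M module from the export() sketch above; the bounds are illustrative:

import torch
from torch.export import Dim, export

batch = Dim("batch", min=1, max=1024)
ep = export(M(), (torch.randn(8, 3),),
            dynamic_shapes={"x": {0: batch}})   # dim 0 of x is dynamic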
                      -class torch.export.dynamic_shapes.ShapesCollection[source]#
                      +class torch.export.dynamic_shapes.ShapesCollection[source]#

                      Builder for dynamic_shapes. Used to assign dynamic shape specifications to tensors that appear in inputs.

This is particularly useful when args() is a nested input structure, and it’s
@@ -4893,7 +4893,7 @@
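A minimal sketch of assigning specs directly to input tensor objects; the module and tensor names are illustrative:

import torch
from torch.export import export
from torch.export.dynamic_shapes import Dim, ShapesCollection

class Sum(torch.nn.Module):
    def forward(self, x, y):
        return x + y

x, y = torch.randn(8, 4), torch.randn(8, 4)
batch = Dim("batch")

sc = ShapesCollection()
sc[x] = {0: batch}    # specs are keyed by the input tensor objects
sc[y] = {0: batch}

ep = export(Sum(), (x, y), dynamic_shapes=sc)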

                      -dynamic_shapes(m, args, kwargs=None)[source]#
                      +dynamic_shapes(m, args, kwargs=None)[source]#

                      Generates the dynamic_shapes() pytree structure according to args() and kwargs().

                      @@ -4901,7 +4901,7 @@
                      -torch.export.dynamic_shapes.refine_dynamic_shapes_from_suggested_fixes(msg, dynamic_shapes)[source]#
                      +torch.export.dynamic_shapes.refine_dynamic_shapes_from_suggested_fixes(msg, dynamic_shapes)[source]#

                      When exporting with dynamic_shapes(), export may fail with a ConstraintViolation error if the specification doesn’t match the constraints inferred from tracing the model. The error message may provide suggested fixes - changes that can be made to dynamic_shapes() to export successfully.

                      @@ -4934,7 +4934,7 @@
                      -torch.export.save(ep, f, *, extra_files=None, opset_version=None, pickle_protocol=2)[source]#
                      +torch.export.save(ep, f, *, extra_files=None, opset_version=None, pickle_protocol=2)[source]#

                      Warning

Under active development, saved files may not be usable in newer versions
@@ -4983,7 +4983,7 @@

                      -torch.export.load(f, *, extra_files=None, expected_opset_version=None)[source]#
                      +torch.export.load(f, *, extra_files=None, expected_opset_version=None)[source]#

                      Warning

Under active development, saved files may not be usable in newer versions
@@ -5034,7 +5034,7 @@
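A minimal round-trip sketch, reusing the M module from the export() sketch above; the file name is illustrative:

import torch

ep = torch.export.export(M(), (torch.randn(4, 4),))
torch.export.save(ep, "my_program.pt2")
loaded = torch.export.load("my_program.pt2")
loaded.module()(torch.randn(4, 4))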

                      -torch.export.pt2_archive._package.package_pt2(f, *, exported_programs=None, aoti_files=None, extra_files=None, opset_version=None, pickle_protocol=2)[source]#
                      +torch.export.pt2_archive._package.package_pt2(f, *, exported_programs=None, aoti_files=None, extra_files=None, opset_version=None, pickle_protocol=2)[source]#

                      Saves the artifacts to a PT2Archive format. The artifact can then be loaded using load_pt2.

                      @@ -5067,7 +5067,7 @@
                      -torch.export.pt2_archive._package.load_pt2(f, *, expected_opset_version=None, run_single_threaded=False, num_runners=1, device_index=-1, load_weights_from_disk=False)[source]#
                      +torch.export.pt2_archive._package.load_pt2(f, *, expected_opset_version=None, run_single_threaded=False, num_runners=1, device_index=-1, load_weights_from_disk=False)[source]#

                      Loads all the artifacts previously saved with package_pt2.

                      Parameters
                      @@ -5097,7 +5097,7 @@
                      -torch.export.draft_export(mod, args, kwargs=None, *, dynamic_shapes=None, preserve_module_call_signature=(), strict=False, prefer_deferred_runtime_asserts_over_guards=False)[source]#
                      +torch.export.draft_export(mod, args, kwargs=None, *, dynamic_shapes=None, preserve_module_call_signature=(), strict=False, prefer_deferred_runtime_asserts_over_guards=False)[source]#

                      A version of torch.export.export which is designed to consistently produce an ExportedProgram, even if there are potential soundness issues, and to generate a report listing the issues found.

                      @@ -5110,11 +5110,11 @@
                      -class torch.export.unflatten.FlatArgsAdapter[source]#
                      +class torch.export.unflatten.FlatArgsAdapter[source]#

                      Adapts input arguments with input_spec to align target_spec.

                      -abstract adapt(target_spec, input_spec, input_args, metadata=None, obj=None)[source]#
                      +abstract adapt(target_spec, input_spec, input_args, metadata=None, obj=None)[source]#

                      NOTE: This adapter may mutate given input_args_with_path.

                      Return type
                      @@ -5125,7 +5125,7 @@
                      -get_flat_arg_paths()[source]#
                      +get_flat_arg_paths()[source]#

                      Returns a list of paths that are used to access the flat args.

                      Return type
                      @@ -5138,7 +5138,7 @@
                      -class torch.export.unflatten.InterpreterModule(graph, ty=None)[source]#
                      +class torch.export.unflatten.InterpreterModule(graph, ty=None)[source]#

                      A module that uses torch.fx.Interpreter to execute instead of the usual codegen that GraphModule uses. This provides better stack trace information and makes it easier to debug execution.

                      @@ -5148,7 +5148,7 @@
                      -class torch.export.unflatten.InterpreterModuleDispatcher(attrs, call_modules)[source]#
                      +class torch.export.unflatten.InterpreterModuleDispatcher(attrs, call_modules)[source]#

                      A module that carries a sequence of InterpreterModules corresponding to a sequence of calls of that module. Each call to the module dispatches to the next InterpreterModule, and wraps back around after the last.

                      @@ -5158,7 +5158,7 @@
                      -torch.export.unflatten.unflatten(module, flat_args_adapter=None)[source]#
                      +torch.export.unflatten.unflatten(module, flat_args_adapter=None)[source]#

Unflatten an ExportedProgram, producing a module with the same module hierarchy as the original eager module. This can be useful if you are trying to use torch.export with another system that expects a module
@@ -5190,7 +5190,7 @@
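A minimal sketch, where model is a hypothetical eager nn.Module with submodules:

import torch
from torch.export import export
from torch.export.unflatten import unflatten

ep = export(model, (torch.randn(4, 4),))   # model: hypothetical nn.Module tree
unflat = unflatten(ep)                     # mirrors model's module hierarchy
out = unflat(torch.randn(4, 4))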

                      -torch.export.register_dataclass(cls, *, serialized_type_name=None)[source]#
                      +torch.export.register_dataclass(cls, *, serialized_type_name=None)[source]#

                      Registers a dataclass as a valid input/output type for torch.export.export().

                      Parameters
                      @@ -5236,7 +5236,7 @@
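A minimal sketch of registering a dataclass input; the class and module names are illustrative:

from dataclasses import dataclass

import torch
from torch.export import export, register_dataclass

@dataclass
class Pair:
    x: torch.Tensor
    y: torch.Tensor

register_dataclass(Pair, serialized_type_name="Pair")

class AddPair(torch.nn.Module):
    def forward(self, p):
        return p.x + p.y

ep = export(AddPair(), (Pair(torch.randn(3), torch.randn(3)),))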
                      -class torch.export.decomp_utils.CustomDecompTable[source]#
                      +class torch.export.decomp_utils.CustomDecompTable[source]#

This is a custom dictionary that is specifically used for handling decomp_table in export. The reason we need this is because in the new world, you can only delete an op from the decomp table to preserve it. This is problematic for custom ops because we don’t know when the custom
@@ -5254,7 +5254,7 @@

                      -copy()[source]#
                      +copy()[source]#
                      Return type

                      CustomDecompTable

                      @@ -5264,17 +5264,17 @@
                      -items()[source]#
                      +items()[source]#
                      -keys()[source]#
                      +keys()[source]#
                      -materialize()[source]#
                      +materialize()[source]#
                      Return type

                      dict[torch._ops.OperatorBase, Callable]

                      @@ -5284,19 +5284,19 @@
                      -pop(*args)[source]#
                      +pop(*args)[source]#
                      -update(other_dict)[source]#
                      +update(other_dict)[source]#
                      -torch.export.passes.move_to_device_pass(ep, location)[source]#
                      +torch.export.passes.move_to_device_pass(ep, location)[source]#

                      Move the exported program to the given device.

                      Parameters
                      @@ -5325,7 +5325,7 @@
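A short sketch of move_to_device_pass, assuming ep is an ExportedProgram from an earlier torch.export.export call and a CUDA device is available:

from torch.export.passes import move_to_device_pass

ep_cuda = move_to_device_pass(ep, "cuda")  # location may also be a torch.device
ep_cpu = move_to_device_pass(ep_cuda, "cpu")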
                      -archive_version()[source]#
                      +archive_version()[source]#

                      Get the archive version.

                      Return type
                      @@ -5336,7 +5336,7 @@
                      -get_file_names()[source]#
                      +get_file_names()[source]#

                      Get the file names in the archive.

                      Return type
                      @@ -5347,7 +5347,7 @@
                      -read_bytes(name)[source]#
                      +read_bytes(name)[source]#

                      Read a bytes object from the archive. name: The source file inside the archive.

                      @@ -5359,7 +5359,7 @@
                      -read_string(name)[source]#
                      +read_string(name)[source]#

                      Read a string object from the archive. name: The source file inside the archive.

                      @@ -5379,7 +5379,7 @@
                      -close()[source]#
                      +close()[source]#

                      Close the archive.

                      @@ -5387,7 +5387,7 @@
                      -count_prefix(prefix)[source]#
                      +count_prefix(prefix)[source]#

                      Count the number of records that start with a given prefix.

                      Return type
                      @@ -5398,7 +5398,7 @@
                      -has_record(name)[source]#
                      +has_record(name)[source]#

                      Check if a record exists in the archive.

                      Return type
                      @@ -5409,7 +5409,7 @@
                      -write_bytes(name, data)[source]#
                      +write_bytes(name, data)[source]#

                      Write a bytes object to the archive. name: The destination file inside the archive. data: The bytes object to write.

                      @@ -5419,7 +5419,7 @@
                      -write_file(name, file_path)[source]#
                      +write_file(name, file_path)[source]#

                      Copy a file into the archive. name: The destination file inside the archive. file_path: The source file on disk.

                      @@ -5429,7 +5429,7 @@
                      -write_folder(archive_dir, folder_dir)[source]#
                      +write_folder(archive_dir, folder_dir)[source]#

                      Copy a folder into the archive. archive_dir: The destination folder inside the archive. folder_dir: The source folder on disk.

                      @@ -5439,7 +5439,7 @@
                      -write_string(name, data)[source]#
                      +write_string(name, data)[source]#

                      Write a string object to the archive. name: The destination file inside the archive. data: The string object to write.

                      @@ -5451,7 +5451,7 @@
                      -torch.export.pt2_archive.is_pt2_package(serialized_model)[source]#
                      +torch.export.pt2_archive.is_pt2_package(serialized_model)[source]#

                      Check if the serialized model is a PT2 Archive package.

                      Return type
                      @@ -5462,21 +5462,21 @@
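A sketch of checking a saved archive; it assumes ep is an ExportedProgram from an earlier export and that is_pt2_package accepts a file path:

import torch
from torch.export.pt2_archive import is_pt2_package

torch.export.save(ep, "model.pt2")  # writes a PT2 Archive package
assert is_pt2_package("model.pt2")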
                      -class torch.export.exported_program.ModuleCallEntry(fqn: str, signature: Optional[torch.export.exported_program.ModuleCallSignature] = None)[source]#
                      +class torch.export.exported_program.ModuleCallEntry(fqn: str, signature: Optional[torch.export.exported_program.ModuleCallSignature] = None)[source]#
                      -class torch.export.exported_program.ModuleCallSignature(inputs: list[Union[torch.export.graph_signature.TensorArgument, torch.export.graph_signature.SymIntArgument, torch.export.graph_signature.SymFloatArgument, torch.export.graph_signature.SymBoolArgument, torch.export.graph_signature.ConstantArgument, torch.export.graph_signature.CustomObjArgument, torch.export.graph_signature.TokenArgument]], outputs: list[Union[torch.export.graph_signature.TensorArgument, torch.export.graph_signature.SymIntArgument, torch.export.graph_signature.SymFloatArgument, torch.export.graph_signature.SymBoolArgument, torch.export.graph_signature.ConstantArgument, torch.export.graph_signature.CustomObjArgument, torch.export.graph_signature.TokenArgument]], in_spec: torch.utils._pytree.TreeSpec, out_spec: torch.utils._pytree.TreeSpec, forward_arg_names: Optional[list[str]] = None)[source]#
                      +class torch.export.exported_program.ModuleCallSignature(inputs: list[Union[torch.export.graph_signature.TensorArgument, torch.export.graph_signature.SymIntArgument, torch.export.graph_signature.SymFloatArgument, torch.export.graph_signature.SymBoolArgument, torch.export.graph_signature.ConstantArgument, torch.export.graph_signature.CustomObjArgument, torch.export.graph_signature.TokenArgument]], outputs: list[Union[torch.export.graph_signature.TensorArgument, torch.export.graph_signature.SymIntArgument, torch.export.graph_signature.SymFloatArgument, torch.export.graph_signature.SymBoolArgument, torch.export.graph_signature.ConstantArgument, torch.export.graph_signature.CustomObjArgument, torch.export.graph_signature.TokenArgument]], in_spec: torch.utils._pytree.TreeSpec, out_spec: torch.utils._pytree.TreeSpec, forward_arg_names: Optional[list[str]] = None)[source]#
                      -torch.export.exported_program.default_decompositions()[source]#
                      +torch.export.exported_program.default_decompositions()[source]#

This is the default decomposition table, which contains decompositions of all ATen operators to the Core ATen opset. Use this API together with run_decompositions().

                      @@ -5489,7 +5489,7 @@
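A sketch of the intended pattern, assuming ep is an ExportedProgram and that aten.linear should be preserved rather than decomposed:

import torch

table = torch.export.default_decompositions()
del table[torch.ops.aten.linear.default]   # deleting an entry preserves the op
ep = ep.run_decompositions(decomp_table=table)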
                      -class torch.export.custom_obj.ScriptObjectMeta(constant_name, class_fqn)[source]#
                      +class torch.export.custom_obj.ScriptObjectMeta(constant_name, class_fqn)[source]#

                      Metadata which is stored on nodes representing ScriptObjects.

                      @@ -5497,7 +5497,7 @@
                      -class torch.export.graph_signature.ConstantArgument(name: str, value: Union[int, float, bool, str, NoneType])[source]#
                      +class torch.export.graph_signature.ConstantArgument(name: str, value: Union[int, float, bool, str, NoneType])[source]#
                      @@ -5514,7 +5514,7 @@
                      -class torch.export.graph_signature.CustomObjArgument(name: str, class_fqn: str, fake_val: Optional[torch._library.fake_class_registry.FakeScriptObject] = None)[source]#
                      +class torch.export.graph_signature.CustomObjArgument(name: str, class_fqn: str, fake_val: Optional[torch._library.fake_class_registry.FakeScriptObject] = None)[source]#
                      @@ -5536,7 +5536,7 @@
                      -class torch.export.graph_signature.ExportBackwardSignature(gradients_to_parameters: dict[str, str], gradients_to_user_inputs: dict[str, str], loss_output: str)[source]#
                      +class torch.export.graph_signature.ExportBackwardSignature(gradients_to_parameters: dict[str, str], gradients_to_user_inputs: dict[str, str], loss_output: str)[source]#
                      @@ -5558,7 +5558,7 @@
                      -class torch.export.graph_signature.ExportGraphSignature(input_specs, output_specs)[source]#
                      +class torch.export.graph_signature.ExportGraphSignature(input_specs, output_specs)[source]#

ExportGraphSignature models the input/output signature of the Export Graph, which is an fx.Graph with stronger invariant guarantees.

Export Graph is functional and does not access “states” like parameters
@@ -5685,7 +5685,7 @@

                      -get_replace_hook(replace_inputs=False)[source]#
                      +get_replace_hook(replace_inputs=False)[source]#
                      @@ -5755,7 +5755,7 @@
                      -replace_all_uses(old, new)[source]#
                      +replace_all_uses(old, new)[source]#

Replace all uses of the old name with the new name in the signature.

                      @@ -5780,7 +5780,7 @@
                      -class torch.export.graph_signature.InputKind(value)[source]#
                      +class torch.export.graph_signature.InputKind(value)[source]#

                      An enumeration.

                      @@ -5816,7 +5816,7 @@
                      -class torch.export.graph_signature.InputSpec(kind: torch.export.graph_signature.InputKind, arg: Union[torch.export.graph_signature.TensorArgument, torch.export.graph_signature.SymIntArgument, torch.export.graph_signature.SymFloatArgument, torch.export.graph_signature.SymBoolArgument, torch.export.graph_signature.ConstantArgument, torch.export.graph_signature.CustomObjArgument, torch.export.graph_signature.TokenArgument], target: Optional[str], persistent: Optional[bool] = None)[source]#
                      +class torch.export.graph_signature.InputSpec(kind: torch.export.graph_signature.InputKind, arg: Union[torch.export.graph_signature.TensorArgument, torch.export.graph_signature.SymIntArgument, torch.export.graph_signature.SymFloatArgument, torch.export.graph_signature.SymBoolArgument, torch.export.graph_signature.ConstantArgument, torch.export.graph_signature.CustomObjArgument, torch.export.graph_signature.TokenArgument], target: Optional[str], persistent: Optional[bool] = None)[source]#
                      @@ -5843,7 +5843,7 @@
                      -class torch.export.graph_signature.OutputKind(value)[source]#
                      +class torch.export.graph_signature.OutputKind(value)[source]#

                      An enumeration.

                      @@ -5889,7 +5889,7 @@
                      -class torch.export.graph_signature.OutputSpec(kind: torch.export.graph_signature.OutputKind, arg: Union[torch.export.graph_signature.TensorArgument, torch.export.graph_signature.SymIntArgument, torch.export.graph_signature.SymFloatArgument, torch.export.graph_signature.SymBoolArgument, torch.export.graph_signature.ConstantArgument, torch.export.graph_signature.CustomObjArgument, torch.export.graph_signature.TokenArgument], target: Optional[str])[source]#
                      +class torch.export.graph_signature.OutputSpec(kind: torch.export.graph_signature.OutputKind, arg: Union[torch.export.graph_signature.TensorArgument, torch.export.graph_signature.SymIntArgument, torch.export.graph_signature.SymFloatArgument, torch.export.graph_signature.SymBoolArgument, torch.export.graph_signature.ConstantArgument, torch.export.graph_signature.CustomObjArgument, torch.export.graph_signature.TokenArgument], target: Optional[str])[source]#
                      @@ -5911,7 +5911,7 @@
                      -class torch.export.graph_signature.SymBoolArgument(name: str)[source]#
                      +class torch.export.graph_signature.SymBoolArgument(name: str)[source]#
                      @@ -5923,7 +5923,7 @@
                      -class torch.export.graph_signature.SymFloatArgument(name: str)[source]#
                      +class torch.export.graph_signature.SymFloatArgument(name: str)[source]#
                      @@ -5935,7 +5935,7 @@
                      -class torch.export.graph_signature.SymIntArgument(name: str)[source]#
                      +class torch.export.graph_signature.SymIntArgument(name: str)[source]#
                      @@ -5947,7 +5947,7 @@
                      -class torch.export.graph_signature.TensorArgument(name: str)[source]#
                      +class torch.export.graph_signature.TensorArgument(name: str)[source]#
                      @@ -5959,7 +5959,7 @@
                      -class torch.export.graph_signature.TokenArgument(name: str)[source]#
                      +class torch.export.graph_signature.TokenArgument(name: str)[source]#
                      diff --git a/2.9/export/joint_with_descriptors.html b/2.9/export/joint_with_descriptors.html index 80efb68af55..b27e53aeeb8 100644 --- a/2.9/export/joint_with_descriptors.html +++ b/2.9/export/joint_with_descriptors.html @@ -4411,7 +4411,7 @@

                      -torch._functorch.aot_autograd.aot_export_joint_with_descriptors(stack, mod, args, kwargs=None, *, decompositions=None, keep_inference_input_mutations=False, ignore_shape_env=False, fw_compiler=<function boxed_nop_preserve_node_meta>, bw_compiler=<function boxed_nop_preserve_node_meta>)[source]#
                      +torch._functorch.aot_autograd.aot_export_joint_with_descriptors(stack, mod, args, kwargs=None, *, decompositions=None, keep_inference_input_mutations=False, ignore_shape_env=False, fw_compiler=<function boxed_nop_preserve_node_meta>, bw_compiler=<function boxed_nop_preserve_node_meta>)[source]#

This API captures the joint graph for an nn.Module. However, unlike aot_export_joint_simple or aot_export_module(trace_joint=True), the calling convention of the produced joint graph follows no fixed positional
@@ -4473,7 +4473,7 @@

                      -torch._functorch.aot_autograd.aot_compile_joint_with_descriptors(jd)[source]#
                      +torch._functorch.aot_autograd.aot_compile_joint_with_descriptors(jd)[source]#

Companion function for aot_export_joint_with_descriptors, which compiles the joint graph into a callable function that follows a standard calling convention: the flattened parameters (params_flat) are all passed as arguments.

                      @@ -4491,13 +4491,13 @@
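A minimal sketch combining the two APIs above; the toy module and shapes are illustrative:

import contextlib
import torch
from torch._functorch.aot_autograd import (
    aot_compile_joint_with_descriptors,
    aot_export_joint_with_descriptors,
)

mod = torch.nn.Linear(4, 4)
with contextlib.ExitStack() as stack:
    # Capture the joint forward/backward graph with descriptors attached
    jd = aot_export_joint_with_descriptors(stack, mod, (torch.randn(2, 4),))
    # Compile it into a callable following the standard calling convention
    compiled = aot_compile_joint_with_descriptors(jd)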

Descriptors#

                      -class torch._functorch._aot_autograd.descriptors.AOTInput[source]#
                      +class torch._functorch._aot_autograd.descriptors.AOTInput[source]#

Describes where an input to an AOTAutograd-produced FX graph comes from.

                      -is_buffer()[source]#
                      +is_buffer()[source]#

                      True if this input is a buffer or derived from a buffer (e.g., subclass attr)

                      Return type
                      @@ -4508,7 +4508,7 @@

                      -is_param()[source]#
                      +is_param()[source]#

                      True if this input is a parameter or derived from a parameter (e.g., subclass attr)

                      Return type
                      @@ -4519,7 +4519,7 @@

                      -is_tangent()[source]#
                      +is_tangent()[source]#

                      True if this input is a tangent or derived from a tangent (e.g., subclass attr)

                      Return type
                      @@ -4532,14 +4532,14 @@

                      -class torch._functorch._aot_autograd.descriptors.AOTOutput[source]#
                      +class torch._functorch._aot_autograd.descriptors.AOTOutput[source]#

Describes where an output of an AOTAutograd-produced FX graph will eventually be bundled into the final output.

                      -is_grad()[source]#
                      +is_grad()[source]#

                      True if this output is a grad or derived from a grad (e.g., subclass attr)

                      Return type
                      @@ -4552,7 +4552,7 @@

                      -class torch._functorch._aot_autograd.descriptors.BackwardTokenAOTInput(idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.BackwardTokenAOTInput(idx)[source]#

                      The world token which is threaded through side-effectful operations, for backwards

                      @@ -4560,7 +4560,7 @@

                      -class torch._functorch._aot_autograd.descriptors.BackwardTokenAOTOutput(idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.BackwardTokenAOTOutput(idx)[source]#

The world token output for side-effectful calls, returned so that it cannot be eliminated by DCE; backward only.

                      @@ -4568,7 +4568,7 @@

                      -class torch._functorch._aot_autograd.descriptors.BufferAOTInput(target)[source]#
                      +class torch._functorch._aot_autograd.descriptors.BufferAOTInput(target)[source]#

                      The input is a buffer, whose FQN is target

                      @@ -4576,7 +4576,7 @@

                      -class torch._functorch._aot_autograd.descriptors.DummyAOTInput(idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.DummyAOTInput(idx)[source]#

In some circumstances, we want to call into a function that expects AOTInput, but we don't actually care about that logic (most typically, because some code is being used for both compile time and run time; AOTInput processing is not needed in this situation).
@@ -4588,7 +4588,7 @@

                      -class torch._functorch._aot_autograd.descriptors.DummyAOTOutput(idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.DummyAOTOutput(idx)[source]#

For cases where you don't actually care about descriptor propagation; do not use under normal circumstances.

                      @@ -4597,7 +4597,7 @@

                      -class torch._functorch._aot_autograd.descriptors.GradAOTOutput(grad_of)[source]#
                      +class torch._functorch._aot_autograd.descriptors.GradAOTOutput(grad_of)[source]#

                      An output representing the computed gradient for a differentiable input, in the joint graph

                      @@ -4605,7 +4605,7 @@

                      -class torch._functorch._aot_autograd.descriptors.InputMutationAOTOutput(mutated_input)[source]#
                      +class torch._functorch._aot_autograd.descriptors.InputMutationAOTOutput(mutated_input)[source]#

                      The mutated value of an input tensor, returned so we can appropriately propagate autograd.

                      @@ -4613,7 +4613,7 @@

                      -class torch._functorch._aot_autograd.descriptors.IntermediateBaseAOTOutput(base_of)[source]#
                      +class torch._functorch._aot_autograd.descriptors.IntermediateBaseAOTOutput(base_of)[source]#

An intermediate base of multiple outputs that alias each other. We only report ONE of the outputs that contributed to this base.

                      @@ -4622,7 +4622,7 @@

                      -class torch._functorch._aot_autograd.descriptors.ParamAOTInput(target)[source]#
                      +class torch._functorch._aot_autograd.descriptors.ParamAOTInput(target)[source]#

                      The input is a parameter, whose FQN is target

                      @@ -4630,7 +4630,7 @@

                      -class torch._functorch._aot_autograd.descriptors.PhiloxBackwardBaseOffsetAOTInput[source]#
                      +class torch._functorch._aot_autograd.descriptors.PhiloxBackwardBaseOffsetAOTInput[source]#

                      The offset for functionalized Philox RNG calls, specifically for backward graph.

                      @@ -4638,7 +4638,7 @@

                      -class torch._functorch._aot_autograd.descriptors.PhiloxBackwardSeedAOTInput[source]#
                      +class torch._functorch._aot_autograd.descriptors.PhiloxBackwardSeedAOTInput[source]#

                      The seed for functionalized Philox RNG calls, specifically for backward graph.

                      @@ -4646,7 +4646,7 @@

                      -class torch._functorch._aot_autograd.descriptors.PhiloxForwardBaseOffsetAOTInput[source]#
                      +class torch._functorch._aot_autograd.descriptors.PhiloxForwardBaseOffsetAOTInput[source]#

                      The offset for functionalized Philox RNG calls, specifically for forward graph.

                      @@ -4654,7 +4654,7 @@

                      -class torch._functorch._aot_autograd.descriptors.PhiloxForwardSeedAOTInput[source]#
                      +class torch._functorch._aot_autograd.descriptors.PhiloxForwardSeedAOTInput[source]#

                      The seed for functionalized Philox RNG calls, specifically for forward graph.

                      @@ -4662,7 +4662,7 @@

                      -class torch._functorch._aot_autograd.descriptors.PhiloxUpdatedBackwardOffsetAOTOutput[source]#
                      +class torch._functorch._aot_autograd.descriptors.PhiloxUpdatedBackwardOffsetAOTOutput[source]#

                      The final offset from the functionalized RNG calls, backward only

                      @@ -4670,7 +4670,7 @@

                      -class torch._functorch._aot_autograd.descriptors.PhiloxUpdatedForwardOffsetAOTOutput[source]#
                      +class torch._functorch._aot_autograd.descriptors.PhiloxUpdatedForwardOffsetAOTOutput[source]#

                      The final offset from the functionalized RNG calls, forward only

                      @@ -4678,7 +4678,7 @@

                      -class torch._functorch._aot_autograd.descriptors.PlainAOTInput(idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.PlainAOTInput(idx)[source]#

                      The input is a plain input, corresponding to a particular positional index.

Note that AOTInput is always relative to a function with a flat calling convention, e.g., as accepted by aot_module_simplified. There are some AOTAutograd APIs that
@@ -4690,7 +4690,7 @@

                      -class torch._functorch._aot_autograd.descriptors.PlainAOTOutput(idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.PlainAOTOutput(idx)[source]#

                      A plain tensor output at position idx of the output tuple

                      @@ -4698,14 +4698,14 @@

                      -class torch._functorch._aot_autograd.descriptors.SavedForBackwardsAOTOutput(idx: int)[source]#
                      +class torch._functorch._aot_autograd.descriptors.SavedForBackwardsAOTOutput(idx: int)[source]#

                      -class torch._functorch._aot_autograd.descriptors.SubclassGetAttrAOTInput(base, attr)[source]#
                      +class torch._functorch._aot_autograd.descriptors.SubclassGetAttrAOTInput(base, attr)[source]#

Subclass inputs get unpacked into their constituent pieces before going into an FX graph. This tells you which particular attribute of the original subclass argument (‘base’) this input corresponds to.

                      @@ -4715,7 +4715,7 @@

                      -class torch._functorch._aot_autograd.descriptors.SubclassGetAttrAOTOutput(base, attr)[source]#
                      +class torch._functorch._aot_autograd.descriptors.SubclassGetAttrAOTOutput(base, attr)[source]#

                      This output will be bundled into a subclass at this location

                      @@ -4723,7 +4723,7 @@

                      -class torch._functorch._aot_autograd.descriptors.SubclassSizeAOTInput(base, idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.SubclassSizeAOTInput(base, idx)[source]#

                      Which subclass this particular outer size SymInt input (at dim idx) came from.

                      @@ -4731,7 +4731,7 @@

                      -class torch._functorch._aot_autograd.descriptors.SubclassSizeAOTOutput(base, idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.SubclassSizeAOTOutput(base, idx)[source]#

                      This output size will be bundled into a subclass at this location

                      @@ -4739,7 +4739,7 @@

                      -class torch._functorch._aot_autograd.descriptors.SubclassStrideAOTInput(base, idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.SubclassStrideAOTInput(base, idx)[source]#

                      Which subclass this particular outer stride SymInt input (at dim idx) came from.

                      @@ -4747,7 +4747,7 @@

                      -class torch._functorch._aot_autograd.descriptors.SubclassStrideAOTOutput(base, idx)[source]#
                      +class torch._functorch._aot_autograd.descriptors.SubclassStrideAOTOutput(base, idx)[source]#

                      This output stride will be bundled into a subclass at this location

                      @@ -4755,7 +4755,7 @@

                      -class torch._functorch._aot_autograd.descriptors.SyntheticBaseAOTInput(base_of)[source]#
                      +class torch._functorch._aot_autograd.descriptors.SyntheticBaseAOTInput(base_of)[source]#

                      This is similar to ViewBaseAOTInput, but this happens when none of the views were differentiable, so we weren’t able to get our hands on the true original view and constructed a synthetic one instead for the sake of autograd.

                      @@ -4765,7 +4765,7 @@

                      -class torch._functorch._aot_autograd.descriptors.ViewBaseAOTInput(base_of)[source]#
                      +class torch._functorch._aot_autograd.descriptors.ViewBaseAOTInput(base_of)[source]#

                      When multiple differentiable inputs are views of the same input, AOTAutograd will replace all of these views with a single input representing the base. If this is undesirable, you can clone the views example inputs before passing them into AOTAutograd.

                      @@ -4783,7 +4783,7 @@

                      -torch._functorch._aot_autograd.fx_utils.get_all_input_and_grad_nodes(g)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_all_input_and_grad_nodes(g)[source]#

Given a joint graph with descriptors (meta[‘desc’] on placeholders and output), returns the node for every input and its corresponding grad output node if it exists. These tuples are in a dict that is indexed by
@@ -4817,7 +4817,7 @@
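A sketch of inspecting a joint graph with these helpers; joint_gm stands in for the joint torch.fx.GraphModule obtained from aot_export_joint_with_descriptors (the attribute that exposes it is not shown in this patch, so the variable is assumed):

from torch._functorch._aot_autograd.fx_utils import (
    get_all_input_and_grad_nodes,
    get_named_param_nodes,
)

inputs_to_grads = get_all_input_and_grad_nodes(joint_gm.graph)  # dict keyed by descriptor
named_params = get_named_param_nodes(joint_gm.graph)            # FQN -> placeholder node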

                      -torch._functorch._aot_autograd.fx_utils.get_all_output_and_tangent_nodes(g)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_all_output_and_tangent_nodes(g)[source]#

                      Get all output nodes and their corresponding tangent nodes from a joint graph.

Similar to get_all_input_and_grad_nodes, but returns output nodes paired with their tangent nodes (if they exist). This function traverses the graph to find
@@ -4850,7 +4850,7 @@

                      -torch._functorch._aot_autograd.fx_utils.get_buffer_nodes(graph)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_buffer_nodes(graph)[source]#

                      Get all buffer nodes from a graph as a list.

                      You can rely on this providing the correct order of buffers you need to feed into the joint graph (after parameters).

                      @@ -4876,7 +4876,7 @@

                      -torch._functorch._aot_autograd.fx_utils.get_named_buffer_nodes(graph)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_named_buffer_nodes(graph)[source]#

                      Get buffer nodes mapped by their fully qualified names.

This function traverses the graph to find all buffer input nodes and returns them in a dictionary where keys are the buffer names (FQNs)
@@ -4902,7 +4902,7 @@

                      -torch._functorch._aot_autograd.fx_utils.get_named_param_nodes(graph)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_named_param_nodes(graph)[source]#

                      Get parameter nodes mapped by their fully qualified names.

This function traverses the graph to find all parameter input nodes and returns them in a dictionary where keys are the parameter names (FQNs)
@@ -4928,7 +4928,7 @@

                      -torch._functorch._aot_autograd.fx_utils.get_param_and_grad_nodes(graph)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_param_and_grad_nodes(graph)[source]#

                      Get parameter nodes and their corresponding gradient nodes from a joint graph.

                      Parameters
                      @@ -4949,7 +4949,7 @@

                      -torch._functorch._aot_autograd.fx_utils.get_param_nodes(graph)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_param_nodes(graph)[source]#

                      Get all parameter nodes from a graph as a list.

You can rely on this providing the correct order of parameters you need to feed into the joint graph (at the very beginning of the argument list,
@@ -4976,7 +4976,7 @@

                      -torch._functorch._aot_autograd.fx_utils.get_plain_input_and_grad_nodes(graph)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_plain_input_and_grad_nodes(graph)[source]#

                      Get plain input nodes and their corresponding gradient nodes from a joint graph.

                      Parameters
                      @@ -4997,7 +4997,7 @@

                      -torch._functorch._aot_autograd.fx_utils.get_plain_output_and_tangent_nodes(graph)[source]#
                      +torch._functorch._aot_autograd.fx_utils.get_plain_output_and_tangent_nodes(graph)[source]#

                      Get plain output nodes and their corresponding tangent nodes from a joint graph.

                      Parameters
diff --git a/2.9/fsdp.html b/2.9/fsdp.html
index 0db5e7a8bfa..26f6774df80 100644
--- a/2.9/fsdp.html
+++ b/2.9/fsdp.html
@@ -4396,7 +4396,7 @@

                      Created On: Feb 02, 2022 | Last Updated On: Jun 11, 2025

                      -class torch.distributed.fsdp.FullyShardedDataParallel(module, process_group=None, sharding_strategy=None, cpu_offload=None, auto_wrap_policy=None, backward_prefetch=BackwardPrefetch.BACKWARD_PRE, mixed_precision=None, ignored_modules=None, param_init_fn=None, device_id=None, sync_module_states=False, forward_prefetch=False, limit_all_gathers=True, use_orig_params=False, ignored_states=None, device_mesh=None)[source]#
                      +class torch.distributed.fsdp.FullyShardedDataParallel(module, process_group=None, sharding_strategy=None, cpu_offload=None, auto_wrap_policy=None, backward_prefetch=BackwardPrefetch.BACKWARD_PRE, mixed_precision=None, ignored_modules=None, param_init_fn=None, device_id=None, sync_module_states=False, forward_prefetch=False, limit_all_gathers=True, use_orig_params=False, ignored_states=None, device_mesh=None)[source]#

                      A wrapper for sharding module parameters across data parallel workers.

This is inspired by Xu et al. as well as the ZeRO Stage 3 from DeepSpeed.
@@ -4653,7 +4653,7 @@
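A minimal sketch of wrapping a module; it assumes the default process group was already initialized (e.g. under torchrun) and that CUDA is available:

import torch
import torch.distributed as dist
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

# dist.init_process_group("nccl") is assumed to have run already
torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())
model = FSDP(torch.nn.Linear(8, 8).cuda())
out = model(torch.randn(4, 8, device="cuda"))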

                      -apply(fn)[source]#
                      +apply(fn)[source]#

                      Apply fn recursively to every submodule (as returned by .children()) as well as self.

                      Typical use includes initializing the parameters of a model (see also torch.nn.init).

Compared to torch.nn.Module.apply, this version additionally gathers
@@ -4674,7 +4674,7 @@

                      -check_is_root()[source]#
                      +check_is_root()[source]#

                      Check if this instance is a root FSDP module.

                      Return type
                      @@ -4685,7 +4685,7 @@
                      -clip_grad_norm_(max_norm, norm_type=2.0)[source]#
                      +clip_grad_norm_(max_norm, norm_type=2.0)[source]#

                      Clip the gradient norm of all parameters.

                      The norm is computed over all parameters’ gradients as viewed as a single vector, and the gradients are modified in-place.

                      @@ -4726,7 +4726,7 @@
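Continuing the sketch above, clipping must be invoked on the FSDP instance itself rather than on torch.nn.utils.clip_grad_norm_:

model(torch.randn(4, 8, device="cuda")).sum().backward()
total_norm = model.clip_grad_norm_(max_norm=1.0)  # returns the pre-clip total norm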
                      -static flatten_sharded_optim_state_dict(sharded_optim_state_dict, model, optim)[source]#
                      +static flatten_sharded_optim_state_dict(sharded_optim_state_dict, model, optim)[source]#

                      Flatten a sharded optimizer state-dict.

The API is similar to shard_full_optim_state_dict(). The only difference is that the input sharded_optim_state_dict should be
@@ -4754,7 +4754,7 @@

                      -forward(*args, **kwargs)[source]#
                      +forward(*args, **kwargs)[source]#

                      Run the forward pass for the wrapped module, inserting FSDP-specific pre- and post-forward sharding logic.

                      Return type
                      @@ -4765,7 +4765,7 @@
                      -static fsdp_modules(module, root_only=False)[source]#
                      +static fsdp_modules(module, root_only=False)[source]#

                      Return all nested FSDP instances.

                      This possibly includes module itself and only includes FSDP root modules if root_only=True.

                      @@ -4789,7 +4789,7 @@
                      -static full_optim_state_dict(model, optim, optim_input=None, rank0_only=True, group=None)[source]#
                      +static full_optim_state_dict(model, optim, optim_input=None, rank0_only=True, group=None)[source]#

                      Return the full optimizer state-dict.

Consolidates the full optimizer state on rank 0 and returns it as a dict following the convention of
@@ -4842,7 +4842,7 @@

                      -static get_state_dict_type(module)[source]#
                      +static get_state_dict_type(module)[source]#

                      Get the state_dict_type and the corresponding configurations for the FSDP modules rooted at module.

                      The target module does not have to be an FSDP module.

                      @@ -4870,7 +4870,7 @@
                      -named_buffers(*args, **kwargs)[source]#
                      +named_buffers(*args, **kwargs)[source]#

                      Return an iterator over module buffers, yielding both the name of the buffer and the buffer itself.

                      Intercepts buffer names and removes all occurrences of the FSDP-specific flattened buffer prefix when inside the summon_full_params() context manager.

                      @@ -4883,7 +4883,7 @@
                      -named_parameters(*args, **kwargs)[source]#
                      +named_parameters(*args, **kwargs)[source]#

                      Return an iterator over module parameters, yielding both the name of the parameter and the parameter itself.

                      Intercepts parameter names and removes all occurrences of the FSDP-specific flattened parameter prefix when inside the summon_full_params() context manager.

                      @@ -4896,7 +4896,7 @@
                      -no_sync()[source]#
                      +no_sync()[source]#

                      Disable gradient synchronizations across FSDP instances.

Within this context, gradients will be accumulated in module variables, which will later be synchronized in the first
@@ -4924,7 +4924,7 @@
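A sketch of gradient accumulation with no_sync(); micro_batches is an illustrative list of input tensors:

with model.no_sync():
    for x in micro_batches[:-1]:
        model(x).sum().backward()          # gradients accumulate locally
model(micro_batches[-1]).sum().backward()  # synchronization happens on this pass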

                      -static optim_state_dict(model, optim, optim_state_dict=None, group=None)[source]#
                      +static optim_state_dict(model, optim, optim_state_dict=None, group=None)[source]#

                      Transform the state-dict of an optimizer corresponding to a sharded model.

                      The given state-dict can be transformed to one of three types: 1) full optimizer state_dict, 2) sharded optimizer state_dict, 3) local optimizer state_dict.

                      @@ -4998,7 +4998,7 @@
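A sketch of the save/load round trip with the FSDP model from the earlier example, using optim_state_dict above and the optim_state_dict_to_load converter documented next:

optim = torch.optim.Adam(model.parameters())
model(torch.randn(4, 8, device="cuda")).sum().backward()
optim.step()

osd = FSDP.optim_state_dict(model, optim)                     # save side
loadable = FSDP.optim_state_dict_to_load(model, optim, osd)   # load side
optim.load_state_dict(loadable)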
                      -static optim_state_dict_to_load(model, optim, optim_state_dict, is_named_optimizer=False, load_directly=False, group=None)[source]#
                      +static optim_state_dict_to_load(model, optim, optim_state_dict, is_named_optimizer=False, load_directly=False, group=None)[source]#

                      Convert an optimizer state-dict so that it can be loaded into the optimizer associated with the FSDP model.

Given an optim_state_dict that is transformed through optim_state_dict(), it gets converted to the flattened optimizer
@@ -5069,7 +5069,7 @@

                      -register_comm_hook(state, hook)[source]#
                      +register_comm_hook(state, hook)[source]#

                      Register a communication hook.

This is an enhancement that provides a flexible hook to users where they can specify how FSDP aggregates gradients across multiple workers.
@@ -5115,7 +5115,7 @@

                      -static rekey_optim_state_dict(optim_state_dict, optim_state_key_type, model, optim_input=None, optim=None)[source]#
                      +static rekey_optim_state_dict(optim_state_dict, optim_state_key_type, model, optim_input=None, optim=None)[source]#

                      Re-keys the optimizer state dict optim_state_dict to use the key type optim_state_key_type.

                      This can be used to achieve compatibility between optimizer state dicts from models with FSDP instances and ones without.

                      @@ -5152,7 +5152,7 @@
                      -static scatter_full_optim_state_dict(full_optim_state_dict, model, optim_input=None, optim=None, group=None)[source]#
                      +static scatter_full_optim_state_dict(full_optim_state_dict, model, optim_input=None, optim=None, group=None)[source]#

                      Scatter the full optimizer state dict from rank 0 to all other ranks.

Returns the sharded optimizer state dict on each rank. The return value is the same as shard_full_optim_state_dict(), and on rank
@@ -5217,7 +5217,7 @@

                      -static set_state_dict_type(module, state_dict_type, state_dict_config=None, optim_state_dict_config=None)[source]#
                      +static set_state_dict_type(module, state_dict_type, state_dict_config=None, optim_state_dict_config=None)[source]#

                      Set the state_dict_type of all the descendant FSDP modules of the target module.

Also takes (optional) configuration for the model’s and optimizer’s state dict. The target module does not have to be an FSDP module. If the target
@@ -5271,7 +5271,7 @@

                      -static shard_full_optim_state_dict(full_optim_state_dict, model, optim_input=None, optim=None)[source]#
                      +static shard_full_optim_state_dict(full_optim_state_dict, model, optim_input=None, optim=None)[source]#

                      Shard a full optimizer state-dict.

Remaps the state in full_optim_state_dict to flattened parameters instead of unflattened parameters and restricts to only this rank’s part of the optimizer state.
@@ -5334,7 +5334,7 @@

                      -static sharded_optim_state_dict(model, optim, group=None)[source]#
                      +static sharded_optim_state_dict(model, optim, group=None)[source]#

                      Return the optimizer state-dict in its sharded form.

The API is similar to full_optim_state_dict() but this API chunks all non-zero-dimension states to ShardedTensor to save memory.
@@ -5355,7 +5355,7 @@

                      -static state_dict_type(module, state_dict_type, state_dict_config=None, optim_state_dict_config=None)[source]#
                      +static state_dict_type(module, state_dict_type, state_dict_config=None, optim_state_dict_config=None)[source]#

                      Set the state_dict_type of all the descendant FSDP modules of the target module.

This context manager has the same function as set_state_dict_type(). See the documentation of set_state_dict_type() for details.

                      @@ -5387,7 +5387,7 @@
                      -static summon_full_params(module, recurse=True, writeback=True, rank0_only=False, offload_to_cpu=False, with_grads=False)[source]#
                      +static summon_full_params(module, recurse=True, writeback=True, rank0_only=False, offload_to_cpu=False, with_grads=False)[source]#

                      Expose full params for FSDP instances with this context manager.

Can be useful after forward/backward for a model to get the params for additional processing or checking. It can take a non-FSDP
@@ -5477,7 +5477,7 @@
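For example, to inspect the full (unsharded) parameters without writing any changes back:

with FSDP.summon_full_params(model, writeback=False):
    total = sum(p.numel() for p in model.parameters())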

                      -class torch.distributed.fsdp.BackwardPrefetch(value)[source]#
                      +class torch.distributed.fsdp.BackwardPrefetch(value)[source]#

                      This configures explicit backward prefetching, which improves throughput by enabling communication and computation overlap in the backward pass at the cost of slightly increased memory usage.

                      @@ -5509,7 +5509,7 @@
                      -class torch.distributed.fsdp.ShardingStrategy(value)[source]#
                      +class torch.distributed.fsdp.ShardingStrategy(value)[source]#

                      This specifies the sharding strategy to be used for distributed training by FullyShardedDataParallel.

                        @@ -5544,7 +5544,7 @@
                        -class torch.distributed.fsdp.MixedPrecision(param_dtype=None, reduce_dtype=None, buffer_dtype=None, keep_low_precision_grads=False, cast_forward_inputs=False, cast_root_forward_inputs=True, _module_classes_to_ignore=(<class 'torch.nn.modules.batchnorm._BatchNorm'>, ))[source]#
                        +class torch.distributed.fsdp.MixedPrecision(param_dtype=None, reduce_dtype=None, buffer_dtype=None, keep_low_precision_grads=False, cast_forward_inputs=False, cast_root_forward_inputs=True, _module_classes_to_ignore=(<class 'torch.nn.modules.batchnorm._BatchNorm'>, ))[source]#

                        This configures FSDP-native mixed precision training.

                        Variables
                        @@ -5668,7 +5668,7 @@
                        -class torch.distributed.fsdp.CPUOffload(offload_params=False)[source]#
                        +class torch.distributed.fsdp.CPUOffload(offload_params=False)[source]#

                        This configures CPU offloading.

                        Variables
                        @@ -5682,7 +5682,7 @@
                        -class torch.distributed.fsdp.StateDictConfig(offload_to_cpu=False)[source]#
                        +class torch.distributed.fsdp.StateDictConfig(offload_to_cpu=False)[source]#

StateDictConfig is the base class for all state_dict configuration classes. Users should instantiate a child class (e.g. FullStateDictConfig) in order to configure settings for the
@@ -5698,7 +5698,7 @@

                        -class torch.distributed.fsdp.FullStateDictConfig(offload_to_cpu=False, rank0_only=False)[source]#
                        +class torch.distributed.fsdp.FullStateDictConfig(offload_to_cpu=False, rank0_only=False)[source]#

FullStateDictConfig is a config class meant to be used with StateDictType.FULL_STATE_DICT. We recommend enabling both offload_to_cpu=True and rank0_only=True when saving full state
@@ -5739,7 +5739,7 @@

                        -class torch.distributed.fsdp.ShardedStateDictConfig(offload_to_cpu=False, _use_dtensor=False)[source]#
                        +class torch.distributed.fsdp.ShardedStateDictConfig(offload_to_cpu=False, _use_dtensor=False)[source]#

                        ShardedStateDictConfig is a config class meant to be used with StateDictType.SHARDED_STATE_DICT.

                        @@ -5759,14 +5759,14 @@
                        -class torch.distributed.fsdp.LocalStateDictConfig(offload_to_cpu: bool = False)[source]#
                        +class torch.distributed.fsdp.LocalStateDictConfig(offload_to_cpu: bool = False)[source]#
                        -class torch.distributed.fsdp.OptimStateDictConfig(offload_to_cpu=True)[source]#
                        +class torch.distributed.fsdp.OptimStateDictConfig(offload_to_cpu=True)[source]#

OptimStateDictConfig is the base class for all optim_state_dict configuration classes. Users should instantiate a child class (e.g. FullOptimStateDictConfig) in order to configure settings for the
@@ -5783,7 +5783,7 @@

                        -class torch.distributed.fsdp.FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False)[source]#
                        +class torch.distributed.fsdp.FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False)[source]#
                        Variables

rank0_only (bool) – If True, then only rank 0 saves the full state
@@ -5795,7 +5795,7 @@

                        -class torch.distributed.fsdp.ShardedOptimStateDictConfig(offload_to_cpu=True, _use_dtensor=False)[source]#
                        +class torch.distributed.fsdp.ShardedOptimStateDictConfig(offload_to_cpu=True, _use_dtensor=False)[source]#

                        ShardedOptimStateDictConfig is a config class meant to be used with StateDictType.SHARDED_STATE_DICT.

                        @@ -5815,14 +5815,14 @@
                        -class torch.distributed.fsdp.LocalOptimStateDictConfig(offload_to_cpu: bool = False)[source]#
                        +class torch.distributed.fsdp.LocalOptimStateDictConfig(offload_to_cpu: bool = False)[source]#
                        -class torch.distributed.fsdp.StateDictSettings(state_dict_type: torch.distributed.fsdp.api.StateDictType, state_dict_config: torch.distributed.fsdp.api.StateDictConfig, optim_state_dict_config: torch.distributed.fsdp.api.OptimStateDictConfig)[source]#
                        +class torch.distributed.fsdp.StateDictSettings(state_dict_type: torch.distributed.fsdp.api.StateDictType, state_dict_config: torch.distributed.fsdp.api.StateDictConfig, optim_state_dict_config: torch.distributed.fsdp.api.OptimStateDictConfig)[source]#
diff --git a/2.9/future_mod.html b/2.9/future_mod.html
index 4c8bf99a035..3a08e9d3855 100644
--- a/2.9/future_mod.html
+++ b/2.9/future_mod.html
@@ -4396,7 +4396,7 @@

                        Created On: Feb 05, 2024 | Last Updated On: Jun 12, 2025

                        -torch.__future__.set_overwrite_module_params_on_conversion(value)[source]#
                        +torch.__future__.set_overwrite_module_params_on_conversion(value)[source]#

                        Sets whether to assign new tensors to the parameters instead of changing the existing parameters in-place when converting an nn.Module.

                        When enabled, the following methods will assign new parameters to the module:

                        @@ -4415,7 +4415,7 @@
                        -torch.__future__.get_overwrite_module_params_on_conversion()[source]#
                        +torch.__future__.get_overwrite_module_params_on_conversion()[source]#

Returns whether to assign new tensors to the parameters instead of changing the existing parameters in-place when converting a torch.nn.Module. Defaults to False.

                        See set_overwrite_module_params_on_conversion() for more information.

                        @@ -4428,7 +4428,7 @@
                        -torch.__future__.set_swap_module_params_on_conversion(value)[source]#
                        +torch.__future__.set_swap_module_params_on_conversion(value)[source]#

                        Sets whether to use swap_tensors() instead of setting .data to change the existing parameters in-place when converting an nn.Module and instead of param.copy_(state_dict[key]) when loading a state dict into an nn.Module.

                        @@ -4461,7 +4461,7 @@
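A short sketch of toggling and querying the flag:

import torch
import torch.__future__ as torch_future

torch_future.set_swap_module_params_on_conversion(True)
m = torch.nn.Linear(2, 2)
m.to(torch.float64)  # parameters are swapped via swap_tensors(), not via .data
assert torch_future.get_swap_module_params_on_conversion()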
                        -torch.__future__.get_swap_module_params_on_conversion()[source]#
                        +torch.__future__.get_swap_module_params_on_conversion()[source]#

                        Returns whether to use swap_tensors() instead of setting .data to change the existing parameters in-place when converting an nn.Module. Defaults to False.

                        See set_swap_module_params_on_conversion() for more information.

diff --git a/2.9/futures.html b/2.9/futures.html
index 37376ecacff..a45093c9e81 100644
--- a/2.9/futures.html
+++ b/2.9/futures.html
@@ -4413,7 +4413,7 @@
                        -add_done_callback(callback)[source]#
                        +add_done_callback(callback)[source]#

Append the given callback function to this Future, which will be run when the Future is completed. Multiple callbacks can be added to the same Future, but the order in which they will be executed cannot
@@ -4459,7 +4459,7 @@

                        -done()[source]#
                        +done()[source]#

                        Return True if this Future is done. A Future is done if it has a result or an exception.

If the value contains tensors that reside on GPUs, Future.done()
@@ -4476,7 +4476,7 @@

                        -set_exception(result)[source]#
                        +set_exception(result)[source]#

Set an exception for this Future, which will mark this Future as completed with an error and trigger all attached callbacks. Note that when calling wait()/value() on this Future, the exception set here
@@ -4499,7 +4499,7 @@

                        -set_result(result)[source]#
                        +set_result(result)[source]#

                        Set the result for this Future, which will mark this Future as completed and trigger all attached callbacks. Note that a Future cannot be marked completed twice.

                        @@ -4539,7 +4539,7 @@
                        -then(callback)[source]#
                        +then(callback)[source]#

Append the given callback function to this Future, which will be run when the Future is completed. Multiple callbacks can be added to the same Future, but the order in which they will be executed cannot
@@ -4608,7 +4608,7 @@
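A small sketch chaining a callback onto a manually completed Future:

import torch

fut = torch.futures.Future()
chained = fut.then(lambda f: f.wait() + 1)  # callback receives the completed Future
fut.set_result(41)
assert chained.wait() == 42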

                        -value()[source]#
                        +value()[source]#

                        Obtain the value of an already-completed future.

This method should only be called after a call to wait() has completed, or inside a callback function passed to then(). In
@@ -4632,7 +4632,7 @@

                        -wait()[source]#
                        +wait()[source]#

                        Block until the value of this Future is ready.

If the value contains tensors that reside on GPUs, then an additional synchronization is performed with the kernels (executing on the device)
@@ -4659,7 +4659,7 @@

                        -torch.futures.collect_all(futures)[source]#
                        +torch.futures.collect_all(futures)[source]#

                        Collects the provided Future objects into a single combined Future that is completed when all of the sub-futures are completed.

                        @@ -4694,7 +4694,7 @@
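For example, combining two futures and reading both results once everything is done:

f1 = torch.futures.Future()
f2 = torch.futures.Future()
combined = torch.futures.collect_all([f1, f2])
f1.set_result(1)
f2.set_result(2)
results = [f.wait() for f in combined.wait()]  # [1, 2]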
                        -torch.futures.wait_all(futures)[source]#
                        +torch.futures.wait_all(futures)[source]#

Waits for all provided futures to be complete, and returns the list of completed values. If any of the futures encounters an error, the method will exit early and report the error, not waiting for other
diff --git a/2.9/fx.html b/2.9/fx.html
index d88de35d514..908d974e5ff 100644
--- a/2.9/fx.html
+++ b/2.9/fx.html
@@ -5534,7 +5534,7 @@

                        Miscellanea#

                        -torch.fx.symbolic_trace(root, concrete_args=None)[source]#
                        +torch.fx.symbolic_trace(root, concrete_args=None)[source]#

                        Symbolic tracing API

                        Given an nn.Module or function instance root, this function will return a GraphModule constructed by recording operations seen while tracing through root.

                        @@ -5595,7 +5595,7 @@
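A minimal sketch of tracing a free function:

import torch
from torch.fx import symbolic_trace

def f(x):
    return torch.relu(x) + 1.0

gm = symbolic_trace(f)
print(gm.graph)  # the recorded operations
print(gm.code)   # generated Python source for forward()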

                        -torch.fx.wrap(fn_or_name)[source]#
                        +torch.fx.wrap(fn_or_name)[source]#

                        This function can be called at module-level scope to register fn_or_name as a “leaf function”. A “leaf function” will be preserved as a CallFunction node in the FX trace instead of being traced through:

                        @@ -5637,7 +5637,7 @@
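A sketch of keeping a function opaque to the tracer; my_leaf is hypothetical:

import torch
import torch.fx

def my_leaf(x):
    return int(x.sum())  # int() on a Proxy would fail if traced through

torch.fx.wrap("my_leaf")  # must be called at module-level scope

class M(torch.nn.Module):
    def forward(self, x):
        return x + my_leaf(x)

traced = torch.fx.symbolic_trace(M())  # my_leaf appears as a call_function node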

                        -class torch.fx.GraphModule(*args, **kwargs)[source]#
                        +class torch.fx.GraphModule(*args, **kwargs)[source]#

GraphModule is an nn.Module generated from an fx.Graph. GraphModule has a graph attribute, as well as code and forward attributes generated from that graph.

                        @@ -5654,7 +5654,7 @@

                        -__init__(root, graph, class_name='GraphModule')[source]#
                        +__init__(root, graph, class_name='GraphModule')[source]#

                        Construct a GraphModule.

                        Parameters
                        @@ -5681,7 +5681,7 @@

                        -add_submodule(target, m)[source]#
                        +add_submodule(target, m)[source]#

                        Adds the given submodule to self.

                        This installs empty Modules where none exist yet if they are subpaths of target.

                        @@ -5724,7 +5724,7 @@

                        -delete_all_unused_submodules()[source]#
                        +delete_all_unused_submodules()[source]#

                        Deletes all unused submodules from self.

A Module is considered “used” if any one of the following is true:
@@ -5744,7 +5744,7 @@

                        -delete_submodule(target)[source]#
                        +delete_submodule(target)[source]#

                        Deletes the given submodule from self.

                        The module will not be deleted if target is not a valid target.

                        @@ -5781,7 +5781,7 @@

                        -print_readable(print_output=True, include_stride=False, include_device=False, colored=False, *, fast_sympy_print=False, expanded_def=False)[source]#
                        +print_readable(print_output=True, include_stride=False, include_device=False, colored=False, *, fast_sympy_print=False, expanded_def=False)[source]#

Return the Python code generated for the current GraphModule and its child GraphModules.

                        Warning

                        @@ -5793,7 +5793,7 @@

                        API Reference
                        -recompile()[source]#
                        +recompile()[source]#

                        Recompile this GraphModule from its graph attribute. This should be called after editing the contained graph, otherwise the generated code of this GraphModule will be out of date.
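
For example, a common edit-then-recompile pattern (an illustrative sketch, not from this page):

import operator
import torch
from torch.fx import symbolic_trace

gm = symbolic_trace(lambda x: x + 1)
for node in gm.graph.nodes:
    if node.op == 'call_function' and node.target == operator.add:
        node.target = operator.mul       # rewrite add -> mul in place
gm.recompile()                           # regenerate gm.code and gm.forward
print(gm(torch.tensor(3.0)))             # tensor(3.) since 3 * 1 == 3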

                        @@ -5810,7 +5810,7 @@

                        API Reference
                        -to_folder(folder, module_name='FxModule')[source]#
                        +to_folder(folder, module_name='FxModule')[source]#
Dumps out module to folder with module_name so that it can be imported with from <folder> import <module_name>

                        Args:

                        @@ -5835,7 +5835,7 @@

                        API Reference
                        -class torch.fx.Graph(owning_module=None, tracer_cls=None, tracer_extras=None)[source]#
                        +class torch.fx.Graph(owning_module=None, tracer_cls=None, tracer_extras=None)[source]#

                        Graph is the main data structure used in the FX Intermediate Representation. It consists of a series of Node s, each representing callsites (or other syntactic constructs). The list of Node s, taken together, constitute a @@ -5884,7 +5884,7 @@

                        API Reference
                        -__init__(owning_module=None, tracer_cls=None, tracer_extras=None)[source]#
                        +__init__(owning_module=None, tracer_cls=None, tracer_extras=None)[source]#

                        Construct an empty Graph.

                        Note

                        @@ -5896,7 +5896,7 @@

                        API Reference
                        -call_function(the_function, args=None, kwargs=None, type_expr=None, name=None)[source]#
                        +call_function(the_function, args=None, kwargs=None, type_expr=None, name=None)[source]#

                        Insert a call_function Node into the Graph. A call_function node represents a call to a Python callable, specified by the_function.
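
A sketch of building a tiny Graph by hand with call_function (assuming an empty root module suffices for the resulting GraphModule):

import operator
import torch
from torch.fx import Graph, GraphModule

g = Graph()
x = g.placeholder('x')                       # function input
y = g.call_function(operator.add, (x, 1))    # y = x + 1
g.output(y)                                  # return y
gm = GraphModule(torch.nn.Module(), g)
print(gm(torch.tensor(2)))                   # tensor(3)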

                        @@ -5934,7 +5934,7 @@

                        API Reference
                        -call_method(method_name, args=None, kwargs=None, type_expr=None)[source]#
                        +call_method(method_name, args=None, kwargs=None, type_expr=None)[source]#

                        Insert a call_method Node into the Graph. A call_method node represents a call to a given method on the 0th element of args.

                        @@ -5971,7 +5971,7 @@

                        API Reference
                        -call_module(module_name, args=None, kwargs=None, type_expr=None)[source]#
                        +call_module(module_name, args=None, kwargs=None, type_expr=None)[source]#

                        Insert a call_module Node into the Graph. A call_module node represents a call to the forward() function of a Module in the Module hierarchy.

                        @@ -6011,7 +6011,7 @@

                        API Reference
                        -create_node(op, target, args=None, kwargs=None, name=None, type_expr=None)[source]#
                        +create_node(op, target, args=None, kwargs=None, name=None, type_expr=None)[source]#

                        Create a Node and add it to the Graph at the current insert-point. Note that the current insert-point can be set via Graph.inserting_before() and Graph.inserting_after().

                        @@ -6045,7 +6045,7 @@

                        API Reference
                        -eliminate_dead_code(is_impure_node=None)[source]#
                        +eliminate_dead_code(is_impure_node=None)[source]#

                        Remove all dead code from the graph, based on each node’s number of users, and whether the nodes have any side effects. The graph must be topologically sorted before calling.
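
An illustrative sketch: the multiply below has no users, so it is removed:

import torch
from torch.fx import symbolic_trace

def f(x):
    unused = x * 2      # dead: never used by the output
    return x + 1

gm = symbolic_trace(f)
changed = gm.graph.eliminate_dead_code()   # returns True if anything was removed
gm.recompile()
print(changed, gm.code)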

                        @@ -6096,7 +6096,7 @@

                        API Reference
                        -erase_node(to_erase)[source]#
                        +erase_node(to_erase)[source]#

                        Erases a Node from the Graph. Throws an exception if there are still users of that node in the Graph.

                        @@ -6112,7 +6112,7 @@

                        API Reference
                        -find_nodes(*, op, target=None, sort=True)[source]#
                        +find_nodes(*, op, target=None, sort=True)[source]#

Allows for fast querying of nodes

                        Parameters
                        @@ -6136,7 +6136,7 @@

                        API Reference
                        -get_attr(qualified_name, type_expr=None)[source]#
                        +get_attr(qualified_name, type_expr=None)[source]#

                        Insert a get_attr node into the Graph. A get_attr Node represents the fetch of an attribute from the Module hierarchy.

                        @@ -6170,7 +6170,7 @@

                        API Reference
                        -graph_copy(g, val_map, return_output_node=False)[source]#
                        +graph_copy(g, val_map, return_output_node=False)[source]#

                        Copy all nodes from a given graph into self.

                        Parameters
                        @@ -6197,7 +6197,7 @@

                        API Reference
                        -inserting_after(n=None)[source]#
                        +inserting_after(n=None)[source]#
                        Set the point at which create_node and companion methods will insert into the graph.

When used within a ‘with’ statement, this will temporarily set the insert point and then restore it when the with statement exits:
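
For illustration, a sketch of the usual rewrite idiom built on this insert point, swapping relu for tanh (adapted from the common FX pattern; the traced lambda is a made-up example):

import torch
from torch.fx import symbolic_trace

gm = symbolic_trace(lambda x: torch.relu(x))
for node in list(gm.graph.nodes):
    if node.op == 'call_function' and node.target == torch.relu:
        # Nodes created inside this scope land right after `node`
        with gm.graph.inserting_after(node):
            new_node = gm.graph.call_function(torch.tanh, node.args, node.kwargs)
        node.replace_all_uses_with(new_node)
        gm.graph.erase_node(node)
gm.recompile()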

                        @@ -6230,7 +6230,7 @@

                        API Reference
                        -inserting_before(n=None)[source]#
                        +inserting_before(n=None)[source]#
                        Set the point at which create_node and companion methods will insert into the graph.

When used within a ‘with’ statement, this will temporarily set the insert point and then restore it when the with statement exits:

                        @@ -6263,7 +6263,7 @@

                        API Reference
                        -lint()[source]#
                        +lint()[source]#

                        Runs various checks on this Graph to make sure it is well-formed. In particular: - Checks Nodes have correct ownership (owned by this graph) @@ -6278,7 +6278,7 @@

                        API Reference
                        -node_copy(node, arg_transform=<function Graph.<lambda>>)[source]#
                        +node_copy(node, arg_transform=<function Graph.<lambda>>)[source]#

                        Copy a node from one graph into another. arg_transform needs to transform arguments from the graph of node to the graph of self. Example:

                        # Copying all the nodes in `g` into `new_graph`
                        @@ -6326,7 +6326,7 @@ 

                        API Reference
                        -on_generate_code(make_transformer)[source]#
                        +on_generate_code(make_transformer)[source]#

Register a transformer function to be applied when Python code is generated

                        @@ -6401,7 +6401,7 @@

                        API Reference
                        -output(result, type_expr=None)[source]#
                        +output(result, type_expr=None)[source]#

                        Insert an output Node into the Graph. An output node represents a return statement in Python code. result is the value that should be returned.

                        @@ -6427,7 +6427,7 @@

                        API Reference
                        -output_node()[source]#
                        +output_node()[source]#

                        Warning

                        This API is experimental and is NOT backward-compatible.

                        @@ -6441,7 +6441,7 @@

                        API Reference
                        -placeholder(name, type_expr=None, default_value)[source]#
                        +placeholder(name, type_expr=None, default_value)[source]#

                        Insert a placeholder node into the Graph. A placeholder represents a function input.

                        @@ -6476,7 +6476,7 @@

                        API Reference
                        -print_tabular()[source]#
                        +print_tabular()[source]#

                        Prints the intermediate representation of the graph in tabular format. Note that this API requires the tabulate module to be installed.

                        @@ -6488,7 +6488,7 @@

                        API Reference
                        -process_inputs(*args)[source]#
                        +process_inputs(*args)[source]#

                        Processes args so that they can be passed to the FX graph.

                        Warning

                        @@ -6498,7 +6498,7 @@

                        API Reference
                        -process_outputs(out)[source]#
                        +process_outputs(out)[source]#

                        Warning

                        This API is experimental and is NOT backward-compatible.

                        @@ -6507,7 +6507,7 @@

                        API Reference
                        -python_code(root_module, *, verbose=False, include_stride=False, include_device=False, colored=False, expanded_def=False)[source]#
                        +python_code(root_module, *, verbose=False, include_stride=False, include_device=False, colored=False, expanded_def=False)[source]#

                        Turn this Graph into valid Python code.

                        Parameters
                        @@ -6530,7 +6530,7 @@

                        API Reference
                        -set_codegen(codegen)[source]#
                        +set_codegen(codegen)[source]#

                        Warning

                        This API is experimental and is NOT backward-compatible.

                        @@ -6543,7 +6543,7 @@

                        API Reference
                        -class torch.fx.Node(graph, name, op, target, args, kwargs, return_type=None)[source]#
                        +class torch.fx.Node(graph, name, op, target, args, kwargs, return_type=None)[source]#

                        Node is the data structure that represents individual operations within a Graph. For the most part, Nodes represent callsites to various entities, such as operators, methods, and Modules (some exceptions include nodes that @@ -6591,7 +6591,7 @@

                        API Reference
                        -append(x)[source]#
                        +append(x)[source]#

                        Insert x after this node in the list of nodes in the graph. Equivalent to self.next.prepend(x)

                        @@ -6617,7 +6617,7 @@

                        API Reference
                        -format_node(placeholder_names=None, maybe_return_typename=None, *, include_tensor_metadata=False)[source]#
                        +format_node(placeholder_names=None, maybe_return_typename=None, *, include_tensor_metadata=False)[source]#

                        Return a descriptive string representation of self.

                        This method can be used with no arguments as a debugging utility.

                        @@ -6661,7 +6661,7 @@

                        API Reference
                        -insert_arg(idx, arg)[source]#
                        +insert_arg(idx, arg)[source]#

Insert a positional argument into the argument list at the given index.

                        Parameters
                        @@ -6679,7 +6679,7 @@

                        API Reference
                        -is_impure(impure_random=True)[source]#
                        +is_impure(impure_random=True)[source]#

Returns whether this op is impure, i.e. whether its op is a placeholder or output, or whether it is a call_function or call_module that is impure.

                        @@ -6722,7 +6722,7 @@

                        API Reference
                        -normalized_arguments(root, arg_types=None, kwarg_types=None, normalize_to_only_use_kwargs=False)[source]#
                        +normalized_arguments(root, arg_types=None, kwarg_types=None, normalize_to_only_use_kwargs=False)[source]#

                        Returns normalized arguments to Python targets. This means that args/kwargs will be matched up to the module/functional’s signature and return exclusively kwargs in positional order @@ -6755,7 +6755,7 @@

                        API Reference
                        -prepend(x)[source]#
                        +prepend(x)[source]#

                        Insert x before this node in the list of nodes in the graph. Example:

                        Before: p -> self
                                 bx -> x -> ax
                        @@ -6787,7 +6787,7 @@ 

                        API Reference
                        -replace_all_uses_with(replace_with, delete_user_cb=<function Node.<lambda>>, *, propagate_meta=False)[source]#
                        +replace_all_uses_with(replace_with, delete_user_cb=<function Node.<lambda>>, *, propagate_meta=False)[source]#

                        Replace all uses of self in the Graph with the Node replace_with.

                        Parameters
                        @@ -6816,7 +6816,7 @@

                        API Reference
                        -replace_input_with(old_input, new_input)[source]#
                        +replace_input_with(old_input, new_input)[source]#

                        Loop through input nodes of self, and replace all instances of old_input with new_input.

                        @@ -6847,7 +6847,7 @@

                        API Reference
                        -update_arg(idx, arg)[source]#
                        +update_arg(idx, arg)[source]#

                        Update an existing positional argument to contain the new value arg. After calling, self.args[idx] == arg.

                        @@ -6866,7 +6866,7 @@

                        API Reference
                        -update_kwarg(key, arg)[source]#
                        +update_kwarg(key, arg)[source]#

                        Update an existing keyword argument to contain the new value arg. After calling, self.kwargs[key] == arg.

                        @@ -6887,7 +6887,7 @@

                        API Reference
                        -class torch.fx.Tracer(autowrap_modules=(math,), autowrap_functions=())[source]#
                        +class torch.fx.Tracer(autowrap_modules=(math,), autowrap_functions=())[source]#

                        Tracer is the class that implements the symbolic tracing functionality of torch.fx.symbolic_trace. A call to symbolic_trace(m) is equivalent @@ -6904,7 +6904,7 @@

                        API Reference
                        -call_module(m, forward, args, kwargs)[source]#
                        +call_module(m, forward, args, kwargs)[source]#

                        Method that specifies the behavior of this Tracer when it encounters a call to an nn.Module instance.

                        By default, the behavior is to check if the called module is a leaf module @@ -6940,7 +6940,7 @@

                        API Reference
                        -create_arg(a)[source]#
                        +create_arg(a)[source]#

                        A method to specify the behavior of tracing when preparing values to be used as arguments to nodes in the Graph.

                        By default, the behavior includes:

                        @@ -6978,7 +6978,7 @@

                        API Reference
                        -create_args_for_root(root_fn, is_module, concrete_args=None)[source]#
                        +create_args_for_root(root_fn, is_module, concrete_args=None)[source]#

                        Create placeholder nodes corresponding to the signature of the root Module. This method introspects root’s signature and emits those nodes accordingly, also supporting *args and **kwargs.

                        @@ -6990,7 +6990,7 @@

                        API Reference
                        -create_node(kind, target, args, kwargs, name=None, type_expr=None)[source]#
                        +create_node(kind, target, args, kwargs, name=None, type_expr=None)[source]#

                        Inserts a graph node given target, args, kwargs, and name.

                        This method can be overridden to do extra checking, validation, or modification of values used in node creation. For example, one might @@ -7008,7 +7008,7 @@

                        API Reference
                        -create_proxy(kind, target, args, kwargs, name=None, type_expr=None, proxy_factory_fn=None)[source]#
                        +create_proxy(kind, target, args, kwargs, name=None, type_expr=None, proxy_factory_fn=None)[source]#

                        Create a Node from the given arguments, then return the Node wrapped in a Proxy object.

                        If kind = ‘placeholder’, then we’re creating a Node that @@ -7025,7 +7025,7 @@

                        API Reference
                        -get_fresh_qualname(prefix)[source]#
                        +get_fresh_qualname(prefix)[source]#

                        Gets a fresh name for a prefix and returns it. This function ensures that it will not clash with an existing attribute on the graph.

                        @@ -7041,7 +7041,7 @@

                        API Reference
                        -getattr(attr, attr_val, parameter_proxy_cache)[source]#
                        +getattr(attr, attr_val, parameter_proxy_cache)[source]#

                        Method that specifies the behavior of this Tracer when we call getattr on a call to an nn.Module instance.

                        By default, the behavior is to return a proxy value for the attribute. It @@ -7069,7 +7069,7 @@

                        API Reference
                        -is_leaf_module(m, module_qualified_name)[source]#
                        +is_leaf_module(m, module_qualified_name)[source]#

                        A method to specify whether a given nn.Module is a “leaf” module.

                        Leaf modules are the atomic units that appear in the IR, referenced by call_module calls. By default, @@ -7099,7 +7099,7 @@

                        API Reference
                        -iter(obj)[source]#
                        +iter(obj)[source]#
Called when a proxy object is being iterated over, such as when used in control flow. Normally we don’t know what to do because we don’t know the value of the proxy, but a custom tracer can attach more @@ -7119,7 +7119,7 @@

                        API Reference
                        -keys(obj)[source]#
                        +keys(obj)[source]#
Called when a proxy object has the keys() method called.

This is what happens when ** is called on a proxy. This should return an iterator if ** is supposed to work in your custom tracer.

                        @@ -7138,7 +7138,7 @@

                        API Reference
                        -path_of_module(mod)[source]#
                        +path_of_module(mod)[source]#

                        Helper method to find the qualified name of mod in the Module hierarchy of root. For example, if root has a submodule named foo, which has a submodule named bar, passing bar into this function will return @@ -7159,7 +7159,7 @@

                        API Reference
                        -proxy(node)[source]#
                        +proxy(node)[source]#

                        Note

                        Backwards-compatibility for this API is guaranteed.

                        @@ -7173,7 +7173,7 @@

                        API Reference
                        -to_bool(obj)[source]#
                        +to_bool(obj)[source]#
Called when a proxy object is being converted to a boolean, such as when used in control flow. Normally we don’t know what to do because we don’t know the value of the proxy, but a custom tracer can attach more @@ -7193,7 +7193,7 @@

                        API Reference
                        -trace(root, concrete_args=None)[source]#
                        +trace(root, concrete_args=None)[source]#

                        Trace root and return the corresponding FX Graph representation. root can either be an nn.Module instance or a Python callable.

                        Note that after this call, self.root may be different from the root passed @@ -7228,7 +7228,7 @@

                        API Reference
                        -class torch.fx.Proxy(node, tracer=None)[source]#
                        +class torch.fx.Proxy(node, tracer=None)[source]#

                        Proxy objects are Node wrappers that flow through the program during symbolic tracing and record all the operations (torch function calls, method calls, operators) that they touch @@ -7261,7 +7261,7 @@

                        API Reference
                        -class torch.fx.Interpreter(module, garbage_collect_values=True, graph=None)[source]#
                        +class torch.fx.Interpreter(module, garbage_collect_values=True, graph=None)[source]#

                        An Interpreter executes an FX graph Node-by-Node. This pattern can be useful for many things, including writing code transformations as well as analysis passes.
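
A minimal subclass sketch that logs every node as it executes (illustrative only):

import torch
from torch.fx import symbolic_trace, Interpreter

class LoggingInterpreter(Interpreter):
    def run_node(self, n):
        result = super().run_node(n)
        print(n.format_node())          # print each node after it runs
        return result

gm = symbolic_trace(torch.nn.Linear(4, 2))
out = LoggingInterpreter(gm).run(torch.randn(3, 4))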

                        @@ -7327,7 +7327,7 @@

                        API Reference
                        -boxed_run(args_list)[source]#
                        +boxed_run(args_list)[source]#

                        Run module via interpretation and return the result. This uses the “boxed” calling convention, where you pass a list of arguments, which will be cleared by the interpreter. This ensures that input tensors are promptly deallocated.
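
A sketch showing that the argument list is emptied by the call:

import torch
from torch.fx import symbolic_trace, Interpreter

gm = symbolic_trace(lambda x: x + 1)
args = [torch.randn(4)]
out = Interpreter(gm).boxed_run(args)
print(args)    # [] -- the list was cleared so inputs can be freed early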

                        @@ -7339,7 +7339,7 @@

                        API Reference
                        -call_function(target, args, kwargs)[source]#
                        +call_function(target, args, kwargs)[source]#

                        Execute a call_function node and return the result.

                        Parameters
                        @@ -7367,7 +7367,7 @@

                        API Reference
                        -call_method(target, args, kwargs)[source]#
                        +call_method(target, args, kwargs)[source]#

                        Execute a call_method node and return the result.

                        Parameters
                        @@ -7395,7 +7395,7 @@

                        API Reference
                        -call_module(target, args, kwargs)[source]#
                        +call_module(target, args, kwargs)[source]#

                        Execute a call_module node and return the result.

                        Parameters
                        @@ -7423,7 +7423,7 @@

                        API Reference
                        -fetch_args_kwargs_from_env(n)[source]#
                        +fetch_args_kwargs_from_env(n)[source]#

                        Fetch the concrete values of args and kwargs of node n from the current execution environment.

                        @@ -7445,7 +7445,7 @@

                        API Reference
                        -fetch_attr(target)[source]#
                        +fetch_attr(target)[source]#

                        Fetch an attribute from the Module hierarchy of self.module.

                        Parameters
                        @@ -7466,7 +7466,7 @@

                        API Reference
                        -get_attr(target, args, kwargs)[source]#
                        +get_attr(target, args, kwargs)[source]#

                        Execute a get_attr node. Will retrieve an attribute value from the Module hierarchy of self.module.

                        @@ -7494,7 +7494,7 @@

                        API Reference
                        -map_nodes_to_values(args, n)[source]#
                        +map_nodes_to_values(args, n)[source]#

                        Recursively descend through args and look up the concrete value for each Node in the current execution environment.

                        @@ -7516,7 +7516,7 @@

                        API Reference
                        -output(target, args, kwargs)[source]#
                        +output(target, args, kwargs)[source]#

                        Execute an output node. This really just retrieves the value referenced by the output node and returns it.

                        @@ -7544,7 +7544,7 @@

                        API Reference
                        -placeholder(target, args, kwargs)[source]#
                        +placeholder(target, args, kwargs)[source]#

                        Execute a placeholder node. Note that this is stateful: Interpreter maintains an internal iterator over arguments passed to run and this method returns @@ -7574,7 +7574,7 @@

                        API Reference
                        -run(*args, initial_env=None, enable_io_processing=True)[source]#
                        +run(*args, initial_env=None, enable_io_processing=True)[source]#

                        Run module via interpretation and return the result.

                        Parameters
                        @@ -7603,7 +7603,7 @@

                        API Reference
                        -run_node(n)[source]#
                        +run_node(n)[source]#

                        Run a specific node n and return the result. Calls into placeholder, get_attr, call_function, call_method, call_module, or output depending @@ -7629,7 +7629,7 @@

                        API Reference
                        -class torch.fx.Transformer(module)[source]#
                        +class torch.fx.Transformer(module)[source]#

                        Transformer is a special type of interpreter that produces a new Module. It exposes a transform() method that returns the transformed Module. Transformer does not require @@ -7684,7 +7684,7 @@
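
A sketch of an override-based rewrite with Transformer, swapping sigmoid for tanh (an illustrative example, not from this page):

import torch
from torch.fx import symbolic_trace, Transformer

class SigmoidToTanh(Transformer):
    def call_function(self, target, args, kwargs):
        if target is torch.sigmoid:
            return super().call_function(torch.tanh, args, kwargs)
        return super().call_function(target, args, kwargs)

gm = symbolic_trace(lambda x: torch.sigmoid(x) + 1)
new_gm = SigmoidToTanh(gm).transform()   # returns a new GraphModule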

                        API Reference
                        -call_function(target, args, kwargs)[source]#
                        +call_function(target, args, kwargs)[source]#

                        Note

                        Backwards-compatibility for this API is guaranteed.

                        @@ -7698,7 +7698,7 @@

                        API Reference
                        -call_module(target, args, kwargs)[source]#
                        +call_module(target, args, kwargs)[source]#

                        Note

                        Backwards-compatibility for this API is guaranteed.

                        @@ -7712,7 +7712,7 @@

                        API Reference
                        -get_attr(target, args, kwargs)[source]#
                        +get_attr(target, args, kwargs)[source]#

                        Execute a get_attr node. In Transformer, this is overridden to insert a new get_attr node into the output graph.

                        @@ -7738,7 +7738,7 @@

                        API Reference
                        -placeholder(target, args, kwargs)[source]#
                        +placeholder(target, args, kwargs)[source]#

                        Execute a placeholder node. In Transformer, this is overridden to insert a new placeholder into the output graph.

                        @@ -7764,7 +7764,7 @@

                        API Reference
                        -transform()[source]#
                        +transform()[source]#

                        Transform self.module and return the transformed GraphModule.

                        @@ -7782,7 +7782,7 @@

                        API Reference
                        -torch.fx.replace_pattern(gm, pattern, replacement)[source]#
                        +torch.fx.replace_pattern(gm, pattern, replacement)[source]#

                        Matches all possible non-overlapping sets of operators and their data dependencies (pattern) in the Graph of a GraphModule (gm), then replaces each of these matched subgraphs with another diff --git a/2.9/generated/torch.Tensor.backward.html b/2.9/generated/torch.Tensor.backward.html index 79914b67c08..acca3c7c159 100644 --- a/2.9/generated/torch.Tensor.backward.html +++ b/2.9/generated/torch.Tensor.backward.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.backward#

                        -Tensor.backward(gradient=None, retain_graph=None, create_graph=False, inputs=None)[source]#
                        +Tensor.backward(gradient=None, retain_graph=None, create_graph=False, inputs=None)[source]#

Computes the gradient of the current tensor with respect to graph leaves.

                        The graph is differentiated using the chain rule. If the tensor is non-scalar (i.e. its data has more than one element) and requires diff --git a/2.9/generated/torch.Tensor.dim_order.html b/2.9/generated/torch.Tensor.dim_order.html index 9baa48b47e0..57b4b0553dc 100644 --- a/2.9/generated/torch.Tensor.dim_order.html +++ b/2.9/generated/torch.Tensor.dim_order.html @@ -4404,7 +4404,7 @@
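
A minimal sketch of the basic call:

>>> import torch
>>> x = torch.tensor([1.0, 2.0], requires_grad=True)
>>> (x ** 2).sum().backward()   # d(sum(x**2))/dx == 2*x
>>> x.grad
tensor([2., 4.])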

                        torch.Tensor.dim_order#

                        -Tensor.dim_order(ambiguity_check=False) tuple[source]#
                        +Tensor.dim_order(ambiguity_check=False) tuple[source]#

                        Returns the uniquely determined tuple of int describing the dim order or physical layout of self.

                        The dim order represents how dimensions are laid out in memory of dense tensors, diff --git a/2.9/generated/torch.Tensor.is_shared.html b/2.9/generated/torch.Tensor.is_shared.html index 3bb21ed9cad..daa64fd8727 100644 --- a/2.9/generated/torch.Tensor.is_shared.html +++ b/2.9/generated/torch.Tensor.is_shared.html @@ -4404,7 +4404,7 @@
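
For example, a contiguous tensor versus a channels_last one:

>>> import torch
>>> torch.empty(2, 3, 4).dim_order()
(0, 1, 2)
>>> torch.empty(2, 3, 4, 5, memory_format=torch.channels_last).dim_order()
(0, 2, 3, 1)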

                        torch.Tensor.is_shared#

                        -Tensor.is_shared()[source]#
                        +Tensor.is_shared()[source]#

                        Checks if tensor is in shared memory.

                        This is always True for CUDA tensors.

                        diff --git a/2.9/generated/torch.Tensor.istft.html b/2.9/generated/torch.Tensor.istft.html index 5edef549278..c9d8b99eb34 100644 --- a/2.9/generated/torch.Tensor.istft.html +++ b/2.9/generated/torch.Tensor.istft.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.istft#

                        -Tensor.istft(n_fft, hop_length=None, win_length=None, window=None, center=True, normalized=False, onesided=None, length=None, return_complex=False)[source]#
                        +Tensor.istft(n_fft, hop_length=None, win_length=None, window=None, center=True, normalized=False, onesided=None, length=None, return_complex=False)[source]#

                        See torch.istft()

                        diff --git a/2.9/generated/torch.Tensor.lu.html b/2.9/generated/torch.Tensor.lu.html index c77df769349..4fd224c3aad 100644 --- a/2.9/generated/torch.Tensor.lu.html +++ b/2.9/generated/torch.Tensor.lu.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.lu#

                        -Tensor.lu(pivot=True, get_infos=False)[source]#
                        +Tensor.lu(pivot=True, get_infos=False)[source]#

                        See torch.lu()

                        diff --git a/2.9/generated/torch.Tensor.module_load.html b/2.9/generated/torch.Tensor.module_load.html index f9aaff1d0ce..ae7bf3dcc3d 100644 --- a/2.9/generated/torch.Tensor.module_load.html +++ b/2.9/generated/torch.Tensor.module_load.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.module_load#

                        -Tensor.module_load(other, assign=False)[source]#
                        +Tensor.module_load(other, assign=False)[source]#

                        Defines how to transform other when loading it into self in load_state_dict().

                        Used when get_swap_module_params_on_conversion() is True.

                        It is expected that self is a parameter or buffer in an nn.Module and other is the diff --git a/2.9/generated/torch.Tensor.norm.html b/2.9/generated/torch.Tensor.norm.html index 690190f3299..f53d5abe1a0 100644 --- a/2.9/generated/torch.Tensor.norm.html +++ b/2.9/generated/torch.Tensor.norm.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.norm#

                        -Tensor.norm(p='fro', dim=None, keepdim=False, dtype=None)[source]#
                        +Tensor.norm(p='fro', dim=None, keepdim=False, dtype=None)[source]#

                        See torch.norm()

                        diff --git a/2.9/generated/torch.Tensor.register_hook.html b/2.9/generated/torch.Tensor.register_hook.html index e5bd14dfbfb..620cfdf7d8c 100644 --- a/2.9/generated/torch.Tensor.register_hook.html +++ b/2.9/generated/torch.Tensor.register_hook.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.register_hook#

                        -Tensor.register_hook(hook)[source]#
                        +Tensor.register_hook(hook)[source]#

                        Registers a backward hook.

                        The hook will be called every time a gradient with respect to the Tensor is computed. The hook should have the following signature:
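
(The signature block itself falls outside this hunk.) A minimal sketch of registering and removing a hook:

>>> import torch
>>> v = torch.tensor([0., 0., 0.], requires_grad=True)
>>> h = v.register_hook(lambda grad: grad * 2)  # double the gradient
>>> v.backward(torch.tensor([1., 2., 3.]))
>>> v.grad
tensor([2., 4., 6.])
>>> h.remove()  # removes the hook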

                        diff --git a/2.9/generated/torch.Tensor.register_post_accumulate_grad_hook.html b/2.9/generated/torch.Tensor.register_post_accumulate_grad_hook.html index d7381bad672..b2925b2425a 100644 --- a/2.9/generated/torch.Tensor.register_post_accumulate_grad_hook.html +++ b/2.9/generated/torch.Tensor.register_post_accumulate_grad_hook.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.register_post_accumulate_grad_hook#

                        -Tensor.register_post_accumulate_grad_hook(hook)[source]#
                        +Tensor.register_post_accumulate_grad_hook(hook)[source]#

                        Registers a backward hook that runs after grad accumulation.

                        The hook will be called after all gradients for a tensor have been accumulated, meaning that the .grad field has been updated on that tensor. The post diff --git a/2.9/generated/torch.Tensor.share_memory_.html b/2.9/generated/torch.Tensor.share_memory_.html index 35ba80985f7..aa1d2fc7185 100644 --- a/2.9/generated/torch.Tensor.share_memory_.html +++ b/2.9/generated/torch.Tensor.share_memory_.html @@ -4404,7 +4404,7 @@
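
A sketch: the hook receives the tensor itself once its .grad is fully accumulated (on_grad_ready is a made-up callback name):

import torch

p = torch.randn(3, requires_grad=True)

def on_grad_ready(param):            # called with the tensor, not the grad
    print('grad norm:', param.grad.norm())

handle = p.register_post_accumulate_grad_hook(on_grad_ready)
(p * 2).sum().backward()
handle.remove()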

                        torch.Tensor.share_memory_#

                        -Tensor.share_memory_()[source]#
                        +Tensor.share_memory_()[source]#

                        Moves the underlying storage to shared memory.

                        This is a no-op if the underlying storage is already in shared memory and for CUDA tensors. Tensors in shared memory cannot be resized.

                        diff --git a/2.9/generated/torch.Tensor.split.html b/2.9/generated/torch.Tensor.split.html index f0b0fe7233d..68afa48c6f3 100644 --- a/2.9/generated/torch.Tensor.split.html +++ b/2.9/generated/torch.Tensor.split.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.split#

                        -Tensor.split(split_size, dim=0)[source]#
                        +Tensor.split(split_size, dim=0)[source]#

                        See torch.split()

                        diff --git a/2.9/generated/torch.Tensor.stft.html b/2.9/generated/torch.Tensor.stft.html index 3f7c68d11c6..762f8f69cc7 100644 --- a/2.9/generated/torch.Tensor.stft.html +++ b/2.9/generated/torch.Tensor.stft.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.stft#

                        -Tensor.stft(n_fft, hop_length=None, win_length=None, window=None, center=True, pad_mode='reflect', normalized=False, onesided=None, return_complex=None, align_to_window=None)[source]#
                        +Tensor.stft(n_fft, hop_length=None, win_length=None, window=None, center=True, pad_mode='reflect', normalized=False, onesided=None, return_complex=None, align_to_window=None)[source]#

                        See torch.stft()

                        Warning

                        diff --git a/2.9/generated/torch.Tensor.storage.html b/2.9/generated/torch.Tensor.storage.html index 3e4c3ed7b19..740d2030d42 100644 --- a/2.9/generated/torch.Tensor.storage.html +++ b/2.9/generated/torch.Tensor.storage.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.storage#

                        -Tensor.storage() torch.TypedStorage[source]#
                        +Tensor.storage() torch.TypedStorage[source]#

                        Returns the underlying TypedStorage.

                        Warning

                        diff --git a/2.9/generated/torch.Tensor.storage_type.html b/2.9/generated/torch.Tensor.storage_type.html index 92c7cfccac8..1cc2f04e793 100644 --- a/2.9/generated/torch.Tensor.storage_type.html +++ b/2.9/generated/torch.Tensor.storage_type.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.storage_type#

                        -Tensor.storage_type() type[source]#
                        +Tensor.storage_type() type[source]#

                        Returns the type of the underlying storage.

                        diff --git a/2.9/generated/torch.Tensor.to_sparse_coo.html b/2.9/generated/torch.Tensor.to_sparse_coo.html index 971aef0fedb..7f147e637f8 100644 --- a/2.9/generated/torch.Tensor.to_sparse_coo.html +++ b/2.9/generated/torch.Tensor.to_sparse_coo.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.to_sparse_coo#

                        -Tensor.to_sparse_coo()[source]#
                        +Tensor.to_sparse_coo()[source]#

                        Convert a tensor to coordinate format.

                        Examples:

                        >>> dense = torch.randn(5, 5)
                        diff --git a/2.9/generated/torch.Tensor.unflatten.html b/2.9/generated/torch.Tensor.unflatten.html
                        index cb1c76beba1..a9fe2dbc1ac 100644
                        --- a/2.9/generated/torch.Tensor.unflatten.html
                        +++ b/2.9/generated/torch.Tensor.unflatten.html
                        @@ -4404,7 +4404,7 @@
                         

                        torch.Tensor.unflatten#

                        -Tensor.unflatten(dim, sizes) Tensor[source]#
                        +Tensor.unflatten(dim, sizes) Tensor[source]#

                        See torch.unflatten().

                        diff --git a/2.9/generated/torch.Tensor.unique.html b/2.9/generated/torch.Tensor.unique.html index d57534608e8..8274c2e286b 100644 --- a/2.9/generated/torch.Tensor.unique.html +++ b/2.9/generated/torch.Tensor.unique.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.unique#

                        -Tensor.unique(sorted=True, return_inverse=False, return_counts=False, dim=None)[source]#
                        +Tensor.unique(sorted=True, return_inverse=False, return_counts=False, dim=None)[source]#

                        Returns the unique elements of the input tensor.

                        See torch.unique()
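
For example:

>>> import torch
>>> t = torch.tensor([1, 3, 2, 3])
>>> t.unique()
tensor([1, 2, 3])
>>> t.unique(return_counts=True)
(tensor([1, 2, 3]), tensor([1, 1, 2]))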

                        diff --git a/2.9/generated/torch.Tensor.unique_consecutive.html b/2.9/generated/torch.Tensor.unique_consecutive.html index 75c4722f5e2..d964778f5fb 100644 --- a/2.9/generated/torch.Tensor.unique_consecutive.html +++ b/2.9/generated/torch.Tensor.unique_consecutive.html @@ -4404,7 +4404,7 @@

                        torch.Tensor.unique_consecutive#

                        -Tensor.unique_consecutive(return_inverse=False, return_counts=False, dim=None)[source]#
                        +Tensor.unique_consecutive(return_inverse=False, return_counts=False, dim=None)[source]#

                        Eliminates all but the first element from every consecutive group of equivalent elements.

                        See torch.unique_consecutive()

                        diff --git a/2.9/generated/torch._assert.html b/2.9/generated/torch._assert.html index 5bcf39a27f4..ade0bb50f3f 100644 --- a/2.9/generated/torch._assert.html +++ b/2.9/generated/torch._assert.html @@ -4404,7 +4404,7 @@

                        torch._assert#

                        -torch._assert(condition, message)[source]#
                        +torch._assert(condition, message)[source]#

                        A wrapper around Python’s assert which is symbolically traceable.

                        diff --git a/2.9/generated/torch._logging.set_logs.html b/2.9/generated/torch._logging.set_logs.html index f17dcf9e529..bf60c39cc26 100644 --- a/2.9/generated/torch._logging.set_logs.html +++ b/2.9/generated/torch._logging.set_logs.html @@ -4421,7 +4421,7 @@

                        torch._logging.set_logs#

                        -torch._logging.set_logs(*, all=None, dynamo=None, aot=None, autograd=None, dynamic=None, inductor=None, distributed=None, c10d=None, ddp=None, fsdp=None, dtensor=None, onnx=None, bytecode=False, aot_graphs=False, aot_joint_graph=False, ddp_graphs=False, graph=False, graph_code=False, graph_code_verbose=False, graph_breaks=False, graph_sizes=False, guards=False, recompiles=False, recompiles_verbose=False, trace_source=False, trace_call=False, trace_bytecode=False, output_code=False, kernel_code=False, schedule=False, perf_hints=False, pre_grad_graphs=False, post_grad_graphs=False, ir_pre_fusion=False, ir_post_fusion=False, onnx_diagnostics=False, fusion=False, overlap=False, export=None, modules=None, cudagraphs=False, sym_node=False, compiled_autograd=False, compiled_autograd_verbose=False, cudagraph_static_inputs=False, benchmarking=False, autotuning=False, graph_region_expansion=False, inductor_metrics=False, hierarchical_compile=False, compute_dependencies=False)[source]#
                        +torch._logging.set_logs(*, all=None, dynamo=None, aot=None, autograd=None, dynamic=None, inductor=None, distributed=None, c10d=None, ddp=None, fsdp=None, dtensor=None, onnx=None, bytecode=False, aot_graphs=False, aot_joint_graph=False, ddp_graphs=False, graph=False, graph_code=False, graph_code_verbose=False, graph_breaks=False, graph_sizes=False, guards=False, recompiles=False, recompiles_verbose=False, trace_source=False, trace_call=False, trace_bytecode=False, output_code=False, kernel_code=False, schedule=False, perf_hints=False, pre_grad_graphs=False, post_grad_graphs=False, ir_pre_fusion=False, ir_post_fusion=False, onnx_diagnostics=False, fusion=False, overlap=False, export=None, modules=None, cudagraphs=False, sym_node=False, compiled_autograd=False, compiled_autograd_verbose=False, cudagraph_static_inputs=False, benchmarking=False, autotuning=False, graph_region_expansion=False, inductor_metrics=False, hierarchical_compile=False, compute_dependencies=False)[source]#

                        Sets the log level for individual components and toggles individual log artifact types.
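
A sketch of typical usage: component keywords take logging levels, artifact keywords take booleans:

import logging
import torch

# Verbose TorchDynamo logs plus the graph-breaks artifact
torch._logging.set_logs(dynamo=logging.DEBUG, graph_breaks=True)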

                        diff --git a/2.9/generated/torch.accelerator.current_accelerator.html b/2.9/generated/torch.accelerator.current_accelerator.html index d0f8aeee808..ebe0a802034 100644 --- a/2.9/generated/torch.accelerator.current_accelerator.html +++ b/2.9/generated/torch.accelerator.current_accelerator.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.current_accelerator#

                        -torch.accelerator.current_accelerator(check_available=False)[source]#
                        +torch.accelerator.current_accelerator(check_available=False)[source]#

Return the device of the accelerator available at compilation time. If no accelerator was available at compilation time, returns None. See accelerator for details.
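
A sketch of a backend-agnostic device pick built on this call:

import torch

dev = torch.accelerator.current_accelerator(check_available=True)
device = dev if dev is not None else torch.device('cpu')
x = torch.ones(2, 2, device=device)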

                        diff --git a/2.9/generated/torch.accelerator.current_device_idx.html b/2.9/generated/torch.accelerator.current_device_idx.html index 7f7bc4276ed..23e6470bbc9 100644 --- a/2.9/generated/torch.accelerator.current_device_idx.html +++ b/2.9/generated/torch.accelerator.current_device_idx.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.current_device_idx#

                        -torch.accelerator.current_device_idx()[source]#
                        +torch.accelerator.current_device_idx()[source]#

                        (Deprecated) Return the index of a currently selected device for the current accelerator.

                        Returns
                        diff --git a/2.9/generated/torch.accelerator.current_device_index.html b/2.9/generated/torch.accelerator.current_device_index.html index 724728e524c..b41694031d1 100644 --- a/2.9/generated/torch.accelerator.current_device_index.html +++ b/2.9/generated/torch.accelerator.current_device_index.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.current_device_index#

                        -torch.accelerator.current_device_index()[source]#
                        +torch.accelerator.current_device_index()[source]#

                        Return the index of a currently selected device for the current accelerator.

                        Returns
                        diff --git a/2.9/generated/torch.accelerator.current_stream.html b/2.9/generated/torch.accelerator.current_stream.html index 36807cc1141..b2615d98a27 100644 --- a/2.9/generated/torch.accelerator.current_stream.html +++ b/2.9/generated/torch.accelerator.current_stream.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.current_stream#

                        -torch.accelerator.current_stream(device=None, /)[source]#
                        +torch.accelerator.current_stream(device=None, /)[source]#

                        Return the currently selected stream for a given device.

                        Parameters
                        diff --git a/2.9/generated/torch.accelerator.device_count.html b/2.9/generated/torch.accelerator.device_count.html index 834da0a4951..abdaa65f36f 100644 --- a/2.9/generated/torch.accelerator.device_count.html +++ b/2.9/generated/torch.accelerator.device_count.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.device_count#

                        -torch.accelerator.device_count()[source]#
                        +torch.accelerator.device_count()[source]#

Return the number of devices available for the current accelerator.

                        Returns
                        diff --git a/2.9/generated/torch.accelerator.device_index.html b/2.9/generated/torch.accelerator.device_index.html index 30dc54ca639..86e55b1984c 100644 --- a/2.9/generated/torch.accelerator.device_index.html +++ b/2.9/generated/torch.accelerator.device_index.html @@ -4404,7 +4404,7 @@

                        device_index#

                        -class torch.accelerator.device_index(device, /)[source]#
                        +class torch.accelerator.device_index(device, /)[source]#

                        Context manager to set the current device index for the current accelerator. Temporarily changes the current device index to the specified value for the duration of the context, and automatically restores the previous device index when exiting diff --git a/2.9/generated/torch.accelerator.is_available.html b/2.9/generated/torch.accelerator.is_available.html index 048409fc4b9..5bef7a6f54d 100644 --- a/2.9/generated/torch.accelerator.is_available.html +++ b/2.9/generated/torch.accelerator.is_available.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.is_available#

                        -torch.accelerator.is_available()[source]#
                        +torch.accelerator.is_available()[source]#

Check if the current accelerator is available at runtime: it was built, all the required drivers are available, and at least one device is visible. See accelerator for details.

                        diff --git a/2.9/generated/torch.accelerator.memory.empty_cache.html b/2.9/generated/torch.accelerator.memory.empty_cache.html index dff18c7e07e..22af2c96428 100644 --- a/2.9/generated/torch.accelerator.memory.empty_cache.html +++ b/2.9/generated/torch.accelerator.memory.empty_cache.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.memory.empty_cache#

                        -torch.accelerator.memory.empty_cache()[source]#
                        +torch.accelerator.memory.empty_cache()[source]#

Release all unoccupied cached memory currently held by the caching allocator so that it can be used by other applications.

                        diff --git a/2.9/generated/torch.accelerator.memory.max_memory_allocated.html b/2.9/generated/torch.accelerator.memory.max_memory_allocated.html index 5660f944b7a..46b7105c196 100644 --- a/2.9/generated/torch.accelerator.memory.max_memory_allocated.html +++ b/2.9/generated/torch.accelerator.memory.max_memory_allocated.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.memory.max_memory_allocated#

                        -torch.accelerator.memory.max_memory_allocated(device_index=None, /)[source]#
                        +torch.accelerator.memory.max_memory_allocated(device_index=None, /)[source]#

                        Return the current accelerator maximum device memory occupied by tensors in bytes for a given device index.

                        By default, this returns the peak allocated memory since the beginning of diff --git a/2.9/generated/torch.accelerator.memory.max_memory_reserved.html b/2.9/generated/torch.accelerator.memory.max_memory_reserved.html index 71f8ad18c7f..2a8e830b27f 100644 --- a/2.9/generated/torch.accelerator.memory.max_memory_reserved.html +++ b/2.9/generated/torch.accelerator.memory.max_memory_reserved.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.memory.max_memory_reserved#

                        -torch.accelerator.memory.max_memory_reserved(device_index=None, /)[source]#
                        +torch.accelerator.memory.max_memory_reserved(device_index=None, /)[source]#

                        Return the current accelerator maximum device memory managed by the caching allocator in bytes for a given device index.

                        By default, this returns the peak cached memory since the beginning of this diff --git a/2.9/generated/torch.accelerator.memory.memory_allocated.html b/2.9/generated/torch.accelerator.memory.memory_allocated.html index 15f6e9426c6..d2b60ce0bc8 100644 --- a/2.9/generated/torch.accelerator.memory.memory_allocated.html +++ b/2.9/generated/torch.accelerator.memory.memory_allocated.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.memory.memory_allocated#

                        -torch.accelerator.memory.memory_allocated(device_index=None, /)[source]#
                        +torch.accelerator.memory.memory_allocated(device_index=None, /)[source]#

                        Return the current accelerator device memory occupied by tensors in bytes for a given device index.

                        diff --git a/2.9/generated/torch.accelerator.memory.memory_reserved.html b/2.9/generated/torch.accelerator.memory.memory_reserved.html index 7c968e84c1f..0715c6776a7 100644 --- a/2.9/generated/torch.accelerator.memory.memory_reserved.html +++ b/2.9/generated/torch.accelerator.memory.memory_reserved.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.memory.memory_reserved#

                        -torch.accelerator.memory.memory_reserved(device_index=None, /)[source]#
                        +torch.accelerator.memory.memory_reserved(device_index=None, /)[source]#

                        Return the current accelerator device memory managed by the caching allocator in bytes for a given device index.

                        diff --git a/2.9/generated/torch.accelerator.memory.memory_stats.html b/2.9/generated/torch.accelerator.memory.memory_stats.html index 58811612dc5..4020b251fb9 100644 --- a/2.9/generated/torch.accelerator.memory.memory_stats.html +++ b/2.9/generated/torch.accelerator.memory.memory_stats.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.memory.memory_stats#

                        -torch.accelerator.memory.memory_stats(device_index=None, /)[source]#
                        +torch.accelerator.memory.memory_stats(device_index=None, /)[source]#

                        Return a dictionary of accelerator device memory allocator statistics for a given device index.

                        The return value of this function is a dictionary of statistics, each of which is a non-negative integer.
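
A sketch of reading one counter; the exact key set mirrors the backend allocator, and 'allocated_bytes.all.current' here is assumed from the CUDA allocator's stat naming:

import torch

if torch.accelerator.is_available():
    stats = torch.accelerator.memory.memory_stats()
    # Key name assumed from the CUDA allocator's naming scheme
    print(stats.get('allocated_bytes.all.current', 0))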

                        diff --git a/2.9/generated/torch.accelerator.memory.reset_accumulated_memory_stats.html b/2.9/generated/torch.accelerator.memory.reset_accumulated_memory_stats.html index 6f0566a3bf3..2ea6c53b47a 100644 --- a/2.9/generated/torch.accelerator.memory.reset_accumulated_memory_stats.html +++ b/2.9/generated/torch.accelerator.memory.reset_accumulated_memory_stats.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.memory.reset_accumulated_memory_stats#

                        -torch.accelerator.memory.reset_accumulated_memory_stats(device_index=None, /)[source]#
                        +torch.accelerator.memory.reset_accumulated_memory_stats(device_index=None, /)[source]#

                        Reset the “accumulated” (historical) stats tracked by the current accelerator memory allocator for a given device index.

                        diff --git a/2.9/generated/torch.accelerator.memory.reset_peak_memory_stats.html b/2.9/generated/torch.accelerator.memory.reset_peak_memory_stats.html index 365d04f1b7e..4cc09e7e231 100644 --- a/2.9/generated/torch.accelerator.memory.reset_peak_memory_stats.html +++ b/2.9/generated/torch.accelerator.memory.reset_peak_memory_stats.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.memory.reset_peak_memory_stats#

                        -torch.accelerator.memory.reset_peak_memory_stats(device_index=None, /)[source]#
                        +torch.accelerator.memory.reset_peak_memory_stats(device_index=None, /)[source]#

                        Reset the “peak” stats tracked by the current accelerator memory allocator for a given device index.

                        diff --git a/2.9/generated/torch.accelerator.set_device_idx.html b/2.9/generated/torch.accelerator.set_device_idx.html index f06bc3ad7a1..4716dcff2d8 100644 --- a/2.9/generated/torch.accelerator.set_device_idx.html +++ b/2.9/generated/torch.accelerator.set_device_idx.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.set_device_idx#

                        -torch.accelerator.set_device_idx(device, /)[source]#
                        +torch.accelerator.set_device_idx(device, /)[source]#

                        (Deprecated) Set the current device index to a given device.

                        Parameters
                        diff --git a/2.9/generated/torch.accelerator.set_device_index.html b/2.9/generated/torch.accelerator.set_device_index.html index aba5e957fb3..692ba469fbb 100644 --- a/2.9/generated/torch.accelerator.set_device_index.html +++ b/2.9/generated/torch.accelerator.set_device_index.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.set_device_index#

                        -torch.accelerator.set_device_index(device, /)[source]#
                        +torch.accelerator.set_device_index(device, /)[source]#

                        Set the current device index to a given device.

                        Parameters
                        diff --git a/2.9/generated/torch.accelerator.set_stream.html b/2.9/generated/torch.accelerator.set_stream.html index 7ef0930c336..4f4d61a0b63 100644 --- a/2.9/generated/torch.accelerator.set_stream.html +++ b/2.9/generated/torch.accelerator.set_stream.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.set_stream#

                        -torch.accelerator.set_stream(stream)[source]#
                        +torch.accelerator.set_stream(stream)[source]#

                        Set the current stream to a given stream.

                        Parameters
                        diff --git a/2.9/generated/torch.accelerator.synchronize.html b/2.9/generated/torch.accelerator.synchronize.html index 5d8f7bf7428..ad911c76718 100644 --- a/2.9/generated/torch.accelerator.synchronize.html +++ b/2.9/generated/torch.accelerator.synchronize.html @@ -4404,7 +4404,7 @@

                        torch.accelerator.synchronize#

                        -torch.accelerator.synchronize(device=None, /)[source]#
                        +torch.accelerator.synchronize(device=None, /)[source]#

                        Wait for all kernels in all streams on the given device to complete.

                        Parameters
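A short sketch combining set_stream and synchronize; it assumes an accelerator is available and that torch.Stream accepts a device argument, as in recent releases:

import torch

if torch.accelerator.is_available():
    dev = torch.accelerator.current_accelerator()
    s = torch.Stream(device=dev)        # a stream on the current accelerator
    torch.accelerator.set_stream(s)     # subsequent kernels are enqueued on s
    y = torch.randn(512, 512, device=dev) @ torch.randn(512, 512, device=dev)
    torch.accelerator.synchronize(dev)  # block until all streams on dev finish
    print(y.sum().item())
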
                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.BNReLU2d.html b/2.9/generated/torch.ao.nn.intrinsic.BNReLU2d.html index 6cae876b7d8..96a3ebbde85 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.BNReLU2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.BNReLU2d.html @@ -4415,7 +4415,7 @@

                        BNReLU2d#

                        -class torch.ao.nn.intrinsic.BNReLU2d(batch_norm, relu)[source]#
                        +class torch.ao.nn.intrinsic.BNReLU2d(batch_norm, relu)[source]#

                        This is a sequential container which calls the BatchNorm 2d and ReLU modules. During quantization this will be replaced with the corresponding fused module.
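The intrinsic containers on these pages (the BNReLU*, ConvBn*, ConvBnReLU*, ConvReLU*, and LinearReLU variants) are normally produced by torch.ao.quantization.fuse_modules rather than constructed by hand. A minimal sketch; the module M and its layer names are illustrative:

import torch
import torch.nn as nn
from torch.ao.quantization import fuse_modules

class M(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3)
        self.bn = nn.BatchNorm2d(8)
        self.relu = nn.ReLU()
    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))

m = M().eval()
# In eval mode the BatchNorm is folded into the conv, so the fused submodule
# is an intrinsic ConvReLU2d; QAT fusion would yield ConvBnReLU2d instead.
fused = fuse_modules(m, [["conv", "bn", "relu"]])
print(type(fused.conv))  # torch.ao.nn.intrinsic.ConvReLU2d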

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.BNReLU3d.html b/2.9/generated/torch.ao.nn.intrinsic.BNReLU3d.html index 1809adf648c..82258eee810 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.BNReLU3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.BNReLU3d.html @@ -4415,7 +4415,7 @@

                        BNReLU3d#

                        -class torch.ao.nn.intrinsic.BNReLU3d(batch_norm, relu)[source]#
                        +class torch.ao.nn.intrinsic.BNReLU3d(batch_norm, relu)[source]#

                        This is a sequential container which calls the BatchNorm 3d and ReLU modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvBn1d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvBn1d.html index cedb337ea93..8a28db689c7 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvBn1d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvBn1d.html @@ -4415,7 +4415,7 @@

                        ConvBn1d#

                        -class torch.ao.nn.intrinsic.ConvBn1d(conv, bn)[source]#
                        +class torch.ao.nn.intrinsic.ConvBn1d(conv, bn)[source]#

                        This is a sequential container which calls the Conv 1d and Batch Norm 1d modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvBn2d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvBn2d.html index 787035d6934..9d784567485 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvBn2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvBn2d.html @@ -4415,7 +4415,7 @@

                        ConvBn2d#

                        -class torch.ao.nn.intrinsic.ConvBn2d(conv, bn)[source]#
                        +class torch.ao.nn.intrinsic.ConvBn2d(conv, bn)[source]#

                        This is a sequential container which calls the Conv 2d and Batch Norm 2d modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvBn3d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvBn3d.html index 523c90d47d1..cd571ae86c7 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvBn3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvBn3d.html @@ -4415,7 +4415,7 @@

                        ConvBn3d#

                        -class torch.ao.nn.intrinsic.ConvBn3d(conv, bn)[source]#
                        +class torch.ao.nn.intrinsic.ConvBn3d(conv, bn)[source]#

                        This is a sequential container which calls the Conv 3d and Batch Norm 3d modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU1d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU1d.html index f09533b2926..5e670897d2d 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU1d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU1d.html @@ -4415,7 +4415,7 @@

                        ConvBnReLU1d#

                        -class torch.ao.nn.intrinsic.ConvBnReLU1d(conv, bn, relu)[source]#
                        +class torch.ao.nn.intrinsic.ConvBnReLU1d(conv, bn, relu)[source]#

                        This is a sequential container which calls the Conv 1d, Batch Norm 1d, and ReLU modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU2d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU2d.html index 41c1f073c69..97024adb606 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU2d.html @@ -4415,7 +4415,7 @@

                        ConvBnReLU2d#

                        -class torch.ao.nn.intrinsic.ConvBnReLU2d(conv, bn, relu)[source]#
                        +class torch.ao.nn.intrinsic.ConvBnReLU2d(conv, bn, relu)[source]#

                        This is a sequential container which calls the Conv 2d, Batch Norm 2d, and ReLU modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU3d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU3d.html index 748896e1cc5..45c5091dde6 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvBnReLU3d.html @@ -4415,7 +4415,7 @@

                        ConvBnReLU3d#

                        -class torch.ao.nn.intrinsic.ConvBnReLU3d(conv, bn, relu)[source]#
                        +class torch.ao.nn.intrinsic.ConvBnReLU3d(conv, bn, relu)[source]#

                        This is a sequential container which calls the Conv 3d, Batch Norm 3d, and ReLU modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvReLU1d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvReLU1d.html index e05ace35843..6c7d57e0bc2 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvReLU1d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvReLU1d.html @@ -4415,7 +4415,7 @@

                        ConvReLU1d#

                        -class torch.ao.nn.intrinsic.ConvReLU1d(conv, relu)[source]#
                        +class torch.ao.nn.intrinsic.ConvReLU1d(conv, relu)[source]#

                        This is a sequential container which calls the Conv1d and ReLU modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvReLU2d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvReLU2d.html index 9635e174fe7..f489f031a5f 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvReLU2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvReLU2d.html @@ -4415,7 +4415,7 @@

                        ConvReLU2d#

                        -class torch.ao.nn.intrinsic.ConvReLU2d(conv, relu)[source]#
                        +class torch.ao.nn.intrinsic.ConvReLU2d(conv, relu)[source]#

                        This is a sequential container which calls the Conv2d and ReLU modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.ConvReLU3d.html b/2.9/generated/torch.ao.nn.intrinsic.ConvReLU3d.html index a5cc7764d86..685feee5927 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.ConvReLU3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.ConvReLU3d.html @@ -4415,7 +4415,7 @@

                        ConvReLU3d#

                        -class torch.ao.nn.intrinsic.ConvReLU3d(conv, relu)[source]#
                        +class torch.ao.nn.intrinsic.ConvReLU3d(conv, relu)[source]#

                        This is a sequential container which calls the Conv3d and ReLU modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.LinearReLU.html b/2.9/generated/torch.ao.nn.intrinsic.LinearReLU.html index 52665678fb6..0537240d67e 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.LinearReLU.html +++ b/2.9/generated/torch.ao.nn.intrinsic.LinearReLU.html @@ -4415,7 +4415,7 @@

                        LinearReLU#

                        -class torch.ao.nn.intrinsic.LinearReLU(linear, relu)[source]#
                        +class torch.ao.nn.intrinsic.LinearReLU(linear, relu)[source]#

                        This is a sequential container which calls the Linear and ReLU modules. During quantization this will be replaced with the corresponding fused module.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn1d.html b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn1d.html index 4c0586614c7..db6c3992b61 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn1d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn1d.html @@ -4415,7 +4415,7 @@

                        ConvBn1d#

                        -class torch.ao.nn.intrinsic.qat.ConvBn1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.ConvBn1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#

                        A ConvBn1d module is a module fused from Conv1d and BatchNorm1d, attached with FakeQuantize modules for weight, used in quantization aware training.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn2d.html b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn2d.html index 433f063b460..5b877171653 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn2d.html @@ -4415,7 +4415,7 @@

                        ConvBn2d#

                        -class torch.ao.nn.intrinsic.qat.ConvBn2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.ConvBn2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#

                        A ConvBn2d module is a module fused from Conv2d and BatchNorm2d, attached with FakeQuantize modules for weight, used in quantization aware training.
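A sketch of how these qat.* fused modules typically arise through eager-mode QAT preparation; the toy model and the "fbgemm" qconfig are illustrative choices:

import torch
import torch.nn as nn
from torch.ao.quantization import (
    fuse_modules_qat, get_default_qat_qconfig, prepare_qat,
)

class M(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3)
        self.bn = nn.BatchNorm2d(8)
    def forward(self, x):
        return self.bn(self.conv(x))

m = M().train()
m.qconfig = get_default_qat_qconfig("fbgemm")
m = fuse_modules_qat(m, [["conv", "bn"]])  # -> torch.ao.nn.intrinsic.ConvBn2d
m = prepare_qat(m)                         # -> torch.ao.nn.intrinsic.qat.ConvBn2d
print(type(m.conv))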

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn3d.html b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn3d.html index 5de32e0a172..d63d577d912 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBn3d.html @@ -4415,7 +4415,7 @@

                        ConvBn3d#

                        -class torch.ao.nn.intrinsic.qat.ConvBn3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.ConvBn3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#

                        A ConvBn3d module is a module fused from Conv3d and BatchNorm3d, attached with FakeQuantize modules for weight, used in quantization aware training.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU1d.html b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU1d.html index 5367c9745d4..503c02a96b6 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU1d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU1d.html @@ -4415,7 +4415,7 @@

                        ConvBnReLU1d#

                        -class torch.ao.nn.intrinsic.qat.ConvBnReLU1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.ConvBnReLU1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#

                        A ConvBnReLU1d module is a module fused from Conv1d, BatchNorm1d and ReLU, attached with FakeQuantize modules for weight, used in quantization aware training.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU2d.html b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU2d.html index bf8fe82dc24..776d1d4ac11 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU2d.html @@ -4415,7 +4415,7 @@

                        ConvBnReLU2d#

                        -class torch.ao.nn.intrinsic.qat.ConvBnReLU2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.ConvBnReLU2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#

                        A ConvBnReLU2d module is a module fused from Conv2d, BatchNorm2d and ReLU, attached with FakeQuantize modules for weight, used in quantization aware training.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU3d.html b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU3d.html index 0130d8b6af2..4d5444e6b38 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvBnReLU3d.html @@ -4415,7 +4415,7 @@

                        ConvBnReLU3d#

                        -class torch.ao.nn.intrinsic.qat.ConvBnReLU3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.ConvBnReLU3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=None, padding_mode='zeros', eps=1e-05, momentum=0.1, freeze_bn=False, qconfig=None)[source]#

                        A ConvBnReLU3d module is a module fused from Conv3d, BatchNorm3d and ReLU, attached with FakeQuantize modules for weight, used in quantization aware training.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvReLU2d.html b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvReLU2d.html index 8b5acf2781d..409b24e3cca 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvReLU2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvReLU2d.html @@ -4415,7 +4415,7 @@

                        ConvReLU2d#

                        -class torch.ao.nn.intrinsic.qat.ConvReLU2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.ConvReLU2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', qconfig=None)[source]#

                        A ConvReLU2d module is a fused module of Conv2d and ReLU, attached with FakeQuantize modules for weight for quantization aware training.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvReLU3d.html b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvReLU3d.html index 769e57b839d..5774e34af44 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.ConvReLU3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.ConvReLU3d.html @@ -4415,7 +4415,7 @@

                        ConvReLU3d#

                        -class torch.ao.nn.intrinsic.qat.ConvReLU3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.ConvReLU3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', qconfig=None)[source]#

                        A ConvReLU3d module is a fused module of Conv3d and ReLU, attached with FakeQuantize modules for weight for quantization aware training.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.LinearReLU.html b/2.9/generated/torch.ao.nn.intrinsic.qat.LinearReLU.html index e5555bf51e5..40c465c6125 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.LinearReLU.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.LinearReLU.html @@ -4415,7 +4415,7 @@

                        LinearReLU#

                        -class torch.ao.nn.intrinsic.qat.LinearReLU(in_features, out_features, bias=True, qconfig=None)[source]#
                        +class torch.ao.nn.intrinsic.qat.LinearReLU(in_features, out_features, bias=True, qconfig=None)[source]#

                        A LinearReLU module fused from Linear and ReLU modules, attached with FakeQuantize modules for weight, used in quantization aware training.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.freeze_bn_stats.html b/2.9/generated/torch.ao.nn.intrinsic.qat.freeze_bn_stats.html index 2745f095fba..20fedbbc59f 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.freeze_bn_stats.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.freeze_bn_stats.html @@ -4415,7 +4415,7 @@

                        freeze_bn_stats#

                        -class torch.ao.nn.intrinsic.qat.freeze_bn_stats(mod)[source]#
                        +class torch.ao.nn.intrinsic.qat.freeze_bn_stats(mod)[source]#
                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.qat.update_bn_stats.html b/2.9/generated/torch.ao.nn.intrinsic.qat.update_bn_stats.html index 18a6c9e13f8..c21e749f42e 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.qat.update_bn_stats.html +++ b/2.9/generated/torch.ao.nn.intrinsic.qat.update_bn_stats.html @@ -4415,7 +4415,7 @@

                        update_bn_stats#

                        -class torch.ao.nn.intrinsic.qat.update_bn_stats(mod)[source]#
                        +class torch.ao.nn.intrinsic.qat.update_bn_stats(mod)[source]#
                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.quantized.BNReLU2d.html b/2.9/generated/torch.ao.nn.intrinsic.quantized.BNReLU2d.html index 1398421ec63..8547d7ba012 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.quantized.BNReLU2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.quantized.BNReLU2d.html @@ -4415,7 +4415,7 @@

                        BNReLU2d#

                        -class torch.ao.nn.intrinsic.quantized.BNReLU2d(num_features, eps=1e-05, momentum=0.1, device=None, dtype=None)[source]#
                        +class torch.ao.nn.intrinsic.quantized.BNReLU2d(num_features, eps=1e-05, momentum=0.1, device=None, dtype=None)[source]#

A BNReLU2d module is a fused module of BatchNorm2d and ReLU.

                        We adopt the same interface as torch.ao.nn.quantized.BatchNorm2d.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.quantized.BNReLU3d.html b/2.9/generated/torch.ao.nn.intrinsic.quantized.BNReLU3d.html index 5e9579f30c1..ffd0677a629 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.quantized.BNReLU3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.quantized.BNReLU3d.html @@ -4415,7 +4415,7 @@

                        BNReLU3d#

                        -class torch.ao.nn.intrinsic.quantized.BNReLU3d(num_features, eps=1e-05, momentum=0.1, device=None, dtype=None)[source]#
                        +class torch.ao.nn.intrinsic.quantized.BNReLU3d(num_features, eps=1e-05, momentum=0.1, device=None, dtype=None)[source]#

A BNReLU3d module is a fused module of BatchNorm3d and ReLU.

                        We adopt the same interface as torch.ao.nn.quantized.BatchNorm3d.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU1d.html b/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU1d.html index 2b089afe808..ea7cbb718f2 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU1d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU1d.html @@ -4415,7 +4415,7 @@

                        ConvReLU1d#

                        -class torch.ao.nn.intrinsic.quantized.ConvReLU1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.intrinsic.quantized.ConvReLU1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

A ConvReLU1d module is a fused module of Conv1d and ReLU.

                        We adopt the same interface as torch.ao.nn.quantized.Conv1d.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU2d.html b/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU2d.html index 41a77897400..371aae39ce1 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU2d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU2d.html @@ -4415,7 +4415,7 @@

                        ConvReLU2d#

                        -class torch.ao.nn.intrinsic.quantized.ConvReLU2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.intrinsic.quantized.ConvReLU2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

A ConvReLU2d module is a fused module of Conv2d and ReLU.

                        We adopt the same interface as torch.ao.nn.quantized.Conv2d.

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU3d.html b/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU3d.html index 8e386953df2..28013bac0ba 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU3d.html +++ b/2.9/generated/torch.ao.nn.intrinsic.quantized.ConvReLU3d.html @@ -4415,7 +4415,7 @@

                        ConvReLU3d#

                        -class torch.ao.nn.intrinsic.quantized.ConvReLU3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.intrinsic.quantized.ConvReLU3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

A ConvReLU3d module is a fused module of Conv3d and ReLU.

                        We adopt the same interface as torch.ao.nn.quantized.Conv3d.

                        Attributes: Same as torch.ao.nn.quantized.Conv3d

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.quantized.LinearReLU.html b/2.9/generated/torch.ao.nn.intrinsic.quantized.LinearReLU.html index a5fbe40807c..88ef0721caf 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.quantized.LinearReLU.html +++ b/2.9/generated/torch.ao.nn.intrinsic.quantized.LinearReLU.html @@ -4415,7 +4415,7 @@

                        LinearReLU#

                        -class torch.ao.nn.intrinsic.quantized.LinearReLU(in_features, out_features, bias=True, dtype=torch.qint8)[source]#
                        +class torch.ao.nn.intrinsic.quantized.LinearReLU(in_features, out_features, bias=True, dtype=torch.qint8)[source]#

A LinearReLU module fused from Linear and ReLU modules.

                        We adopt the same interface as torch.ao.nn.quantized.Linear.
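A sketch of the eager-mode static quantization flow that produces the fused quantized modules on these pages; the toy model and the "fbgemm" backend are illustrative:

import torch
import torch.nn as nn
from torch.ao.quantization import (
    QuantStub, DeQuantStub, get_default_qconfig,
    fuse_modules, prepare, convert,
)

class M(nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = QuantStub()
        self.linear = nn.Linear(16, 8)
        self.relu = nn.ReLU()
        self.dequant = DeQuantStub()
    def forward(self, x):
        return self.dequant(self.relu(self.linear(self.quant(x))))

m = M().eval()
m.qconfig = get_default_qconfig("fbgemm")
m = fuse_modules(m, [["linear", "relu"]])  # -> intrinsic.LinearReLU
m = prepare(m)
m(torch.randn(4, 16))                      # calibration pass
m = convert(m)                             # -> intrinsic.quantized.LinearReLU
print(type(m.linear))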

                        diff --git a/2.9/generated/torch.ao.nn.intrinsic.quantized.dynamic.LinearReLU.html b/2.9/generated/torch.ao.nn.intrinsic.quantized.dynamic.LinearReLU.html index 106e3b3cb91..0563cb1c3ce 100644 --- a/2.9/generated/torch.ao.nn.intrinsic.quantized.dynamic.LinearReLU.html +++ b/2.9/generated/torch.ao.nn.intrinsic.quantized.dynamic.LinearReLU.html @@ -4415,7 +4415,7 @@

                        LinearReLU#

                        -class torch.ao.nn.intrinsic.quantized.dynamic.LinearReLU(in_features, out_features, bias=True, dtype=torch.qint8)[source]#
                        +class torch.ao.nn.intrinsic.quantized.dynamic.LinearReLU(in_features, out_features, bias=True, dtype=torch.qint8)[source]#

A LinearReLU module fused from Linear and ReLU modules that can be used for dynamic quantization. Supports both FP16 and INT8 quantization.
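A minimal dynamic-quantization sketch; it swaps a plain nn.Linear, while a Linear+ReLU pair fused beforehand would be swapped for this LinearReLU instead:

import torch
import torch.nn as nn

m = nn.Sequential(nn.Linear(16, 8), nn.ReLU()).eval()
qm = torch.ao.quantization.quantize_dynamic(m, {nn.Linear}, dtype=torch.qint8)
print(type(qm[0]))   # torch.ao.nn.quantized.dynamic.Linear
qm(torch.randn(4, 16))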

                        diff --git a/2.9/generated/torch.ao.nn.qat.Conv2d.html b/2.9/generated/torch.ao.nn.qat.Conv2d.html index 2826f3b27ee..07e50588405 100644 --- a/2.9/generated/torch.ao.nn.qat.Conv2d.html +++ b/2.9/generated/torch.ao.nn.qat.Conv2d.html @@ -4415,7 +4415,7 @@

                        Conv2d#

                        -class torch.ao.nn.qat.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', qconfig=None, device=None, dtype=None)[source]#
                        +class torch.ao.nn.qat.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', qconfig=None, device=None, dtype=None)[source]#

                        A Conv2d module attached with FakeQuantize modules for weight, used for quantization aware training.

                        We adopt the same interface as torch.nn.Conv2d, please see diff --git a/2.9/generated/torch.ao.nn.qat.Conv3d.html b/2.9/generated/torch.ao.nn.qat.Conv3d.html index 7b559be9ee6..8a331528605 100644 --- a/2.9/generated/torch.ao.nn.qat.Conv3d.html +++ b/2.9/generated/torch.ao.nn.qat.Conv3d.html @@ -4415,7 +4415,7 @@

                        Conv3d#

                        -class torch.ao.nn.qat.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', qconfig=None, device=None, dtype=None)[source]#
                        +class torch.ao.nn.qat.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', qconfig=None, device=None, dtype=None)[source]#

                        A Conv3d module attached with FakeQuantize modules for weight, used for quantization aware training.

                        We adopt the same interface as torch.nn.Conv3d, please see diff --git a/2.9/generated/torch.ao.nn.qat.Linear.html b/2.9/generated/torch.ao.nn.qat.Linear.html index 075c8a639c3..7fd21c538db 100644 --- a/2.9/generated/torch.ao.nn.qat.Linear.html +++ b/2.9/generated/torch.ao.nn.qat.Linear.html @@ -4415,7 +4415,7 @@

                        Linear#

                        -class torch.ao.nn.qat.Linear(in_features, out_features, bias=True, qconfig=None, device=None, dtype=None)[source]#
                        +class torch.ao.nn.qat.Linear(in_features, out_features, bias=True, qconfig=None, device=None, dtype=None)[source]#

                        A linear module attached with FakeQuantize modules for weight, used for quantization aware training.

                        We adopt the same interface as torch.nn.Linear, please see @@ -4430,7 +4430,7 @@

                        Linear
                        -classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#
                        +classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#

Create a qat module from a float module or qparams_dict. Args: mod: a float module, either produced by torch.ao.quantization utilities or provided directly by the user.
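A minimal from_float sketch, assuming the float module already carries the qconfig attribute that from_float requires:

import torch.nn as nn
import torch.ao.nn.qat as nnqat
from torch.ao.quantization import get_default_qat_qconfig

float_linear = nn.Linear(16, 8)
float_linear.qconfig = get_default_qat_qconfig("fbgemm")
# Builds a qat.Linear that takes over the float module's weight and bias and
# attaches the FakeQuantize module specified by the qconfig.
qat_linear = nnqat.Linear.from_float(float_linear)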

                        diff --git a/2.9/generated/torch.ao.nn.qat.dynamic.Linear.html b/2.9/generated/torch.ao.nn.qat.dynamic.Linear.html index c9814701e4c..8c00134d3f5 100644 --- a/2.9/generated/torch.ao.nn.qat.dynamic.Linear.html +++ b/2.9/generated/torch.ao.nn.qat.dynamic.Linear.html @@ -4415,7 +4415,7 @@

                        Linear#

                        -class torch.ao.nn.qat.dynamic.Linear(in_features, out_features, bias=True, qconfig=None, device=None, dtype=None)[source]#
                        +class torch.ao.nn.qat.dynamic.Linear(in_features, out_features, bias=True, qconfig=None, device=None, dtype=None)[source]#

                        A linear module attached with FakeQuantize modules for weight, used for dynamic quantization aware training.

                        We adopt the same interface as torch.nn.Linear, please see diff --git a/2.9/generated/torch.ao.nn.quantizable.LSTM.html b/2.9/generated/torch.ao.nn.quantizable.LSTM.html index 26f53226050..f14890ec4ad 100644 --- a/2.9/generated/torch.ao.nn.quantizable.LSTM.html +++ b/2.9/generated/torch.ao.nn.quantizable.LSTM.html @@ -4415,7 +4415,7 @@

                        LSTM#

                        -class torch.ao.nn.quantizable.LSTM(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None, *, split_gates=False)[source]#
                        +class torch.ao.nn.quantizable.LSTM(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None, *, split_gates=False)[source]#

                        A quantizable long short-term memory (LSTM).

For the description and the argument types, please refer to LSTM.

                        diff --git a/2.9/generated/torch.ao.nn.quantizable.MultiheadAttention.html b/2.9/generated/torch.ao.nn.quantizable.MultiheadAttention.html index c5a28d7c9d0..8f233209a20 100644 --- a/2.9/generated/torch.ao.nn.quantizable.MultiheadAttention.html +++ b/2.9/generated/torch.ao.nn.quantizable.MultiheadAttention.html @@ -4415,12 +4415,12 @@

                        MultiheadAttention#

                        -class torch.ao.nn.quantizable.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantizable.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None)[source]#
                        -dequantize()[source]#
                        +dequantize()[source]#

                        Utility to convert the quantized MHA back to float.

                        The motivation for this is that it is not trivial to convert the weights from the format that is used in the quantized version back to the @@ -4429,7 +4429,7 @@

                        MultiheadAttention
                        -forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False)[source]#
                        +forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False)[source]#
Note:

Please refer to forward() for more information.

                        diff --git a/2.9/generated/torch.ao.nn.quantized.BatchNorm2d.html b/2.9/generated/torch.ao.nn.quantized.BatchNorm2d.html index 4baf1338d25..1bae0569921 100644 --- a/2.9/generated/torch.ao.nn.quantized.BatchNorm2d.html +++ b/2.9/generated/torch.ao.nn.quantized.BatchNorm2d.html @@ -4415,7 +4415,7 @@

                        BatchNorm2d#

                        -class torch.ao.nn.quantized.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, device=None, dtype=None)[source]#

                        This is the quantized version of BatchNorm2d.

                        diff --git a/2.9/generated/torch.ao.nn.quantized.BatchNorm3d.html b/2.9/generated/torch.ao.nn.quantized.BatchNorm3d.html index c158dd7f9ca..e15e97a5156 100644 --- a/2.9/generated/torch.ao.nn.quantized.BatchNorm3d.html +++ b/2.9/generated/torch.ao.nn.quantized.BatchNorm3d.html @@ -4415,7 +4415,7 @@

                        BatchNorm3d#

                        -class torch.ao.nn.quantized.BatchNorm3d(num_features, eps=1e-05, momentum=0.1, device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.BatchNorm3d(num_features, eps=1e-05, momentum=0.1, device=None, dtype=None)[source]#

                        This is the quantized version of BatchNorm3d.

                        diff --git a/2.9/generated/torch.ao.nn.quantized.Conv1d.html b/2.9/generated/torch.ao.nn.quantized.Conv1d.html index e7a539ed2e9..aee50fd1fe9 100644 --- a/2.9/generated/torch.ao.nn.quantized.Conv1d.html +++ b/2.9/generated/torch.ao.nn.quantized.Conv1d.html @@ -4415,7 +4415,7 @@

                        Conv1d#

                        -class torch.ao.nn.quantized.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                        Applies a 1D convolution over a quantized input signal composed of several quantized input planes.

                        For details on input arguments, parameters, and implementation see @@ -4450,7 +4450,7 @@

                        Conv1d
                        -classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#
                        +classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#

                        Creates a quantized module from a float module or qparams_dict.

                        Parameters
                        diff --git a/2.9/generated/torch.ao.nn.quantized.Conv2d.html b/2.9/generated/torch.ao.nn.quantized.Conv2d.html index 0538efb4867..f25870088d8 100644 --- a/2.9/generated/torch.ao.nn.quantized.Conv2d.html +++ b/2.9/generated/torch.ao.nn.quantized.Conv2d.html @@ -4415,7 +4415,7 @@

                        Conv2d#

                        -class torch.ao.nn.quantized.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                        Applies a 2D convolution over a quantized input signal composed of several quantized input planes.

                        For details on input arguments, parameters, and implementation see @@ -4454,7 +4454,7 @@

                        Conv2d
                        -classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#
                        +classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#

                        Creates a quantized module from a float module or qparams_dict.

                        Parameters
                        diff --git a/2.9/generated/torch.ao.nn.quantized.Conv3d.html b/2.9/generated/torch.ao.nn.quantized.Conv3d.html index 07d5f0758e0..4bf11f1699b 100644 --- a/2.9/generated/torch.ao.nn.quantized.Conv3d.html +++ b/2.9/generated/torch.ao.nn.quantized.Conv3d.html @@ -4415,7 +4415,7 @@

                        Conv3d#

                        -class torch.ao.nn.quantized.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                        Applies a 3D convolution over a quantized input signal composed of several quantized input planes.

                        For details on input arguments, parameters, and implementation see @@ -4454,7 +4454,7 @@

                        Conv3d
                        -classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#
                        +classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#

                        Creates a quantized module from a float module or qparams_dict.

                        Parameters
                        diff --git a/2.9/generated/torch.ao.nn.quantized.ConvTranspose1d.html b/2.9/generated/torch.ao.nn.quantized.ConvTranspose1d.html index 186cbf8e7ea..ce0b1f48657 100644 --- a/2.9/generated/torch.ao.nn.quantized.ConvTranspose1d.html +++ b/2.9/generated/torch.ao.nn.quantized.ConvTranspose1d.html @@ -4415,7 +4415,7 @@

                        ConvTranspose1d#

                        -class torch.ao.nn.quantized.ConvTranspose1d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.ConvTranspose1d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                        Applies a 1D transposed convolution operator over an input image composed of several input planes. For details on input arguments, parameters, and implementation see diff --git a/2.9/generated/torch.ao.nn.quantized.ConvTranspose2d.html b/2.9/generated/torch.ao.nn.quantized.ConvTranspose2d.html index adf6b8b3272..f97e9a45ad5 100644 --- a/2.9/generated/torch.ao.nn.quantized.ConvTranspose2d.html +++ b/2.9/generated/torch.ao.nn.quantized.ConvTranspose2d.html @@ -4415,7 +4415,7 @@

                        ConvTranspose2d#

                        -class torch.ao.nn.quantized.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                        Applies a 2D transposed convolution operator over an input image composed of several input planes. For details on input arguments, parameters, and implementation see diff --git a/2.9/generated/torch.ao.nn.quantized.ConvTranspose3d.html b/2.9/generated/torch.ao.nn.quantized.ConvTranspose3d.html index 9b742515ae9..14fd5fce0ae 100644 --- a/2.9/generated/torch.ao.nn.quantized.ConvTranspose3d.html +++ b/2.9/generated/torch.ao.nn.quantized.ConvTranspose3d.html @@ -4415,7 +4415,7 @@

                        ConvTranspose3d#

                        -class torch.ao.nn.quantized.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                        Applies a 3D transposed convolution operator over an input image composed of several input planes. For details on input arguments, parameters, and implementation see diff --git a/2.9/generated/torch.ao.nn.quantized.ELU.html b/2.9/generated/torch.ao.nn.quantized.ELU.html index db79be9cfcc..ed3cdb29ece 100644 --- a/2.9/generated/torch.ao.nn.quantized.ELU.html +++ b/2.9/generated/torch.ao.nn.quantized.ELU.html @@ -4415,7 +4415,7 @@

                        ELU#

                        -class torch.ao.nn.quantized.ELU(scale, zero_point, alpha=1.0)[source]#
                        +class torch.ao.nn.quantized.ELU(scale, zero_point, alpha=1.0)[source]#

                        This is the quantized equivalent of ELU.

                        Parameters
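A small usage sketch; the scale and zero_point values are illustrative and would normally come from an observer:

import torch
from torch.ao.nn.quantized import ELU

qelu = ELU(scale=0.05, zero_point=64, alpha=1.0)
x = torch.randn(4)
qx = torch.quantize_per_tensor(x, scale=0.05, zero_point=64, dtype=torch.quint8)
qy = qelu(qx)            # quantized in, quantized out
print(qy.dequantize())
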
                        diff --git a/2.9/generated/torch.ao.nn.quantized.Embedding.html b/2.9/generated/torch.ao.nn.quantized.Embedding.html index 9488e4879dd..a00df6e1914 100644 --- a/2.9/generated/torch.ao.nn.quantized.Embedding.html +++ b/2.9/generated/torch.ao.nn.quantized.Embedding.html @@ -4415,7 +4415,7 @@

                        Embedding#

                        -class torch.ao.nn.quantized.Embedding(num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, dtype=torch.quint8)[source]#
                        +class torch.ao.nn.quantized.Embedding(num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, dtype=torch.quint8)[source]#

                        A quantized Embedding module with quantized packed weights as inputs. We adopt the same interface as torch.nn.Embedding, please see https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html for documentation.

                        @@ -4439,7 +4439,7 @@

                        Embedding
                        -classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#
                        +classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#

                        Create a quantized embedding module from a float module

                        Parameters
                        diff --git a/2.9/generated/torch.ao.nn.quantized.EmbeddingBag.html b/2.9/generated/torch.ao.nn.quantized.EmbeddingBag.html index 7cb42d2ca8e..ffeb8988c57 100644 --- a/2.9/generated/torch.ao.nn.quantized.EmbeddingBag.html +++ b/2.9/generated/torch.ao.nn.quantized.EmbeddingBag.html @@ -4415,7 +4415,7 @@

                        EmbeddingBag#

                        -class torch.ao.nn.quantized.EmbeddingBag(num_embeddings, embedding_dim, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='sum', sparse=False, _weight=None, include_last_offset=False, dtype=torch.quint8)[source]#
                        +class torch.ao.nn.quantized.EmbeddingBag(num_embeddings, embedding_dim, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='sum', sparse=False, _weight=None, include_last_offset=False, dtype=torch.quint8)[source]#

                        A quantized EmbeddingBag module with quantized packed weights as inputs. We adopt the same interface as torch.nn.EmbeddingBag, please see https://pytorch.org/docs/stable/generated/torch.nn.EmbeddingBag.html for documentation.

                        @@ -4440,7 +4440,7 @@

                        EmbeddingBag
                        -classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#
                        +classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#

                        Create a quantized embedding_bag module from a float module

                        Parameters
                        diff --git a/2.9/generated/torch.ao.nn.quantized.FXFloatFunctional.html b/2.9/generated/torch.ao.nn.quantized.FXFloatFunctional.html index 0b662a56383..3162d1ea063 100644 --- a/2.9/generated/torch.ao.nn.quantized.FXFloatFunctional.html +++ b/2.9/generated/torch.ao.nn.quantized.FXFloatFunctional.html @@ -4415,7 +4415,7 @@

                        FXFloatFunctional#

                        -class torch.ao.nn.quantized.FXFloatFunctional(*args, **kwargs)[source]#
                        +class torch.ao.nn.quantized.FXFloatFunctional(*args, **kwargs)[source]#

A module that replaces the FloatFunctional module before FX graph mode quantization, since activation_post_process will be inserted directly in the top-level module.

                        diff --git a/2.9/generated/torch.ao.nn.quantized.FloatFunctional.html b/2.9/generated/torch.ao.nn.quantized.FloatFunctional.html index 97c3c7ad4c1..44987e45bba 100644 --- a/2.9/generated/torch.ao.nn.quantized.FloatFunctional.html +++ b/2.9/generated/torch.ao.nn.quantized.FloatFunctional.html @@ -4415,7 +4415,7 @@

                        FloatFunctional#

                        -class torch.ao.nn.quantized.FloatFunctional[source]#
                        +class torch.ao.nn.quantized.FloatFunctional[source]#

                        State collector class for float operations.

                        The instance of this class can be used instead of the torch. prefix for some operations. See example usage below.
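A minimal usage sketch of the pattern described above; the attached observers only come into play once the model is prepared for quantization:

import torch
from torch.ao.nn.quantized import FloatFunctional

ff = FloatFunctional()
x, y = torch.randn(4), torch.randn(4)
# Use ff.add instead of torch.add so that observers inserted during
# quantization preparation can record the statistics of this addition.
z = ff.add(x, y)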

                        diff --git a/2.9/generated/torch.ao.nn.quantized.GroupNorm.html b/2.9/generated/torch.ao.nn.quantized.GroupNorm.html index 126f6a9ac12..4399a0c2ada 100644 --- a/2.9/generated/torch.ao.nn.quantized.GroupNorm.html +++ b/2.9/generated/torch.ao.nn.quantized.GroupNorm.html @@ -4415,7 +4415,7 @@

                        GroupNorm#

                        -class torch.ao.nn.quantized.GroupNorm(num_groups, num_channels, weight, bias, scale, zero_point, eps=1e-05, affine=True, device=None, dtype=None)[source]#
                        +class torch.ao.nn.quantized.GroupNorm(num_groups, num_channels, weight, bias, scale, zero_point, eps=1e-05, affine=True, device=None, dtype=None)[source]#

                        This is the quantized version of GroupNorm.

                        Additional args:
                          diff --git a/2.9/generated/torch.ao.nn.quantized.Hardswish.html b/2.9/generated/torch.ao.nn.quantized.Hardswish.html index 444863cfdb3..594ed031047 100644 --- a/2.9/generated/torch.ao.nn.quantized.Hardswish.html +++ b/2.9/generated/torch.ao.nn.quantized.Hardswish.html @@ -4415,7 +4415,7 @@

                          Hardswish#

                          -class torch.ao.nn.quantized.Hardswish(scale, zero_point, device=None, dtype=None)[source]#
                          +class torch.ao.nn.quantized.Hardswish(scale, zero_point, device=None, dtype=None)[source]#

                          This is the quantized version of Hardswish.

                          Parameters
                          diff --git a/2.9/generated/torch.ao.nn.quantized.InstanceNorm1d.html b/2.9/generated/torch.ao.nn.quantized.InstanceNorm1d.html index 36e09a06c39..2d3ae053c55 100644 --- a/2.9/generated/torch.ao.nn.quantized.InstanceNorm1d.html +++ b/2.9/generated/torch.ao.nn.quantized.InstanceNorm1d.html @@ -4415,7 +4415,7 @@

                          InstanceNorm1d#

                          -class torch.ao.nn.quantized.InstanceNorm1d(num_features, weight, bias, scale, zero_point, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                          +class torch.ao.nn.quantized.InstanceNorm1d(num_features, weight, bias, scale, zero_point, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                          This is the quantized version of InstanceNorm1d.

                          Additional args:
                            diff --git a/2.9/generated/torch.ao.nn.quantized.InstanceNorm2d.html b/2.9/generated/torch.ao.nn.quantized.InstanceNorm2d.html index a47f2d07ebf..d0f8f1502fc 100644 --- a/2.9/generated/torch.ao.nn.quantized.InstanceNorm2d.html +++ b/2.9/generated/torch.ao.nn.quantized.InstanceNorm2d.html @@ -4415,7 +4415,7 @@

                            InstanceNorm2d#

                            -class torch.ao.nn.quantized.InstanceNorm2d(num_features, weight, bias, scale, zero_point, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                            +class torch.ao.nn.quantized.InstanceNorm2d(num_features, weight, bias, scale, zero_point, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                            This is the quantized version of InstanceNorm2d.

                            Additional args:
                              diff --git a/2.9/generated/torch.ao.nn.quantized.InstanceNorm3d.html b/2.9/generated/torch.ao.nn.quantized.InstanceNorm3d.html index 6269173742e..9f1a0cefa60 100644 --- a/2.9/generated/torch.ao.nn.quantized.InstanceNorm3d.html +++ b/2.9/generated/torch.ao.nn.quantized.InstanceNorm3d.html @@ -4415,7 +4415,7 @@

                              InstanceNorm3d#

                              -class torch.ao.nn.quantized.InstanceNorm3d(num_features, weight, bias, scale, zero_point, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                              +class torch.ao.nn.quantized.InstanceNorm3d(num_features, weight, bias, scale, zero_point, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                              This is the quantized version of InstanceNorm3d.

                              Additional args:
                                diff --git a/2.9/generated/torch.ao.nn.quantized.LayerNorm.html b/2.9/generated/torch.ao.nn.quantized.LayerNorm.html index 454186e9664..cdaefed1104 100644 --- a/2.9/generated/torch.ao.nn.quantized.LayerNorm.html +++ b/2.9/generated/torch.ao.nn.quantized.LayerNorm.html @@ -4415,7 +4415,7 @@

                                LayerNorm#

                                -class torch.ao.nn.quantized.LayerNorm(normalized_shape, weight, bias, scale, zero_point, eps=1e-05, elementwise_affine=True, device=None, dtype=None)[source]#
                                +class torch.ao.nn.quantized.LayerNorm(normalized_shape, weight, bias, scale, zero_point, eps=1e-05, elementwise_affine=True, device=None, dtype=None)[source]#

                                This is the quantized version of LayerNorm.

                                Additional args:
                                  diff --git a/2.9/generated/torch.ao.nn.quantized.LeakyReLU.html b/2.9/generated/torch.ao.nn.quantized.LeakyReLU.html index 509f3257111..e02d8e5fb1a 100644 --- a/2.9/generated/torch.ao.nn.quantized.LeakyReLU.html +++ b/2.9/generated/torch.ao.nn.quantized.LeakyReLU.html @@ -4415,7 +4415,7 @@

                                  LeakyReLU#

                                  -class torch.ao.nn.quantized.LeakyReLU(scale, zero_point, negative_slope=0.01, inplace=False, device=None, dtype=None)[source]#
                                  +class torch.ao.nn.quantized.LeakyReLU(scale, zero_point, negative_slope=0.01, inplace=False, device=None, dtype=None)[source]#

                                  This is the quantized equivalent of LeakyReLU.

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.nn.quantized.Linear.html b/2.9/generated/torch.ao.nn.quantized.Linear.html index 130777c141f..90c2c7bf498 100644 --- a/2.9/generated/torch.ao.nn.quantized.Linear.html +++ b/2.9/generated/torch.ao.nn.quantized.Linear.html @@ -4415,7 +4415,7 @@

                                  Linear#

                                  -class torch.ao.nn.quantized.Linear(in_features, out_features, bias_=True, dtype=torch.qint8)[source]#
                                  +class torch.ao.nn.quantized.Linear(in_features, out_features, bias_=True, dtype=torch.qint8)[source]#

A quantized linear module with quantized tensors as inputs and outputs. We adopt the same interface as torch.nn.Linear, please see https://pytorch.org/docs/stable/nn.html#torch.nn.Linear for documentation.

                                  @@ -4444,7 +4444,7 @@

                                  Linear
                                  -classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#
                                  +classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#

                                  Create a quantized module from an observed float module

                                  Parameters
                                  @@ -4460,7 +4460,7 @@

                                  Linear
                                  -classmethod from_reference(ref_qlinear, output_scale, output_zero_point)[source]#
                                  +classmethod from_reference(ref_qlinear, output_scale, output_zero_point)[source]#

                                  Create a (fbgemm/qnnpack) quantized module from a reference quantized module

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.nn.quantized.QFunctional.html b/2.9/generated/torch.ao.nn.quantized.QFunctional.html index 92ab3f9aa6f..802f4c121e0 100644 --- a/2.9/generated/torch.ao.nn.quantized.QFunctional.html +++ b/2.9/generated/torch.ao.nn.quantized.QFunctional.html @@ -4415,7 +4415,7 @@

                                  QFunctional#

                                  -class torch.ao.nn.quantized.QFunctional[source]#
                                  +class torch.ao.nn.quantized.QFunctional[source]#

                                  Wrapper class for quantized operations.

                                  The instance of this class can be used instead of the torch.ops.quantized prefix. See example usage below.
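A minimal usage sketch with illustrative quantization parameters:

import torch
from torch.ao.nn.quantized import QFunctional

qf = QFunctional()
qx = torch.quantize_per_tensor(torch.randn(4), scale=0.1, zero_point=0, dtype=torch.quint8)
qy = torch.quantize_per_tensor(torch.randn(4), scale=0.1, zero_point=0, dtype=torch.quint8)
qz = qf.add(qx, qy)  # runs the quantized add with qf's scale and zero_point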

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.ReLU6.html b/2.9/generated/torch.ao.nn.quantized.ReLU6.html index dc9a56cf179..29de80d6cd7 100644 --- a/2.9/generated/torch.ao.nn.quantized.ReLU6.html +++ b/2.9/generated/torch.ao.nn.quantized.ReLU6.html @@ -4415,7 +4415,7 @@

                                  ReLU6#

                                  -class torch.ao.nn.quantized.ReLU6(inplace=False)[source]#
                                  +class torch.ao.nn.quantized.ReLU6(inplace=False)[source]#

                                  Applies the element-wise function:

ReLU6(x) = min(max(x_0, x), q(6)), where x_0 is the zero_point, and q(6) is the quantized representation of the number 6.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.Sigmoid.html b/2.9/generated/torch.ao.nn.quantized.Sigmoid.html index 80de2e351c1..6a624de6c84 100644 --- a/2.9/generated/torch.ao.nn.quantized.Sigmoid.html +++ b/2.9/generated/torch.ao.nn.quantized.Sigmoid.html @@ -4415,7 +4415,7 @@

                                  Sigmoid#

                                  -class torch.ao.nn.quantized.Sigmoid(output_scale, output_zero_point)[source]#
                                  +class torch.ao.nn.quantized.Sigmoid(output_scale, output_zero_point)[source]#

                                  This is the quantized equivalent of Sigmoid.

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.nn.quantized.dynamic.GRU.html b/2.9/generated/torch.ao.nn.quantized.dynamic.GRU.html index f12a4f3d41a..4da42c95164 100644 --- a/2.9/generated/torch.ao.nn.quantized.dynamic.GRU.html +++ b/2.9/generated/torch.ao.nn.quantized.dynamic.GRU.html @@ -4415,7 +4415,7 @@

                                  GRU#

                                  -class torch.ao.nn.quantized.dynamic.GRU(*args, **kwargs)[source]#
                                  +class torch.ao.nn.quantized.dynamic.GRU(*args, **kwargs)[source]#

                                  Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.

For each element in the input sequence, each layer computes the following function:

r_t = σ(W_ir x_t + b_ir + W_hr h_{t-1} + b_hr)
z_t = σ(W_iz x_t + b_iz + W_hz h_{t-1} + b_hz)
n_t = tanh(W_in x_t + b_in + r_t * (W_hn h_{t-1} + b_hn))
h_t = (1 - z_t) * n_t + z_t * h_{t-1}

where h_t is the hidden state at time t, x_t is the input at time t, h_{t-1} is the hidden state at time t-1, and r_t, z_t, n_t are the reset, update, and new gates, respectively; σ is the sigmoid function and * is the Hadamard product.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.dynamic.GRUCell.html b/2.9/generated/torch.ao.nn.quantized.dynamic.GRUCell.html index 6875f36b700..ae9907ba488 100644 --- a/2.9/generated/torch.ao.nn.quantized.dynamic.GRUCell.html +++ b/2.9/generated/torch.ao.nn.quantized.dynamic.GRUCell.html @@ -4415,7 +4415,7 @@

                                  GRUCell#

                                  -class torch.ao.nn.quantized.dynamic.GRUCell(input_size, hidden_size, bias=True, dtype=torch.qint8)[source]#
                                  +class torch.ao.nn.quantized.dynamic.GRUCell(input_size, hidden_size, bias=True, dtype=torch.qint8)[source]#

                                  A gated recurrent unit (GRU) cell

A dynamic quantized GRUCell module with floating point tensors as inputs and outputs. Weights are quantized to 8 bits. We adopt the same interface as torch.nn.GRUCell,
diff --git a/2.9/generated/torch.ao.nn.quantized.dynamic.LSTM.html b/2.9/generated/torch.ao.nn.quantized.dynamic.LSTM.html index e47162f33ee..5de6499dbb7 100644 --- a/2.9/generated/torch.ao.nn.quantized.dynamic.LSTM.html +++ b/2.9/generated/torch.ao.nn.quantized.dynamic.LSTM.html @@ -4415,7 +4415,7 @@

                                  LSTM#

                                  -class torch.ao.nn.quantized.dynamic.LSTM(*args, **kwargs)[source]#
                                  +class torch.ao.nn.quantized.dynamic.LSTM(*args, **kwargs)[source]#

A dynamic quantized LSTM module with floating point tensors as inputs and outputs. We adopt the same interface as torch.nn.LSTM; please see https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM for documentation.
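A common way to obtain this module is dynamic quantization of a float LSTM; a minimal sketch (shapes are illustrative):

import torch

float_lstm = torch.nn.LSTM(10, 20, 2)
qlstm = torch.ao.quantization.quantize_dynamic(float_lstm, {torch.nn.LSTM}, dtype=torch.qint8)
x = torch.randn(5, 3, 10)                              # float inputs
h0, c0 = torch.randn(2, 3, 20), torch.randn(2, 3, 20)
out, (hn, cn) = qlstm(x, (h0, c0))                     # float outputs, int8 weights inside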

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.dynamic.LSTMCell.html b/2.9/generated/torch.ao.nn.quantized.dynamic.LSTMCell.html index 5860cb743e4..325f8ff4db3 100644 --- a/2.9/generated/torch.ao.nn.quantized.dynamic.LSTMCell.html +++ b/2.9/generated/torch.ao.nn.quantized.dynamic.LSTMCell.html @@ -4415,7 +4415,7 @@

                                  LSTMCell#

                                  -class torch.ao.nn.quantized.dynamic.LSTMCell(*args, **kwargs)[source]#
                                  +class torch.ao.nn.quantized.dynamic.LSTMCell(*args, **kwargs)[source]#

                                  A long short-term memory (LSTM) cell.

A dynamic quantized LSTMCell module with floating point tensors as inputs and outputs. Weights are quantized to 8 bits. We adopt the same interface as torch.nn.LSTMCell,
diff --git a/2.9/generated/torch.ao.nn.quantized.dynamic.Linear.html b/2.9/generated/torch.ao.nn.quantized.dynamic.Linear.html index a8cbcf1cdd0..501fc456055 100644 --- a/2.9/generated/torch.ao.nn.quantized.dynamic.Linear.html +++ b/2.9/generated/torch.ao.nn.quantized.dynamic.Linear.html @@ -4415,7 +4415,7 @@

                                  Linear#

                                  -class torch.ao.nn.quantized.dynamic.Linear(in_features, out_features, bias_=True, dtype=torch.qint8)[source]#
                                  +class torch.ao.nn.quantized.dynamic.Linear(in_features, out_features, bias_=True, dtype=torch.qint8)[source]#

A dynamic quantized linear module with floating point tensors as inputs and outputs. We adopt the same interface as torch.nn.Linear; please see https://pytorch.org/docs/stable/nn.html#torch.nn.Linear for documentation.
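A minimal usage sketch (shapes are illustrative):

import torch

m = torch.ao.nn.quantized.dynamic.Linear(20, 30)
x = torch.randn(128, 20)     # float input
y = m(x)                     # float output computed with int8 weights
print(y.size())              # torch.Size([128, 30])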

                                  @@ -4442,7 +4442,7 @@

                                  Linear
                                  -classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#
                                  +classmethod from_float(mod, use_precomputed_fake_quant=False)[source]#

                                  Create a dynamic quantized module from a float module or qparams_dict

                                  Parameters
                                  @@ -4454,7 +4454,7 @@

                                  Linear
                                  -classmethod from_reference(ref_qlinear)[source]#
                                  +classmethod from_reference(ref_qlinear)[source]#

                                  Create a (fbgemm/qnnpack) dynamic quantized module from a reference quantized module :param ref_qlinear: a reference quantized module, either produced by diff --git a/2.9/generated/torch.ao.nn.quantized.dynamic.RNNCell.html b/2.9/generated/torch.ao.nn.quantized.dynamic.RNNCell.html index efc30df4432..720ba335e50 100644 --- a/2.9/generated/torch.ao.nn.quantized.dynamic.RNNCell.html +++ b/2.9/generated/torch.ao.nn.quantized.dynamic.RNNCell.html @@ -4415,7 +4415,7 @@

                                  RNNCell#

                                  -class torch.ao.nn.quantized.dynamic.RNNCell(input_size, hidden_size, bias=True, nonlinearity='tanh', dtype=torch.qint8)[source]#
                                  +class torch.ao.nn.quantized.dynamic.RNNCell(input_size, hidden_size, bias=True, nonlinearity='tanh', dtype=torch.qint8)[source]#

An Elman RNN cell with tanh or ReLU non-linearity. A dynamic quantized RNNCell module with floating point tensors as inputs and outputs. Weights are quantized to 8 bits. We adopt the same interface as torch.nn.RNNCell,
diff --git a/2.9/generated/torch.ao.nn.quantized.functional.adaptive_avg_pool2d.html b/2.9/generated/torch.ao.nn.quantized.functional.adaptive_avg_pool2d.html index ea1074a05ee..0a6f0096d0b 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.adaptive_avg_pool2d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.adaptive_avg_pool2d.html @@ -4415,7 +4415,7 @@

                                  adaptive_avg_pool2d#

                                  -class torch.ao.nn.quantized.functional.adaptive_avg_pool2d(input, output_size)[source]#
                                  +class torch.ao.nn.quantized.functional.adaptive_avg_pool2d(input, output_size)[source]#

                                  Applies a 2D adaptive average pooling over a quantized input signal composed of several quantized input planes.
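A minimal sketch on a quantized NCHW tensor (quantization parameters are illustrative):

import torch
from torch.ao.nn.quantized import functional as qF

x = torch.quantize_per_tensor(torch.randn(1, 3, 8, 8), scale=0.1, zero_point=0, dtype=torch.quint8)
y = qF.adaptive_avg_pool2d(x, (1, 1))  # quantized output, one value per channel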

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.adaptive_avg_pool3d.html b/2.9/generated/torch.ao.nn.quantized.functional.adaptive_avg_pool3d.html index da64bd6e01a..c0072406161 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.adaptive_avg_pool3d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.adaptive_avg_pool3d.html @@ -4415,7 +4415,7 @@

                                  adaptive_avg_pool3d#

                                  -class torch.ao.nn.quantized.functional.adaptive_avg_pool3d(input, output_size)[source]#
                                  +class torch.ao.nn.quantized.functional.adaptive_avg_pool3d(input, output_size)[source]#

                                  Applies a 3D adaptive average pooling over a quantized input signal composed of several quantized input planes.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.avg_pool2d.html b/2.9/generated/torch.ao.nn.quantized.functional.avg_pool2d.html index ee7ff683354..6c2127f30e4 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.avg_pool2d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.avg_pool2d.html @@ -4415,7 +4415,7 @@

                                  avg_pool2d#

                                  -class torch.ao.nn.quantized.functional.avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#
                                  +class torch.ao.nn.quantized.functional.avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#

Applies 2D average-pooling operation in $kH \times kW$ regions by step size $sH \times sW$ steps. The number of output features is equal to the number of input planes.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.avg_pool3d.html b/2.9/generated/torch.ao.nn.quantized.functional.avg_pool3d.html index ee20bfffdeb..9a3d4fe940c 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.avg_pool3d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.avg_pool3d.html @@ -4415,7 +4415,7 @@

                                  avg_pool3d#

                                  -class torch.ao.nn.quantized.functional.avg_pool3d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#
                                  +class torch.ao.nn.quantized.functional.avg_pool3d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#

Applies 3D average-pooling operation in $kD \times kH \times kW$ regions by step size $sD \times sH \times sW$ steps. The number of output features is equal to the number of input planes.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.celu.html b/2.9/generated/torch.ao.nn.quantized.functional.celu.html index 4b214e1f893..75e575aca88 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.celu.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.celu.html @@ -4415,7 +4415,7 @@

                                  celu#

                                  -class torch.ao.nn.quantized.functional.celu(input, scale, zero_point, alpha=1.)[source]#
                                  +class torch.ao.nn.quantized.functional.celu(input, scale, zero_point, alpha=1.)[source]#

                                  Applies the quantized CELU function element-wise.

$\text{CELU}(x) = \max(0, x) + \min(0, \alpha * (\exp(x / \alpha) - 1))$
diff --git a/2.9/generated/torch.ao.nn.quantized.functional.clamp.html b/2.9/generated/torch.ao.nn.quantized.functional.clamp.html index 58c3344de93..9558a63fb1b 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.clamp.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.clamp.html @@ -4415,7 +4415,7 @@

                                  clamp#

                                  -class torch.ao.nn.quantized.functional.clamp(input, min_, max_)[source]#
                                  +class torch.ao.nn.quantized.functional.clamp(input, min_, max_)[source]#

clamp(input, min_, max_) -> Tensor

                                  Applies the clamp function element-wise. See clamp for more details.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.conv1d.html b/2.9/generated/torch.ao.nn.quantized.functional.conv1d.html index 108d8230c13..4e7d93c5f48 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.conv1d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.conv1d.html @@ -4415,7 +4415,7 @@

                                  conv1d#

                                  -class torch.ao.nn.quantized.functional.conv1d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', scale=1.0, zero_point=0, dtype=torch.quint8)[source]#
                                  +class torch.ao.nn.quantized.functional.conv1d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', scale=1.0, zero_point=0, dtype=torch.quint8)[source]#

                                  Applies a 1D convolution over a quantized 1D input composed of several input planes.

                                  See Conv1d for details and output shape.
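A minimal sketch (dtypes follow the quantized-op convention of quint8 activations and qint8 weights; all values are illustrative):

import torch
from torch.ao.nn.quantized import functional as qF

filters = torch.randn(33, 16, 3)
inputs = torch.randn(20, 16, 50)
bias = torch.randn(33)
scale, zero_point = 1.0, 0
q_filters = torch.quantize_per_tensor(filters, scale, zero_point, torch.qint8)
q_inputs = torch.quantize_per_tensor(inputs, scale, zero_point, torch.quint8)
out = qF.conv1d(q_inputs, q_filters, bias, padding=1, scale=scale, zero_point=zero_point)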

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.conv2d.html b/2.9/generated/torch.ao.nn.quantized.functional.conv2d.html index 8a2a4ee8460..a9c58f3ba23 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.conv2d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.conv2d.html @@ -4415,7 +4415,7 @@

                                  conv2d#

                                  -class torch.ao.nn.quantized.functional.conv2d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', scale=1.0, zero_point=0, dtype=torch.quint8)[source]#
                                  +class torch.ao.nn.quantized.functional.conv2d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', scale=1.0, zero_point=0, dtype=torch.quint8)[source]#

                                  Applies a 2D convolution over a quantized 2D input composed of several input planes.

                                  See Conv2d for details and output shape.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.conv3d.html b/2.9/generated/torch.ao.nn.quantized.functional.conv3d.html index bb48628161f..204f09e48c6 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.conv3d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.conv3d.html @@ -4415,7 +4415,7 @@

                                  conv3d#

                                  -class torch.ao.nn.quantized.functional.conv3d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', scale=1.0, zero_point=0, dtype=torch.quint8)[source]#
                                  +class torch.ao.nn.quantized.functional.conv3d(input, weight, bias, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', scale=1.0, zero_point=0, dtype=torch.quint8)[source]#

                                  Applies a 3D convolution over a quantized 3D input composed of several input planes.

                                  See Conv3d for details and output shape.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.elu.html b/2.9/generated/torch.ao.nn.quantized.functional.elu.html index 0da89d67cbe..0fe9074d339 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.elu.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.elu.html @@ -4415,7 +4415,7 @@

                                  elu#

                                  -class torch.ao.nn.quantized.functional.elu(input, scale, zero_point, alpha=1.0)[source]#
                                  +class torch.ao.nn.quantized.functional.elu(input, scale, zero_point, alpha=1.0)[source]#

                                  This is the quantized version of elu().

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.hardsigmoid.html b/2.9/generated/torch.ao.nn.quantized.functional.hardsigmoid.html index 4abd08fe222..8a408f8c7b7 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.hardsigmoid.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.hardsigmoid.html @@ -4415,7 +4415,7 @@

                                  hardsigmoid#

                                  -class torch.ao.nn.quantized.functional.hardsigmoid(input, inplace=False)[source]#
                                  +class torch.ao.nn.quantized.functional.hardsigmoid(input, inplace=False)[source]#

                                  This is the quantized version of hardsigmoid().

                                  Return type
                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.hardswish.html b/2.9/generated/torch.ao.nn.quantized.functional.hardswish.html index 4605b7fe53a..6ddf3cef892 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.hardswish.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.hardswish.html @@ -4415,7 +4415,7 @@

                                  hardswish#

                                  -class torch.ao.nn.quantized.functional.hardswish(input, scale, zero_point)[source]#
                                  +class torch.ao.nn.quantized.functional.hardswish(input, scale, zero_point)[source]#

                                  This is the quantized version of hardswish().

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.hardtanh.html b/2.9/generated/torch.ao.nn.quantized.functional.hardtanh.html index c8e01fdde1f..a7390248e86 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.hardtanh.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.hardtanh.html @@ -4415,7 +4415,7 @@

                                  hardtanh#

                                  -class torch.ao.nn.quantized.functional.hardtanh(input, min_val=-1.0, max_val=1.0, inplace=False)[source]#
                                  +class torch.ao.nn.quantized.functional.hardtanh(input, min_val=-1.0, max_val=1.0, inplace=False)[source]#

                                  This is the quantized version of hardtanh().

                                  Return type
                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.interpolate.html b/2.9/generated/torch.ao.nn.quantized.functional.interpolate.html index 5ffde7ca4f5..ed82f5da296 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.interpolate.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.interpolate.html @@ -4415,7 +4415,7 @@

                                  interpolate#

                                  -class torch.ao.nn.quantized.functional.interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None)[source]#
                                  +class torch.ao.nn.quantized.functional.interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None)[source]#

                                  Down/up samples the input to either the given size or the given scale_factor

                                  See torch.nn.functional.interpolate() for implementation details.
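A minimal sketch (only the modes supported for quantized tensors, such as 'nearest' and 'bilinear', apply; values are illustrative):

import torch
from torch.ao.nn.quantized import functional as qF

x = torch.quantize_per_tensor(torch.randn(1, 3, 4, 4), 0.1, 0, torch.quint8)
y = qF.interpolate(x, scale_factor=2.0, mode='nearest')  # 1x3x8x8 quantized output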

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.leaky_relu.html b/2.9/generated/torch.ao.nn.quantized.functional.leaky_relu.html index 0fbcdff10ac..5a394e60a5b 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.leaky_relu.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.leaky_relu.html @@ -4415,7 +4415,7 @@

                                  leaky_relu#

                                  -class torch.ao.nn.quantized.functional.leaky_relu(input, negative_slope=0.01, inplace=False, scale=None, zero_point=None)[source]#
                                  +class torch.ao.nn.quantized.functional.leaky_relu(input, negative_slope=0.01, inplace=False, scale=None, zero_point=None)[source]#

Quantized version of leaky_relu(input, negative_slope=0.01, inplace=False, scale, zero_point) -> Tensor

Applies element-wise, $\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)$
diff --git a/2.9/generated/torch.ao.nn.quantized.functional.linear.html b/2.9/generated/torch.ao.nn.quantized.functional.linear.html index a44dea1c38a..f7b2951efdb 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.linear.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.linear.html @@ -4415,7 +4415,7 @@

                                  linear#

                                  -class torch.ao.nn.quantized.functional.linear(input, weight, bias=None, scale=None, zero_point=None)[source]#
                                  +class torch.ao.nn.quantized.functional.linear(input, weight, bias=None, scale=None, zero_point=None)[source]#

Applies a linear transformation to the incoming quantized data: $y = xA^T + b$. See Linear
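A minimal sketch (quint8 input, qint8 weight, float bias; output scale/zero_point chosen for illustration):

import torch
from torch.ao.nn.quantized import functional as qF

x = torch.quantize_per_tensor(torch.randn(128, 20), 0.1, 0, torch.quint8)
w = torch.quantize_per_tensor(torch.randn(30, 20), 0.1, 0, torch.qint8)
b = torch.randn(30)
y = qF.linear(x, w, b, scale=0.2, zero_point=0)  # quantized 128x30 output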

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.max_pool1d.html b/2.9/generated/torch.ao.nn.quantized.functional.max_pool1d.html index fff22808eb7..cbc36e9c15e 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.max_pool1d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.max_pool1d.html @@ -4415,7 +4415,7 @@

                                  max_pool1d#

                                  -class torch.ao.nn.quantized.functional.max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#
                                  +class torch.ao.nn.quantized.functional.max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#

                                  Applies a 1D max pooling over a quantized input signal composed of several quantized input planes.

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.max_pool2d.html b/2.9/generated/torch.ao.nn.quantized.functional.max_pool2d.html index bfc5dbb6c48..e61e6a79972 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.max_pool2d.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.max_pool2d.html @@ -4415,7 +4415,7 @@

                                  max_pool2d#

                                  -class torch.ao.nn.quantized.functional.max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#
                                  +class torch.ao.nn.quantized.functional.max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#

                                  Applies a 2D max pooling over a quantized input signal composed of several quantized input planes.
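A minimal sketch on a quantized tensor (values are illustrative):

import torch
from torch.ao.nn.quantized import functional as qF

x = torch.quantize_per_tensor(torch.randn(1, 3, 8, 8), 0.1, 0, torch.quint8)
y = qF.max_pool2d(x, kernel_size=2, stride=2)  # 1x3x4x4 quantized output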

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.threshold.html b/2.9/generated/torch.ao.nn.quantized.functional.threshold.html index 2e26b810c8c..ffc3696470d 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.threshold.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.threshold.html @@ -4415,7 +4415,7 @@

                                  threshold#

                                  -class torch.ao.nn.quantized.functional.threshold(input, threshold, value)[source]#
                                  +class torch.ao.nn.quantized.functional.threshold(input, threshold, value)[source]#

                                  Applies the quantized version of the threshold function element-wise:

$x = \begin{cases} x & \text{if } x > \text{threshold} \\ \text{value} & \text{otherwise} \end{cases}$
diff --git a/2.9/generated/torch.ao.nn.quantized.functional.upsample.html b/2.9/generated/torch.ao.nn.quantized.functional.upsample.html index 04a8c1f0305..5744fced74f 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.upsample.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.upsample.html @@ -4415,7 +4415,7 @@

                                  upsample#

                                  -class torch.ao.nn.quantized.functional.upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None)[source]#
                                  +class torch.ao.nn.quantized.functional.upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None)[source]#

                                  Upsamples the input to either the given size or the given scale_factor

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.upsample_bilinear.html b/2.9/generated/torch.ao.nn.quantized.functional.upsample_bilinear.html index d7486114a87..76a668baf29 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.upsample_bilinear.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.upsample_bilinear.html @@ -4415,7 +4415,7 @@

                                  upsample_bilinear#

                                  -class torch.ao.nn.quantized.functional.upsample_bilinear(input, size=None, scale_factor=None)[source]#
                                  +class torch.ao.nn.quantized.functional.upsample_bilinear(input, size=None, scale_factor=None)[source]#

                                  Upsamples the input, using bilinear upsampling.

                                  Warning

                                  diff --git a/2.9/generated/torch.ao.nn.quantized.functional.upsample_nearest.html b/2.9/generated/torch.ao.nn.quantized.functional.upsample_nearest.html index 2fb3106e4fa..6332cb07a82 100644 --- a/2.9/generated/torch.ao.nn.quantized.functional.upsample_nearest.html +++ b/2.9/generated/torch.ao.nn.quantized.functional.upsample_nearest.html @@ -4415,7 +4415,7 @@

                                  upsample_nearest#

                                  -class torch.ao.nn.quantized.functional.upsample_nearest(input, size=None, scale_factor=None)[source]#
                                  +class torch.ao.nn.quantized.functional.upsample_nearest(input, size=None, scale_factor=None)[source]#

                                  Upsamples the input, using nearest neighbours’ pixel values.

                                  Warning

                                  diff --git a/2.9/generated/torch.ao.quantization.DeQuantStub.html b/2.9/generated/torch.ao.quantization.DeQuantStub.html index b4d80ba5caa..0e322a298d7 100644 --- a/2.9/generated/torch.ao.quantization.DeQuantStub.html +++ b/2.9/generated/torch.ao.quantization.DeQuantStub.html @@ -4415,7 +4415,7 @@

                                  DeQuantStub#

                                  -class torch.ao.quantization.DeQuantStub(qconfig=None)[source]#
                                  +class torch.ao.quantization.DeQuantStub(qconfig=None)[source]#

Dequantize stub module. Before calibration this is the same as identity; it will be swapped with nnq.DeQuantize in convert.

                                  diff --git a/2.9/generated/torch.ao.quantization.QuantStub.html b/2.9/generated/torch.ao.quantization.QuantStub.html index a6cd40cc6d1..fbb89377c7b 100644 --- a/2.9/generated/torch.ao.quantization.QuantStub.html +++ b/2.9/generated/torch.ao.quantization.QuantStub.html @@ -4415,7 +4415,7 @@

                                  QuantStub#

                                  -class torch.ao.quantization.QuantStub(qconfig=None)[source]#
                                  +class torch.ao.quantization.QuantStub(qconfig=None)[source]#

Quantize stub module. Before calibration this is the same as an observer; it will be swapped with nnq.Quantize in convert.

                                  diff --git a/2.9/generated/torch.ao.quantization.QuantWrapper.html b/2.9/generated/torch.ao.quantization.QuantWrapper.html index 835d5a58018..e168a3cfe2f 100644 --- a/2.9/generated/torch.ao.quantization.QuantWrapper.html +++ b/2.9/generated/torch.ao.quantization.QuantWrapper.html @@ -4415,7 +4415,7 @@

                                  QuantWrapper#

                                  -class torch.ao.quantization.QuantWrapper(module)[source]#
                                  +class torch.ao.quantization.QuantWrapper(module)[source]#

A wrapper class that wraps the input module, adds QuantStub and DeQuantStub, and surrounds the call to the module with calls to the quant and dequant modules.
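A minimal eager-mode sketch showing both the manual stubs and the wrapper (module and layer choices are illustrative):

import torch

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = torch.ao.quantization.QuantStub()      # becomes nnq.Quantize after convert
        self.conv = torch.nn.Conv2d(1, 1, 1)
        self.dequant = torch.ao.quantization.DeQuantStub()  # becomes nnq.DeQuantize after convert

    def forward(self, x):
        return self.dequant(self.conv(self.quant(x)))

# QuantWrapper adds the same stubs around an existing module:
wrapped = torch.ao.quantization.QuantWrapper(torch.nn.Conv2d(1, 1, 1))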

                                  diff --git a/2.9/generated/torch.ao.quantization.add_quant_dequant.html b/2.9/generated/torch.ao.quantization.add_quant_dequant.html index 923e55020f1..edc8a2b72de 100644 --- a/2.9/generated/torch.ao.quantization.add_quant_dequant.html +++ b/2.9/generated/torch.ao.quantization.add_quant_dequant.html @@ -4415,7 +4415,7 @@

                                  add_quant_dequant#

                                  -class torch.ao.quantization.add_quant_dequant(module)[source]#
                                  +class torch.ao.quantization.add_quant_dequant(module)[source]#

Wrap the leaf child module in QuantWrapper if it has a valid qconfig. Note that this function will modify the children of the module in place, and it can return a new module which wraps the input module as well.

                                  diff --git a/2.9/generated/torch.ao.quantization.backend_config.BackendConfig.html b/2.9/generated/torch.ao.quantization.backend_config.BackendConfig.html index 7138d4945ba..36448226195 100644 --- a/2.9/generated/torch.ao.quantization.backend_config.BackendConfig.html +++ b/2.9/generated/torch.ao.quantization.backend_config.BackendConfig.html @@ -4415,7 +4415,7 @@

                                  BackendConfig#

                                  -class torch.ao.quantization.backend_config.BackendConfig(name='')[source]#
                                  +class torch.ao.quantization.backend_config.BackendConfig(name='')[source]#

                                  Config that defines the set of patterns that can be quantized on a given backend, and how reference quantized models can be produced from these patterns.

                                  A pattern in this context refers to a module, a functional, an operator, or a directed acyclic graph @@ -4468,7 +4468,7 @@

                                  BackendConfig
                                  -classmethod from_dict(backend_config_dict)[source]#
                                  +classmethod from_dict(backend_config_dict)[source]#

                                  Create a BackendConfig from a dictionary with the following items:

                                  “name”: the name of the target backend

                                  @@ -4483,7 +4483,7 @@

                                  BackendConfig
                                  -set_backend_pattern_config(config)[source]#
                                  +set_backend_pattern_config(config)[source]#

Set the config for a pattern that can be run on the target backend. This overrides any existing config for the given pattern.

                                  @@ -4495,7 +4495,7 @@

                                  BackendConfig
                                  -set_backend_pattern_configs(configs)[source]#
                                  +set_backend_pattern_configs(configs)[source]#

Set the configs for patterns that can be run on the target backend. This overrides any existing config for a given pattern if it was previously registered.

                                  @@ -4507,7 +4507,7 @@

                                  BackendConfig
                                  -set_name(name)[source]#
                                  +set_name(name)[source]#

                                  Set the name of the target backend.

                                  Return type
                                  @@ -4518,7 +4518,7 @@

                                  BackendConfig
                                  -to_dict()[source]#
                                  +to_dict()[source]#

                                  Convert this BackendConfig to a dictionary with the items described in from_dict().

                                  diff --git a/2.9/generated/torch.ao.quantization.backend_config.BackendPatternConfig.html b/2.9/generated/torch.ao.quantization.backend_config.BackendPatternConfig.html index 9093c83abeb..05897d937dd 100644 --- a/2.9/generated/torch.ao.quantization.backend_config.BackendPatternConfig.html +++ b/2.9/generated/torch.ao.quantization.backend_config.BackendPatternConfig.html @@ -4415,14 +4415,14 @@

                                  BackendPatternConfig#

                                  -class torch.ao.quantization.backend_config.BackendPatternConfig(pattern=None)[source]#
                                  +class torch.ao.quantization.backend_config.BackendPatternConfig(pattern=None)[source]#

                                  Config object that specifies quantization behavior for a given operator pattern. For a detailed example usage, see BackendConfig.

                                  -add_dtype_config(dtype_config)[source]#
                                  +add_dtype_config(dtype_config)[source]#

                                  Add a set of supported data types passed as arguments to quantize ops in the reference model spec.

                                  @@ -4434,7 +4434,7 @@

                                  BackendPatternConfig
                                  -classmethod from_dict(backend_pattern_config_dict)[source]#
                                  +classmethod from_dict(backend_pattern_config_dict)[source]#

                                  Create a BackendPatternConfig from a dictionary with the following items:

                                  “pattern”: the pattern being configured @@ -4458,7 +4458,7 @@

                                  BackendPatternConfig
                                  -set_dtype_configs(dtype_configs)[source]#
                                  +set_dtype_configs(dtype_configs)[source]#

                                  Set the supported data types passed as arguments to quantize ops in the reference model spec, overriding all previously registered data types.

                                  @@ -4470,7 +4470,7 @@

                                  BackendPatternConfig
                                  -set_fused_module(fused_module)[source]#
                                  +set_fused_module(fused_module)[source]#

                                  Set the module that represents the fused implementation for this pattern.

                                  Return type
                                  @@ -4481,7 +4481,7 @@

                                  BackendPatternConfig
                                  -set_fuser_method(fuser_method)[source]#
                                  +set_fuser_method(fuser_method)[source]#

                                  Set the function that specifies how to fuse this BackendPatternConfig’s pattern.

                                  The first argument of this function should be is_qat, and the rest of the arguments should be the items in the tuple pattern. The return value of this function should be @@ -4503,7 +4503,7 @@

                                  BackendPatternConfig
                                  -set_observation_type(observation_type)[source]#
                                  +set_observation_type(observation_type)[source]#

                                  Set how observers should be inserted in the graph for this pattern.

                                  Observation type here refers to how observers (or quant-dequant ops) will be placed in the graph. This is used to produce the desired reference patterns understood by @@ -4528,7 +4528,7 @@

                                  BackendPatternConfig
                                  -set_pattern(pattern)[source]#
                                  +set_pattern(pattern)[source]#

                                  Set the pattern to configure.

                                  The pattern can be a float module, functional operator, pytorch operator, or a tuple combination of the above. Tuple patterns are treated as sequential patterns, and @@ -4542,7 +4542,7 @@

                                  BackendPatternConfig
                                  -set_qat_module(qat_module)[source]#
                                  +set_qat_module(qat_module)[source]#

                                  Set the module that represents the QAT implementation for this pattern.

                                  Return type
                                  @@ -4553,7 +4553,7 @@

                                  BackendPatternConfig
                                  -set_reference_quantized_module(reference_quantized_module)[source]#
                                  +set_reference_quantized_module(reference_quantized_module)[source]#

                                  Set the module that represents the reference quantized implementation for this pattern’s root module.

                                  For more detail, see set_root_module().

                                  @@ -4566,7 +4566,7 @@

                                  BackendPatternConfig
                                  -set_root_module(root_module)[source]#
                                  +set_root_module(root_module)[source]#

                                  Set the module that represents the root for this pattern.

                                  When we construct the reference quantized model during the convert phase, the root modules (e.g. torch.nn.Linear for torch.ao.nn.intrinsic.LinearReLU) @@ -4585,7 +4585,7 @@

                                  BackendPatternConfig
                                  -to_dict()[source]#
                                  +to_dict()[source]#

                                  Convert this BackendPatternConfig to a dictionary with the items described in from_dict().

                                  diff --git a/2.9/generated/torch.ao.quantization.backend_config.DTypeConfig.html b/2.9/generated/torch.ao.quantization.backend_config.DTypeConfig.html index 10f6e2a142b..102559c6328 100644 --- a/2.9/generated/torch.ao.quantization.backend_config.DTypeConfig.html +++ b/2.9/generated/torch.ao.quantization.backend_config.DTypeConfig.html @@ -4415,7 +4415,7 @@

                                  DTypeConfig#

                                  -class torch.ao.quantization.backend_config.DTypeConfig(input_dtype=None, output_dtype=None, weight_dtype=None, bias_dtype=None, is_dynamic=None)[source]#
                                  +class torch.ao.quantization.backend_config.DTypeConfig(input_dtype=None, output_dtype=None, weight_dtype=None, bias_dtype=None, is_dynamic=None)[source]#

                                  Config object that specifies the supported data types passed as arguments to quantize ops in the reference model spec, for input and output activations, weights, and biases.
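A minimal sketch of a DTypeConfig for weighted int8 ops (dtype choices are illustrative):

import torch
from torch.ao.quantization.backend_config import DTypeConfig

weighted_int8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float)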

                                  @@ -4481,7 +4481,7 @@

                                  DTypeConfig
                                  -classmethod from_dict(dtype_config_dict)[source]#
                                  +classmethod from_dict(dtype_config_dict)[source]#
                                  Create a DTypeConfig from a dictionary with the following items (all optional):

“input_dtype”: torch.dtype or DTypeWithConstraints
“output_dtype”: torch.dtype or DTypeWithConstraints
@@ -4499,7 +4499,7 @@

                                  DTypeConfig
                                  -to_dict()[source]#
                                  +to_dict()[source]#

                                  Convert this DTypeConfig to a dictionary with the items described in from_dict().

                                  diff --git a/2.9/generated/torch.ao.quantization.backend_config.DTypeWithConstraints.html b/2.9/generated/torch.ao.quantization.backend_config.DTypeWithConstraints.html index c52cce2c77f..e8970c962ef 100644 --- a/2.9/generated/torch.ao.quantization.backend_config.DTypeWithConstraints.html +++ b/2.9/generated/torch.ao.quantization.backend_config.DTypeWithConstraints.html @@ -4415,7 +4415,7 @@

                                  DTypeWithConstraints#

                                  -class torch.ao.quantization.backend_config.DTypeWithConstraints(dtype=None, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None, scale_exact_match=None, zero_point_exact_match=None)[source]#
                                  +class torch.ao.quantization.backend_config.DTypeWithConstraints(dtype=None, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None, scale_exact_match=None, zero_point_exact_match=None)[source]#

                                  Config for specifying additional constraints for a given dtype, such as quantization value ranges, scale value ranges, and fixed quantization params, to be used in DTypeConfig.

                                  diff --git a/2.9/generated/torch.ao.quantization.backend_config.ObservationType.html b/2.9/generated/torch.ao.quantization.backend_config.ObservationType.html index c0e688e640b..8a260e07e88 100644 --- a/2.9/generated/torch.ao.quantization.backend_config.ObservationType.html +++ b/2.9/generated/torch.ao.quantization.backend_config.ObservationType.html @@ -4415,7 +4415,7 @@

                                  ObservationType#

                                  -class torch.ao.quantization.backend_config.ObservationType(value)[source]#
                                  +class torch.ao.quantization.backend_config.ObservationType(value)[source]#

An enum that represents the different ways an operator/operator pattern can be observed

                                  diff --git a/2.9/generated/torch.ao.quantization.compare_results.html b/2.9/generated/torch.ao.quantization.compare_results.html index 8ae966646cf..87e927d54b1 100644 --- a/2.9/generated/torch.ao.quantization.compare_results.html +++ b/2.9/generated/torch.ao.quantization.compare_results.html @@ -4415,7 +4415,7 @@

                                  compare_results#

                                  -class torch.ao.quantization.compare_results(ref_results, actual_results)[source]#
                                  +class torch.ao.quantization.compare_results(ref_results, actual_results)[source]#

Given two dicts mapping from debug_handle_id (int) to lists of tensors, return a map from debug_handle_id to NodeAccuracySummary that contains comparison information such as SQNR, MSE, etc.

                                  diff --git a/2.9/generated/torch.ao.quantization.convert.html b/2.9/generated/torch.ao.quantization.convert.html index 14edc0f0013..cfa45da5760 100644 --- a/2.9/generated/torch.ao.quantization.convert.html +++ b/2.9/generated/torch.ao.quantization.convert.html @@ -4415,7 +4415,7 @@

                                  convert#

                                  -class torch.ao.quantization.convert(module, mapping=None, inplace=False, remove_qconfig=True, is_reference=False, convert_custom_config_dict=None, use_precomputed_fake_quant=False)[source]#
                                  +class torch.ao.quantization.convert(module, mapping=None, inplace=False, remove_qconfig=True, is_reference=False, convert_custom_config_dict=None, use_precomputed_fake_quant=False)[source]#

Converts submodules in the input module to a different module according to mapping by calling the from_float method on the target module class, and removes qconfig at the end if remove_qconfig is set to True.
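A typical eager-mode flow in which convert is the final step (model is assumed to be a float module containing quantizable submodules and quant/dequant stubs):

import torch

model.eval()
model.qconfig = torch.ao.quantization.default_qconfig
prepared = torch.ao.quantization.prepare(model)
# ... run representative calibration batches through `prepared` ...
quantized = torch.ao.quantization.convert(prepared)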

                                  diff --git a/2.9/generated/torch.ao.quantization.default_eval_fn.html b/2.9/generated/torch.ao.quantization.default_eval_fn.html index 495e2f72092..7cb40c28d8d 100644 --- a/2.9/generated/torch.ao.quantization.default_eval_fn.html +++ b/2.9/generated/torch.ao.quantization.default_eval_fn.html @@ -4415,7 +4415,7 @@

                                  default_eval_fn#

                                  -class torch.ao.quantization.default_eval_fn(model, calib_data)[source]#
                                  +class torch.ao.quantization.default_eval_fn(model, calib_data)[source]#

                                  Define the default evaluation function.

The default evaluation function takes a torch.utils.data.Dataset or a list of input Tensors and runs the model on the dataset.

                                  diff --git a/2.9/generated/torch.ao.quantization.extract_results_from_loggers.html b/2.9/generated/torch.ao.quantization.extract_results_from_loggers.html index 01cbb6701d8..b895415a49b 100644 --- a/2.9/generated/torch.ao.quantization.extract_results_from_loggers.html +++ b/2.9/generated/torch.ao.quantization.extract_results_from_loggers.html @@ -4415,7 +4415,7 @@

                                  extract_results_from_loggers#

                                  -class torch.ao.quantization.extract_results_from_loggers(model)[source]#
                                  +class torch.ao.quantization.extract_results_from_loggers(model)[source]#

For a given model, extract the tensor stats and related information for each debug handle. The reason we have a list of objects instead of a Tensor is that the output of a node may not be a Tensor; it could be a (nested) list, tuple, or dict as well.

                                  diff --git a/2.9/generated/torch.ao.quantization.fake_quantize.FakeQuantize.html b/2.9/generated/torch.ao.quantization.fake_quantize.FakeQuantize.html index ae8e51ac304..1876c00a082 100644 --- a/2.9/generated/torch.ao.quantization.fake_quantize.FakeQuantize.html +++ b/2.9/generated/torch.ao.quantization.fake_quantize.FakeQuantize.html @@ -4415,7 +4415,7 @@

                                  FakeQuantize#

                                  -class torch.ao.quantization.fake_quantize.FakeQuantize(observer=<class 'torch.ao.quantization.observer.MovingAverageMinMaxObserver'>, quant_min=None, quant_max=None, is_dynamic=False, **observer_kwargs)[source]#
                                  +class torch.ao.quantization.fake_quantize.FakeQuantize(observer=<class 'torch.ao.quantization.observer.MovingAverageMinMaxObserver'>, quant_min=None, quant_max=None, is_dynamic=False, **observer_kwargs)[source]#

Simulate the quantize and dequantize operations at training time.

                                  The output of this module is given by:

x_out = (
    clamp(round(x / scale + zero_point), quant_min, quant_max) - zero_point
) * scale
                                  diff --git a/2.9/generated/torch.ao.quantization.fake_quantize.FakeQuantizeBase.html b/2.9/generated/torch.ao.quantization.fake_quantize.FakeQuantizeBase.html
                                  index 7f0758f8e11..de6eee20481 100644
                                  --- a/2.9/generated/torch.ao.quantization.fake_quantize.FakeQuantizeBase.html
                                  +++ b/2.9/generated/torch.ao.quantization.fake_quantize.FakeQuantizeBase.html
                                  @@ -4415,7 +4415,7 @@
                                   

                                  FakeQuantizeBase#

                                  -class torch.ao.quantization.fake_quantize.FakeQuantizeBase[source]#
                                  +class torch.ao.quantization.fake_quantize.FakeQuantizeBase[source]#

                                  Base fake quantize module.

Base fake quantize module. Any fake quantize implementation should derive from this class.

                                  diff --git a/2.9/generated/torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize.html b/2.9/generated/torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize.html index 60cee5f73ce..8353b6a4d7c 100644 --- a/2.9/generated/torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize.html +++ b/2.9/generated/torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize.html @@ -4415,14 +4415,14 @@

                                  FixedQParamsFakeQuantize#

                                  -class torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize(observer)[source]#
                                  +class torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize(observer)[source]#

Simulate quantize and dequantize at training time.

Simulate quantize and dequantize with fixed quantization parameters at training time. Only per tensor quantization is supported.

                                  -extra_repr()[source]#
                                  +extra_repr()[source]#

                                  Define a string representation of the object’s attributes.

                                  diff --git a/2.9/generated/torch.ao.quantization.fake_quantize.FusedMovingAvgObsFakeQuantize.html b/2.9/generated/torch.ao.quantization.fake_quantize.FusedMovingAvgObsFakeQuantize.html index bf2981c2dfe..44ec6815742 100644 --- a/2.9/generated/torch.ao.quantization.fake_quantize.FusedMovingAvgObsFakeQuantize.html +++ b/2.9/generated/torch.ao.quantization.fake_quantize.FusedMovingAvgObsFakeQuantize.html @@ -4415,7 +4415,7 @@

                                  FusedMovingAvgObsFakeQuantize#

                                  -class torch.ao.quantization.fake_quantize.FusedMovingAvgObsFakeQuantize(observer=<class 'torch.ao.quantization.observer.MovingAverageMinMaxObserver'>, quant_min=0, quant_max=255, **observer_kwargs)[source]#
                                  +class torch.ao.quantization.fake_quantize.FusedMovingAvgObsFakeQuantize(observer=<class 'torch.ao.quantization.observer.MovingAverageMinMaxObserver'>, quant_min=0, quant_max=255, **observer_kwargs)[source]#

                                  Define a fused module to observe the tensor.

                                  Fused module that is used to observe the input tensor (compute min/max), compute scale/zero_point and fake_quantize the tensor. diff --git a/2.9/generated/torch.ao.quantization.fake_quantize.disable_fake_quant.html b/2.9/generated/torch.ao.quantization.fake_quantize.disable_fake_quant.html index 69c3a04dd79..997c9c62e6c 100644 --- a/2.9/generated/torch.ao.quantization.fake_quantize.disable_fake_quant.html +++ b/2.9/generated/torch.ao.quantization.fake_quantize.disable_fake_quant.html @@ -4415,7 +4415,7 @@

                                  disable_fake_quant#

                                  -class torch.ao.quantization.fake_quantize.disable_fake_quant(mod)[source]#
                                  +class torch.ao.quantization.fake_quantize.disable_fake_quant(mod)[source]#

                                  Disable fake quantization for the module.

                                  Disable fake quantization for this module, if applicable. Example usage:

# model is any PyTorch model
model.apply(torch.ao.quantization.disable_fake_quant)
                                  diff --git a/2.9/generated/torch.ao.quantization.fake_quantize.disable_observer.html b/2.9/generated/torch.ao.quantization.fake_quantize.disable_observer.html
                                  index a7e8c4b16f7..cacdd037ca4 100644
                                  --- a/2.9/generated/torch.ao.quantization.fake_quantize.disable_observer.html
                                  +++ b/2.9/generated/torch.ao.quantization.fake_quantize.disable_observer.html
                                  @@ -4415,7 +4415,7 @@
                                   

                                  disable_observer#

                                  -class torch.ao.quantization.fake_quantize.disable_observer(mod)[source]#
                                  +class torch.ao.quantization.fake_quantize.disable_observer(mod)[source]#

                                  Disable observation for this module.

                                  Disable observation for this module, if applicable. Example usage:

# model is any PyTorch model
model.apply(torch.ao.quantization.disable_observer)
                                  diff --git a/2.9/generated/torch.ao.quantization.fake_quantize.enable_fake_quant.html b/2.9/generated/torch.ao.quantization.fake_quantize.enable_fake_quant.html
                                  index 12bb7b1f2af..5da7b9b1a60 100644
                                  --- a/2.9/generated/torch.ao.quantization.fake_quantize.enable_fake_quant.html
                                  +++ b/2.9/generated/torch.ao.quantization.fake_quantize.enable_fake_quant.html
                                  @@ -4415,7 +4415,7 @@
                                   

                                  enable_fake_quant#

                                  -class torch.ao.quantization.fake_quantize.enable_fake_quant(mod)[source]#
                                  +class torch.ao.quantization.fake_quantize.enable_fake_quant(mod)[source]#

                                  Enable fake quantization for the module.

                                  Enable fake quantization for this module, if applicable. Example usage:

# model is any PyTorch model
model.apply(torch.ao.quantization.enable_fake_quant)
                                  diff --git a/2.9/generated/torch.ao.quantization.fake_quantize.enable_observer.html b/2.9/generated/torch.ao.quantization.fake_quantize.enable_observer.html
                                  index e130f416a9b..0fd1991747e 100644
                                  --- a/2.9/generated/torch.ao.quantization.fake_quantize.enable_observer.html
                                  +++ b/2.9/generated/torch.ao.quantization.fake_quantize.enable_observer.html
                                  @@ -4415,7 +4415,7 @@
                                   

                                  enable_observer#

                                  -class torch.ao.quantization.fake_quantize.enable_observer(mod)[source]#
                                  +class torch.ao.quantization.fake_quantize.enable_observer(mod)[source]#

                                  Enable observation for this module.

                                  Enable observation for this module, if applicable. Example usage:

# model is any PyTorch model
model.apply(torch.ao.quantization.enable_observer)
                                  diff --git a/2.9/generated/torch.ao.quantization.fuse_modules.fuse_modules.html b/2.9/generated/torch.ao.quantization.fuse_modules.fuse_modules.html
                                  index 78f137f3f34..9f4c0fe467d 100644
                                  --- a/2.9/generated/torch.ao.quantization.fuse_modules.fuse_modules.html
                                  +++ b/2.9/generated/torch.ao.quantization.fuse_modules.fuse_modules.html
                                  @@ -4415,7 +4415,7 @@
                                   

                                  fuse_modules#

                                  -class torch.ao.quantization.fuse_modules.fuse_modules(model, modules_to_fuse, inplace=False, fuser_func=<function fuse_known_modules>, fuse_custom_config_dict=None)[source]#
                                  +class torch.ao.quantization.fuse_modules.fuse_modules(model, modules_to_fuse, inplace=False, fuser_func=<function fuse_known_modules>, fuse_custom_config_dict=None)[source]#

                                  Fuse a list of modules into a single module.

                                  Fuses only the following sequence of modules: conv, bn diff --git a/2.9/generated/torch.ao.quantization.fx.custom_config.ConvertCustomConfig.html b/2.9/generated/torch.ao.quantization.fx.custom_config.ConvertCustomConfig.html index 9b595e5cbe6..3483156e29f 100644 --- a/2.9/generated/torch.ao.quantization.fx.custom_config.ConvertCustomConfig.html +++ b/2.9/generated/torch.ao.quantization.fx.custom_config.ConvertCustomConfig.html @@ -4415,7 +4415,7 @@

                                  ConvertCustomConfig#

                                  -class torch.ao.quantization.fx.custom_config.ConvertCustomConfig[source]#
                                  +class torch.ao.quantization.fx.custom_config.ConvertCustomConfig[source]#

                                  Custom configuration for convert_fx().

                                  Example usage:

convert_custom_config = ConvertCustomConfig() \
    .set_observed_to_quantized_mapping(ObservedCustomModule, QuantizedCustomModule) \
    .set_preserved_attributes(["attr1", "attr2"])
                                  @@ -4425,7 +4425,7 @@ 

                                  ConvertCustomConfig
                                  -classmethod from_dict(convert_custom_config_dict)[source]#
                                  +classmethod from_dict(convert_custom_config_dict)[source]#

                                  Create a ConvertCustomConfig from a dictionary with the following items:

                                  “observed_to_quantized_custom_module_class”: a nested dictionary mapping from quantization @@ -4447,7 +4447,7 @@

                                  ConvertCustomConfig
                                  -set_observed_to_quantized_mapping(observed_class, quantized_class, quant_type=QuantType.STATIC)[source]#
                                  +set_observed_to_quantized_mapping(observed_class, quantized_class, quant_type=QuantType.STATIC)[source]#

                                  Set the mapping from a custom observed module class to a custom quantized module class.

                                  The quantized module class must have a from_observed class method that converts the observed module class to the quantized module class.

                                  @@ -4460,7 +4460,7 @@

                                  ConvertCustomConfig
                                  -set_preserved_attributes(attributes)[source]#
                                  +set_preserved_attributes(attributes)[source]#

                                  Set the names of the attributes that will persist in the graph module even if they are not used in the model’s forward method.

                                  @@ -4472,7 +4472,7 @@

                                  ConvertCustomConfig
                                  -to_dict()[source]#
                                  +to_dict()[source]#

                                  Convert this ConvertCustomConfig to a dictionary with the items described in from_dict().
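A small sketch of the round trip between a config object and its dictionary form:

from torch.ao.quantization.fx.custom_config import ConvertCustomConfig

cfg = ConvertCustomConfig().set_preserved_attributes(["attr1", "attr2"])
d = cfg.to_dict()                        # plain-dict form of the config
cfg2 = ConvertCustomConfig.from_dict(d)  # reconstructs an equivalent config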

                                  diff --git a/2.9/generated/torch.ao.quantization.fx.custom_config.FuseCustomConfig.html b/2.9/generated/torch.ao.quantization.fx.custom_config.FuseCustomConfig.html index e7ea0954cbc..63e67170c96 100644 --- a/2.9/generated/torch.ao.quantization.fx.custom_config.FuseCustomConfig.html +++ b/2.9/generated/torch.ao.quantization.fx.custom_config.FuseCustomConfig.html @@ -4415,7 +4415,7 @@

                                  FuseCustomConfig#

                                  -class torch.ao.quantization.fx.custom_config.FuseCustomConfig[source]#
                                  +class torch.ao.quantization.fx.custom_config.FuseCustomConfig[source]#

                                  Custom configuration for fuse_fx().

                                  Example usage:

                                  fuse_custom_config = FuseCustomConfig().set_preserved_attributes(
                                  @@ -4427,7 +4427,7 @@ 

                                  FuseCustomConfig
                                  -classmethod from_dict(fuse_custom_config_dict)[source]#
                                  +classmethod from_dict(fuse_custom_config_dict)[source]#

Create a FuseCustomConfig from a dictionary with the following items:

                                  “preserved_attributes”: a list of attributes that persist even if they are not used in forward

                                  @@ -4442,7 +4442,7 @@

                                  FuseCustomConfig
                                  -set_preserved_attributes(attributes)[source]#
                                  +set_preserved_attributes(attributes)[source]#

                                  Set the names of the attributes that will persist in the graph module even if they are not used in the model’s forward method.

                                  @@ -4454,7 +4454,7 @@

                                  FuseCustomConfig
                                  -to_dict()[source]#
                                  +to_dict()[source]#

                                  Convert this FuseCustomConfig to a dictionary with the items described in from_dict().

                                  diff --git a/2.9/generated/torch.ao.quantization.fx.custom_config.PrepareCustomConfig.html b/2.9/generated/torch.ao.quantization.fx.custom_config.PrepareCustomConfig.html index 6a63a79ffc0..197a41a81ad 100644 --- a/2.9/generated/torch.ao.quantization.fx.custom_config.PrepareCustomConfig.html +++ b/2.9/generated/torch.ao.quantization.fx.custom_config.PrepareCustomConfig.html @@ -4415,7 +4415,7 @@

                                  PrepareCustomConfig#

                                  -class torch.ao.quantization.fx.custom_config.PrepareCustomConfig[source]#
                                  +class torch.ao.quantization.fx.custom_config.PrepareCustomConfig[source]#

                                  Custom configuration for prepare_fx() and prepare_qat_fx().

                                  Example usage:

                                  @@ -4426,7 +4426,7 @@

                                  PrepareCustomConfig
                                  -classmethod from_dict(prepare_custom_config_dict)[source]#
                                  +classmethod from_dict(prepare_custom_config_dict)[source]#

                                  Create a PrepareCustomConfig from a dictionary with the following items:

“standalone_module_name”: a list of (module_name, qconfig_mapping, example_inputs,

@@ -4452,7 +4452,7 @@

                                  PrepareCustomConfig
                                  -set_float_to_observed_mapping(float_class, observed_class, quant_type=QuantType.STATIC)[source]#
                                  +set_float_to_observed_mapping(float_class, observed_class, quant_type=QuantType.STATIC)[source]#

                                  Set the mapping from a custom float module class to a custom observed module class.

                                  The observed module class must have a from_float class method that converts the float module class to the observed module class. This is currently only supported for static quantization.

                                  @@ -4465,7 +4465,7 @@

                                  PrepareCustomConfig
                                  -set_input_quantized_indexes(indexes)[source]#
                                  +set_input_quantized_indexes(indexes)[source]#

Set the indexes of the inputs of the graph that should be quantized. All other inputs are assumed to be in fp32 by default.

                                  @@ -4477,7 +4477,7 @@

                                  PrepareCustomConfig
                                  -set_non_traceable_module_classes(module_classes)[source]#
                                  +set_non_traceable_module_classes(module_classes)[source]#

                                  Set the modules that are not symbolically traceable, identified by class.

                                  Return type
                                  @@ -4488,7 +4488,7 @@

                                  PrepareCustomConfig
                                  -set_non_traceable_module_names(module_names)[source]#
                                  +set_non_traceable_module_names(module_names)[source]#

                                  Set the modules that are not symbolically traceable, identified by name.

                                  Return type
                                  @@ -4499,7 +4499,7 @@

                                  PrepareCustomConfig
                                  -set_output_quantized_indexes(indexes)[source]#
                                  +set_output_quantized_indexes(indexes)[source]#

Set the indexes of the outputs of the graph that should be quantized. All other outputs are assumed to be in fp32 by default.

                                  @@ -4511,7 +4511,7 @@

                                  PrepareCustomConfig
                                  -set_preserved_attributes(attributes)[source]#
                                  +set_preserved_attributes(attributes)[source]#

                                  Set the names of the attributes that will persist in the graph module even if they are not used in the model’s forward method.

                                  @@ -4523,7 +4523,7 @@

                                  PrepareCustomConfig
                                  -set_standalone_module_class(module_class, qconfig_mapping, example_inputs, prepare_custom_config, backend_config)[source]#
                                  +set_standalone_module_class(module_class, qconfig_mapping, example_inputs, prepare_custom_config, backend_config)[source]#

                                  Set the configuration for running a standalone module identified by module_class.

If qconfig_mapping is None, the parent qconfig_mapping will be used instead. If prepare_custom_config is None, an empty PrepareCustomConfig will be used.

@@ -4537,7 +4537,7 @@

                                  PrepareCustomConfig
                                  -set_standalone_module_name(module_name, qconfig_mapping, example_inputs, prepare_custom_config, backend_config)[source]#
                                  +set_standalone_module_name(module_name, qconfig_mapping, example_inputs, prepare_custom_config, backend_config)[source]#

                                  Set the configuration for running a standalone module identified by module_name.

If qconfig_mapping is None, the parent qconfig_mapping will be used instead. If prepare_custom_config is None, an empty PrepareCustomConfig will be used.

@@ -4551,7 +4551,7 @@

                                  PrepareCustomConfig
                                  -to_dict()[source]#
                                  +to_dict()[source]#

                                  Convert this PrepareCustomConfig to a dictionary with the items described in from_dict().
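Putting several of the setters above together, as a sketch (the module name is a made-up example):

from torch.ao.quantization.fx.custom_config import PrepareCustomConfig

prepare_custom_config = (
    PrepareCustomConfig()
    .set_input_quantized_indexes([0])                 # input 0 arrives already quantized
    .set_non_traceable_module_names(["submodule.0"])  # hypothetical untraceable submodule
    .set_preserved_attributes(["attr1"])
)
d = prepare_custom_config.to_dict()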

                                  diff --git a/2.9/generated/torch.ao.quantization.fx.custom_config.StandaloneModuleConfigEntry.html b/2.9/generated/torch.ao.quantization.fx.custom_config.StandaloneModuleConfigEntry.html index b79c4497fe1..b20b7c08771 100644 --- a/2.9/generated/torch.ao.quantization.fx.custom_config.StandaloneModuleConfigEntry.html +++ b/2.9/generated/torch.ao.quantization.fx.custom_config.StandaloneModuleConfigEntry.html @@ -4415,7 +4415,7 @@

                                  StandaloneModuleConfigEntry#

                                  -class torch.ao.quantization.fx.custom_config.StandaloneModuleConfigEntry(qconfig_mapping: 'Optional[QConfigMapping]', example_inputs: 'tuple[Any, ...]', prepare_custom_config: 'Optional[PrepareCustomConfig]', backend_config: 'Optional[BackendConfig]')[source]#
                                  +class torch.ao.quantization.fx.custom_config.StandaloneModuleConfigEntry(qconfig_mapping: 'Optional[QConfigMapping]', example_inputs: 'tuple[Any, ...]', prepare_custom_config: 'Optional[PrepareCustomConfig]', backend_config: 'Optional[BackendConfig]')[source]#
                                  diff --git a/2.9/generated/torch.ao.quantization.generate_numeric_debug_handle.html b/2.9/generated/torch.ao.quantization.generate_numeric_debug_handle.html index 1fd8a03d2b1..9b06a022ccb 100644 --- a/2.9/generated/torch.ao.quantization.generate_numeric_debug_handle.html +++ b/2.9/generated/torch.ao.quantization.generate_numeric_debug_handle.html @@ -4415,7 +4415,7 @@

                                  generate_numeric_debug_handle#

                                  -class torch.ao.quantization.generate_numeric_debug_handle(ep)[source]#
                                  +class torch.ao.quantization.generate_numeric_debug_handle(ep)[source]#

Attach a numeric_debug_handle_id to all nodes in the graph module of the given ExportedProgram, such as conv2d, squeeze, and conv1d, except for placeholder nodes. Note that nodes like getattr are out of scope since they are not in the graph.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.AffineQuantizedObserverBase.html b/2.9/generated/torch.ao.quantization.observer.AffineQuantizedObserverBase.html index 85a6bf3cb2a..06986499271 100644 --- a/2.9/generated/torch.ao.quantization.observer.AffineQuantizedObserverBase.html +++ b/2.9/generated/torch.ao.quantization.observer.AffineQuantizedObserverBase.html @@ -4415,7 +4415,7 @@

                                  AffineQuantizedObserverBase#

                                  -class torch.ao.quantization.observer.AffineQuantizedObserverBase(mapping_type, target_dtype, granularity, quant_min=None, quant_max=None, eps=None, scale_dtype=None, zero_point_dtype=None, preserve_zero=True, zero_point_domain=ZeroPointDomain.INT, **kwargs)[source]#
                                  +class torch.ao.quantization.observer.AffineQuantizedObserverBase(mapping_type, target_dtype, granularity, quant_min=None, quant_max=None, eps=None, scale_dtype=None, zero_point_dtype=None, preserve_zero=True, zero_point_domain=ZeroPointDomain.INT, **kwargs)[source]#

                                  Observer module for affine quantization (pytorch/ao)

                                  Parameters
                                  @@ -4429,7 +4429,7 @@

                                  AffineQuantizedObserverBase
                                  -abstract calculate_qparams()[source]#
                                  +abstract calculate_qparams()[source]#

Calculate the quantization parameters based on the stats attached to the observer module and return a tuple of scale and zero_point Tensors

                                  @@ -4441,7 +4441,7 @@

                                  AffineQuantizedObserverBase
                                  -convert(model, observer_node)[source]#
                                  +convert(model, observer_node)[source]#

                                  Converts the observer node in the graph into its quantized representation

                                  Parameters
                                  @@ -4455,7 +4455,7 @@

                                  AffineQuantizedObserverBase
                                  -abstract forward(input)[source]#
                                  +abstract forward(input)[source]#

The forward function should take the input tensor, update the internal stats, and return the original input Tensor

                                  @@ -4467,7 +4467,7 @@

                                  AffineQuantizedObserverBase
                                  -classmethod with_args(**kwargs)[source]#
                                  +classmethod with_args(**kwargs)[source]#

                                  Wrapper that allows creation of class factories.

This can be useful when there is a need to create classes with the same constructor arguments, but different instances. Can be used in conjunction with

diff --git a/2.9/generated/torch.ao.quantization.observer.Granularity.html b/2.9/generated/torch.ao.quantization.observer.Granularity.html
index e3cb4159c9b..404a308807c 100644
--- a/2.9/generated/torch.ao.quantization.observer.Granularity.html
+++ b/2.9/generated/torch.ao.quantization.observer.Granularity.html
@@ -4415,7 +4415,7 @@

                                  Granularity#

                                  -class torch.ao.quantization.observer.Granularity[source]#
                                  +class torch.ao.quantization.observer.Granularity[source]#

                                  Base class for representing the granularity of quantization.

                                  This class serves as a parent for specific granularity types used in quantization operations, such as per-tensor or per-axis quantization.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.HistogramObserver.html b/2.9/generated/torch.ao.quantization.observer.HistogramObserver.html index cfa57373ce5..f6be41d177b 100644 --- a/2.9/generated/torch.ao.quantization.observer.HistogramObserver.html +++ b/2.9/generated/torch.ao.quantization.observer.HistogramObserver.html @@ -4415,7 +4415,7 @@

                                  HistogramObserver#

                                  -class torch.ao.quantization.observer.HistogramObserver(bins=2048, dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=False, quant_min=None, quant_max=None, factory_kwargs=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#
                                  +class torch.ao.quantization.observer.HistogramObserver(bins=2048, dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=False, quant_min=None, quant_max=None, factory_kwargs=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#

                                  The module records the running histogram of tensor values along with min/max values. calculate_qparams will calculate scale and zero_point.
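Typical usage, as a sketch:

import torch
from torch.ao.quantization.observer import HistogramObserver

obs = HistogramObserver(bins=2048)
for _ in range(4):
    obs(torch.randn(16, 16))          # each call updates the running histogram
scale, zero_point = obs.calculate_qparams()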

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.MappingType.html b/2.9/generated/torch.ao.quantization.observer.MappingType.html index 711817672ca..c4ed9ca23f2 100644 --- a/2.9/generated/torch.ao.quantization.observer.MappingType.html +++ b/2.9/generated/torch.ao.quantization.observer.MappingType.html @@ -4415,7 +4415,7 @@

                                  MappingType#

                                  -class torch.ao.quantization.observer.MappingType(value)[source]#
                                  +class torch.ao.quantization.observer.MappingType(value)[source]#

How floating point numbers are mapped to integer numbers

Symmetric mapping means the floating point range is symmetrically mapped to the integer range. For example, say we have a floating point range (-3.5, 10.2) and an integer range (-8, 7) (int4)

diff --git a/2.9/generated/torch.ao.quantization.observer.MinMaxObserver.html b/2.9/generated/torch.ao.quantization.observer.MinMaxObserver.html
index 9fffb0d95c0..7af28ddcb25 100644
--- a/2.9/generated/torch.ao.quantization.observer.MinMaxObserver.html
+++ b/2.9/generated/torch.ao.quantization.observer.MinMaxObserver.html
@@ -4415,7 +4415,7 @@

                                  MinMaxObserver#

                                  -class torch.ao.quantization.observer.MinMaxObserver(dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=False, quant_min=None, quant_max=None, factory_kwargs=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#
                                  +class torch.ao.quantization.observer.MinMaxObserver(dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=False, quant_min=None, quant_max=None, factory_kwargs=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#

                                  Observer module for computing the quantization parameters based on the running min and max values.

This observer uses the tensor min/max statistics to compute the quantization

@@ -4475,19 +4475,19 @@

                                  MinMaxObserver
                                  -calculate_qparams()[source]#
                                  +calculate_qparams()[source]#

                                  Calculates the quantization parameters.

                                  -forward(x_orig)[source]#
                                  +forward(x_orig)[source]#

                                  Records the running minimum and maximum of x.

                                  -reset_min_max_vals()[source]#
                                  +reset_min_max_vals()[source]#

                                  Resets the min/max values.
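A minimal sketch of the observe-then-compute flow:

import torch
from torch.ao.quantization.observer import MinMaxObserver

obs = MinMaxObserver(dtype=torch.quint8, qscheme=torch.per_tensor_affine)
obs(torch.randn(4, 8))                 # forward records the running min/max
scale, zero_point = obs.calculate_qparams()
obs.reset_min_max_vals()               # discard the collected statistics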

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.MovingAverageMinMaxObserver.html b/2.9/generated/torch.ao.quantization.observer.MovingAverageMinMaxObserver.html index 5d1862b4b37..9df5cea1da6 100644 --- a/2.9/generated/torch.ao.quantization.observer.MovingAverageMinMaxObserver.html +++ b/2.9/generated/torch.ao.quantization.observer.MovingAverageMinMaxObserver.html @@ -4415,7 +4415,7 @@

                                  MovingAverageMinMaxObserver#

                                  -class torch.ao.quantization.observer.MovingAverageMinMaxObserver(averaging_constant=0.01, dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=False, quant_min=None, quant_max=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#
                                  +class torch.ao.quantization.observer.MovingAverageMinMaxObserver(averaging_constant=0.01, dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=False, quant_min=None, quant_max=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#

                                  Observer module for computing the quantization parameters based on the moving average of the min and max values.
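A sketch; the averaging_constant value below is the default:

import torch
from torch.ao.quantization.observer import MovingAverageMinMaxObserver

# Smaller averaging_constant -> the running min/max react more slowly to outliers.
obs = MovingAverageMinMaxObserver(averaging_constant=0.01)
for _ in range(10):
    obs(torch.randn(8, 8))
scale, zero_point = obs.calculate_qparams()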

This observer computes the quantization parameters based on the moving

diff --git a/2.9/generated/torch.ao.quantization.observer.MovingAveragePerChannelMinMaxObserver.html b/2.9/generated/torch.ao.quantization.observer.MovingAveragePerChannelMinMaxObserver.html
index 6fa5da11bd0..1fa24f23afc 100644
--- a/2.9/generated/torch.ao.quantization.observer.MovingAveragePerChannelMinMaxObserver.html
+++ b/2.9/generated/torch.ao.quantization.observer.MovingAveragePerChannelMinMaxObserver.html
@@ -4415,7 +4415,7 @@

                                  MovingAveragePerChannelMinMaxObserver#

                                  -class torch.ao.quantization.observer.MovingAveragePerChannelMinMaxObserver(averaging_constant=0.01, ch_axis=0, dtype=torch.quint8, qscheme=torch.per_channel_affine, reduce_range=False, quant_min=None, quant_max=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#
                                  +class torch.ao.quantization.observer.MovingAveragePerChannelMinMaxObserver(averaging_constant=0.01, ch_axis=0, dtype=torch.quint8, qscheme=torch.per_channel_affine, reduce_range=False, quant_min=None, quant_max=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#

                                  Observer module for computing the quantization parameters based on the running per channel min and max values.

This observer uses the tensor min/max statistics to compute the per channel

diff --git a/2.9/generated/torch.ao.quantization.observer.NoopObserver.html b/2.9/generated/torch.ao.quantization.observer.NoopObserver.html
index e5d9e8ad14b..64fa2798f28 100644
--- a/2.9/generated/torch.ao.quantization.observer.NoopObserver.html
+++ b/2.9/generated/torch.ao.quantization.observer.NoopObserver.html
@@ -4415,7 +4415,7 @@

                                  NoopObserver#

                                  -class torch.ao.quantization.observer.NoopObserver(dtype=torch.float16, custom_op_name='')[source]#
                                  +class torch.ao.quantization.observer.NoopObserver(dtype=torch.float16, custom_op_name='')[source]#

                                  Observer that doesn’t do anything and just passes its configuration to the quantized module’s .from_float().

Primarily used for quantization to float16 which doesn’t require determining

diff --git a/2.9/generated/torch.ao.quantization.observer.ObserverBase.html b/2.9/generated/torch.ao.quantization.observer.ObserverBase.html
index 4c462befb21..ebb0b70630a 100644
--- a/2.9/generated/torch.ao.quantization.observer.ObserverBase.html
+++ b/2.9/generated/torch.ao.quantization.observer.ObserverBase.html
@@ -4415,7 +4415,7 @@

                                  ObserverBase#

                                  -class torch.ao.quantization.observer.ObserverBase(dtype, is_dynamic=False)[source]#
                                  +class torch.ao.quantization.observer.ObserverBase(dtype, is_dynamic=False)[source]#

                                  Base observer Module. Any observer implementation should derive from this class.

Concrete observers should follow the same API. In forward, they will update

@@ -4434,7 +4434,7 @@

                                  ObserverBase
                                  -classmethod with_args(**kwargs)[source]#
                                  +classmethod with_args(**kwargs)[source]#

                                  Wrapper that allows creation of class factories.

This can be useful when there is a need to create classes with the same constructor arguments, but different instances. Can be used in conjunction with

@@ -4452,7 +4452,7 @@

                                  ObserverBase
                                  -classmethod with_callable_args(**kwargs)[source]#
                                  +classmethod with_callable_args(**kwargs)[source]#

Wrapper that allows creation of class factories with args that need to be called at construction time.

This can be useful when there is a need to create classes with the same

diff --git a/2.9/generated/torch.ao.quantization.observer.PerAxis.html b/2.9/generated/torch.ao.quantization.observer.PerAxis.html
index 7bf22d18e2e..bbcd5609a05 100644
--- a/2.9/generated/torch.ao.quantization.observer.PerAxis.html
+++ b/2.9/generated/torch.ao.quantization.observer.PerAxis.html
@@ -4415,7 +4415,7 @@

                                  PerAxis#

                                  -class torch.ao.quantization.observer.PerAxis(axis)[source]#
                                  +class torch.ao.quantization.observer.PerAxis(axis)[source]#

                                  Represents per-axis granularity in quantization.

                                  This granularity type calculates different quantization parameters along a specified axis of the tensor.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.PerBlock.html b/2.9/generated/torch.ao.quantization.observer.PerBlock.html index cadc125f957..12c75fabbfc 100644 --- a/2.9/generated/torch.ao.quantization.observer.PerBlock.html +++ b/2.9/generated/torch.ao.quantization.observer.PerBlock.html @@ -4415,7 +4415,7 @@

                                  PerBlock#

                                  -class torch.ao.quantization.observer.PerBlock(block_size)[source]#
                                  +class torch.ao.quantization.observer.PerBlock(block_size)[source]#

                                  Represents per-block granularity in quantization. See quantize_affine() for docs for block_size

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.PerChannelMinMaxObserver.html b/2.9/generated/torch.ao.quantization.observer.PerChannelMinMaxObserver.html index cdc5b178902..7f84431af03 100644 --- a/2.9/generated/torch.ao.quantization.observer.PerChannelMinMaxObserver.html +++ b/2.9/generated/torch.ao.quantization.observer.PerChannelMinMaxObserver.html @@ -4415,7 +4415,7 @@

                                  PerChannelMinMaxObserver#

                                  -class torch.ao.quantization.observer.PerChannelMinMaxObserver(ch_axis=0, dtype=torch.quint8, qscheme=torch.per_channel_affine, reduce_range=False, quant_min=None, quant_max=None, factory_kwargs=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#
                                  +class torch.ao.quantization.observer.PerChannelMinMaxObserver(ch_axis=0, dtype=torch.quint8, qscheme=torch.per_channel_affine, reduce_range=False, quant_min=None, quant_max=None, factory_kwargs=None, eps=1.1920928955078125e-07, is_dynamic=False, **kwargs)[source]#

                                  Observer module for computing the quantization parameters based on the running per channel min and max values.

This observer uses the tensor min/max statistics to compute the per channel

@@ -4447,7 +4447,7 @@

                                  PerChannelMinMaxObserver
                                  -reset_min_max_vals()[source]#
                                  +reset_min_max_vals()[source]#

                                  Resets the min/max values.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.PerGroup.html b/2.9/generated/torch.ao.quantization.observer.PerGroup.html index 89a14d48034..024c445bbaa 100644 --- a/2.9/generated/torch.ao.quantization.observer.PerGroup.html +++ b/2.9/generated/torch.ao.quantization.observer.PerGroup.html @@ -4415,7 +4415,7 @@

                                  PerGroup#

                                  -class torch.ao.quantization.observer.PerGroup(group_size)[source]#
                                  +class torch.ao.quantization.observer.PerGroup(group_size)[source]#

                                  Represents per-channel group granularity in quantization.

                                  This granularity type calculates different quantization parameters for each group of <group_size> elements.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.PerRow.html b/2.9/generated/torch.ao.quantization.observer.PerRow.html index e960d098e12..674f8da3b2f 100644 --- a/2.9/generated/torch.ao.quantization.observer.PerRow.html +++ b/2.9/generated/torch.ao.quantization.observer.PerRow.html @@ -4415,7 +4415,7 @@

                                  PerRow#

                                  -class torch.ao.quantization.observer.PerRow[source]#
                                  +class torch.ao.quantization.observer.PerRow[source]#

                                  Represents row-wise granularity in quantization.

This is a special case of per-axis quantization and is unique to Float8 matmuls where the input is quantized with a block_size of (1, …, input.shape[-1]). And the weight

diff --git a/2.9/generated/torch.ao.quantization.observer.PerTensor.html b/2.9/generated/torch.ao.quantization.observer.PerTensor.html
index a5b8ec8d60d..f66b061b7ce 100644
--- a/2.9/generated/torch.ao.quantization.observer.PerTensor.html
+++ b/2.9/generated/torch.ao.quantization.observer.PerTensor.html
@@ -4415,7 +4415,7 @@

                                  PerTensor#

                                  -class torch.ao.quantization.observer.PerTensor[source]#
                                  +class torch.ao.quantization.observer.PerTensor[source]#

                                  Represents per-tensor granularity in quantization.

                                  This granularity type calculates the quantization parameters based off the entire tensor.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.PerToken.html b/2.9/generated/torch.ao.quantization.observer.PerToken.html index 646ad671467..61d94f642f4 100644 --- a/2.9/generated/torch.ao.quantization.observer.PerToken.html +++ b/2.9/generated/torch.ao.quantization.observer.PerToken.html @@ -4415,7 +4415,7 @@

                                  PerToken#

                                  -class torch.ao.quantization.observer.PerToken[source]#
                                  +class torch.ao.quantization.observer.PerToken[source]#

                                  Represents per-token granularity in quantization.

                                  This granularity type calculates a different set of quantization parameters for each token, which is represented as the last dimension of the tensor.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.PlaceholderObserver.html b/2.9/generated/torch.ao.quantization.observer.PlaceholderObserver.html index 8f0ae566303..0331ced399b 100644 --- a/2.9/generated/torch.ao.quantization.observer.PlaceholderObserver.html +++ b/2.9/generated/torch.ao.quantization.observer.PlaceholderObserver.html @@ -4415,7 +4415,7 @@

                                  PlaceholderObserver#

                                  -class torch.ao.quantization.observer.PlaceholderObserver(dtype=torch.float32, custom_op_name='', compute_dtype=None, quant_min=None, quant_max=None, qscheme=None, eps=None, is_dynamic=False)[source]#
                                  +class torch.ao.quantization.observer.PlaceholderObserver(dtype=torch.float32, custom_op_name='', compute_dtype=None, quant_min=None, quant_max=None, qscheme=None, eps=None, is_dynamic=False)[source]#

                                  Observer that doesn’t do anything and just passes its configuration to the quantized module’s .from_float().

Can be used for quantization to float16 which doesn’t require determining

diff --git a/2.9/generated/torch.ao.quantization.observer.RecordingObserver.html b/2.9/generated/torch.ao.quantization.observer.RecordingObserver.html
index 10a6051e123..9e47aaa53c4 100644
--- a/2.9/generated/torch.ao.quantization.observer.RecordingObserver.html
+++ b/2.9/generated/torch.ao.quantization.observer.RecordingObserver.html
@@ -4415,7 +4415,7 @@

                                  RecordingObserver#

                                  -class torch.ao.quantization.observer.RecordingObserver(dtype=torch.quint8)[source]#
                                  +class torch.ao.quantization.observer.RecordingObserver(dtype=torch.quint8)[source]#

                                  The module is mainly for debug and records the tensor values during runtime.

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.quantization.observer.TorchAODType.html b/2.9/generated/torch.ao.quantization.observer.TorchAODType.html index 6381b23a206..78458e85981 100644 --- a/2.9/generated/torch.ao.quantization.observer.TorchAODType.html +++ b/2.9/generated/torch.ao.quantization.observer.TorchAODType.html @@ -4415,7 +4415,7 @@

                                  TorchAODType#

                                  -class torch.ao.quantization.observer.TorchAODType(value)[source]#
                                  +class torch.ao.quantization.observer.TorchAODType(value)[source]#

                                  Placeholder for dtypes that do not exist in PyTorch core yet.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.ZeroPointDomain.html b/2.9/generated/torch.ao.quantization.observer.ZeroPointDomain.html index 8b2e5e5424a..36f1d9c6e74 100644 --- a/2.9/generated/torch.ao.quantization.observer.ZeroPointDomain.html +++ b/2.9/generated/torch.ao.quantization.observer.ZeroPointDomain.html @@ -4415,7 +4415,7 @@

                                  ZeroPointDomain#

                                  -class torch.ao.quantization.observer.ZeroPointDomain(value)[source]#
                                  +class torch.ao.quantization.observer.ZeroPointDomain(value)[source]#

Enum that indicates whether zero_point is in the integer domain or the floating point domain

integer domain: quantized_val = (float_val / scale) (integer) + zero_point (integer)
float domain: quantized_val = (float_val - (zero_point (float) - scale * mid_point)) / scale

diff --git a/2.9/generated/torch.ao.quantization.observer.default_debug_observer.html b/2.9/generated/torch.ao.quantization.observer.default_debug_observer.html
index a512de9f769..087941b3aa7 100644
--- a/2.9/generated/torch.ao.quantization.observer.default_debug_observer.html
+++ b/2.9/generated/torch.ao.quantization.observer.default_debug_observer.html
@@ -4415,7 +4415,7 @@

                                  default_debug_observer#

                                  -torch.ao.quantization.observer.default_debug_observer[source]#
                                  +torch.ao.quantization.observer.default_debug_observer[source]#

                                  Default debug-only observer.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.default_placeholder_observer.html b/2.9/generated/torch.ao.quantization.observer.default_placeholder_observer.html index 86b7dc76cd3..b2492e3f689 100644 --- a/2.9/generated/torch.ao.quantization.observer.default_placeholder_observer.html +++ b/2.9/generated/torch.ao.quantization.observer.default_placeholder_observer.html @@ -4415,7 +4415,7 @@

                                  default_placeholder_observer#

                                  -torch.ao.quantization.observer.default_placeholder_observer[source]#
                                  +torch.ao.quantization.observer.default_placeholder_observer[source]#

                                  Default placeholder observer, usually used for quantization to torch.float16.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.get_block_size.html b/2.9/generated/torch.ao.quantization.observer.get_block_size.html index 2c150ae0b32..c75fd233eab 100644 --- a/2.9/generated/torch.ao.quantization.observer.get_block_size.html +++ b/2.9/generated/torch.ao.quantization.observer.get_block_size.html @@ -4415,7 +4415,7 @@

                                  get_block_size#

                                  -class torch.ao.quantization.observer.get_block_size(input_shape, granularity)[source]#
                                  +class torch.ao.quantization.observer.get_block_size(input_shape, granularity)[source]#

                                  Get the block size based on the input shape and granularity type.
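A sketch of the block sizes implied by the granularity types above; the shapes in the comments are the expected results under these assumptions:

from torch.ao.quantization.observer import PerAxis, PerGroup, PerTensor, get_block_size

shape = (32, 64)
get_block_size(shape, PerTensor())              # (32, 64): one block for the whole tensor
get_block_size(shape, PerAxis(axis=0))          # (1, 64): one block per row
get_block_size(shape, PerGroup(group_size=32))  # (1, 32): groups along the last dim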

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.quantization.observer.get_observer_state_dict.html b/2.9/generated/torch.ao.quantization.observer.get_observer_state_dict.html index 8c8042ecb2f..9a3467fa3a6 100644 --- a/2.9/generated/torch.ao.quantization.observer.get_observer_state_dict.html +++ b/2.9/generated/torch.ao.quantization.observer.get_observer_state_dict.html @@ -4415,7 +4415,7 @@

                                  get_observer_state_dict#

                                  -class torch.ao.quantization.observer.get_observer_state_dict(mod)[source]#
                                  +class torch.ao.quantization.observer.get_observer_state_dict(mod)[source]#

Returns the state dict corresponding to the observer stats. Traverses the model state_dict and extracts the stats.

                                  diff --git a/2.9/generated/torch.ao.quantization.observer.load_observer_state_dict.html b/2.9/generated/torch.ao.quantization.observer.load_observer_state_dict.html index b6214f95f8d..71f67d2b776 100644 --- a/2.9/generated/torch.ao.quantization.observer.load_observer_state_dict.html +++ b/2.9/generated/torch.ao.quantization.observer.load_observer_state_dict.html @@ -4415,7 +4415,7 @@

                                  load_observer_state_dict#

                                  -class torch.ao.quantization.observer.load_observer_state_dict(mod, obs_dict)[source]#
                                  +class torch.ao.quantization.observer.load_observer_state_dict(mod, obs_dict)[source]#

Given an input model and a state_dict containing model observer stats, load the stats back into the model. The observer state_dict can be saved using torch.ao.quantization.get_observer_state_dict.
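A sketch of the save/restore round trip, assuming an eager-mode prepared model:

import torch
from torch.ao.quantization import get_default_qconfig, prepare
from torch.ao.quantization.observer import (
    get_observer_state_dict,
    load_observer_state_dict,
)

m = torch.nn.Sequential(torch.nn.Linear(8, 8)).eval()
m.qconfig = get_default_qconfig("x86")
prepared = prepare(m)
prepared(torch.randn(2, 8))                   # populate observer stats
obs_dict = get_observer_state_dict(prepared)  # only the observer entries
load_observer_state_dict(prepared, obs_dict)  # restore them into a prepared model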

                                  diff --git a/2.9/generated/torch.ao.quantization.prepare.html b/2.9/generated/torch.ao.quantization.prepare.html index dd1e37375e4..8c91d855e35 100644 --- a/2.9/generated/torch.ao.quantization.prepare.html +++ b/2.9/generated/torch.ao.quantization.prepare.html @@ -4415,7 +4415,7 @@

                                  prepare#

                                  -class torch.ao.quantization.prepare(model, inplace=False, allow_list=None, observer_non_leaf_module_list=None, prepare_custom_config_dict=None)[source]#
                                  +class torch.ao.quantization.prepare(model, inplace=False, allow_list=None, observer_non_leaf_module_list=None, prepare_custom_config_dict=None)[source]#

                                  Prepares a copy of the model for quantization calibration or quantization-aware training.

                                  Quantization configuration should be assigned preemptively to individual submodules in .qconfig attribute.
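A minimal eager-mode sketch (real models typically also wrap the quantized region in QuantStub/DeQuantStub):

import torch
from torch.ao.quantization import convert, get_default_qconfig, prepare

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU()).eval()
model.qconfig = get_default_qconfig("x86")   # assign qconfig before prepare()
prepared = prepare(model)
prepared(torch.randn(2, 16))                 # calibration pass
quantized = convert(prepared)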

                                  diff --git a/2.9/generated/torch.ao.quantization.prepare_for_propagation_comparison.html b/2.9/generated/torch.ao.quantization.prepare_for_propagation_comparison.html index e6e19d6a794..8496fe757c0 100644 --- a/2.9/generated/torch.ao.quantization.prepare_for_propagation_comparison.html +++ b/2.9/generated/torch.ao.quantization.prepare_for_propagation_comparison.html @@ -4415,7 +4415,7 @@

                                  prepare_for_propagation_comparison#

                                  -class torch.ao.quantization.prepare_for_propagation_comparison(model)[source]#
                                  +class torch.ao.quantization.prepare_for_propagation_comparison(model)[source]#

Add output loggers to nodes that have a numeric_debug_handle

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.quantization.prepare_qat.html b/2.9/generated/torch.ao.quantization.prepare_qat.html index bac285a08b5..d19c0766b48 100644 --- a/2.9/generated/torch.ao.quantization.prepare_qat.html +++ b/2.9/generated/torch.ao.quantization.prepare_qat.html @@ -4415,7 +4415,7 @@

                                  prepare_qat#

                                  -class torch.ao.quantization.prepare_qat(model, mapping=None, inplace=False)[source]#
                                  +class torch.ao.quantization.prepare_qat(model, mapping=None, inplace=False)[source]#

Prepares a copy of the model for quantization calibration or quantization-aware training and converts it to a quantized version.

Quantization configuration should be assigned preemptively

diff --git a/2.9/generated/torch.ao.quantization.propagate_qconfig_.html b/2.9/generated/torch.ao.quantization.propagate_qconfig_.html
index 6a8dee08944..0977da186ea 100644
--- a/2.9/generated/torch.ao.quantization.propagate_qconfig_.html
+++ b/2.9/generated/torch.ao.quantization.propagate_qconfig_.html
@@ -4415,7 +4415,7 @@

                                  propagate_qconfig#

                                  -class torch.ao.quantization.propagate_qconfig_(module, qconfig_dict=None, prepare_custom_config_dict=None)[source]#
                                  +class torch.ao.quantization.propagate_qconfig_(module, qconfig_dict=None, prepare_custom_config_dict=None)[source]#

Propagate qconfig through the module hierarchy and assign the qconfig attribute on each leaf module

                                  diff --git a/2.9/generated/torch.ao.quantization.pt2e.export_utils.model_is_exported.html b/2.9/generated/torch.ao.quantization.pt2e.export_utils.model_is_exported.html index 40a1bfcc9a2..a9ff1a76724 100644 --- a/2.9/generated/torch.ao.quantization.pt2e.export_utils.model_is_exported.html +++ b/2.9/generated/torch.ao.quantization.pt2e.export_utils.model_is_exported.html @@ -4415,7 +4415,7 @@

                                  model_is_exported#

                                  -class torch.ao.quantization.pt2e.export_utils.model_is_exported(m)[source]#
                                  +class torch.ao.quantization.pt2e.export_utils.model_is_exported(m)[source]#

                                  Return True if the torch.nn.Module was exported, False otherwise (e.g. if the model was FX symbolically traced or not traced at all).
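For example:

import torch
from torch.ao.quantization.pt2e.export_utils import model_is_exported

mod = torch.nn.Linear(4, 4)
ep = torch.export.export(mod, (torch.randn(2, 4),))
model_is_exported(ep.module())  # True: came from torch.export
model_is_exported(mod)          # False: plain eager module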

                                  diff --git a/2.9/generated/torch.ao.quantization.pt2e.lowering.lower_pt2e_quantized_to_x86.html b/2.9/generated/torch.ao.quantization.pt2e.lowering.lower_pt2e_quantized_to_x86.html index 6cae8a96a8c..10e78c397f4 100644 --- a/2.9/generated/torch.ao.quantization.pt2e.lowering.lower_pt2e_quantized_to_x86.html +++ b/2.9/generated/torch.ao.quantization.pt2e.lowering.lower_pt2e_quantized_to_x86.html @@ -4415,7 +4415,7 @@

                                  lower_pt2e_quantized_to_x86#

                                  -class torch.ao.quantization.pt2e.lowering.lower_pt2e_quantized_to_x86(model, example_inputs)[source]#
                                  +class torch.ao.quantization.pt2e.lowering.lower_pt2e_quantized_to_x86(model, example_inputs)[source]#

Lower a PT2E-quantized model to x86 backend.

Args:
* model (torch.fx.GraphModule): a model quantized by PT2E quantization flow.

diff --git a/2.9/generated/torch.ao.quantization.qconfig.QConfig.html b/2.9/generated/torch.ao.quantization.qconfig.QConfig.html
index 77979a367f3..2a23914ad3b 100644
--- a/2.9/generated/torch.ao.quantization.qconfig.QConfig.html
+++ b/2.9/generated/torch.ao.quantization.qconfig.QConfig.html
@@ -4415,7 +4415,7 @@

                                  QConfig#

                                  -class torch.ao.quantization.qconfig.QConfig(activation, weight)[source]#
                                  +class torch.ao.quantization.qconfig.QConfig(activation, weight)[source]#

                                  Describes how to quantize a layer or a part of the network by providing settings (observer classes) for activations and weights respectively.
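As the note below explains, QConfig takes observer classes or factories rather than instances; with_args() builds such a factory, as in this sketch:

import torch
from torch.ao.quantization import QConfig
from torch.ao.quantization.observer import MinMaxObserver, PerChannelMinMaxObserver

# with_args() returns a factory, so every layer gets a fresh observer instance.
my_qconfig = QConfig(
    activation=MinMaxObserver.with_args(dtype=torch.quint8),
    weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8),
)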

Note that QConfig needs to contain observer classes (like MinMaxObserver) or a callable that returns

diff --git a/2.9/generated/torch.ao.quantization.qconfig_mapping.QConfigMapping.html b/2.9/generated/torch.ao.quantization.qconfig_mapping.QConfigMapping.html
index 79b6c0e3638..7f2b14fa81a 100644
--- a/2.9/generated/torch.ao.quantization.qconfig_mapping.QConfigMapping.html
+++ b/2.9/generated/torch.ao.quantization.qconfig_mapping.QConfigMapping.html
@@ -4415,7 +4415,7 @@

                                  QConfigMapping#

                                  -class torch.ao.quantization.qconfig_mapping.QConfigMapping[source]#
                                  +class torch.ao.quantization.qconfig_mapping.QConfigMapping[source]#

Mapping from model ops to torch.ao.quantization.QConfig instances.

                                  The user can specify QConfigs using the following methods (in increasing match priority):

                                  @@ -4442,7 +4442,7 @@

                                  QConfigMapping
                                  -classmethod from_dict(qconfig_dict)[source]#
                                  +classmethod from_dict(qconfig_dict)[source]#

                                  Create a QConfigMapping from a dictionary with the following keys (all optional):

                                  “” (for global QConfig)

                                  @@ -4461,7 +4461,7 @@

                                  QConfigMapping
                                  -set_global(global_qconfig)[source]#
                                  +set_global(global_qconfig)[source]#

                                  Set the global (default) QConfig.

                                  Return type
                                  @@ -4472,7 +4472,7 @@

                                  QConfigMapping
                                  -set_module_name(module_name, qconfig)[source]#
                                  +set_module_name(module_name, qconfig)[source]#

                                  Set the QConfig for modules matching the given module name. If the QConfig for an existing module name was already set, the new QConfig will override the old one.

                                  @@ -4484,7 +4484,7 @@

                                  QConfigMapping
                                  -set_module_name_object_type_order(module_name, object_type, index, qconfig)[source]#
                                  +set_module_name_object_type_order(module_name, object_type, index, qconfig)[source]#

                                  Set the QConfig for modules matching a combination of the given module name, object type, and the index at which the module appears.

If the QConfig for an existing (module name, object type, index) was already set, the new QConfig

@@ -4498,7 +4498,7 @@

                                  QConfigMapping
                                  -set_module_name_regex(module_name_regex, qconfig)[source]#
                                  +set_module_name_regex(module_name_regex, qconfig)[source]#

                                  Set the QConfig for modules matching the given regex string.

                                  Regexes will be matched in the order in which they are registered through this method. Thus, the caller should register more specific patterns first, e.g.:
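A sketch (qconfig here stands for any QConfig; the patterns are illustrative):

from torch.ao.quantization import get_default_qconfig
from torch.ao.quantization.qconfig_mapping import QConfigMapping

qconfig = get_default_qconfig("x86")
qconfig_mapping = (
    QConfigMapping()
    .set_module_name_regex("foo.*bar.*conv[0-9]+", qconfig)  # most specific first
    .set_module_name_regex("foo.*bar.*", qconfig)
    .set_module_name_regex("foo.*", qconfig)
)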

                                  @@ -4521,7 +4521,7 @@

                                  QConfigMapping
                                  -set_object_type(object_type, qconfig)[source]#
                                  +set_object_type(object_type, qconfig)[source]#

                                  Set the QConfig for a given module type, function, or method name. If the QConfig for an existing object type was already set, the new QConfig will override the old one.

                                  @@ -4533,7 +4533,7 @@

                                  QConfigMapping
                                  -to_dict()[source]#
                                  +to_dict()[source]#

                                  Convert this QConfigMapping to a dictionary with the following keys:

                                  “” (for global QConfig)

                                  diff --git a/2.9/generated/torch.ao.quantization.qconfig_mapping.get_default_qat_qconfig_mapping.html b/2.9/generated/torch.ao.quantization.qconfig_mapping.get_default_qat_qconfig_mapping.html index de56976e736..c1f2614fba1 100644 --- a/2.9/generated/torch.ao.quantization.qconfig_mapping.get_default_qat_qconfig_mapping.html +++ b/2.9/generated/torch.ao.quantization.qconfig_mapping.get_default_qat_qconfig_mapping.html @@ -4415,7 +4415,7 @@

                                  get_default_qat_qconfig_mapping#

                                  -class torch.ao.quantization.qconfig_mapping.get_default_qat_qconfig_mapping(backend='x86', version=1)[source]#
                                  +class torch.ao.quantization.qconfig_mapping.get_default_qat_qconfig_mapping(backend='x86', version=1)[source]#

                                  Return the default QConfigMapping for quantization aware training.

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.quantization.qconfig_mapping.get_default_qconfig_mapping.html b/2.9/generated/torch.ao.quantization.qconfig_mapping.get_default_qconfig_mapping.html index d22481aa33f..6829a09eadf 100644 --- a/2.9/generated/torch.ao.quantization.qconfig_mapping.get_default_qconfig_mapping.html +++ b/2.9/generated/torch.ao.quantization.qconfig_mapping.get_default_qconfig_mapping.html @@ -4415,7 +4415,7 @@

                                  get_default_qconfig_mapping#

                                  -class torch.ao.quantization.qconfig_mapping.get_default_qconfig_mapping(backend='x86', version=0)[source]#
                                  +class torch.ao.quantization.qconfig_mapping.get_default_qconfig_mapping(backend='x86', version=0)[source]#

                                  Return the default QConfigMapping for post training quantization.

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.quantization.quantize.html b/2.9/generated/torch.ao.quantization.quantize.html index 254f4b3892e..bd8a63a416a 100644 --- a/2.9/generated/torch.ao.quantization.quantize.html +++ b/2.9/generated/torch.ao.quantization.quantize.html @@ -4415,7 +4415,7 @@

                                  quantize#

                                  -class torch.ao.quantization.quantize(model, run_fn, run_args, mapping=None, inplace=False)[source]#
                                  +class torch.ao.quantization.quantize(model, run_fn, run_args, mapping=None, inplace=False)[source]#

                                  Quantize the input float model with post training static quantization.

First it prepares the model for calibration, then it calls run_fn, which runs the calibration step; after that we will

diff --git a/2.9/generated/torch.ao.quantization.quantize_dynamic.html b/2.9/generated/torch.ao.quantization.quantize_dynamic.html
index cf8357c37f7..f349fcc8be2 100644
--- a/2.9/generated/torch.ao.quantization.quantize_dynamic.html
+++ b/2.9/generated/torch.ao.quantization.quantize_dynamic.html
@@ -4415,7 +4415,7 @@

                                  quantize_dynamic#

                                  -class torch.ao.quantization.quantize_dynamic(model, qconfig_spec=None, dtype=torch.qint8, mapping=None, inplace=False)[source]#
                                  +class torch.ao.quantization.quantize_dynamic(model, qconfig_spec=None, dtype=torch.qint8, mapping=None, inplace=False)[source]#

Converts a float model to a dynamic (i.e. weights-only) quantized model.

Replaces specified modules with dynamic weight-only quantized versions and outputs the quantized model.
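For instance, dynamically quantizing the Linear modules of a model, as a sketch:

import torch

model = torch.nn.Sequential(torch.nn.Linear(128, 64), torch.nn.ReLU())
qmodel = torch.ao.quantization.quantize_dynamic(
    model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8
)
out = qmodel(torch.randn(1, 128))  # activations stay fp32; Linear weights are int8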

For the simplest usage, provide a dtype argument that can be float16 or qint8. Weight-only quantization

diff --git a/2.9/generated/torch.ao.quantization.quantize_fx.convert_fx.html b/2.9/generated/torch.ao.quantization.quantize_fx.convert_fx.html
index ca3bff0329a..054993c5d57 100644
--- a/2.9/generated/torch.ao.quantization.quantize_fx.convert_fx.html
+++ b/2.9/generated/torch.ao.quantization.quantize_fx.convert_fx.html
@@ -4415,7 +4415,7 @@

                                  convert_fx#

                                  -class torch.ao.quantization.quantize_fx.convert_fx(graph_module, convert_custom_config=None, _remove_qconfig=True, qconfig_mapping=None, backend_config=None, keep_original_weights=False)[source]#
                                  +class torch.ao.quantization.quantize_fx.convert_fx(graph_module, convert_custom_config=None, _remove_qconfig=True, qconfig_mapping=None, backend_config=None, keep_original_weights=False)[source]#

                                  Convert a calibrated or trained model to a quantized model

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.quantization.quantize_fx.fuse_fx.html b/2.9/generated/torch.ao.quantization.quantize_fx.fuse_fx.html index b8171ad8306..4b9947ac2b1 100644 --- a/2.9/generated/torch.ao.quantization.quantize_fx.fuse_fx.html +++ b/2.9/generated/torch.ao.quantization.quantize_fx.fuse_fx.html @@ -4415,7 +4415,7 @@

                                  fuse_fx#

                                  -class torch.ao.quantization.quantize_fx.fuse_fx(model, fuse_custom_config=None, backend_config=None)[source]#
                                  +class torch.ao.quantization.quantize_fx.fuse_fx(model, fuse_custom_config=None, backend_config=None)[source]#

Fuse modules like conv+bn, conv+bn+relu, etc.; the model must be in eval mode. Fusion rules are defined in torch.ao.quantization.fx.fusion_pattern.py

                                  diff --git a/2.9/generated/torch.ao.quantization.quantize_fx.prepare_fx.html b/2.9/generated/torch.ao.quantization.quantize_fx.prepare_fx.html index 99db879b5c8..bd3fab77c21 100644 --- a/2.9/generated/torch.ao.quantization.quantize_fx.prepare_fx.html +++ b/2.9/generated/torch.ao.quantization.quantize_fx.prepare_fx.html @@ -4415,7 +4415,7 @@

                                  prepare_fx#

                                  -class torch.ao.quantization.quantize_fx.prepare_fx(model, qconfig_mapping, example_inputs, prepare_custom_config=None, _equalization_config=None, backend_config=None)[source]#
                                  +class torch.ao.quantization.quantize_fx.prepare_fx(model, qconfig_mapping, example_inputs, prepare_custom_config=None, _equalization_config=None, backend_config=None)[source]#

                                  Prepare a model for post training quantization
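A minimal sketch of the FX prepare/calibrate/convert flow:

import torch
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(2, 16),)
prepared = prepare_fx(model, get_default_qconfig_mapping("x86"), example_inputs)
prepared(*example_inputs)        # calibration
quantized = convert_fx(prepared)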

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.quantization.quantize_fx.prepare_qat_fx.html b/2.9/generated/torch.ao.quantization.quantize_fx.prepare_qat_fx.html index 3219b5d4af9..b37ddbcf7e3 100644 --- a/2.9/generated/torch.ao.quantization.quantize_fx.prepare_qat_fx.html +++ b/2.9/generated/torch.ao.quantization.quantize_fx.prepare_qat_fx.html @@ -4415,7 +4415,7 @@

                                  prepare_qat_fx#

                                  -class torch.ao.quantization.quantize_fx.prepare_qat_fx(model, qconfig_mapping, example_inputs, prepare_custom_config=None, backend_config=None)[source]#
                                  +class torch.ao.quantization.quantize_fx.prepare_qat_fx(model, qconfig_mapping, example_inputs, prepare_custom_config=None, backend_config=None)[source]#

                                  Prepare a model for quantization aware training

                                  Parameters
                                  diff --git a/2.9/generated/torch.ao.quantization.quantize_qat.html b/2.9/generated/torch.ao.quantization.quantize_qat.html index a6a83bd01eb..d570b9d460c 100644 --- a/2.9/generated/torch.ao.quantization.quantize_qat.html +++ b/2.9/generated/torch.ao.quantization.quantize_qat.html @@ -4415,7 +4415,7 @@

                                  quantize_qat#

                                  -class torch.ao.quantization.quantize_qat(model, run_fn, run_args, inplace=False)[source]#
                                  +class torch.ao.quantization.quantize_qat(model, run_fn, run_args, inplace=False)[source]#

                                  Do quantization aware training and output a quantized model

                                  Parameters
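
An eager-mode sketch, assuming run_fn is invoked with the model followed by run_args (model, train_fn, and train_loader are illustrative names):

>>> import torch
>>> model.qconfig = torch.ao.quantization.get_default_qat_qconfig("x86")
>>> quantized = torch.ao.quantization.quantize_qat(model, train_fn, [train_loader])
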
diff --git a/2.9/generated/torch.ao.quantization.swap_module.html b/2.9/generated/torch.ao.quantization.swap_module.html
index 4aa07ce0588..0ac0de9e6c9 100644
--- a/2.9/generated/torch.ao.quantization.swap_module.html
+++ b/2.9/generated/torch.ao.quantization.swap_module.html
@@ -4415,7 +4415,7 @@

                                  swap_module#

                                  -class torch.ao.quantization.swap_module(mod, mapping, custom_module_class_mapping, use_precomputed_fake_quant=False)[source]#
                                  +class torch.ao.quantization.swap_module(mod, mapping, custom_module_class_mapping, use_precomputed_fake_quant=False)[source]#

                                  Swaps the module if it has a quantized counterpart and it has an observer attached.

diff --git a/2.9/generated/torch.are_deterministic_algorithms_enabled.html b/2.9/generated/torch.are_deterministic_algorithms_enabled.html
index 70e388c3295..95816885654 100644
--- a/2.9/generated/torch.are_deterministic_algorithms_enabled.html
+++ b/2.9/generated/torch.are_deterministic_algorithms_enabled.html
@@ -4404,7 +4404,7 @@

                                  torch.are_deterministic_algorithms_enabled#

                                  -torch.are_deterministic_algorithms_enabled()[source]#
                                  +torch.are_deterministic_algorithms_enabled()[source]#

                                  Returns True if the global deterministic flag is turned on. Refer to torch.use_deterministic_algorithms() documentation for more details.
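
For example:

>>> import torch
>>> torch.use_deterministic_algorithms(True)
>>> torch.are_deterministic_algorithms_enabled()
True
>>> torch.use_deterministic_algorithms(False)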

diff --git a/2.9/generated/torch.atleast_1d.html b/2.9/generated/torch.atleast_1d.html
index 836ccd8a492..99bdff25471 100644
--- a/2.9/generated/torch.atleast_1d.html
+++ b/2.9/generated/torch.atleast_1d.html
@@ -4404,7 +4404,7 @@

                                  torch.atleast_1d#

                                  -torch.atleast_1d(*tensors)[source]#
                                  +torch.atleast_1d(*tensors)[source]#

                                  Returns a 1-dimensional view of each input tensor with zero dimensions. Input tensors with one or more dimensions are returned as-is.

diff --git a/2.9/generated/torch.atleast_2d.html b/2.9/generated/torch.atleast_2d.html
index c2a56479aa3..34ff1f2bb67 100644
--- a/2.9/generated/torch.atleast_2d.html
+++ b/2.9/generated/torch.atleast_2d.html
@@ -4404,7 +4404,7 @@

                                  torch.atleast_2d#

                                  -torch.atleast_2d(*tensors)[source]#
                                  +torch.atleast_2d(*tensors)[source]#

                                  Returns a 2-dimensional view of each input tensor with zero dimensions. Input tensors with two or more dimensions are returned as-is.

diff --git a/2.9/generated/torch.atleast_3d.html b/2.9/generated/torch.atleast_3d.html
index cf362ef2dad..d9f3d431f85 100644
--- a/2.9/generated/torch.atleast_3d.html
+++ b/2.9/generated/torch.atleast_3d.html
@@ -4404,7 +4404,7 @@

                                  torch.atleast_3d#

                                  -torch.atleast_3d(*tensors)[source]#
                                  +torch.atleast_3d(*tensors)[source]#

                                  Returns a 3-dimensional view of each input tensor with zero dimensions. Input tensors with three or more dimensions are returned as-is.
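
A quick illustration of atleast_1d, atleast_2d, and atleast_3d on a 0-dimensional input:

>>> import torch
>>> x = torch.tensor(1.0)  # 0-dimensional
>>> torch.atleast_1d(x).shape
torch.Size([1])
>>> torch.atleast_2d(x).shape
torch.Size([1, 1])
>>> torch.atleast_3d(x).shape
torch.Size([1, 1, 1])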

diff --git a/2.9/generated/torch.autograd.Function.backward.html b/2.9/generated/torch.autograd.Function.backward.html
index 9554faadfa5..fdc1c3c57da 100644
--- a/2.9/generated/torch.autograd.Function.backward.html
+++ b/2.9/generated/torch.autograd.Function.backward.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.Function.backward#

                                  -static Function.backward(ctx, *grad_outputs)[source]#
                                  +static Function.backward(ctx, *grad_outputs)[source]#

                                  Define a formula for differentiating the operation with backward mode automatic differentiation.

                                  This function is to be overridden by all subclasses. (Defining this function is equivalent to defining the vjp function.)
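
A minimal subclass sketch showing how backward() pairs with forward() (Square is an illustrative name):

>>> import torch
>>> class Square(torch.autograd.Function):
...     @staticmethod
...     def forward(ctx, x):
...         ctx.save_for_backward(x)
...         return x * x
...     @staticmethod
...     def backward(ctx, grad_output):
...         (x,) = ctx.saved_tensors
...         return 2 * x * grad_output  # d(x^2)/dx = 2x
>>> x = torch.tensor([3.0], requires_grad=True)
>>> Square.apply(x).backward()
>>> x.grad
tensor([6.])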

diff --git a/2.9/generated/torch.autograd.Function.forward.html b/2.9/generated/torch.autograd.Function.forward.html
index 8914dcaceb9..a3e3adc9984 100644
--- a/2.9/generated/torch.autograd.Function.forward.html
+++ b/2.9/generated/torch.autograd.Function.forward.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.Function.forward#

                                  -static Function.forward(*args, **kwargs)[source]#
                                  +static Function.forward(*args, **kwargs)[source]#

                                  Define the forward of the custom autograd Function.

                                  This function is to be overridden by all subclasses. There are two ways to define forward:

diff --git a/2.9/generated/torch.autograd.Function.jvp.html b/2.9/generated/torch.autograd.Function.jvp.html
index a039c60e308..a68df6f45fc 100644
--- a/2.9/generated/torch.autograd.Function.jvp.html
+++ b/2.9/generated/torch.autograd.Function.jvp.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.Function.jvp#

                                  -static Function.jvp(ctx, *grad_inputs)[source]#
                                  +static Function.jvp(ctx, *grad_inputs)[source]#

                                  Define a formula for differentiating the operation with forward mode automatic differentiation.

This function is to be overridden by all subclasses. It must accept a context ctx as the first argument, followed by
diff --git a/2.9/generated/torch.autograd.Function.vmap.html b/2.9/generated/torch.autograd.Function.vmap.html
index b4cd5b4c386..f5b2d394bf6 100644
--- a/2.9/generated/torch.autograd.Function.vmap.html
+++ b/2.9/generated/torch.autograd.Function.vmap.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.Function.vmap#

                                  -static Function.vmap(info, in_dims, *args)[source]#
                                  +static Function.vmap(info, in_dims, *args)[source]#

                                  Define the behavior for this autograd.Function underneath torch.vmap().

For a torch.autograd.Function() to support torch.vmap(), you must either override this static method, or set
diff --git a/2.9/generated/torch.autograd.backward.html b/2.9/generated/torch.autograd.backward.html
index b3166b8aa41..eb30a633df5 100644
--- a/2.9/generated/torch.autograd.backward.html
+++ b/2.9/generated/torch.autograd.backward.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.backward#

                                  -torch.autograd.backward(tensors, grad_tensors=None, retain_graph=None, create_graph=False, grad_variables=None, inputs=None)[source]#
                                  +torch.autograd.backward(tensors, grad_tensors=None, retain_graph=None, create_graph=False, grad_variables=None, inputs=None)[source]#

                                  Compute the sum of gradients of given tensors with respect to graph leaves.
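
For instance:

>>> import torch
>>> x = torch.tensor([2.0, 3.0], requires_grad=True)
>>> y = (x ** 2).sum()
>>> torch.autograd.backward([y])
>>> x.grad
tensor([4., 6.])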

The graph is differentiated using the chain rule. If any of tensors are non-scalar (i.e. their data has more than one element) and require
diff --git a/2.9/generated/torch.autograd.forward_ad.UnpackedDualTensor.html b/2.9/generated/torch.autograd.forward_ad.UnpackedDualTensor.html
index 67750d9c5ec..6b7deff0231 100644
--- a/2.9/generated/torch.autograd.forward_ad.UnpackedDualTensor.html
+++ b/2.9/generated/torch.autograd.forward_ad.UnpackedDualTensor.html
@@ -4404,7 +4404,7 @@

                                  UnpackedDualTensor#

                                  -class torch.autograd.forward_ad.UnpackedDualTensor(primal, tangent)[source]#
                                  +class torch.autograd.forward_ad.UnpackedDualTensor(primal, tangent)[source]#

                                  Namedtuple returned by unpack_dual() containing the primal and tangent components of the dual tensor.

                                  See unpack_dual() for more details.

diff --git a/2.9/generated/torch.autograd.forward_ad.dual_level.html b/2.9/generated/torch.autograd.forward_ad.dual_level.html
index 821b0b9b120..358cb22ead1 100644
--- a/2.9/generated/torch.autograd.forward_ad.dual_level.html
+++ b/2.9/generated/torch.autograd.forward_ad.dual_level.html
@@ -4404,7 +4404,7 @@

                                  dual_level#

                                  -class torch.autograd.forward_ad.dual_level[source]#
                                  +class torch.autograd.forward_ad.dual_level[source]#

                                  Context-manager for forward AD, where all forward AD computation must occur within the dual_level context.

                                  Note

diff --git a/2.9/generated/torch.autograd.forward_ad.enter_dual_level.html b/2.9/generated/torch.autograd.forward_ad.enter_dual_level.html
index f25d01c5be4..3c2ce7a9dde 100644
--- a/2.9/generated/torch.autograd.forward_ad.enter_dual_level.html
+++ b/2.9/generated/torch.autograd.forward_ad.enter_dual_level.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.forward_ad.enter_dual_level#

                                  -torch.autograd.forward_ad.enter_dual_level()[source]#
                                  +torch.autograd.forward_ad.enter_dual_level()[source]#

                                  Enter a new forward grad level.

                                  This level can be used to make and unpack dual Tensors to compute forward gradients.

diff --git a/2.9/generated/torch.autograd.forward_ad.exit_dual_level.html b/2.9/generated/torch.autograd.forward_ad.exit_dual_level.html
index 721bf271bf7..6823dba93a2 100644
--- a/2.9/generated/torch.autograd.forward_ad.exit_dual_level.html
+++ b/2.9/generated/torch.autograd.forward_ad.exit_dual_level.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.forward_ad.exit_dual_level#

                                  -torch.autograd.forward_ad.exit_dual_level(*, level=None)[source]#
                                  +torch.autograd.forward_ad.exit_dual_level(*, level=None)[source]#

                                  Exit a forward grad level.

                                  This function deletes all the gradients associated with this level. Only deleting the latest entered level is allowed.

diff --git a/2.9/generated/torch.autograd.forward_ad.make_dual.html b/2.9/generated/torch.autograd.forward_ad.make_dual.html
index 9776e7765be..338add34df7 100644
--- a/2.9/generated/torch.autograd.forward_ad.make_dual.html
+++ b/2.9/generated/torch.autograd.forward_ad.make_dual.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.forward_ad.make_dual#

                                  -torch.autograd.forward_ad.make_dual(tensor, tangent, *, level=None)[source]#
                                  +torch.autograd.forward_ad.make_dual(tensor, tangent, *, level=None)[source]#

                                  Associate a tensor value with its tangent to create a “dual tensor” for forward AD gradient computation.

The result is a new tensor aliased to tensor with tangent embedded as an attribute as-is if it has the same storage layout or copied otherwise.
diff --git a/2.9/generated/torch.autograd.forward_ad.unpack_dual.html b/2.9/generated/torch.autograd.forward_ad.unpack_dual.html
index e58e299a8b4..dd46bb5b411 100644
--- a/2.9/generated/torch.autograd.forward_ad.unpack_dual.html
+++ b/2.9/generated/torch.autograd.forward_ad.unpack_dual.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.forward_ad.unpack_dual#

                                  -torch.autograd.forward_ad.unpack_dual(tensor, *, level=None)[source]#
                                  +torch.autograd.forward_ad.unpack_dual(tensor, *, level=None)[source]#

                                  Unpack a “dual tensor” to get both its Tensor value and its forward AD gradient.
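
A small round-trip sketch with make_dual() inside a dual_level() context:

>>> import torch
>>> import torch.autograd.forward_ad as fwAD
>>> x = torch.tensor([1.0, 2.0])
>>> with fwAD.dual_level():
...     dual = fwAD.make_dual(x, torch.ones_like(x))
...     primal, tangent = fwAD.unpack_dual(dual * 3)
...     print(tangent)
tensor([3., 3.])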

The result is a namedtuple (primal, tangent) where primal is a view of tensor’s primal and tangent is tensor’s tangent as-is.
diff --git a/2.9/generated/torch.autograd.function.BackwardCFunction.html b/2.9/generated/torch.autograd.function.BackwardCFunction.html
index cf6f516c2ba..55c20fd8165 100644
--- a/2.9/generated/torch.autograd.function.BackwardCFunction.html
+++ b/2.9/generated/torch.autograd.function.BackwardCFunction.html
@@ -4404,23 +4404,23 @@

                                  BackwardCFunction#

                                  -class torch.autograd.function.BackwardCFunction[source]#
                                  +class torch.autograd.function.BackwardCFunction[source]#

                                  This class is used for internal autograd work. Do not use.

                                  -apply(*args)[source]#
                                  +apply(*args)[source]#

                                  Apply method used when executing this Node during the backward

                                  -apply_jvp(*args)[source]#
                                  +apply_jvp(*args)[source]#

                                  Apply method used when executing forward mode AD during the forward

                                  -mark_dirty(*args)[source]#
                                  +mark_dirty(*args)[source]#

                                  Mark given tensors as modified in an in-place operation.

                                  This should be called at most once, in either the setup_context() or forward() methods, and all arguments should be inputs.

                                  @@ -4458,7 +4458,7 @@

                                  BackwardCFunction
                                  -mark_non_differentiable(*args)[source]#
                                  +mark_non_differentiable(*args)[source]#

                                  Mark outputs as non-differentiable.

                                  This should be called at most once, in either the setup_context() or forward() methods, and all arguments should be tensor outputs.

                                  @@ -4493,7 +4493,7 @@

                                  BackwardCFunction
                                  -save_for_backward(*tensors)[source]#
                                  +save_for_backward(*tensors)[source]#

                                  Save given tensors for a future call to backward().

                                  save_for_backward should be called at most once, in either the setup_context() or forward() methods, and only with tensors.

                                  @@ -4549,7 +4549,7 @@

                                  BackwardCFunction
                                  -save_for_forward(*tensors)[source]#
                                  +save_for_forward(*tensors)[source]#

                                  Save given tensors for a future call to jvp().

save_for_forward should be called at most once, in either the setup_context() or forward() methods, and all arguments
@@ -4595,7 +4595,7 @@

                                  BackwardCFunction
                                  -set_materialize_grads(value)[source]#
                                  +set_materialize_grads(value)[source]#

                                  Set whether to materialize grad tensors. Default is True.

                                  This should be called only from either the setup_context() or forward() methods.

diff --git a/2.9/generated/torch.autograd.function.FunctionCtx.mark_dirty.html b/2.9/generated/torch.autograd.function.FunctionCtx.mark_dirty.html
index 2e9a9f22dbe..eed17af9c67 100644
--- a/2.9/generated/torch.autograd.function.FunctionCtx.mark_dirty.html
+++ b/2.9/generated/torch.autograd.function.FunctionCtx.mark_dirty.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.function.FunctionCtx.mark_dirty#

                                  -FunctionCtx.mark_dirty(*args)[source]#
                                  +FunctionCtx.mark_dirty(*args)[source]#

                                  Mark given tensors as modified in an in-place operation.

                                  This should be called at most once, in either the setup_context() or forward() methods, and all arguments should be inputs.
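
A sketch of the intended pattern (AddOne is an illustrative name):

>>> import torch
>>> class AddOne(torch.autograd.Function):
...     @staticmethod
...     def forward(ctx, x):
...         x.add_(1.0)        # modify the input in place
...         ctx.mark_dirty(x)  # tell autograd the input was mutated
...         return x
...     @staticmethod
...     def backward(ctx, grad_output):
...         return grad_output  # d(x + 1)/dx = 1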

diff --git a/2.9/generated/torch.autograd.function.FunctionCtx.mark_non_differentiable.html b/2.9/generated/torch.autograd.function.FunctionCtx.mark_non_differentiable.html
index 6bf46e10ba3..e6191ac9da9 100644
--- a/2.9/generated/torch.autograd.function.FunctionCtx.mark_non_differentiable.html
+++ b/2.9/generated/torch.autograd.function.FunctionCtx.mark_non_differentiable.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.function.FunctionCtx.mark_non_differentiable#

                                  -FunctionCtx.mark_non_differentiable(*args)[source]#
                                  +FunctionCtx.mark_non_differentiable(*args)[source]#

                                  Mark outputs as non-differentiable.

                                  This should be called at most once, in either the setup_context() or forward() methods, and all arguments should be tensor outputs.

diff --git a/2.9/generated/torch.autograd.function.FunctionCtx.save_for_backward.html b/2.9/generated/torch.autograd.function.FunctionCtx.save_for_backward.html
index 9ac82633796..52e9eeb6e10 100644
--- a/2.9/generated/torch.autograd.function.FunctionCtx.save_for_backward.html
+++ b/2.9/generated/torch.autograd.function.FunctionCtx.save_for_backward.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.function.FunctionCtx.save_for_backward#

                                  -FunctionCtx.save_for_backward(*tensors)[source]#
                                  +FunctionCtx.save_for_backward(*tensors)[source]#

                                  Save given tensors for a future call to backward().

                                  save_for_backward should be called at most once, in either the setup_context() or forward() methods, and only with tensors.

diff --git a/2.9/generated/torch.autograd.function.FunctionCtx.set_materialize_grads.html b/2.9/generated/torch.autograd.function.FunctionCtx.set_materialize_grads.html
index d86f66a6975..6df79dfbcaa 100644
--- a/2.9/generated/torch.autograd.function.FunctionCtx.set_materialize_grads.html
+++ b/2.9/generated/torch.autograd.function.FunctionCtx.set_materialize_grads.html
@@ -4404,7 +4404,7 @@

                                  torch.autograd.function.FunctionCtx.set_materialize_grads#

                                  -FunctionCtx.set_materialize_grads(value)[source]#
                                  +FunctionCtx.set_materialize_grads(value)[source]#

                                  Set whether to materialize grad tensors. Default is True.

                                  This should be called only from either the setup_context() or forward() methods.
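
A sketch of opting out of materialization (TwoOutputs is an illustrative name); gradients for unused outputs then arrive as None instead of zero tensors:

>>> import torch
>>> class TwoOutputs(torch.autograd.Function):
...     @staticmethod
...     def forward(ctx, x):
...         ctx.set_materialize_grads(False)
...         return x.clone(), x.clone()
...     @staticmethod
...     def backward(ctx, g1, g2):
...         total = torch.zeros(())
...         if g1 is not None:  # must check for None once materialization is off
...             total = total + g1
...         if g2 is not None:
...             total = total + g2
...         return total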

diff --git a/2.9/generated/torch.autograd.function.InplaceFunction.html b/2.9/generated/torch.autograd.function.InplaceFunction.html
index 86181f4d1a3..1ca5fecf84a 100644
--- a/2.9/generated/torch.autograd.function.InplaceFunction.html
+++ b/2.9/generated/torch.autograd.function.InplaceFunction.html
@@ -4404,12 +4404,12 @@

                                  InplaceFunction#

                                  -class torch.autograd.function.InplaceFunction(inplace=False)[source]#
                                  +class torch.autograd.function.InplaceFunction(inplace=False)[source]#

                                  This class is here only for backward compatibility reasons. Use Function instead of this for any new use case.

                                  -static backward(ctx, *grad_outputs)[source]#
                                  +static backward(ctx, *grad_outputs)[source]#

                                  Define a formula for differentiating the operation with backward mode automatic differentiation.

                                  This function is to be overridden by all subclasses. (Defining this function is equivalent to defining the vjp function.)

                                  @@ -4436,7 +4436,7 @@

                                  InplaceFunction
                                  -static forward(*args, **kwargs)[source]#
                                  +static forward(*args, **kwargs)[source]#

                                  Define the forward of the custom autograd Function.

                                  This function is to be overridden by all subclasses. There are two ways to define forward:

                                  @@ -4486,7 +4486,7 @@

                                  InplaceFunction
                                  -static jvp(ctx, *grad_inputs)[source]#
                                  +static jvp(ctx, *grad_inputs)[source]#

                                  Define a formula for differentiating the operation with forward mode automatic differentiation.

This function is to be overridden by all subclasses. It must accept a context ctx as the first argument, followed by
@@ -4509,7 +4509,7 @@

                                  InplaceFunction
                                  -mark_dirty(*args)[source]#
                                  +mark_dirty(*args)[source]#

                                  Mark given tensors as modified in an in-place operation.

                                  This should be called at most once, in either the setup_context() or forward() methods, and all arguments should be inputs.

                                  @@ -4547,7 +4547,7 @@

                                  InplaceFunction
                                  -mark_non_differentiable(*args)[source]#
                                  +mark_non_differentiable(*args)[source]#

                                  Mark outputs as non-differentiable.

                                  This should be called at most once, in either the setup_context() or forward() methods, and all arguments should be tensor outputs.

                                  @@ -4582,7 +4582,7 @@

                                  InplaceFunction
                                  -save_for_backward(*tensors)[source]#
                                  +save_for_backward(*tensors)[source]#

                                  Save given tensors for a future call to backward().

                                  save_for_backward should be called at most once, in either the setup_context() or forward() methods, and only with tensors.

                                  @@ -4638,7 +4638,7 @@

                                  InplaceFunction
                                  -save_for_forward(*tensors)[source]#
                                  +save_for_forward(*tensors)[source]#

                                  Save given tensors for a future call to jvp().

save_for_forward should be called at most once, in either the setup_context() or forward() methods, and all arguments
@@ -4684,7 +4684,7 @@

                                  InplaceFunction
                                  -set_materialize_grads(value)[source]#
                                  +set_materialize_grads(value)[source]#

                                  Set whether to materialize grad tensors. Default is True.

                                  This should be called only from either the setup_context() or forward() methods.

                                  @@ -4730,7 +4730,7 @@

                                  InplaceFunction
                                  -static setup_context(ctx, inputs, output)[source]#
                                  +static setup_context(ctx, inputs, output)[source]#

                                  There are two ways to define the forward pass of an autograd.Function.

                                  Either:

                                    @@ -4751,7 +4751,7 @@

                                    InplaceFunction
                                    -static vjp(ctx, *grad_outputs)[source]#
                                    +static vjp(ctx, *grad_outputs)[source]#

                                    Define a formula for differentiating the operation with backward mode automatic differentiation.

                                    This function is to be overridden by all subclasses. (Defining this function is equivalent to defining the vjp function.)

                                    @@ -4778,7 +4778,7 @@

                                    InplaceFunction
                                    -static vmap(info, in_dims, *args)[source]#
                                    +static vmap(info, in_dims, *args)[source]#

                                    Define the behavior for this autograd.Function underneath torch.vmap().

For a torch.autograd.Function() to support torch.vmap(), you must either override this static method, or set
diff --git a/2.9/generated/torch.autograd.function.NestedIOFunction.html b/2.9/generated/torch.autograd.function.NestedIOFunction.html
index 1eb59b2aed2..95ed10b2a18 100644
--- a/2.9/generated/torch.autograd.function.NestedIOFunction.html
+++ b/2.9/generated/torch.autograd.function.NestedIOFunction.html
@@ -4404,12 +4404,12 @@

                                    NestedIOFunction#

                                    -class torch.autograd.function.NestedIOFunction(*args, **kwargs)[source]#
                                    +class torch.autograd.function.NestedIOFunction(*args, **kwargs)[source]#

                                    This class is here only for backward compatibility reasons. Use Function instead of this for any new use case.

                                    -backward(*gradients)[source]#
                                    +backward(*gradients)[source]#

                                    Shared backward utility.

                                    Return type
                                    @@ -4420,7 +4420,7 @@

                                    NestedIOFunction
                                    -backward_extended(*grad_output)[source]#
                                    +backward_extended(*grad_output)[source]#

                                    User defined backward.

                                    @@ -4428,7 +4428,7 @@

                                    NestedIOFunction
                                    -forward(*args)[source]#
                                    +forward(*args)[source]#

                                    Shared forward utility.

                                    Return type
                                    @@ -4439,7 +4439,7 @@

                                    NestedIOFunction
                                    -forward_extended(*input)[source]#
                                    +forward_extended(*input)[source]#

                                    User defined forward.

                                    @@ -4447,7 +4447,7 @@

                                    NestedIOFunction
                                    -static jvp(ctx, *grad_inputs)[source]#
                                    +static jvp(ctx, *grad_inputs)[source]#

                                    Define a formula for differentiating the operation with forward mode automatic differentiation.

This function is to be overridden by all subclasses. It must accept a context ctx as the first argument, followed by
@@ -4470,7 +4470,7 @@

                                    NestedIOFunction
                                    -mark_dirty(*args, **kwargs)[source]#
                                    +mark_dirty(*args, **kwargs)[source]#

                                    See Function.mark_dirty().

                                    @@ -4478,7 +4478,7 @@

                                    NestedIOFunction
                                    -mark_non_differentiable(*args, **kwargs)[source]#
                                    +mark_non_differentiable(*args, **kwargs)[source]#

                                    See Function.mark_non_differentiable().

                                    @@ -4486,7 +4486,7 @@

                                    NestedIOFunction
                                    -save_for_backward(*args)[source]#
                                    +save_for_backward(*args)[source]#

                                    See Function.save_for_backward().

                                    @@ -4494,7 +4494,7 @@

                                    NestedIOFunction
                                    -save_for_forward(*tensors)[source]#
                                    +save_for_forward(*tensors)[source]#

                                    Save given tensors for a future call to jvp().

save_for_forward should be called at most once, in either the setup_context() or forward() methods, and all arguments
@@ -4546,7 +4546,7 @@

                                    NestedIOFunction
                                    -set_materialize_grads(value)[source]#
                                    +set_materialize_grads(value)[source]#

                                    Set whether to materialize grad tensors. Default is True.

                                    This should be called only from either the setup_context() or forward() methods.

                                    @@ -4592,7 +4592,7 @@

                                    NestedIOFunction
                                    -static setup_context(ctx, inputs, output)[source]#
                                    +static setup_context(ctx, inputs, output)[source]#

                                    There are two ways to define the forward pass of an autograd.Function.

                                    Either:

                                      @@ -4613,7 +4613,7 @@

                                      NestedIOFunction
                                      -static vjp(ctx, *grad_outputs)[source]#
                                      +static vjp(ctx, *grad_outputs)[source]#

                                      Define a formula for differentiating the operation with backward mode automatic differentiation.

                                      This function is to be overridden by all subclasses. (Defining this function is equivalent to defining the vjp function.)

                                      @@ -4640,7 +4640,7 @@

                                      NestedIOFunction
                                      -static vmap(info, in_dims, *args)[source]#
                                      +static vmap(info, in_dims, *args)[source]#

                                      Define the behavior for this autograd.Function underneath torch.vmap().

For a torch.autograd.Function() to support torch.vmap(), you must either override this static method, or set
diff --git a/2.9/generated/torch.autograd.function.once_differentiable.html b/2.9/generated/torch.autograd.function.once_differentiable.html
index 3419e48905c..f383b912c14 100644
--- a/2.9/generated/torch.autograd.function.once_differentiable.html
+++ b/2.9/generated/torch.autograd.function.once_differentiable.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.function.once_differentiable#

                                      -torch.autograd.function.once_differentiable(fn)[source]#
                                      +torch.autograd.function.once_differentiable(fn)[source]#
                                      Return type

                                      Callable[[Concatenate[_T, ~_P]], _R]

diff --git a/2.9/generated/torch.autograd.functional.hessian.html b/2.9/generated/torch.autograd.functional.hessian.html
index 09f0571bec1..de4c5b47be4 100644
--- a/2.9/generated/torch.autograd.functional.hessian.html
+++ b/2.9/generated/torch.autograd.functional.hessian.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.functional.hessian#

                                      -torch.autograd.functional.hessian(func, inputs, create_graph=False, strict=False, vectorize=False, outer_jacobian_strategy='reverse-mode')[source]#
                                      +torch.autograd.functional.hessian(func, inputs, create_graph=False, strict=False, vectorize=False, outer_jacobian_strategy='reverse-mode')[source]#

                                      Compute the Hessian of a given scalar function.

                                      Parameters
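
For a scalar function, e.g.:

>>> import torch
>>> from torch.autograd.functional import hessian
>>> def pow3_sum(x):
...     return (x ** 3).sum()
>>> hessian(pow3_sum, torch.tensor([1.0, 2.0]))
tensor([[ 6.,  0.],
        [ 0., 12.]])
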
diff --git a/2.9/generated/torch.autograd.functional.hvp.html b/2.9/generated/torch.autograd.functional.hvp.html
index 003b40b6438..c75e69afa4e 100644
--- a/2.9/generated/torch.autograd.functional.hvp.html
+++ b/2.9/generated/torch.autograd.functional.hvp.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.functional.hvp#

                                      -torch.autograd.functional.hvp(func, inputs, v=None, create_graph=False, strict=False)[source]#
                                      +torch.autograd.functional.hvp(func, inputs, v=None, create_graph=False, strict=False)[source]#

                                      Compute the dot product between the scalar function’s Hessian and a vector v at a specified point.

                                      Parameters
diff --git a/2.9/generated/torch.autograd.functional.jacobian.html b/2.9/generated/torch.autograd.functional.jacobian.html
index 6e7951de0d9..67777cccdd6 100644
--- a/2.9/generated/torch.autograd.functional.jacobian.html
+++ b/2.9/generated/torch.autograd.functional.jacobian.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.functional.jacobian#

                                      -torch.autograd.functional.jacobian(func, inputs, create_graph=False, strict=False, vectorize=False, strategy='reverse-mode')[source]#
                                      +torch.autograd.functional.jacobian(func, inputs, create_graph=False, strict=False, vectorize=False, strategy='reverse-mode')[source]#

                                      Compute the Jacobian of a given function.

                                      Parameters
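
For example:

>>> import torch
>>> from torch.autograd.functional import jacobian
>>> def square(x):
...     return x ** 2
>>> jacobian(square, torch.tensor([1.0, 2.0]))
tensor([[2., 0.],
        [0., 4.]])
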
diff --git a/2.9/generated/torch.autograd.functional.jvp.html b/2.9/generated/torch.autograd.functional.jvp.html
index 78703d75714..479aac7a5e0 100644
--- a/2.9/generated/torch.autograd.functional.jvp.html
+++ b/2.9/generated/torch.autograd.functional.jvp.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.functional.jvp#

                                      -torch.autograd.functional.jvp(func, inputs, v=None, create_graph=False, strict=False)[source]#
                                      +torch.autograd.functional.jvp(func, inputs, v=None, create_graph=False, strict=False)[source]#

                                      Compute the dot product between the Jacobian of the given function at the point given by the inputs and a vector v.

                                      Parameters
diff --git a/2.9/generated/torch.autograd.functional.vhp.html b/2.9/generated/torch.autograd.functional.vhp.html
index c01ee897dca..453fbbeccc5 100644
--- a/2.9/generated/torch.autograd.functional.vhp.html
+++ b/2.9/generated/torch.autograd.functional.vhp.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.functional.vhp#

                                      -torch.autograd.functional.vhp(func, inputs, v=None, create_graph=False, strict=False)[source]#
                                      +torch.autograd.functional.vhp(func, inputs, v=None, create_graph=False, strict=False)[source]#

                                      Compute the dot product between vector v and Hessian of a given scalar function at a specified point.

                                      Parameters
diff --git a/2.9/generated/torch.autograd.functional.vjp.html b/2.9/generated/torch.autograd.functional.vjp.html
index dd4e05fa78d..7f1e1bb578e 100644
--- a/2.9/generated/torch.autograd.functional.vjp.html
+++ b/2.9/generated/torch.autograd.functional.vjp.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.functional.vjp#

                                      -torch.autograd.functional.vjp(func, inputs, v=None, create_graph=False, strict=False)[source]#
                                      +torch.autograd.functional.vjp(func, inputs, v=None, create_graph=False, strict=False)[source]#

                                      Compute the dot product between a vector v and the Jacobian of the given function at the point given by the inputs.

                                      Parameters
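
For example (vjp returns both the function output and the product):

>>> import torch
>>> from torch.autograd.functional import vjp
>>> def square(x):
...     return x ** 2
>>> out, grad = vjp(square, torch.tensor([1.0, 2.0]), torch.ones(2))
>>> grad
tensor([2., 4.])
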
diff --git a/2.9/generated/torch.autograd.grad.html b/2.9/generated/torch.autograd.grad.html
index 5e60deb0e73..a35d7508201 100644
--- a/2.9/generated/torch.autograd.grad.html
+++ b/2.9/generated/torch.autograd.grad.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.grad#

                                      -torch.autograd.grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=False, only_inputs=True, allow_unused=None, is_grads_batched=False, materialize_grads=False)[source]#
                                      +torch.autograd.grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=False, only_inputs=True, allow_unused=None, is_grads_batched=False, materialize_grads=False)[source]#

                                      Compute and return the sum of gradients of outputs with respect to the inputs.
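
For instance:

>>> import torch
>>> x = torch.tensor([1.0, 2.0], requires_grad=True)
>>> y = (x ** 2).sum()
>>> (gx,) = torch.autograd.grad(y, x)
>>> gx
tensor([2., 4.])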

grad_outputs should be a sequence of length matching output containing the “vector” in vector-Jacobian product, usually the pre-computed
diff --git a/2.9/generated/torch.autograd.grad_mode.inference_mode.html b/2.9/generated/torch.autograd.grad_mode.inference_mode.html
index 3da80220333..04b11bb2d56 100644
--- a/2.9/generated/torch.autograd.grad_mode.inference_mode.html
+++ b/2.9/generated/torch.autograd.grad_mode.inference_mode.html
@@ -4404,7 +4404,7 @@

                                      inference_mode#

                                      -class torch.autograd.grad_mode.inference_mode(mode=True)[source]#
                                      +class torch.autograd.grad_mode.inference_mode(mode=True)[source]#

                                      Context manager that enables or disables inference mode.
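
For example:

>>> import torch
>>> x = torch.ones(2, requires_grad=True)
>>> with torch.inference_mode():
...     y = x * 2
>>> y.requires_grad
False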

InferenceMode is analogous to no_grad and should be used when you are certain your operations will not interact with autograd
@@ -4466,7 +4466,7 @@

                                      inference_mode
                                      -clone()[source]#
                                      +clone()[source]#

                                      Create a copy of this class

                                      Return type
diff --git a/2.9/generated/torch.autograd.grad_mode.set_grad_enabled.html b/2.9/generated/torch.autograd.grad_mode.set_grad_enabled.html
index d9d70a6a7ee..fc0e9a5785f 100644
--- a/2.9/generated/torch.autograd.grad_mode.set_grad_enabled.html
+++ b/2.9/generated/torch.autograd.grad_mode.set_grad_enabled.html
@@ -4404,7 +4404,7 @@

                                      set_grad_enabled#

                                      -class torch.autograd.grad_mode.set_grad_enabled(mode)[source]#
                                      +class torch.autograd.grad_mode.set_grad_enabled(mode)[source]#

                                      Context-manager that sets gradient calculation on or off.

                                      set_grad_enabled will enable or disable grads based on its argument mode. It can be used as a context-manager or as a function.
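
For example:

>>> import torch
>>> x = torch.ones(1, requires_grad=True)
>>> with torch.set_grad_enabled(False):
...     y = x * 2
>>> y.requires_grad
False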

                                      @@ -4448,7 +4448,7 @@

                                      set_grad_enabled
                                      -clone()[source]#
                                      +clone()[source]#

                                      Create a copy of this class

                                      Return type
diff --git a/2.9/generated/torch.autograd.grad_mode.set_multithreading_enabled.html b/2.9/generated/torch.autograd.grad_mode.set_multithreading_enabled.html
index 23cd3f0df62..a2a91d7423b 100644
--- a/2.9/generated/torch.autograd.grad_mode.set_multithreading_enabled.html
+++ b/2.9/generated/torch.autograd.grad_mode.set_multithreading_enabled.html
@@ -4404,7 +4404,7 @@

                                      set_multithreading_enabled#

                                      -class torch.autograd.grad_mode.set_multithreading_enabled(mode)[source]#
                                      +class torch.autograd.grad_mode.set_multithreading_enabled(mode)[source]#

                                      Context-manager that sets multithreaded backwards on or off.

                                      set_multithreading_enabled will enable or disable multithreaded backwards based on its argument mode. It can be used as a context-manager or as a function.

                                      @@ -4422,7 +4422,7 @@

                                      set_multithreading_enabled
                                      -clone()[source]#
                                      +clone()[source]#

                                      Create a copy of this class

                                      Return type
diff --git a/2.9/generated/torch.autograd.gradcheck.GradcheckError.html b/2.9/generated/torch.autograd.gradcheck.GradcheckError.html
index 9e84e3121e5..da4cf4a73fd 100644
--- a/2.9/generated/torch.autograd.gradcheck.GradcheckError.html
+++ b/2.9/generated/torch.autograd.gradcheck.GradcheckError.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.gradcheck.GradcheckError#

                                      -exception torch.autograd.gradcheck.GradcheckError[source]#
                                      +exception torch.autograd.gradcheck.GradcheckError[source]#

                                      Error raised by gradcheck() and gradgradcheck().

diff --git a/2.9/generated/torch.autograd.gradcheck.gradcheck.html b/2.9/generated/torch.autograd.gradcheck.gradcheck.html
index 84868c7bb17..03bc1f081b9 100644
--- a/2.9/generated/torch.autograd.gradcheck.gradcheck.html
+++ b/2.9/generated/torch.autograd.gradcheck.gradcheck.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.gradcheck.gradcheck#

                                      -torch.autograd.gradcheck.gradcheck(func, inputs, *, eps=1e-06, atol=1e-05, rtol=0.001, raise_exception=True, nondet_tol=0.0, check_undefined_grad=True, check_grad_dtypes=False, check_batched_grad=False, check_batched_forward_grad=False, check_forward_ad=False, check_backward_ad=True, fast_mode=False, masked=None)[source]#
                                      +torch.autograd.gradcheck.gradcheck(func, inputs, *, eps=1e-06, atol=1e-05, rtol=0.001, raise_exception=True, nondet_tol=0.0, check_undefined_grad=True, check_grad_dtypes=False, check_batched_grad=False, check_batched_forward_grad=False, check_forward_ad=False, check_backward_ad=True, fast_mode=False, masked=None)[source]#

                                      Check gradients computed via small finite differences against analytical gradients wrt tensors in inputs that are of floating point or complex type and with requires_grad=True.
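
A typical call uses double-precision inputs, e.g.:

>>> import torch
>>> from torch.autograd import gradcheck
>>> inp = torch.randn(3, dtype=torch.double, requires_grad=True)
>>> gradcheck(torch.sin, (inp,))
True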

diff --git a/2.9/generated/torch.autograd.gradcheck.gradgradcheck.html b/2.9/generated/torch.autograd.gradcheck.gradgradcheck.html
index a2b7c4f16cf..1e01bfa6608 100644
--- a/2.9/generated/torch.autograd.gradcheck.gradgradcheck.html
+++ b/2.9/generated/torch.autograd.gradcheck.gradgradcheck.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.gradcheck.gradgradcheck#

                                      -torch.autograd.gradcheck.gradgradcheck(func, inputs, grad_outputs=None, *, eps=1e-06, atol=1e-05, rtol=0.001, gen_non_contig_grad_outputs=False, raise_exception=True, nondet_tol=0.0, check_undefined_grad=True, check_grad_dtypes=False, check_batched_grad=False, check_fwd_over_rev=False, check_rev_over_rev=True, fast_mode=False, masked=False)[source]#
                                      +torch.autograd.gradcheck.gradgradcheck(func, inputs, grad_outputs=None, *, eps=1e-06, atol=1e-05, rtol=0.001, gen_non_contig_grad_outputs=False, raise_exception=True, nondet_tol=0.0, check_undefined_grad=True, check_grad_dtypes=False, check_batched_grad=False, check_fwd_over_rev=False, check_rev_over_rev=True, fast_mode=False, masked=False)[source]#

Check gradients of gradients computed via small finite differences against analytical gradients wrt tensors in inputs and grad_outputs that are of floating point or complex type and with
diff --git a/2.9/generated/torch.autograd.graph.Node.metadata.html b/2.9/generated/torch.autograd.graph.Node.metadata.html
index ddf7e6b444c..111fe5aac48 100644
--- a/2.9/generated/torch.autograd.graph.Node.metadata.html
+++ b/2.9/generated/torch.autograd.graph.Node.metadata.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.graph.Node.metadata#

                                      -abstract Node.metadata()[source]#
                                      +abstract Node.metadata()[source]#

                                      Return the metadata.

                                      Return type
diff --git a/2.9/generated/torch.autograd.graph.Node.name.html b/2.9/generated/torch.autograd.graph.Node.name.html
index 0d64b390586..f82f4565334 100644
--- a/2.9/generated/torch.autograd.graph.Node.name.html
+++ b/2.9/generated/torch.autograd.graph.Node.name.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.graph.Node.name#

                                      -abstract Node.name()[source]#
                                      +abstract Node.name()[source]#

                                      Return the name.

                                      Example:

                                      >>> import torch
                                      diff --git a/2.9/generated/torch.autograd.graph.Node.register_hook.html b/2.9/generated/torch.autograd.graph.Node.register_hook.html
                                      index 0d02a9fa1ef..26e8ee75fbc 100644
                                      --- a/2.9/generated/torch.autograd.graph.Node.register_hook.html
                                      +++ b/2.9/generated/torch.autograd.graph.Node.register_hook.html
                                      @@ -4404,7 +4404,7 @@
                                       

                                      torch.autograd.graph.Node.register_hook#

                                      -abstract Node.register_hook(fn)[source]#
                                      +abstract Node.register_hook(fn)[source]#

                                      Register a backward hook.

                                      The hook will be called every time a gradient with respect to the Node is computed. The hook should have the following signature:
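
As a sketch, the hook receives (grad_inputs, grad_outputs) and may return replacement grad_inputs or None:

>>> import torch
>>> x = torch.tensor([1.0], requires_grad=True)
>>> y = x * 2
>>> handle = y.grad_fn.register_hook(lambda grad_inputs, grad_outputs: None)
>>> y.backward()
>>> handle.remove()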

diff --git a/2.9/generated/torch.autograd.graph.Node.register_prehook.html b/2.9/generated/torch.autograd.graph.Node.register_prehook.html
index 4ba05e47c26..1f927e430ea 100644
--- a/2.9/generated/torch.autograd.graph.Node.register_prehook.html
+++ b/2.9/generated/torch.autograd.graph.Node.register_prehook.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.graph.Node.register_prehook#

                                      -abstract Node.register_prehook(fn)[source]#
                                      +abstract Node.register_prehook(fn)[source]#

                                      Register a backward pre-hook.

                                      The hook will be called every time a gradient with respect to the Node is computed. The hook should have the following signature:

diff --git a/2.9/generated/torch.autograd.graph.increment_version.html b/2.9/generated/torch.autograd.graph.increment_version.html
index bff7b683059..9a641bf25a3 100644
--- a/2.9/generated/torch.autograd.graph.increment_version.html
+++ b/2.9/generated/torch.autograd.graph.increment_version.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.graph.increment_version#

                                      -torch.autograd.graph.increment_version(tensor)[source]#
                                      +torch.autograd.graph.increment_version(tensor)[source]#

                                      Update autograd metadata tracking whether the given Tensor was modified in place.

This is to enable more accurate error checking within the autograd engine. It is already done automatically by PyTorch functions and within custom Function
diff --git a/2.9/generated/torch.autograd.profiler.EnforceUnique.html b/2.9/generated/torch.autograd.profiler.EnforceUnique.html
index 48e759c3c6b..0f5d795c6e3 100644
--- a/2.9/generated/torch.autograd.profiler.EnforceUnique.html
+++ b/2.9/generated/torch.autograd.profiler.EnforceUnique.html
@@ -4404,11 +4404,11 @@

                                      EnforceUnique#

                                      -class torch.autograd.profiler.EnforceUnique[source]#
                                      +class torch.autograd.profiler.EnforceUnique[source]#

                                      Raises an error if a key is seen more than once.

                                      -see(*key)[source]#
                                      +see(*key)[source]#

                                      Observe a key and raise an error if it is seen multiple times.

diff --git a/2.9/generated/torch.autograd.profiler.KinetoStepTracker.html b/2.9/generated/torch.autograd.profiler.KinetoStepTracker.html
index 8d11daba80c..99439474e2d 100644
--- a/2.9/generated/torch.autograd.profiler.KinetoStepTracker.html
+++ b/2.9/generated/torch.autograd.profiler.KinetoStepTracker.html
@@ -4404,7 +4404,7 @@

                                      KinetoStepTracker#

                                      -class torch.autograd.profiler.KinetoStepTracker[source]#
                                      +class torch.autograd.profiler.KinetoStepTracker[source]#

                                      Provides an abstraction for incrementing the step count globally.

Previously, we only had one place to mark that a step() has occurred in the program via pytorch profiler step(). We will now add step hooks
@@ -4439,7 +4439,7 @@

                                      KinetoStepTracker
                                      -classmethod current_step()[source]#
                                      +classmethod current_step()[source]#

                                      Get the latest step for any requester

                                      Return type
                                      @@ -4450,7 +4450,7 @@

                                      KinetoStepTracker
                                      -classmethod erase_step_count(requester)[source]#
                                      +classmethod erase_step_count(requester)[source]#

                                      Remove a given requester.

                                      Return type
                                      @@ -4461,7 +4461,7 @@

                                      KinetoStepTracker
                                      -classmethod increment_step(requester)[source]#
                                      +classmethod increment_step(requester)[source]#

                                      Increments the step count for the requester.

Additionally, if the max over all step counts has incremented, trigger _kineto_step() and return the global step count.

                                      @@ -4474,7 +4474,7 @@

                                      KinetoStepTracker
                                      -classmethod init_step_count(requester)[source]#
                                      +classmethod init_step_count(requester)[source]#

                                      Initialize for a given requester.

diff --git a/2.9/generated/torch.autograd.profiler.load_nvprof.html b/2.9/generated/torch.autograd.profiler.load_nvprof.html
index 8580401a819..c863e57b578 100644
--- a/2.9/generated/torch.autograd.profiler.load_nvprof.html
+++ b/2.9/generated/torch.autograd.profiler.load_nvprof.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.profiler.load_nvprof#

                                      -torch.autograd.profiler.load_nvprof(path)[source]#
                                      +torch.autograd.profiler.load_nvprof(path)[source]#

Open an nvprof trace file and parse autograd annotations.

                                      Parameters
diff --git a/2.9/generated/torch.autograd.profiler.parse_nvprof_trace.html b/2.9/generated/torch.autograd.profiler.parse_nvprof_trace.html
index e68a4a2aab0..06a71b4a5f3 100644
--- a/2.9/generated/torch.autograd.profiler.parse_nvprof_trace.html
+++ b/2.9/generated/torch.autograd.profiler.parse_nvprof_trace.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.profiler.parse_nvprof_trace#

                                      -torch.autograd.profiler.parse_nvprof_trace(path)[source]#
                                      +torch.autograd.profiler.parse_nvprof_trace(path)[source]#

diff --git a/2.9/generated/torch.autograd.profiler.profile.export_chrome_trace.html b/2.9/generated/torch.autograd.profiler.profile.export_chrome_trace.html
index d082d479fc0..aa70f628ff1 100644
--- a/2.9/generated/torch.autograd.profiler.profile.export_chrome_trace.html
+++ b/2.9/generated/torch.autograd.profiler.profile.export_chrome_trace.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.profiler.profile.export_chrome_trace#

                                      -profile.export_chrome_trace(path)[source]#
                                      +profile.export_chrome_trace(path)[source]#

                                      Export an EventList as a Chrome tracing tools file.

                                      The checkpoint can be later loaded and inspected under chrome://tracing URL.

diff --git a/2.9/generated/torch.autograd.profiler.profile.key_averages.html b/2.9/generated/torch.autograd.profiler.profile.key_averages.html
index 5f1ed7f3194..eb9654a2b23 100644
--- a/2.9/generated/torch.autograd.profiler.profile.key_averages.html
+++ b/2.9/generated/torch.autograd.profiler.profile.key_averages.html
@@ -4404,7 +4404,7 @@

                                      torch.autograd.profiler.profile.key_averages#

                                      -profile.key_averages(group_by_input_shape=False, group_by_stack_n=0, group_by_overload_name=False)[source]#
                                      +profile.key_averages(group_by_input_shape=False, group_by_stack_n=0, group_by_overload_name=False)[source]#

                                      Averages all function events over their keys.

                                      Parameters
                                      diff --git a/2.9/generated/torch.autograd.profiler.profile.total_average.html b/2.9/generated/torch.autograd.profiler.profile.total_average.html index 8d2b8125206..ed30718e80e 100644 --- a/2.9/generated/torch.autograd.profiler.profile.total_average.html +++ b/2.9/generated/torch.autograd.profiler.profile.total_average.html @@ -4404,7 +4404,7 @@

                                      torch.autograd.profiler.profile.total_average#

                                      -profile.total_average()[source]#
                                      +profile.total_average()[source]#

                                      Averages all events.

                                      Returns
                                      diff --git a/2.9/generated/torch.autograd.profiler.record_function.html b/2.9/generated/torch.autograd.profiler.record_function.html index e95b8a5facc..64f8b010aab 100644 --- a/2.9/generated/torch.autograd.profiler.record_function.html +++ b/2.9/generated/torch.autograd.profiler.record_function.html @@ -4404,7 +4404,7 @@

                                      record_function#

                                      -class torch.autograd.profiler.record_function(name, args=None)[source]#
                                      +class torch.autograd.profiler.record_function(name, args=None)[source]#

Context manager/function decorator that adds a label to a code block/function when running the autograd profiler. The label will only appear if CPU activity tracing is enabled.

                                      It is useful when tracing the code profile.
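A minimal sketch; "my_block" is an arbitrary label:

>>> import torch
>>> with torch.autograd.profiler.profile() as prof:
...     with torch.autograd.profiler.record_function("my_block"):
...         y = torch.randn(128, 128) @ torch.randn(128, 128)
>>> print(prof.key_averages().table())  # "my_block" appears as its own row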

                                      diff --git a/2.9/generated/torch.autograd.profiler_util.Interval.html b/2.9/generated/torch.autograd.profiler_util.Interval.html index a07857c8e9b..5e85ae4563a 100644 --- a/2.9/generated/torch.autograd.profiler_util.Interval.html +++ b/2.9/generated/torch.autograd.profiler_util.Interval.html @@ -4404,10 +4404,10 @@

                                      Interval#

                                      -class torch.autograd.profiler_util.Interval(start, end)[source]#
                                      +class torch.autograd.profiler_util.Interval(start, end)[source]#
                                      -elapsed_us()[source]#
                                      +elapsed_us()[source]#

Returns the length of the interval.

                                      diff --git a/2.9/generated/torch.autograd.profiler_util.MemRecordsAcc.html b/2.9/generated/torch.autograd.profiler_util.MemRecordsAcc.html index 018934aa00d..18b29941fb0 100644 --- a/2.9/generated/torch.autograd.profiler_util.MemRecordsAcc.html +++ b/2.9/generated/torch.autograd.profiler_util.MemRecordsAcc.html @@ -4404,11 +4404,11 @@

                                      MemRecordsAcc#

                                      -class torch.autograd.profiler_util.MemRecordsAcc(mem_records)[source]#
                                      +class torch.autograd.profiler_util.MemRecordsAcc(mem_records)[source]#

                                      Acceleration structure for accessing mem_records in interval.

                                      -in_interval(start_us, end_us)[source]#
                                      +in_interval(start_us, end_us)[source]#

Return all records in the given interval. To maintain backward compatibility, microseconds (us) are converted to nanoseconds (ns) inside the function.

                                      diff --git a/2.9/generated/torch.autograd.profiler_util.StringTable.html b/2.9/generated/torch.autograd.profiler_util.StringTable.html index a7839e80406..1893ca956a7 100644 --- a/2.9/generated/torch.autograd.profiler_util.StringTable.html +++ b/2.9/generated/torch.autograd.profiler_util.StringTable.html @@ -4404,7 +4404,7 @@

                                      StringTable#

                                      -class torch.autograd.profiler_util.StringTable[source]#
                                      +class torch.autograd.profiler_util.StringTable[source]#
clear() → None.  Remove all items from D.#
                                      diff --git a/2.9/generated/torch.block_diag.html b/2.9/generated/torch.block_diag.html index f89e44b7bbc..f3a238d7084 100644 --- a/2.9/generated/torch.block_diag.html +++ b/2.9/generated/torch.block_diag.html @@ -4404,7 +4404,7 @@

                                      torch.block_diag#

                                      -torch.block_diag(*tensors)[source]#
                                      +torch.block_diag(*tensors)[source]#

                                      Create a block diagonal matrix from provided tensors.

                                      Parameters
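For instance:

>>> import torch
>>> A = torch.tensor([[1, 2], [3, 4]])
>>> B = torch.tensor([[5]])
>>> torch.block_diag(A, B)
tensor([[1, 2, 0],
        [3, 4, 0],
        [0, 0, 5]])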
                                      diff --git a/2.9/generated/torch.broadcast_shapes.html b/2.9/generated/torch.broadcast_shapes.html index 27e97d580e9..ca8e8623674 100644 --- a/2.9/generated/torch.broadcast_shapes.html +++ b/2.9/generated/torch.broadcast_shapes.html @@ -4404,7 +4404,7 @@

                                      torch.broadcast_shapes#

-torch.broadcast_shapes(*shapes) → Size[source]#
+torch.broadcast_shapes(*shapes) → Size[source]#

                                      Similar to broadcast_tensors() but for shapes.
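
For instance:

>>> import torch
>>> torch.broadcast_shapes((2,), (3, 1))
torch.Size([3, 2])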

This is equivalent to torch.broadcast_tensors(*map(torch.empty, shapes))[0].shape

diff --git a/2.9/generated/torch.broadcast_tensors.html index 97f888b26d3..5d206f0e779 100644 --- a/2.9/generated/torch.broadcast_tensors.html +++ b/2.9/generated/torch.broadcast_tensors.html @@ -4404,7 +4404,7 @@

                                      torch.broadcast_tensors#

-torch.broadcast_tensors(*tensors) → List of Tensors[source]#
+torch.broadcast_tensors(*tensors) → List of Tensors[source]#

                                      Broadcasts the given tensors according to Broadcasting semantics.

                                      Parameters
                                      diff --git a/2.9/generated/torch.cartesian_prod.html b/2.9/generated/torch.cartesian_prod.html index 77e38f1e751..d377de72827 100644 --- a/2.9/generated/torch.cartesian_prod.html +++ b/2.9/generated/torch.cartesian_prod.html @@ -4404,7 +4404,7 @@

                                      torch.cartesian_prod#

                                      -torch.cartesian_prod(*tensors)[source]#
                                      +torch.cartesian_prod(*tensors)[source]#

Compute the Cartesian product of the given sequence of tensors. The behavior is similar to Python’s itertools.product.
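For instance:

>>> import torch
>>> a = torch.tensor([1, 2])
>>> b = torch.tensor([3, 4])
>>> torch.cartesian_prod(a, b)
tensor([[1, 3],
        [1, 4],
        [2, 3],
        [2, 4]])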

                                      diff --git a/2.9/generated/torch.cdist.html b/2.9/generated/torch.cdist.html index c8ca96575c0..898679d682f 100644 --- a/2.9/generated/torch.cdist.html +++ b/2.9/generated/torch.cdist.html @@ -4404,7 +4404,7 @@

                                      torch.cdist#

                                      -torch.cdist(x1, x2, p=2.0, compute_mode='use_mm_for_euclid_dist_if_necessary')[source]#
                                      +torch.cdist(x1, x2, p=2.0, compute_mode='use_mm_for_euclid_dist_if_necessary')[source]#

Computes the batched p-norm distance between each pair of row vectors in the two collections.

                                      Parameters
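A shape-only sketch: with batched inputs of shape (B, P, M) and (B, R, M), the result has shape (B, P, R):

>>> import torch
>>> x1 = torch.randn(2, 5, 3)
>>> x2 = torch.randn(2, 4, 3)
>>> torch.cdist(x1, x2).shape
torch.Size([2, 5, 4])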
                                      diff --git a/2.9/generated/torch.chain_matmul.html b/2.9/generated/torch.chain_matmul.html index ff1ee470f39..5e07f73778b 100644 --- a/2.9/generated/torch.chain_matmul.html +++ b/2.9/generated/torch.chain_matmul.html @@ -4404,7 +4404,7 @@

                                      torch.chain_matmul#

                                      -torch.chain_matmul(*matrices, out=None)[source]#
                                      +torch.chain_matmul(*matrices, out=None)[source]#

Returns the matrix product of the N 2-D tensors. This product is efficiently computed using the matrix chain order algorithm, which selects the order that incurs the lowest cost in terms of arithmetic operations ([CLRS]). Note that since this is a function to compute the product, N

diff --git a/2.9/generated/torch.compile.html index 6af5ccd4c74..c01effb75c9 100644 --- a/2.9/generated/torch.compile.html +++ b/2.9/generated/torch.compile.html @@ -4404,7 +4404,7 @@

                                      torch.compile#

-torch.compile(model: Callable[[_InputT], _RetT], *, fullgraph: bool = False, dynamic: Optional[bool] = None, backend: Union[str, Callable] = 'inductor', mode: Optional[str] = None, options: Optional[dict[str, Union[str, int, bool, Callable]]] = None, disable: bool = False) → Callable[[_InputT], _RetT][source]#
+torch.compile(model: Callable[[_InputT], _RetT], *, fullgraph: bool = False, dynamic: Optional[bool] = None, backend: Union[str, Callable] = 'inductor', mode: Optional[str] = None, options: Optional[dict[str, Union[str, int, bool, Callable]]] = None, disable: bool = False) → Callable[[_InputT], _RetT][source]#
torch.compile(model: None = None, *, fullgraph: bool = False, dynamic: Optional[bool] = None, backend: Union[str, Callable] = 'inductor', mode: Optional[str] = None, options: Optional[dict[str, Union[str, int, bool, Callable]]] = None, disable: bool = False) → Callable[[Callable[[_InputT], _RetT]], Callable[[_InputT], _RetT]]

Optimizes the given model/function using TorchDynamo and the specified backend.

diff --git a/2.9/generated/torch.compiled_with_cxx11_abi.html index daaa5f353d6..dc17c8f4bdf 100644 --- a/2.9/generated/torch.compiled_with_cxx11_abi.html +++ b/2.9/generated/torch.compiled_with_cxx11_abi.html @@ -4404,7 +4404,7 @@

                                      torch.compiled_with_cxx11_abi#

                                      -torch.compiled_with_cxx11_abi()[source]#
                                      +torch.compiled_with_cxx11_abi()[source]#

                                      Returns whether PyTorch was built with _GLIBCXX_USE_CXX11_ABI=1

                                      Return type
                                      diff --git a/2.9/generated/torch.compiler.allow_in_graph.html b/2.9/generated/torch.compiler.allow_in_graph.html index 1f52b599197..84b1a10d8a8 100644 --- a/2.9/generated/torch.compiler.allow_in_graph.html +++ b/2.9/generated/torch.compiler.allow_in_graph.html @@ -4415,7 +4415,7 @@

                                      torch.compiler.allow_in_graph#

                                      -torch.compiler.allow_in_graph(fn)[source]#
                                      +torch.compiler.allow_in_graph(fn)[source]#

                                      Tells the compiler frontend (Dynamo) to skip symbolic introspection of the function and instead directly write it to the graph when encountered.

If you are using torch.compile() (with backend=”inductor” (the default)), or

diff --git a/2.9/generated/torch.compiler.assume_constant_result.html index 19cea81fc59..1159dbcef6f 100644 --- a/2.9/generated/torch.compiler.assume_constant_result.html +++ b/2.9/generated/torch.compiler.assume_constant_result.html @@ -4415,7 +4415,7 @@

                                      torch.compiler.assume_constant_result#

                                      -torch.compiler.assume_constant_result(fn)[source]#
                                      +torch.compiler.assume_constant_result(fn)[source]#

This function is used to mark a function fn as having a constant result. This allows the compiler to optimize away your function. Returns the same function fn.

                                      diff --git a/2.9/generated/torch.compiler.compile.html b/2.9/generated/torch.compiler.compile.html index 5d89b59d5e4..f27ea4063e0 100644 --- a/2.9/generated/torch.compiler.compile.html +++ b/2.9/generated/torch.compiler.compile.html @@ -4415,7 +4415,7 @@

                                      torch.compiler.compile#

                                      -torch.compiler.compile(*args, **kwargs)[source]#
                                      +torch.compiler.compile(*args, **kwargs)[source]#

                                      See torch.compile() for details on the arguments for this function.
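Since this forwards to torch.compile(), a minimal sketch of the decorator form of the latter:

>>> import torch
>>> @torch.compile
... def fn(x):
...     return torch.sin(x) + torch.cos(x)
>>> y = fn(torch.randn(8))  # the first call triggers compilation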

                                      diff --git a/2.9/generated/torch.compiler.cudagraph_mark_step_begin.html b/2.9/generated/torch.compiler.cudagraph_mark_step_begin.html index 753a25701e0..5c3dda95ae5 100644 --- a/2.9/generated/torch.compiler.cudagraph_mark_step_begin.html +++ b/2.9/generated/torch.compiler.cudagraph_mark_step_begin.html @@ -4415,7 +4415,7 @@

                                      torch.compiler.cudagraph_mark_step_begin#

                                      -torch.compiler.cudagraph_mark_step_begin()[source]#
                                      +torch.compiler.cudagraph_mark_step_begin()[source]#

                                      Indicates that a new iteration of inference or training is about to begin.

CUDA Graphs will free tensors of a prior iteration. A new iteration is started on each invocation of torch.compile, so long as there is no pending backward that has not yet been called.
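A sketch of the intended call pattern (assumes a CUDA device is available):

>>> import torch
>>> @torch.compile(mode="reduce-overhead")
... def step(x):
...     return x * 2
>>> for _ in range(3):
...     torch.compiler.cudagraph_mark_step_begin()
...     out = step(torch.randn(4, device="cuda"))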

                                      diff --git a/2.9/generated/torch.compiler.disable.html b/2.9/generated/torch.compiler.disable.html index 7116f596369..ff8e3cf7d52 100644 --- a/2.9/generated/torch.compiler.disable.html +++ b/2.9/generated/torch.compiler.disable.html @@ -4415,7 +4415,7 @@

                                      torch.compiler.disable#

                                      -torch.compiler.disable(fn=None, recursive=True, *, reason=None)[source]#
                                      +torch.compiler.disable(fn=None, recursive=True, *, reason=None)[source]#

                                      This function provides a decorator to disable compilation on a function. It also provides the option of recursively disabling called functions.
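A minimal sketch of the decorator form; with recursive=True (the default), functions called from debug_helper are skipped as well:

>>> import torch
>>> @torch.compiler.disable
... def debug_helper(x):
...     print(x.shape)  # side effects here are never traced
...     return x + 1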

                                      diff --git a/2.9/generated/torch.compiler.is_compiling.html b/2.9/generated/torch.compiler.is_compiling.html index 3838e079a74..ed07f7f8dc9 100644 --- a/2.9/generated/torch.compiler.is_compiling.html +++ b/2.9/generated/torch.compiler.is_compiling.html @@ -4415,7 +4415,7 @@

                                      torch.compiler.is_compiling#

                                      -torch.compiler.is_compiling()[source]#
                                      +torch.compiler.is_compiling()[source]#

                                      Indicates whether a graph is executed/traced as part of torch.compile() or torch.export().

Note that there are 2 other related flags that should be deprecated eventually:
                                        diff --git a/2.9/generated/torch.compiler.is_dynamo_compiling.html b/2.9/generated/torch.compiler.is_dynamo_compiling.html index c212616b0c6..07a1baeea84 100644 --- a/2.9/generated/torch.compiler.is_dynamo_compiling.html +++ b/2.9/generated/torch.compiler.is_dynamo_compiling.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.is_dynamo_compiling#

                                        -torch.compiler.is_dynamo_compiling()[source]#
                                        +torch.compiler.is_dynamo_compiling()[source]#

                                        Indicates whether a graph is traced via TorchDynamo.

It’s stricter than the is_compiling() flag, as it would only be set to True when TorchDynamo is used.

                                        diff --git a/2.9/generated/torch.compiler.is_exporting.html b/2.9/generated/torch.compiler.is_exporting.html index 70eab6d4df0..4b93ea5f43e 100644 --- a/2.9/generated/torch.compiler.is_exporting.html +++ b/2.9/generated/torch.compiler.is_exporting.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.is_exporting#

                                        -torch.compiler.is_exporting()[source]#
                                        +torch.compiler.is_exporting()[source]#

Indicates whether we’re exporting.

It’s stricter than the is_compiling() flag, as it would only be set to True when torch.export is used.

                                        diff --git a/2.9/generated/torch.compiler.keep_tensor_guards_unsafe.html b/2.9/generated/torch.compiler.keep_tensor_guards_unsafe.html index 62be1cc2e88..76bf06bce42 100644 --- a/2.9/generated/torch.compiler.keep_tensor_guards_unsafe.html +++ b/2.9/generated/torch.compiler.keep_tensor_guards_unsafe.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.keep_tensor_guards_unsafe#

                                        -torch.compiler.keep_tensor_guards_unsafe(guard_entries, keep_parameters=False)[source]#
                                        +torch.compiler.keep_tensor_guards_unsafe(guard_entries, keep_parameters=False)[source]#

                                        A common function to keep tensor guards on all tensors. This is unsafe to use by default. But if you don’t expect any changes in the model code, you can just keep the tensor guards.

                                        diff --git a/2.9/generated/torch.compiler.list_backends.html b/2.9/generated/torch.compiler.list_backends.html index 3eb93b3d84c..a1850365d47 100644 --- a/2.9/generated/torch.compiler.list_backends.html +++ b/2.9/generated/torch.compiler.list_backends.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.list_backends#

                                        -torch.compiler.list_backends(exclude_tags=('debug', 'experimental'))[source]#
                                        +torch.compiler.list_backends(exclude_tags=('debug', 'experimental'))[source]#

                                        Return valid strings that can be passed to torch.compile(…, backend=”name”).

                                        Parameters
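For example (the returned names vary by build; those shown are illustrative):

>>> import torch
>>> torch.compiler.list_backends()  # e.g. ['cudagraphs', 'inductor', ...]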
                                        diff --git a/2.9/generated/torch.compiler.nested_compile_region.html b/2.9/generated/torch.compiler.nested_compile_region.html index a0d94108a36..9c3445a3bd7 100644 --- a/2.9/generated/torch.compiler.nested_compile_region.html +++ b/2.9/generated/torch.compiler.nested_compile_region.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.nested_compile_region#

                                        -torch.compiler.nested_compile_region(fn=None)[source]#
                                        +torch.compiler.nested_compile_region(fn=None)[source]#

Tells ``torch.compile`` that the marked set of operations forms a nested compile region (which is often repeated in the full model) whose code can be compiled once and safely reused. nested_compile_region can also be used

diff --git a/2.9/generated/torch.compiler.reset.html index 11b6dbd74f8..18b3c3d5cbb 100644 --- a/2.9/generated/torch.compiler.reset.html +++ b/2.9/generated/torch.compiler.reset.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.reset#

                                        -torch.compiler.reset()[source]#
                                        +torch.compiler.reset()[source]#

This function clears all compilation caches and restores the system to its initial state. It is recommended to call this function, especially after using operations like torch.compile(…), to ensure a clean state before another, unrelated compilation.

                                        diff --git a/2.9/generated/torch.compiler.set_enable_guard_collectives.html b/2.9/generated/torch.compiler.set_enable_guard_collectives.html index 28aded32e80..3ff69fde7fc 100644 --- a/2.9/generated/torch.compiler.set_enable_guard_collectives.html +++ b/2.9/generated/torch.compiler.set_enable_guard_collectives.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.set_enable_guard_collectives#

                                        -torch.compiler.set_enable_guard_collectives(enabled)[source]#
                                        +torch.compiler.set_enable_guard_collectives(enabled)[source]#

Enables use of collectives during guard evaluation to synchronize behavior across ranks. This is expensive: we have to issue a collective every time we enter a compiled code region, even if no rank actually would need to

diff --git a/2.9/generated/torch.compiler.set_stance.html index 57a84ee9ddc..663857c85cc 100644 --- a/2.9/generated/torch.compiler.set_stance.html +++ b/2.9/generated/torch.compiler.set_stance.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.set_stance#

                                        -torch.compiler.set_stance(stance='default', *, skip_guard_eval_unsafe=False, force_backend=None)[source]#
                                        +torch.compiler.set_stance(stance='default', *, skip_guard_eval_unsafe=False, force_backend=None)[source]#

                                        Set the current stance of the compiler. Can be used as a function, context manager, or decorator. Do not use this function inside a torch.compile region - an error will be raised otherwise.
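A sketch of the context-manager form; "force_eager" is one of the available stances:

>>> import torch
>>> compiled_fn = torch.compile(lambda x: x * 2)
>>> with torch.compiler.set_stance("force_eager"):
...     y = compiled_fn(torch.randn(4))  # runs eagerly inside this block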

                                        diff --git a/2.9/generated/torch.compiler.skip_guard_on_all_nn_modules_unsafe.html b/2.9/generated/torch.compiler.skip_guard_on_all_nn_modules_unsafe.html index 694077ecd1c..e0ad2168a53 100644 --- a/2.9/generated/torch.compiler.skip_guard_on_all_nn_modules_unsafe.html +++ b/2.9/generated/torch.compiler.skip_guard_on_all_nn_modules_unsafe.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.skip_guard_on_all_nn_modules_unsafe#

                                        -torch.compiler.skip_guard_on_all_nn_modules_unsafe(guard_entries)[source]#
                                        +torch.compiler.skip_guard_on_all_nn_modules_unsafe(guard_entries)[source]#

A common function to skip guards on all nn modules, both user-defined as well as inbuilt nn modules (like torch.nn.Linear). This is unsafe to use by default. But for the majority of torch.compile users, the model code does not

diff --git a/2.9/generated/torch.compiler.skip_guard_on_globals_unsafe.html index 26560f0ce54..dd011dc646c 100644 --- a/2.9/generated/torch.compiler.skip_guard_on_globals_unsafe.html +++ b/2.9/generated/torch.compiler.skip_guard_on_globals_unsafe.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.skip_guard_on_globals_unsafe#

                                        -torch.compiler.skip_guard_on_globals_unsafe(guard_entries)[source]#
                                        +torch.compiler.skip_guard_on_globals_unsafe(guard_entries)[source]#

A common function to skip guards on all globals. This is unsafe to use by default. But if you don’t expect any changes in the globals, you can just skip these guards.

                                        diff --git a/2.9/generated/torch.compiler.skip_guard_on_inbuilt_nn_modules_unsafe.html b/2.9/generated/torch.compiler.skip_guard_on_inbuilt_nn_modules_unsafe.html index a0c09eb291f..821dd59d357 100644 --- a/2.9/generated/torch.compiler.skip_guard_on_inbuilt_nn_modules_unsafe.html +++ b/2.9/generated/torch.compiler.skip_guard_on_inbuilt_nn_modules_unsafe.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.skip_guard_on_inbuilt_nn_modules_unsafe#

                                        -torch.compiler.skip_guard_on_inbuilt_nn_modules_unsafe(guard_entries)[source]#
                                        +torch.compiler.skip_guard_on_inbuilt_nn_modules_unsafe(guard_entries)[source]#

A common function to skip guards on the inbuilt nn modules like torch.nn.Linear. This is unsafe to use by default. But for the majority of torch.compile users, the model code does not modify the inbuilt nn module

diff --git a/2.9/generated/torch.compiler.substitute_in_graph.html index 79a9ed9cd62..548677a9e16 100644 --- a/2.9/generated/torch.compiler.substitute_in_graph.html +++ b/2.9/generated/torch.compiler.substitute_in_graph.html @@ -4415,7 +4415,7 @@

                                        torch.compiler.substitute_in_graph#

                                        -torch.compiler.substitute_in_graph(original_fn, *, can_constant_fold_through=False, skip_signature_check=False)[source]#
                                        +torch.compiler.substitute_in_graph(original_fn, *, can_constant_fold_through=False, skip_signature_check=False)[source]#

                                        Register a polyfill handler for a function, usually a C function from the C extension, to be used in place of the original function when inlining the original function in the graph.

                                        diff --git a/2.9/generated/torch.cond.html b/2.9/generated/torch.cond.html index 1e4a352d9c9..6aad087c9bb 100644 --- a/2.9/generated/torch.cond.html +++ b/2.9/generated/torch.cond.html @@ -4404,7 +4404,7 @@

                                        torch.cond#

                                        -torch.cond(pred, true_fn, false_fn, operands=())[source]#
                                        +torch.cond(pred, true_fn, false_fn, operands=())[source]#

                                        Conditionally applies true_fn or false_fn.

                                        Warning
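A minimal usage sketch, branching on a boolean tensor predicate:

>>> import torch
>>> def true_fn(x):
...     return x.sin()
>>> def false_fn(x):
...     return x.cos()
>>> x = torch.randn(4)
>>> out = torch.cond(x.sum() > 0, true_fn, false_fn, (x,))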

                                        diff --git a/2.9/generated/torch.cpu.Stream.html b/2.9/generated/torch.cpu.Stream.html index 6197b8d453e..4b66827ffcb 100644 --- a/2.9/generated/torch.cpu.Stream.html +++ b/2.9/generated/torch.cpu.Stream.html @@ -4404,7 +4404,7 @@

                                        Stream#

                                        -class torch.cpu.Stream(priority=-1)[source]#
                                        +class torch.cpu.Stream(priority=-1)[source]#

                                        N.B. This class only exists to facilitate device-agnostic code

                                        diff --git a/2.9/generated/torch.cpu.StreamContext.html b/2.9/generated/torch.cpu.StreamContext.html index bc52ac87e66..10580816fc0 100644 --- a/2.9/generated/torch.cpu.StreamContext.html +++ b/2.9/generated/torch.cpu.StreamContext.html @@ -4404,7 +4404,7 @@

                                        StreamContext#

                                        -class torch.cpu.StreamContext(stream)[source]#
                                        +class torch.cpu.StreamContext(stream)[source]#

                                        Context-manager that selects a given stream.

                                        N.B. This class only exists to facilitate device-agnostic code

                                        diff --git a/2.9/generated/torch.cpu.current_device.html b/2.9/generated/torch.cpu.current_device.html index 6108858aa35..038137d3613 100644 --- a/2.9/generated/torch.cpu.current_device.html +++ b/2.9/generated/torch.cpu.current_device.html @@ -4404,7 +4404,7 @@

                                        torch.cpu.current_device#

                                        -torch.cpu.current_device()[source]#
                                        +torch.cpu.current_device()[source]#

Returns the current device for CPU. Always ‘cpu’.

                                        N.B. This function only exists to facilitate device-agnostic code

                                        diff --git a/2.9/generated/torch.cpu.current_stream.html b/2.9/generated/torch.cpu.current_stream.html index 5dbce412888..21750e23d6a 100644 --- a/2.9/generated/torch.cpu.current_stream.html +++ b/2.9/generated/torch.cpu.current_stream.html @@ -4404,7 +4404,7 @@

                                        torch.cpu.current_stream#

                                        -torch.cpu.current_stream(device=None)[source]#
                                        +torch.cpu.current_stream(device=None)[source]#

                                        Returns the currently selected Stream for a given device.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cpu.device_count.html b/2.9/generated/torch.cpu.device_count.html index 236b3475b1f..1d462a157d3 100644 --- a/2.9/generated/torch.cpu.device_count.html +++ b/2.9/generated/torch.cpu.device_count.html @@ -4404,7 +4404,7 @@

                                        torch.cpu.device_count#

                                        -torch.cpu.device_count()[source]#
                                        +torch.cpu.device_count()[source]#

Returns the number of CPU devices (not cores). Always 1.

                                        N.B. This function only exists to facilitate device-agnostic code

                                        diff --git a/2.9/generated/torch.cpu.is_available.html b/2.9/generated/torch.cpu.is_available.html index e7a60a77860..2ee2d52c65b 100644 --- a/2.9/generated/torch.cpu.is_available.html +++ b/2.9/generated/torch.cpu.is_available.html @@ -4404,7 +4404,7 @@

                                        torch.cpu.is_available#

                                        -torch.cpu.is_available()[source]#
                                        +torch.cpu.is_available()[source]#

                                        Returns a bool indicating if CPU is currently available.

                                        N.B. This function only exists to facilitate device-agnostic code

                                        diff --git a/2.9/generated/torch.cpu.set_device.html b/2.9/generated/torch.cpu.set_device.html index e0891f50f4e..c5bc867e832 100644 --- a/2.9/generated/torch.cpu.set_device.html +++ b/2.9/generated/torch.cpu.set_device.html @@ -4404,7 +4404,7 @@

                                        torch.cpu.set_device#

                                        -torch.cpu.set_device(device)[source]#
                                        +torch.cpu.set_device(device)[source]#

Sets the current device. On CPU this is a no-op.

                                        N.B. This function only exists to facilitate device-agnostic code

                                        diff --git a/2.9/generated/torch.cpu.stream.html b/2.9/generated/torch.cpu.stream.html index 840be504e8b..33fcb0d5678 100644 --- a/2.9/generated/torch.cpu.stream.html +++ b/2.9/generated/torch.cpu.stream.html @@ -4404,7 +4404,7 @@

                                        torch.cpu.stream#

                                        -torch.cpu.stream(stream)[source]#
                                        +torch.cpu.stream(stream)[source]#

                                        Wrapper around the Context-manager StreamContext that selects a given stream.

                                        N.B. This function only exists to facilitate device-agnostic code

                                        diff --git a/2.9/generated/torch.cpu.synchronize.html b/2.9/generated/torch.cpu.synchronize.html index 8c53fc36fda..38fb26fa7da 100644 --- a/2.9/generated/torch.cpu.synchronize.html +++ b/2.9/generated/torch.cpu.synchronize.html @@ -4404,7 +4404,7 @@

                                        torch.cpu.synchronize#

                                        -torch.cpu.synchronize(device=None)[source]#
                                        +torch.cpu.synchronize(device=None)[source]#

                                        Waits for all kernels in all streams on the CPU device to complete.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.CUDAGraph.html b/2.9/generated/torch.cuda.CUDAGraph.html index 8df94e6028a..d713a70c42f 100644 --- a/2.9/generated/torch.cuda.CUDAGraph.html +++ b/2.9/generated/torch.cuda.CUDAGraph.html @@ -4404,7 +4404,7 @@

                                        CUDAGraph#

                                        -class torch.cuda.CUDAGraph(keep_graph=False)[source]#
                                        +class torch.cuda.CUDAGraph(keep_graph=False)[source]#

                                        Wrapper around a CUDA graph.

                                        Parameters
                                        @@ -4436,7 +4436,7 @@

                                        CUDAGraph
                                        -capture_begin(pool=None, capture_error_mode='global')[source]#
                                        +capture_begin(pool=None, capture_error_mode='global')[source]#

                                        Begin capturing CUDA work on the current stream.

Typically, you shouldn’t call capture_begin yourself. Use graph or make_graphed_callables(),

@@ -4459,7 +4459,7 @@

                                        CUDAGraph
                                        -capture_end()[source]#
                                        +capture_end()[source]#

                                        End CUDA graph capture on the current stream.

                                        After capture_end, replay may be called on this instance.

Typically, you shouldn’t call capture_end yourself.

@@ -4471,7 +4471,7 @@

                                        CUDAGraph
                                        -debug_dump(debug_path)[source]#
                                        +debug_dump(debug_path)[source]#
                                        Parameters

                                        debug_path (required) – Path to dump the graph to.

                                        @@ -4483,7 +4483,7 @@

                                        CUDAGraph
                                        -enable_debug_mode()[source]#
                                        +enable_debug_mode()[source]#

                                        Enable debugging mode for CUDAGraph.debug_dump.

                                        @@ -4491,7 +4491,7 @@

                                        CUDAGraph
                                        -instantiate()[source]#
                                        +instantiate()[source]#

Instantiate the CUDA graph. Will be called by capture_end if keep_graph=False, or by replay if keep_graph=True and instantiate has not already been

@@ -4503,7 +4503,7 @@

                                        CUDAGraph
                                        -pool()[source]#
                                        +pool()[source]#

                                        Return an opaque token representing the id of this graph’s memory pool.

                                        This id can optionally be passed to another graph’s capture_begin, which hints the other graph may share the same memory pool.

                                        @@ -4516,7 +4516,7 @@

                                        CUDAGraph
                                        -raw_cuda_graph()[source]#
                                        +raw_cuda_graph()[source]#

                                        Returns the underlying cudaGraph_t. keep_graph must be True.

See the following for APIs to manipulate this object: Graph Management and cuda-python Graph Management bindings

                                        @@ -4528,7 +4528,7 @@

                                        CUDAGraph
                                        -raw_cuda_graph_exec()[source]#
                                        +raw_cuda_graph_exec()[source]#

                                        Returns the underlying cudaGraphExec_t. instantiate must have been called if keep_graph is True, or capture_end must have been called if keep_graph is False. If you call instantiate() after raw_cuda_graph_exec(), the previously returned cudaGraphExec_t will be destroyed. It is your responsibility not to use this object after destruction.

See the following for APIs to manipulate this object: Graph Execution and cuda-python Graph Execution bindings

                                        @@ -4540,7 +4540,7 @@

                                        CUDAGraph
                                        -replay()[source]#
                                        +replay()[source]#

                                        Replay the CUDA work captured by this graph.
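A capture-and-replay sketch using the torch.cuda.graph context manager (assumes a CUDA device; real code usually warms up on a side stream first):

>>> import torch
>>> g = torch.cuda.CUDAGraph()
>>> x = torch.zeros(4, device="cuda")  # static input buffer
>>> with torch.cuda.graph(g):
...     y = x * 2
>>> x.fill_(3.0)  # update the static input in place
>>> g.replay()    # y now holds the result for the new value of x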

                                        @@ -4548,7 +4548,7 @@

                                        CUDAGraph
                                        -reset()[source]#
                                        +reset()[source]#

                                        Delete the graph currently held by this instance.

                                        diff --git a/2.9/generated/torch.cuda.Event.html b/2.9/generated/torch.cuda.Event.html index 77df04e9158..5ff037047c1 100644 --- a/2.9/generated/torch.cuda.Event.html +++ b/2.9/generated/torch.cuda.Event.html @@ -4404,7 +4404,7 @@

                                        Event#

                                        -class torch.cuda.Event(enable_timing=False, blocking=False, interprocess=False, external=False)[source]#
                                        +class torch.cuda.Event(enable_timing=False, blocking=False, interprocess=False, external=False)[source]#

                                        Wrapper around a CUDA event.

CUDA events are synchronization markers that can be used to monitor the device’s progress, to accurately measure timing, and to synchronize CUDA

@@ -4427,7 +4427,7 @@

                                        Event#

                                        -elapsed_time(end_event)[source]#
                                        +elapsed_time(end_event)[source]#

                                        Return the time elapsed.

                                        Time reported in milliseconds after the event was recorded and before the end_event was recorded.
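A timing sketch (assumes a CUDA device; enable_timing must be set on both events):

>>> import torch
>>> start = torch.cuda.Event(enable_timing=True)
>>> end = torch.cuda.Event(enable_timing=True)
>>> a = torch.randn(1024, 1024, device="cuda")
>>> start.record()
>>> b = a @ a
>>> end.record()
>>> torch.cuda.synchronize()  # make sure end has completed before reading
>>> ms = start.elapsed_time(end)  # float, in milliseconds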

                                        @@ -4435,20 +4435,20 @@

                                        Event#
                                        -classmethod from_ipc_handle(device, handle)[source]#
                                        +classmethod from_ipc_handle(device, handle)[source]#

                                        Reconstruct an event from an IPC handle on the given device.

                                        -ipc_handle()[source]#
                                        +ipc_handle()[source]#

                                        Return an IPC handle of this event.

                                        If not recorded yet, the event will use the current device.

                                        -query()[source]#
                                        +query()[source]#

                                        Check if all work currently captured by event has completed.

                                        Returns
                                        @@ -4460,7 +4460,7 @@

                                        Event#
                                        -record(stream=None)[source]#
                                        +record(stream=None)[source]#

                                        Record the event in a given stream.

                                        Uses torch.cuda.current_stream() if no stream is specified. The stream’s device must match the event’s device.

                                        @@ -4468,7 +4468,7 @@

                                        Event#
                                        -synchronize()[source]#
                                        +synchronize()[source]#

                                        Wait for the event to complete.

                                        Waits until the completion of all work currently captured in this event. This prevents the CPU thread from proceeding until the event completes.

                                        @@ -4485,7 +4485,7 @@

                                        Event#
                                        -wait(stream=None)[source]#
                                        +wait(stream=None)[source]#

                                        Make all future work submitted to the given stream wait for this event.

                                        Use torch.cuda.current_stream() if no stream is specified.

                                        diff --git a/2.9/generated/torch.cuda.ExternalStream.html b/2.9/generated/torch.cuda.ExternalStream.html index f8e1c1a6409..b2faaef7d7e 100644 --- a/2.9/generated/torch.cuda.ExternalStream.html +++ b/2.9/generated/torch.cuda.ExternalStream.html @@ -4404,7 +4404,7 @@

                                        ExternalStream#

                                        -class torch.cuda.ExternalStream(stream_ptr, device=None, **kwargs)[source]#
                                        +class torch.cuda.ExternalStream(stream_ptr, device=None, **kwargs)[source]#

                                        Wrapper around an externally allocated CUDA stream.

                                        This class is used to wrap streams allocated in other libraries in order to facilitate data exchange and multi-library interactions.

                                        @@ -4427,7 +4427,7 @@

                                        ExternalStream
                                        -query()[source]#
                                        +query()[source]#

                                        Check if all the work submitted has been completed.

                                        Returns
                                        @@ -4441,7 +4441,7 @@

                                        ExternalStream
                                        -record_event(event=None)[source]#
                                        +record_event(event=None)[source]#

                                        Record an event.

                                        Parameters
                                        @@ -4456,7 +4456,7 @@

                                        ExternalStream
                                        -synchronize()[source]#
                                        +synchronize()[source]#

                                        Wait for all the kernels in this stream to complete.

                                        Note

                                        @@ -4469,7 +4469,7 @@

                                        ExternalStream
                                        -wait_event(event)[source]#
                                        +wait_event(event)[source]#

                                        Make all future work submitted to the stream wait for an event.

                                        Parameters
                                        @@ -4487,7 +4487,7 @@

                                        ExternalStream
                                        -wait_stream(stream)[source]#
                                        +wait_stream(stream)[source]#

                                        Synchronize with another stream.

                                        All future work submitted to this stream will wait until all kernels submitted to a given stream at the time of call complete.

                                        diff --git a/2.9/generated/torch.cuda.Stream.html b/2.9/generated/torch.cuda.Stream.html index 04a565aeb03..f0daba40146 100644 --- a/2.9/generated/torch.cuda.Stream.html +++ b/2.9/generated/torch.cuda.Stream.html @@ -4404,7 +4404,7 @@

                                        Stream#

                                        -class torch.cuda.Stream(device=None, priority=0, **kwargs)[source]#
                                        +class torch.cuda.Stream(device=None, priority=0, **kwargs)[source]#

                                        Wrapper around a CUDA stream.

A CUDA stream is a linear sequence of execution that belongs to a specific device, independent from other streams. It supports with statement as a

@@ -4426,7 +4426,7 @@

                                        Stream
                                        -query()[source]#
                                        +query()[source]#

                                        Check if all the work submitted has been completed.

                                        Returns
                                        @@ -4440,7 +4440,7 @@

                                        Stream
                                        -record_event(event=None)[source]#
                                        +record_event(event=None)[source]#

                                        Record an event.

                                        Parameters
                                        @@ -4455,7 +4455,7 @@

                                        Stream
                                        -synchronize()[source]#
                                        +synchronize()[source]#

                                        Wait for all the kernels in this stream to complete.

                                        Note

                                        @@ -4468,7 +4468,7 @@

                                        Stream
                                        -wait_event(event)[source]#
                                        +wait_event(event)[source]#

                                        Make all future work submitted to the stream wait for an event.

                                        Parameters
                                        @@ -4486,7 +4486,7 @@

                                        Stream
                                        -wait_stream(stream)[source]#
                                        +wait_stream(stream)[source]#

                                        Synchronize with another stream.

                                        All future work submitted to this stream will wait until all kernels submitted to a given stream at the time of call complete.
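A sketch of the common hand-off pattern (assumes a CUDA device):

>>> import torch
>>> x = torch.randn(4, device="cuda")
>>> s = torch.cuda.Stream()
>>> s.wait_stream(torch.cuda.current_stream())  # s waits for pending work on x
>>> with torch.cuda.stream(s):
...     y = x * 2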

                                        diff --git a/2.9/generated/torch.cuda.StreamContext.html b/2.9/generated/torch.cuda.StreamContext.html index f446e6522bf..a271027cefb 100644 --- a/2.9/generated/torch.cuda.StreamContext.html +++ b/2.9/generated/torch.cuda.StreamContext.html @@ -4404,7 +4404,7 @@

                                        StreamContext#

                                        -class torch.cuda.StreamContext(stream)[source]#
                                        +class torch.cuda.StreamContext(stream)[source]#

                                        Context-manager that selects a given stream.

                                        All CUDA kernels queued within its context will be enqueued on a selected stream.

                                        diff --git a/2.9/generated/torch.cuda.can_device_access_peer.html b/2.9/generated/torch.cuda.can_device_access_peer.html index d87904a2ae7..0eb44b9d05b 100644 --- a/2.9/generated/torch.cuda.can_device_access_peer.html +++ b/2.9/generated/torch.cuda.can_device_access_peer.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.can_device_access_peer#

                                        -torch.cuda.can_device_access_peer(device, peer_device)[source]#
                                        +torch.cuda.can_device_access_peer(device, peer_device)[source]#

                                        Check if peer access between two devices is possible.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.clock_rate.html b/2.9/generated/torch.cuda.clock_rate.html index 2007a73607f..6f23c47a079 100644 --- a/2.9/generated/torch.cuda.clock_rate.html +++ b/2.9/generated/torch.cuda.clock_rate.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.clock_rate#

                                        -torch.cuda.clock_rate(device=None)[source]#
                                        +torch.cuda.clock_rate(device=None)[source]#

                                        Return the clock speed of the GPU SM in MHz (megahertz) over the past sample period as given by nvidia-smi.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.comm.broadcast.html b/2.9/generated/torch.cuda.comm.broadcast.html index a127ca11611..9f81ba2199b 100644 --- a/2.9/generated/torch.cuda.comm.broadcast.html +++ b/2.9/generated/torch.cuda.comm.broadcast.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.comm.broadcast#

                                        -torch.cuda.comm.broadcast(tensor, devices=None, *, out=None)[source]#
                                        +torch.cuda.comm.broadcast(tensor, devices=None, *, out=None)[source]#

                                        Broadcasts a tensor to specified GPU devices.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.comm.broadcast_coalesced.html b/2.9/generated/torch.cuda.comm.broadcast_coalesced.html index 8ec76968fbb..7b6e7273f28 100644 --- a/2.9/generated/torch.cuda.comm.broadcast_coalesced.html +++ b/2.9/generated/torch.cuda.comm.broadcast_coalesced.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.comm.broadcast_coalesced#

                                        -torch.cuda.comm.broadcast_coalesced(tensors, devices, buffer_size=10485760)[source]#
                                        +torch.cuda.comm.broadcast_coalesced(tensors, devices, buffer_size=10485760)[source]#

                                        Broadcast a sequence of tensors to the specified GPUs.

                                        Small tensors are first coalesced into a buffer to reduce the number of synchronizations.

                                        diff --git a/2.9/generated/torch.cuda.comm.gather.html b/2.9/generated/torch.cuda.comm.gather.html index 18f1a45d3ba..d82fd58be02 100644 --- a/2.9/generated/torch.cuda.comm.gather.html +++ b/2.9/generated/torch.cuda.comm.gather.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.comm.gather#

                                        -torch.cuda.comm.gather(tensors, dim=0, destination=None, *, out=None)[source]#
                                        +torch.cuda.comm.gather(tensors, dim=0, destination=None, *, out=None)[source]#

                                        Gathers tensors from multiple GPU devices.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.comm.reduce_add.html b/2.9/generated/torch.cuda.comm.reduce_add.html index 3d7d6b1e06d..70dadf3a0db 100644 --- a/2.9/generated/torch.cuda.comm.reduce_add.html +++ b/2.9/generated/torch.cuda.comm.reduce_add.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.comm.reduce_add#

                                        -torch.cuda.comm.reduce_add(inputs, destination=None)[source]#
                                        +torch.cuda.comm.reduce_add(inputs, destination=None)[source]#

                                        Sum tensors from multiple GPUs.

                                        All inputs should have matching shapes, dtype, and layout. The output tensor will be of the same shape, dtype, and layout.

                                        diff --git a/2.9/generated/torch.cuda.comm.reduce_add_coalesced.html b/2.9/generated/torch.cuda.comm.reduce_add_coalesced.html index b0a29eee512..4e070a6b642 100644 --- a/2.9/generated/torch.cuda.comm.reduce_add_coalesced.html +++ b/2.9/generated/torch.cuda.comm.reduce_add_coalesced.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.comm.reduce_add_coalesced#

                                        -torch.cuda.comm.reduce_add_coalesced(inputs, destination=None, buffer_size=10485760)[source]#
                                        +torch.cuda.comm.reduce_add_coalesced(inputs, destination=None, buffer_size=10485760)[source]#

                                        Sum tensors from multiple GPUs.

                                        Small tensors are first coalesced into a buffer to reduce the number of synchronizations.

                                        diff --git a/2.9/generated/torch.cuda.comm.scatter.html b/2.9/generated/torch.cuda.comm.scatter.html index e73fbe91202..3c36bbb0cde 100644 --- a/2.9/generated/torch.cuda.comm.scatter.html +++ b/2.9/generated/torch.cuda.comm.scatter.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.comm.scatter#

                                        -torch.cuda.comm.scatter(tensor, devices=None, chunk_sizes=None, dim=0, streams=None, *, out=None)[source]#
                                        +torch.cuda.comm.scatter(tensor, devices=None, chunk_sizes=None, dim=0, streams=None, *, out=None)[source]#

                                        Scatters tensor across multiple GPUs.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.cudart.html b/2.9/generated/torch.cuda.cudart.html index bff456e2789..2a6ff2e998c 100644 --- a/2.9/generated/torch.cuda.cudart.html +++ b/2.9/generated/torch.cuda.cudart.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.cudart#

                                        -torch.cuda.cudart()[source]#
                                        +torch.cuda.cudart()[source]#

                                        Retrieves the CUDA runtime API module.

This function initializes the CUDA runtime environment if it is not already initialized and returns the CUDA runtime API module (_cudart). The CUDA

diff --git a/2.9/generated/torch.cuda.current_blas_handle.html index ee5f8552390..b7c5cac8b5c 100644 --- a/2.9/generated/torch.cuda.current_blas_handle.html +++ b/2.9/generated/torch.cuda.current_blas_handle.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.current_blas_handle#

                                        -torch.cuda.current_blas_handle()[source]#
                                        +torch.cuda.current_blas_handle()[source]#

Return a cublasHandle_t pointer to the current cuBLAS handle.

                                        diff --git a/2.9/generated/torch.cuda.current_device.html b/2.9/generated/torch.cuda.current_device.html index fb7ff8a4f1f..f7032996f1d 100644 --- a/2.9/generated/torch.cuda.current_device.html +++ b/2.9/generated/torch.cuda.current_device.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.current_device#

                                        -torch.cuda.current_device()[source]#
                                        +torch.cuda.current_device()[source]#

Return the index of the currently selected device.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.current_stream.html b/2.9/generated/torch.cuda.current_stream.html index c14b2a94434..cae57920a4f 100644 --- a/2.9/generated/torch.cuda.current_stream.html +++ b/2.9/generated/torch.cuda.current_stream.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.current_stream#

                                        -torch.cuda.current_stream(device=None)[source]#
                                        +torch.cuda.current_stream(device=None)[source]#

                                        Return the currently selected Stream for a given device.
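
For illustration (a sketch, assuming CUDA is available), the selected stream is initially the device's default stream:

>>> s = torch.cuda.current_stream()
>>> s == torch.cuda.default_stream()
True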

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.default_stream.html b/2.9/generated/torch.cuda.default_stream.html index c249f6eb77b..57cea276e10 100644 --- a/2.9/generated/torch.cuda.default_stream.html +++ b/2.9/generated/torch.cuda.default_stream.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.default_stream#

                                        -torch.cuda.default_stream(device=None)[source]#
                                        +torch.cuda.default_stream(device=None)[source]#

                                        Return the default Stream for a given device.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.device.html b/2.9/generated/torch.cuda.device.html index 7273d14ed3a..39748afa118 100644 --- a/2.9/generated/torch.cuda.device.html +++ b/2.9/generated/torch.cuda.device.html @@ -4404,7 +4404,7 @@

                                        device#

                                        -class torch.cuda.device(device)[source]#
                                        +class torch.cuda.device(device)[source]#

                                        Context-manager that changes the selected device.
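
A minimal sketch, assuming at least one visible GPU:

>>> with torch.cuda.device(0):
...     x = torch.zeros(1, device="cuda")  # allocated on cuda:0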

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.device_count.html b/2.9/generated/torch.cuda.device_count.html index 51e01f7a1cb..34c70aa1275 100644 --- a/2.9/generated/torch.cuda.device_count.html +++ b/2.9/generated/torch.cuda.device_count.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.device_count#

                                        -torch.cuda.device_count()[source]#
                                        +torch.cuda.device_count()[source]#

                                        Return the number of GPUs available.
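
For example (a sketch; the result depends on the machine and is 0 when no GPU is visible):

>>> n = torch.cuda.device_count()
>>> names = [torch.cuda.get_device_name(i) for i in range(n)]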

                                        Note

                                        diff --git a/2.9/generated/torch.cuda.device_memory_used.html b/2.9/generated/torch.cuda.device_memory_used.html index 67ce5b50111..924d04455e1 100644 --- a/2.9/generated/torch.cuda.device_memory_used.html +++ b/2.9/generated/torch.cuda.device_memory_used.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.device_memory_used#

                                        -torch.cuda.device_memory_used(device=None)[source]#
                                        +torch.cuda.device_memory_used(device=None)[source]#

                                        Return used global (device) memory in bytes as given by nvidia-smi or amd-smi.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.device_of.html b/2.9/generated/torch.cuda.device_of.html index 6037a4576bd..ea0e12d48f7 100644 --- a/2.9/generated/torch.cuda.device_of.html +++ b/2.9/generated/torch.cuda.device_of.html @@ -4404,7 +4404,7 @@

                                        device_of#

                                        -class torch.cuda.device_of(obj)[source]#
                                        +class torch.cuda.device_of(obj)[source]#

                                        Context-manager that changes the current device to that of given object.

                                        You can use both tensors and storages as arguments. If a given object is not allocated on a GPU, this is a no-op.
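
A small sketch, assuming a tensor on cuda:0; inside the block the current device follows the object:

>>> t = torch.randn(2, device="cuda:0")
>>> with torch.cuda.device_of(t):
...     torch.cuda.current_device()
0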

                                        diff --git a/2.9/generated/torch.cuda.gds.GdsFile.html b/2.9/generated/torch.cuda.gds.GdsFile.html index 8fc3b425c4e..a0b7ae8b673 100644 --- a/2.9/generated/torch.cuda.gds.GdsFile.html +++ b/2.9/generated/torch.cuda.gds.GdsFile.html @@ -4404,7 +4404,7 @@

                                        GdsFile#

                                        -class torch.cuda.gds.GdsFile(filename, flags)[source]#
                                        +class torch.cuda.gds.GdsFile(filename, flags)[source]#

                                        Wrapper around cuFile.

                                        cuFile is a file-like interface to the GPUDirect Storage (GDS) API.

See the cufile docs

@@ -4436,7 +4436,7 @@

                                        GdsFile
                                        -deregister_handle()[source]#
                                        +deregister_handle()[source]#

                                        Deregisters file descriptor from cuFile Driver.

                                        This is a wrapper around cuFileHandleDeregister.

                                        @@ -4445,7 +4445,7 @@

                                        GdsFile
                                        -load_storage(storage, offset=0)[source]#
                                        +load_storage(storage, offset=0)[source]#

                                        Loads data from the file into the storage.

                                        This is a wrapper around cuFileRead. storage.nbytes() of data will be loaded from the file at offset into the storage.

                                        @@ -4461,7 +4461,7 @@

                                        GdsFile
                                        -register_handle()[source]#
                                        +register_handle()[source]#

                                        Registers file descriptor to cuFile Driver.

                                        This is a wrapper around cuFileHandleRegister.

                                        @@ -4470,7 +4470,7 @@

                                        GdsFile
                                        -save_storage(storage, offset=0)[source]#
                                        +save_storage(storage, offset=0)[source]#

                                        Saves data from the storage into the file.

                                        This is a wrapper around cuFileWrite. All bytes of the storage will be written to the file at offset.
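
A round-trip sketch (the file name is hypothetical; GDS-capable hardware and drivers are assumed):

>>> import os
>>> src = torch.randn(1024, device="cuda")
>>> f = torch.cuda.gds.GdsFile("gds.bin", os.O_CREAT | os.O_RDWR)
>>> f.save_storage(src.untyped_storage(), offset=0)
>>> dst = torch.empty(1024, device="cuda")
>>> f.load_storage(dst.untyped_storage(), offset=0)  # dst now equals src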

                                        diff --git a/2.9/generated/torch.cuda.gds.gds_deregister_buffer.html b/2.9/generated/torch.cuda.gds.gds_deregister_buffer.html index 329d8b60a1e..4dcdf150884 100644 --- a/2.9/generated/torch.cuda.gds.gds_deregister_buffer.html +++ b/2.9/generated/torch.cuda.gds.gds_deregister_buffer.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.gds.gds_deregister_buffer#

                                        -torch.cuda.gds.gds_deregister_buffer(s)[source]#
                                        +torch.cuda.gds.gds_deregister_buffer(s)[source]#

                                        Deregisters a previously registered storage on a CUDA device as a cufile buffer.

                                        Example:

                                        >>> src = torch.randn(1024, device="cuda")
                                        diff --git a/2.9/generated/torch.cuda.gds.gds_register_buffer.html b/2.9/generated/torch.cuda.gds.gds_register_buffer.html
                                        index 81116e90b30..8e00091f925 100644
                                        --- a/2.9/generated/torch.cuda.gds.gds_register_buffer.html
                                        +++ b/2.9/generated/torch.cuda.gds.gds_register_buffer.html
                                        @@ -4404,7 +4404,7 @@
                                         

                                        torch.cuda.gds.gds_register_buffer#

                                        -torch.cuda.gds.gds_register_buffer(s)[source]#
                                        +torch.cuda.gds.gds_register_buffer(s)[source]#

                                        Registers a storage on a CUDA device as a cufile buffer.

                                        Example:

                                        >>> src = torch.randn(1024, device="cuda")
                                        diff --git a/2.9/generated/torch.cuda.get_arch_list.html b/2.9/generated/torch.cuda.get_arch_list.html
                                        index 2d378e6626f..f6c5e4d861e 100644
                                        --- a/2.9/generated/torch.cuda.get_arch_list.html
                                        +++ b/2.9/generated/torch.cuda.get_arch_list.html
                                        @@ -4404,7 +4404,7 @@
                                         

                                        torch.cuda.get_arch_list#

                                        -torch.cuda.get_arch_list()[source]#
                                        +torch.cuda.get_arch_list()[source]#

Return the list of CUDA architectures this library was compiled for.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.get_device_capability.html b/2.9/generated/torch.cuda.get_device_capability.html index 21c70adb069..33632f93887 100644 --- a/2.9/generated/torch.cuda.get_device_capability.html +++ b/2.9/generated/torch.cuda.get_device_capability.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.get_device_capability#

                                        -torch.cuda.get_device_capability(device=None)[source]#
                                        +torch.cuda.get_device_capability(device=None)[source]#

                                        Get the cuda capability of a device.
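
For instance (a sketch; the values depend on the GPU):

>>> major, minor = torch.cuda.get_device_capability(0)  # e.g. (8, 0) on an A100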

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.get_device_name.html b/2.9/generated/torch.cuda.get_device_name.html index 1b77fda2e18..be48fe7070e 100644 --- a/2.9/generated/torch.cuda.get_device_name.html +++ b/2.9/generated/torch.cuda.get_device_name.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.get_device_name#

                                        -torch.cuda.get_device_name(device=None)[source]#
                                        +torch.cuda.get_device_name(device=None)[source]#

                                        Get the name of a device.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.get_device_properties.html b/2.9/generated/torch.cuda.get_device_properties.html index 22ae9bf0d70..d0a80de34b8 100644 --- a/2.9/generated/torch.cuda.get_device_properties.html +++ b/2.9/generated/torch.cuda.get_device_properties.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.get_device_properties#

                                        -torch.cuda.get_device_properties(device=None)[source]#
                                        +torch.cuda.get_device_properties(device=None)[source]#

                                        Get the properties of a device.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.get_gencode_flags.html b/2.9/generated/torch.cuda.get_gencode_flags.html index 17e196f2fac..6dc149320d1 100644 --- a/2.9/generated/torch.cuda.get_gencode_flags.html +++ b/2.9/generated/torch.cuda.get_gencode_flags.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.get_gencode_flags#

                                        -torch.cuda.get_gencode_flags()[source]#
                                        +torch.cuda.get_gencode_flags()[source]#

                                        Return NVCC gencode flags this library was compiled with.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.get_rng_state.html b/2.9/generated/torch.cuda.get_rng_state.html index 2334940dddb..102d99c42cd 100644 --- a/2.9/generated/torch.cuda.get_rng_state.html +++ b/2.9/generated/torch.cuda.get_rng_state.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.get_rng_state#

                                        -torch.cuda.get_rng_state(device='cuda')[source]#
                                        +torch.cuda.get_rng_state(device='cuda')[source]#

                                        Return the random number generator state of the specified GPU as a ByteTensor.
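
A common save/restore pattern (sketch, assuming CUDA is available):

>>> state = torch.cuda.get_rng_state()
>>> a = torch.rand(3, device="cuda")
>>> torch.cuda.set_rng_state(state)  # rewind the generator
>>> b = torch.rand(3, device="cuda")  # equal to a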

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.get_rng_state_all.html b/2.9/generated/torch.cuda.get_rng_state_all.html index 79ac000279f..cf91adc8a75 100644 --- a/2.9/generated/torch.cuda.get_rng_state_all.html +++ b/2.9/generated/torch.cuda.get_rng_state_all.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.get_rng_state_all#

                                        -torch.cuda.get_rng_state_all()[source]#
                                        +torch.cuda.get_rng_state_all()[source]#

                                        Return a list of ByteTensor representing the random number states of all devices.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.get_stream_from_external.html b/2.9/generated/torch.cuda.get_stream_from_external.html index ca3e4514d3d..58dc6cb7591 100644 --- a/2.9/generated/torch.cuda.get_stream_from_external.html +++ b/2.9/generated/torch.cuda.get_stream_from_external.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.get_stream_from_external#

                                        -torch.cuda.get_stream_from_external(data_ptr, device=None)[source]#
                                        +torch.cuda.get_stream_from_external(data_ptr, device=None)[source]#

                                        Return a Stream from an externally allocated CUDA stream.

                                        This function is used to wrap streams allocated in other libraries in order to facilitate data exchange and multi-library interactions.

                                        diff --git a/2.9/generated/torch.cuda.get_sync_debug_mode.html b/2.9/generated/torch.cuda.get_sync_debug_mode.html index afca0891e96..8b498b0b472 100644 --- a/2.9/generated/torch.cuda.get_sync_debug_mode.html +++ b/2.9/generated/torch.cuda.get_sync_debug_mode.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.get_sync_debug_mode#

                                        -torch.cuda.get_sync_debug_mode()[source]#
                                        +torch.cuda.get_sync_debug_mode()[source]#

Return the current value of the debug mode for CUDA synchronizing operations.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.graph.html b/2.9/generated/torch.cuda.graph.html index f576744de05..88c5ab20287 100644 --- a/2.9/generated/torch.cuda.graph.html +++ b/2.9/generated/torch.cuda.graph.html @@ -4404,7 +4404,7 @@

                                        graph#

                                        -class torch.cuda.graph(cuda_graph, pool=None, stream=None, capture_error_mode='global')[source]#
                                        +class torch.cuda.graph(cuda_graph, pool=None, stream=None, capture_error_mode='global')[source]#

                                        Context-manager that captures CUDA work into a torch.cuda.CUDAGraph object for later replay.

                                        See CUDA Graphs for a general introduction, detailed use, and constraints.
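
A condensed capture-and-replay sketch (production code should first warm up the workload on a side stream, per the CUDA Graphs notes):

>>> g = torch.cuda.CUDAGraph()
>>> static_in = torch.zeros(8, device="cuda")
>>> with torch.cuda.graph(g):
...     static_out = static_in * 2
>>> static_in.fill_(3.0)
>>> g.replay()  # static_out now holds 6.0 in every element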

                                        diff --git a/2.9/generated/torch.cuda.graph_pool_handle.html b/2.9/generated/torch.cuda.graph_pool_handle.html index c5b67a47e53..98b26d9b6de 100644 --- a/2.9/generated/torch.cuda.graph_pool_handle.html +++ b/2.9/generated/torch.cuda.graph_pool_handle.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.graph_pool_handle#

                                        -torch.cuda.graph_pool_handle()[source]#
                                        +torch.cuda.graph_pool_handle()[source]#

                                        Return an opaque token representing the id of a graph memory pool.

                                        See Graph memory management.

                                        diff --git a/2.9/generated/torch.cuda.init.html b/2.9/generated/torch.cuda.init.html index b0410fc40bb..0720e74a593 100644 --- a/2.9/generated/torch.cuda.init.html +++ b/2.9/generated/torch.cuda.init.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.init#

                                        -torch.cuda.init()[source]#
                                        +torch.cuda.init()[source]#

                                        Initialize PyTorch’s CUDA state.

You may need to call this explicitly if you are interacting with PyTorch via its C API, as Python bindings for CUDA functionality

diff --git a/2.9/generated/torch.cuda.initial_seed.html b/2.9/generated/torch.cuda.initial_seed.html index d5644f3e07c..29f6ee55425 100644 --- a/2.9/generated/torch.cuda.initial_seed.html +++ b/2.9/generated/torch.cuda.initial_seed.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.initial_seed#

                                        -torch.cuda.initial_seed()[source]#
                                        +torch.cuda.initial_seed()[source]#

                                        Return the current random seed of the current GPU.

                                        Warning

                                        diff --git a/2.9/generated/torch.cuda.ipc_collect.html b/2.9/generated/torch.cuda.ipc_collect.html index 8191ab78182..b4b902be922 100644 --- a/2.9/generated/torch.cuda.ipc_collect.html +++ b/2.9/generated/torch.cuda.ipc_collect.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.ipc_collect#

                                        -torch.cuda.ipc_collect()[source]#
                                        +torch.cuda.ipc_collect()[source]#

                                        Force collects GPU memory after it has been released by CUDA IPC.

                                        Note

                                        diff --git a/2.9/generated/torch.cuda.is_available.html b/2.9/generated/torch.cuda.is_available.html index 4782fc601b0..b97840f066d 100644 --- a/2.9/generated/torch.cuda.is_available.html +++ b/2.9/generated/torch.cuda.is_available.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.is_available#

                                        -torch.cuda.is_available()[source]#
                                        +torch.cuda.is_available()[source]#

                                        Return a bool indicating if CUDA is currently available.

                                        Note

                                        diff --git a/2.9/generated/torch.cuda.is_current_stream_capturing.html b/2.9/generated/torch.cuda.is_current_stream_capturing.html index d68ef6732b1..1088f75dfd6 100644 --- a/2.9/generated/torch.cuda.is_current_stream_capturing.html +++ b/2.9/generated/torch.cuda.is_current_stream_capturing.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.is_current_stream_capturing#

                                        -torch.cuda.is_current_stream_capturing()[source]#
                                        +torch.cuda.is_current_stream_capturing()[source]#

                                        Return True if CUDA graph capture is underway on the current CUDA stream, False otherwise.

                                        If a CUDA context does not exist on the current device, returns False without initializing the context.

                                        diff --git a/2.9/generated/torch.cuda.is_initialized.html b/2.9/generated/torch.cuda.is_initialized.html index c78ebbb3913..3799186579c 100644 --- a/2.9/generated/torch.cuda.is_initialized.html +++ b/2.9/generated/torch.cuda.is_initialized.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.is_initialized#

                                        -torch.cuda.is_initialized()[source]#
                                        +torch.cuda.is_initialized()[source]#

                                        Return whether PyTorch’s CUDA state has been initialized.

                                        diff --git a/2.9/generated/torch.cuda.is_tf32_supported.html b/2.9/generated/torch.cuda.is_tf32_supported.html index 22a0413a8dc..f1976be964e 100644 --- a/2.9/generated/torch.cuda.is_tf32_supported.html +++ b/2.9/generated/torch.cuda.is_tf32_supported.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.is_tf32_supported#

                                        -torch.cuda.is_tf32_supported()[source]#
                                        +torch.cuda.is_tf32_supported()[source]#

                                        Return a bool indicating if the current CUDA/ROCm device supports dtype tf32.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.jiterator._create_jit_fn.html b/2.9/generated/torch.cuda.jiterator._create_jit_fn.html index 4d19c355fae..017784d116c 100644 --- a/2.9/generated/torch.cuda.jiterator._create_jit_fn.html +++ b/2.9/generated/torch.cuda.jiterator._create_jit_fn.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.jiterator._create_jit_fn#

                                        -torch.cuda.jiterator._create_jit_fn(code_string, **kwargs)[source]#
                                        +torch.cuda.jiterator._create_jit_fn(code_string, **kwargs)[source]#

                                        Create a jiterator-generated cuda kernel for an elementwise op.

The code string has to be a valid CUDA function that describes the computation for a single element. The code string has to follow the C++ template pattern, as shown in the example below. This function will be inlined

diff --git a/2.9/generated/torch.cuda.jiterator._create_multi_output_jit_fn.html b/2.9/generated/torch.cuda.jiterator._create_multi_output_jit_fn.html index 30ef0493771..cadb9a12b27 100644 --- a/2.9/generated/torch.cuda.jiterator._create_multi_output_jit_fn.html +++ b/2.9/generated/torch.cuda.jiterator._create_multi_output_jit_fn.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.jiterator._create_multi_output_jit_fn#

                                        -torch.cuda.jiterator._create_multi_output_jit_fn(code_string, num_outputs, **kwargs)[source]#
                                        +torch.cuda.jiterator._create_multi_output_jit_fn(code_string, num_outputs, **kwargs)[source]#

                                        Create a jiterator-generated cuda kernel for an elementwise op that supports returning one or more outputs.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.make_graphed_callables.html b/2.9/generated/torch.cuda.make_graphed_callables.html index d5583be66ed..86bddd60439 100644 --- a/2.9/generated/torch.cuda.make_graphed_callables.html +++ b/2.9/generated/torch.cuda.make_graphed_callables.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.make_graphed_callables#

                                        -torch.cuda.make_graphed_callables(callables: Union[Module, Callable[[...], object]], sample_args: tuple[torch.Tensor, ...], num_warmup_iters: int = 3, allow_unused_input: bool = False, pool: Optional[_POOL_HANDLE] = None) Union[Module, Callable[[...], object]][source]#
                                        +torch.cuda.make_graphed_callables(callables: Union[Module, Callable[[...], object]], sample_args: tuple[torch.Tensor, ...], num_warmup_iters: int = 3, allow_unused_input: bool = False, pool: Optional[_POOL_HANDLE] = None) Union[Module, Callable[[...], object]][source]#
                                        torch.cuda.make_graphed_callables(callables: tuple[Union[torch.nn.modules.module.Module, Callable[..., object]], ...], sample_args: tuple[tuple[torch.Tensor, ...], ...], num_warmup_iters: int = 3, allow_unused_input: bool = False, pool: Optional[_POOL_HANDLE] = None) tuple[Union[torch.nn.modules.module.Module, Callable[..., object]], ...]

Accepts callables (functions or nn.Modules) and returns graphed versions.
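
An illustrative sketch (assuming a CUDA device; sample_args must match the shapes used at run time):

>>> model = torch.nn.Linear(4, 4).cuda()
>>> sample = (torch.randn(2, 4, device="cuda"),)
>>> graphed = torch.cuda.make_graphed_callables(model, sample)
>>> out = graphed(torch.randn(2, 4, device="cuda"))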

                                        diff --git a/2.9/generated/torch.cuda.manual_seed.html b/2.9/generated/torch.cuda.manual_seed.html index 0b52dfaeba2..4f794d0b625 100644 --- a/2.9/generated/torch.cuda.manual_seed.html +++ b/2.9/generated/torch.cuda.manual_seed.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.manual_seed#

                                        -torch.cuda.manual_seed(seed)[source]#
                                        +torch.cuda.manual_seed(seed)[source]#

                                        Set the seed for generating random numbers for the current GPU.

                                        It’s safe to call this function if CUDA is not available; in that case, it is silently ignored.
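
For multi-GPU reproducibility, pair it with manual_seed_all (sketch):

>>> torch.cuda.manual_seed(42)      # current GPU only
>>> torch.cuda.manual_seed_all(42)  # every visible GPU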

                                        diff --git a/2.9/generated/torch.cuda.manual_seed_all.html b/2.9/generated/torch.cuda.manual_seed_all.html index 24488e68289..29f069141c4 100644 --- a/2.9/generated/torch.cuda.manual_seed_all.html +++ b/2.9/generated/torch.cuda.manual_seed_all.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.manual_seed_all#

                                        -torch.cuda.manual_seed_all(seed)[source]#
                                        +torch.cuda.manual_seed_all(seed)[source]#

                                        Set the seed for generating random numbers on all GPUs.

                                        It’s safe to call this function if CUDA is not available; in that case, it is silently ignored.

                                        diff --git a/2.9/generated/torch.cuda.memory.CUDAPluggableAllocator.html b/2.9/generated/torch.cuda.memory.CUDAPluggableAllocator.html index bf35c2bc530..b86380c0e13 100644 --- a/2.9/generated/torch.cuda.memory.CUDAPluggableAllocator.html +++ b/2.9/generated/torch.cuda.memory.CUDAPluggableAllocator.html @@ -4404,7 +4404,7 @@

                                        CUDAPluggableAllocator#

                                        -class torch.cuda.memory.CUDAPluggableAllocator(path_to_so_file, alloc_fn_name, free_fn_name)[source]#
                                        +class torch.cuda.memory.CUDAPluggableAllocator(path_to_so_file, alloc_fn_name, free_fn_name)[source]#

                                        CUDA memory allocator loaded from a so file.

                                        diff --git a/2.9/generated/torch.cuda.memory.MemPool.html b/2.9/generated/torch.cuda.memory.MemPool.html index b75dafab472..f157c5ca5f5 100644 --- a/2.9/generated/torch.cuda.memory.MemPool.html +++ b/2.9/generated/torch.cuda.memory.MemPool.html @@ -4404,7 +4404,7 @@

                                        MemPool#

                                        -class torch.cuda.memory.MemPool(*args, **kwargs)[source]#
                                        +class torch.cuda.memory.MemPool(*args, **kwargs)[source]#

                                        MemPool represents a pool of memory in a caching allocator. Currently, it’s just the ID of the pool object maintained in the CUDACachingAllocator.

                                        @@ -4435,7 +4435,7 @@

                                        MemPool
                                        -snapshot()[source]#
                                        +snapshot()[source]#

                                        Return a snapshot of the CUDA memory allocator pool state across all devices.

Interpreting the output of this function requires familiarity with the

@@ -4449,7 +4449,7 @@

                                        MemPool
                                        -use_count()[source]#
                                        +use_count()[source]#

                                        Returns the reference count of this pool.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.memory.caching_allocator_alloc.html b/2.9/generated/torch.cuda.memory.caching_allocator_alloc.html index d6a052d436f..a4239d552da 100644 --- a/2.9/generated/torch.cuda.memory.caching_allocator_alloc.html +++ b/2.9/generated/torch.cuda.memory.caching_allocator_alloc.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.caching_allocator_alloc#

                                        -torch.cuda.memory.caching_allocator_alloc(size, device=None, stream=None)[source]#
                                        +torch.cuda.memory.caching_allocator_alloc(size, device=None, stream=None)[source]#

                                        Perform a memory allocation using the CUDA memory allocator.

Memory is allocated for a given device and a stream; this function is intended to be used for interoperability with other

diff --git a/2.9/generated/torch.cuda.memory.caching_allocator_delete.html b/2.9/generated/torch.cuda.memory.caching_allocator_delete.html index 4a6a1c92e66..c5c9e6d32a5 100644 --- a/2.9/generated/torch.cuda.memory.caching_allocator_delete.html +++ b/2.9/generated/torch.cuda.memory.caching_allocator_delete.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.caching_allocator_delete#

                                        -torch.cuda.memory.caching_allocator_delete(mem_ptr)[source]#
                                        +torch.cuda.memory.caching_allocator_delete(mem_ptr)[source]#

                                        Delete memory allocated using the CUDA memory allocator.

Memory allocated with caching_allocator_alloc() is freed here. The associated device and stream are tracked inside

diff --git a/2.9/generated/torch.cuda.memory.caching_allocator_enable.html b/2.9/generated/torch.cuda.memory.caching_allocator_enable.html index fc3faa061f1..ed0e041281b 100644 --- a/2.9/generated/torch.cuda.memory.caching_allocator_enable.html +++ b/2.9/generated/torch.cuda.memory.caching_allocator_enable.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.caching_allocator_enable#

                                        -torch.cuda.memory.caching_allocator_enable(value=True)[source]#
                                        +torch.cuda.memory.caching_allocator_enable(value=True)[source]#

                                        Enable or disable the CUDA memory allocator. On by default.

                                        diff --git a/2.9/generated/torch.cuda.memory.change_current_allocator.html b/2.9/generated/torch.cuda.memory.change_current_allocator.html index 1706822b426..7faa5a483fd 100644 --- a/2.9/generated/torch.cuda.memory.change_current_allocator.html +++ b/2.9/generated/torch.cuda.memory.change_current_allocator.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.change_current_allocator#

                                        -torch.cuda.memory.change_current_allocator(allocator)[source]#
                                        +torch.cuda.memory.change_current_allocator(allocator)[source]#

                                        Change the currently used memory allocator to be the one provided.

                                        If the current allocator has already been used/initialized, this function will error.

                                        diff --git a/2.9/generated/torch.cuda.memory.empty_cache.html b/2.9/generated/torch.cuda.memory.empty_cache.html index 5e43b0f82a2..279700025e0 100644 --- a/2.9/generated/torch.cuda.memory.empty_cache.html +++ b/2.9/generated/torch.cuda.memory.empty_cache.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.empty_cache#

                                        -torch.cuda.memory.empty_cache()[source]#
                                        +torch.cuda.memory.empty_cache()[source]#

Release all unoccupied cached memory currently held by the caching allocator so that it can be used by other GPU applications and becomes visible in nvidia-smi.
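
A typical sequence (sketch): drop the Python references first, then release the cached blocks:

>>> x = torch.empty(1024, 1024, device="cuda")
>>> del x                     # memory returns to the cache, not to the driver
>>> torch.cuda.empty_cache()  # cached blocks are handed back and show as free in nvidia-smi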

                                        diff --git a/2.9/generated/torch.cuda.memory.get_allocator_backend.html b/2.9/generated/torch.cuda.memory.get_allocator_backend.html index 3b8921e4546..b406611eed5 100644 --- a/2.9/generated/torch.cuda.memory.get_allocator_backend.html +++ b/2.9/generated/torch.cuda.memory.get_allocator_backend.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.get_allocator_backend#

                                        -torch.cuda.memory.get_allocator_backend()[source]#
                                        +torch.cuda.memory.get_allocator_backend()[source]#

Return a string describing the active allocator backend as set by PYTORCH_CUDA_ALLOC_CONF. Currently available backends are native (PyTorch’s native caching allocator) and cudaMallocAsync

diff --git a/2.9/generated/torch.cuda.memory.get_per_process_memory_fraction.html b/2.9/generated/torch.cuda.memory.get_per_process_memory_fraction.html index 8400bb38942..6f805e471dd 100644 --- a/2.9/generated/torch.cuda.memory.get_per_process_memory_fraction.html +++ b/2.9/generated/torch.cuda.memory.get_per_process_memory_fraction.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.get_per_process_memory_fraction#

                                        -torch.cuda.memory.get_per_process_memory_fraction(device=None)[source]#
                                        +torch.cuda.memory.get_per_process_memory_fraction(device=None)[source]#

                                        Get memory fraction for a process.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.memory.host_memory_stats.html b/2.9/generated/torch.cuda.memory.host_memory_stats.html index 813742a59ca..6fb86c0c0b6 100644 --- a/2.9/generated/torch.cuda.memory.host_memory_stats.html +++ b/2.9/generated/torch.cuda.memory.host_memory_stats.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.host_memory_stats#

                                        -torch.cuda.memory.host_memory_stats()[source]#
                                        +torch.cuda.memory.host_memory_stats()[source]#

Return a dictionary of CUDA host memory allocator statistics.

The return value of this function is a dictionary of statistics, each of

diff --git a/2.9/generated/torch.cuda.memory.host_memory_stats_as_nested_dict.html b/2.9/generated/torch.cuda.memory.host_memory_stats_as_nested_dict.html index de0a227c4d3..6fc783d03b8 100644 --- a/2.9/generated/torch.cuda.memory.host_memory_stats_as_nested_dict.html +++ b/2.9/generated/torch.cuda.memory.host_memory_stats_as_nested_dict.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.host_memory_stats_as_nested_dict#

                                        -torch.cuda.memory.host_memory_stats_as_nested_dict()[source]#
                                        +torch.cuda.memory.host_memory_stats_as_nested_dict()[source]#

                                        Return the result of host_memory_stats() as a nested dictionary.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.memory.list_gpu_processes.html b/2.9/generated/torch.cuda.memory.list_gpu_processes.html index c7086d339a8..280780d8fe5 100644 --- a/2.9/generated/torch.cuda.memory.list_gpu_processes.html +++ b/2.9/generated/torch.cuda.memory.list_gpu_processes.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.list_gpu_processes#

                                        -torch.cuda.memory.list_gpu_processes(device=None)[source]#
                                        +torch.cuda.memory.list_gpu_processes(device=None)[source]#

                                        Return a human-readable printout of the running processes and their GPU memory use for a given device.

                                        This can be useful to display periodically during training, or when handling out-of-memory exceptions.

                                        diff --git a/2.9/generated/torch.cuda.memory.max_memory_allocated.html b/2.9/generated/torch.cuda.memory.max_memory_allocated.html index f68e19f68e3..8ab0e533cab 100644 --- a/2.9/generated/torch.cuda.memory.max_memory_allocated.html +++ b/2.9/generated/torch.cuda.memory.max_memory_allocated.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.max_memory_allocated#

                                        -torch.cuda.memory.max_memory_allocated(device=None)[source]#
                                        +torch.cuda.memory.max_memory_allocated(device=None)[source]#

                                        Return the maximum GPU memory occupied by tensors in bytes for a given device.

By default, this returns the peak allocated memory since the beginning of this program. reset_peak_memory_stats() can be used to

diff --git a/2.9/generated/torch.cuda.memory.max_memory_cached.html b/2.9/generated/torch.cuda.memory.max_memory_cached.html index 8e728abb061..52d53ad609a 100644 --- a/2.9/generated/torch.cuda.memory.max_memory_cached.html +++ b/2.9/generated/torch.cuda.memory.max_memory_cached.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.max_memory_cached#

                                        -torch.cuda.memory.max_memory_cached(device=None)[source]#
                                        +torch.cuda.memory.max_memory_cached(device=None)[source]#

                                        Deprecated; see max_memory_reserved().

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.memory.max_memory_reserved.html b/2.9/generated/torch.cuda.memory.max_memory_reserved.html index 436a56dcc4b..58f95ec7083 100644 --- a/2.9/generated/torch.cuda.memory.max_memory_reserved.html +++ b/2.9/generated/torch.cuda.memory.max_memory_reserved.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.max_memory_reserved#

                                        -torch.cuda.memory.max_memory_reserved(device=None)[source]#
                                        +torch.cuda.memory.max_memory_reserved(device=None)[source]#

                                        Return the maximum GPU memory managed by the caching allocator in bytes for a given device.

By default, this returns the peak cached memory since the beginning of this program. reset_peak_memory_stats() can be used to reset

diff --git a/2.9/generated/torch.cuda.memory.mem_get_info.html b/2.9/generated/torch.cuda.memory.mem_get_info.html index 7575f0bcef5..52d12392515 100644 --- a/2.9/generated/torch.cuda.memory.mem_get_info.html +++ b/2.9/generated/torch.cuda.memory.mem_get_info.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.mem_get_info#

                                        -torch.cuda.memory.mem_get_info(device=None)[source]#
                                        +torch.cuda.memory.mem_get_info(device=None)[source]#

                                        Return the global free and total GPU memory for a given device using cudaMemGetInfo.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.memory.memory_allocated.html b/2.9/generated/torch.cuda.memory.memory_allocated.html index 70fe3ee9ca9..f27625b4164 100644 --- a/2.9/generated/torch.cuda.memory.memory_allocated.html +++ b/2.9/generated/torch.cuda.memory.memory_allocated.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.memory_allocated#

                                        -torch.cuda.memory.memory_allocated(device=None)[source]#
                                        +torch.cuda.memory.memory_allocated(device=None)[source]#

                                        Return the current GPU memory occupied by tensors in bytes for a given device.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.memory.memory_cached.html b/2.9/generated/torch.cuda.memory.memory_cached.html index 69e027e67be..38cf9bf8f9e 100644 --- a/2.9/generated/torch.cuda.memory.memory_cached.html +++ b/2.9/generated/torch.cuda.memory.memory_cached.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.memory_cached#

                                        -torch.cuda.memory.memory_cached(device=None)[source]#
                                        +torch.cuda.memory.memory_cached(device=None)[source]#

                                        Deprecated; see memory_reserved().

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.memory.memory_reserved.html b/2.9/generated/torch.cuda.memory.memory_reserved.html index 1dffa8bed58..4f88dbd6e3a 100644 --- a/2.9/generated/torch.cuda.memory.memory_reserved.html +++ b/2.9/generated/torch.cuda.memory.memory_reserved.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.memory_reserved#

                                        -torch.cuda.memory.memory_reserved(device=None)[source]#
                                        +torch.cuda.memory.memory_reserved(device=None)[source]#

                                        Return the current GPU memory managed by the caching allocator in bytes for a given device.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.memory.memory_snapshot.html b/2.9/generated/torch.cuda.memory.memory_snapshot.html index d1c491c8b5b..b210e60f845 100644 --- a/2.9/generated/torch.cuda.memory.memory_snapshot.html +++ b/2.9/generated/torch.cuda.memory.memory_snapshot.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.memory_snapshot#

                                        -torch.cuda.memory.memory_snapshot(mempool_id=None)[source]#
                                        +torch.cuda.memory.memory_snapshot(mempool_id=None)[source]#

                                        Return a snapshot of the CUDA memory allocator state across all devices.

                                        Interpreting the output of this function requires familiarity with the memory allocator internals.

                                        diff --git a/2.9/generated/torch.cuda.memory.memory_stats.html b/2.9/generated/torch.cuda.memory.memory_stats.html index 06bcda3e484..807bdbac310 100644 --- a/2.9/generated/torch.cuda.memory.memory_stats.html +++ b/2.9/generated/torch.cuda.memory.memory_stats.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.memory_stats#

                                        -torch.cuda.memory.memory_stats(device=None)[source]#
                                        +torch.cuda.memory.memory_stats(device=None)[source]#

                                        Return a dictionary of CUDA memory allocator statistics for a given device.

                                        The return value of this function is a dictionary of statistics, each of which is a non-negative integer.
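
For example, two commonly inspected keys (sketch, assuming CUDA is available):

>>> stats = torch.cuda.memory_stats()
>>> stats["allocated_bytes.all.current"]  # bytes currently occupied by tensors
>>> stats["reserved_bytes.all.peak"]      # peak bytes held by the caching allocator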

                                        diff --git a/2.9/generated/torch.cuda.memory.memory_stats_as_nested_dict.html b/2.9/generated/torch.cuda.memory.memory_stats_as_nested_dict.html index 02f926039c3..6a59a96306c 100644 --- a/2.9/generated/torch.cuda.memory.memory_stats_as_nested_dict.html +++ b/2.9/generated/torch.cuda.memory.memory_stats_as_nested_dict.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.memory_stats_as_nested_dict#

                                        -torch.cuda.memory.memory_stats_as_nested_dict(device=None)[source]#
                                        +torch.cuda.memory.memory_stats_as_nested_dict(device=None)[source]#

                                        Return the result of memory_stats() as a nested dictionary.

                                        Return type
                                        diff --git a/2.9/generated/torch.cuda.memory.memory_summary.html b/2.9/generated/torch.cuda.memory.memory_summary.html index 917e8950ab7..2c52288d829 100644 --- a/2.9/generated/torch.cuda.memory.memory_summary.html +++ b/2.9/generated/torch.cuda.memory.memory_summary.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.memory_summary#

                                        -torch.cuda.memory.memory_summary(device=None, abbreviated=False)[source]#
                                        +torch.cuda.memory.memory_summary(device=None, abbreviated=False)[source]#

                                        Return a human-readable printout of the current memory allocator statistics for a given device.

                                        This can be useful to display periodically during training, or when handling out-of-memory exceptions.

                                        diff --git a/2.9/generated/torch.cuda.memory.reset_accumulated_host_memory_stats.html b/2.9/generated/torch.cuda.memory.reset_accumulated_host_memory_stats.html index e225e64407e..012242ed752 100644 --- a/2.9/generated/torch.cuda.memory.reset_accumulated_host_memory_stats.html +++ b/2.9/generated/torch.cuda.memory.reset_accumulated_host_memory_stats.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.reset_accumulated_host_memory_stats#

                                        -torch.cuda.memory.reset_accumulated_host_memory_stats()[source]#
                                        +torch.cuda.memory.reset_accumulated_host_memory_stats()[source]#

                                        Reset the “accumulated” (historical) stats tracked by the host memory allocator.

                                        See host_memory_stats() for details. Accumulated stats correspond to the “allocated” and “freed” keys in each individual stat dict.

                                        diff --git a/2.9/generated/torch.cuda.memory.reset_accumulated_memory_stats.html b/2.9/generated/torch.cuda.memory.reset_accumulated_memory_stats.html index b8353705343..15e70b33528 100644 --- a/2.9/generated/torch.cuda.memory.reset_accumulated_memory_stats.html +++ b/2.9/generated/torch.cuda.memory.reset_accumulated_memory_stats.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.reset_accumulated_memory_stats#

                                        -torch.cuda.memory.reset_accumulated_memory_stats(device=None)[source]#
                                        +torch.cuda.memory.reset_accumulated_memory_stats(device=None)[source]#

                                        Reset the “accumulated” (historical) stats tracked by the CUDA memory allocator.

See memory_stats() for details. Accumulated stats correspond to the “allocated” and “freed” keys in each individual stat dict, as well as

diff --git a/2.9/generated/torch.cuda.memory.reset_max_memory_allocated.html b/2.9/generated/torch.cuda.memory.reset_max_memory_allocated.html index 6489b1bc5e7..4b7dd22f4dd 100644 --- a/2.9/generated/torch.cuda.memory.reset_max_memory_allocated.html +++ b/2.9/generated/torch.cuda.memory.reset_max_memory_allocated.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.reset_max_memory_allocated#

                                        -torch.cuda.memory.reset_max_memory_allocated(device=None)[source]#
                                        +torch.cuda.memory.reset_max_memory_allocated(device=None)[source]#

                                        Reset the starting point in tracking maximum GPU memory occupied by tensors for a given device.

                                        See max_memory_allocated() for details.

                                        diff --git a/2.9/generated/torch.cuda.memory.reset_max_memory_cached.html b/2.9/generated/torch.cuda.memory.reset_max_memory_cached.html index 60c30b680d4..71f84f494bf 100644 --- a/2.9/generated/torch.cuda.memory.reset_max_memory_cached.html +++ b/2.9/generated/torch.cuda.memory.reset_max_memory_cached.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.reset_max_memory_cached#

                                        -torch.cuda.memory.reset_max_memory_cached(device=None)[source]#
                                        +torch.cuda.memory.reset_max_memory_cached(device=None)[source]#

                                        Reset the starting point in tracking maximum GPU memory managed by the caching allocator for a given device.

                                        See max_memory_cached() for details.

                                        diff --git a/2.9/generated/torch.cuda.memory.reset_peak_host_memory_stats.html b/2.9/generated/torch.cuda.memory.reset_peak_host_memory_stats.html index abe31abe6e4..587aecf570c 100644 --- a/2.9/generated/torch.cuda.memory.reset_peak_host_memory_stats.html +++ b/2.9/generated/torch.cuda.memory.reset_peak_host_memory_stats.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.reset_peak_host_memory_stats#

                                        -torch.cuda.memory.reset_peak_host_memory_stats()[source]#
                                        +torch.cuda.memory.reset_peak_host_memory_stats()[source]#

                                        Reset the “peak” stats tracked by the host memory allocator.

                                        See host_memory_stats() for details. Peak stats correspond to the “peak” key in each individual stat dict.

                                        diff --git a/2.9/generated/torch.cuda.memory.reset_peak_memory_stats.html b/2.9/generated/torch.cuda.memory.reset_peak_memory_stats.html index 37d039de614..96ff9bd5ef9 100644 --- a/2.9/generated/torch.cuda.memory.reset_peak_memory_stats.html +++ b/2.9/generated/torch.cuda.memory.reset_peak_memory_stats.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.reset_peak_memory_stats#

                                        -torch.cuda.memory.reset_peak_memory_stats(device=None)[source]#
                                        +torch.cuda.memory.reset_peak_memory_stats(device=None)[source]#

                                        Reset the “peak” stats tracked by the CUDA memory allocator.

                                        See memory_stats() for details. Peak stats correspond to the “peak” key in each individual stat dict.

                                        diff --git a/2.9/generated/torch.cuda.memory.set_per_process_memory_fraction.html b/2.9/generated/torch.cuda.memory.set_per_process_memory_fraction.html index bdce038c8df..ba40284e826 100644 --- a/2.9/generated/torch.cuda.memory.set_per_process_memory_fraction.html +++ b/2.9/generated/torch.cuda.memory.set_per_process_memory_fraction.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory.set_per_process_memory_fraction#

                                        -torch.cuda.memory.set_per_process_memory_fraction(fraction, device=None)[source]#
                                        +torch.cuda.memory.set_per_process_memory_fraction(fraction, device=None)[source]#

                                        Set memory fraction for a process.

The fraction is used to limit a caching allocator to the allocated memory on a CUDA device. The allowed value equals the total visible memory multiplied by the fraction.

diff --git a/2.9/generated/torch.cuda.memory_usage.html b/2.9/generated/torch.cuda.memory_usage.html index bf358a102a7..d061f7d1824 100644 --- a/2.9/generated/torch.cuda.memory_usage.html +++ b/2.9/generated/torch.cuda.memory_usage.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.memory_usage#

                                        -torch.cuda.memory_usage(device=None)[source]#
                                        +torch.cuda.memory_usage(device=None)[source]#

                                        Return the percent of time over the past sample period during which global (device) memory was being read or written as given by nvidia-smi.

                                        diff --git a/2.9/generated/torch.cuda.nvtx.mark.html b/2.9/generated/torch.cuda.nvtx.mark.html index 6dcb767e6a6..0a2caa665c4 100644 --- a/2.9/generated/torch.cuda.nvtx.mark.html +++ b/2.9/generated/torch.cuda.nvtx.mark.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.nvtx.mark#

                                        -torch.cuda.nvtx.mark(msg)[source]#
                                        +torch.cuda.nvtx.mark(msg)[source]#

                                        Describe an instantaneous event that occurred at some point.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.nvtx.range.html b/2.9/generated/torch.cuda.nvtx.range.html index 04d3f42c789..0f61b7ec6a6 100644 --- a/2.9/generated/torch.cuda.nvtx.range.html +++ b/2.9/generated/torch.cuda.nvtx.range.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.nvtx.range#

                                        -torch.cuda.nvtx.range(msg, *args, **kwargs)[source]#
                                        +torch.cuda.nvtx.range(msg, *args, **kwargs)[source]#

                                        Context manager / decorator that pushes an NVTX range at the beginning of its scope, and pops it at the end. If extra arguments are given, they are passed as arguments to msg.format().
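
Both usages, sketched; extra arguments are interpolated via msg.format():

>>> with torch.cuda.nvtx.range("step {}", 0):
...     y = torch.relu(torch.randn(8, device="cuda"))
>>> @torch.cuda.nvtx.range("inference")
... def predict(x):
...     return x * 2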

                                        diff --git a/2.9/generated/torch.cuda.nvtx.range_pop.html b/2.9/generated/torch.cuda.nvtx.range_pop.html index c917ce9fbe6..cf26b324891 100644 --- a/2.9/generated/torch.cuda.nvtx.range_pop.html +++ b/2.9/generated/torch.cuda.nvtx.range_pop.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.nvtx.range_pop#

                                        -torch.cuda.nvtx.range_pop()[source]#
                                        +torch.cuda.nvtx.range_pop()[source]#

                                        Pop a range off of a stack of nested range spans. Returns the zero-based depth of the range that is ended.

                                        diff --git a/2.9/generated/torch.cuda.nvtx.range_push.html b/2.9/generated/torch.cuda.nvtx.range_push.html index 0162afd731d..990b927004b 100644 --- a/2.9/generated/torch.cuda.nvtx.range_push.html +++ b/2.9/generated/torch.cuda.nvtx.range_push.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.nvtx.range_push#

                                        -torch.cuda.nvtx.range_push(msg)[source]#
                                        +torch.cuda.nvtx.range_push(msg)[source]#

Push a range onto a stack of nested range spans. Returns the zero-based depth of the range that is started.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.power_draw.html b/2.9/generated/torch.cuda.power_draw.html index ee593292bd9..431b64d4ee5 100644 --- a/2.9/generated/torch.cuda.power_draw.html +++ b/2.9/generated/torch.cuda.power_draw.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.power_draw#

                                        -torch.cuda.power_draw(device=None)[source]#
                                        +torch.cuda.power_draw(device=None)[source]#

Return the average power draw of the GPU sensor in mW (milliwatts) over the past sample period, as given by nvidia-smi, for Fermi or newer fully supported devices.

                                        diff --git a/2.9/generated/torch.cuda.seed.html b/2.9/generated/torch.cuda.seed.html index ce5bae9577e..40f0a2eb5ed 100644 --- a/2.9/generated/torch.cuda.seed.html +++ b/2.9/generated/torch.cuda.seed.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.seed#

                                        -torch.cuda.seed()[source]#
                                        +torch.cuda.seed()[source]#

                                        Set the seed for generating random numbers to a random number for the current GPU.

                                        It’s safe to call this function if CUDA is not available; in that case, it is silently ignored.

                                        diff --git a/2.9/generated/torch.cuda.seed_all.html b/2.9/generated/torch.cuda.seed_all.html index 3e9a86cd6f1..dfd95d31e6d 100644 --- a/2.9/generated/torch.cuda.seed_all.html +++ b/2.9/generated/torch.cuda.seed_all.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.seed_all#

                                        -torch.cuda.seed_all()[source]#
                                        +torch.cuda.seed_all()[source]#

                                        Set the seed for generating random numbers to a random number on all GPUs.

                                        It’s safe to call this function if CUDA is not available; in that case, it is silently ignored.

                                        diff --git a/2.9/generated/torch.cuda.set_device.html b/2.9/generated/torch.cuda.set_device.html index 9cf7e554b02..6d5fc0eed2b 100644 --- a/2.9/generated/torch.cuda.set_device.html +++ b/2.9/generated/torch.cuda.set_device.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.set_device#

                                        -torch.cuda.set_device(device)[source]#
                                        +torch.cuda.set_device(device)[source]#

                                        Set the current device.

Usage of this function is discouraged in favor of device. In most cases it’s better to use the CUDA_VISIBLE_DEVICES environment variable.
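
A minimal sketch of the discouraged call and its preferred alternative (not part of this patch; assumes at least one CUDA device):

>>> torch.cuda.set_device(0)  # discouraged
>>> with torch.cuda.device(0):  # preferred: scoped device selection
...     t = torch.empty(1, device="cuda")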

                                        diff --git a/2.9/generated/torch.cuda.set_rng_state.html b/2.9/generated/torch.cuda.set_rng_state.html index 7d8743e189d..b70b8db1cb8 100644 --- a/2.9/generated/torch.cuda.set_rng_state.html +++ b/2.9/generated/torch.cuda.set_rng_state.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.set_rng_state#

                                        -torch.cuda.set_rng_state(new_state, device='cuda')[source]#
                                        +torch.cuda.set_rng_state(new_state, device='cuda')[source]#

                                        Set the random number generator state of the specified GPU.
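
A minimal round-trip sketch (not part of this patch; assumes CUDA is available):

>>> state = torch.cuda.get_rng_state()  # snapshot the current generator state
>>> torch.cuda.set_rng_state(state)  # restore it later, e.g. for reproducibility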

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.set_rng_state_all.html b/2.9/generated/torch.cuda.set_rng_state_all.html index 8d1f6ffd113..24ce201c713 100644 --- a/2.9/generated/torch.cuda.set_rng_state_all.html +++ b/2.9/generated/torch.cuda.set_rng_state_all.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.set_rng_state_all#

                                        -torch.cuda.set_rng_state_all(new_states)[source]#
                                        +torch.cuda.set_rng_state_all(new_states)[source]#

                                        Set the random number generator state of all devices.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.set_stream.html b/2.9/generated/torch.cuda.set_stream.html index 435fda08e3b..4a93e53f972 100644 --- a/2.9/generated/torch.cuda.set_stream.html +++ b/2.9/generated/torch.cuda.set_stream.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.set_stream#

                                        -torch.cuda.set_stream(stream)[source]#
                                        +torch.cuda.set_stream(stream)[source]#

Set the current stream. This is a wrapper API to set the stream.

                                        Usage of this function is discouraged in favor of the stream context manager.

                                        diff --git a/2.9/generated/torch.cuda.set_sync_debug_mode.html b/2.9/generated/torch.cuda.set_sync_debug_mode.html index c9ce3d6bcbc..6657abd252b 100644 --- a/2.9/generated/torch.cuda.set_sync_debug_mode.html +++ b/2.9/generated/torch.cuda.set_sync_debug_mode.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.set_sync_debug_mode#

                                        -torch.cuda.set_sync_debug_mode(debug_mode)[source]#
                                        +torch.cuda.set_sync_debug_mode(debug_mode)[source]#

Set the debug mode for CUDA synchronizing operations.

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.stream.html b/2.9/generated/torch.cuda.stream.html index 2bae494fe23..0ee08349e9c 100644 --- a/2.9/generated/torch.cuda.stream.html +++ b/2.9/generated/torch.cuda.stream.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.stream#

                                        -torch.cuda.stream(stream)[source]#
                                        +torch.cuda.stream(stream)[source]#

Wrapper around the context manager StreamContext that selects a given stream.
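
A minimal sketch (not part of this patch; assumes CUDA is available):

>>> s = torch.cuda.Stream()
>>> with torch.cuda.stream(s):  # kernels below are enqueued on s
...     y = torch.ones(3, device="cuda") * 2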

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.synchronize.html b/2.9/generated/torch.cuda.synchronize.html index 011dff83bbf..137aa5ccd4f 100644 --- a/2.9/generated/torch.cuda.synchronize.html +++ b/2.9/generated/torch.cuda.synchronize.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.synchronize#

                                        -torch.cuda.synchronize(device=None)[source]#
                                        +torch.cuda.synchronize(device=None)[source]#

                                        Wait for all kernels in all streams on a CUDA device to complete.
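
A minimal sketch (not part of this patch; x and w are hypothetical GPU tensors):

>>> y = x @ w  # enqueued asynchronously on the current stream
>>> torch.cuda.synchronize()  # block the host until all enqueued kernels finish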

                                        Parameters
                                        diff --git a/2.9/generated/torch.cuda.temperature.html b/2.9/generated/torch.cuda.temperature.html index 00518a71692..1385002be48 100644 --- a/2.9/generated/torch.cuda.temperature.html +++ b/2.9/generated/torch.cuda.temperature.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.temperature#

                                        -torch.cuda.temperature(device=None)[source]#
                                        +torch.cuda.temperature(device=None)[source]#

Return the average temperature of the GPU sensor in degrees Celsius.

The average temperature is computed based on the past sample period, as given by nvidia-smi.

                                        diff --git a/2.9/generated/torch.cuda.utilization.html b/2.9/generated/torch.cuda.utilization.html index d07b2988dda..48688953a7b 100644 --- a/2.9/generated/torch.cuda.utilization.html +++ b/2.9/generated/torch.cuda.utilization.html @@ -4404,7 +4404,7 @@

                                        torch.cuda.utilization#

                                        -torch.cuda.utilization(device=None)[source]#
                                        +torch.cuda.utilization(device=None)[source]#

Return the percent of time over the past sample period during which one or more kernels were executing on the GPU, as given by nvidia-smi.

                                        diff --git a/2.9/generated/torch.einsum.html b/2.9/generated/torch.einsum.html index c4a54606ce6..641e7c7f6d7 100644 --- a/2.9/generated/torch.einsum.html +++ b/2.9/generated/torch.einsum.html @@ -4404,7 +4404,7 @@

                                        torch.einsum#

-torch.einsum(equation, *operands) → Tensor[source]#
+torch.einsum(equation, *operands) → Tensor[source]#

                                        Sums the product of the elements of the input operands along dimensions specified using a notation based on the Einstein summation convention.
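
For instance, a plain matrix multiplication written in einsum notation (a sketch, not part of this patch):

>>> x = torch.randn(3, 4)
>>> y = torch.randn(4, 5)
>>> torch.einsum("ij,jk->ik", x, y).shape  # contract over the shared index j
torch.Size([3, 5])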

Einsum allows computing many common multi-dimensional linear algebraic array operations by representing them
diff --git a/2.9/generated/torch.enable_grad.html b/2.9/generated/torch.enable_grad.html index 2f1a4f7fa05..f6790e252d9 100644 --- a/2.9/generated/torch.enable_grad.html +++ b/2.9/generated/torch.enable_grad.html @@ -4404,7 +4404,7 @@

                                        enable_grad#

                                        -class torch.enable_grad(orig_func=None)[source]#
                                        +class torch.enable_grad(orig_func=None)[source]#

                                        Context-manager that enables gradient calculation.

                                        Enables gradient calculation, if it has been disabled via no_grad or set_grad_enabled.

                                        diff --git a/2.9/generated/torch.from_dlpack.html b/2.9/generated/torch.from_dlpack.html index 177490a4593..ab49e0576b4 100644 --- a/2.9/generated/torch.from_dlpack.html +++ b/2.9/generated/torch.from_dlpack.html @@ -4404,7 +4404,7 @@

                                        torch.from_dlpack#

-torch.from_dlpack(ext_tensor) → Tensor[source]#
+torch.from_dlpack(ext_tensor) → Tensor[source]#

                                        Converts a tensor from an external library into a torch.Tensor.

The returned PyTorch tensor will share the memory with the input tensor (which may have come from another library). Note that in-place operations
diff --git a/2.9/generated/torch.func.debug_unwrap.html b/2.9/generated/torch.func.debug_unwrap.html index 1c8351d05a6..104eb9d3e90 100644 --- a/2.9/generated/torch.func.debug_unwrap.html +++ b/2.9/generated/torch.func.debug_unwrap.html @@ -4415,7 +4415,7 @@

                                        torch.func.debug_unwrap#

                                        -torch.func.debug_unwrap(tensor, *, recurse=True)[source]#
                                        +torch.func.debug_unwrap(tensor, *, recurse=True)[source]#

                                        Unwraps a functorch tensor (e.g. BatchedTensor, GradTrackingTensor) to its underlying tensor.

This function should only be used in a debug setting (e.g. trying to print the value of a Tensor in a debugger). Otherwise, using the result of function
diff --git a/2.9/generated/torch.func.functional_call.html b/2.9/generated/torch.func.functional_call.html index 4fa4a0aa71a..738b8964444 100644 --- a/2.9/generated/torch.func.functional_call.html +++ b/2.9/generated/torch.func.functional_call.html @@ -4415,7 +4415,7 @@

                                        torch.func.functional_call#

                                        -torch.func.functional_call(module, parameter_and_buffer_dicts, args=None, kwargs=None, *, tie_weights=True, strict=False)[source]#
                                        +torch.func.functional_call(module, parameter_and_buffer_dicts, args=None, kwargs=None, *, tie_weights=True, strict=False)[source]#

                                        Performs a functional call on the module by replacing the module parameters and buffers with the provided ones.
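
A minimal sketch (not part of this patch):

>>> import torch
>>> from torch.func import functional_call
>>> model = torch.nn.Linear(3, 3)
>>> params = dict(model.named_parameters())  # could be any compatible dict
>>> out = functional_call(model, params, (torch.randn(1, 3),))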

                                        diff --git a/2.9/generated/torch.func.functionalize.html b/2.9/generated/torch.func.functionalize.html index 9b76c826b9b..c57162f46b2 100644 --- a/2.9/generated/torch.func.functionalize.html +++ b/2.9/generated/torch.func.functionalize.html @@ -4415,7 +4415,7 @@

                                        torch.func.functionalize#

                                        -torch.func.functionalize(func, *, remove='mutations')[source]#
                                        +torch.func.functionalize(func, *, remove='mutations')[source]#

                                        functionalize is a transform that can be used to remove (intermediate) mutations and aliasing from a function, while preserving the function’s semantics.

                                        diff --git a/2.9/generated/torch.func.grad.html b/2.9/generated/torch.func.grad.html index 9ee22966e97..588a709cfe6 100644 --- a/2.9/generated/torch.func.grad.html +++ b/2.9/generated/torch.func.grad.html @@ -4415,7 +4415,7 @@

                                        torch.func.grad#

                                        -torch.func.grad(func, argnums=0, has_aux=False)[source]#
                                        +torch.func.grad(func, argnums=0, has_aux=False)[source]#

The grad operator helps compute gradients of func with respect to the input(s) specified by argnums. This operator can be nested to compute higher-order gradients.
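
A minimal sketch (not part of this patch):

>>> from torch.func import grad
>>> g = grad(lambda x: (x ** 2).sum())  # d/dx sum(x^2) = 2x
>>> g(torch.tensor([1.0, 2.0]))
tensor([2., 4.])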

                                        diff --git a/2.9/generated/torch.func.grad_and_value.html b/2.9/generated/torch.func.grad_and_value.html index fced6808ed5..133bf0c16d7 100644 --- a/2.9/generated/torch.func.grad_and_value.html +++ b/2.9/generated/torch.func.grad_and_value.html @@ -4415,7 +4415,7 @@

                                        torch.func.grad_and_value#

                                        -torch.func.grad_and_value(func, argnums=0, has_aux=False)[source]#
                                        +torch.func.grad_and_value(func, argnums=0, has_aux=False)[source]#

                                        Returns a function to compute a tuple of the gradient and primal, or forward, computation.

                                        diff --git a/2.9/generated/torch.func.hessian.html b/2.9/generated/torch.func.hessian.html index fd9299efb7f..cf4c559da40 100644 --- a/2.9/generated/torch.func.hessian.html +++ b/2.9/generated/torch.func.hessian.html @@ -4415,7 +4415,7 @@

                                        torch.func.hessian#

                                        -torch.func.hessian(func, argnums=0)[source]#
                                        +torch.func.hessian(func, argnums=0)[source]#

                                        Computes the Hessian of func with respect to the arg(s) at index argnum via a forward-over-reverse strategy.

The forward-over-reverse strategy (composing jacfwd(jacrev(func))) is
diff --git a/2.9/generated/torch.func.jacfwd.html b/2.9/generated/torch.func.jacfwd.html index dfa6db4261a..755f2107798 100644 --- a/2.9/generated/torch.func.jacfwd.html +++ b/2.9/generated/torch.func.jacfwd.html @@ -4415,7 +4415,7 @@

                                        torch.func.jacfwd#

                                        -torch.func.jacfwd(func, argnums=0, has_aux=False, *, randomness='error')[source]#
                                        +torch.func.jacfwd(func, argnums=0, has_aux=False, *, randomness='error')[source]#

Computes the Jacobian of func with respect to the arg(s) at index argnum using forward-mode autodiff.

                                        diff --git a/2.9/generated/torch.func.jacrev.html b/2.9/generated/torch.func.jacrev.html index 98f946a2b78..e001a0a0710 100644 --- a/2.9/generated/torch.func.jacrev.html +++ b/2.9/generated/torch.func.jacrev.html @@ -4415,7 +4415,7 @@

                                        torch.func.jacrev#

                                        -torch.func.jacrev(func, argnums=0, *, has_aux=False, chunk_size=None, _preallocate_and_copy=False)[source]#
                                        +torch.func.jacrev(func, argnums=0, *, has_aux=False, chunk_size=None, _preallocate_and_copy=False)[source]#

Computes the Jacobian of func with respect to the arg(s) at index argnum using reverse-mode autodiff.

                                        diff --git a/2.9/generated/torch.func.jvp.html b/2.9/generated/torch.func.jvp.html index 28a9a626ed8..654a39313d8 100644 --- a/2.9/generated/torch.func.jvp.html +++ b/2.9/generated/torch.func.jvp.html @@ -4415,7 +4415,7 @@

                                        torch.func.jvp#

                                        -torch.func.jvp(func, primals, tangents, *, strict=False, has_aux=False)[source]#
                                        +torch.func.jvp(func, primals, tangents, *, strict=False, has_aux=False)[source]#

                                        Standing for the Jacobian-vector product, returns a tuple containing the output of func(*primals) and the “Jacobian of func evaluated at primals” times tangents. This is also known as forward-mode autodiff.
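
A minimal sketch (not part of this patch):

>>> from torch.func import jvp
>>> out, tangent = jvp(torch.sin, (torch.tensor(0.0),), (torch.tensor(1.0),))
>>> tangent  # cos(0) * 1
tensor(1.)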

                                        diff --git a/2.9/generated/torch.func.linearize.html b/2.9/generated/torch.func.linearize.html index 8d6a4e1aa1e..0f59daa7723 100644 --- a/2.9/generated/torch.func.linearize.html +++ b/2.9/generated/torch.func.linearize.html @@ -4415,7 +4415,7 @@

                                        torch.func.linearize#

                                        -torch.func.linearize(func, *primals)[source]#
                                        +torch.func.linearize(func, *primals)[source]#

Returns the value of func at primals and a linear approximation at primals.

                                        diff --git a/2.9/generated/torch.func.replace_all_batch_norm_modules_.html b/2.9/generated/torch.func.replace_all_batch_norm_modules_.html index 4ea5b1ed9e6..7c5b0417760 100644 --- a/2.9/generated/torch.func.replace_all_batch_norm_modules_.html +++ b/2.9/generated/torch.func.replace_all_batch_norm_modules_.html @@ -4415,7 +4415,7 @@

                                        torch.func.replace_all_batch_norm_modules_#

                                        -torch.func.replace_all_batch_norm_modules_(root)[source]#
                                        +torch.func.replace_all_batch_norm_modules_(root)[source]#

Updates root in place by setting the running_mean and running_var to None and setting track_running_stats to False for any nn.BatchNorm module in root.

                                        diff --git a/2.9/generated/torch.func.stack_module_state.html b/2.9/generated/torch.func.stack_module_state.html index dd77aab7a63..5522743e1cc 100644 --- a/2.9/generated/torch.func.stack_module_state.html +++ b/2.9/generated/torch.func.stack_module_state.html @@ -4415,7 +4415,7 @@

                                        torch.func.stack_module_state#

-torch.func.stack_module_state(models) → params, buffers[source]#
+torch.func.stack_module_state(models) → params, buffers[source]#

                                        Prepares a list of torch.nn.Modules for ensembling with vmap().

Given a list of M nn.Modules of the same class, returns two dictionaries that stack all of their parameters and buffers together, indexed by name.
diff --git a/2.9/generated/torch.func.vjp.html b/2.9/generated/torch.func.vjp.html index 79b057fe2b9..281ebbb1ddd 100644 --- a/2.9/generated/torch.func.vjp.html +++ b/2.9/generated/torch.func.vjp.html @@ -4415,7 +4415,7 @@

                                        torch.func.vjp#

                                        -torch.func.vjp(func, *primals, has_aux=False)[source]#
                                        +torch.func.vjp(func, *primals, has_aux=False)[source]#

Standing for the vector-Jacobian product, returns a tuple containing the results of func applied to primals and a function that, when given cotangents, computes the reverse-mode Jacobian of func with
diff --git a/2.9/generated/torch.func.vmap.html b/2.9/generated/torch.func.vmap.html index 01ce96ca5f4..39660919905 100644 --- a/2.9/generated/torch.func.vmap.html +++ b/2.9/generated/torch.func.vmap.html @@ -4415,7 +4415,7 @@

                                        torch.func.vmap#

                                        -torch.func.vmap(func, in_dims=0, out_dims=0, randomness='error', *, chunk_size=None)[source]#
                                        +torch.func.vmap(func, in_dims=0, out_dims=0, randomness='error', *, chunk_size=None)[source]#

vmap is the vectorizing map; vmap(func) returns a new function that maps func over some dimension of the inputs. Semantically, vmap pushes the map into PyTorch operations called by func, effectively
diff --git a/2.9/generated/torch.functional.align_tensors.html b/2.9/generated/torch.functional.align_tensors.html index 38c94357481..20f57bb6b6b 100644 --- a/2.9/generated/torch.functional.align_tensors.html +++ b/2.9/generated/torch.functional.align_tensors.html @@ -4415,7 +4415,7 @@

                                        torch.functional.align_tensors#

                                        -torch.functional.align_tensors(*tensors)[source]#
                                        +torch.functional.align_tensors(*tensors)[source]#
                                        diff --git a/2.9/generated/torch.functional.atleast_1d.html b/2.9/generated/torch.functional.atleast_1d.html index 140bc3208b5..ed9deead29a 100644 --- a/2.9/generated/torch.functional.atleast_1d.html +++ b/2.9/generated/torch.functional.atleast_1d.html @@ -4415,7 +4415,7 @@

                                        torch.functional.atleast_1d#

                                        -torch.functional.atleast_1d(*tensors)[source]#
                                        +torch.functional.atleast_1d(*tensors)[source]#

                                        Returns a 1-dimensional view of each input tensor with zero dimensions. Input tensors with one or more dimensions are returned as-is.

                                        diff --git a/2.9/generated/torch.functional.atleast_2d.html b/2.9/generated/torch.functional.atleast_2d.html index 13663174786..4affc069d7d 100644 --- a/2.9/generated/torch.functional.atleast_2d.html +++ b/2.9/generated/torch.functional.atleast_2d.html @@ -4415,7 +4415,7 @@

                                        torch.functional.atleast_2d#

                                        -torch.functional.atleast_2d(*tensors)[source]#
                                        +torch.functional.atleast_2d(*tensors)[source]#

                                        Returns a 2-dimensional view of each input tensor with zero dimensions. Input tensors with two or more dimensions are returned as-is.

                                        diff --git a/2.9/generated/torch.functional.atleast_3d.html b/2.9/generated/torch.functional.atleast_3d.html index 2e7412435c6..164d7adc17b 100644 --- a/2.9/generated/torch.functional.atleast_3d.html +++ b/2.9/generated/torch.functional.atleast_3d.html @@ -4415,7 +4415,7 @@

                                        torch.functional.atleast_3d#

                                        -torch.functional.atleast_3d(*tensors)[source]#
                                        +torch.functional.atleast_3d(*tensors)[source]#

                                        Returns a 3-dimensional view of each input tensor with zero dimensions. Input tensors with three or more dimensions are returned as-is.

                                        diff --git a/2.9/generated/torch.functional.block_diag.html b/2.9/generated/torch.functional.block_diag.html index f491a475d77..907f91b6258 100644 --- a/2.9/generated/torch.functional.block_diag.html +++ b/2.9/generated/torch.functional.block_diag.html @@ -4415,7 +4415,7 @@

                                        torch.functional.block_diag#

                                        -torch.functional.block_diag(*tensors)[source]#
                                        +torch.functional.block_diag(*tensors)[source]#

                                        Create a block diagonal matrix from provided tensors.
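
A minimal sketch (not part of this patch):

>>> A = torch.tensor([[1, 2], [3, 4]])
>>> B = torch.tensor([[5]])
>>> torch.block_diag(A, B)
tensor([[1, 2, 0],
        [3, 4, 0],
        [0, 0, 5]])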

                                        Parameters
                                        diff --git a/2.9/generated/torch.functional.broadcast_shapes.html b/2.9/generated/torch.functional.broadcast_shapes.html index 4c064911b8e..6538994d04b 100644 --- a/2.9/generated/torch.functional.broadcast_shapes.html +++ b/2.9/generated/torch.functional.broadcast_shapes.html @@ -4415,7 +4415,7 @@

                                        torch.functional.broadcast_shapes#

-torch.functional.broadcast_shapes(*shapes) → Size[source]#
+torch.functional.broadcast_shapes(*shapes) → Size[source]#

                                        Similar to broadcast_tensors() but for shapes.

This is equivalent to torch.broadcast_tensors(*map(torch.empty, shapes))[0].shape
diff --git a/2.9/generated/torch.functional.broadcast_tensors.html b/2.9/generated/torch.functional.broadcast_tensors.html index 469aafc9247..c4116606c71 100644 --- a/2.9/generated/torch.functional.broadcast_tensors.html +++ b/2.9/generated/torch.functional.broadcast_tensors.html @@ -4415,7 +4415,7 @@

                                        torch.functional.broadcast_tensors#

-torch.functional.broadcast_tensors(*tensors) → List of Tensors[source]#
+torch.functional.broadcast_tensors(*tensors) → List of Tensors[source]#

                                        Broadcasts the given tensors according to Broadcasting semantics.

                                        Parameters
                                        diff --git a/2.9/generated/torch.functional.cartesian_prod.html b/2.9/generated/torch.functional.cartesian_prod.html index f9bbf319fb3..2243acc9873 100644 --- a/2.9/generated/torch.functional.cartesian_prod.html +++ b/2.9/generated/torch.functional.cartesian_prod.html @@ -4415,7 +4415,7 @@

                                        torch.functional.cartesian_prod#

                                        -torch.functional.cartesian_prod(*tensors)[source]#
                                        +torch.functional.cartesian_prod(*tensors)[source]#

Computes the Cartesian product of the given sequence of tensors. The behavior is similar to Python’s itertools.product.
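
A minimal sketch (not part of this patch):

>>> a = torch.tensor([1, 2])
>>> b = torch.tensor([3, 4])
>>> torch.cartesian_prod(a, b)
tensor([[1, 3],
        [1, 4],
        [2, 3],
        [2, 4]])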

                                        diff --git a/2.9/generated/torch.functional.cdist.html b/2.9/generated/torch.functional.cdist.html index 92ce04c0907..2c92b327330 100644 --- a/2.9/generated/torch.functional.cdist.html +++ b/2.9/generated/torch.functional.cdist.html @@ -4415,7 +4415,7 @@

                                        torch.functional.cdist#

                                        -torch.functional.cdist(x1, x2, p=2.0, compute_mode='use_mm_for_euclid_dist_if_necessary')[source]#
                                        +torch.functional.cdist(x1, x2, p=2.0, compute_mode='use_mm_for_euclid_dist_if_necessary')[source]#

Computes the batched p-norm distance between each pair of the two collections of row vectors.

                                        Parameters
                                        diff --git a/2.9/generated/torch.functional.chain_matmul.html b/2.9/generated/torch.functional.chain_matmul.html index ddd19b1523c..29d91b2258f 100644 --- a/2.9/generated/torch.functional.chain_matmul.html +++ b/2.9/generated/torch.functional.chain_matmul.html @@ -4415,7 +4415,7 @@

                                        torch.functional.chain_matmul#

                                        -torch.functional.chain_matmul(*matrices, out=None)[source]#
                                        +torch.functional.chain_matmul(*matrices, out=None)[source]#

Returns the matrix product of the N 2-D tensors. This product is efficiently computed using the matrix chain order algorithm, which selects the order that incurs the lowest cost in terms of arithmetic operations ([CLRS]). Note that since this is a function to compute the product, N
diff --git a/2.9/generated/torch.functional.einsum.html b/2.9/generated/torch.functional.einsum.html index 6be47f94f9e..78a38169ab2 100644 --- a/2.9/generated/torch.functional.einsum.html +++ b/2.9/generated/torch.functional.einsum.html @@ -4415,7 +4415,7 @@

                                        torch.functional.einsum#

-torch.functional.einsum(equation, *operands) → Tensor[source]#
+torch.functional.einsum(equation, *operands) → Tensor[source]#

                                        Sums the product of the elements of the input operands along dimensions specified using a notation based on the Einstein summation convention.

Einsum allows computing many common multi-dimensional linear algebraic array operations by representing them
diff --git a/2.9/generated/torch.functional.lu.html b/2.9/generated/torch.functional.lu.html index a2cf06b7f39..c7c901bd839 100644 --- a/2.9/generated/torch.functional.lu.html +++ b/2.9/generated/torch.functional.lu.html @@ -4415,7 +4415,7 @@

                                        torch.functional.lu#

                                        -torch.functional.lu(*args, **kwargs)[source]#
                                        +torch.functional.lu(*args, **kwargs)[source]#

Computes the LU factorization of a matrix or batches of matrices A. Returns a tuple containing the LU factorization and pivots of A. Pivoting is done if pivot is set to
diff --git a/2.9/generated/torch.functional.meshgrid.html b/2.9/generated/torch.functional.meshgrid.html index 5a8442d3fa3..e29b3e84187 100644 --- a/2.9/generated/torch.functional.meshgrid.html +++ b/2.9/generated/torch.functional.meshgrid.html @@ -4415,7 +4415,7 @@

                                        torch.functional.meshgrid#

                                        -torch.functional.meshgrid(*tensors, indexing=None)[source]#
                                        +torch.functional.meshgrid(*tensors, indexing=None)[source]#

Creates grids of coordinates specified by the 1D inputs in tensors.

                                        This is helpful when you want to visualize data over some range of inputs. See below for a plotting example.
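
A minimal sketch (not part of this patch):

>>> x, y = torch.meshgrid(torch.tensor([1, 2]), torch.tensor([3, 4]), indexing="ij")
>>> x
tensor([[1, 1],
        [2, 2]])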

                                        diff --git a/2.9/generated/torch.functional.norm.html b/2.9/generated/torch.functional.norm.html index 4442880d0b6..eb3061ec738 100644 --- a/2.9/generated/torch.functional.norm.html +++ b/2.9/generated/torch.functional.norm.html @@ -4415,7 +4415,7 @@

                                        torch.functional.norm#

                                        -torch.functional.norm(input, p='fro', dim=None, keepdim=False, out=None, dtype=None)[source]#
                                        +torch.functional.norm(input, p='fro', dim=None, keepdim=False, out=None, dtype=None)[source]#

                                        Returns the matrix norm or vector norm of a given tensor.

                                        Warning

                                        diff --git a/2.9/generated/torch.functional.split.html b/2.9/generated/torch.functional.split.html index 6d2998eb85e..a23a65bcd72 100644 --- a/2.9/generated/torch.functional.split.html +++ b/2.9/generated/torch.functional.split.html @@ -4415,7 +4415,7 @@

                                        torch.functional.split#

                                        -torch.functional.split(tensor, split_size_or_sections, dim=0)[source]#
                                        +torch.functional.split(tensor, split_size_or_sections, dim=0)[source]#

                                        Splits the tensor into chunks. Each chunk is a view of the original tensor.
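
A minimal sketch (not part of this patch):

>>> t = torch.arange(6)
>>> torch.split(t, 2)  # three equally sized chunks, each a view of t
(tensor([0, 1]), tensor([2, 3]), tensor([4, 5]))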

If split_size_or_sections is an integer type, then tensor will be split into equally sized chunks (if possible). Last chunk will be smaller if
diff --git a/2.9/generated/torch.functional.stft.html b/2.9/generated/torch.functional.stft.html index 097c81d1e48..db138e72fc8 100644 --- a/2.9/generated/torch.functional.stft.html +++ b/2.9/generated/torch.functional.stft.html @@ -4415,7 +4415,7 @@

                                        torch.functional.stft#

                                        -torch.functional.stft(input, n_fft, hop_length=None, win_length=None, window=None, center=True, pad_mode='reflect', normalized=False, onesided=None, return_complex=None, align_to_window=None)[source]#
                                        +torch.functional.stft(input, n_fft, hop_length=None, win_length=None, window=None, center=True, pad_mode='reflect', normalized=False, onesided=None, return_complex=None, align_to_window=None)[source]#

                                        Short-time Fourier transform (STFT).

                                        Warning

                                        diff --git a/2.9/generated/torch.functional.tensordot.html b/2.9/generated/torch.functional.tensordot.html index 7b020fab283..3e8c79d3960 100644 --- a/2.9/generated/torch.functional.tensordot.html +++ b/2.9/generated/torch.functional.tensordot.html @@ -4415,7 +4415,7 @@

                                        torch.functional.tensordot#

                                        -torch.functional.tensordot(a, b, dims=2, out=None)[source]#
                                        +torch.functional.tensordot(a, b, dims=2, out=None)[source]#

                                        Returns a contraction of a and b over multiple dimensions.

                                        tensordot implements a generalized matrix product.

                                        diff --git a/2.9/generated/torch.functional.unique.html b/2.9/generated/torch.functional.unique.html index 20e2aae96d4..78ff1bbdade 100644 --- a/2.9/generated/torch.functional.unique.html +++ b/2.9/generated/torch.functional.unique.html @@ -4415,7 +4415,7 @@

                                        torch.functional.unique#

-torch.functional.unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None) → tuple[Tensor, Tensor, Tensor][source]#
+torch.functional.unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None) → tuple[Tensor, Tensor, Tensor][source]#

                                        Returns the unique elements of the input tensor.
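
A minimal sketch (not part of this patch):

>>> torch.unique(torch.tensor([1, 3, 2, 3]))  # sorted=True by default
tensor([1, 2, 3])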

                                        Note

                                        diff --git a/2.9/generated/torch.functional.unique_consecutive.html b/2.9/generated/torch.functional.unique_consecutive.html index 17dd1673936..5d9190a8e04 100644 --- a/2.9/generated/torch.functional.unique_consecutive.html +++ b/2.9/generated/torch.functional.unique_consecutive.html @@ -4415,7 +4415,7 @@

                                        torch.functional.unique_consecutive#

                                        -torch.functional.unique_consecutive(*args, **kwargs)[source]#
                                        +torch.functional.unique_consecutive(*args, **kwargs)[source]#

                                        Eliminates all but the first element from every consecutive group of equivalent elements.

                                        Note

                                        diff --git a/2.9/generated/torch.functional.unravel_index.html b/2.9/generated/torch.functional.unravel_index.html index a736f311a44..6306aefe25f 100644 --- a/2.9/generated/torch.functional.unravel_index.html +++ b/2.9/generated/torch.functional.unravel_index.html @@ -4415,7 +4415,7 @@

                                        torch.functional.unravel_index#

                                        -torch.functional.unravel_index(indices, shape)[source]#
                                        +torch.functional.unravel_index(indices, shape)[source]#

                                        Converts a tensor of flat indices into a tuple of coordinate tensors that index into an arbitrary tensor of the specified shape.
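
A minimal sketch (not part of this patch):

>>> torch.unravel_index(torch.tensor(5), (3, 2))  # flat index 5 -> (row 2, col 1)
(tensor(2), tensor(1))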

                                        diff --git a/2.9/generated/torch.fx.experimental.proxy_tensor.get_proxy_mode.html b/2.9/generated/torch.fx.experimental.proxy_tensor.get_proxy_mode.html index d2317a8c6d1..5cb3a6eedf5 100644 --- a/2.9/generated/torch.fx.experimental.proxy_tensor.get_proxy_mode.html +++ b/2.9/generated/torch.fx.experimental.proxy_tensor.get_proxy_mode.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.proxy_tensor.get_proxy_mode#

                                        -torch.fx.experimental.proxy_tensor.get_proxy_mode()[source]#
                                        +torch.fx.experimental.proxy_tensor.get_proxy_mode()[source]#

Return the currently active proxy tracing mode, or None if we are not currently tracing. This includes pre-dispatch proxy tracing.

                                        diff --git a/2.9/generated/torch.fx.experimental.proxy_tensor.handle_sym_dispatch.html b/2.9/generated/torch.fx.experimental.proxy_tensor.handle_sym_dispatch.html index 1f5bc6cf28d..d7e64f70964 100644 --- a/2.9/generated/torch.fx.experimental.proxy_tensor.handle_sym_dispatch.html +++ b/2.9/generated/torch.fx.experimental.proxy_tensor.handle_sym_dispatch.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.proxy_tensor.handle_sym_dispatch#

                                        -torch.fx.experimental.proxy_tensor.handle_sym_dispatch(func, args, kwargs)[source]#
                                        +torch.fx.experimental.proxy_tensor.handle_sym_dispatch(func, args, kwargs)[source]#

                                        Call into the currently active proxy tracing mode to do a SymInt/SymFloat/SymBool dispatch trace on a function that operates on these arguments.

                                        diff --git a/2.9/generated/torch.fx.experimental.proxy_tensor.make_fx.html b/2.9/generated/torch.fx.experimental.proxy_tensor.make_fx.html index d5195f5e3b3..5d18036ea70 100644 --- a/2.9/generated/torch.fx.experimental.proxy_tensor.make_fx.html +++ b/2.9/generated/torch.fx.experimental.proxy_tensor.make_fx.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.proxy_tensor.make_fx#

                                        -torch.fx.experimental.proxy_tensor.make_fx(f, decomposition_table=None, tracing_mode='real', _allow_non_fake_inputs=False, *, pre_dispatch=False, record_module_stack=False, _allow_fake_constant=False, _error_on_data_dependent_ops=True, record_stack_traces=False)[source]#
                                        +torch.fx.experimental.proxy_tensor.make_fx(f, decomposition_table=None, tracing_mode='real', _allow_non_fake_inputs=False, *, pre_dispatch=False, record_module_stack=False, _allow_fake_constant=False, _error_on_data_dependent_ops=True, record_stack_traces=False)[source]#

Given a function f, return a new function which, when executed with valid arguments to f, returns an FX GraphModule representing the set of operations that were executed during the course of execution.

                                        diff --git a/2.9/generated/torch.fx.experimental.proxy_tensor.maybe_disable_thunkify.html b/2.9/generated/torch.fx.experimental.proxy_tensor.maybe_disable_thunkify.html index 3793061772d..897ea207230 100644 --- a/2.9/generated/torch.fx.experimental.proxy_tensor.maybe_disable_thunkify.html +++ b/2.9/generated/torch.fx.experimental.proxy_tensor.maybe_disable_thunkify.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.proxy_tensor.maybe_disable_thunkify#

                                        -torch.fx.experimental.proxy_tensor.maybe_disable_thunkify()[source]#
                                        +torch.fx.experimental.proxy_tensor.maybe_disable_thunkify()[source]#

Within a context, disable thunkification. See maybe_enable_thunkify() for more details. This is helpful if you have a wrapper function which you want to enable thunkification on, but in some segment on the inside (say,
diff --git a/2.9/generated/torch.fx.experimental.proxy_tensor.maybe_enable_thunkify.html b/2.9/generated/torch.fx.experimental.proxy_tensor.maybe_enable_thunkify.html index 69c34c1a3fd..556e6cd680d 100644 --- a/2.9/generated/torch.fx.experimental.proxy_tensor.maybe_enable_thunkify.html +++ b/2.9/generated/torch.fx.experimental.proxy_tensor.maybe_enable_thunkify.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.proxy_tensor.maybe_enable_thunkify#

                                        -torch.fx.experimental.proxy_tensor.maybe_enable_thunkify()[source]#
                                        +torch.fx.experimental.proxy_tensor.maybe_enable_thunkify()[source]#

Within this context manager, if you are doing make_fx tracing, we will thunkify all SymNode compute and avoid tracing it into the graph unless it is actually needed. You should prefer to avoid using this as much as possible, as lazy evaluation of
diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.CallMethodKey.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.CallMethodKey.html index c2e21256c16..91f9721f1e2 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.CallMethodKey.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.CallMethodKey.html @@ -4404,12 +4404,12 @@

                                        CallMethodKey#

                                        -class torch.fx.experimental.symbolic_shapes.CallMethodKey(name: 'str')[source]#
                                        +class torch.fx.experimental.symbolic_shapes.CallMethodKey(name: 'str')[source]#
                                        -get(o)[source]#
                                        +get(o)[source]#

Call the method on the object.

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.ConvertIntKey.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.ConvertIntKey.html index 0a00a47746f..654287d84ba 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.ConvertIntKey.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.ConvertIntKey.html @@ -4404,12 +4404,12 @@

                                        ConvertIntKey#

                                        -class torch.fx.experimental.symbolic_shapes.ConvertIntKey[source]#
                                        +class torch.fx.experimental.symbolic_shapes.ConvertIntKey[source]#
                                        -get(b)[source]#
                                        +get(b)[source]#

Get the int value from the bool.

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.DimConstraints.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.DimConstraints.html index 83d32af6676..7da3e52a0e8 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.DimConstraints.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.DimConstraints.html @@ -4404,14 +4404,14 @@

                                        DimConstraints#

                                        -class torch.fx.experimental.symbolic_shapes.DimConstraints(symbol_to_source, var_to_val, marked_dynamic, source_name_to_debug_name)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.DimConstraints(symbol_to_source, var_to_val, marked_dynamic, source_name_to_debug_name)[source]#

                                        Custom solver for a system of constraints on symbolic dimensions. Solutions are “static” values or simplified “dynamic” constraints.

                                        -add(expr)[source]#
                                        +add(expr)[source]#

                                        Add an expression to the set of constraints.

                                        Return whether the expression is a trivial constraint (i.e., an obvious tautology).

                                        @@ -4423,7 +4423,7 @@

                                        DimConstraints
                                        -add_equality(source, expr)[source]#
                                        +add_equality(source, expr)[source]#

                                        Add an equality constraint

                                        @@ -4431,7 +4431,7 @@

                                        DimConstraints
                                        -forced_specializations()[source]#
                                        +forced_specializations()[source]#

Returns a dictionary mapping the names of symbols to their specialized values.

                                        Return type
                                        @@ -4442,7 +4442,7 @@

                                        DimConstraints
                                        -prettify_results(original_signature, dynamic_shapes, constraint_violation_error, forced_specializations)[source]#
                                        +prettify_results(original_signature, dynamic_shapes, constraint_violation_error, forced_specializations)[source]#

                                        Format a message for constraint violation errors

                                        Return type
                                        @@ -4453,7 +4453,7 @@

                                        DimConstraints
                                        -rewrite_with_congruences(s, expr)[source]#
                                        +rewrite_with_congruences(s, expr)[source]#

                                        Eliminate expressions of the form b // d and b % d while adding congruences of the form b % d == k. This leaves rational operators (in particular of the form b / d) that our inequality solver can handle. We solve the added congruences separately (using our congruence solver, see below).

                                        @@ -4466,7 +4466,7 @@

                                        DimConstraints
                                        -solve()[source]#
                                        +solve()[source]#

                                        Solve the system of constraint equations to find simplified constraints

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.DimDynamic.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.DimDynamic.html index 0cc13becad1..472a1dff1f3 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.DimDynamic.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.DimDynamic.html @@ -4404,7 +4404,7 @@

                                        DimDynamic#

                                        -class torch.fx.experimental.symbolic_shapes.DimDynamic(value)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.DimDynamic(value)[source]#

Controls how to perform symbol allocation for a dimension. It is always sound to default this to DYNAMIC, but the policies DUCK and STATIC can result in better trace-time and compile-time performance, as they reduce
diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.DivideByKey.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.DivideByKey.html index 59fecd6af6f..d4f7f5f402f 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.DivideByKey.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.DivideByKey.html @@ -4404,12 +4404,12 @@

                                        DivideByKey#

                                        -class torch.fx.experimental.symbolic_shapes.DivideByKey(divisor: 'IntLikeType')[source]#
                                        +class torch.fx.experimental.symbolic_shapes.DivideByKey(divisor: 'IntLikeType')[source]#
                                        -get(o)[source]#
                                        +get(o)[source]#

Divide the object by the divisor.

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.EqualityConstraint.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.EqualityConstraint.html index 9591cb1ff7f..3b46e71b1dd 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.EqualityConstraint.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.EqualityConstraint.html @@ -4404,7 +4404,7 @@

                                        EqualityConstraint#

                                        -class torch.fx.experimental.symbolic_shapes.EqualityConstraint(warn_only, source_pairs, derived_equalities, phantom_symbols, relaxed_sources)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.EqualityConstraint(warn_only, source_pairs, derived_equalities, phantom_symbols, relaxed_sources)[source]#

                                        Represent and decide various kinds of equality constraints between input sources.

A “source pair” is a pair of input sources for dynamic dimensions that are specified equal. We represent source_pairs in a union-find forest
diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.InnerTensorKey.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.InnerTensorKey.html index ed1819f3d0c..0e58128213a 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.InnerTensorKey.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.InnerTensorKey.html @@ -4404,12 +4404,12 @@

                                        InnerTensorKey#

                                        -class torch.fx.experimental.symbolic_shapes.InnerTensorKey(inner_name: 'str')[source]#
                                        +class torch.fx.experimental.symbolic_shapes.InnerTensorKey(inner_name: 'str')[source]#
                                        -get(o)[source]#
                                        +get(o)[source]#

                                        Get the inner tensor attribute

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.PropagateUnbackedSymInts.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.PropagateUnbackedSymInts.html index 8f7b792d405..1cb773053a6 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.PropagateUnbackedSymInts.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.PropagateUnbackedSymInts.html @@ -4404,12 +4404,12 @@

                                        PropagateUnbackedSymInts#

                                        -class torch.fx.experimental.symbolic_shapes.PropagateUnbackedSymInts(module, garbage_collect_values=True, graph=None)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.PropagateUnbackedSymInts(module, garbage_collect_values=True, graph=None)[source]#
                                        -boxed_run(args_list)[source]#
                                        +boxed_run(args_list)[source]#

                                        Run module via interpretation and return the result. This uses the “boxed” calling convention, where you pass a list of arguments, which will be cleared by the interpreter. This ensures that input tensors are promptly deallocated.

                                        @@ -4421,7 +4421,7 @@

                                        PropagateUnbackedSymInts
                                        -call_function(target, args, kwargs)[source]#
                                        +call_function(target, args, kwargs)[source]#

                                        Execute a call_function node and return the result.

                                        Parameters
                                        @@ -4449,7 +4449,7 @@

                                        PropagateUnbackedSymInts
                                        -call_method(target, args, kwargs)[source]#
                                        +call_method(target, args, kwargs)[source]#

                                        Execute a call_method node and return the result.

                                        Parameters
                                        @@ -4477,7 +4477,7 @@

                                        PropagateUnbackedSymInts
                                        -call_module(target, args, kwargs)[source]#
                                        +call_module(target, args, kwargs)[source]#

                                        Execute a call_module node and return the result.

                                        Parameters
                                        @@ -4505,7 +4505,7 @@

                                        PropagateUnbackedSymInts
                                        -fetch_args_kwargs_from_env(n)[source]#
                                        +fetch_args_kwargs_from_env(n)[source]#

                                        Fetch the concrete values of args and kwargs of node n from the current execution environment.

                                        @@ -4527,7 +4527,7 @@

                                        PropagateUnbackedSymInts
                                        -fetch_attr(target)[source]#
                                        +fetch_attr(target)[source]#

                                        Fetch an attribute from the Module hierarchy of self.module.

                                        Parameters
                                        @@ -4548,7 +4548,7 @@

                                        PropagateUnbackedSymInts
                                        -get_attr(target, args, kwargs)[source]#
                                        +get_attr(target, args, kwargs)[source]#

                                        Execute a get_attr node. Will retrieve an attribute value from the Module hierarchy of self.module.

                                        @@ -4576,7 +4576,7 @@

                                        PropagateUnbackedSymInts
                                        -map_nodes_to_values(args, n)[source]#
                                        +map_nodes_to_values(args, n)[source]#

                                        Recursively descend through args and look up the concrete value for each Node in the current execution environment.

                                        @@ -4598,7 +4598,7 @@

                                        PropagateUnbackedSymInts
                                        -output(target, args, kwargs)[source]#
                                        +output(target, args, kwargs)[source]#

                                        Execute an output node. This really just retrieves the value referenced by the output node and returns it.

                                        @@ -4626,7 +4626,7 @@

                                        PropagateUnbackedSymInts
                                        -placeholder(target, args, kwargs)[source]#
                                        +placeholder(target, args, kwargs)[source]#

Execute a placeholder node. Note that this is stateful: Interpreter maintains an internal iterator over arguments passed to run and this method returns next() on that iterator. @@ -4656,7 +4656,7 @@

                                        PropagateUnbackedSymInts
                                        -run(*args, initial_env=None, enable_io_processing=True)[source]#
                                        +run(*args, initial_env=None, enable_io_processing=True)[source]#

                                        Run module via interpretation and return the result.

                                        Parameters
                                        @@ -4685,7 +4685,7 @@

                                        PropagateUnbackedSymInts
                                        -run_node(n)[source]#
                                        +run_node(n)[source]#

                                        Run an FX node, propagating unbacked Symbol bindings to the new fake tensor

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.RelaxedUnspecConstraint.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.RelaxedUnspecConstraint.html index 8bc1d81c82a..f3c7ec2d488 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.RelaxedUnspecConstraint.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.RelaxedUnspecConstraint.html @@ -4404,7 +4404,7 @@

                                        RelaxedUnspecConstraint#

                                        -class torch.fx.experimental.symbolic_shapes.RelaxedUnspecConstraint(warn_only)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.RelaxedUnspecConstraint(warn_only)[source]#

                                        For clients: no explicit constraint; constraint is whatever is implicitly inferred by guards from tracing.

                                        For backends: there must exist at least TWO possible values for the diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.ShapeEnv.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.ShapeEnv.html index 4ee002e9c8f..796bc0cf988 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.ShapeEnv.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.ShapeEnv.html @@ -4404,12 +4404,12 @@

                                        ShapeEnv#

                                        -class torch.fx.experimental.symbolic_shapes.ShapeEnv(*, should_record_events=None, tracked_fakes=None, **kwargs)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.ShapeEnv(*, should_record_events=None, tracked_fakes=None, **kwargs)[source]#
                                        -add_var_to_val(expr, val)[source]#
                                        +add_var_to_val(expr, val)[source]#

                                        Adds a new symbol to the symbolic environment.

                                        @@ -4417,7 +4417,7 @@

                                        ShapeEnv
                                        -bind_symbols(placeholders, args)[source]#
                                        +bind_symbols(placeholders, args)[source]#

Given a paired list of placeholders (fake tensors with symbolic sizes) and concrete arguments (regular tensors with real sizes), returns a dictionary mapping each symbol to its real value. @@ -4440,7 +4440,7 @@

                                        ShapeEnv
                                        -bound_sympy(expr, size_oblivious=False)[source]#
                                        +bound_sympy(expr, size_oblivious=False)[source]#

                                        Given a sympy expression, computes a ValueRanges bound for what values it can be

                                        Return type
                                        @@ -4451,7 +4451,7 @@

                                        ShapeEnv
                                        -check_equal(other)[source]#
                                        +check_equal(other)[source]#

                                        Compare another ShapeEnv for equivalence

                                        @@ -4459,7 +4459,7 @@

                                        ShapeEnv
                                        -cleanup()[source]#
                                        +cleanup()[source]#

                                        Break reference cycles.

                                        This destroys the stacks. If you really want to keep them, we just need some way to break references on code objects.

                                        @@ -4469,7 +4469,7 @@

                                        ShapeEnv
                                        -create_symbol(val, source, dynamic_dim=DimDynamic.DUCK, constraint_dim=None, positive=True, do_not_specialize_zero_one=False, symbolic_context=None)[source]#
                                        +create_symbol(val, source, dynamic_dim=DimDynamic.DUCK, constraint_dim=None, positive=True, do_not_specialize_zero_one=False, symbolic_context=None)[source]#

                                        Create a new symbol which is tracked by this ShapeEnv

                                        Return type
                                        @@ -4480,7 +4480,7 @@

                                        ShapeEnv
                                        -create_symbolic_sizes_strides_storage_offset(ex, source, *, symbolic_context=None)[source]#
                                        +create_symbolic_sizes_strides_storage_offset(ex, source, *, symbolic_context=None)[source]#

                                        Returns a list of symbolic sizes and strides for the given tensor. We try our best to express stride in terms of the sizes, so as to not introduce new symbolic variables.

                                        @@ -4493,7 +4493,7 @@

                                        ShapeEnv
                                        -create_symboolnode(sym)[source]#
                                        +create_symboolnode(sym)[source]#

                                        Create a SymBool object from a sympy boolean expression

                                        Return type
                                        @@ -4504,7 +4504,7 @@

                                        ShapeEnv
                                        -create_symfloatnode(sym, *, hint, source=None)[source]#
                                        +create_symfloatnode(sym, *, hint, source=None)[source]#

                                        Create a SymFloat value from a symbolic expression

                                        Return type
                                        @@ -4515,7 +4515,7 @@

                                        ShapeEnv
                                        -create_symintnode(sym, *, hint, source=None)[source]#
                                        +create_symintnode(sym, *, hint, source=None)[source]#

                                        Create a SymInt value from a symbolic expression

If you know what the current hint value of the SymInt to be created is, pass it into hint. Otherwise, pass None and we will make our best guess. @@ -4529,7 +4529,7 @@

                                        ShapeEnv
                                        -create_unbacked_symbool()[source]#
                                        +create_unbacked_symbool()[source]#

                                        Create a symbolic boolean without a hint value

                                        Return type
                                        @@ -4540,7 +4540,7 @@

                                        ShapeEnv
                                        -create_unbacked_symfloat()[source]#
                                        +create_unbacked_symfloat()[source]#

                                        Create a symbolic float without a hint value

                                        Return type
                                        @@ -4551,7 +4551,7 @@

                                        ShapeEnv
                                        -create_unbacked_symint(source=None)[source]#
                                        +create_unbacked_symint(source=None)[source]#

                                        Create a symbolic integer without a hint value

                                        Return type
                                        @@ -4562,7 +4562,7 @@

                                        ShapeEnv
                                        -create_unspecified_symbol(val, source, dynamic_dim=DimDynamic.DUCK, constraint_dim=None, symbolic_context=None)[source]#
                                        +create_unspecified_symbol(val, source, dynamic_dim=DimDynamic.DUCK, constraint_dim=None, symbolic_context=None)[source]#

                                        Create a symbol with an unspecified value

Compared to standard symbols, we do not assume the value is positive, nor do we specialize on zero or one values.

                                        @@ -4575,7 +4575,7 @@

                                        ShapeEnv
                                        -create_unspecified_symint_and_symbol(value, source, dynamic_dim)[source]#
                                        +create_unspecified_symint_and_symbol(value, source, dynamic_dim)[source]#

                                        Create a SymInt wrapping a new unspecified symbol

                                        Return type
                                        @@ -4586,7 +4586,7 @@

                                        ShapeEnv
                                        -deserialize_symexpr(code)[source]#
                                        +deserialize_symexpr(code)[source]#

                                        To be used by compile_fx to deserialize symexprs

                                        Return type
                                        @@ -4597,7 +4597,7 @@

                                        ShapeEnv
                                        -evaluate_expr(orig_expr, hint=None, fx_node=None, size_oblivious=False, fallback_value=None, *, forcing_spec=False)[source]#
                                        +evaluate_expr(orig_expr, hint=None, fx_node=None, size_oblivious=False, fallback_value=None, *, forcing_spec=False)[source]#

Given an expression, evaluates it, adding guards if necessary. When fallback_value is not None, the function returns fallback_value instead of failing with a data-dependent error.

                                        @@ -4609,7 +4609,7 @@

                                        ShapeEnv
                                        -evaluate_guards_expression(code, args)[source]#
                                        +evaluate_guards_expression(code, args)[source]#

                                        Expected to be used with produce_guards_expression(). Evaluates an expression generated by produce_guards_expression for the given concrete args.

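A hedged round-trip sketch of the produce/evaluate pairing; it assumes the default duck-sized FakeTensorMode conversion makes the placeholder's sizes symbolic, and the shapes are arbitrary:

import torch
from torch._subclasses.fake_tensor import FakeTensorMode
from torch.fx.experimental.symbolic_shapes import ShapeEnv

shape_env = ShapeEnv()
fake = FakeTensorMode(shape_env=shape_env).from_tensor(torch.randn(8, 16))

code = shape_env.produce_guards_expression([fake])
if code:  # None when no non-trivial guards were created
    print(shape_env.evaluate_guards_expression(code, [torch.randn(8, 16)]))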
                                        @@ -4621,7 +4621,7 @@

                                        ShapeEnv
                                        -evaluate_guards_for_args(placeholders, args, *, ignore_static=True)[source]#
                                        +evaluate_guards_for_args(placeholders, args, *, ignore_static=True)[source]#

                                        Generate guards for a graph’s placeholder values and evaluate the guards with args

                                        Return type
                                        @@ -4632,7 +4632,7 @@

                                        ShapeEnv
                                        -evaluate_sym_node(sym_node, size_oblivious=False, fallback_value=None)[source]#
                                        +evaluate_sym_node(sym_node, size_oblivious=False, fallback_value=None)[source]#

Given a SymNode, evaluates sym_node.expr, adding guards if necessary.

                                        Return type
                                        @@ -4643,7 +4643,7 @@

                                        ShapeEnv
                                        -evaluate_symexpr(code)[source]#
                                        +evaluate_symexpr(code)[source]#

                                        To be used by compile_fx to evaluate symexprs

                                        Return type
                                        @@ -4654,7 +4654,7 @@

                                        ShapeEnv
                                        -format_guards(verbose=False)[source]#
                                        +format_guards(verbose=False)[source]#

                                        Format this shape env’s guard expressions with optional traceback info if verbose

                                        Return type
                                        @@ -4665,7 +4665,7 @@

                                        ShapeEnv
                                        -freeze()[source]#
                                        +freeze()[source]#

                                        Freeze this ShapeEnv to stop accumulating guards

A frozen ShapeEnv will ignore any further guards generated on it and will only emit a warning, which may lead to accuracy problems.

                                        @@ -4675,7 +4675,7 @@

                                        ShapeEnv
                                        -freeze_runtime_asserts()[source]#
                                        +freeze_runtime_asserts()[source]#

                                        Freeze this ShapeEnv to stop adding deferred runtime asserts.

                                        We will error if you try to install a new runtime assert when it is frozen. This would indicate a lowering violation, or perhaps something @@ -4687,7 +4687,7 @@

                                        ShapeEnv
                                        -get_axioms(symbols=None, compute_hint=False)[source]#
                                        +get_axioms(symbols=None, compute_hint=False)[source]#

Given the symbols in an expression, it returns all the runtime asserts that contain those symbols, concatenated with all the guards. If symbols is None, it returns all the runtime asserts (and all the guards).

                                        @@ -4700,7 +4700,7 @@

                                        ShapeEnv
                                        -get_implications(e)[source]#
                                        +get_implications(e)[source]#

Given an expression, it returns a list of predicates that follow from it.

                                        Return type
                                        @@ -4711,7 +4711,7 @@

                                        ShapeEnv
                                        -get_nontrivial_guards()[source]#
                                        +get_nontrivial_guards()[source]#

                                        Returns a list of guard expressions that aren’t statically known (i.e. not trivial)

                                        Return type
                                        @@ -4722,7 +4722,7 @@

                                        ShapeEnv
                                        -get_pruned_guards(symints)[source]#
                                        +get_pruned_guards(symints)[source]#

Get a list of guards, but pruned so it only provides guards that reference symints from the passed-in input.

                                        @@ -4734,7 +4734,7 @@

                                        ShapeEnv
                                        -guard_or_defer_runtime_assert(orig_expr, msg, fx_node=None)[source]#
                                        +guard_or_defer_runtime_assert(orig_expr, msg, fx_node=None)[source]#

Adds a guard that orig_expr is True if we can, or falls back to adding an assert that is checked at runtime.

                                        @@ -4754,7 +4754,7 @@

                                        ShapeEnv
                                        -ignore_fresh_unbacked_symbols()[source]#
                                        +ignore_fresh_unbacked_symbols()[source]#

                                        Indicates that the newly allocated unbacked SymInts are being discarded

                                        @@ -4766,7 +4766,7 @@

                                        ShapeEnv
                                        -is_unbacked_symint(symbol)[source]#
                                        +is_unbacked_symint(symbol)[source]#

                                        Check if a sympy symbol matches the naming convention for unbacked symbols

                                        Return type
                                        @@ -4777,7 +4777,7 @@

                                        ShapeEnv
                                        -patch_source_specialization(source, check_fn)[source]#
                                        +patch_source_specialization(source, check_fn)[source]#

                                        Temporarily add symbol-level axioms to the ShapeEnv. This is useful when you want to “fork” and have parallel universes of ShapeEnvs. For example, we use this when doing multi-graph compile so we can support various graphs with varying levels of specializations.

                                        @@ -4799,7 +4799,7 @@

                                        ShapeEnv
                                        -produce_guards(*args, **kwargs)[source]#
                                        +produce_guards(*args, **kwargs)[source]#

Like produce_guards_verbose, but only returns the non-verbose Python guard expressions (no verbose guards are produced).

                                        @@ -4811,7 +4811,7 @@

                                        ShapeEnv
                                        -produce_guards_expression(placeholders, *, guards=None, ignore_static=True)[source]#
                                        +produce_guards_expression(placeholders, *, guards=None, ignore_static=True)[source]#

                                        Expected to be used with evaluate_guards_expression(). Produces the guards for the given placeholders and returns a string expression to be evaluated by evaluate_guards_expression given concrete values for the placeholders.

                                        @@ -4824,7 +4824,7 @@

                                        ShapeEnv
                                        -produce_guards_verbose(placeholders, sources, source_ref=<function ShapeEnv.<lambda>>, *, guards=None, input_contexts=None, equalities_inputs=None, _simplified=False, ignore_static=True, langs=('python', 'verbose_python'))[source]#
                                        +produce_guards_verbose(placeholders, sources, source_ref=<function ShapeEnv.<lambda>>, *, guards=None, input_contexts=None, equalities_inputs=None, _simplified=False, ignore_static=True, langs=('python', 'verbose_python'))[source]#

                                        Generates a list of guards strings which, when evaluated in a context that defines tensors for all the sources, returns True or False depending on if the guards in the list evaluated to True or not. Primarily used by Dynamo, @@ -4849,7 +4849,7 @@

                                        ShapeEnv
                                        -replace(expr)[source]#
                                        +replace(expr)[source]#

                                        Apply symbol replacements to any symbols in the given expression.

                                        Return type
                                        @@ -4860,7 +4860,7 @@

                                        ShapeEnv
                                        -set_unbacked_var_to_val(k, v)[source]#
                                        +set_unbacked_var_to_val(k, v)[source]#

Used only when propagate_real_tensors; registers a value for an unbacked symbol, which can be used as a last resort to resolve hints.

                                        @@ -4869,7 +4869,7 @@

                                        ShapeEnv
                                        -simplify(expr, size_oblivious=False)[source]#
                                        +simplify(expr, size_oblivious=False)[source]#

                                        Use known constraints and replacements to simplify the given expr

                                        Return type
                                        @@ -4880,7 +4880,7 @@

                                        ShapeEnv
                                        -size_hint(expr, *, allow_none=False)[source]#
                                        +size_hint(expr, *, allow_none=False)[source]#

                                        Gets a size hint for a given expression from the underlying shapes we had. Does not introduce a guard, so only use this when you can guarantee that your code is still valid for arbitrary shapes (such as optimization decisions)

                                        @@ -4893,7 +4893,7 @@

                                        ShapeEnv
                                        -suppress_guards()[source]#
                                        +suppress_guards()[source]#

                                        Context manager to ignore all guards generated inside

                                        Return type
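A minimal sketch, assuming the same FakeTensorMode setup as in the earlier guards example so that the size below is symbolic:

import torch
from torch._subclasses.fake_tensor import FakeTensorMode
from torch.fx.experimental.symbolic_shapes import ShapeEnv

shape_env = ShapeEnv()
fake = FakeTensorMode(shape_env=shape_env).from_tensor(torch.randn(4))
with shape_env.suppress_guards():
    # Bool-casting a symbolic comparison normally records a guard;
    # inside this block the guard is ignored.
    bool(fake.shape[0] == 4)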
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.ShapeEnvSettings.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.ShapeEnvSettings.html index 597b35ded89..759124146d1 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.ShapeEnvSettings.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.ShapeEnvSettings.html @@ -4404,7 +4404,7 @@

                                        ShapeEnvSettings#

                                        -class torch.fx.experimental.symbolic_shapes.ShapeEnvSettings(allow_scalar_outputs, allow_dynamic_output_shape_ops, assume_static_by_default, specialize_zero_one, duck_shape, prefer_deferred_runtime_asserts_over_guards, trace_asserts)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.ShapeEnvSettings(allow_scalar_outputs, allow_dynamic_output_shape_ops, assume_static_by_default, specialize_zero_one, duck_shape, prefer_deferred_runtime_asserts_over_guards, trace_asserts)[source]#

                                        Encapsulates all shape env settings that could potentially affect FakeTensor dispatch. Used when creating dispatch cache keys.

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.Specialization.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.Specialization.html index 6d91c176be8..6c5b5fd1f74 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.Specialization.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.Specialization.html @@ -4404,7 +4404,7 @@

                                        Specialization#

                                        -class torch.fx.experimental.symbolic_shapes.Specialization(source, check_fn)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.Specialization(source, check_fn)[source]#

                                        This class is used in multi-graph compilation contexts where we generate multiple specialized graphs and dispatch to the appropriate one at runtime. This allows us to optimize the trade-off between performance and generality diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.StatefulSymbolicContext.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.StatefulSymbolicContext.html index edc9f9caede..0041831c9c1 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.StatefulSymbolicContext.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.StatefulSymbolicContext.html @@ -4404,7 +4404,7 @@

                                        StatefulSymbolicContext#

                                        -class torch.fx.experimental.symbolic_shapes.StatefulSymbolicContext(dynamic_sizes, dynamic_strides=None, constraint_sizes=None, constraint_strides=None, specialize_on=None, view_base_context=None, tensor_source=None, shape_env_to_source_to_symbol_cache=None)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.StatefulSymbolicContext(dynamic_sizes, dynamic_strides=None, constraint_sizes=None, constraint_strides=None, specialize_on=None, view_base_context=None, tensor_source=None, shape_env_to_source_to_symbol_cache=None)[source]#

                                        Create symbols in create_symbolic_sizes_strides_storage_offset via a symbolic_context determination as given by a cache of Source:Symbol. A cache hit will reuse a stored symbol, and a cache miss will write to this cache.

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.StatelessSymbolicContext.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.StatelessSymbolicContext.html index 7d084f748cc..a2fa39abf99 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.StatelessSymbolicContext.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.StatelessSymbolicContext.html @@ -4404,7 +4404,7 @@

                                        StatelessSymbolicContext#

                                        -class torch.fx.experimental.symbolic_shapes.StatelessSymbolicContext(dynamic_sizes, dynamic_strides=None, constraint_sizes=None, constraint_strides=None, specialize_on=None, view_base_context=None)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.StatelessSymbolicContext(dynamic_sizes, dynamic_strides=None, constraint_sizes=None, constraint_strides=None, specialize_on=None, view_base_context=None)[source]#

                                        Create symbols in create_symbolic_sizes_strides_storage_offset via a symbolic_context determination as given by DimDynamic and DimConstraint. This will cause fresh symbols to be allocated

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.StrictMinMaxConstraint.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.StrictMinMaxConstraint.html index 88b76ff2ba8..1706df74d91 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.StrictMinMaxConstraint.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.StrictMinMaxConstraint.html @@ -4404,7 +4404,7 @@

                                        StrictMinMaxConstraint#

                                        -class torch.fx.experimental.symbolic_shapes.StrictMinMaxConstraint(warn_only, vr)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.StrictMinMaxConstraint(warn_only, vr)[source]#

                                        For clients: the size at this dimension must be within ‘vr’ (which specifies a lower and upper bound, inclusive-inclusive) AND it must be non-negative and should not be 0 or 1 (but see NB below).

                                        @@ -4422,7 +4422,7 @@

                                        StrictMinMaxConstraint
                                        -render(source)[source]#
                                        +render(source)[source]#

Format the constraint equation

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.SubclassSymbolicContext.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.SubclassSymbolicContext.html index ad9ff610e88..0ec06d0f7d3 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.SubclassSymbolicContext.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.SubclassSymbolicContext.html @@ -4404,7 +4404,7 @@

                                        SubclassSymbolicContext#

                                        -class torch.fx.experimental.symbolic_shapes.SubclassSymbolicContext(dynamic_sizes, dynamic_strides=None, constraint_sizes=None, constraint_strides=None, specialize_on=None, view_base_context=None, tensor_source=None, shape_env_to_source_to_symbol_cache=None, inner_contexts=None)[source]#
                                        +class torch.fx.experimental.symbolic_shapes.SubclassSymbolicContext(dynamic_sizes, dynamic_strides=None, constraint_sizes=None, constraint_strides=None, specialize_on=None, view_base_context=None, tensor_source=None, shape_env_to_source_to_symbol_cache=None, inner_contexts=None)[source]#

                                        The correct symbolic context for a given inner tensor of a traceable tensor subclass may differ from that of the outer symbolic context. This structure allows for this flexibility, with inner symbolic contexts mapped via attr -> symbolic context.

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.SymbolicContext.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.SymbolicContext.html index 077c7e48259..0699cd7930c 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.SymbolicContext.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.SymbolicContext.html @@ -4404,7 +4404,7 @@

                                        SymbolicContext#

                                        -class torch.fx.experimental.symbolic_shapes.SymbolicContext[source]#
                                        +class torch.fx.experimental.symbolic_shapes.SymbolicContext[source]#

                                        Data structure specifying how we should create symbols in create_symbolic_sizes_strides_storage_offset; e.g., should they be static or dynamic.

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.canonicalize_bool_expr.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.canonicalize_bool_expr.html index ab1cb8ff898..2f45fff1d8c 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.canonicalize_bool_expr.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.canonicalize_bool_expr.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.canonicalize_bool_expr#

                                        -torch.fx.experimental.symbolic_shapes.canonicalize_bool_expr(expr)[source]#
                                        +torch.fx.experimental.symbolic_shapes.canonicalize_bool_expr(expr)[source]#

                                        Canonicalize a boolean expression by transforming it into a lt / le inequality and moving all the non-constant terms to the rhs. We canonicalize And / Ors / Not via cnf and then canonicalize their subexpr diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.check_consistent.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.check_consistent.html index 6086c1e8eb5..0e2f71de5a8 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.check_consistent.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.check_consistent.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.check_consistent#

                                        -torch.fx.experimental.symbolic_shapes.check_consistent(new, old)[source]#
                                        +torch.fx.experimental.symbolic_shapes.check_consistent(new, old)[source]#

                                        Test that two “meta” values (typically either Tensor or SymInt) have the same values, e.g., after retracing. If we don’t understand the quantities in question, we’ll just skip the consistency check.

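For example (a hedged sketch; in practice the "old" values come from a previous trace):

import torch
from torch.fx.experimental.symbolic_shapes import check_consistent

check_consistent(5, 5)                                   # scalars agree: OK
check_consistent(torch.randn(2, 3), torch.randn(2, 3))   # same shapes: OK
# check_consistent(torch.randn(2, 3), torch.randn(2, 4)) would raise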
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.compute_unbacked_bindings.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.compute_unbacked_bindings.html index 8eb835495c7..aa4472cbf75 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.compute_unbacked_bindings.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.compute_unbacked_bindings.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.compute_unbacked_bindings#

                                        -torch.fx.experimental.symbolic_shapes.compute_unbacked_bindings(shape_env, example_value, old_example_value=None, peek=False)[source]#
                                        +torch.fx.experimental.symbolic_shapes.compute_unbacked_bindings(shape_env, example_value, old_example_value=None, peek=False)[source]#

                                        After having run fake tensor propagation and producing example_value result, traverse example_value looking for freshly bound unbacked symbols and record their paths for later. It is an error if diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.constrain_range.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.constrain_range.html index 1f837edcafc..dd944317683 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.constrain_range.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.constrain_range.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.constrain_range#

                                        -torch.fx.experimental.symbolic_shapes.constrain_range(a, *, min, max=None)[source]#
                                        +torch.fx.experimental.symbolic_shapes.constrain_range(a, *, min, max=None)[source]#

                                        Applies a constraint that the passed in SymInt must lie between min-max inclusive-inclusive, WITHOUT introducing a guard on the SymInt (meaning that it can be used on unbacked SymInts). If min/max are None, we assume diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.constrain_unify.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.constrain_unify.html index 40e6f503171..b8efb56c96c 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.constrain_unify.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.constrain_unify.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.constrain_unify#

                                        -torch.fx.experimental.symbolic_shapes.constrain_unify(a, b)[source]#
                                        +torch.fx.experimental.symbolic_shapes.constrain_unify(a, b)[source]#

                                        Given two SymInts, constrain them so that they must be equal. NB: this will not work with SymInts that represent nontrivial expressions (yet!)

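A hedged sketch: in eager mode the two counts below are plain ints and the call degenerates to an equality assert, while under tracing they are unbacked SymInts that become unified:

import torch
from torch.fx.experimental.symbolic_shapes import constrain_unify

def f(x, y):
    nx = x.nonzero().shape[0]   # data-dependent; an unbacked SymInt when traced
    ny = y.nonzero().shape[0]
    constrain_unify(nx, ny)     # the compiler may now treat nx and ny as equal
    return x.nonzero() + y.nonzero()

print(f(torch.tensor([1, 0, 2]), torch.tensor([0, 3, 4])))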
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_or_false.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_or_false.html index 41ea8f18555..bb0dc2ca9dc 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_or_false.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_or_false.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.guard_or_false#

                                        -torch.fx.experimental.symbolic_shapes.guard_or_false(a)[source]#
                                        +torch.fx.experimental.symbolic_shapes.guard_or_false(a)[source]#

Try to guard a; if a data-dependent error is encountered, just return False.

                                        Return type
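A sketch of the intended usage (guard_or_true, documented next, is symmetric); on plain Python bools both are the identity:

import torch
from torch.fx.experimental.symbolic_shapes import guard_or_false

def maybe_fast_path(x):
    # On an unbacked symbolic size, `if x.shape[0] == 0:` could raise a
    # data-dependent error; guard_or_false falls back to False, so the
    # general path is taken instead.
    if guard_or_false(x.shape[0] == 0):
        return x.new_zeros(())
    return x.sum()

print(maybe_fast_path(torch.randn(5)))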
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_or_true.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_or_true.html index ce3681a64b8..3909bb25fc2 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_or_true.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_or_true.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.guard_or_true#

                                        -torch.fx.experimental.symbolic_shapes.guard_or_true(a)[source]#
                                        +torch.fx.experimental.symbolic_shapes.guard_or_true(a)[source]#

Try to guard a; if a data-dependent error is encountered, just return True.

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_size_oblivious.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_size_oblivious.html index 70d70b3c275..13499695460 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_size_oblivious.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.guard_size_oblivious.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.guard_size_oblivious#

                                        -torch.fx.experimental.symbolic_shapes.guard_size_oblivious(expr)[source]#
                                        +torch.fx.experimental.symbolic_shapes.guard_size_oblivious(expr)[source]#

Perform a guard on a symbolic boolean expression in a size-oblivious way. This is typically used when a non-oblivious test would result in a guard on a data-dependent value whose value we don’t know at compile time. diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.has_free_symbols.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.has_free_symbols.html index ed9f99ad856..3142db050a4 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.has_free_symbols.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.has_free_symbols.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.has_free_symbols#

                                        -torch.fx.experimental.symbolic_shapes.has_free_symbols(val)[source]#
                                        +torch.fx.experimental.symbolic_shapes.has_free_symbols(val)[source]#

                                        Faster version of bool(free_symbols(val))

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.has_free_unbacked_symbols.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.has_free_unbacked_symbols.html index 1384d677302..ee743dbf597 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.has_free_unbacked_symbols.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.has_free_unbacked_symbols.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.has_free_unbacked_symbols#

                                        -torch.fx.experimental.symbolic_shapes.has_free_unbacked_symbols(x)[source]#
                                        +torch.fx.experimental.symbolic_shapes.has_free_unbacked_symbols(x)[source]#

Faster version of bool(free_unbacked_symbols(x))

                                        Return type
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.has_static_value.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.has_static_value.html index 9683bf434bb..967600d58d3 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.has_static_value.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.has_static_value.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.has_static_value#

                                        -torch.fx.experimental.symbolic_shapes.has_static_value(a)[source]#
                                        +torch.fx.experimental.symbolic_shapes.has_static_value(a)[source]#

                                        User-code friendly utility to check if a value is static or dynamic. Returns true if given a constant, or a symbolic expression with a fixed value.

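For example (in eager mode every value is static; only a symbolic value without a fixed value returns False):

from torch.fx.experimental.symbolic_shapes import has_static_value

print(has_static_value(3))      # True: a constant
print(has_static_value(2.5))    # True
# A SymInt the ShapeEnv cannot pin to a single value would return False.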
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.hint_int.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.hint_int.html index b0a1c00e998..e1d681420f3 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.hint_int.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.hint_int.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.hint_int#

                                        -torch.fx.experimental.symbolic_shapes.hint_int(a, fallback=None)[source]#
                                        +torch.fx.experimental.symbolic_shapes.hint_int(a, fallback=None)[source]#

Retrieve the hint for an int (based on the underlying real values as observed at runtime). If no hint is available (e.g., because of data-dependent shapes), use fallback if it is not None; otherwise raise an error.

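A small sketch; in eager mode the size below is a plain int, so the observed value is returned directly:

import torch
from torch.fx.experimental.symbolic_shapes import hint_int

n = torch.randn(7).shape[0]      # a plain int here; a SymInt under tracing
print(hint_int(n, fallback=1))   # 7; an unbacked SymInt would yield 1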
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.is_accessor_node.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.is_accessor_node.html index 8daf3ae2bab..df249cc8ff2 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.is_accessor_node.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.is_accessor_node.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.is_accessor_node#

                                        -torch.fx.experimental.symbolic_shapes.is_accessor_node(node)[source]#
                                        +torch.fx.experimental.symbolic_shapes.is_accessor_node(node)[source]#

Helper function to determine if a node is trying to access a symbolic integer such as a size, stride, offset, or item. Currently used primarily in a DCE pass to figure out purity.

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_bool.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_bool.html index 415e187bbe0..8f05b91a62c 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_bool.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_bool.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.is_concrete_bool#

                                        -torch.fx.experimental.symbolic_shapes.is_concrete_bool(a)[source]#
                                        +torch.fx.experimental.symbolic_shapes.is_concrete_bool(a)[source]#

Utility to check if the underlying object in a SymBool is a concrete value. Also returns true if a bool is passed in.

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_float.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_float.html index 08fd55d4b73..5d244dea150 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_float.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_float.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.is_concrete_float#

                                        -torch.fx.experimental.symbolic_shapes.is_concrete_float(a)[source]#
                                        +torch.fx.experimental.symbolic_shapes.is_concrete_float(a)[source]#

Utility to check if the underlying object in a SymFloat is a concrete value. Also returns true if a float is passed in.

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_int.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_int.html index 0bceb1f340c..eb7976366c6 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_int.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.is_concrete_int.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.is_concrete_int#

                                        -torch.fx.experimental.symbolic_shapes.is_concrete_int(a)[source]#
                                        +torch.fx.experimental.symbolic_shapes.is_concrete_int(a)[source]#

Utility to check if the underlying object in a SymInt is a concrete value. Also returns true if an integer is passed in.

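For example:

import torch
from torch.fx.experimental.symbolic_shapes import is_concrete_int

print(is_concrete_int(5))                        # True: plain int
print(is_concrete_int(torch.randn(3).shape[0]))  # True: concrete in eager mode
# A genuinely dynamic SymInt under torch.compile would return False.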
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.lru_cache.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.lru_cache.html index c7a6800443e..ba7daa5c32f 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.lru_cache.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.lru_cache.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.lru_cache#

                                        -torch.fx.experimental.symbolic_shapes.lru_cache(maxsize)[source]#
                                        +torch.fx.experimental.symbolic_shapes.lru_cache(maxsize)[source]#
                                        Return type

                                        Callable[[Callable[…, _T]], functools._lru_cache_wrapper[_T]]

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.rebind_unbacked.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.rebind_unbacked.html index 0f31ddcbdaa..3ddf4af81c2 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.rebind_unbacked.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.rebind_unbacked.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.rebind_unbacked#

                                        -torch.fx.experimental.symbolic_shapes.rebind_unbacked(shape_env, n, result)[source]#
                                        +torch.fx.experimental.symbolic_shapes.rebind_unbacked(shape_env, n, result)[source]#

                                        Suppose we are retracing a pre-existing FX graph that previously had fake tensor propagation (and therefore unbacked SymInts). When we retrace, we re-propagate fake tensors, which results in new unbacked SymInts. diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.resolve_unbacked_bindings.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.resolve_unbacked_bindings.html index a4e2449f035..c6e280cce72 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.resolve_unbacked_bindings.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.resolve_unbacked_bindings.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.resolve_unbacked_bindings#

                                        -torch.fx.experimental.symbolic_shapes.resolve_unbacked_bindings(shape_env, bindings)[source]#
                                        +torch.fx.experimental.symbolic_shapes.resolve_unbacked_bindings(shape_env, bindings)[source]#

                                        When we do fake tensor prop, we oftentimes will allocate new unbacked symints. We then run proxy tensor mode, which populates node.meta[“unbacked_bindings”] with these new symints. To ensure consistency we use PropagateUnbackedSymInts diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.statically_known_false.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.statically_known_false.html index be200b5d6f7..5978b226bd8 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.statically_known_false.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.statically_known_false.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.statically_known_false#

                                        -torch.fx.experimental.symbolic_shapes.statically_known_false(x)[source]#
                                        +torch.fx.experimental.symbolic_shapes.statically_known_false(x)[source]#

Returns True if x can be simplified to a constant and that constant is False. If x cannot be evaluated statically, returns False.

                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.statically_known_true.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.statically_known_true.html index 0684aa9f158..ca696928402 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.statically_known_true.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.statically_known_true.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.statically_known_true#

                                        -torch.fx.experimental.symbolic_shapes.statically_known_true(x)[source]#
                                        +torch.fx.experimental.symbolic_shapes.statically_known_true(x)[source]#

Returns True if x can be simplified to a constant and that constant is True.

                                        Note

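A sketch of the intended usage (statically_known_false, above, is the mirror image); on plain bools, as in eager mode, the function is the identity:

import torch
from torch.fx.experimental.symbolic_shapes import statically_known_true

def can_flatten_view(x, y):
    # Never adds a guard: True only when the claim is provable at compile
    # time; unprovable or data-dependent claims yield False.
    return statically_known_true(x.shape[0] * x.shape[1] == y.shape[0])

print(can_flatten_view(torch.randn(2, 3), torch.randn(6)))  # True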
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_and.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_and.html index 7f9a8d16241..6bd3655883d 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_and.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_and.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.sym_and#

                                        -torch.fx.experimental.symbolic_shapes.sym_and(x, *others)[source]#
                                        +torch.fx.experimental.symbolic_shapes.sym_and(x, *others)[source]#

                                        and, but for symbolic expressions, without bool casting.

                                        Return type
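A hedged sketch (sym_or, documented below, is the disjunctive analogue); with real tensors the comparisons are plain bools, but with symbolic sizes `a and b` would bool-cast, and therefore guard on, the first operand:

import torch
from torch.fx.experimental.symbolic_shapes import sym_and

x, y = torch.randn(4, 8), torch.randn(4, 8)
same = sym_and(x.shape[0] == y.shape[0], x.shape[1] == y.shape[1])
torch._check(same)   # record the conjunction as a single runtime check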
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_eq.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_eq.html index 3f85370a190..f978c9b664a 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_eq.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_eq.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.sym_eq#

                                        -torch.fx.experimental.symbolic_shapes.sym_eq(x, y)[source]#
                                        +torch.fx.experimental.symbolic_shapes.sym_eq(x, y)[source]#

                                        Like ==, but when run on list/tuple, it will recursively test equality and use sym_and to join the results together, without guarding.

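For example:

import torch
from torch.fx.experimental.symbolic_shapes import sym_eq

x, y = torch.randn(2, 3), torch.randn(2, 3)
print(sym_eq(tuple(x.shape), tuple(y.shape)))  # True, joined via sym_and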
                                        diff --git a/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_or.html b/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_or.html index 658f422acf6..357832d30d5 100644 --- a/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_or.html +++ b/2.9/generated/torch.fx.experimental.symbolic_shapes.sym_or.html @@ -4404,7 +4404,7 @@

                                        torch.fx.experimental.symbolic_shapes.sym_or#

                                        -torch.fx.experimental.symbolic_shapes.sym_or(x, *others)[source]#
                                        +torch.fx.experimental.symbolic_shapes.sym_or(x, *others)[source]#

                                        or, but for symbolic expressions, without bool casting.

                                        Return type
                                        diff --git a/2.9/generated/torch.get_default_device.html b/2.9/generated/torch.get_default_device.html index 129b134337d..d3603fd698a 100644 --- a/2.9/generated/torch.get_default_device.html +++ b/2.9/generated/torch.get_default_device.html @@ -4404,7 +4404,7 @@

                                        torch.get_default_device#

                                        -torch.get_default_device()[source]#
                                        +torch.get_default_device()[source]#

Gets the default device on which a torch.Tensor is allocated.

                                        Return type
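For example (the CUDA lines assume a CUDA-enabled build):

import torch

print(torch.get_default_device())   # device(type='cpu') unless overridden
torch.set_default_device("cuda")    # assumes CUDA is available
print(torch.get_default_device())   # device(type='cuda', index=0)
torch.set_default_device(None)      # restore the default behavior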
                                        diff --git a/2.9/generated/torch.get_deterministic_debug_mode.html b/2.9/generated/torch.get_deterministic_debug_mode.html index 4d59aa9f1b8..f46ccc10a18 100644 --- a/2.9/generated/torch.get_deterministic_debug_mode.html +++ b/2.9/generated/torch.get_deterministic_debug_mode.html @@ -4404,7 +4404,7 @@

                                        torch.get_deterministic_debug_mode#

                                        -torch.get_deterministic_debug_mode()[source]#
                                        +torch.get_deterministic_debug_mode()[source]#

                                        Returns the current value of the debug mode for deterministic operations. Refer to torch.set_deterministic_debug_mode() documentation for more details.

                                        diff --git a/2.9/generated/torch.get_device_module.html b/2.9/generated/torch.get_device_module.html index 60696cf5778..bfd013c7644 100644 --- a/2.9/generated/torch.get_device_module.html +++ b/2.9/generated/torch.get_device_module.html @@ -4404,7 +4404,7 @@

                                        torch.get_device_module#

                                        -torch.get_device_module(device=None)[source]#
                                        +torch.get_device_module(device=None)[source]#

Returns the module associated with a given device (e.g., torch.device(‘cuda’), “mtia:0”, “xpu”, …). If no device is given, returns the module for the current accelerator, or for the CPU if none is present.

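For example:

import torch

print(torch.get_device_module("cpu"))   # the torch.cpu module
# torch.get_device_module("cuda") returns torch.cuda; with no argument,
# the module for the current accelerator (or torch.cpu) is returned.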
                                        diff --git a/2.9/generated/torch.get_float32_matmul_precision.html b/2.9/generated/torch.get_float32_matmul_precision.html index cd676f0d7b8..73ad9a8229f 100644 --- a/2.9/generated/torch.get_float32_matmul_precision.html +++ b/2.9/generated/torch.get_float32_matmul_precision.html @@ -4404,7 +4404,7 @@

                                        torch.get_float32_matmul_precision#

                                        -torch.get_float32_matmul_precision()[source]#
                                        +torch.get_float32_matmul_precision()[source]#

                                        Returns the current value of float32 matrix multiplication precision. Refer to torch.set_float32_matmul_precision() documentation for more details.

diff --git a/2.9/generated/torch.get_rng_state.html b/2.9/generated/torch.get_rng_state.html
index 6562713a78a..9460ba6fd5f 100644
--- a/2.9/generated/torch.get_rng_state.html
+++ b/2.9/generated/torch.get_rng_state.html
@@ -4404,7 +4404,7 @@

                                        torch.get_rng_state#

                                        -torch.get_rng_state()[source]#
                                        +torch.get_rng_state()[source]#

                                        Returns the random number generator state as a torch.ByteTensor.
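A typical save/restore round trip, as a sketch:

>>> import torch
>>> state = torch.get_rng_state()   # snapshot the CPU generator
>>> a = torch.rand(3)
>>> torch.set_rng_state(state)      # rewind it
>>> torch.equal(a, torch.rand(3))
True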

                                        Note

diff --git a/2.9/generated/torch.initial_seed.html b/2.9/generated/torch.initial_seed.html
index 45ca4d7652c..79bc74fc406 100644
--- a/2.9/generated/torch.initial_seed.html
+++ b/2.9/generated/torch.initial_seed.html
@@ -4404,7 +4404,7 @@

                                        torch.initial_seed#

                                        -torch.initial_seed()[source]#
                                        +torch.initial_seed()[source]#

                                        Returns the initial seed for generating random numbers as a Python long.

diff --git a/2.9/generated/torch.is_deterministic_algorithms_warn_only_enabled.html b/2.9/generated/torch.is_deterministic_algorithms_warn_only_enabled.html
index 541ebd4e6d9..48820724b7e 100644
--- a/2.9/generated/torch.is_deterministic_algorithms_warn_only_enabled.html
+++ b/2.9/generated/torch.is_deterministic_algorithms_warn_only_enabled.html
@@ -4404,7 +4404,7 @@

                                        torch.is_deterministic_algorithms_warn_only_enabled#

                                        -torch.is_deterministic_algorithms_warn_only_enabled()[source]#
                                        +torch.is_deterministic_algorithms_warn_only_enabled()[source]#

                                        Returns True if the global deterministic flag is set to warn only. Refer to torch.use_deterministic_algorithms() documentation for more details.
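For example:

>>> import torch
>>> torch.use_deterministic_algorithms(True, warn_only=True)
>>> torch.is_deterministic_algorithms_warn_only_enabled()
True
>>> torch.use_deterministic_algorithms(False)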

diff --git a/2.9/generated/torch.is_storage.html b/2.9/generated/torch.is_storage.html
index d7b86238a70..fec8c4096b9 100644
--- a/2.9/generated/torch.is_storage.html
+++ b/2.9/generated/torch.is_storage.html
@@ -4404,7 +4404,7 @@

                                        torch.is_storage#

                                        -torch.is_storage(obj, /)[source]#
                                        +torch.is_storage(obj, /)[source]#

                                        Returns True if obj is a PyTorch storage object.
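A quick sketch distinguishing storages from tensors:

>>> import torch
>>> t = torch.arange(4)
>>> torch.is_storage(t.untyped_storage())
True
>>> torch.is_storage(t)
False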

                                        Parameters
diff --git a/2.9/generated/torch.is_tensor.html b/2.9/generated/torch.is_tensor.html
index 61bae0f694f..eb06fb9bd94 100644
--- a/2.9/generated/torch.is_tensor.html
+++ b/2.9/generated/torch.is_tensor.html
@@ -4404,7 +4404,7 @@

                                        torch.is_tensor#

                                        -torch.is_tensor(obj, /)[source]#
                                        +torch.is_tensor(obj, /)[source]#

                                        Returns True if obj is a PyTorch tensor.
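For example:

>>> import torch
>>> torch.is_tensor(torch.zeros(2))
True
>>> torch.is_tensor([0.0, 1.0])
False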

Note that this function is simply doing isinstance(obj, Tensor). Using that isinstance check is better for type checking with mypy,

diff --git a/2.9/generated/torch.is_warn_always_enabled.html b/2.9/generated/torch.is_warn_always_enabled.html
index 2418646734f..913057a28f4 100644
--- a/2.9/generated/torch.is_warn_always_enabled.html
+++ b/2.9/generated/torch.is_warn_always_enabled.html
@@ -4404,7 +4404,7 @@

                                        torch.is_warn_always_enabled#

                                        -torch.is_warn_always_enabled()[source]#
                                        +torch.is_warn_always_enabled()[source]#

                                        Returns True if the global warn_always flag is turned on. Refer to torch.set_warn_always() documentation for more details.
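For example:

>>> import torch
>>> torch.set_warn_always(True)
>>> torch.is_warn_always_enabled()
True
>>> torch.set_warn_always(False)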

diff --git a/2.9/generated/torch.jit.ScriptModule.html b/2.9/generated/torch.jit.ScriptModule.html
index 461830ce467..d9f8fe9f408 100644
--- a/2.9/generated/torch.jit.ScriptModule.html
+++ b/2.9/generated/torch.jit.ScriptModule.html
@@ -4404,7 +4404,7 @@

                                        ScriptModule#

                                        -class torch.jit.ScriptModule[source]#
                                        +class torch.jit.ScriptModule[source]#

                                        Wrapper for C++ torch::jit::Module with methods, attributes, and parameters.

A wrapper around C++ torch::jit::Module. ScriptModules contain methods, attributes, parameters, and

@@ -4413,7 +4413,7 @@

                                        ScriptModule
                                        -add_module(name, module)[source]#
                                        +add_module(name, module)[source]#

                                        Add a child module to the current module.

                                        The module can be accessed as an attribute using the given name.

                                        @@ -4429,7 +4429,7 @@

                                        ScriptModule
                                        -apply(fn)[source]#
                                        +apply(fn)[source]#

                                        Apply fn recursively to every submodule (as returned by .children()) as well as self.

                                        Typical use includes initializing the parameters of a model (see also torch.nn.init).
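A common sketch (init_weights is an illustrative name, not part of the API):

>>> import torch
>>> import torch.nn as nn
>>> @torch.no_grad()
... def init_weights(m):
...     if isinstance(m, nn.Linear):
...         m.weight.fill_(1.0)
...
>>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
>>> _ = net.apply(init_weights)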

                                        @@ -4471,7 +4471,7 @@

                                        ScriptModule
                                        -bfloat16()[source]#
                                        +bfloat16()[source]#

                                        Casts all floating point parameters and buffers to bfloat16 datatype.

                                        Note

                                        @@ -4489,7 +4489,7 @@

                                        ScriptModule
                                        -buffers(recurse=True)[source]#
                                        +buffers(recurse=True)[source]#

                                        Return an iterator over module buffers.

                                        Parameters
                                        @@ -4515,7 +4515,7 @@

                                        ScriptModule
                                        -children()[source]#
                                        +children()[source]#

                                        Return an iterator over immediate children modules.

                                        Yields
                                        @@ -4546,7 +4546,7 @@

                                        ScriptModule
                                        -compile(*args, **kwargs)[source]#
                                        +compile(*args, **kwargs)[source]#

                                        Compile this Module’s forward using torch.compile().

                                        This Module’s __call__ method is compiled and all arguments are passed as-is to torch.compile().

                                        @@ -4555,7 +4555,7 @@

                                        ScriptModule
                                        -cpu()[source]#
                                        +cpu()[source]#

                                        Move all model parameters and buffers to the CPU.

                                        Note

                                        @@ -4573,7 +4573,7 @@

                                        ScriptModule
                                        -cuda(device=None)[source]#
                                        +cuda(device=None)[source]#

                                        Move all model parameters and buffers to the GPU.

This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will

@@ -4598,7 +4598,7 @@

                                        ScriptModule
                                        -double()[source]#
                                        +double()[source]#

                                        Casts all floating point parameters and buffers to double datatype.

                                        Note

                                        @@ -4616,7 +4616,7 @@

                                        ScriptModule
                                        -eval()[source]#
                                        +eval()[source]#

                                        Set the module in evaluation mode.

This has an effect only on certain modules. See the documentation of particular modules for details of their behaviors in training/evaluation

@@ -4637,7 +4637,7 @@

                                        ScriptModule
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

To print customized extra information, you should re-implement this method in your own modules. Both single-line and multi-line

@@ -4651,7 +4651,7 @@

                                        ScriptModule
                                        -float()[source]#
                                        +float()[source]#

                                        Casts all floating point parameters and buffers to float datatype.

                                        Note

                                        @@ -4669,7 +4669,7 @@

                                        ScriptModule
                                        -get_buffer(target)[source]#
                                        +get_buffer(target)[source]#

                                        Return the buffer given by target if it exists, otherwise throw an error.

See the docstring for get_submodule for a more detailed explanation of this method’s functionality as well as how to

@@ -4696,7 +4696,7 @@

                                        ScriptModule
                                        -get_extra_state()[source]#
                                        +get_extra_state()[source]#

                                        Return any extra state to include in the module’s state_dict.

Implement this and a corresponding set_extra_state() for your module if you need to store extra state. This function is called when building the

@@ -4717,7 +4717,7 @@

                                        ScriptModule
                                        -get_parameter(target)[source]#
                                        +get_parameter(target)[source]#

                                        Return the parameter given by target if it exists, otherwise throw an error.

See the docstring for get_submodule for a more detailed explanation of this method’s functionality as well as how to

@@ -4744,7 +4744,7 @@

                                        ScriptModule
                                        -get_submodule(target)[source]#
                                        +get_submodule(target)[source]#

                                        Return the submodule given by target if it exists, otherwise throw an error.

                                        For example, let’s say you have an nn.Module A that looks like this:
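A minimal sketch with illustrative names (dotted paths resolve the same way on a plain nn.Module):

>>> import torch.nn as nn
>>> class A(nn.Module):
...     def __init__(self):
...         super().__init__()
...         self.net = nn.Sequential(nn.Linear(4, 4), nn.ReLU())
...     def forward(self, x):
...         return self.net(x)
...
>>> A().get_submodule("net.0")
Linear(in_features=4, out_features=4, bias=True)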

                                        @@ -4799,7 +4799,7 @@

                                        ScriptModule
                                        -half()[source]#
                                        +half()[source]#

                                        Casts all floating point parameters and buffers to half datatype.

                                        Note

                                        @@ -4824,7 +4824,7 @@

                                        ScriptModule
                                        -ipu(device=None)[source]#
                                        +ipu(device=None)[source]#

                                        Move all model parameters and buffers to the IPU.

This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will

@@ -4849,7 +4849,7 @@

                                        ScriptModule
                                        -load_state_dict(state_dict, strict=True, assign=False)[source]#
                                        +load_state_dict(state_dict, strict=True, assign=False)[source]#

                                        Copy parameters and buffers from state_dict into this module and its descendants.

If strict is True, then the keys of state_dict must exactly match the keys returned

@@ -4904,7 +4904,7 @@

                                        ScriptModule
                                        -modules()[source]#
                                        +modules()[source]#

                                        Return an iterator over all modules in the network.

                                        Yields
                                        @@ -4936,7 +4936,7 @@

                                        ScriptModule
                                        -mtia(device=None)[source]#
                                        +mtia(device=None)[source]#

                                        Move all model parameters and buffers to the MTIA.

This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will

@@ -4961,7 +4961,7 @@

                                        ScriptModule
                                        -named_buffers(prefix='', recurse=True, remove_duplicate=True)[source]#
                                        +named_buffers(prefix='', recurse=True, remove_duplicate=True)[source]#

                                        Return an iterator over module buffers, yielding both the name of the buffer as well as the buffer itself.

                                        Parameters
                                        @@ -4990,7 +4990,7 @@

                                        ScriptModule
                                        -named_children()[source]#
                                        +named_children()[source]#

                                        Return an iterator over immediate children modules, yielding both the name of the module as well as the module itself.

                                        Yields
                                        @@ -5010,7 +5010,7 @@

                                        ScriptModule
                                        -named_modules(memo=None, prefix='', remove_duplicate=True)[source]#
                                        +named_modules(memo=None, prefix='', remove_duplicate=True)[source]#

                                        Return an iterator over all modules in the network, yielding both the name of the module as well as the module itself.

                                        Parameters
                                        @@ -5047,7 +5047,7 @@

                                        ScriptModule
                                        -named_parameters(prefix='', recurse=True, remove_duplicate=True)[source]#
                                        +named_parameters(prefix='', recurse=True, remove_duplicate=True)[source]#

                                        Return an iterator over module parameters, yielding both the name of the parameter as well as the parameter itself.

                                        Parameters
                                        @@ -5077,7 +5077,7 @@

                                        ScriptModule
                                        -parameters(recurse=True)[source]#
                                        +parameters(recurse=True)[source]#

                                        Return an iterator over module parameters.

                                        This is typically passed to an optimizer.

                                        @@ -5104,7 +5104,7 @@

                                        ScriptModule
                                        -register_backward_hook(hook)[source]#
                                        +register_backward_hook(hook)[source]#

                                        Register a backward hook on the module.

                                        This function is deprecated in favor of register_full_backward_hook() and the behavior of this function will change in future versions.

                                        @@ -5121,7 +5121,7 @@

                                        ScriptModule
                                        -register_buffer(name, tensor, persistent=True)[source]#
                                        +register_buffer(name, tensor, persistent=True)[source]#

                                        Add a buffer to the module.

This is typically used to register a buffer that should not be considered a model parameter. For example, BatchNorm’s running_mean

@@ -5153,7 +5153,7 @@

                                        ScriptModule
                                        -register_forward_hook(hook, *, prepend=False, with_kwargs=False, always_call=False)[source]#
                                        +register_forward_hook(hook, *, prepend=False, with_kwargs=False, always_call=False)[source]#

                                        Register a forward hook on the module.

                                        The hook will be called every time after forward() has computed an output.

If with_kwargs is False or not specified, the input contains only

@@ -5204,7 +5204,7 @@

                                        ScriptModule
                                        -register_forward_pre_hook(hook, *, prepend=False, with_kwargs=False)[source]#
                                        +register_forward_pre_hook(hook, *, prepend=False, with_kwargs=False)[source]#

                                        Register a forward pre-hook on the module.

                                        The hook will be called every time before forward() is invoked.

If with_kwargs is false or not specified, the input contains only

@@ -5254,7 +5254,7 @@

                                        ScriptModule
                                        -register_full_backward_hook(hook, prepend=False)[source]#
                                        +register_full_backward_hook(hook, prepend=False)[source]#

                                        Register a backward hook on the module.

                                        The hook will be called every time the gradients with respect to a module are computed, and its firing rules are as follows:

                                        @@ -5311,7 +5311,7 @@

                                        ScriptModule
                                        -register_full_backward_pre_hook(hook, prepend=False)[source]#
                                        +register_full_backward_pre_hook(hook, prepend=False)[source]#

                                        Register a backward pre-hook on the module.

                                        The hook will be called every time the gradients for the module are computed. The hook should have the following signature:

                                        @@ -5357,7 +5357,7 @@

                                        ScriptModule
                                        -register_load_state_dict_post_hook(hook)[source]#
                                        +register_load_state_dict_post_hook(hook)[source]#

                                        Register a post-hook to be run after module’s load_state_dict() is called.

It should have the following signature:

                                        hook(module, incompatible_keys) -> None

                                        @@ -5387,7 +5387,7 @@

                                        ScriptModule
                                        -register_load_state_dict_pre_hook(hook)[source]#
                                        +register_load_state_dict_pre_hook(hook)[source]#

                                        Register a pre-hook to be run before module’s load_state_dict() is called.

It should have the following signature:

hook(module, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) -> None

                                        @@ -5403,7 +5403,7 @@

                                        ScriptModule
                                        -register_module(name, module)[source]#
                                        +register_module(name, module)[source]#

                                        Alias for add_module().

                                        @@ -5411,7 +5411,7 @@

                                        ScriptModule
                                        -register_parameter(name, param)[source]#
                                        +register_parameter(name, param)[source]#

                                        Add a parameter to the module.

The parameter can be accessed as an attribute using the given name.
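For example:

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Module()
>>> m.register_parameter("scale", nn.Parameter(torch.ones(1)))
>>> m.scale
Parameter containing:
tensor([1.], requires_grad=True)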

                                        @@ -5430,7 +5430,7 @@

                                        ScriptModule
                                        -register_state_dict_post_hook(hook)[source]#
                                        +register_state_dict_post_hook(hook)[source]#

                                        Register a post-hook for the state_dict() method.

It should have the following signature:

                                        hook(module, state_dict, prefix, local_metadata) -> None

                                        @@ -5441,7 +5441,7 @@

                                        ScriptModule
                                        -register_state_dict_pre_hook(hook)[source]#
                                        +register_state_dict_pre_hook(hook)[source]#

                                        Register a pre-hook for the state_dict() method.

It should have the following signature:

                                        hook(module, prefix, keep_vars) -> None

                                        @@ -5453,7 +5453,7 @@

                                        ScriptModule
                                        -requires_grad_(requires_grad=True)[source]#
                                        +requires_grad_(requires_grad=True)[source]#

                                        Change if autograd should record operations on parameters in this module.

                                        This method sets the parameters’ requires_grad attributes in-place.

                                        @@ -5477,7 +5477,7 @@

                                        ScriptModule
                                        -save(f, **kwargs)[source]#
                                        +save(f, **kwargs)[source]#

                                        Save with a file-like object.

                                        save(f, _extra_files={})

See torch.jit.save which accepts a file-like object.

@@ -5487,7 +5487,7 @@

                                        ScriptModule
                                        -set_extra_state(state)[source]#
                                        +set_extra_state(state)[source]#

                                        Set extra state contained in the loaded state_dict.

This function is called from load_state_dict() to handle any extra state found within the state_dict. Implement this function and a corresponding

@@ -5502,7 +5502,7 @@

                                        ScriptModule
                                        -set_submodule(target, module, strict=False)[source]#
                                        +set_submodule(target, module, strict=False)[source]#

                                        Set the submodule given by target if it exists, otherwise throw an error.

                                        Note

                                        @@ -5560,7 +5560,7 @@

                                        ScriptModule
                                        -share_memory()[source]#
                                        +share_memory()[source]#

                                        See torch.Tensor.share_memory_().

                                        Return type
                                        @@ -5571,7 +5571,7 @@

                                        ScriptModule
                                        -state_dict(*args, destination=None, prefix='', keep_vars=False)[source]#
                                        +state_dict(*args, destination=None, prefix='', keep_vars=False)[source]#

                                        Return a dictionary containing references to the whole state of the module.

Both parameters and persistent buffers (e.g. running averages) are included. Keys are corresponding parameter and buffer names.

@@ -5624,27 +5624,27 @@

                                        ScriptModule
                                        -to(*args, **kwargs)[source]#
                                        +to(*args, **kwargs)[source]#

                                        Move and/or cast the parameters and buffers.

                                        This can be called as

                                        -to(device=None, dtype=None, non_blocking=False)[source]
                                        +to(device=None, dtype=None, non_blocking=False)[source]
                                        -to(dtype, non_blocking=False)[source]
                                        +to(dtype, non_blocking=False)[source]
                                        -to(tensor, non_blocking=False)[source]
                                        +to(tensor, non_blocking=False)[source]
                                        -to(memory_format=torch.channels_last)[source]
                                        +to(memory_format=torch.channels_last)[source]

Its signature is similar to torch.Tensor.to(), but only accepts

@@ -5723,7 +5723,7 @@

                                        ScriptModule
                                        -to_empty(*, device, recurse=True)[source]#
                                        +to_empty(*, device, recurse=True)[source]#

                                        Move the parameters and buffers to the specified device without copying storage.

                                        Parameters
                                        @@ -5745,7 +5745,7 @@

                                        ScriptModule
                                        -train(mode=True)[source]#
                                        +train(mode=True)[source]#

                                        Set the module in training mode.

This has an effect only on certain modules. See the documentation of particular modules for details of their behaviors in training/evaluation

@@ -5767,7 +5767,7 @@

                                        ScriptModule
                                        -type(dst_type)[source]#
                                        +type(dst_type)[source]#

                                        Casts all parameters and buffers to dst_type.

                                        Note

                                        @@ -5788,7 +5788,7 @@

                                        ScriptModule
                                        -xpu(device=None)[source]#
                                        +xpu(device=None)[source]#

                                        Move all model parameters and buffers to the XPU.

This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will

@@ -5813,7 +5813,7 @@

                                        ScriptModule
                                        -zero_grad(set_to_none=True)[source]#
                                        +zero_grad(set_to_none=True)[source]#

                                        Reset gradients of all model parameters.

                                        See similar function under torch.optim.Optimizer for more context.

diff --git a/2.9/generated/torch.jit.annotate.html b/2.9/generated/torch.jit.annotate.html
index d1cd69169c4..fe9695e8715 100644
--- a/2.9/generated/torch.jit.annotate.html
+++ b/2.9/generated/torch.jit.annotate.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.annotate#

                                        -torch.jit.annotate(the_type, the_value)[source]#
                                        +torch.jit.annotate(the_type, the_value)[source]#

                                        Use to give type of the_value in TorchScript compiler.

This method is a pass-through function that returns the_value, used to hint the TorchScript compiler about the type of the_value. It is a no-op when running outside of TorchScript.
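The usual sketch is annotating an empty container whose element type the compiler cannot infer on its own:

>>> from typing import List
>>> import torch
>>> @torch.jit.script
... def pack(x: int):
...     xs = torch.jit.annotate(List[int], [])
...     xs.append(x)
...     return xs
...
>>> pack(3)
[3]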

diff --git a/2.9/generated/torch.jit.enable_onednn_fusion.html b/2.9/generated/torch.jit.enable_onednn_fusion.html
index 3a52731595c..63af8a3cf2b 100644
--- a/2.9/generated/torch.jit.enable_onednn_fusion.html
+++ b/2.9/generated/torch.jit.enable_onednn_fusion.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.enable_onednn_fusion#

                                        -torch.jit.enable_onednn_fusion(enabled)[source]#
                                        +torch.jit.enable_onednn_fusion(enabled)[source]#

Enables or disables onednn JIT fusion based on the parameter enabled.

diff --git a/2.9/generated/torch.jit.fork.html b/2.9/generated/torch.jit.fork.html
index 4734e4aa9e2..cd62f0a1de9 100644
--- a/2.9/generated/torch.jit.fork.html
+++ b/2.9/generated/torch.jit.fork.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.fork#

                                        -torch.jit.fork(func, *args, **kwargs)[source]#
                                        +torch.jit.fork(func, *args, **kwargs)[source]#

                                        Create an asynchronous task executing func and a reference to the value of the result of this execution.

fork will return immediately, so the return value of func may not have been computed yet. To force completion of the task and access the return value invoke torch.jit.wait on the Future. fork invoked

diff --git a/2.9/generated/torch.jit.freeze.html b/2.9/generated/torch.jit.freeze.html
index afee67d158c..b67563f30f5 100644
--- a/2.9/generated/torch.jit.freeze.html
+++ b/2.9/generated/torch.jit.freeze.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.freeze#

                                        -torch.jit.freeze(mod, preserved_attrs=None, optimize_numerics=True)[source]#
                                        +torch.jit.freeze(mod, preserved_attrs=None, optimize_numerics=True)[source]#

                                        Freeze ScriptModule, inline submodules, and attributes as constants.

Freezing a ScriptModule will clone it and attempt to inline the cloned module’s submodules, parameters, and attributes as constants in the TorchScript IR Graph.

diff --git a/2.9/generated/torch.jit.ignore.html b/2.9/generated/torch.jit.ignore.html
index 02357e6bf8a..1a4701cbbe1 100644
--- a/2.9/generated/torch.jit.ignore.html
+++ b/2.9/generated/torch.jit.ignore.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.ignore#

                                        -torch.jit.ignore(drop=False, **kwargs)[source]#
                                        +torch.jit.ignore(drop=False, **kwargs)[source]#

This decorator indicates to the compiler that a function or method should be ignored and left as a Python function. This allows you to leave code in your model that is not yet TorchScript compatible. If called from TorchScript,

diff --git a/2.9/generated/torch.jit.interface.html b/2.9/generated/torch.jit.interface.html
index 566c8b7217e..7a2d3a32bcc 100644
--- a/2.9/generated/torch.jit.interface.html
+++ b/2.9/generated/torch.jit.interface.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.interface#

                                        -torch.jit.interface(obj)[source]#
                                        +torch.jit.interface(obj)[source]#

                                        Decorate to annotate classes or modules of different types.

This decorator can be used to define an interface that can be used to annotate classes or modules of different types. This can be used to annotate a submodule

diff --git a/2.9/generated/torch.jit.isinstance.html b/2.9/generated/torch.jit.isinstance.html
index fc521658e82..c8e0fede7b1 100644
--- a/2.9/generated/torch.jit.isinstance.html
+++ b/2.9/generated/torch.jit.isinstance.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.isinstance#

                                        -torch.jit.isinstance(obj, target_type)[source]#
                                        +torch.jit.isinstance(obj, target_type)[source]#

                                        Provide container type refinement in TorchScript.

It can refine parameterized containers of the List, Dict, Tuple, and Optional types. E.g. List[str], Dict[str, List[torch.Tensor]], Optional[Tuple[int,str,int]]. It can also

diff --git a/2.9/generated/torch.jit.load.html b/2.9/generated/torch.jit.load.html
index db7a49a28c5..ccb0e332f0a 100644
--- a/2.9/generated/torch.jit.load.html
+++ b/2.9/generated/torch.jit.load.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.load#

                                        -torch.jit.load(f, map_location=None, _extra_files=None, _restore_shapes=False)[source]#
                                        +torch.jit.load(f, map_location=None, _extra_files=None, _restore_shapes=False)[source]#

                                        Load a ScriptModule or ScriptFunction previously saved with torch.jit.save.

All previously saved modules, no matter their device, are first loaded onto CPU, and then are moved to the devices they were saved from. If this fails (e.g.

diff --git a/2.9/generated/torch.jit.onednn_fusion_enabled.html b/2.9/generated/torch.jit.onednn_fusion_enabled.html
index 1c43fb4a7b3..811bd2a67c8 100644
--- a/2.9/generated/torch.jit.onednn_fusion_enabled.html
+++ b/2.9/generated/torch.jit.onednn_fusion_enabled.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.onednn_fusion_enabled#

                                        -torch.jit.onednn_fusion_enabled()[source]#
                                        +torch.jit.onednn_fusion_enabled()[source]#

                                        Return whether onednn JIT fusion is enabled.

diff --git a/2.9/generated/torch.jit.optimize_for_inference.html b/2.9/generated/torch.jit.optimize_for_inference.html
index 024d0575fa3..34bf32e3a19 100644
--- a/2.9/generated/torch.jit.optimize_for_inference.html
+++ b/2.9/generated/torch.jit.optimize_for_inference.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.optimize_for_inference#

                                        -torch.jit.optimize_for_inference(mod, other_methods=None)[source]#
                                        +torch.jit.optimize_for_inference(mod, other_methods=None)[source]#

                                        Perform a set of optimization passes to optimize a model for the purposes of inference.

                                        If the model is not already frozen, optimize_for_inference will invoke torch.jit.freeze automatically.

diff --git a/2.9/generated/torch.jit.save.html b/2.9/generated/torch.jit.save.html
index 3e6cf1a6c51..ec99bbb059d 100644
--- a/2.9/generated/torch.jit.save.html
+++ b/2.9/generated/torch.jit.save.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.save#

                                        -torch.jit.save(m, f, _extra_files=None)[source]#
                                        +torch.jit.save(m, f, _extra_files=None)[source]#

                                        Save an offline version of this module for use in a separate process.

The saved module serializes all of the methods, submodules, parameters, and attributes of this module. It can be loaded into the C++ API using

diff --git a/2.9/generated/torch.jit.script.html b/2.9/generated/torch.jit.script.html
index e85165b1794..1a29dda9e06 100644
--- a/2.9/generated/torch.jit.script.html
+++ b/2.9/generated/torch.jit.script.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.script#

                                        -torch.jit.script(obj, optimize=None, _frames_up=0, _rcb=None, example_inputs=None)[source]#
                                        +torch.jit.script(obj, optimize=None, _frames_up=0, _rcb=None, example_inputs=None)[source]#

                                        Script the function.
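A minimal sketch:

>>> import torch
>>> @torch.jit.script
... def relu6(x):
...     return torch.clamp(x, 0.0, 6.0)
...
>>> relu6(torch.tensor([-1.0, 7.0]))
tensor([0., 6.])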

Scripting a function or nn.Module will inspect the source code, compile it as TorchScript code using the TorchScript compiler, and return a ScriptModule or

diff --git a/2.9/generated/torch.jit.script_if_tracing.html b/2.9/generated/torch.jit.script_if_tracing.html
index 362ef509db3..8bbd3b157c5 100644
--- a/2.9/generated/torch.jit.script_if_tracing.html
+++ b/2.9/generated/torch.jit.script_if_tracing.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.script_if_tracing#

                                        -torch.jit.script_if_tracing(fn)[source]#
                                        +torch.jit.script_if_tracing(fn)[source]#

                                        Compiles fn when it is first called during tracing.

torch.jit.script has a non-negligible start up time when it is first called due to lazy-initializations of many compiler builtins. Therefore you should not use

diff --git a/2.9/generated/torch.jit.set_fusion_strategy.html b/2.9/generated/torch.jit.set_fusion_strategy.html
index d7bfd1dce34..7b092ffb832 100644
--- a/2.9/generated/torch.jit.set_fusion_strategy.html
+++ b/2.9/generated/torch.jit.set_fusion_strategy.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.set_fusion_strategy#

                                        -torch.jit.set_fusion_strategy(strategy)[source]#
                                        +torch.jit.set_fusion_strategy(strategy)[source]#

                                        Set the type and number of specializations that can occur during fusion.

                                        Usage: provide a list of pairs (type, depth) where type is one of “STATIC” or “DYNAMIC” and depth is an integer.

diff --git a/2.9/generated/torch.jit.strict_fusion.html b/2.9/generated/torch.jit.strict_fusion.html
index c5ccc16cc81..e6b6eddf6a9 100644
--- a/2.9/generated/torch.jit.strict_fusion.html
+++ b/2.9/generated/torch.jit.strict_fusion.html
@@ -4404,7 +4404,7 @@

                                        strict_fusion#

                                        -class torch.jit.strict_fusion[source]#
                                        +class torch.jit.strict_fusion[source]#

                                        Give errors if not all nodes have been fused in inference, or symbolically differentiated in training.

                                        Example: Forcing fusion of additions.

diff --git a/2.9/generated/torch.jit.trace.html b/2.9/generated/torch.jit.trace.html
index f1da0281fb6..ab2df480433 100644
--- a/2.9/generated/torch.jit.trace.html
+++ b/2.9/generated/torch.jit.trace.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.trace#

                                        -torch.jit.trace(func, example_inputs=None, optimize=None, check_trace=True, check_inputs=None, check_tolerance=1e-05, strict=True, _force_outplace=False, _module_class=None, _compilation_unit=<torch.jit.CompilationUnit object>, example_kwarg_inputs=None, _store_inputs=True)[source]#
                                        +torch.jit.trace(func, example_inputs=None, optimize=None, check_trace=True, check_inputs=None, check_tolerance=1e-05, strict=True, _force_outplace=False, _module_class=None, _compilation_unit=<torch.jit.CompilationUnit object>, example_kwarg_inputs=None, _store_inputs=True)[source]#

                                        Trace a function and return an executable or ScriptFunction that will be optimized using just-in-time compilation.
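A small sketch (the example input supplies shapes and dtypes only; data-dependent control flow is not captured):

>>> import torch
>>> def double(x):
...     return x * 2
...
>>> traced = torch.jit.trace(double, torch.rand(3))
>>> traced(torch.ones(3))
tensor([2., 2., 2.])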

Tracing is ideal for code that operates only on Tensors and lists, dictionaries, and

diff --git a/2.9/generated/torch.jit.trace_module.html b/2.9/generated/torch.jit.trace_module.html
index fd34ba9980e..d9259b41775 100644
--- a/2.9/generated/torch.jit.trace_module.html
+++ b/2.9/generated/torch.jit.trace_module.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.trace_module#

                                        -torch.jit.trace_module(mod, inputs, optimize=None, check_trace=True, check_inputs=None, check_tolerance=1e-05, strict=True, _force_outplace=False, _module_class=None, _compilation_unit=<torch.jit.CompilationUnit object>, example_inputs_is_kwarg=False, _store_inputs=True)[source]#
                                        +torch.jit.trace_module(mod, inputs, optimize=None, check_trace=True, check_inputs=None, check_tolerance=1e-05, strict=True, _force_outplace=False, _module_class=None, _compilation_unit=<torch.jit.CompilationUnit object>, example_inputs_is_kwarg=False, _store_inputs=True)[source]#

                                        Trace a module and return an executable ScriptModule that will be optimized using just-in-time compilation.

When a module is passed to torch.jit.trace, only the forward method is run and traced. With trace_module, you can specify a dictionary of

diff --git a/2.9/generated/torch.jit.unused.html b/2.9/generated/torch.jit.unused.html
index 5e1b7327a7e..c69c435f74c 100644
--- a/2.9/generated/torch.jit.unused.html
+++ b/2.9/generated/torch.jit.unused.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.unused#

                                        -torch.jit.unused(fn)[source]#
                                        +torch.jit.unused(fn)[source]#

This decorator indicates to the compiler that a function or method should be ignored and replaced with the raising of an exception. This allows you to leave code in your model that is not yet TorchScript compatible and still

diff --git a/2.9/generated/torch.jit.wait.html b/2.9/generated/torch.jit.wait.html
index 47307ea8b29..4ac8b6fc16f 100644
--- a/2.9/generated/torch.jit.wait.html
+++ b/2.9/generated/torch.jit.wait.html
@@ -4404,7 +4404,7 @@

                                        torch.jit.wait#

                                        -torch.jit.wait(future)[source]#
                                        +torch.jit.wait(future)[source]#

                                        Force completion of a torch.jit.Future[T] asynchronous task, returning the result of the task.
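A fork/wait round trip, as a sketch:

>>> import torch
>>> @torch.jit.script
... def add_one(x):
...     return x + 1
...
>>> fut = torch.jit.fork(add_one, torch.ones(2))
>>> torch.jit.wait(fut)
tensor([2., 2.])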

See fork() for docs and examples.

Parameters
future – an asynchronous task reference, created through torch.jit.fork

diff --git a/2.9/generated/torch.load.html b/2.9/generated/torch.load.html
index 442b6f4fa0a..848ed404e3c 100644
--- a/2.9/generated/torch.load.html
+++ b/2.9/generated/torch.load.html
@@ -4404,7 +4404,7 @@

                                        torch.load#

                                        -torch.load(f, map_location=None, pickle_module=pickle, *, weights_only=True, mmap=None, **pickle_load_args)[source]#
                                        +torch.load(f, map_location=None, pickle_module=pickle, *, weights_only=True, mmap=None, **pickle_load_args)[source]#

                                        Loads an object saved with torch.save() from a file.
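An in-memory round trip, as a sketch (weights_only=True matches the default shown in the signature above):

>>> import io
>>> import torch
>>> buf = io.BytesIO()
>>> torch.save(torch.arange(3), buf)
>>> _ = buf.seek(0)
>>> torch.load(buf, weights_only=True)
tensor([0, 1, 2])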

torch.load() uses Python’s unpickling facilities but treats storages, which underlie tensors, specially. They are first deserialized on the

diff --git a/2.9/generated/torch.lobpcg.html b/2.9/generated/torch.lobpcg.html
index 80749688852..2a41ecd87e7 100644
--- a/2.9/generated/torch.lobpcg.html
+++ b/2.9/generated/torch.lobpcg.html
@@ -4404,7 +4404,7 @@

                                        torch.lobpcg#

                                        -torch.lobpcg(A, k=None, B=None, X=None, n=None, iK=None, niter=None, tol=None, largest=None, method=None, tracker=None, ortho_iparams=None, ortho_fparams=None, ortho_bparams=None)[source]#
                                        +torch.lobpcg(A, k=None, B=None, X=None, n=None, iK=None, niter=None, tol=None, largest=None, method=None, tracker=None, ortho_iparams=None, ortho_fparams=None, ortho_bparams=None)[source]#

                                        Find the k largest (or smallest) eigenvalues and the corresponding eigenvectors of a symmetric positive definite generalized eigenvalue problem using matrix-free LOBPCG methods.
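A sketch on a small random symmetric positive definite matrix (values vary run to run):

>>> import torch
>>> A = torch.randn(10, 10)
>>> A = A @ A.T + 10 * torch.eye(10)   # make A symmetric positive definite
>>> eigenvalues, eigenvectors = torch.lobpcg(A, k=2)
>>> eigenvalues.shape, eigenvectors.shape
(torch.Size([2]), torch.Size([10, 2]))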

diff --git a/2.9/generated/torch.lu.html b/2.9/generated/torch.lu.html
index 4336ede7936..377e213c4ab 100644
--- a/2.9/generated/torch.lu.html
+++ b/2.9/generated/torch.lu.html
@@ -4404,7 +4404,7 @@

                                        torch.lu#

                                        -torch.lu(*args, **kwargs)[source]#
                                        +torch.lu(*args, **kwargs)[source]#

Computes the LU factorization of a matrix or batches of matrices A. Returns a tuple containing the LU factorization and pivots of A. Pivoting is done if pivot is set to

diff --git a/2.9/generated/torch.manual_seed.html b/2.9/generated/torch.manual_seed.html
index 15792d2ed3a..82a1d030095 100644
--- a/2.9/generated/torch.manual_seed.html
+++ b/2.9/generated/torch.manual_seed.html
@@ -4404,7 +4404,7 @@

                                        torch.manual_seed#

                                        -torch.manual_seed(seed)[source]#
                                        +torch.manual_seed(seed)[source]#

                                        Sets the seed for generating random numbers on all devices. Returns a torch.Generator object.
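Reseeding makes random streams reproducible, e.g.:

>>> import torch
>>> _ = torch.manual_seed(42)
>>> a = torch.rand(2)
>>> _ = torch.manual_seed(42)
>>> torch.equal(a, torch.rand(2))
True
>>> torch.initial_seed()
42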

diff --git a/2.9/generated/torch.meshgrid.html b/2.9/generated/torch.meshgrid.html
index acb551746e4..a6705c70d35 100644
--- a/2.9/generated/torch.meshgrid.html
+++ b/2.9/generated/torch.meshgrid.html
@@ -4404,7 +4404,7 @@

                                        torch.meshgrid#

                                        -torch.meshgrid(*tensors, indexing=None)[source]#
                                        +torch.meshgrid(*tensors, indexing=None)[source]#

Creates grids of coordinates specified by the 1D inputs in tensors.

                                        This is helpful when you want to visualize data over some range of inputs. See below for a plotting example.
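For example:

>>> import torch
>>> xs = torch.tensor([1, 2, 3])
>>> ys = torch.tensor([4, 5])
>>> gx, gy = torch.meshgrid(xs, ys, indexing="ij")
>>> gx.shape, gy.shape
(torch.Size([3, 2]), torch.Size([3, 2]))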

diff --git a/2.9/generated/torch.mps.compile_shader.html b/2.9/generated/torch.mps.compile_shader.html
index 0fce7560a7b..ee45f528f70 100644
--- a/2.9/generated/torch.mps.compile_shader.html
+++ b/2.9/generated/torch.mps.compile_shader.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.compile_shader#

                                        -torch.mps.compile_shader(source)[source]#
                                        +torch.mps.compile_shader(source)[source]#

Compiles a compute shader from source and allows one to invoke the kernels defined there from within the Python runtime. Example:
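(A sketch along the lines of the upstream example; it assumes an MPS-capable machine, and the kernel name and signature here are illustrative.)

>>> import torch
>>> lib = torch.mps.compile_shader('''
... kernel void fill(device float* out,
...                  constant float& value,
...                  uint idx [[thread_position_in_grid]]) {
...     out[idx] = value;
... }
... ''')
>>> t = torch.empty(8, device="mps")
>>> lib.fill(t, 3.14)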

diff --git a/2.9/generated/torch.mps.current_allocated_memory.html b/2.9/generated/torch.mps.current_allocated_memory.html
index 2a210d81350..6a4d6939d89 100644
--- a/2.9/generated/torch.mps.current_allocated_memory.html
+++ b/2.9/generated/torch.mps.current_allocated_memory.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.current_allocated_memory#

                                        -torch.mps.current_allocated_memory()[source]#
                                        +torch.mps.current_allocated_memory()[source]#

                                        Returns the current GPU memory occupied by tensors in bytes.

                                        Note

diff --git a/2.9/generated/torch.mps.device_count.html b/2.9/generated/torch.mps.device_count.html
index e80b949be4b..dc5bc9fa8bc 100644
--- a/2.9/generated/torch.mps.device_count.html
+++ b/2.9/generated/torch.mps.device_count.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.device_count#

                                        -torch.mps.device_count()[source]#
                                        +torch.mps.device_count()[source]#

                                        Returns the number of available MPS devices.

                                        Return type
diff --git a/2.9/generated/torch.mps.driver_allocated_memory.html b/2.9/generated/torch.mps.driver_allocated_memory.html
index 39c5f012ef2..dcce886b2da 100644
--- a/2.9/generated/torch.mps.driver_allocated_memory.html
+++ b/2.9/generated/torch.mps.driver_allocated_memory.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.driver_allocated_memory#

                                        -torch.mps.driver_allocated_memory()[source]#
                                        +torch.mps.driver_allocated_memory()[source]#

Returns the total GPU memory allocated by the Metal driver for the process, in bytes.

                                        Note

diff --git a/2.9/generated/torch.mps.empty_cache.html b/2.9/generated/torch.mps.empty_cache.html
index fcd48f9d1d5..e783ef1f105 100644
--- a/2.9/generated/torch.mps.empty_cache.html
+++ b/2.9/generated/torch.mps.empty_cache.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.empty_cache#

                                        -torch.mps.empty_cache()[source]#
                                        +torch.mps.empty_cache()[source]#

Releases all unoccupied cached memory currently held by the caching allocator so that it can be used by other GPU applications.

diff --git a/2.9/generated/torch.mps.event.Event.html b/2.9/generated/torch.mps.event.Event.html
index 95d89fd4afb..13777750f3f 100644
--- a/2.9/generated/torch.mps.event.Event.html
+++ b/2.9/generated/torch.mps.event.Event.html
@@ -4404,7 +4404,7 @@

                                        Event#

                                        -class torch.mps.event.Event(enable_timing=False)[source]#
                                        +class torch.mps.event.Event(enable_timing=False)[source]#

                                        Wrapper around an MPS event.

                                        MPS events are synchronization markers that can be used to monitor the device’s progress, to accurately measure timing, and to synchronize MPS streams.

                                        @@ -4416,7 +4416,7 @@

                                        Event#

                                        -elapsed_time(end_event)[source]#
                                        +elapsed_time(end_event)[source]#

                                        Returns the time elapsed in milliseconds after the event was recorded and before the end_event was recorded.

                                        @@ -4428,7 +4428,7 @@

                                        Event#
                                        -query()[source]#
                                        +query()[source]#

                                        Returns True if all work currently captured by event has completed.

                                        Return type
                                        @@ -4439,7 +4439,7 @@

                                        Event#
                                        -record()[source]#
                                        +record()[source]#

                                        Records the event in the default stream.

                                        @@ -4447,7 +4447,7 @@

                                        Event#
                                        -synchronize()[source]#
                                        +synchronize()[source]#

                                        Waits until the completion of all work currently captured in this event. This prevents the CPU thread from proceeding until the event completes.

                                        @@ -4456,7 +4456,7 @@

                                        Event#
                                        -wait()[source]#
                                        +wait()[source]#

                                        Makes all future work submitted to the default stream wait for this event.

diff --git a/2.9/generated/torch.mps.get_rng_state.html b/2.9/generated/torch.mps.get_rng_state.html
index 000ca2de3ee..25479c52e8f 100644
--- a/2.9/generated/torch.mps.get_rng_state.html
+++ b/2.9/generated/torch.mps.get_rng_state.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.get_rng_state#

                                        -torch.mps.get_rng_state(device='mps')[source]#
                                        +torch.mps.get_rng_state(device='mps')[source]#

                                        Returns the random number generator state as a ByteTensor.

                                        Parameters
diff --git a/2.9/generated/torch.mps.manual_seed.html b/2.9/generated/torch.mps.manual_seed.html
index a23069730ee..3c9dbaa7596 100644
--- a/2.9/generated/torch.mps.manual_seed.html
+++ b/2.9/generated/torch.mps.manual_seed.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.manual_seed#

                                        -torch.mps.manual_seed(seed)[source]#
                                        +torch.mps.manual_seed(seed)[source]#

                                        Sets the seed for generating random numbers.

                                        Parameters
diff --git a/2.9/generated/torch.mps.profiler.is_capturing_metal.html b/2.9/generated/torch.mps.profiler.is_capturing_metal.html
index 9a255de484a..19a96452964 100644
--- a/2.9/generated/torch.mps.profiler.is_capturing_metal.html
+++ b/2.9/generated/torch.mps.profiler.is_capturing_metal.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.profiler.is_capturing_metal#

                                        -torch.mps.profiler.is_capturing_metal()[source]#
                                        +torch.mps.profiler.is_capturing_metal()[source]#

Checks if Metal capture is in progress.

                                        Return type
diff --git a/2.9/generated/torch.mps.profiler.is_metal_capture_enabled.html b/2.9/generated/torch.mps.profiler.is_metal_capture_enabled.html
index fc2ce10a8f4..aee000ebd8c 100644
--- a/2.9/generated/torch.mps.profiler.is_metal_capture_enabled.html
+++ b/2.9/generated/torch.mps.profiler.is_metal_capture_enabled.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.profiler.is_metal_capture_enabled#

                                        -torch.mps.profiler.is_metal_capture_enabled()[source]#
                                        +torch.mps.profiler.is_metal_capture_enabled()[source]#

Checks if the metal_capture context manager is usable. To enable Metal capture, set the MTL_CAPTURE_ENABLED environment variable.

diff --git a/2.9/generated/torch.mps.profiler.metal_capture.html b/2.9/generated/torch.mps.profiler.metal_capture.html
index c66a433a0fb..c0946cf4a71 100644
--- a/2.9/generated/torch.mps.profiler.metal_capture.html
+++ b/2.9/generated/torch.mps.profiler.metal_capture.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.profiler.metal_capture#

                                        -torch.mps.profiler.metal_capture(fname)[source]#
                                        +torch.mps.profiler.metal_capture(fname)[source]#

Context manager that enables capturing Metal calls into a gputrace file.
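A hedged usage sketch; "my_trace" is an arbitrary example name, and MTL_CAPTURE_ENABLED must be set before the process starts (see is_metal_capture_enabled above):

import torch

if torch.mps.profiler.is_metal_capture_enabled():
    with torch.mps.profiler.metal_capture("my_trace"):   # writes a .gputrace capture
        torch.randn(64, 64, device="mps").sum()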

diff --git a/2.9/generated/torch.mps.profiler.profile.html b/2.9/generated/torch.mps.profiler.profile.html
index 8b728529031..90187963ed3 100644
--- a/2.9/generated/torch.mps.profiler.profile.html
+++ b/2.9/generated/torch.mps.profiler.profile.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.profiler.profile#

                                        -torch.mps.profiler.profile(mode='interval', wait_until_completed=False)[source]#
                                        +torch.mps.profiler.profile(mode='interval', wait_until_completed=False)[source]#

Context manager to enable generating OS Signpost tracing from the MPS backend.
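A minimal sketch of the context-manager form (the workload is an arbitrary example):

import torch

with torch.mps.profiler.profile(mode="interval", wait_until_completed=False):
    torch.randn(1024, 1024, device="mps").sum()   # signposts are emitted for this region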

                                        Parameters
diff --git a/2.9/generated/torch.mps.profiler.start.html b/2.9/generated/torch.mps.profiler.start.html
index 707858f6443..2ffa146d492 100644
--- a/2.9/generated/torch.mps.profiler.start.html
+++ b/2.9/generated/torch.mps.profiler.start.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.profiler.start#

                                        -torch.mps.profiler.start(mode='interval', wait_until_completed=False)[source]#
                                        +torch.mps.profiler.start(mode='interval', wait_until_completed=False)[source]#

                                        Start OS Signpost tracing from MPS backend.

The generated OS Signposts can be recorded and viewed in the Xcode Instruments Logging tool.
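The explicit start/stop form, sketched under the same assumptions as the profile() context manager above:

import torch

torch.mps.profiler.start(mode="interval", wait_until_completed=False)
torch.randn(1024, 1024, device="mps").sum()
torch.mps.profiler.stop()   # the recording can then be inspected in Xcode Instruments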

diff --git a/2.9/generated/torch.mps.profiler.stop.html b/2.9/generated/torch.mps.profiler.stop.html
index 1345180c8ee..edf80138d75 100644
--- a/2.9/generated/torch.mps.profiler.stop.html
+++ b/2.9/generated/torch.mps.profiler.stop.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.profiler.stop#

                                        -torch.mps.profiler.stop()[source]#
                                        +torch.mps.profiler.stop()[source]#

                                        Stops generating OS Signpost tracing from MPS backend.

diff --git a/2.9/generated/torch.mps.recommended_max_memory.html b/2.9/generated/torch.mps.recommended_max_memory.html
index 1552e981033..eea6e593104 100644
--- a/2.9/generated/torch.mps.recommended_max_memory.html
+++ b/2.9/generated/torch.mps.recommended_max_memory.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.recommended_max_memory#

                                        -torch.mps.recommended_max_memory()[source]#
                                        +torch.mps.recommended_max_memory()[source]#

Returns the recommended maximum working set size for GPU memory, in bytes.

                                        Note

diff --git a/2.9/generated/torch.mps.seed.html b/2.9/generated/torch.mps.seed.html
index f23b3af25a2..e7cd919cd36 100644
--- a/2.9/generated/torch.mps.seed.html
+++ b/2.9/generated/torch.mps.seed.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.seed#

                                        -torch.mps.seed()[source]#
                                        +torch.mps.seed()[source]#

                                        Sets the seed for generating random numbers to a random number.

diff --git a/2.9/generated/torch.mps.set_per_process_memory_fraction.html b/2.9/generated/torch.mps.set_per_process_memory_fraction.html
index 86a5f3bf48f..50d95f88f65 100644
--- a/2.9/generated/torch.mps.set_per_process_memory_fraction.html
+++ b/2.9/generated/torch.mps.set_per_process_memory_fraction.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.set_per_process_memory_fraction#

                                        -torch.mps.set_per_process_memory_fraction(fraction)[source]#
                                        +torch.mps.set_per_process_memory_fraction(fraction)[source]#

Set the memory fraction limiting the process's memory allocation on the MPS device. The allowed value equals the fraction multiplied by the recommended maximum device memory (obtained from the Metal API device.recommendedMaxWorkingSetSize).
diff --git a/2.9/generated/torch.mps.set_rng_state.html b/2.9/generated/torch.mps.set_rng_state.html
index eb4d87b9870..41ff29d1d36 100644
--- a/2.9/generated/torch.mps.set_rng_state.html
+++ b/2.9/generated/torch.mps.set_rng_state.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.set_rng_state#

                                        -torch.mps.set_rng_state(new_state, device='mps')[source]#
                                        +torch.mps.set_rng_state(new_state, device='mps')[source]#

                                        Sets the random number generator state.
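Together with get_rng_state and manual_seed above, this supports the usual save/restore pattern; a short sketch (the seed value is arbitrary):

import torch

state = torch.mps.get_rng_state()    # ByteTensor snapshot
torch.mps.manual_seed(1234)          # perturb the generator
_ = torch.randn(4, device="mps")
torch.mps.set_rng_state(state)       # restore the saved state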

                                        Parameters
diff --git a/2.9/generated/torch.mps.synchronize.html b/2.9/generated/torch.mps.synchronize.html
index 9712418394c..bb02c2131ab 100644
--- a/2.9/generated/torch.mps.synchronize.html
+++ b/2.9/generated/torch.mps.synchronize.html
@@ -4404,7 +4404,7 @@

                                        torch.mps.synchronize#

                                        -torch.mps.synchronize()[source]#
                                        +torch.mps.synchronize()[source]#

Waits for all kernels in all streams on an MPS device to complete.

diff --git a/2.9/generated/torch.mtia.DeferredMtiaCallError.html b/2.9/generated/torch.mtia.DeferredMtiaCallError.html
index 71659c47537..49fbbe555b0 100644
--- a/2.9/generated/torch.mtia.DeferredMtiaCallError.html
+++ b/2.9/generated/torch.mtia.DeferredMtiaCallError.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.DeferredMtiaCallError#

                                        -exception torch.mtia.DeferredMtiaCallError[source]#
                                        +exception torch.mtia.DeferredMtiaCallError[source]#
diff --git a/2.9/generated/torch.mtia.StreamContext.html b/2.9/generated/torch.mtia.StreamContext.html
index 0a084bd07fa..f512aa53a4e 100644
--- a/2.9/generated/torch.mtia.StreamContext.html
+++ b/2.9/generated/torch.mtia.StreamContext.html
@@ -4404,7 +4404,7 @@

                                        StreamContext#

                                        -class torch.mtia.StreamContext(stream)[source]#
                                        +class torch.mtia.StreamContext(stream)[source]#

                                        Context-manager that selects a given stream.

                                        All MTIA kernels queued within its context will be enqueued on a selected stream.

diff --git a/2.9/generated/torch.mtia.attach_out_of_memory_observer.html b/2.9/generated/torch.mtia.attach_out_of_memory_observer.html
index 6bb69d6158c..0801cffcbd8 100644
--- a/2.9/generated/torch.mtia.attach_out_of_memory_observer.html
+++ b/2.9/generated/torch.mtia.attach_out_of_memory_observer.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.attach_out_of_memory_observer#

                                        -torch.mtia.attach_out_of_memory_observer(observer)[source]#
                                        +torch.mtia.attach_out_of_memory_observer(observer)[source]#

Attach an out-of-memory observer to the MTIA memory allocator.

diff --git a/2.9/generated/torch.mtia.current_device.html b/2.9/generated/torch.mtia.current_device.html
index 0c3112f5924..24e373f0367 100644
--- a/2.9/generated/torch.mtia.current_device.html
+++ b/2.9/generated/torch.mtia.current_device.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.current_device#

                                        -torch.mtia.current_device()[source]#
                                        +torch.mtia.current_device()[source]#

                                        Return the index of a currently selected device.

                                        Return type
diff --git a/2.9/generated/torch.mtia.current_stream.html b/2.9/generated/torch.mtia.current_stream.html
index b202cfd237c..ad5193c172c 100644
--- a/2.9/generated/torch.mtia.current_stream.html
+++ b/2.9/generated/torch.mtia.current_stream.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.current_stream#

                                        -torch.mtia.current_stream(device=None)[source]#
                                        +torch.mtia.current_stream(device=None)[source]#

                                        Return the currently selected Stream for a given device.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.default_stream.html b/2.9/generated/torch.mtia.default_stream.html
index 06eb79237b5..68688558b1d 100644
--- a/2.9/generated/torch.mtia.default_stream.html
+++ b/2.9/generated/torch.mtia.default_stream.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.default_stream#

                                        -torch.mtia.default_stream(device=None)[source]#
                                        +torch.mtia.default_stream(device=None)[source]#

                                        Return the default Stream for a given device.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.device.html b/2.9/generated/torch.mtia.device.html
index 205dc7fda07..3886235e179 100644
--- a/2.9/generated/torch.mtia.device.html
+++ b/2.9/generated/torch.mtia.device.html
@@ -4404,7 +4404,7 @@

                                        device#

                                        -class torch.mtia.device(device)[source]#
                                        +class torch.mtia.device(device)[source]#

                                        Context-manager that changes the selected device.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.device_count.html b/2.9/generated/torch.mtia.device_count.html
index 58db148a317..cafe3c62c5a 100644
--- a/2.9/generated/torch.mtia.device_count.html
+++ b/2.9/generated/torch.mtia.device_count.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.device_count#

                                        -torch.mtia.device_count()[source]#
                                        +torch.mtia.device_count()[source]#

                                        Return the number of MTIA devices available.

                                        Return type
diff --git a/2.9/generated/torch.mtia.empty_cache.html b/2.9/generated/torch.mtia.empty_cache.html
index c8b0428149b..336c88cc0bd 100644
--- a/2.9/generated/torch.mtia.empty_cache.html
+++ b/2.9/generated/torch.mtia.empty_cache.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.empty_cache#

                                        -torch.mtia.empty_cache()[source]#
                                        +torch.mtia.empty_cache()[source]#

                                        Empty the MTIA device cache.

diff --git a/2.9/generated/torch.mtia.get_device_capability.html b/2.9/generated/torch.mtia.get_device_capability.html
index 37dd3cccb38..1ef47803079 100644
--- a/2.9/generated/torch.mtia.get_device_capability.html
+++ b/2.9/generated/torch.mtia.get_device_capability.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.get_device_capability#

                                        -torch.mtia.get_device_capability(device=None)[source]#
                                        +torch.mtia.get_device_capability(device=None)[source]#

                                        Return capability of a given device as a tuple of (major version, minor version).

                                        Parameters
diff --git a/2.9/generated/torch.mtia.get_rng_state.html b/2.9/generated/torch.mtia.get_rng_state.html
index dba0e23a6c6..935f13de470 100644
--- a/2.9/generated/torch.mtia.get_rng_state.html
+++ b/2.9/generated/torch.mtia.get_rng_state.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.get_rng_state#

                                        -torch.mtia.get_rng_state(device='mtia')[source]#
                                        +torch.mtia.get_rng_state(device='mtia')[source]#

                                        Returns the random number generator state as a ByteTensor.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.init.html b/2.9/generated/torch.mtia.init.html
index afb99aa473c..1b63f1d828e 100644
--- a/2.9/generated/torch.mtia.init.html
+++ b/2.9/generated/torch.mtia.init.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.init#

                                        -torch.mtia.init()[source]#
                                        +torch.mtia.init()[source]#

diff --git a/2.9/generated/torch.mtia.is_available.html b/2.9/generated/torch.mtia.is_available.html
index bb631c81d1a..f969311ea77 100644
--- a/2.9/generated/torch.mtia.is_available.html
+++ b/2.9/generated/torch.mtia.is_available.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.is_available#

                                        -torch.mtia.is_available()[source]#
                                        +torch.mtia.is_available()[source]#

Return True if an MTIA device is available.

                                        Return type
                                        diff --git a/2.9/generated/torch.mtia.is_initialized.html b/2.9/generated/torch.mtia.is_initialized.html index 81c0f58f150..28e92e56f73 100644 --- a/2.9/generated/torch.mtia.is_initialized.html +++ b/2.9/generated/torch.mtia.is_initialized.html @@ -4404,7 +4404,7 @@

                                        torch.mtia.is_initialized#

                                        -torch.mtia.is_initialized()[source]#
                                        +torch.mtia.is_initialized()[source]#

                                        Return whether PyTorch’s MTIA state has been initialized.

diff --git a/2.9/generated/torch.mtia.memory.memory_allocated.html b/2.9/generated/torch.mtia.memory.memory_allocated.html
index 8f6d48c8083..3756043dc27 100644
--- a/2.9/generated/torch.mtia.memory.memory_allocated.html
+++ b/2.9/generated/torch.mtia.memory.memory_allocated.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.memory.memory_allocated#

                                        -torch.mtia.memory.memory_allocated(device=None)[source]#
                                        +torch.mtia.memory.memory_allocated(device=None)[source]#

                                        Return the current MTIA memory occupied by tensors in bytes for a given device.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.memory.memory_stats.html b/2.9/generated/torch.mtia.memory.memory_stats.html
index 66b8456058b..d41a7e8d01e 100644
--- a/2.9/generated/torch.mtia.memory.memory_stats.html
+++ b/2.9/generated/torch.mtia.memory.memory_stats.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.memory.memory_stats#

                                        -torch.mtia.memory.memory_stats(device=None)[source]#
                                        +torch.mtia.memory.memory_stats(device=None)[source]#

                                        Return a dictionary of MTIA memory allocator statistics for a given device.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.memory_stats.html b/2.9/generated/torch.mtia.memory_stats.html
index bb931969e53..af35a12ab57 100644
--- a/2.9/generated/torch.mtia.memory_stats.html
+++ b/2.9/generated/torch.mtia.memory_stats.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.memory_stats#

                                        -torch.mtia.memory_stats(device=None)[source]#
                                        +torch.mtia.memory_stats(device=None)[source]#

                                        Return a dictionary of MTIA memory allocator statistics for a given device.
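A hedged sketch of reading these counters; it is only meaningful on hosts with MTIA hardware, hence the availability guard:

import torch

if torch.mtia.is_available():
    stats = torch.mtia.memory_stats()                 # dict of allocator counters
    allocated = torch.mtia.memory.memory_allocated()  # bytes currently held by tensors
    print(allocated, stats)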

                                        Parameters
diff --git a/2.9/generated/torch.mtia.record_memory_history.html b/2.9/generated/torch.mtia.record_memory_history.html
index 2e7591c5dc1..24b38ff40f6 100644
--- a/2.9/generated/torch.mtia.record_memory_history.html
+++ b/2.9/generated/torch.mtia.record_memory_history.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.record_memory_history#

                                        -torch.mtia.record_memory_history(enabled='all', stacks='python', max_entries=0)[source]#
                                        +torch.mtia.record_memory_history(enabled='all', stacks='python', max_entries=0)[source]#

Enable/disable the memory profiler on the MTIA allocator.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.set_device.html b/2.9/generated/torch.mtia.set_device.html
index 18466225053..ff00f2b7536 100644
--- a/2.9/generated/torch.mtia.set_device.html
+++ b/2.9/generated/torch.mtia.set_device.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.set_device#

                                        -torch.mtia.set_device(device)[source]#
                                        +torch.mtia.set_device(device)[source]#

                                        Set the current device.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.set_rng_state.html b/2.9/generated/torch.mtia.set_rng_state.html
index 8e6bc530819..119985449e3 100644
--- a/2.9/generated/torch.mtia.set_rng_state.html
+++ b/2.9/generated/torch.mtia.set_rng_state.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.set_rng_state#

                                        -torch.mtia.set_rng_state(new_state, device='mtia')[source]#
                                        +torch.mtia.set_rng_state(new_state, device='mtia')[source]#

                                        Sets the random number generator state.

                                        Parameters
diff --git a/2.9/generated/torch.mtia.set_stream.html b/2.9/generated/torch.mtia.set_stream.html
index 14e41b75c59..394dfbfbe3b 100644
--- a/2.9/generated/torch.mtia.set_stream.html
+++ b/2.9/generated/torch.mtia.set_stream.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.set_stream#

                                        -torch.mtia.set_stream(stream)[source]#
                                        +torch.mtia.set_stream(stream)[source]#
Set the current stream. This is a wrapper API to set the stream.

                                        Usage of this function is discouraged in favor of the stream context manager.

diff --git a/2.9/generated/torch.mtia.snapshot.html b/2.9/generated/torch.mtia.snapshot.html
index 11049ff8d2c..505298b8702 100644
--- a/2.9/generated/torch.mtia.snapshot.html
+++ b/2.9/generated/torch.mtia.snapshot.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.snapshot#

                                        -torch.mtia.snapshot()[source]#
                                        +torch.mtia.snapshot()[source]#

Return a dictionary of MTIA memory allocator history.

                                        Return type
diff --git a/2.9/generated/torch.mtia.stream.html b/2.9/generated/torch.mtia.stream.html
index 47cb1064b76..c2be92fd57f 100644
--- a/2.9/generated/torch.mtia.stream.html
+++ b/2.9/generated/torch.mtia.stream.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.stream#

                                        -torch.mtia.stream(stream)[source]#
                                        +torch.mtia.stream(stream)[source]#

                                        Wrap around the Context-manager StreamContext that selects a given stream.
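A hedged sketch using the streams documented above (requires MTIA hardware, hence the guard):

import torch

if torch.mtia.is_available():
    s = torch.mtia.default_stream()   # or torch.mtia.current_stream()
    with torch.mtia.stream(s):
        pass                          # kernels launched here target stream s
    torch.mtia.synchronize()          # wait for all streams to drain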

                                        Parameters
diff --git a/2.9/generated/torch.mtia.synchronize.html b/2.9/generated/torch.mtia.synchronize.html
index 9925e6b8d02..258bd2f962c 100644
--- a/2.9/generated/torch.mtia.synchronize.html
+++ b/2.9/generated/torch.mtia.synchronize.html
@@ -4404,7 +4404,7 @@

                                        torch.mtia.synchronize#

                                        -torch.mtia.synchronize(device=None)[source]#
                                        +torch.mtia.synchronize(device=None)[source]#

Waits for all jobs in all streams on an MTIA device to complete.

diff --git a/2.9/generated/torch.nn.AdaptiveAvgPool1d.html b/2.9/generated/torch.nn.AdaptiveAvgPool1d.html
index d5ead0f01aa..b008a359eaa 100644
--- a/2.9/generated/torch.nn.AdaptiveAvgPool1d.html
+++ b/2.9/generated/torch.nn.AdaptiveAvgPool1d.html
@@ -4404,7 +4404,7 @@

                                        AdaptiveAvgPool1d#

                                        -class torch.nn.AdaptiveAvgPool1d(output_size)[source]#
                                        +class torch.nn.AdaptiveAvgPool1d(output_size)[source]#

                                        Applies a 1D adaptive average pooling over an input signal composed of several input planes.

The output size is L_{out}, for any input size. The number of output features is equal to the number of input planes.
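A minimal sketch: only the target length is given, and the kernel and stride are derived from the input size:

import torch
import torch.nn as nn

m = nn.AdaptiveAvgPool1d(5)   # pool any input length down to exactly 5
x = torch.randn(1, 64, 8)
print(m(x).shape)             # torch.Size([1, 64, 5])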

                                        @@ -4430,7 +4430,7 @@

                                        AdaptiveAvgPool1d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.AdaptiveAvgPool2d.html b/2.9/generated/torch.nn.AdaptiveAvgPool2d.html
index 8096e39c63e..9af4550940c 100644
--- a/2.9/generated/torch.nn.AdaptiveAvgPool2d.html
+++ b/2.9/generated/torch.nn.AdaptiveAvgPool2d.html
@@ -4404,7 +4404,7 @@

                                        AdaptiveAvgPool2d#

                                        -class torch.nn.AdaptiveAvgPool2d(output_size)[source]#
                                        +class torch.nn.AdaptiveAvgPool2d(output_size)[source]#

                                        Applies a 2D adaptive average pooling over an input signal composed of several input planes.

                                        The output is of size H x W, for any input size. The number of output features is equal to the number of input planes.
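A short sketch of the three accepted target-size forms:

import torch
import torch.nn as nn

x = torch.randn(1, 64, 8, 9)
print(nn.AdaptiveAvgPool2d((5, 7))(x).shape)     # torch.Size([1, 64, 5, 7])
print(nn.AdaptiveAvgPool2d(7)(x).shape)          # square target: torch.Size([1, 64, 7, 7])
print(nn.AdaptiveAvgPool2d((None, 7))(x).shape)  # None keeps H: torch.Size([1, 64, 8, 7])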

                                        @@ -4441,7 +4441,7 @@

                                        AdaptiveAvgPool2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.AdaptiveAvgPool3d.html b/2.9/generated/torch.nn.AdaptiveAvgPool3d.html
index 74725ab2b7c..f7b3060475b 100644
--- a/2.9/generated/torch.nn.AdaptiveAvgPool3d.html
+++ b/2.9/generated/torch.nn.AdaptiveAvgPool3d.html
@@ -4404,7 +4404,7 @@

                                        AdaptiveAvgPool3d#

                                        -class torch.nn.AdaptiveAvgPool3d(output_size)[source]#
                                        +class torch.nn.AdaptiveAvgPool3d(output_size)[source]#

                                        Applies a 3D adaptive average pooling over an input signal composed of several input planes.

                                        The output is of size D x H x W, for any input size. The number of output features is equal to the number of input planes.

                                        @@ -4441,7 +4441,7 @@

                                        AdaptiveAvgPool3d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.AdaptiveLogSoftmaxWithLoss.html b/2.9/generated/torch.nn.AdaptiveLogSoftmaxWithLoss.html
index 9646ca0fc59..0e38c7fad17 100644
--- a/2.9/generated/torch.nn.AdaptiveLogSoftmaxWithLoss.html
+++ b/2.9/generated/torch.nn.AdaptiveLogSoftmaxWithLoss.html
@@ -4404,7 +4404,7 @@

                                        AdaptiveLogSoftmaxWithLoss#

                                        -class torch.nn.AdaptiveLogSoftmaxWithLoss(in_features, n_classes, cutoffs, div_value=4.0, head_bias=False, device=None, dtype=None)[source]#
                                        +class torch.nn.AdaptiveLogSoftmaxWithLoss(in_features, n_classes, cutoffs, div_value=4.0, head_bias=False, device=None, dtype=None)[source]#

                                        Efficient softmax approximation.

As described in Efficient softmax approximation for GPUs by Edouard Grave, Armand Joulin, Moustapha Cissé, David Grangier, and Hervé Jégou.
@@ -4498,7 +4498,7 @@

                                        AdaptiveLogSoftmaxWithLoss
                                        -forward(input_, target_)[source]#
                                        +forward(input_, target_)[source]#

                                        Runs the forward pass.

                                        Return type
                                        @@ -4509,7 +4509,7 @@

                                        AdaptiveLogSoftmaxWithLoss
                                        -log_prob(input)[source]#
                                        +log_prob(input)[source]#

Compute log probabilities for all \texttt{n\_classes}.

                                        Parameters
                                        @@ -4535,7 +4535,7 @@

                                        AdaptiveLogSoftmaxWithLoss
                                        -predict(input)[source]#
                                        +predict(input)[source]#

                                        Return the class with the highest probability for each example in the input minibatch.

                                        This is equivalent to self.log_prob(input).argmax(dim=1), but is more efficient in some cases.

                                        @@ -4560,7 +4560,7 @@

                                        AdaptiveLogSoftmaxWithLoss
                                        -reset_parameters()[source]#
                                        +reset_parameters()[source]#

                                        Resets parameters based on their initialization used in __init__.
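Tying the entries above together, a hedged end-to-end sketch (sizes and cutoffs are arbitrary examples; cutoffs must be increasing and below n_classes):

import torch
import torch.nn as nn

asm = nn.AdaptiveLogSoftmaxWithLoss(in_features=64, n_classes=1000, cutoffs=[100, 500])
hidden = torch.randn(32, 64)
target = torch.randint(0, 1000, (32,))
out = asm(hidden, target)            # named tuple with .output and .loss fields
print(out.loss)
print(asm.predict(hidden).shape)     # torch.Size([32])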

diff --git a/2.9/generated/torch.nn.AdaptiveMaxPool1d.html b/2.9/generated/torch.nn.AdaptiveMaxPool1d.html
index 8a532a477e7..2400d7dd6dd 100644
--- a/2.9/generated/torch.nn.AdaptiveMaxPool1d.html
+++ b/2.9/generated/torch.nn.AdaptiveMaxPool1d.html
@@ -4404,7 +4404,7 @@

                                        AdaptiveMaxPool1d#

                                        -class torch.nn.AdaptiveMaxPool1d(output_size, return_indices=False)[source]#
                                        +class torch.nn.AdaptiveMaxPool1d(output_size, return_indices=False)[source]#

                                        Applies a 1D adaptive max pooling over an input signal composed of several input planes.

The output size is L_{out}, for any input size. The number of output features is equal to the number of input planes.
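A minimal sketch, including the optional indices output:

import torch
import torch.nn as nn

m = nn.AdaptiveMaxPool1d(5, return_indices=True)
x = torch.randn(1, 64, 8)
out, idx = m(x)
print(out.shape, idx.shape)   # torch.Size([1, 64, 5]) for both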

                                        @@ -4434,7 +4434,7 @@

                                        AdaptiveMaxPool1d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

diff --git a/2.9/generated/torch.nn.AdaptiveMaxPool2d.html b/2.9/generated/torch.nn.AdaptiveMaxPool2d.html
index 59fcad1fd1f..f6171330e20 100644
--- a/2.9/generated/torch.nn.AdaptiveMaxPool2d.html
+++ b/2.9/generated/torch.nn.AdaptiveMaxPool2d.html
@@ -4404,7 +4404,7 @@

                                        AdaptiveMaxPool2d#

                                        -class torch.nn.AdaptiveMaxPool2d(output_size, return_indices=False)[source]#
                                        +class torch.nn.AdaptiveMaxPool2d(output_size, return_indices=False)[source]#

                                        Applies a 2D adaptive max pooling over an input signal composed of several input planes.

The output is of size H_{out} \times W_{out}, for any input size. The number of output features is equal to the number of input planes.

                                        @@ -4446,7 +4446,7 @@

                                        AdaptiveMaxPool2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

diff --git a/2.9/generated/torch.nn.AdaptiveMaxPool3d.html b/2.9/generated/torch.nn.AdaptiveMaxPool3d.html
index 6ac3053eb49..ed7ec531ba5 100644
--- a/2.9/generated/torch.nn.AdaptiveMaxPool3d.html
+++ b/2.9/generated/torch.nn.AdaptiveMaxPool3d.html
@@ -4404,7 +4404,7 @@

                                        AdaptiveMaxPool3d#

                                        -class torch.nn.AdaptiveMaxPool3d(output_size, return_indices=False)[source]#
                                        +class torch.nn.AdaptiveMaxPool3d(output_size, return_indices=False)[source]#

                                        Applies a 3D adaptive max pooling over an input signal composed of several input planes.

The output is of size D_{out} \times H_{out} \times W_{out}, for any input size. The number of output features is equal to the number of input planes.

                                        @@ -4446,7 +4446,7 @@

                                        AdaptiveMaxPool3d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

diff --git a/2.9/generated/torch.nn.AlphaDropout.html b/2.9/generated/torch.nn.AlphaDropout.html
index 571546e8ddb..b4da11e7c67 100644
--- a/2.9/generated/torch.nn.AlphaDropout.html
+++ b/2.9/generated/torch.nn.AlphaDropout.html
@@ -4404,7 +4404,7 @@

                                        AlphaDropout#

                                        -class torch.nn.AlphaDropout(p=0.5, inplace=False)[source]#
                                        +class torch.nn.AlphaDropout(p=0.5, inplace=False)[source]#

                                        Applies Alpha Dropout over the input.

Alpha Dropout is a type of Dropout that maintains the self-normalizing property.
@@ -4443,7 +4443,7 @@

                                        AlphaDropout
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.AvgPool1d.html b/2.9/generated/torch.nn.AvgPool1d.html
index 176567ef298..7a62245989a 100644
--- a/2.9/generated/torch.nn.AvgPool1d.html
+++ b/2.9/generated/torch.nn.AvgPool1d.html
@@ -4404,7 +4404,7 @@

                                        AvgPool1d#

                                        -class torch.nn.AvgPool1d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True)[source]#
                                        +class torch.nn.AvgPool1d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True)[source]#

                                        Applies a 1D average pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, L), output (N, C, L_{out}) and kernel_size k
@@ -4459,7 +4459,7 @@

                                        AvgPool1d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.AvgPool2d.html b/2.9/generated/torch.nn.AvgPool2d.html
index 8431ef54e82..15c12581c67 100644
--- a/2.9/generated/torch.nn.AvgPool2d.html
+++ b/2.9/generated/torch.nn.AvgPool2d.html
@@ -4404,7 +4404,7 @@

                                        AvgPool2d#

                                        -class torch.nn.AvgPool2d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#
                                        +class torch.nn.AvgPool2d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#

                                        Applies a 2D average pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, H, W), output (N, C, H_{out}, W_{out}) and kernel_size (kH, kW)
@@ -4473,7 +4473,7 @@

                                        AvgPool2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.AvgPool3d.html b/2.9/generated/torch.nn.AvgPool3d.html
index 16270a83d2b..58abd9605c5 100644
--- a/2.9/generated/torch.nn.AvgPool3d.html
+++ b/2.9/generated/torch.nn.AvgPool3d.html
@@ -4404,7 +4404,7 @@

                                        AvgPool3d#

                                        -class torch.nn.AvgPool3d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#
                                        +class torch.nn.AvgPool3d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#

                                        Applies a 3D average pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, D, H, W), output (N, C, D_{out}, H_{out}, W_{out}) and kernel_size (kD, kH, kW)
@@ -4484,7 +4484,7 @@

                                        AvgPool3d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.BCELoss.html b/2.9/generated/torch.nn.BCELoss.html
index 3d681d1ba50..63cad042008 100644
--- a/2.9/generated/torch.nn.BCELoss.html
+++ b/2.9/generated/torch.nn.BCELoss.html
@@ -4404,7 +4404,7 @@

                                        BCELoss#

                                        -class torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')[source]#

                                        Creates a criterion that measures the Binary Cross Entropy between the target and the input probabilities:

The unreduced (i.e. with reduction set to 'none') loss can be described as:

\ell(x, y) = L = \{l_1, \dots, l_N\}^\top, \quad l_n = -w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log(1 - x_n) \right]
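A short sketch: the input must already be a probability, so it passes through a Sigmoid first:

import torch
import torch.nn as nn

m = nn.Sigmoid()
loss = nn.BCELoss()
inp = torch.randn(3, requires_grad=True)
target = torch.rand(3)          # targets are probabilities in [0, 1]
out = loss(m(inp), target)
out.backward()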

                                        @@ -4479,7 +4479,7 @@

                                        BCELoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.BCEWithLogitsLoss.html b/2.9/generated/torch.nn.BCEWithLogitsLoss.html
index 861420f3329..40027fd3b8c 100644
--- a/2.9/generated/torch.nn.BCEWithLogitsLoss.html
+++ b/2.9/generated/torch.nn.BCEWithLogitsLoss.html
@@ -4404,7 +4404,7 @@

                                        BCEWithLogitsLoss#

                                        -class torch.nn.BCEWithLogitsLoss(weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None)[source]#
                                        +class torch.nn.BCEWithLogitsLoss(weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None)[source]#

This loss combines a Sigmoid layer and the BCELoss in one single class. This version is more numerically stable than using a plain Sigmoid followed by a BCELoss as, by combining the operations into one layer, we take advantage of the log-sum-exp trick for numerical stability.
@@ -4505,7 +4505,7 @@
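A short sketch of the fused form: raw logits go in, with no explicit Sigmoid:

import torch
import torch.nn as nn

loss = nn.BCEWithLogitsLoss()
inp = torch.randn(3, requires_grad=True)   # raw scores (logits)
target = torch.empty(3).random_(2)         # hard 0/1 labels
out = loss(inp, target)
out.backward()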

                                        BCEWithLogitsLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.BatchNorm1d.html b/2.9/generated/torch.nn.BatchNorm1d.html
index f90b694f505..eebdf4878d6 100644
--- a/2.9/generated/torch.nn.BatchNorm1d.html
+++ b/2.9/generated/torch.nn.BatchNorm1d.html
@@ -4404,7 +4404,7 @@

                                        BatchNorm1d#

                                        -class torch.nn.BatchNorm1d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.BatchNorm1d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        Applies Batch Normalization over a 2D or 3D input.

Method described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
diff --git a/2.9/generated/torch.nn.BatchNorm2d.html b/2.9/generated/torch.nn.BatchNorm2d.html
index 18c62905d76..f58b15c5009 100644
--- a/2.9/generated/torch.nn.BatchNorm2d.html
+++ b/2.9/generated/torch.nn.BatchNorm2d.html
@@ -4404,7 +4404,7 @@

                                        BatchNorm2d#

                                        -class torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        Applies Batch Normalization over a 4D input.

4D is a mini-batch of 2D inputs with an additional channel dimension. Method described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
diff --git a/2.9/generated/torch.nn.BatchNorm3d.html b/2.9/generated/torch.nn.BatchNorm3d.html
index 08f04e817eb..f7677e7db97 100644
--- a/2.9/generated/torch.nn.BatchNorm3d.html
+++ b/2.9/generated/torch.nn.BatchNorm3d.html
@@ -4404,7 +4404,7 @@

                                        BatchNorm3d#

                                        -class torch.nn.BatchNorm3d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.BatchNorm3d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        Applies Batch Normalization over a 5D input.

5D is a mini-batch of 3D inputs with an additional channel dimension, as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
diff --git a/2.9/generated/torch.nn.Bilinear.html b/2.9/generated/torch.nn.Bilinear.html
index 2c91b1bf150..150fbc9b333 100644
--- a/2.9/generated/torch.nn.Bilinear.html
+++ b/2.9/generated/torch.nn.Bilinear.html
@@ -4404,7 +4404,7 @@

                                        Bilinear#

                                        -class torch.nn.Bilinear(in1_features, in2_features, out_features, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.Bilinear(in1_features, in2_features, out_features, bias=True, device=None, dtype=None)[source]#

Applies a bilinear transformation to the incoming data: y = x_1^T A x_2 + b.
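A minimal sketch of the shape contract:

import torch
import torch.nn as nn

m = nn.Bilinear(20, 30, 40)
x1 = torch.randn(128, 20)
x2 = torch.randn(128, 30)
print(m(x1, x2).shape)   # torch.Size([128, 40])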

                                        Parameters
                                        @@ -4497,7 +4497,7 @@

                                        Bilinear
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4508,7 +4508,7 @@

                                        Bilinear
                                        -forward(input1, input2)[source]#
                                        +forward(input1, input2)[source]#

                                        Runs the forward pass.

                                        Return type
                                        @@ -4519,7 +4519,7 @@

                                        Bilinear
                                        -reset_parameters()[source]#
                                        +reset_parameters()[source]#

                                        Resets parameters based on their initialization used in __init__.

diff --git a/2.9/generated/torch.nn.CELU.html b/2.9/generated/torch.nn.CELU.html
index ac87e5722d0..25ffbaa5b1e 100644
--- a/2.9/generated/torch.nn.CELU.html
+++ b/2.9/generated/torch.nn.CELU.html
@@ -4404,7 +4404,7 @@

                                        CELU#

                                        -class torch.nn.CELU(alpha=1.0, inplace=False)[source]#
                                        +class torch.nn.CELU(alpha=1.0, inplace=False)[source]#

                                        Applies the CELU function element-wise.

\text{CELU}(x) = \max(0, x) + \min(0, \alpha * (\exp(x/\alpha) - 1))
@@ -4434,7 +4434,7 @@
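A minimal sketch with the default alpha:

import torch
import torch.nn as nn

m = nn.CELU(alpha=1.0)
x = torch.randn(2)
print(m(x))   # negative inputs saturate smoothly toward -alpha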

                                        CELU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4445,7 +4445,7 @@

                                        CELU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.CTCLoss.html b/2.9/generated/torch.nn.CTCLoss.html
index f36ba7a89af..b2bd1973562 100644
--- a/2.9/generated/torch.nn.CTCLoss.html
+++ b/2.9/generated/torch.nn.CTCLoss.html
@@ -4404,7 +4404,7 @@

                                        CTCLoss#

                                        -class torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=False)[source]#
                                        +class torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=False)[source]#

                                        The Connectionist Temporal Classification loss.

Calculates loss between a continuous (unsegmented) time series and a target sequence. CTCLoss sums over the probability of possible alignments of input to target, producing a loss value which is differentiable with respect to each input node.
@@ -4559,7 +4559,7 @@
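A hedged sketch on random data (the sizes are arbitrary examples; class 0 is reserved for the blank by default):

import torch
import torch.nn as nn

T, C, N, S = 50, 20, 16, 30   # input length, classes incl. blank, batch, max target length
log_probs = torch.randn(T, N, C).log_softmax(2).detach().requires_grad_()
targets = torch.randint(1, C, (N, S), dtype=torch.long)
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.randint(10, S, (N,), dtype=torch.long)
loss = nn.CTCLoss()(log_probs, targets, input_lengths, target_lengths)
loss.backward()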

                                        CTCLoss
                                        -forward(log_probs, targets, input_lengths, target_lengths)[source]#
                                        +forward(log_probs, targets, input_lengths, target_lengths)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.ChannelShuffle.html b/2.9/generated/torch.nn.ChannelShuffle.html
index 0b8618b381d..3b0c9d34b2c 100644
--- a/2.9/generated/torch.nn.ChannelShuffle.html
+++ b/2.9/generated/torch.nn.ChannelShuffle.html
@@ -4404,7 +4404,7 @@

                                        ChannelShuffle#

                                        -class torch.nn.ChannelShuffle(groups)[source]#
                                        +class torch.nn.ChannelShuffle(groups)[source]#

                                        Divides and rearranges the channels in a tensor.

This operation divides the channels in a tensor of shape (N, C, *) into g groups as (N, \frac{C}{g}, g, *) and shuffles them,
@@ -4440,7 +4440,7 @@
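A small sketch showing the interleaving for g=2 on 4 channels:

import torch
import torch.nn as nn

shuffle = nn.ChannelShuffle(2)
x = torch.arange(1., 17.).view(1, 4, 2, 2)   # channels start at 1, 5, 9, 13
print(shuffle(x)[0, :, 0, 0])                # tensor([ 1.,  9.,  5., 13.])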

                                        ChannelShuffle
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4451,7 +4451,7 @@

                                        ChannelShuffle
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.CircularPad1d.html b/2.9/generated/torch.nn.CircularPad1d.html
index c5c7bb8d24b..7a00f11cbc0 100644
--- a/2.9/generated/torch.nn.CircularPad1d.html
+++ b/2.9/generated/torch.nn.CircularPad1d.html
@@ -4404,7 +4404,7 @@

                                        CircularPad1d#

                                        -class torch.nn.CircularPad1d(padding)[source]#
                                        +class torch.nn.CircularPad1d(padding)[source]#

                                        Pads the input tensor using circular padding of the input boundary.

Tensor values at the beginning of the dimension are used to pad the end, and values at the end are used to pad the beginning. If negative padding is applied, then the ends of the tensor get removed.
diff --git a/2.9/generated/torch.nn.CircularPad2d.html b/2.9/generated/torch.nn.CircularPad2d.html
index baf6c883be7..bea7ef2c46b 100644
--- a/2.9/generated/torch.nn.CircularPad2d.html
+++ b/2.9/generated/torch.nn.CircularPad2d.html
@@ -4404,7 +4404,7 @@

                                        CircularPad2d#

                                        -class torch.nn.CircularPad2d(padding)[source]#
                                        +class torch.nn.CircularPad2d(padding)[source]#

                                        Pads the input tensor using circular padding of the input boundary.

Tensor values at the beginning of the dimension are used to pad the end, and values at the end are used to pad the beginning. If negative padding is applied, then the ends of the tensor get removed.
diff --git a/2.9/generated/torch.nn.CircularPad3d.html b/2.9/generated/torch.nn.CircularPad3d.html
index 0342354ee02..8194fdeceb5 100644
--- a/2.9/generated/torch.nn.CircularPad3d.html
+++ b/2.9/generated/torch.nn.CircularPad3d.html
@@ -4404,7 +4404,7 @@

                                        CircularPad3d#

                                        -class torch.nn.CircularPad3d(padding)[source]#
                                        +class torch.nn.CircularPad3d(padding)[source]#

                                        Pads the input tensor using circular padding of the input boundary.

Tensor values at the beginning of the dimension are used to pad the end, and values at the end are used to pad the beginning. If negative padding is applied, then the ends of the tensor get removed.
diff --git a/2.9/generated/torch.nn.ConstantPad1d.html b/2.9/generated/torch.nn.ConstantPad1d.html
index 90268b76381..ac492e918e8 100644
--- a/2.9/generated/torch.nn.ConstantPad1d.html
+++ b/2.9/generated/torch.nn.ConstantPad1d.html
@@ -4404,7 +4404,7 @@

                                        ConstantPad1d#

                                        -class torch.nn.ConstantPad1d(padding, value)[source]#
                                        +class torch.nn.ConstantPad1d(padding, value)[source]#

                                        Pads the input tensor boundaries with a constant value.

                                        For N-dimensional padding, use torch.nn.functional.pad().
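A minimal sketch: symmetric padding of two elements with value 3.5 on each side:

import torch
import torch.nn as nn

m = nn.ConstantPad1d(2, 3.5)
x = torch.ones(1, 2, 4)
print(m(x).shape)   # torch.Size([1, 2, 8])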

diff --git a/2.9/generated/torch.nn.ConstantPad2d.html b/2.9/generated/torch.nn.ConstantPad2d.html
index 040a3e719ff..28d95029761 100644
--- a/2.9/generated/torch.nn.ConstantPad2d.html
+++ b/2.9/generated/torch.nn.ConstantPad2d.html
@@ -4404,7 +4404,7 @@

                                        ConstantPad2d#

                                        -class torch.nn.ConstantPad2d(padding, value)[source]#
                                        +class torch.nn.ConstantPad2d(padding, value)[source]#

                                        Pads the input tensor boundaries with a constant value.

                                        For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.ConstantPad3d.html b/2.9/generated/torch.nn.ConstantPad3d.html
index 020ffcffb2a..0d09eba07f8 100644
--- a/2.9/generated/torch.nn.ConstantPad3d.html
+++ b/2.9/generated/torch.nn.ConstantPad3d.html
@@ -4404,7 +4404,7 @@

                                        ConstantPad3d#

                                        -class torch.nn.ConstantPad3d(padding, value)[source]#
                                        +class torch.nn.ConstantPad3d(padding, value)[source]#

                                        Pads the input tensor boundaries with a constant value.

                                        For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.Conv1d.html b/2.9/generated/torch.nn.Conv1d.html
index 3106e0c856f..0b1ef8bbfcf 100644
--- a/2.9/generated/torch.nn.Conv1d.html
+++ b/2.9/generated/torch.nn.Conv1d.html
@@ -4404,7 +4404,7 @@

                                        Conv1d#

                                        -class torch.nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 1D convolution over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size
diff --git a/2.9/generated/torch.nn.Conv2d.html b/2.9/generated/torch.nn.Conv2d.html
index df860512a74..0ce1f5a7181 100644
--- a/2.9/generated/torch.nn.Conv2d.html
+++ b/2.9/generated/torch.nn.Conv2d.html
@@ -4404,7 +4404,7 @@

                                        Conv2d#

                                        -class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 2D convolution over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size

diff --git a/2.9/generated/torch.nn.Conv3d.html b/2.9/generated/torch.nn.Conv3d.html
index d20aa165fce..350e03d055d 100644
--- a/2.9/generated/torch.nn.Conv3d.html
+++ b/2.9/generated/torch.nn.Conv3d.html
@@ -4404,7 +4404,7 @@

                                        Conv3d#

                                        -class torch.nn.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 3D convolution over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size $(N, C_{in}, D, H, W)$

diff --git a/2.9/generated/torch.nn.ConvTranspose1d.html b/2.9/generated/torch.nn.ConvTranspose1d.html
index 7165a0fdb6b..f400999bd28 100644
--- a/2.9/generated/torch.nn.ConvTranspose1d.html
+++ b/2.9/generated/torch.nn.ConvTranspose1d.html
@@ -4404,7 +4404,7 @@

                                        ConvTranspose1d#

                                        -class torch.nn.ConvTranspose1d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.ConvTranspose1d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 1D transposed convolution operator over an input image composed of several input planes.

This module can be seen as the gradient of Conv1d with respect to its input.

diff --git a/2.9/generated/torch.nn.ConvTranspose2d.html b/2.9/generated/torch.nn.ConvTranspose2d.html
index f77268a0d5d..15bb0eca1b3 100644
--- a/2.9/generated/torch.nn.ConvTranspose2d.html
+++ b/2.9/generated/torch.nn.ConvTranspose2d.html
@@ -4404,7 +4404,7 @@

                                        ConvTranspose2d#

                                        -class torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 2D transposed convolution operator over an input image composed of several input planes.

This module can be seen as the gradient of Conv2d with respect to its input.

@@ -4582,7 +4582,7 @@
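Because several input shapes can map to the same output shape, the forward() shown below accepts an optional output_size to disambiguate; a minimal round-trip sketch:

    import torch
    import torch.nn as nn

    down = nn.Conv2d(16, 16, 3, stride=2, padding=1)
    up = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
    x = torch.randn(1, 16, 12, 12)
    h = down(x)                      # (1, 16, 6, 6)
    y = up(h, output_size=x.size())  # recovers (1, 16, 12, 12) exactly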

                                        ConvTranspose2d
                                        -forward(input, output_size=None)[source]#
                                        +forward(input, output_size=None)[source]#

                                        Performs the forward pass.

                                        Variables
diff --git a/2.9/generated/torch.nn.ConvTranspose3d.html b/2.9/generated/torch.nn.ConvTranspose3d.html
index fa3a5bf3dc6..d47c65fcdd4 100644
--- a/2.9/generated/torch.nn.ConvTranspose3d.html
+++ b/2.9/generated/torch.nn.ConvTranspose3d.html
@@ -4404,7 +4404,7 @@

                                        ConvTranspose3d#

                                        -class torch.nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

Applies a 3D transposed convolution operator over an input image composed of several input planes. The transposed convolution operator multiplies each input value element-wise by a learnable kernel,

diff --git a/2.9/generated/torch.nn.CosineEmbeddingLoss.html b/2.9/generated/torch.nn.CosineEmbeddingLoss.html
index 56587835b46..f80dfa065a8 100644
--- a/2.9/generated/torch.nn.CosineEmbeddingLoss.html
+++ b/2.9/generated/torch.nn.CosineEmbeddingLoss.html
@@ -4404,7 +4404,7 @@

                                        CosineEmbeddingLoss#

                                        -class torch.nn.CosineEmbeddingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.CosineEmbeddingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the loss given input tensors $x_1$, $x_2$ and a Tensor label $y$ with values 1 or -1. Use $y=1$ to maximize the cosine similarity of two inputs, and $y=-1$ otherwise.

@@ -4462,7 +4462,7 @@
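A minimal sketch of the call convention for the forward() below (labels live in {1, -1}):

    import torch
    import torch.nn as nn

    loss = nn.CosineEmbeddingLoss(margin=0.5)
    x1 = torch.randn(4, 128)
    x2 = torch.randn(4, 128)
    y = torch.tensor([1.0, -1.0, 1.0, -1.0])  # 1: pull together, -1: push apart
    out = loss(x1, x2, y)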

                                        CosineEmbeddingLoss
                                        -forward(input1, input2, target)[source]#
                                        +forward(input1, input2, target)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.CosineSimilarity.html b/2.9/generated/torch.nn.CosineSimilarity.html
index 0964f66caec..f402827c545 100644
--- a/2.9/generated/torch.nn.CosineSimilarity.html
+++ b/2.9/generated/torch.nn.CosineSimilarity.html
@@ -4404,7 +4404,7 @@

                                        CosineSimilarity#

                                        -class torch.nn.CosineSimilarity(dim=1, eps=1e-08)[source]#
                                        +class torch.nn.CosineSimilarity(dim=1, eps=1e-08)[source]#

Returns cosine similarity between $x_1$ and $x_2$, computed along dim.

$$\text{similarity} = \dfrac{x_1 \cdot x_2}{\max(\Vert x_1 \Vert_2 \cdot \Vert x_2 \Vert_2, \epsilon)}.$$

@@ -4436,7 +4436,7 @@
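For instance, comparing two batches of 128-dimensional vectors row by row:

    import torch
    import torch.nn as nn

    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    a = torch.randn(100, 128)
    b = torch.randn(100, 128)
    sim = cos(a, b)   # (100,), values in [-1, 1]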

                                        CosineSimilarity
                                        -forward(x1, x2)[source]#
                                        +forward(x1, x2)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.CrossEntropyLoss.html b/2.9/generated/torch.nn.CrossEntropyLoss.html
index 813d0d23b92..be0a2cd7e65 100644
--- a/2.9/generated/torch.nn.CrossEntropyLoss.html
+++ b/2.9/generated/torch.nn.CrossEntropyLoss.html
@@ -4404,7 +4404,7 @@

                                        CrossEntropyLoss#

                                        -class torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)[source]#
                                        +class torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)[source]#

                                        This criterion computes the cross entropy loss between input logits and target.

It is useful when training a classification problem with C classes.

@@ -4587,7 +4587,7 @@
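The input is expected to be raw, unnormalized logits; the target can be class indices or class probabilities. A minimal sketch of both forms:

    import torch
    import torch.nn as nn

    loss = nn.CrossEntropyLoss()
    logits = torch.randn(3, 5, requires_grad=True)  # no softmax beforehand

    target = torch.tensor([1, 0, 4])                # class indices
    loss(logits, target).backward()

    probs = torch.randn(3, 5).softmax(dim=1)        # class probabilities
    out = loss(logits, probs)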

                                        CrossEntropyLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.DataParallel.html b/2.9/generated/torch.nn.DataParallel.html
index 75b4f406357..771fb026168 100644
--- a/2.9/generated/torch.nn.DataParallel.html
+++ b/2.9/generated/torch.nn.DataParallel.html
@@ -4404,7 +4404,7 @@

                                        DataParallel#

                                        -class torch.nn.DataParallel(module, device_ids=None, output_device=None, dim=0)[source]#
                                        +class torch.nn.DataParallel(module, device_ids=None, output_device=None, dim=0)[source]#

                                        Implements data parallelism at the module level.

This container parallelizes the application of the given module by splitting the input across the specified devices by chunking in the batch

diff --git a/2.9/generated/torch.nn.Dropout.html b/2.9/generated/torch.nn.Dropout.html
index 02d32146d27..547cf282d8f 100644
--- a/2.9/generated/torch.nn.Dropout.html
+++ b/2.9/generated/torch.nn.Dropout.html
@@ -4404,7 +4404,7 @@

                                        Dropout#

                                        -class torch.nn.Dropout(p=0.5, inplace=False)[source]#
                                        +class torch.nn.Dropout(p=0.5, inplace=False)[source]#

                                        During training, randomly zeroes some of the elements of the input tensor with probability p.

                                        The zeroed elements are chosen independently for each forward call and are sampled from a Bernoulli distribution.

                                        Each channel will be zeroed out independently on every forward call.
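Outputs are scaled by 1/(1-p) during training so that evaluation mode is a no-op; a quick check:

    import torch
    import torch.nn as nn

    m = nn.Dropout(p=0.2)
    x = torch.randn(4, 10)

    m.train()
    y = m(x)            # ~20% of entries zeroed, survivors scaled by 1/0.8

    m.eval()
    assert torch.equal(m(x), x)   # identity at evaluation time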

                                        @@ -4438,7 +4438,7 @@

                                        Dropout
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Dropout1d.html b/2.9/generated/torch.nn.Dropout1d.html
index 444188a671e..866a911de3d 100644
--- a/2.9/generated/torch.nn.Dropout1d.html
+++ b/2.9/generated/torch.nn.Dropout1d.html
@@ -4404,7 +4404,7 @@

                                        Dropout1d#

                                        -class torch.nn.Dropout1d(p=0.5, inplace=False)[source]#
                                        +class torch.nn.Dropout1d(p=0.5, inplace=False)[source]#

                                        Randomly zero out entire channels.

A channel is a 1D feature map, e.g., the $j$-th channel of the $i$-th sample in the

@@ -4444,7 +4444,7 @@

                                        Dropout1d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Dropout2d.html b/2.9/generated/torch.nn.Dropout2d.html
index 5eae7f14f2f..c89ed871f49 100644
--- a/2.9/generated/torch.nn.Dropout2d.html
+++ b/2.9/generated/torch.nn.Dropout2d.html
@@ -4404,7 +4404,7 @@

                                        Dropout2d#

                                        -class torch.nn.Dropout2d(p=0.5, inplace=False)[source]#
                                        +class torch.nn.Dropout2d(p=0.5, inplace=False)[source]#

                                        Randomly zero out entire channels.

A channel is a 2D feature map, e.g., the $j$-th channel of the $i$-th sample in the

@@ -4452,7 +4452,7 @@

                                        Dropout2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Dropout3d.html b/2.9/generated/torch.nn.Dropout3d.html
index a0d54b5784f..69ab45cba8e 100644
--- a/2.9/generated/torch.nn.Dropout3d.html
+++ b/2.9/generated/torch.nn.Dropout3d.html
@@ -4404,7 +4404,7 @@

                                        Dropout3d#

                                        -class torch.nn.Dropout3d(p=0.5, inplace=False)[source]#
                                        +class torch.nn.Dropout3d(p=0.5, inplace=False)[source]#

                                        Randomly zero out entire channels.

A channel is a 3D feature map, e.g., the $j$-th channel of the $i$-th sample in the

@@ -4444,7 +4444,7 @@

                                        Dropout3d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.ELU.html b/2.9/generated/torch.nn.ELU.html
index bd4c76f1a20..cac18d3f7e6 100644
--- a/2.9/generated/torch.nn.ELU.html
+++ b/2.9/generated/torch.nn.ELU.html
@@ -4404,7 +4404,7 @@

                                        ELU#

                                        -class torch.nn.ELU(alpha=1.0, inplace=False)[source]#
                                        +class torch.nn.ELU(alpha=1.0, inplace=False)[source]#

                                        Applies the Exponential Linear Unit (ELU) function, element-wise.

                                        Method described in the paper: Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs).
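A quick element-wise check (negative inputs map to alpha*(exp(x)-1), positives pass through unchanged):

    import torch
    import torch.nn as nn

    m = nn.ELU(alpha=1.0)
    x = torch.tensor([-2.0, 0.0, 2.0])
    m(x)   # approximately tensor([-0.8647, 0.0000, 2.0000])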

                                        @@ -4439,7 +4439,7 @@

                                        ELU#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4450,7 +4450,7 @@

                                        ELU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Embedding.html b/2.9/generated/torch.nn.Embedding.html
index f76645eb56b..7f310afe4f5 100644
--- a/2.9/generated/torch.nn.Embedding.html
+++ b/2.9/generated/torch.nn.Embedding.html
@@ -4404,7 +4404,7 @@

                                        Embedding#

                                        -class torch.nn.Embedding(num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, device=None, dtype=None)[source]#
                                        +class torch.nn.Embedding(num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, device=None, dtype=None)[source]#

                                        A simple lookup table that stores embeddings of a fixed dictionary and size.

This module is often used to store word embeddings and retrieve them using indices. The input to the module is a list of indices, and the output is the corresponding

@@ -4512,7 +4512,7 @@
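A minimal lookup sketch, including the padding_idx behavior from the signature above:

    import torch
    import torch.nn as nn

    emb = nn.Embedding(num_embeddings=10, embedding_dim=3, padding_idx=0)
    idx = torch.tensor([[1, 2, 0, 5]])
    out = emb(idx)   # (1, 4, 3); the row for index 0 is all zeros
                     # and its embedding receives no gradient updates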

                                        Embedding
                                        -classmethod from_pretrained(embeddings, freeze=True, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False)[source]#
                                        +classmethod from_pretrained(embeddings, freeze=True, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False)[source]#

                                        Create Embedding instance from given 2-dimensional FloatTensor.

                                        Parameters
diff --git a/2.9/generated/torch.nn.EmbeddingBag.html b/2.9/generated/torch.nn.EmbeddingBag.html
index 0e055863d31..aab2ea73144 100644
--- a/2.9/generated/torch.nn.EmbeddingBag.html
+++ b/2.9/generated/torch.nn.EmbeddingBag.html
@@ -4404,7 +4404,7 @@

                                        EmbeddingBag#

                                        -class torch.nn.EmbeddingBag(num_embeddings, embedding_dim, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='mean', sparse=False, _weight=None, include_last_offset=False, padding_idx=None, device=None, dtype=None)[source]#
                                        +class torch.nn.EmbeddingBag(num_embeddings, embedding_dim, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='mean', sparse=False, _weight=None, include_last_offset=False, padding_idx=None, device=None, dtype=None)[source]#

                                        Compute sums or means of ‘bags’ of embeddings, without instantiating the intermediate embeddings.

                                        For bags of constant length, no per_sample_weights, no indices equal to padding_idx, and with 2D inputs, this class

                                        @@ -4485,7 +4485,7 @@
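In the 1D form of the forward() below, offsets marks where each bag starts; a minimal sketch with two bags:

    import torch
    import torch.nn as nn

    bag = nn.EmbeddingBag(num_embeddings=10, embedding_dim=3, mode='mean')
    input = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9])  # all indices, flattened
    offsets = torch.tensor([0, 4])                  # bag 0: [0:4), bag 1: [4:)
    out = bag(input, offsets)                       # (2, 3), one mean per bag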

                                        EmbeddingBag
                                        -forward(input, offsets=None, per_sample_weights=None)[source]#
                                        +forward(input, offsets=None, per_sample_weights=None)[source]#

                                        Forward pass of EmbeddingBag.

                                        Parameters
                                        @@ -4525,7 +4525,7 @@

                                        EmbeddingBag
                                        -classmethod from_pretrained(embeddings, freeze=True, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='mean', sparse=False, include_last_offset=False, padding_idx=None)[source]#
                                        +classmethod from_pretrained(embeddings, freeze=True, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='mean', sparse=False, include_last_offset=False, padding_idx=None)[source]#

                                        Create EmbeddingBag instance from given 2-dimensional FloatTensor.

                                        Parameters
diff --git a/2.9/generated/torch.nn.FeatureAlphaDropout.html b/2.9/generated/torch.nn.FeatureAlphaDropout.html
index c7db0894ea8..c018e47e016 100644
--- a/2.9/generated/torch.nn.FeatureAlphaDropout.html
+++ b/2.9/generated/torch.nn.FeatureAlphaDropout.html
@@ -4404,7 +4404,7 @@

                                        FeatureAlphaDropout#

                                        -class torch.nn.FeatureAlphaDropout(p=0.5, inplace=False)[source]#
                                        +class torch.nn.FeatureAlphaDropout(p=0.5, inplace=False)[source]#

                                        Randomly masks out entire channels.

A channel is a feature map, e.g. the $j$-th channel of the $i$-th sample in the batch input

@@ -4449,7 +4449,7 @@

                                        FeatureAlphaDropout
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Fold.html b/2.9/generated/torch.nn.Fold.html
index 8260ffeea20..f6fbe138d07 100644
--- a/2.9/generated/torch.nn.Fold.html
+++ b/2.9/generated/torch.nn.Fold.html
@@ -4404,7 +4404,7 @@

                                        Fold#

                                        -class torch.nn.Fold(output_size, kernel_size, dilation=1, padding=0, stride=1)[source]#
                                        +class torch.nn.Fold(output_size, kernel_size, dilation=1, padding=0, stride=1)[source]#

                                        Combines an array of sliding local blocks into a large containing tensor.

Consider a batched input tensor containing sliding local blocks, e.g., patches of images, of shape $(N, C \times \prod(\text{kernel\_size}), L)$,

@@ -4514,7 +4514,7 @@
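A shape sketch: with a 4x5 output and a 2x2 kernel there are 3*4 = 12 block positions, so L = 12 below; values from overlapping blocks are summed.

    import torch
    import torch.nn as nn

    fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 2))
    x = torch.randn(1, 3 * 2 * 2, 12)  # (N, C * prod(kernel_size), L)
    out = fold(x)                      # (1, 3, 4, 5)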

                                        Fold#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4525,7 +4525,7 @@

                                        Fold#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.FractionalMaxPool2d.html b/2.9/generated/torch.nn.FractionalMaxPool2d.html
index 2a0a9597883..40f77bb7ded 100644
--- a/2.9/generated/torch.nn.FractionalMaxPool2d.html
+++ b/2.9/generated/torch.nn.FractionalMaxPool2d.html
@@ -4404,7 +4404,7 @@

                                        FractionalMaxPool2d#

                                        -class torch.nn.FractionalMaxPool2d(kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#
                                        +class torch.nn.FractionalMaxPool2d(kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#

                                        Applies a 2D fractional max pooling over an input signal composed of several input planes.

                                        Fractional MaxPooling is described in detail in the paper Fractional MaxPooling by Ben Graham

The max-pooling operation is applied in $kH \times kW$ regions by a stochastic

diff --git a/2.9/generated/torch.nn.FractionalMaxPool3d.html b/2.9/generated/torch.nn.FractionalMaxPool3d.html
index b4a1f1701fd..e4fffd7a602 100644
--- a/2.9/generated/torch.nn.FractionalMaxPool3d.html
+++ b/2.9/generated/torch.nn.FractionalMaxPool3d.html
@@ -4404,7 +4404,7 @@

                                        FractionalMaxPool3d#

                                        -class torch.nn.FractionalMaxPool3d(kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#
                                        +class torch.nn.FractionalMaxPool3d(kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#

                                        Applies a 3D fractional max pooling over an input signal composed of several input planes.

                                        Fractional MaxPooling is described in detail in the paper Fractional MaxPooling by Ben Graham

The max-pooling operation is applied in $kT \times kH \times kW$ regions by a stochastic

diff --git a/2.9/generated/torch.nn.GELU.html b/2.9/generated/torch.nn.GELU.html
index f30e6a7b6af..c6415a22ad6 100644
--- a/2.9/generated/torch.nn.GELU.html
+++ b/2.9/generated/torch.nn.GELU.html
@@ -4404,7 +4404,7 @@

                                        GELU#

                                        -class torch.nn.GELU(approximate='none')[source]#
                                        +class torch.nn.GELU(approximate='none')[source]#

                                        Applies the Gaussian Error Linear Units function.

$$\text{GELU}(x) = x * \Phi(x)$$

@@ -4446,7 +4446,7 @@

                                        GELU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4457,7 +4457,7 @@

                                        GELU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.GLU.html b/2.9/generated/torch.nn.GLU.html
index b39a01e8a21..9c3f2c37a76 100644
--- a/2.9/generated/torch.nn.GLU.html
+++ b/2.9/generated/torch.nn.GLU.html
@@ -4404,7 +4404,7 @@

                                        GLU#

                                        -class torch.nn.GLU(dim=-1)[source]#
                                        +class torch.nn.GLU(dim=-1)[source]#

                                        Applies the gated linear unit function.

$\text{GLU}(a, b) = a \otimes \sigma(b)$, where $a$ is the first half of the input matrices and $b$ is the second half.

                                        @@ -4430,7 +4430,7 @@

                                        GLU#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4441,7 +4441,7 @@

                                        GLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.GRU.html b/2.9/generated/torch.nn.GRU.html
index a27b47b82e5..2d04e19240d 100644
--- a/2.9/generated/torch.nn.GRU.html
+++ b/2.9/generated/torch.nn.GRU.html
@@ -4404,7 +4404,7 @@

                                        GRU#

                                        -class torch.nn.GRU(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None)[source]#
                                        +class torch.nn.GRU(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None)[source]#

                                        Apply a multi-layer gated recurrent unit (GRU) RNN to an input sequence. For each element in the input sequence, each layer computes the following function:

diff --git a/2.9/generated/torch.nn.GRUCell.html b/2.9/generated/torch.nn.GRUCell.html
index 65437de7bd9..8707782e4b3 100644
--- a/2.9/generated/torch.nn.GRUCell.html
+++ b/2.9/generated/torch.nn.GRUCell.html
@@ -4404,7 +4404,7 @@

                                        GRUCell#

                                        -class torch.nn.GRUCell(input_size, hidden_size, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.GRUCell(input_size, hidden_size, bias=True, device=None, dtype=None)[source]#

                                        A gated recurrent unit (GRU) cell.

$$\begin{array}{ll}
r = \sigma(W_{ir} x + b_{ir} + W_{hr} h + b_{hr}) \\
z = \sigma(W_{iz} x + b_{iz} + W_{hz} h + b_{hz}) \\
n = \tanh(W_{in} x + b_{in} + r \odot (W_{hn} h + b_{hn})) \\
h' = (1 - z) \odot n + z \odot h
\end{array}$$

diff --git a/2.9/generated/torch.nn.GaussianNLLLoss.html b/2.9/generated/torch.nn.GaussianNLLLoss.html
index 970fbede3b8..8b4d7712d02 100644
--- a/2.9/generated/torch.nn.GaussianNLLLoss.html
+++ b/2.9/generated/torch.nn.GaussianNLLLoss.html
@@ -4404,7 +4404,7 @@

                                        GaussianNLLLoss#

                                        -class torch.nn.GaussianNLLLoss(*, full=False, eps=1e-06, reduction='mean')[source]#
                                        +class torch.nn.GaussianNLLLoss(*, full=False, eps=1e-06, reduction='mean')[source]#

                                        Gaussian negative log likelihood loss.

The targets are treated as samples from Gaussian distributions with expectations and variances predicted by the neural network. For a

@@ -4480,7 +4480,7 @@
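The forward() below takes a third argument, the predicted variance, alongside the usual input and target; a minimal sketch:

    import torch
    import torch.nn as nn

    loss = nn.GaussianNLLLoss()
    mean = torch.randn(5, 2, requires_grad=True)  # predicted expectations
    target = torch.randn(5, 2)                    # observed samples
    var = torch.ones(5, 2, requires_grad=True)    # predicted variances (positive)
    loss(mean, target, var).backward()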

                                        GaussianNLLLoss
                                        -forward(input, target, var)[source]#
                                        +forward(input, target, var)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.GroupNorm.html b/2.9/generated/torch.nn.GroupNorm.html
index 08d37119b6e..cccb6069462 100644
--- a/2.9/generated/torch.nn.GroupNorm.html
+++ b/2.9/generated/torch.nn.GroupNorm.html
@@ -4404,7 +4404,7 @@

                                        GroupNorm#

                                        -class torch.nn.GroupNorm(num_groups, num_channels, eps=1e-05, affine=True, device=None, dtype=None)[source]#
                                        +class torch.nn.GroupNorm(num_groups, num_channels, eps=1e-05, affine=True, device=None, dtype=None)[source]#

                                        Applies Group Normalization over a mini-batch of inputs.

                                        This layer implements the operation as described in the paper Group Normalization
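The channel count must be divisible by num_groups, and the two extremes recover other normalization layers; a quick sketch:

    import torch
    import torch.nn as nn

    x = torch.randn(20, 6, 10, 10)
    m = nn.GroupNorm(3, 6)   # 3 groups of 2 channels each
    out = m(x)

    nn.GroupNorm(6, 6)       # one channel per group: InstanceNorm-like
    nn.GroupNorm(1, 6)       # a single group: LayerNorm-like over (C, H, W)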

diff --git a/2.9/generated/torch.nn.Hardshrink.html b/2.9/generated/torch.nn.Hardshrink.html
index 31dcafedc48..1521ff4621c 100644
--- a/2.9/generated/torch.nn.Hardshrink.html
+++ b/2.9/generated/torch.nn.Hardshrink.html
@@ -4404,7 +4404,7 @@

                                        Hardshrink#

                                        -class torch.nn.Hardshrink(lambd=0.5)[source]#
                                        +class torch.nn.Hardshrink(lambd=0.5)[source]#

                                        Applies the Hard Shrinkage (Hardshrink) function element-wise.

                                        Hardshrink is defined as:

                                        @@ -4436,7 +4436,7 @@

                                        Hardshrink
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4447,7 +4447,7 @@

                                        Hardshrink
                                        -forward(input)[source]#
                                        +forward(input)[source]#

Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Hardsigmoid.html b/2.9/generated/torch.nn.Hardsigmoid.html
index 904e7184816..f69d6571510 100644
--- a/2.9/generated/torch.nn.Hardsigmoid.html
+++ b/2.9/generated/torch.nn.Hardsigmoid.html
@@ -4404,7 +4404,7 @@

                                        Hardsigmoid#

                                        -class torch.nn.Hardsigmoid(inplace=False)[source]#
                                        +class torch.nn.Hardsigmoid(inplace=False)[source]#

                                        Applies the Hardsigmoid function element-wise.

                                        Hardsigmoid is defined as:

                                        @@ -4435,7 +4435,7 @@

                                        Hardsigmoid
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Hardswish.html b/2.9/generated/torch.nn.Hardswish.html
index 5a4cf31bf73..d07b5ae512c 100644
--- a/2.9/generated/torch.nn.Hardswish.html
+++ b/2.9/generated/torch.nn.Hardswish.html
@@ -4404,7 +4404,7 @@

                                        Hardswish#

                                        -class torch.nn.Hardswish(inplace=False)[source]#
                                        +class torch.nn.Hardswish(inplace=False)[source]#

                                        Applies the Hardswish function, element-wise.

                                        Method described in the paper: Searching for MobileNetV3.

                                        Hardswish is defined as:

                                        @@ -4436,7 +4436,7 @@

                                        Hardswish
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Hardtanh.html b/2.9/generated/torch.nn.Hardtanh.html
index 668b18ab618..4b6bc0576af 100644
--- a/2.9/generated/torch.nn.Hardtanh.html
+++ b/2.9/generated/torch.nn.Hardtanh.html
@@ -4404,7 +4404,7 @@

                                        Hardtanh#

                                        -class torch.nn.Hardtanh(min_val=-1.0, max_val=1.0, inplace=False, min_value=None, max_value=None)[source]#
                                        +class torch.nn.Hardtanh(min_val=-1.0, max_val=1.0, inplace=False, min_value=None, max_value=None)[source]#

                                        Applies the HardTanh function element-wise.

                                        HardTanh is defined as:

                                        @@ -4441,7 +4441,7 @@

                                        Hardtanh
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4452,7 +4452,7 @@

                                        Hardtanh
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.HingeEmbeddingLoss.html b/2.9/generated/torch.nn.HingeEmbeddingLoss.html
index 98e1e7be663..1fa5a4a209c 100644
--- a/2.9/generated/torch.nn.HingeEmbeddingLoss.html
+++ b/2.9/generated/torch.nn.HingeEmbeddingLoss.html
@@ -4404,7 +4404,7 @@

                                        HingeEmbeddingLoss#

                                        -class torch.nn.HingeEmbeddingLoss(margin=1.0, size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.HingeEmbeddingLoss(margin=1.0, size_average=None, reduce=None, reduction='mean')[source]#

Measures the loss given an input tensor $x$ and a labels tensor $y$ (containing 1 or -1). This is usually used for measuring whether two inputs are similar or

@@ -4458,7 +4458,7 @@

                                        HingeEmbeddingLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.HuberLoss.html b/2.9/generated/torch.nn.HuberLoss.html
index c9c229f31ed..3a908a5c1aa 100644
--- a/2.9/generated/torch.nn.HuberLoss.html
+++ b/2.9/generated/torch.nn.HuberLoss.html
@@ -4404,7 +4404,7 @@

                                        HuberLoss#

                                        -class torch.nn.HuberLoss(reduction='mean', delta=1.0)[source]#
                                        +class torch.nn.HuberLoss(reduction='mean', delta=1.0)[source]#

Creates a criterion that uses a squared term if the absolute element-wise error falls below delta and a delta-scaled L1 term otherwise. This loss combines advantages of both L1Loss and MSELoss; the

@@ -4460,7 +4460,7 @@
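A worked check of the two branches with delta=1.0: an error of 0.5 stays in the squared region (0.5 * 0.5^2 = 0.125), while an error of 2.0 falls in the linear region (1.0 * (2.0 - 0.5) = 1.5), so the mean is 0.8125.

    import torch
    import torch.nn as nn

    loss = nn.HuberLoss(reduction='mean', delta=1.0)
    pred = torch.tensor([0.0, 2.0])
    target = torch.tensor([0.5, 0.0])
    loss(pred, target)   # tensor(0.8125)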

                                        HuberLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.Identity.html b/2.9/generated/torch.nn.Identity.html
index 90d4ed1b6ec..63d7e837efd 100644
--- a/2.9/generated/torch.nn.Identity.html
+++ b/2.9/generated/torch.nn.Identity.html
@@ -4404,7 +4404,7 @@

                                        Identity#

                                        -class torch.nn.Identity(*args, **kwargs)[source]#
                                        +class torch.nn.Identity(*args, **kwargs)[source]#

                                        A placeholder identity operator that is argument-insensitive.

                                        Parameters
                                        @@ -4431,7 +4431,7 @@

                                        Identity
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.InstanceNorm1d.html b/2.9/generated/torch.nn.InstanceNorm1d.html
index 54f7cb07190..9fed35d3e55 100644
--- a/2.9/generated/torch.nn.InstanceNorm1d.html
+++ b/2.9/generated/torch.nn.InstanceNorm1d.html
@@ -4404,7 +4404,7 @@

                                        InstanceNorm1d#

                                        -class torch.nn.InstanceNorm1d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                                        +class torch.nn.InstanceNorm1d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                                        Applies Instance Normalization.

This operation applies Instance Normalization over a 2D (unbatched) or 3D (batched) input as described in the paper

diff --git a/2.9/generated/torch.nn.InstanceNorm2d.html b/2.9/generated/torch.nn.InstanceNorm2d.html
index d8d8208ec42..7c56b5b5f28 100644
--- a/2.9/generated/torch.nn.InstanceNorm2d.html
+++ b/2.9/generated/torch.nn.InstanceNorm2d.html
@@ -4404,7 +4404,7 @@

                                        InstanceNorm2d#

                                        -class torch.nn.InstanceNorm2d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                                        +class torch.nn.InstanceNorm2d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                                        Applies Instance Normalization.

This operation applies Instance Normalization over a 4D input (a mini-batch of 2D inputs

diff --git a/2.9/generated/torch.nn.InstanceNorm3d.html b/2.9/generated/torch.nn.InstanceNorm3d.html
index c688330cca7..e98853b1294 100644
--- a/2.9/generated/torch.nn.InstanceNorm3d.html
+++ b/2.9/generated/torch.nn.InstanceNorm3d.html
@@ -4404,7 +4404,7 @@

                                        InstanceNorm3d#

                                        -class torch.nn.InstanceNorm3d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                                        +class torch.nn.InstanceNorm3d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                                        Applies Instance Normalization.

This operation applies Instance Normalization over a 5D input (a mini-batch of 3D inputs with additional channel dimension) as described in the paper

diff --git a/2.9/generated/torch.nn.KLDivLoss.html b/2.9/generated/torch.nn.KLDivLoss.html
index d90efeb8f55..a4684df8267 100644
--- a/2.9/generated/torch.nn.KLDivLoss.html
+++ b/2.9/generated/torch.nn.KLDivLoss.html
@@ -4404,7 +4404,7 @@

                                        KLDivLoss#

                                        -class torch.nn.KLDivLoss(size_average=None, reduce=None, reduction='mean', log_target=False)[source]#
                                        +class torch.nn.KLDivLoss(size_average=None, reduce=None, reduction='mean', log_target=False)[source]#

                                        The Kullback-Leibler divergence loss.

For tensors of the same shape $y_{\text{pred}},\ y_{\text{true}}$, where $y_{\text{pred}}$ is the input and $y_{\text{true}}$ is the

@@ -4487,7 +4487,7 @@
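The input is expected to contain log-probabilities, while the target holds probabilities (or log-probabilities when log_target=True); a minimal sketch:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    kl = nn.KLDivLoss(reduction='batchmean')
    input = F.log_softmax(torch.randn(3, 5), dim=1)  # log-probabilities
    target = F.softmax(torch.randn(3, 5), dim=1)     # probabilities
    out = kl(input, target)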

                                        KLDivLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.L1Loss.html b/2.9/generated/torch.nn.L1Loss.html
index 41a44714349..9a483467672 100644
--- a/2.9/generated/torch.nn.L1Loss.html
+++ b/2.9/generated/torch.nn.L1Loss.html
@@ -4404,7 +4404,7 @@

                                        L1Loss#

                                        -class torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the mean absolute error (MAE) between each element in the input $x$ and target $y$.

                                        The unreduced (i.e. with reduction set to 'none') loss can be described as:

                                        @@ -4466,7 +4466,7 @@

                                        L1Loss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.LPPool1d.html b/2.9/generated/torch.nn.LPPool1d.html
index 37e3f65de95..4e79ade8840 100644
--- a/2.9/generated/torch.nn.LPPool1d.html
+++ b/2.9/generated/torch.nn.LPPool1d.html
@@ -4404,7 +4404,7 @@

                                        LPPool1d#

                                        -class torch.nn.LPPool1d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                        +class torch.nn.LPPool1d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                        Applies a 1D power-average pooling over an input signal composed of several input planes.

                                        On each window, the function computed is:

                                        @@ -4455,7 +4455,7 @@

                                        LPPool1d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.LPPool2d.html b/2.9/generated/torch.nn.LPPool2d.html
index 0147081c2a3..c64a083219b 100644
--- a/2.9/generated/torch.nn.LPPool2d.html
+++ b/2.9/generated/torch.nn.LPPool2d.html
@@ -4404,7 +4404,7 @@

                                        LPPool2d#

                                        -class torch.nn.LPPool2d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                        +class torch.nn.LPPool2d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                        Applies a 2D power-average pooling over an input signal composed of several input planes.

                                        On each window, the function computed is:

                                        @@ -4468,7 +4468,7 @@

                                        LPPool2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.LPPool3d.html b/2.9/generated/torch.nn.LPPool3d.html
index 2d2c21bd184..df933e9f5e9 100644
--- a/2.9/generated/torch.nn.LPPool3d.html
+++ b/2.9/generated/torch.nn.LPPool3d.html
@@ -4404,7 +4404,7 @@

                                        LPPool3d#

                                        -class torch.nn.LPPool3d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                        +class torch.nn.LPPool3d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                        Applies a 3D power-average pooling over an input signal composed of several input planes.

                                        On each window, the function computed is:

                                        @@ -4472,7 +4472,7 @@

                                        LPPool3d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.LSTM.html b/2.9/generated/torch.nn.LSTM.html
index 7146d195c8a..3a623f01684 100644
--- a/2.9/generated/torch.nn.LSTM.html
+++ b/2.9/generated/torch.nn.LSTM.html
@@ -4404,7 +4404,7 @@

                                        LSTM#

                                        -class torch.nn.LSTM(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, proj_size=0, device=None, dtype=None)[source]#
                                        +class torch.nn.LSTM(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, proj_size=0, device=None, dtype=None)[source]#

                                        Apply a multi-layer long short-term memory (LSTM) RNN to an input sequence. For each element in the input sequence, each layer computes the following function:
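For the gate equations, see the LSTMCell entry below; a minimal usage sketch (inputs are (seq_len, batch, input_size) unless batch_first=True):

    import torch
    import torch.nn as nn

    rnn = nn.LSTM(input_size=10, hidden_size=20, num_layers=2)
    x = torch.randn(5, 3, 10)            # (seq_len, batch, input_size)
    h0 = torch.randn(2, 3, 20)           # (num_layers, batch, hidden_size)
    c0 = torch.randn(2, 3, 20)
    output, (hn, cn) = rnn(x, (h0, c0))  # output: (5, 3, 20)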

diff --git a/2.9/generated/torch.nn.LSTMCell.html b/2.9/generated/torch.nn.LSTMCell.html
index 42d15872656..d74c1acc728 100644
--- a/2.9/generated/torch.nn.LSTMCell.html
+++ b/2.9/generated/torch.nn.LSTMCell.html
@@ -4404,7 +4404,7 @@

                                        LSTMCell#

                                        -class torch.nn.LSTMCell(input_size, hidden_size, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LSTMCell(input_size, hidden_size, bias=True, device=None, dtype=None)[source]#

                                        A long short-term memory (LSTM) cell.

$$\begin{array}{ll}
i = \sigma(W_{ii} x + b_{ii} + W_{hi} h + b_{hi}) \\
f = \sigma(W_{if} x + b_{if} + W_{hf} h + b_{hf}) \\
g = \tanh(W_{ig} x + b_{ig} + W_{hg} h + b_{hg}) \\
o = \sigma(W_{io} x + b_{io} + W_{ho} h + b_{ho}) \\
c' = f \odot c + i \odot g \\
h' = o \odot \tanh(c')
\end{array}$$

diff --git a/2.9/generated/torch.nn.LayerNorm.html b/2.9/generated/torch.nn.LayerNorm.html
index 2941787f5cb..02d4fe6fa65 100644
--- a/2.9/generated/torch.nn.LayerNorm.html
+++ b/2.9/generated/torch.nn.LayerNorm.html
@@ -4404,7 +4404,7 @@

                                        LayerNorm#

                                        -class torch.nn.LayerNorm(normalized_shape, eps=1e-05, elementwise_affine=True, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LayerNorm(normalized_shape, eps=1e-05, elementwise_affine=True, bias=True, device=None, dtype=None)[source]#

                                        Applies Layer Normalization over a mini-batch of inputs.

                                        This layer implements the operation as described in the paper Layer Normalization
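normalized_shape names the trailing dimensions to normalize over; two common configurations:

    import torch
    import torch.nn as nn

    x = torch.randn(20, 5, 10)
    nn.LayerNorm(10)(x)             # normalize over the last dimension only

    img = torch.randn(8, 3, 32, 32)
    nn.LayerNorm([3, 32, 32])(img)  # normalize over (C, H, W) jointly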

diff --git a/2.9/generated/torch.nn.LazyBatchNorm1d.html b/2.9/generated/torch.nn.LazyBatchNorm1d.html
index dc3fbc8ec24..c381368f76e 100644
--- a/2.9/generated/torch.nn.LazyBatchNorm1d.html
+++ b/2.9/generated/torch.nn.LazyBatchNorm1d.html
@@ -4404,7 +4404,7 @@

                                        LazyBatchNorm1d#

                                        -class torch.nn.LazyBatchNorm1d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LazyBatchNorm1d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.BatchNorm1d module with lazy initialization.

Lazy initialization is done for the num_features argument of the BatchNorm1d, which is inferred from the input.size(1).

@@ -4433,7 +4433,7 @@

                                        LazyBatchNorm1d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of BatchNorm1d

diff --git a/2.9/generated/torch.nn.LazyBatchNorm2d.html b/2.9/generated/torch.nn.LazyBatchNorm2d.html
index c8a54963d7b..dda5fb1ec92 100644
--- a/2.9/generated/torch.nn.LazyBatchNorm2d.html
+++ b/2.9/generated/torch.nn.LazyBatchNorm2d.html
@@ -4404,7 +4404,7 @@

                                        LazyBatchNorm2d#

                                        -class torch.nn.LazyBatchNorm2d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LazyBatchNorm2d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.BatchNorm2d module with lazy initialization.

Lazy initialization is done for the num_features argument of the BatchNorm2d that is inferred from the input.size(1).

@@ -4433,7 +4433,7 @@

                                        LazyBatchNorm2d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of BatchNorm2d

diff --git a/2.9/generated/torch.nn.LazyBatchNorm3d.html b/2.9/generated/torch.nn.LazyBatchNorm3d.html
index 57194836fa5..ba4ab54bc1e 100644
--- a/2.9/generated/torch.nn.LazyBatchNorm3d.html
+++ b/2.9/generated/torch.nn.LazyBatchNorm3d.html
@@ -4404,7 +4404,7 @@

                                        LazyBatchNorm3d#

                                        -class torch.nn.LazyBatchNorm3d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LazyBatchNorm3d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.BatchNorm3d module with lazy initialization.

Lazy initialization is done for the num_features argument of the BatchNorm3d that is inferred from the input.size(1).

@@ -4433,7 +4433,7 @@

                                        LazyBatchNorm3d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of BatchNorm3d

diff --git a/2.9/generated/torch.nn.LazyConv1d.html b/2.9/generated/torch.nn.LazyConv1d.html
index c97bd993bed..e6f0373316e 100644
--- a/2.9/generated/torch.nn.LazyConv1d.html
+++ b/2.9/generated/torch.nn.LazyConv1d.html
@@ -4404,7 +4404,7 @@

                                        LazyConv1d#

                                        -class torch.nn.LazyConv1d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.LazyConv1d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.Conv1d module with lazy initialization of the in_channels argument.

                                        The in_channels argument of the Conv1d is inferred from the input.size(1). The attributes that will be lazily initialized are weight and bias.
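A sketch of the lazy materialization: the first forward call fixes in_channels and, via the cls_to_become alias shown below, the module should turn into a regular Conv1d.

    import torch
    import torch.nn as nn

    m = nn.LazyConv1d(out_channels=33, kernel_size=3)
    x = torch.randn(20, 16, 50)
    out = m(x)                 # infers in_channels=16, creates weight and bias
    print(type(m).__name__)    # 'Conv1d' after materialization (cls_to_become)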

                                        @@ -4435,7 +4435,7 @@

                                        LazyConv1d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of Conv1d

diff --git a/2.9/generated/torch.nn.LazyConv2d.html b/2.9/generated/torch.nn.LazyConv2d.html
index 24b1d4437ec..1c02669b1ff 100644
--- a/2.9/generated/torch.nn.LazyConv2d.html
+++ b/2.9/generated/torch.nn.LazyConv2d.html
@@ -4404,7 +4404,7 @@

                                        LazyConv2d#

                                        -class torch.nn.LazyConv2d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.LazyConv2d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.Conv2d module with lazy initialization of the in_channels argument.

The in_channels argument of the Conv2d is inferred from the input.size(1). The attributes that will be lazily initialized are weight and bias.

                                        @@ -4435,7 +4435,7 @@

                                        LazyConv2d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of Conv2d

                                        diff --git a/2.9/generated/torch.nn.LazyConv3d.html b/2.9/generated/torch.nn.LazyConv3d.html index 9661bdc848a..ed565e4f582 100644 --- a/2.9/generated/torch.nn.LazyConv3d.html +++ b/2.9/generated/torch.nn.LazyConv3d.html @@ -4404,7 +4404,7 @@

                                        LazyConv3d#

                                        -class torch.nn.LazyConv3d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.LazyConv3d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.Conv3d module with lazy initialization of the in_channels argument.

The in_channels argument of the Conv3d is inferred from the input.size(1). @@ -4436,7 +4436,7 @@

                                        LazyConv3d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of Conv3d

                                        diff --git a/2.9/generated/torch.nn.LazyConvTranspose1d.html b/2.9/generated/torch.nn.LazyConvTranspose1d.html index ec37abec6c6..b7988f57311 100644 --- a/2.9/generated/torch.nn.LazyConvTranspose1d.html +++ b/2.9/generated/torch.nn.LazyConvTranspose1d.html @@ -4404,7 +4404,7 @@

                                        LazyConvTranspose1d#

                                        -class torch.nn.LazyConvTranspose1d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.LazyConvTranspose1d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.ConvTranspose1d module with lazy initialization of the in_channels argument.

The in_channels argument of the ConvTranspose1d is inferred from the input.size(1). @@ -4433,7 +4433,7 @@

                                        LazyConvTranspose1d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of ConvTranspose1d

                                        diff --git a/2.9/generated/torch.nn.LazyConvTranspose2d.html b/2.9/generated/torch.nn.LazyConvTranspose2d.html index fb4c49e5811..bcc774c1f64 100644 --- a/2.9/generated/torch.nn.LazyConvTranspose2d.html +++ b/2.9/generated/torch.nn.LazyConvTranspose2d.html @@ -4404,7 +4404,7 @@

                                        LazyConvTranspose2d#

                                        -class torch.nn.LazyConvTranspose2d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.LazyConvTranspose2d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.ConvTranspose2d module with lazy initialization of the in_channels argument.

                                        The in_channels argument of the ConvTranspose2d is inferred from the input.size(1). @@ -4433,7 +4433,7 @@

                                        LazyConvTranspose2d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of ConvTranspose2d

                                        diff --git a/2.9/generated/torch.nn.LazyConvTranspose3d.html b/2.9/generated/torch.nn.LazyConvTranspose3d.html index a3c2df525ea..28d75b735dd 100644 --- a/2.9/generated/torch.nn.LazyConvTranspose3d.html +++ b/2.9/generated/torch.nn.LazyConvTranspose3d.html @@ -4404,7 +4404,7 @@

                                        LazyConvTranspose3d#

                                        -class torch.nn.LazyConvTranspose3d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.LazyConvTranspose3d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.ConvTranspose3d module with lazy initialization of the in_channels argument.

                                        The in_channels argument of the ConvTranspose3d is inferred from the input.size(1). @@ -4433,7 +4433,7 @@

                                        LazyConvTranspose3d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of ConvTranspose3d

                                        diff --git a/2.9/generated/torch.nn.LazyInstanceNorm1d.html b/2.9/generated/torch.nn.LazyInstanceNorm1d.html index 18a2af1a929..baf51cd92ae 100644 --- a/2.9/generated/torch.nn.LazyInstanceNorm1d.html +++ b/2.9/generated/torch.nn.LazyInstanceNorm1d.html @@ -4404,7 +4404,7 @@

                                        LazyInstanceNorm1d#

                                        -class torch.nn.LazyInstanceNorm1d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LazyInstanceNorm1d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.InstanceNorm1d module with lazy initialization of the num_features argument.

                                        The num_features argument of the InstanceNorm1d is inferred from the input.size(1). The attributes that will be lazily initialized are weight, bias, running_mean and running_var.

                                        @@ -4436,7 +4436,7 @@

                                        LazyInstanceNorm1d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of InstanceNorm1d

                                        diff --git a/2.9/generated/torch.nn.LazyInstanceNorm2d.html b/2.9/generated/torch.nn.LazyInstanceNorm2d.html index 0df58756008..2b1280e71b1 100644 --- a/2.9/generated/torch.nn.LazyInstanceNorm2d.html +++ b/2.9/generated/torch.nn.LazyInstanceNorm2d.html @@ -4404,7 +4404,7 @@

                                        LazyInstanceNorm2d#

                                        -class torch.nn.LazyInstanceNorm2d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LazyInstanceNorm2d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.InstanceNorm2d module with lazy initialization of the num_features argument.

                                        The num_features argument of the InstanceNorm2d is inferred from the input.size(1). The attributes that will be lazily initialized are weight, bias, @@ -4437,7 +4437,7 @@

                                        LazyInstanceNorm2d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of InstanceNorm2d

                                        diff --git a/2.9/generated/torch.nn.LazyInstanceNorm3d.html b/2.9/generated/torch.nn.LazyInstanceNorm3d.html index 9213c093615..1b8078c05b5 100644 --- a/2.9/generated/torch.nn.LazyInstanceNorm3d.html +++ b/2.9/generated/torch.nn.LazyInstanceNorm3d.html @@ -4404,7 +4404,7 @@

                                        LazyInstanceNorm3d#

                                        -class torch.nn.LazyInstanceNorm3d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LazyInstanceNorm3d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.InstanceNorm3d module with lazy initialization of the num_features argument.

                                        The num_features argument of the InstanceNorm3d is inferred from the input.size(1). The attributes that will be lazily initialized are weight, bias, @@ -4437,7 +4437,7 @@

                                        LazyInstanceNorm3d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of InstanceNorm3d

                                        diff --git a/2.9/generated/torch.nn.LazyLinear.html b/2.9/generated/torch.nn.LazyLinear.html index 0ac16b6977a..cddfba5fff0 100644 --- a/2.9/generated/torch.nn.LazyLinear.html +++ b/2.9/generated/torch.nn.LazyLinear.html @@ -4404,7 +4404,7 @@

                                        LazyLinear#

                                        -class torch.nn.LazyLinear(out_features, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.LazyLinear(out_features, bias=True, device=None, dtype=None)[source]#

                                        A torch.nn.Linear module where in_features is inferred.

                                        In this module, the weight and bias are of torch.nn.UninitializedParameter class. They will be initialized after the first call to forward is done and the @@ -4479,13 +4479,13 @@
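A short illustrative sketch (arbitrary shapes):

>>> import torch, torch.nn as nn
>>> m = nn.LazyLinear(out_features=8)
>>> m.weight                    # still uninitialized before the first forward
<UninitializedParameter>
>>> _ = m(torch.randn(2, 5))    # in_features inferred as 5
>>> m.weight.shape
torch.Size([8, 5])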

                                        LazyLinear
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of Linear

                                        -initialize_parameters(input)[source]#
                                        +initialize_parameters(input)[source]#

                                        Infers in_features based on input and initializes parameters.

                                        @@ -4493,7 +4493,7 @@

                                        LazyLinear
                                        -reset_parameters()[source]#
                                        +reset_parameters()[source]#

                                        Resets parameters based on their initialization used in __init__.

                                        diff --git a/2.9/generated/torch.nn.LeakyReLU.html b/2.9/generated/torch.nn.LeakyReLU.html index 0783dc473bf..db1c0ddd613 100644 --- a/2.9/generated/torch.nn.LeakyReLU.html +++ b/2.9/generated/torch.nn.LeakyReLU.html @@ -4404,7 +4404,7 @@

                                        LeakyReLU#

                                        -class torch.nn.LeakyReLU(negative_slope=0.01, inplace=False)[source]#
                                        +class torch.nn.LeakyReLU(negative_slope=0.01, inplace=False)[source]#

                                        Applies the LeakyReLU function element-wise.

\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x) @@ -4443,7 +4443,7 @@
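For example (values rounded as printed by torch):

>>> import torch, torch.nn as nn
>>> m = nn.LeakyReLU(negative_slope=0.1)
>>> m(torch.tensor([-1.0, 2.0]))
tensor([-0.1000,  2.0000])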

                                        LeakyReLU
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4454,7 +4454,7 @@

                                        LeakyReLU
                                        -forward(input)[source]#
                                        +forward(input)[source]#

Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Linear.html b/2.9/generated/torch.nn.Linear.html index b04be0ac2a1..83c5bfb0705 100644 --- a/2.9/generated/torch.nn.Linear.html +++ b/2.9/generated/torch.nn.Linear.html @@ -4404,7 +4404,7 @@

                                        Linear#

                                        -class torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)[source]#

Applies an affine linear transformation to the incoming data: y = xA^T + b.

                                        This module supports TensorFloat32.

                                        On certain ROCm devices, when using float16 inputs this module will use different precision for backward.
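A minimal usage sketch (arbitrary sizes):

>>> import torch, torch.nn as nn
>>> m = nn.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> m(input).shape
torch.Size([128, 30])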

                                        @@ -4495,7 +4495,7 @@

                                        Linear
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4506,7 +4506,7 @@

                                        Linear
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        @@ -4517,7 +4517,7 @@

                                        Linear
                                        -reset_parameters()[source]#
                                        +reset_parameters()[source]#

                                        Resets parameters based on their initialization used in __init__.

                                        diff --git a/2.9/generated/torch.nn.LocalResponseNorm.html b/2.9/generated/torch.nn.LocalResponseNorm.html index 2eb1304d2bf..d4a58e3ab9e 100644 --- a/2.9/generated/torch.nn.LocalResponseNorm.html +++ b/2.9/generated/torch.nn.LocalResponseNorm.html @@ -4404,7 +4404,7 @@

                                        LocalResponseNorm#

                                        -class torch.nn.LocalResponseNorm(size, alpha=0.0001, beta=0.75, k=1.0)[source]#
                                        +class torch.nn.LocalResponseNorm(size, alpha=0.0001, beta=0.75, k=1.0)[source]#

                                        Applies local response normalization over an input signal.

                                        The input signal is composed of several input planes, where channels occupy the second dimension. Applies normalization across channels.
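For illustration, a minimal sketch on a 4D signal (arbitrary shapes):

>>> import torch, torch.nn as nn
>>> lrn = nn.LocalResponseNorm(size=2)
>>> signal_2d = torch.randn(32, 5, 24, 24)  # (N, C, H, W); normalization runs across C
>>> out = lrn(signal_2d)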

                                        @@ -4454,13 +4454,13 @@

                                        LocalResponseNorm
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.LogSigmoid.html b/2.9/generated/torch.nn.LogSigmoid.html index 26098ad317e..1399a67a7a6 100644 --- a/2.9/generated/torch.nn.LogSigmoid.html +++ b/2.9/generated/torch.nn.LogSigmoid.html @@ -4404,7 +4404,7 @@

                                        LogSigmoid#

                                        -class torch.nn.LogSigmoid(*args, **kwargs)[source]#
                                        +class torch.nn.LogSigmoid(*args, **kwargs)[source]#

Applies the LogSigmoid function element-wise.

\text{LogSigmoid}(x) = \log\left(\frac{1}{1 + \exp(-x)}\right) @@ -4427,7 +4427,7 @@
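For example:

>>> import torch, torch.nn as nn
>>> m = nn.LogSigmoid()
>>> m(torch.zeros(1))  # log(1 / (1 + exp(0))) = log(0.5)
tensor([-0.6931])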

                                        LogSigmoid
                                        -forward(input)[source]#
                                        +forward(input)[source]#

Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.LogSoftmax.html b/2.9/generated/torch.nn.LogSoftmax.html index 9cee210fa5d..85cfb3c25bf 100644 --- a/2.9/generated/torch.nn.LogSoftmax.html +++ b/2.9/generated/torch.nn.LogSoftmax.html @@ -4404,7 +4404,7 @@

                                        LogSoftmax#

                                        -class torch.nn.LogSoftmax(dim=None)[source]#
                                        +class torch.nn.LogSoftmax(dim=None)[source]#

Applies the log(Softmax(x)) function to an n-dimensional input Tensor.

                                        The LogSoftmax formulation can be simplified as:

                                        @@ -4438,7 +4438,7 @@

                                        LogSoftmax
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4449,7 +4449,7 @@

                                        LogSoftmax
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.MSELoss.html b/2.9/generated/torch.nn.MSELoss.html index 239c72a8bd4..46fc7cfb4cd 100644 --- a/2.9/generated/torch.nn.MSELoss.html +++ b/2.9/generated/torch.nn.MSELoss.html @@ -4404,7 +4404,7 @@

                                        MSELoss#

                                        -class torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the mean squared error (squared L2 norm) between each element in the input x and target y.
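A minimal usage sketch (arbitrary shapes):

>>> import torch, torch.nn as nn
>>> loss = nn.MSELoss()
>>> input = torch.randn(3, 5, requires_grad=True)
>>> target = torch.randn(3, 5)
>>> output = loss(input, target)
>>> output.backward()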

                                        The unreduced (i.e. with reduction set to 'none') loss can be described as:

                                        @@ -4463,7 +4463,7 @@

                                        MSELoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.MarginRankingLoss.html b/2.9/generated/torch.nn.MarginRankingLoss.html index e835b9c8922..07d5e92e43b 100644 --- a/2.9/generated/torch.nn.MarginRankingLoss.html +++ b/2.9/generated/torch.nn.MarginRankingLoss.html @@ -4404,7 +4404,7 @@

                                        MarginRankingLoss#

                                        -class torch.nn.MarginRankingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.MarginRankingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the loss given inputs x1, x2, two 1D mini-batch or 0D Tensors, and a label 1D mini-batch or 0D Tensor y (containing 1 or -1).
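A minimal usage sketch (arbitrary values; the margin is illustrative):

>>> import torch, torch.nn as nn
>>> loss = nn.MarginRankingLoss(margin=0.5)
>>> input1 = torch.randn(3, requires_grad=True)
>>> input2 = torch.randn(3, requires_grad=True)
>>> target = torch.randn(3).sign()  # labels in {1, -1}
>>> loss(input1, input2, target).backward()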

                                        @@ -4456,7 +4456,7 @@

                                        MarginRankingLoss
                                        -forward(input1, input2, target)[source]#
                                        +forward(input1, input2, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.MaxPool1d.html b/2.9/generated/torch.nn.MaxPool1d.html index b9a5d4473f5..53fc9e4616f 100644 --- a/2.9/generated/torch.nn.MaxPool1d.html +++ b/2.9/generated/torch.nn.MaxPool1d.html @@ -4404,7 +4404,7 @@

                                        MaxPool1d#

                                        -class torch.nn.MaxPool1d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#
                                        +class torch.nn.MaxPool1d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#

                                        Applies a 1D max pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, L) and output (N, C, L_{out}) can be precisely described as:

                                        @@ -4463,7 +4463,7 @@

                                        MaxPool1d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        diff --git a/2.9/generated/torch.nn.MaxPool2d.html b/2.9/generated/torch.nn.MaxPool2d.html index c9a4aebd660..3062f5b4f78 100644 --- a/2.9/generated/torch.nn.MaxPool2d.html +++ b/2.9/generated/torch.nn.MaxPool2d.html @@ -4404,7 +4404,7 @@

                                        MaxPool2d#

                                        -class torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#
                                        +class torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#

                                        Applies a 2D max pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, H, W), output (N, C, H_{out}, W_{out}) and kernel_size (kH, kW) @@ -4472,7 +4472,7 @@
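For example, halving the spatial dimensions (arbitrary sizes):

>>> import torch, torch.nn as nn
>>> m = nn.MaxPool2d(kernel_size=2, stride=2)
>>> m(torch.randn(1, 16, 32, 32)).shape
torch.Size([1, 16, 16, 16])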

                                        MaxPool2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        diff --git a/2.9/generated/torch.nn.MaxPool3d.html b/2.9/generated/torch.nn.MaxPool3d.html index a0f96b01c3e..a49bdc55236 100644 --- a/2.9/generated/torch.nn.MaxPool3d.html +++ b/2.9/generated/torch.nn.MaxPool3d.html @@ -4404,7 +4404,7 @@

                                        MaxPool3d#

                                        -class torch.nn.MaxPool3d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#
                                        +class torch.nn.MaxPool3d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#

                                        Applies a 3D max pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, D, H, W), output (N, C, D_{out}, H_{out}, W_{out}) and kernel_size (kD, kH, kW) @@ -4476,7 +4476,7 @@

                                        MaxPool3d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        diff --git a/2.9/generated/torch.nn.MaxUnpool1d.html b/2.9/generated/torch.nn.MaxUnpool1d.html index 074d92c7abc..2488241c9e2 100644 --- a/2.9/generated/torch.nn.MaxUnpool1d.html +++ b/2.9/generated/torch.nn.MaxUnpool1d.html @@ -4404,7 +4404,7 @@

                                        MaxUnpool1d#

                                        -class torch.nn.MaxUnpool1d(kernel_size, stride=None, padding=0)[source]#
                                        +class torch.nn.MaxUnpool1d(kernel_size, stride=None, padding=0)[source]#

                                        Computes a partial inverse of MaxPool1d.

                                        MaxPool1d is not fully invertible, since the non-maximal values are lost.

                                        MaxUnpool1d takes in as input the output of MaxPool1d @@ -4471,7 +4471,7 @@
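A minimal round-trip sketch, pooling with return_indices=True and then unpooling:

>>> import torch, torch.nn as nn
>>> pool = nn.MaxPool1d(2, stride=2, return_indices=True)
>>> unpool = nn.MaxUnpool1d(2, stride=2)
>>> input = torch.tensor([[[1., 2., 3., 4., 5., 6., 7., 8.]]])
>>> output, indices = pool(input)
>>> unpool(output, indices)  # non-maximal positions are filled with zeros
tensor([[[0., 2., 0., 4., 0., 6., 0., 8.]]])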

                                        MaxUnpool1d
                                        -forward(input, indices, output_size=None)[source]#
                                        +forward(input, indices, output_size=None)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.MaxUnpool2d.html b/2.9/generated/torch.nn.MaxUnpool2d.html index 512af3f3552..20b29653505 100644 --- a/2.9/generated/torch.nn.MaxUnpool2d.html +++ b/2.9/generated/torch.nn.MaxUnpool2d.html @@ -4404,7 +4404,7 @@

                                        MaxUnpool2d#

                                        -class torch.nn.MaxUnpool2d(kernel_size, stride=None, padding=0)[source]#
                                        +class torch.nn.MaxUnpool2d(kernel_size, stride=None, padding=0)[source]#

                                        Computes a partial inverse of MaxPool2d.

                                        MaxPool2d is not fully invertible, since the non-maximal values are lost.

                                        MaxUnpool2d takes in as input the output of MaxPool2d @@ -4483,7 +4483,7 @@

                                        MaxUnpool2d
                                        -forward(input, indices, output_size=None)[source]#
                                        +forward(input, indices, output_size=None)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.MaxUnpool3d.html b/2.9/generated/torch.nn.MaxUnpool3d.html index c7687ceb102..40b42ece6ef 100644 --- a/2.9/generated/torch.nn.MaxUnpool3d.html +++ b/2.9/generated/torch.nn.MaxUnpool3d.html @@ -4404,7 +4404,7 @@

                                        MaxUnpool3d#

                                        -class torch.nn.MaxUnpool3d(kernel_size, stride=None, padding=0)[source]#
                                        +class torch.nn.MaxUnpool3d(kernel_size, stride=None, padding=0)[source]#

                                        Computes a partial inverse of MaxPool3d.

                                        MaxPool3d is not fully invertible, since the non-maximal values are lost. MaxUnpool3d takes in as input the output of MaxPool3d @@ -4469,7 +4469,7 @@

                                        MaxUnpool3d
                                        -forward(input, indices, output_size=None)[source]#
                                        +forward(input, indices, output_size=None)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Mish.html b/2.9/generated/torch.nn.Mish.html index dba115638aa..667989db887 100644 --- a/2.9/generated/torch.nn.Mish.html +++ b/2.9/generated/torch.nn.Mish.html @@ -4404,7 +4404,7 @@

                                        Mish#

                                        -class torch.nn.Mish(inplace=False)[source]#
                                        +class torch.nn.Mish(inplace=False)[source]#

                                        Applies the Mish function, element-wise.

                                        Mish: A Self Regularized Non-Monotonic Neural Activation Function.
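A minimal usage sketch:

>>> import torch, torch.nn as nn
>>> m = nn.Mish()
>>> out = m(torch.randn(2))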

                                        @@ -4432,7 +4432,7 @@

Mish
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4443,7 +4443,7 @@

Mish
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Module.html b/2.9/generated/torch.nn.Module.html index fc286529b8e..c406003fe75 100644 --- a/2.9/generated/torch.nn.Module.html +++ b/2.9/generated/torch.nn.Module.html @@ -4404,7 +4404,7 @@

                                        Module#

                                        -class torch.nn.Module(*args, **kwargs)[source]#
                                        +class torch.nn.Module(*args, **kwargs)[source]#

                                        Base class for all neural network modules.

                                        Your models should also subclass this class.

                                        Modules can also contain other Modules, allowing them to be nested in @@ -4439,7 +4439,7 @@
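A minimal illustrative subclass (the Model name and layer sizes are hypothetical):

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        # submodules assigned as attributes are registered automatically
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))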

                                        Module
                                        -add_module(name, module)[source]#
                                        +add_module(name, module)[source]#

                                        Add a child module to the current module.

                                        The module can be accessed as an attribute using the given name.

                                        @@ -4455,7 +4455,7 @@

                                        Module
                                        -apply(fn)[source]#
                                        +apply(fn)[source]#

                                        Apply fn recursively to every submodule (as returned by .children()) as well as self.

                                        Typical use includes initializing the parameters of a model (see also torch.nn.init).
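For example, a hypothetical init_weights function applied to every submodule:

>>> import torch, torch.nn as nn
>>> def init_weights(m):
...     if isinstance(m, nn.Linear):
...         nn.init.xavier_uniform_(m.weight)
>>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
>>> _ = net.apply(init_weights)  # visits each submodule, then net itself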

                                        @@ -4497,7 +4497,7 @@

                                        Module
                                        -bfloat16()[source]#
                                        +bfloat16()[source]#

                                        Casts all floating point parameters and buffers to bfloat16 datatype.

                                        Note

                                        @@ -4515,7 +4515,7 @@

                                        Module
                                        -buffers(recurse=True)[source]#
                                        +buffers(recurse=True)[source]#

                                        Return an iterator over module buffers.

                                        Parameters
                                        @@ -4541,7 +4541,7 @@

                                        Module
                                        -children()[source]#
                                        +children()[source]#

                                        Return an iterator over immediate children modules.

                                        Yields
                                        @@ -4555,7 +4555,7 @@

                                        Module
                                        -compile(*args, **kwargs)[source]#
                                        +compile(*args, **kwargs)[source]#

                                        Compile this Module’s forward using torch.compile().

                                        This Module’s __call__ method is compiled and all arguments are passed as-is to torch.compile().

                                        @@ -4564,7 +4564,7 @@

                                        Module
                                        -cpu()[source]#
                                        +cpu()[source]#

                                        Move all model parameters and buffers to the CPU.

                                        Note

                                        @@ -4582,7 +4582,7 @@

                                        Module
                                        -cuda(device=None)[source]#
                                        +cuda(device=None)[source]#

                                        Move all model parameters and buffers to the GPU.

                                        This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will @@ -4607,7 +4607,7 @@

                                        Module
                                        -double()[source]#
                                        +double()[source]#

                                        Casts all floating point parameters and buffers to double datatype.

                                        Note

                                        @@ -4625,7 +4625,7 @@

                                        Module
                                        -eval()[source]#
                                        +eval()[source]#

                                        Set the module in evaluation mode.

                                        This has an effect only on certain modules. See the documentation of particular modules for details of their behaviors in training/evaluation @@ -4646,7 +4646,7 @@

                                        Module
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        To print customized extra information, you should re-implement this method in your own modules. Both single-line and multi-line @@ -4660,7 +4660,7 @@

                                        Module
                                        -float()[source]#
                                        +float()[source]#

                                        Casts all floating point parameters and buffers to float datatype.

                                        Note

                                        @@ -4678,7 +4678,7 @@

                                        Module
                                        -forward(*input)[source]#
                                        +forward(*input)[source]#

                                        Define the computation performed at every call.

                                        Should be overridden by all subclasses.

                                        @@ -4694,7 +4694,7 @@

                                        Module
                                        -get_buffer(target)[source]#
                                        +get_buffer(target)[source]#

                                        Return the buffer given by target if it exists, otherwise throw an error.

                                        See the docstring for get_submodule for a more detailed explanation of this method’s functionality as well as how to @@ -4721,7 +4721,7 @@

                                        Module
                                        -get_extra_state()[source]#
                                        +get_extra_state()[source]#

                                        Return any extra state to include in the module’s state_dict.

                                        Implement this and a corresponding set_extra_state() for your module if you need to store extra state. This function is called when building the @@ -4742,7 +4742,7 @@

                                        Module
                                        -get_parameter(target)[source]#
                                        +get_parameter(target)[source]#

                                        Return the parameter given by target if it exists, otherwise throw an error.

                                        See the docstring for get_submodule for a more detailed explanation of this method’s functionality as well as how to @@ -4769,7 +4769,7 @@

                                        Module
                                        -get_submodule(target)[source]#
                                        +get_submodule(target)[source]#

                                        Return the submodule given by target if it exists, otherwise throw an error.

                                        For example, let’s say you have an nn.Module A that looks like this:

                                        @@ -4818,7 +4818,7 @@

                                        Module
                                        -half()[source]#
                                        +half()[source]#

                                        Casts all floating point parameters and buffers to half datatype.

                                        Note

                                        @@ -4836,7 +4836,7 @@

                                        Module
                                        -ipu(device=None)[source]#
                                        +ipu(device=None)[source]#

                                        Move all model parameters and buffers to the IPU.

                                        This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will @@ -4861,7 +4861,7 @@

                                        Module
                                        -load_state_dict(state_dict, strict=True, assign=False)[source]#
                                        +load_state_dict(state_dict, strict=True, assign=False)[source]#

                                        Copy parameters and buffers from state_dict into this module and its descendants.

                                        If strict is True, then the keys of state_dict must exactly match the keys returned @@ -4916,7 +4916,7 @@
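A minimal sketch; 'checkpoint.pt' is a hypothetical file and model stands for any nn.Module instance:

>>> import torch
>>> state_dict = torch.load('checkpoint.pt')
>>> missing, unexpected = model.load_state_dict(state_dict, strict=False)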

                                        Module
                                        -modules()[source]#
                                        +modules()[source]#

                                        Return an iterator over all modules in the network.

                                        Yields
                                        @@ -4948,7 +4948,7 @@

                                        Module
                                        -mtia(device=None)[source]#
                                        +mtia(device=None)[source]#

                                        Move all model parameters and buffers to the MTIA.

                                        This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will @@ -4973,7 +4973,7 @@

                                        Module
                                        -named_buffers(prefix='', recurse=True, remove_duplicate=True)[source]#
                                        +named_buffers(prefix='', recurse=True, remove_duplicate=True)[source]#

                                        Return an iterator over module buffers, yielding both the name of the buffer as well as the buffer itself.

                                        Parameters
                                        @@ -5002,7 +5002,7 @@

                                        Module
                                        -named_children()[source]#
                                        +named_children()[source]#

                                        Return an iterator over immediate children modules, yielding both the name of the module as well as the module itself.

                                        Yields
                                        @@ -5022,7 +5022,7 @@

                                        Module
                                        -named_modules(memo=None, prefix='', remove_duplicate=True)[source]#
                                        +named_modules(memo=None, prefix='', remove_duplicate=True)[source]#

                                        Return an iterator over all modules in the network, yielding both the name of the module as well as the module itself.

                                        Parameters
                                        @@ -5059,7 +5059,7 @@

                                        Module
                                        -named_parameters(prefix='', recurse=True, remove_duplicate=True)[source]#
                                        +named_parameters(prefix='', recurse=True, remove_duplicate=True)[source]#

                                        Return an iterator over module parameters, yielding both the name of the parameter as well as the parameter itself.

                                        Parameters
                                        @@ -5089,7 +5089,7 @@

                                        Module
                                        -parameters(recurse=True)[source]#
                                        +parameters(recurse=True)[source]#

                                        Return an iterator over module parameters.

                                        This is typically passed to an optimizer.
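For example (model stands for any nn.Module instance):

>>> import torch
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.01)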

                                        @@ -5116,7 +5116,7 @@

                                        Module
                                        -register_backward_hook(hook)[source]#
                                        +register_backward_hook(hook)[source]#

                                        Register a backward hook on the module.

                                        This function is deprecated in favor of register_full_backward_hook() and the behavior of this function will change in future versions.

                                        @@ -5133,7 +5133,7 @@

                                        Module
                                        -register_buffer(name, tensor, persistent=True)[source]#
                                        +register_buffer(name, tensor, persistent=True)[source]#

                                        Add a buffer to the module.

                                        This is typically used to register a buffer that should not be considered a model parameter. For example, BatchNorm’s running_mean @@ -5165,7 +5165,7 @@
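A minimal sketch (the RunningStats module is hypothetical):

import torch
import torch.nn as nn

class RunningStats(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        # saved in state_dict() and moved by .to()/.cuda(), but not returned by parameters()
        self.register_buffer('running_mean', torch.zeros(num_features))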

                                        Module
                                        -register_forward_hook(hook, *, prepend=False, with_kwargs=False, always_call=False)[source]#
                                        +register_forward_hook(hook, *, prepend=False, with_kwargs=False, always_call=False)[source]#

                                        Register a forward hook on the module.

                                        The hook will be called every time after forward() has computed an output.

                                        If with_kwargs is False or not specified, the input contains only @@ -5216,7 +5216,7 @@
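For illustration, a hypothetical shape-logging hook (called as hook(module, input, output)):

>>> import torch, torch.nn as nn
>>> net = nn.Linear(4, 2)
>>> def log_shape(module, args, output):
...     print(output.shape)
>>> handle = net.register_forward_hook(log_shape)
>>> _ = net(torch.randn(3, 4))
torch.Size([3, 2])
>>> handle.remove()  # detach the hook when done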

                                        Module
                                        -register_forward_pre_hook(hook, *, prepend=False, with_kwargs=False)[source]#
                                        +register_forward_pre_hook(hook, *, prepend=False, with_kwargs=False)[source]#

                                        Register a forward pre-hook on the module.

                                        The hook will be called every time before forward() is invoked.

                                        If with_kwargs is false or not specified, the input contains only @@ -5266,7 +5266,7 @@

                                        Module
                                        -register_full_backward_hook(hook, prepend=False)[source]#
                                        +register_full_backward_hook(hook, prepend=False)[source]#

                                        Register a backward hook on the module.

                                        The hook will be called every time the gradients with respect to a module are computed, and its firing rules are as follows:

                                        @@ -5323,7 +5323,7 @@

                                        Module
                                        -register_full_backward_pre_hook(hook, prepend=False)[source]#
                                        +register_full_backward_pre_hook(hook, prepend=False)[source]#

                                        Register a backward pre-hook on the module.

                                        The hook will be called every time the gradients for the module are computed. The hook should have the following signature:

                                        @@ -5369,7 +5369,7 @@

                                        Module
                                        -register_load_state_dict_post_hook(hook)[source]#
                                        +register_load_state_dict_post_hook(hook)[source]#

                                        Register a post-hook to be run after module’s load_state_dict() is called.

                                        It should have the following signature::

                                        hook(module, incompatible_keys) -> None

                                        @@ -5399,7 +5399,7 @@

                                        Module
                                        -register_load_state_dict_pre_hook(hook)[source]#
                                        +register_load_state_dict_pre_hook(hook)[source]#

                                        Register a pre-hook to be run before module’s load_state_dict() is called.

                                        It should have the following signature::

                                        hook(module, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) -> None # noqa: B950

                                        @@ -5415,7 +5415,7 @@

                                        Module
                                        -register_module(name, module)[source]#
                                        +register_module(name, module)[source]#

                                        Alias for add_module().

                                        @@ -5423,7 +5423,7 @@

                                        Module
                                        -register_parameter(name, param)[source]#
                                        +register_parameter(name, param)[source]#

                                        Add a parameter to the module.

                                        The parameter can be accessed as an attribute using given name.

                                        @@ -5442,7 +5442,7 @@

                                        Module
                                        -register_state_dict_post_hook(hook)[source]#
                                        +register_state_dict_post_hook(hook)[source]#

                                        Register a post-hook for the state_dict() method.

                                        It should have the following signature::

                                        hook(module, state_dict, prefix, local_metadata) -> None

                                        @@ -5453,7 +5453,7 @@

                                        Module
                                        -register_state_dict_pre_hook(hook)[source]#
                                        +register_state_dict_pre_hook(hook)[source]#

                                        Register a pre-hook for the state_dict() method.

                                        It should have the following signature::

                                        hook(module, prefix, keep_vars) -> None

                                        @@ -5465,7 +5465,7 @@

                                        Module
                                        -requires_grad_(requires_grad=True)[source]#
                                        +requires_grad_(requires_grad=True)[source]#

                                        Change if autograd should record operations on parameters in this module.

                                        This method sets the parameters’ requires_grad attributes in-place.

                                        @@ -5489,7 +5489,7 @@

                                        Module
                                        -set_extra_state(state)[source]#
                                        +set_extra_state(state)[source]#

                                        Set extra state contained in the loaded state_dict.

                                        This function is called from load_state_dict() to handle any extra state found within the state_dict. Implement this function and a corresponding @@ -5504,7 +5504,7 @@

                                        Module
                                        -set_submodule(target, module, strict=False)[source]#
                                        +set_submodule(target, module, strict=False)[source]#

                                        Set the submodule given by target if it exists, otherwise throw an error.

                                        Note

                                        @@ -5562,7 +5562,7 @@

                                        Module
                                        -share_memory()[source]#
                                        +share_memory()[source]#

                                        See torch.Tensor.share_memory_().

                                        Return type
                                        @@ -5573,7 +5573,7 @@

                                        Module
-state_dict(*, destination: T_destination, prefix: str = '', keep_vars: bool = False) → T_destination[source]#
+state_dict(*, destination: T_destination, prefix: str = '', keep_vars: bool = False) → T_destination[source]#
state_dict(*, prefix: str = '', keep_vars: bool = False) → dict[str, Any]

                                        Return a dictionary containing references to the whole state of the module.

                                        @@ -5628,7 +5628,7 @@

                                        Module
-to(device: Optional[Union[str, device, int]] = ..., dtype: Optional[dtype] = ..., non_blocking: bool = ...) → Self[source]#
+to(device: Optional[Union[str, device, int]] = ..., dtype: Optional[dtype] = ..., non_blocking: bool = ...) → Self[source]#
to(dtype: dtype, non_blocking: bool = ...) → Self
                                        @@ -5637,22 +5637,22 @@

Module

This can be called as

                                        -to(device=None, dtype=None, non_blocking=False)[source]
                                        +to(device=None, dtype=None, non_blocking=False)[source]

                                        -to(dtype, non_blocking=False)[source]
                                        +to(dtype, non_blocking=False)[source]
                                        -to(tensor, non_blocking=False)[source]
                                        +to(tensor, non_blocking=False)[source]
                                        -to(memory_format=torch.channels_last)[source]
                                        +to(memory_format=torch.channels_last)[source]

                                        Its signature is similar to torch.Tensor.to(), but only accepts @@ -5731,7 +5731,7 @@

                                        Module
                                        -to_empty(*, device, recurse=True)[source]#
                                        +to_empty(*, device, recurse=True)[source]#

                                        Move the parameters and buffers to the specified device without copying storage.

                                        Parameters
                                        @@ -5753,7 +5753,7 @@

                                        Module
                                        -train(mode=True)[source]#
                                        +train(mode=True)[source]#

                                        Set the module in training mode.

                                        This has an effect only on certain modules. See the documentation of particular modules for details of their behaviors in training/evaluation @@ -5775,7 +5775,7 @@

                                        Module
                                        -type(dst_type)[source]#
                                        +type(dst_type)[source]#

                                        Casts all parameters and buffers to dst_type.

                                        Note

                                        @@ -5796,7 +5796,7 @@

                                        Module
                                        -xpu(device=None)[source]#
                                        +xpu(device=None)[source]#

                                        Move all model parameters and buffers to the XPU.

                                        This also makes associated parameters and buffers different objects. So it should be called before constructing optimizer if the module will @@ -5821,7 +5821,7 @@

                                        Module
                                        -zero_grad(set_to_none=True)[source]#
                                        +zero_grad(set_to_none=True)[source]#

                                        Reset gradients of all model parameters.

                                        See similar function under torch.optim.Optimizer for more context.

                                        diff --git a/2.9/generated/torch.nn.ModuleDict.html b/2.9/generated/torch.nn.ModuleDict.html index acb7596b0be..e9166ee4698 100644 --- a/2.9/generated/torch.nn.ModuleDict.html +++ b/2.9/generated/torch.nn.ModuleDict.html @@ -4404,7 +4404,7 @@

                                        ModuleDict#

                                        -class torch.nn.ModuleDict(modules=None)[source]#
                                        +class torch.nn.ModuleDict(modules=None)[source]#

                                        Holds submodules in a dictionary.

                                        ModuleDict can be indexed like a regular Python dictionary, but modules it contains are properly registered, and will be visible by all @@ -4445,7 +4445,7 @@
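A minimal illustrative sketch (the MyModule class and its keys are hypothetical):

import torch.nn as nn

class MyModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.choices = nn.ModuleDict({
            'conv': nn.Conv2d(10, 10, 3),
            'pool': nn.MaxPool2d(3),
        })

    def forward(self, x, choice):
        # indexed like a plain dict, but the submodules are registered
        return self.choices[choice](x)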

                                        ModuleDict
                                        -clear()[source]#
                                        +clear()[source]#

                                        Remove all items from the ModuleDict.

                                        @@ -4453,7 +4453,7 @@

                                        ModuleDict
                                        -items()[source]#
                                        +items()[source]#

                                        Return an iterable of the ModuleDict key/value pairs.

                                        Return type
                                        @@ -4464,7 +4464,7 @@

                                        ModuleDict
                                        -keys()[source]#
                                        +keys()[source]#

                                        Return an iterable of the ModuleDict keys.

                                        Return type
                                        @@ -4475,7 +4475,7 @@

                                        ModuleDict
                                        -pop(key)[source]#
                                        +pop(key)[source]#

                                        Remove key from the ModuleDict and return its module.

                                        Parameters
                                        @@ -4489,7 +4489,7 @@

                                        ModuleDict
                                        -update(modules)[source]#
                                        +update(modules)[source]#

                                        Update the ModuleDict with key-value pairs from a mapping, overwriting existing keys.

                                        Note

                                        @@ -4506,7 +4506,7 @@

                                        ModuleDict
                                        -values()[source]#
                                        +values()[source]#

                                        Return an iterable of the ModuleDict values.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.ModuleList.html b/2.9/generated/torch.nn.ModuleList.html index c1db1f1809b..0b894b442f4 100644 --- a/2.9/generated/torch.nn.ModuleList.html +++ b/2.9/generated/torch.nn.ModuleList.html @@ -4404,7 +4404,7 @@

                                        ModuleList#

                                        -class torch.nn.ModuleList(modules=None)[source]#
                                        +class torch.nn.ModuleList(modules=None)[source]#

                                        Holds submodules in a list.

                                        ModuleList can be indexed like a regular Python list, but modules it contains are properly registered, and will be visible by all @@ -4429,7 +4429,7 @@
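A minimal usage sketch (illustrative): modules held in a ModuleList are registered, so their parameters are picked up by the parent module.

>>> from torch import nn
>>> linears = nn.ModuleList(nn.Linear(10, 10) for _ in range(3))
>>> linears.append(nn.Linear(10, 10))   # list-style mutation keeps registration
>>> len(linears)
4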

                                        ModuleList
                                        -append(module)[source]#
                                        +append(module)[source]#

                                        Append a given module to the end of the list.

                                        Parameters
                                        @@ -4443,7 +4443,7 @@

                                        ModuleList
                                        -extend(modules)[source]#
                                        +extend(modules)[source]#

                                        Append modules from a Python iterable to the end of the list.

                                        Parameters
                                        @@ -4457,7 +4457,7 @@

                                        ModuleList
                                        -insert(index, module)[source]#
                                        +insert(index, module)[source]#

                                        Insert a given module before a given index in the list.

                                        Parameters
                                        diff --git a/2.9/generated/torch.nn.MultiLabelMarginLoss.html b/2.9/generated/torch.nn.MultiLabelMarginLoss.html index 29cdc2858e4..13e87ec0054 100644 --- a/2.9/generated/torch.nn.MultiLabelMarginLoss.html +++ b/2.9/generated/torch.nn.MultiLabelMarginLoss.html @@ -4404,7 +4404,7 @@

                                        MultiLabelMarginLoss#

                                        -class torch.nn.MultiLabelMarginLoss(size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.MultiLabelMarginLoss(size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices).

@@ -4459,7 +4459,7 @@

                                        MultiLabelMarginLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.MultiLabelSoftMarginLoss.html b/2.9/generated/torch.nn.MultiLabelSoftMarginLoss.html index 4d2f1af55eb..3d1c45845b4 100644 --- a/2.9/generated/torch.nn.MultiLabelSoftMarginLoss.html +++ b/2.9/generated/torch.nn.MultiLabelSoftMarginLoss.html @@ -4404,7 +4404,7 @@

                                        MultiLabelSoftMarginLoss#

                                        -class torch.nn.MultiLabelSoftMarginLoss(weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.MultiLabelSoftMarginLoss(weight=None, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that optimizes a multi-label one-versus-all loss based on max-entropy, between input x and target y of size (N, C).

@@ -4449,7 +4449,7 @@

                                        MultiLabelSoftMarginLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.MultiMarginLoss.html b/2.9/generated/torch.nn.MultiMarginLoss.html index b6f74343459..14474a460b7 100644 --- a/2.9/generated/torch.nn.MultiMarginLoss.html +++ b/2.9/generated/torch.nn.MultiMarginLoss.html @@ -4404,7 +4404,7 @@

                                        MultiMarginLoss#

                                        -class torch.nn.MultiMarginLoss(p=1, margin=1.0, weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.MultiMarginLoss(p=1, margin=1.0, weight=None, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that optimizes a multi-class classification hinge loss (margin-based loss) between input x (a 2D mini-batch Tensor) and output y (which is a 1D tensor of target class indices,

@@ -4468,7 +4468,7 @@

                                        MultiMarginLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.MultiheadAttention.html b/2.9/generated/torch.nn.MultiheadAttention.html index 58850db9552..9df158a051c 100644 --- a/2.9/generated/torch.nn.MultiheadAttention.html +++ b/2.9/generated/torch.nn.MultiheadAttention.html @@ -4404,7 +4404,7 @@

                                        MultiheadAttention#

                                        -class torch.nn.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None)[source]#
                                        +class torch.nn.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None)[source]#

                                        Allows the model to jointly attend to information from different representation subspaces.

                                        This MultiheadAttention layer implements the original architecture described in the Attention Is All You Need paper. The @@ -4467,7 +4467,7 @@
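A minimal self-attention sketch (shapes are illustrative, not from the original page). With the default batch_first=False, inputs are (seq_len, batch, embed_dim):

>>> import torch
>>> from torch import nn
>>> mha = nn.MultiheadAttention(embed_dim=16, num_heads=4)
>>> query = torch.randn(5, 2, 16)                        # (L, N, E)
>>> attn_output, attn_weights = mha(query, query, query)  # self-attention
>>> attn_output.shape
torch.Size([5, 2, 16])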

                                        MultiheadAttention
                                        -forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False)[source]#
                                        +forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False)[source]#

                                        Compute attention outputs using query, key, and value embeddings.

                                        Supports optional parameters for padding, masks and attention weights.

                                        @@ -4543,7 +4543,7 @@

                                        MultiheadAttention
                                        -merge_masks(attn_mask, key_padding_mask, query)[source]#
                                        +merge_masks(attn_mask, key_padding_mask, query)[source]#

                                        Determine mask type and combine masks if necessary.

If only one mask is provided, that mask and the corresponding mask type will be returned. If both masks are provided, they will be both

diff --git a/2.9/generated/torch.nn.NLLLoss.html b/2.9/generated/torch.nn.NLLLoss.html index 1e210c6c7c9..7ad7eaecd4a 100644 --- a/2.9/generated/torch.nn.NLLLoss.html +++ b/2.9/generated/torch.nn.NLLLoss.html @@ -4404,7 +4404,7 @@

                                        NLLLoss#

                                        -class torch.nn.NLLLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.NLLLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')[source]#

                                        The negative log likelihood loss. It is useful to train a classification problem with C classes.

                                        If provided, the optional argument weight should be a 1D Tensor assigning @@ -4511,7 +4511,7 @@
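A minimal sketch (illustrative): NLLLoss expects log-probabilities, so it is typically paired with LogSoftmax (or replaced by CrossEntropyLoss, which fuses the two):

>>> import torch
>>> from torch import nn
>>> m = nn.LogSoftmax(dim=1)
>>> loss = nn.NLLLoss()
>>> input = torch.randn(3, 5, requires_grad=True)  # N=3 samples, C=5 classes
>>> target = torch.tensor([1, 0, 4])               # one class index per sample
>>> output = loss(m(input), target)
>>> output.backward()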

                                        NLLLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.PReLU.html b/2.9/generated/torch.nn.PReLU.html index 0d084bb70ba..89d7bc7b088 100644 --- a/2.9/generated/torch.nn.PReLU.html +++ b/2.9/generated/torch.nn.PReLU.html @@ -4404,7 +4404,7 @@

                                        PReLU#

                                        -class torch.nn.PReLU(num_parameters=1, init=0.25, device=None, dtype=None)[source]#
                                        +class torch.nn.PReLU(num_parameters=1, init=0.25, device=None, dtype=None)[source]#

                                        Applies the element-wise PReLU function.

\text{PReLU}(x) = \max(0, x) + a * \min(0, x)

@@ -4461,7 +4461,7 @@

                                        PReLU#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4472,7 +4472,7 @@

                                        PReLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        @@ -4483,7 +4483,7 @@

                                        PReLU#
                                        -reset_parameters()[source]#
                                        +reset_parameters()[source]#

                                        Resets parameters based on their initialization used in __init__.

                                        diff --git a/2.9/generated/torch.nn.PairwiseDistance.html b/2.9/generated/torch.nn.PairwiseDistance.html index e87db9f2405..6821c52ee7b 100644 --- a/2.9/generated/torch.nn.PairwiseDistance.html +++ b/2.9/generated/torch.nn.PairwiseDistance.html @@ -4404,7 +4404,7 @@

                                        PairwiseDistance#

                                        -class torch.nn.PairwiseDistance(p=2.0, eps=1e-06, keepdim=False)[source]#
                                        +class torch.nn.PairwiseDistance(p=2.0, eps=1e-06, keepdim=False)[source]#

                                        Computes the pairwise distance between input vectors, or between columns of input matrices.

                                        Distances are computed using p-norm, with constant eps added to avoid division by zero if p is negative, i.e.:

                                        @@ -4444,7 +4444,7 @@
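A minimal sketch (shapes illustrative): distances are computed row-wise between two batches of vectors.

>>> import torch
>>> from torch import nn
>>> pdist = nn.PairwiseDistance(p=2)
>>> x1 = torch.randn(100, 128)
>>> x2 = torch.randn(100, 128)
>>> output = pdist(x1, x2)   # shape (100,): one distance per row pair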

                                        PairwiseDistance
                                        -forward(x1, x2)[source]#
                                        +forward(x1, x2)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.ParameterDict.html b/2.9/generated/torch.nn.ParameterDict.html index 07150a5f226..abfdd6c3440 100644 --- a/2.9/generated/torch.nn.ParameterDict.html +++ b/2.9/generated/torch.nn.ParameterDict.html @@ -4404,7 +4404,7 @@

                                        ParameterDict#

                                        -class torch.nn.ParameterDict(parameters=None)[source]#
                                        +class torch.nn.ParameterDict(parameters=None)[source]#

                                        Holds parameters in a dictionary.

                                        ParameterDict can be indexed like a regular Python dictionary, but Parameters it contains are properly registered, and will be visible by all Module methods. @@ -4442,7 +4442,7 @@
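A minimal sketch (illustrative): parameters stored in a ParameterDict are registered and can be selected by key at forward time.

>>> import torch
>>> from torch import nn
>>> params = nn.ParameterDict({
...     "left": nn.Parameter(torch.randn(5, 10)),
...     "right": nn.Parameter(torch.randn(5, 10)),
... })
>>> x = torch.randn(1, 5)
>>> out = x @ params["left"]   # choose a parameter by key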

                                        ParameterDict
                                        -clear()[source]#
                                        +clear()[source]#

                                        Remove all items from the ParameterDict.

                                        @@ -4450,7 +4450,7 @@

                                        ParameterDict
                                        -copy()[source]#
                                        +copy()[source]#

                                        Return a copy of this ParameterDict instance.

                                        Return type
                                        @@ -4461,7 +4461,7 @@

                                        ParameterDict
                                        -fromkeys(keys, default=None)[source]#
                                        +fromkeys(keys, default=None)[source]#

                                        Return a new ParameterDict with the keys provided.

                                        Parameters
                                        @@ -4478,7 +4478,7 @@

                                        ParameterDict
                                        -get(key, default=None)[source]#
                                        +get(key, default=None)[source]#

                                        Return the parameter associated with key if present. Otherwise return default if provided, None if not.

                                        Parameters
                                        @@ -4495,7 +4495,7 @@

                                        ParameterDict
                                        -items()[source]#
                                        +items()[source]#

                                        Return an iterable of the ParameterDict key/value pairs.

                                        Return type
                                        @@ -4506,7 +4506,7 @@

                                        ParameterDict
                                        -keys()[source]#
                                        +keys()[source]#

                                        Return an iterable of the ParameterDict keys.

                                        Return type
                                        @@ -4517,7 +4517,7 @@

                                        ParameterDict
                                        -pop(key)[source]#
                                        +pop(key)[source]#

                                        Remove key from the ParameterDict and return its parameter.

                                        Parameters
                                        @@ -4531,7 +4531,7 @@

                                        ParameterDict
                                        -popitem()[source]#
                                        +popitem()[source]#

                                        Remove and return the last inserted (key, parameter) pair from the ParameterDict.

                                        Return type
                                        @@ -4542,7 +4542,7 @@

                                        ParameterDict
                                        -setdefault(key, default=None)[source]#
                                        +setdefault(key, default=None)[source]#

Set the default for a key in the ParameterDict.

                                        If key is in the ParameterDict, return its value. If not, insert key with a parameter default and return default. @@ -4562,7 +4562,7 @@

                                        ParameterDict
                                        -update(parameters)[source]#
                                        +update(parameters)[source]#

                                        Update the ParameterDict with key-value pairs from parameters, overwriting existing keys.

                                        Note

                                        @@ -4580,7 +4580,7 @@

                                        ParameterDict
                                        -values()[source]#
                                        +values()[source]#

                                        Return an iterable of the ParameterDict values.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.ParameterList.html b/2.9/generated/torch.nn.ParameterList.html index d193c223a70..1134526c2c2 100644 --- a/2.9/generated/torch.nn.ParameterList.html +++ b/2.9/generated/torch.nn.ParameterList.html @@ -4404,7 +4404,7 @@

                                        ParameterList#

                                        -class torch.nn.ParameterList(values=None)[source]#
                                        +class torch.nn.ParameterList(values=None)[source]#

                                        Holds parameters in a list.

                                        ParameterList can be used like a regular Python list, but Tensors that are Parameter are properly registered, @@ -4434,7 +4434,7 @@

                                        ParameterList
                                        -append(value)[source]#
                                        +append(value)[source]#

                                        Append a given value at the end of the list.

                                        Parameters
                                        @@ -4448,7 +4448,7 @@

                                        ParameterList
                                        -extend(values)[source]#
                                        +extend(values)[source]#

                                        Append values from a Python iterable to the end of the list.

                                        Parameters
                                        @@ -4462,7 +4462,7 @@

                                        ParameterList
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.PixelShuffle.html b/2.9/generated/torch.nn.PixelShuffle.html index e6bbfa765bb..aa4f0fbaf8f 100644 --- a/2.9/generated/torch.nn.PixelShuffle.html +++ b/2.9/generated/torch.nn.PixelShuffle.html @@ -4404,7 +4404,7 @@

                                        PixelShuffle#

                                        -class torch.nn.PixelShuffle(upscale_factor)[source]#
                                        +class torch.nn.PixelShuffle(upscale_factor)[source]#

                                        Rearrange elements in a tensor according to an upscaling factor.

Rearranges elements in a tensor of shape (*, C \times r^2, H, W) to a tensor of shape (*, C, H \times r, W \times r), where r is an upscale factor.

                                        @@ -4444,7 +4444,7 @@
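A minimal sketch showing the channel-to-space rearrangement for r = 3 (shapes illustrative):

>>> import torch
>>> from torch import nn
>>> pixel_shuffle = nn.PixelShuffle(3)
>>> input = torch.randn(1, 9, 4, 4)   # C * r^2 = 1 * 3^2 = 9 channels
>>> output = pixel_shuffle(input)
>>> output.size()
torch.Size([1, 1, 12, 12])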

                                        PixelShuffle
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4455,7 +4455,7 @@

                                        PixelShuffle
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.PixelUnshuffle.html b/2.9/generated/torch.nn.PixelUnshuffle.html index a29efd166cb..3524b9d6d64 100644 --- a/2.9/generated/torch.nn.PixelUnshuffle.html +++ b/2.9/generated/torch.nn.PixelUnshuffle.html @@ -4404,7 +4404,7 @@

                                        PixelUnshuffle#

                                        -class torch.nn.PixelUnshuffle(downscale_factor)[source]#
                                        +class torch.nn.PixelUnshuffle(downscale_factor)[source]#

                                        Reverse the PixelShuffle operation.

Reverses the PixelShuffle operation by rearranging elements in a tensor of shape (*, C, H \times r, W \times r) to a tensor of shape

@@ -4443,7 +4443,7 @@

                                        PixelUnshuffle
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4454,7 +4454,7 @@

                                        PixelUnshuffle
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.PoissonNLLLoss.html b/2.9/generated/torch.nn.PoissonNLLLoss.html index 569fca7e9cd..a45448606ff 100644 --- a/2.9/generated/torch.nn.PoissonNLLLoss.html +++ b/2.9/generated/torch.nn.PoissonNLLLoss.html @@ -4404,7 +4404,7 @@

                                        PoissonNLLLoss#

                                        -class torch.nn.PoissonNLLLoss(log_input=True, full=False, size_average=None, eps=1e-08, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.PoissonNLLLoss(log_input=True, full=False, size_average=None, eps=1e-08, reduce=None, reduction='mean')[source]#

                                        Negative log likelihood loss with Poisson distribution of target.

                                        The loss can be described as:

                                        @@ -4465,7 +4465,7 @@

                                        PoissonNLLLoss
                                        -forward(log_input, target)[source]#
                                        +forward(log_input, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.RMSNorm.html b/2.9/generated/torch.nn.RMSNorm.html index 257d4e78ede..01fda3a8496 100644 --- a/2.9/generated/torch.nn.RMSNorm.html +++ b/2.9/generated/torch.nn.RMSNorm.html @@ -4404,7 +4404,7 @@

                                        RMSNorm#

                                        -class torch.nn.RMSNorm(normalized_shape, eps=None, elementwise_affine=True, device=None, dtype=None)[source]#
                                        +class torch.nn.RMSNorm(normalized_shape, eps=None, elementwise_affine=True, device=None, dtype=None)[source]#

                                        Applies Root Mean Square Layer Normalization over a mini-batch of inputs.

                                        This layer implements the operation as described in the paper Root Mean Square Layer Normalization

                                        @@ -4453,7 +4453,7 @@
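A minimal sketch (illustrative): normalized_shape gives the trailing dimensions to normalize over.

>>> import torch
>>> from torch import nn
>>> rms_norm = nn.RMSNorm([4])        # normalize over the last dim of size 4
>>> input = torch.randn(2, 3, 4)
>>> output = rms_norm(input)          # same shape as the input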

                                        RMSNorm
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4464,7 +4464,7 @@

                                        RMSNorm
                                        -forward(x)[source]#
                                        +forward(x)[source]#

                                        Runs the forward pass.

                                        Return type
                                        @@ -4475,7 +4475,7 @@

                                        RMSNorm
                                        -reset_parameters()[source]#
                                        +reset_parameters()[source]#

                                        Resets parameters based on their initialization used in __init__.

                                        diff --git a/2.9/generated/torch.nn.RNN.html b/2.9/generated/torch.nn.RNN.html index 91ff25b2414..6a1fb393992 100644 --- a/2.9/generated/torch.nn.RNN.html +++ b/2.9/generated/torch.nn.RNN.html @@ -4404,7 +4404,7 @@

                                        RNN#

                                        -class torch.nn.RNN(input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None)[source]#
                                        +class torch.nn.RNN(input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None)[source]#

Apply a multi-layer Elman RNN with tanh or ReLU non-linearity to an input sequence. For each element in the input sequence, each layer computes the following function:

                                        @@ -4587,7 +4587,7 @@
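A minimal sketch (shapes illustrative; with the default batch_first=False, inputs are (seq_len, batch, input_size)):

>>> import torch
>>> from torch import nn
>>> rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=2)
>>> input = torch.randn(5, 3, 10)     # (seq_len, batch, input_size)
>>> h0 = torch.randn(2, 3, 20)        # (num_layers, batch, hidden_size)
>>> output, hn = rnn(input, h0)
>>> output.shape
torch.Size([5, 3, 20])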

                                        RNN#

                                        -forward(input: Tensor, hx: Optional[Tensor] = None) tuple[torch.Tensor, torch.Tensor][source]#
                                        +forward(input: Tensor, hx: Optional[Tensor] = None) tuple[torch.Tensor, torch.Tensor][source]#
                                        forward(input: PackedSequence, hx: Optional[Tensor] = None) tuple[torch.nn.utils.rnn.PackedSequence, torch.Tensor]

                                        Runs the forward pass.

                                        diff --git a/2.9/generated/torch.nn.RNNBase.html b/2.9/generated/torch.nn.RNNBase.html index 07a59bc5e7c..f6ded866743 100644 --- a/2.9/generated/torch.nn.RNNBase.html +++ b/2.9/generated/torch.nn.RNNBase.html @@ -4404,7 +4404,7 @@

                                        RNNBase#

                                        -class torch.nn.RNNBase(mode, input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, proj_size=0, device=None, dtype=None)[source]#
                                        +class torch.nn.RNNBase(mode, input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, proj_size=0, device=None, dtype=None)[source]#

                                        Base class for RNN modules (RNN, LSTM, GRU).

                                        Implements aspects of RNNs shared by the RNN, LSTM, and GRU classes, such as module initialization and utility methods for parameter storage management.

                                        @@ -4420,7 +4420,7 @@

                                        RNNBase
                                        -flatten_parameters()[source]#
                                        +flatten_parameters()[source]#

Reset the parameter data pointers so that they can use faster code paths.

                                        Right now, this works only if the module is on the GPU and cuDNN is enabled. Otherwise, it’s a no-op.

                                        diff --git a/2.9/generated/torch.nn.RNNCell.html b/2.9/generated/torch.nn.RNNCell.html index 486363dacde..44fb46454f4 100644 --- a/2.9/generated/torch.nn.RNNCell.html +++ b/2.9/generated/torch.nn.RNNCell.html @@ -4404,7 +4404,7 @@

                                        RNNCell#

                                        -class torch.nn.RNNCell(input_size, hidden_size, bias=True, nonlinearity='tanh', device=None, dtype=None)[source]#
                                        +class torch.nn.RNNCell(input_size, hidden_size, bias=True, nonlinearity='tanh', device=None, dtype=None)[source]#

                                        An Elman RNN cell with tanh or ReLU non-linearity.

h' = \tanh(W_{ih} x + b_{ih} + W_{hh} h + b_{hh})

                                        If nonlinearity is ‘relu’, then ReLU is used in place of tanh.

                                        diff --git a/2.9/generated/torch.nn.RReLU.html b/2.9/generated/torch.nn.RReLU.html index d02fff81b46..6834b57a7bb 100644 --- a/2.9/generated/torch.nn.RReLU.html +++ b/2.9/generated/torch.nn.RReLU.html @@ -4404,7 +4404,7 @@

                                        RReLU#

                                        -class torch.nn.RReLU(lower=0.125, upper=0.3333333333333333, inplace=False)[source]#
                                        +class torch.nn.RReLU(lower=0.125, upper=0.3333333333333333, inplace=False)[source]#

                                        Applies the randomized leaky rectified linear unit function, element-wise.

                                        Method described in the paper: Empirical Evaluation of Rectified Activations in Convolutional Network.

                                        @@ -4444,7 +4444,7 @@

                                        RReLU#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4455,7 +4455,7 @@

                                        RReLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.ReLU.html b/2.9/generated/torch.nn.ReLU.html index 5163f41496f..aad12c0ed73 100644 --- a/2.9/generated/torch.nn.ReLU.html +++ b/2.9/generated/torch.nn.ReLU.html @@ -4404,7 +4404,7 @@

                                        ReLU#

                                        -class torch.nn.ReLU(inplace=False)[source]#
                                        +class torch.nn.ReLU(inplace=False)[source]#

                                        Applies the rectified linear unit function element-wise.

\text{ReLU}(x) = (x)^+ = \max(0, x)

                                        @@ -4435,7 +4435,7 @@

                                        ReLU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4446,7 +4446,7 @@

                                        ReLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.ReLU6.html b/2.9/generated/torch.nn.ReLU6.html index 04d3f1837f2..c73956dadec 100644 --- a/2.9/generated/torch.nn.ReLU6.html +++ b/2.9/generated/torch.nn.ReLU6.html @@ -4404,7 +4404,7 @@

                                        ReLU6#

                                        -class torch.nn.ReLU6(inplace=False)[source]#
                                        +class torch.nn.ReLU6(inplace=False)[source]#

                                        Applies the ReLU6 function element-wise.

\text{ReLU6}(x) = \min(\max(0, x), 6)

@@ -4430,7 +4430,7 @@

                                        ReLU6#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.ReflectionPad1d.html b/2.9/generated/torch.nn.ReflectionPad1d.html index a773e44ec03..2fe8608a223 100644 --- a/2.9/generated/torch.nn.ReflectionPad1d.html +++ b/2.9/generated/torch.nn.ReflectionPad1d.html @@ -4404,7 +4404,7 @@

                                        ReflectionPad1d#

                                        -class torch.nn.ReflectionPad1d(padding)[source]#
                                        +class torch.nn.ReflectionPad1d(padding)[source]#

                                        Pads the input tensor using the reflection of the input boundary.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        diff --git a/2.9/generated/torch.nn.ReflectionPad2d.html b/2.9/generated/torch.nn.ReflectionPad2d.html index 0298f22cb4a..1a8fe9bd229 100644 --- a/2.9/generated/torch.nn.ReflectionPad2d.html +++ b/2.9/generated/torch.nn.ReflectionPad2d.html @@ -4404,7 +4404,7 @@

                                        ReflectionPad2d#

                                        -class torch.nn.ReflectionPad2d(padding)[source]#
                                        +class torch.nn.ReflectionPad2d(padding)[source]#

                                        Pads the input tensor using the reflection of the input boundary.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        diff --git a/2.9/generated/torch.nn.ReflectionPad3d.html b/2.9/generated/torch.nn.ReflectionPad3d.html index 0f0878f3d9e..e80e6ff55dc 100644 --- a/2.9/generated/torch.nn.ReflectionPad3d.html +++ b/2.9/generated/torch.nn.ReflectionPad3d.html @@ -4404,7 +4404,7 @@

                                        ReflectionPad3d#

                                        -class torch.nn.ReflectionPad3d(padding)[source]#
                                        +class torch.nn.ReflectionPad3d(padding)[source]#

                                        Pads the input tensor using the reflection of the input boundary.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        diff --git a/2.9/generated/torch.nn.ReplicationPad1d.html b/2.9/generated/torch.nn.ReplicationPad1d.html index 3603e687cd3..6b96f18c146 100644 --- a/2.9/generated/torch.nn.ReplicationPad1d.html +++ b/2.9/generated/torch.nn.ReplicationPad1d.html @@ -4404,7 +4404,7 @@

                                        ReplicationPad1d#

                                        -class torch.nn.ReplicationPad1d(padding)[source]#
                                        +class torch.nn.ReplicationPad1d(padding)[source]#

                                        Pads the input tensor using replication of the input boundary.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        diff --git a/2.9/generated/torch.nn.ReplicationPad2d.html b/2.9/generated/torch.nn.ReplicationPad2d.html index b6c5927910f..38827cc5ebf 100644 --- a/2.9/generated/torch.nn.ReplicationPad2d.html +++ b/2.9/generated/torch.nn.ReplicationPad2d.html @@ -4404,7 +4404,7 @@

                                        ReplicationPad2d#

                                        -class torch.nn.ReplicationPad2d(padding)[source]#
                                        +class torch.nn.ReplicationPad2d(padding)[source]#

                                        Pads the input tensor using replication of the input boundary.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        diff --git a/2.9/generated/torch.nn.ReplicationPad3d.html b/2.9/generated/torch.nn.ReplicationPad3d.html index e6d9c7236cb..79e5fbe9796 100644 --- a/2.9/generated/torch.nn.ReplicationPad3d.html +++ b/2.9/generated/torch.nn.ReplicationPad3d.html @@ -4404,7 +4404,7 @@

                                        ReplicationPad3d#

                                        -class torch.nn.ReplicationPad3d(padding)[source]#
                                        +class torch.nn.ReplicationPad3d(padding)[source]#

                                        Pads the input tensor using replication of the input boundary.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        diff --git a/2.9/generated/torch.nn.SELU.html b/2.9/generated/torch.nn.SELU.html index 46159694c28..1fa233b420d 100644 --- a/2.9/generated/torch.nn.SELU.html +++ b/2.9/generated/torch.nn.SELU.html @@ -4404,7 +4404,7 @@

                                        SELU#

                                        -class torch.nn.SELU(inplace=False)[source]#
                                        +class torch.nn.SELU(inplace=False)[source]#

                                        Applies the SELU function element-wise.

\text{SELU}(x) = \text{scale} * (\max(0, x) + \min(0, \alpha * (\exp(x) - 1)))

@@ -4440,7 +4440,7 @@

                                        SELU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4451,7 +4451,7 @@

                                        SELU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Sequential.html b/2.9/generated/torch.nn.Sequential.html index 771a34eb10d..ca27b3654d1 100644 --- a/2.9/generated/torch.nn.Sequential.html +++ b/2.9/generated/torch.nn.Sequential.html @@ -4404,7 +4404,7 @@

                                        Sequential#

                                        -class torch.nn.Sequential(*args: Module)[source]#
                                        +class torch.nn.Sequential(*args: Module)[source]#
                                        class torch.nn.Sequential(arg: OrderedDict[str, Module])

                                        A sequential container.

                                        @@ -4450,7 +4450,7 @@
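A minimal sketch (layer choices illustrative): modules run in the order they are passed, each output feeding the next input.

>>> from torch import nn
>>> model = nn.Sequential(
...     nn.Conv2d(1, 20, 5),
...     nn.ReLU(),
...     nn.Conv2d(20, 64, 5),
...     nn.ReLU(),
... )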

                                        Sequential
                                        -append(module)[source]#
                                        +append(module)[source]#

                                        Append a given module to the end.

                                        Parameters
                                        @@ -4475,7 +4475,7 @@

                                        Sequential
                                        -extend(sequential)[source]#
                                        +extend(sequential)[source]#

                                        Extends the current Sequential container with layers from another Sequential container.

                                        Parameters
                                        @@ -4502,13 +4502,13 @@

                                        Sequential
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        -insert(index, module)[source]#
                                        +insert(index, module)[source]#

                                        Inserts a module into the Sequential container at the specified index.

                                        Parameters
                                        @@ -4536,7 +4536,7 @@

                                        Sequential
                                        -pop(key)[source]#
                                        +pop(key)[source]#

                                        Pop key from self.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.SiLU.html b/2.9/generated/torch.nn.SiLU.html index 4d7d6f72746..d0190a89194 100644 --- a/2.9/generated/torch.nn.SiLU.html +++ b/2.9/generated/torch.nn.SiLU.html @@ -4404,7 +4404,7 @@

                                        SiLU#

                                        -class torch.nn.SiLU(inplace=False)[source]#
                                        +class torch.nn.SiLU(inplace=False)[source]#

                                        Applies the Sigmoid Linear Unit (SiLU) function, element-wise.

                                        The SiLU function is also known as the swish function.

                                        @@ -4437,7 +4437,7 @@

                                        SiLU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4448,7 +4448,7 @@

                                        SiLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Sigmoid.html b/2.9/generated/torch.nn.Sigmoid.html index 0912788e143..261408963b8 100644 --- a/2.9/generated/torch.nn.Sigmoid.html +++ b/2.9/generated/torch.nn.Sigmoid.html @@ -4404,7 +4404,7 @@

                                        Sigmoid#

                                        -class torch.nn.Sigmoid(*args, **kwargs)[source]#
                                        +class torch.nn.Sigmoid(*args, **kwargs)[source]#

                                        Applies the Sigmoid function element-wise.

\text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}

@@ -4427,7 +4427,7 @@

                                        Sigmoid
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.SmoothL1Loss.html b/2.9/generated/torch.nn.SmoothL1Loss.html index 9ee9d14781c..dc5c0ab5f03 100644 --- a/2.9/generated/torch.nn.SmoothL1Loss.html +++ b/2.9/generated/torch.nn.SmoothL1Loss.html @@ -4404,7 +4404,7 @@

                                        SmoothL1Loss#

                                        -class torch.nn.SmoothL1Loss(size_average=None, reduce=None, reduction='mean', beta=1.0)[source]#
                                        +class torch.nn.SmoothL1Loss(size_average=None, reduce=None, reduction='mean', beta=1.0)[source]#

                                        Creates a criterion that uses a squared term if the absolute element-wise error falls below beta and an L1 term otherwise. It is less sensitive to outliers than torch.nn.MSELoss and in some cases @@ -4481,7 +4481,7 @@

                                        SmoothL1Loss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.SoftMarginLoss.html b/2.9/generated/torch.nn.SoftMarginLoss.html index 4b966937616..013ca825c21 100644 --- a/2.9/generated/torch.nn.SoftMarginLoss.html +++ b/2.9/generated/torch.nn.SoftMarginLoss.html @@ -4404,7 +4404,7 @@

                                        SoftMarginLoss#

                                        -class torch.nn.SoftMarginLoss(size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.SoftMarginLoss(size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that optimizes a two-class classification logistic loss between input tensor x and target tensor y (containing 1 or -1).

                                        @@ -4443,7 +4443,7 @@

                                        SoftMarginLoss
                                        -forward(input, target)[source]#
                                        +forward(input, target)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Softmax.html b/2.9/generated/torch.nn.Softmax.html index cdeafc3bd02..ff4d385d76c 100644 --- a/2.9/generated/torch.nn.Softmax.html +++ b/2.9/generated/torch.nn.Softmax.html @@ -4404,7 +4404,7 @@

                                        Softmax#

                                        -class torch.nn.Softmax(dim=None)[source]#
                                        +class torch.nn.Softmax(dim=None)[source]#

                                        Applies the Softmax function to an n-dimensional input Tensor.

Rescales the input so that the elements of the n-dimensional output Tensor lie in the range [0,1] and sum to 1.

                                        @@ -4449,7 +4449,7 @@
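A minimal sketch (shapes illustrative): dim selects the dimension along which entries are rescaled to sum to 1.

>>> import torch
>>> from torch import nn
>>> m = nn.Softmax(dim=1)
>>> input = torch.randn(2, 3)
>>> output = m(input)          # each row of output now sums to 1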

                                        Softmax
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4460,7 +4460,7 @@

                                        Softmax
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Softmax2d.html b/2.9/generated/torch.nn.Softmax2d.html index b7d96492376..e55263004b2 100644 --- a/2.9/generated/torch.nn.Softmax2d.html +++ b/2.9/generated/torch.nn.Softmax2d.html @@ -4404,7 +4404,7 @@

                                        Softmax2d#

                                        -class torch.nn.Softmax2d(*args, **kwargs)[source]#
                                        +class torch.nn.Softmax2d(*args, **kwargs)[source]#

                                        Applies SoftMax over features to each spatial location.

When given an image of Channels x Height x Width, it will apply Softmax to each location (Channels, h_i, w_j)

                                        @@ -4433,7 +4433,7 @@

                                        Softmax2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Softmin.html b/2.9/generated/torch.nn.Softmin.html index d3e8539935f..86811010781 100644 --- a/2.9/generated/torch.nn.Softmin.html +++ b/2.9/generated/torch.nn.Softmin.html @@ -4404,7 +4404,7 @@

                                        Softmin#

                                        -class torch.nn.Softmin(dim=None)[source]#
                                        +class torch.nn.Softmin(dim=None)[source]#

                                        Applies the Softmin function to an n-dimensional input Tensor.

Rescales the input so that the elements of the n-dimensional output Tensor lie in the range [0, 1] and sum to 1.

                                        @@ -4441,7 +4441,7 @@

                                        Softmin
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4452,7 +4452,7 @@

                                        Softmin
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Softplus.html b/2.9/generated/torch.nn.Softplus.html index 6562f73bee8..f8dec8fe9d9 100644 --- a/2.9/generated/torch.nn.Softplus.html +++ b/2.9/generated/torch.nn.Softplus.html @@ -4404,7 +4404,7 @@

                                        Softplus#

                                        -class torch.nn.Softplus(beta=1.0, threshold=20.0)[source]#
                                        +class torch.nn.Softplus(beta=1.0, threshold=20.0)[source]#

                                        Applies the Softplus function element-wise.

\text{Softplus}(x) = \frac{1}{\beta} * \log(1 + \exp(\beta * x))

@@ -4437,7 +4437,7 @@

                                        Softplus
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4448,7 +4448,7 @@

                                        Softplus
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Run forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Softshrink.html b/2.9/generated/torch.nn.Softshrink.html index 3c2cb1b9f8c..064062d7270 100644 --- a/2.9/generated/torch.nn.Softshrink.html +++ b/2.9/generated/torch.nn.Softshrink.html @@ -4404,7 +4404,7 @@

                                        Softshrink#

                                        -class torch.nn.Softshrink(lambd=0.5)[source]#
                                        +class torch.nn.Softshrink(lambd=0.5)[source]#

                                        Applies the soft shrinkage function element-wise.

\text{SoftShrinkage}(x) =
\begin{cases}
x - \lambda, & \text{if } x > \lambda \\
x + \lambda, & \text{if } x < -\lambda \\
0, & \text{otherwise}
\end{cases}

@@ -4435,7 +4435,7 @@

                                        Softshrink
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4446,7 +4446,7 @@

                                        Softshrink
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Run forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Softsign.html b/2.9/generated/torch.nn.Softsign.html index 42e30e5d9e2..38721a0290f 100644 --- a/2.9/generated/torch.nn.Softsign.html +++ b/2.9/generated/torch.nn.Softsign.html @@ -4404,7 +4404,7 @@

                                        Softsign#

                                        -class torch.nn.Softsign(*args, **kwargs)[source]#
                                        +class torch.nn.Softsign(*args, **kwargs)[source]#

                                        Applies the element-wise Softsign function.

\text{SoftSign}(x) = \frac{x}{1 + |x|}

@@ -4427,7 +4427,7 @@

                                        Softsign
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.SyncBatchNorm.html b/2.9/generated/torch.nn.SyncBatchNorm.html index def65badf8c..d99089e7fed 100644 --- a/2.9/generated/torch.nn.SyncBatchNorm.html +++ b/2.9/generated/torch.nn.SyncBatchNorm.html @@ -4404,7 +4404,7 @@

                                        SyncBatchNorm#

                                        -class torch.nn.SyncBatchNorm(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, process_group=None, device=None, dtype=None)[source]#
                                        +class torch.nn.SyncBatchNorm(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, process_group=None, device=None, dtype=None)[source]#

Applies Batch Normalization over an N-dimensional input.

The N-D input is a mini-batch of [N-2]D inputs with an additional channel dimension, as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing
@@ -4516,7 +4516,7 @@

                                        SyncBatchNorm
                                        -classmethod convert_sync_batchnorm(module, process_group=None)[source]#
                                        +classmethod convert_sync_batchnorm(module, process_group=None)[source]#

                                        Converts all BatchNorm*D layers in the model to torch.nn.SyncBatchNorm layers.

                                        Parameters
                                        @@ -4555,7 +4555,7 @@

                                        SyncBatchNorm
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
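A minimal conversion sketch; note that synchronization only takes effect under torch.nn.parallel.DistributedDataParallel with a process group, otherwise the converted layers behave like ordinary BatchNorm:

>>> model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
>>> sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
>>> type(sync_model[1]).__name__   # the BatchNorm2d was replaced
'SyncBatchNorm'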
                                        diff --git a/2.9/generated/torch.nn.Tanh.html b/2.9/generated/torch.nn.Tanh.html index da8a611339e..44ca0e36025 100644 --- a/2.9/generated/torch.nn.Tanh.html +++ b/2.9/generated/torch.nn.Tanh.html @@ -4404,7 +4404,7 @@

                                        Tanh#

                                        -class torch.nn.Tanh(*args, **kwargs)[source]#
                                        +class torch.nn.Tanh(*args, **kwargs)[source]#

                                        Applies the Hyperbolic Tangent (Tanh) function element-wise.

                                        Tanh is defined as:

                                        @@ -4428,7 +4428,7 @@

Tanh
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Tanhshrink.html b/2.9/generated/torch.nn.Tanhshrink.html index 045c28b7366..c2eaa38243b 100644 --- a/2.9/generated/torch.nn.Tanhshrink.html +++ b/2.9/generated/torch.nn.Tanhshrink.html @@ -4404,7 +4404,7 @@

                                        Tanhshrink#

                                        -class torch.nn.Tanhshrink(*args, **kwargs)[source]#
                                        +class torch.nn.Tanhshrink(*args, **kwargs)[source]#

                                        Applies the element-wise Tanhshrink function.

\text{Tanhshrink}(x) = x - \tanh(x)
@@ -4427,7 +4427,7 @@

                                        Tanhshrink
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.Threshold.html b/2.9/generated/torch.nn.Threshold.html index 3999d0aacac..37b0228f8c2 100644 --- a/2.9/generated/torch.nn.Threshold.html +++ b/2.9/generated/torch.nn.Threshold.html @@ -4404,7 +4404,7 @@

                                        Threshold#

                                        -class torch.nn.Threshold(threshold, value, inplace=False)[source]#
                                        +class torch.nn.Threshold(threshold, value, inplace=False)[source]#

                                        Thresholds each element of the input Tensor.

                                        Threshold is defined as:

                                        @@ -4439,7 +4439,7 @@

                                        Threshold
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4450,7 +4450,7 @@

                                        Threshold
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
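A small sketch of the thresholding semantics (y = x if x > threshold, else value):

>>> m = nn.Threshold(0.1, 20.0)
>>> m(torch.tensor([-1.0, 0.05, 0.5]))
tensor([20.0000, 20.0000,  0.5000])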
                                        diff --git a/2.9/generated/torch.nn.Transformer.html b/2.9/generated/torch.nn.Transformer.html index d2d383f1c47..0204ff36d15 100644 --- a/2.9/generated/torch.nn.Transformer.html +++ b/2.9/generated/torch.nn.Transformer.html @@ -4404,7 +4404,7 @@

                                        Transformer#

                                        -class torch.nn.Transformer(d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, activation=<function relu>, custom_encoder=None, custom_decoder=None, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.Transformer(d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, activation=<function relu>, custom_encoder=None, custom_decoder=None, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#

                                        A basic transformer layer.

                                        This Transformer layer implements the original Transformer architecture described in the Attention Is All You Need paper. The @@ -4448,7 +4448,7 @@

Transformer (pytorch/examples)

                                        -forward(src, tgt, src_mask=None, tgt_mask=None, memory_mask=None, src_key_padding_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, src_is_causal=None, tgt_is_causal=None, memory_is_causal=False)[source]#
                                        +forward(src, tgt, src_mask=None, tgt_mask=None, memory_mask=None, src_key_padding_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, src_is_causal=None, tgt_is_causal=None, memory_is_causal=False)[source]#

                                        Take in and process masked source/target sequences.

                                        Note

                                        @@ -4537,7 +4537,7 @@

                                        Transformer
                                        -static generate_square_subsequent_mask(sz, device=None, dtype=None)[source]#
                                        +static generate_square_subsequent_mask(sz, device=None, dtype=None)[source]#

                                        Generate a square causal mask for the sequence.

The masked positions are filled with float('-inf'). Unmasked positions are filled with float(0.0).
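A minimal end-to-end sketch with small, assumed hyperparameters; shapes are (seq, batch, d_model) because batch_first defaults to False:

>>> model = nn.Transformer(d_model=32, nhead=4, num_encoder_layers=1, num_decoder_layers=1)
>>> src, tgt = torch.randn(10, 2, 32), torch.randn(7, 2, 32)
>>> causal = nn.Transformer.generate_square_subsequent_mask(7)   # -inf above the diagonal
>>> model(src, tgt, tgt_mask=causal).shape
torch.Size([7, 2, 32])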

                                        diff --git a/2.9/generated/torch.nn.TransformerDecoder.html b/2.9/generated/torch.nn.TransformerDecoder.html index 5da7c400ab4..027f3bc2743 100644 --- a/2.9/generated/torch.nn.TransformerDecoder.html +++ b/2.9/generated/torch.nn.TransformerDecoder.html @@ -4404,7 +4404,7 @@

                                        TransformerDecoder#

                                        -class torch.nn.TransformerDecoder(decoder_layer, num_layers, norm=None)[source]#
                                        +class torch.nn.TransformerDecoder(decoder_layer, num_layers, norm=None)[source]#

                                        TransformerDecoder is a stack of N decoder layers.

                                        This TransformerDecoder layer implements the original architecture described in the Attention Is All You Need paper. The @@ -4438,7 +4438,7 @@

                                        TransformerDecoder
                                        -forward(tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_is_causal=None, memory_is_causal=False)[source]#
                                        +forward(tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_is_causal=None, memory_is_causal=False)[source]#

                                        Pass the inputs (and mask) through the decoder layer in turn.

                                        Parameters
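An illustrative decoder stack over an assumed encoder output ("memory"):

>>> dec_layer = nn.TransformerDecoderLayer(d_model=32, nhead=4, batch_first=True)
>>> decoder = nn.TransformerDecoder(dec_layer, num_layers=2)
>>> tgt = torch.randn(8, 7, 32)        # (batch, tgt_seq, d_model)
>>> memory = torch.randn(8, 5, 32)     # encoder output
>>> decoder(tgt, memory).shape
torch.Size([8, 7, 32])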
                                        diff --git a/2.9/generated/torch.nn.TransformerDecoderLayer.html b/2.9/generated/torch.nn.TransformerDecoderLayer.html index 4dd7e481b1e..1ba0778ac50 100644 --- a/2.9/generated/torch.nn.TransformerDecoderLayer.html +++ b/2.9/generated/torch.nn.TransformerDecoderLayer.html @@ -4404,7 +4404,7 @@

                                        TransformerDecoderLayer#

                                        -class torch.nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#

                                        TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network.

                                        This TransformerDecoderLayer implements the original architecture described in the Attention Is All You Need paper. The @@ -4454,7 +4454,7 @@

                                        TransformerDecoderLayer
                                        -forward(tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_is_causal=False, memory_is_causal=False)[source]#
                                        +forward(tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_is_causal=False, memory_is_causal=False)[source]#

                                        Pass the inputs (and mask) through the decoder layer.

                                        Parameters
                                        diff --git a/2.9/generated/torch.nn.TransformerEncoder.html b/2.9/generated/torch.nn.TransformerEncoder.html index f10d4c5a7a0..4b40f63cebc 100644 --- a/2.9/generated/torch.nn.TransformerEncoder.html +++ b/2.9/generated/torch.nn.TransformerEncoder.html @@ -4404,7 +4404,7 @@

                                        TransformerEncoder#

                                        -class torch.nn.TransformerEncoder(encoder_layer, num_layers, norm=None, enable_nested_tensor=True, mask_check=True)[source]#
                                        +class torch.nn.TransformerEncoder(encoder_layer, num_layers, norm=None, enable_nested_tensor=True, mask_check=True)[source]#

                                        TransformerEncoder is a stack of N encoder layers.

                                        This TransformerEncoder layer implements the original architecture described in the Attention Is All You Need paper. The @@ -4440,7 +4440,7 @@

                                        TransformerEncoder
                                        -forward(src, mask=None, src_key_padding_mask=None, is_causal=None)[source]#
                                        +forward(src, mask=None, src_key_padding_mask=None, is_causal=None)[source]#

                                        Pass the input through the encoder layers in turn.

                                        Parameters
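A matching encoder-side sketch with the same assumed sizes:

>>> enc_layer = nn.TransformerEncoderLayer(d_model=32, nhead=4, batch_first=True)
>>> encoder = nn.TransformerEncoder(enc_layer, num_layers=2)
>>> encoder(torch.randn(8, 5, 32)).shape   # (batch, seq, d_model) preserved
torch.Size([8, 5, 32])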
                                        diff --git a/2.9/generated/torch.nn.TransformerEncoderLayer.html b/2.9/generated/torch.nn.TransformerEncoderLayer.html index 8b121fa9079..35f5a9d4a0e 100644 --- a/2.9/generated/torch.nn.TransformerEncoderLayer.html +++ b/2.9/generated/torch.nn.TransformerEncoderLayer.html @@ -4404,7 +4404,7 @@

                                        TransformerEncoderLayer#

                                        -class torch.nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#
                                        +class torch.nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#

                                        TransformerEncoderLayer is made up of self-attn and feedforward network.

                                        This TransformerEncoderLayer implements the original architecture described in the Attention Is All You Need paper. The @@ -4484,7 +4484,7 @@

                                        TransformerEncoderLayer
                                        -forward(src, src_mask=None, src_key_padding_mask=None, is_causal=False)[source]#
                                        +forward(src, src_mask=None, src_key_padding_mask=None, is_causal=False)[source]#

                                        Pass the input through the encoder layer.

                                        Parameters
                                        diff --git a/2.9/generated/torch.nn.TripletMarginLoss.html b/2.9/generated/torch.nn.TripletMarginLoss.html index 8515e912bba..d5a2f134c72 100644 --- a/2.9/generated/torch.nn.TripletMarginLoss.html +++ b/2.9/generated/torch.nn.TripletMarginLoss.html @@ -4404,7 +4404,7 @@

                                        TripletMarginLoss#

                                        -class torch.nn.TripletMarginLoss(margin=1.0, p=2.0, eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean')[source]#
                                        +class torch.nn.TripletMarginLoss(margin=1.0, p=2.0, eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the triplet loss given input tensors x1, x2, x3 and a margin with a value greater than 0. This is used for measuring a relative similarity between samples. A triplet
@@ -4472,7 +4472,7 @@

                                        TripletMarginLoss
                                        -forward(anchor, positive, negative)[source]#
                                        +forward(anchor, positive, negative)[source]#

                                        Runs the forward pass.

                                        Return type
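A minimal training-style sketch (random embeddings assumed):

>>> triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
>>> anchor = torch.randn(16, 128, requires_grad=True)
>>> positive = torch.randn(16, 128, requires_grad=True)
>>> negative = torch.randn(16, 128, requires_grad=True)
>>> loss = triplet_loss(anchor, positive, negative)
>>> loss.backward()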
                                        diff --git a/2.9/generated/torch.nn.TripletMarginWithDistanceLoss.html b/2.9/generated/torch.nn.TripletMarginWithDistanceLoss.html index c022276529b..58678a02d89 100644 --- a/2.9/generated/torch.nn.TripletMarginWithDistanceLoss.html +++ b/2.9/generated/torch.nn.TripletMarginWithDistanceLoss.html @@ -4404,7 +4404,7 @@

                                        TripletMarginWithDistanceLoss#

                                        -class torch.nn.TripletMarginWithDistanceLoss(*, distance_function=None, margin=1.0, swap=False, reduction='mean')[source]#
                                        +class torch.nn.TripletMarginWithDistanceLoss(*, distance_function=None, margin=1.0, swap=False, reduction='mean')[source]#

Creates a criterion that measures the triplet loss given input tensors a, p, and n (representing anchor, positive, and negative examples, respectively), and a nonnegative,
@@ -4505,7 +4505,7 @@

                                        TripletMarginWithDistanceLoss
                                        -forward(anchor, positive, negative)[source]#
                                        +forward(anchor, positive, negative)[source]#

                                        Runs the forward pass.

                                        Return type
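The same criterion with a user-supplied distance; the cosine-based callable here is an illustrative assumption (any nonnegative, real-valued distance works):

>>> import torch.nn.functional as F
>>> anchor, positive, negative = (torch.randn(16, 128) for _ in range(3))
>>> loss_fn = nn.TripletMarginWithDistanceLoss(
...     distance_function=lambda a, b: 1.0 - F.cosine_similarity(a, b))
>>> loss = loss_fn(anchor, positive, negative)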
                                        diff --git a/2.9/generated/torch.nn.Unfold.html b/2.9/generated/torch.nn.Unfold.html index 92ec1202d66..5d697ec2f57 100644 --- a/2.9/generated/torch.nn.Unfold.html +++ b/2.9/generated/torch.nn.Unfold.html @@ -4404,7 +4404,7 @@

                                        Unfold#

                                        -class torch.nn.Unfold(kernel_size, dilation=1, padding=0, stride=1)[source]#
                                        +class torch.nn.Unfold(kernel_size, dilation=1, padding=0, stride=1)[source]#

                                        Extracts sliding local blocks from a batched input tensor.

Consider a batched input tensor of shape (N, C, *), where N is the batch dimension, C is the channel dimension,
@@ -4522,7 +4522,7 @@

                                        Unfold
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4533,7 +4533,7 @@

                                        Unfold
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
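A shape-level sketch: each output column stacks one kernel_size block from every channel, so the middle dimension is C * prod(kernel_size):

>>> unfold = nn.Unfold(kernel_size=(2, 3))
>>> x = torch.randn(2, 5, 3, 4)
>>> unfold(x).shape    # 5 channels * (2*3) kernel = 30; 2*2 = 4 block positions
torch.Size([2, 30, 4])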
                                        diff --git a/2.9/generated/torch.nn.Upsample.html b/2.9/generated/torch.nn.Upsample.html index 6ef6d2cb77e..75349c277e7 100644 --- a/2.9/generated/torch.nn.Upsample.html +++ b/2.9/generated/torch.nn.Upsample.html @@ -4404,7 +4404,7 @@

                                        Upsample#

                                        -class torch.nn.Upsample(size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None)[source]#
                                        +class torch.nn.Upsample(size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None)[source]#

Upsamples given multi-channel 1D (temporal), 2D (spatial) or 3D (volumetric) data.

                                        The input data is assumed to be of the form minibatch x channels x [optional depth] x [optional height] x width. @@ -4529,7 +4529,7 @@

                                        Upsample
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4540,7 +4540,7 @@

                                        Upsample
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
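A brief sketch contrasting the two common interpolation modes:

>>> x = torch.arange(1., 5.).view(1, 1, 2, 2)
>>> nn.Upsample(scale_factor=2, mode='nearest')(x).shape
torch.Size([1, 1, 4, 4])
>>> up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
>>> up(x).shape
torch.Size([1, 1, 4, 4])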
                                        diff --git a/2.9/generated/torch.nn.UpsamplingBilinear2d.html b/2.9/generated/torch.nn.UpsamplingBilinear2d.html index b51102bebbe..b2c87a06597 100644 --- a/2.9/generated/torch.nn.UpsamplingBilinear2d.html +++ b/2.9/generated/torch.nn.UpsamplingBilinear2d.html @@ -4404,7 +4404,7 @@

                                        UpsamplingBilinear2d#

                                        -class torch.nn.UpsamplingBilinear2d(size=None, scale_factor=None)[source]#
                                        +class torch.nn.UpsamplingBilinear2d(size=None, scale_factor=None)[source]#

                                        Applies a 2D bilinear upsampling to an input signal composed of several input channels.

To specify the scale, it takes either the size or the scale_factor as its constructor argument.

                                        diff --git a/2.9/generated/torch.nn.UpsamplingNearest2d.html b/2.9/generated/torch.nn.UpsamplingNearest2d.html index 8840ac12c60..50709bd83ea 100644 --- a/2.9/generated/torch.nn.UpsamplingNearest2d.html +++ b/2.9/generated/torch.nn.UpsamplingNearest2d.html @@ -4404,7 +4404,7 @@

                                        UpsamplingNearest2d#

                                        -class torch.nn.UpsamplingNearest2d(size=None, scale_factor=None)[source]#
                                        +class torch.nn.UpsamplingNearest2d(size=None, scale_factor=None)[source]#

                                        Applies a 2D nearest neighbor upsampling to an input signal composed of several input channels.

To specify the scale, it takes either the size or the scale_factor as its constructor argument.

                                        diff --git a/2.9/generated/torch.nn.ZeroPad1d.html b/2.9/generated/torch.nn.ZeroPad1d.html index 7eab91eede6..3d59709c883 100644 --- a/2.9/generated/torch.nn.ZeroPad1d.html +++ b/2.9/generated/torch.nn.ZeroPad1d.html @@ -4404,7 +4404,7 @@

                                        ZeroPad1d#

                                        -class torch.nn.ZeroPad1d(padding)[source]#
                                        +class torch.nn.ZeroPad1d(padding)[source]#

                                        Pads the input tensor boundaries with zero.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        @@ -4451,7 +4451,7 @@

                                        ZeroPad1d
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.ZeroPad2d.html b/2.9/generated/torch.nn.ZeroPad2d.html index eca3bb28e3e..478819cef9c 100644 --- a/2.9/generated/torch.nn.ZeroPad2d.html +++ b/2.9/generated/torch.nn.ZeroPad2d.html @@ -4404,7 +4404,7 @@

                                        ZeroPad2d#

                                        -class torch.nn.ZeroPad2d(padding)[source]#
                                        +class torch.nn.ZeroPad2d(padding)[source]#

                                        Pads the input tensor boundaries with zero.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        @@ -4451,7 +4451,7 @@

                                        ZeroPad2d
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.ZeroPad3d.html b/2.9/generated/torch.nn.ZeroPad3d.html index 37356ebfc85..7b26b988048 100644 --- a/2.9/generated/torch.nn.ZeroPad3d.html +++ b/2.9/generated/torch.nn.ZeroPad3d.html @@ -4404,7 +4404,7 @@

                                        ZeroPad3d#

                                        -class torch.nn.ZeroPad3d(padding)[source]#
                                        +class torch.nn.ZeroPad3d(padding)[source]#

                                        Pads the input tensor boundaries with zero.

                                        For N-dimensional padding, use torch.nn.functional.pad().

                                        @@ -4439,7 +4439,7 @@

                                        ZeroPad3d
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.attention.bias.CausalBias.html b/2.9/generated/torch.nn.attention.bias.CausalBias.html index feae9e68032..8ee8d0f0476 100644 --- a/2.9/generated/torch.nn.attention.bias.CausalBias.html +++ b/2.9/generated/torch.nn.attention.bias.CausalBias.html @@ -4415,7 +4415,7 @@

                                        torch.nn.attention.bias.CausalBias#

                                        -class torch.nn.attention.bias.CausalBias(variant, seq_len_q, seq_len_kv)[source]#
                                        +class torch.nn.attention.bias.CausalBias(variant, seq_len_q, seq_len_kv)[source]#

                                        A bias representing causal attention patterns. For an overview of the bias structure, see the CausalVariant enum.

This class is used for defining causal (triangular) attention biases. For constructing the bias, there are two factory functions: causal_upper_left() and causal_lower_right().

                                        diff --git a/2.9/generated/torch.nn.attention.bias.CausalVariant.html b/2.9/generated/torch.nn.attention.bias.CausalVariant.html index 1f1ff61c8b0..a02d8cc56b9 100644 --- a/2.9/generated/torch.nn.attention.bias.CausalVariant.html +++ b/2.9/generated/torch.nn.attention.bias.CausalVariant.html @@ -4415,7 +4415,7 @@

                                        CausalVariant#

                                        -class torch.nn.attention.bias.CausalVariant(value)[source]#
                                        +class torch.nn.attention.bias.CausalVariant(value)[source]#

                                        Enum for causal variants used in attention mechanisms.

                                        Defines two types of causal biases:

                                        UPPER_LEFT: Represents upper-left triangular bias for standard causal attention. diff --git a/2.9/generated/torch.nn.attention.bias.causal_lower_right.html b/2.9/generated/torch.nn.attention.bias.causal_lower_right.html index ef521c986d9..2b964ef6b45 100644 --- a/2.9/generated/torch.nn.attention.bias.causal_lower_right.html +++ b/2.9/generated/torch.nn.attention.bias.causal_lower_right.html @@ -4415,7 +4415,7 @@

                                        torch.nn.attention.bias.causal_lower_right#

                                        -torch.nn.attention.bias.causal_lower_right(*size)[source]#
                                        +torch.nn.attention.bias.causal_lower_right(*size)[source]#

                                        Creates a lower-right triangular causal bias.

                                        This function generates a lower-right triangular matrix to represent causal attention bias with a diagonal offset set so that the inclusive values are aligned to the lower right corner of the matrix.
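An illustrative use with scaled_dot_product_attention; all shapes below are assumptions:

>>> from torch.nn.attention.bias import causal_lower_right
>>> import torch.nn.functional as F
>>> q = torch.randn(2, 4, 6, 16)       # (batch, heads, seq_q, head_dim)
>>> k = torch.randn(2, 4, 8, 16)       # seq_kv=8 > seq_q=6
>>> v = torch.randn(2, 4, 8, 16)
>>> bias = causal_lower_right(6, 8)    # aligned to the lower-right corner
>>> out = F.scaled_dot_product_attention(q, k, v, attn_mask=bias)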

                                        diff --git a/2.9/generated/torch.nn.attention.bias.causal_upper_left.html b/2.9/generated/torch.nn.attention.bias.causal_upper_left.html index 71329d09299..1a664c6bf14 100644 --- a/2.9/generated/torch.nn.attention.bias.causal_upper_left.html +++ b/2.9/generated/torch.nn.attention.bias.causal_upper_left.html @@ -4415,7 +4415,7 @@

                                        torch.nn.attention.bias.causal_upper_left#

                                        -torch.nn.attention.bias.causal_upper_left(*size)[source]#
                                        +torch.nn.attention.bias.causal_upper_left(*size)[source]#

                                        Creates an upper-left triangular causal bias.

This function generates an upper-left triangular matrix to represent causal attention bias with a diagonal offset set so that the inclusive values are aligned to the upper left corner of the matrix.
diff --git a/2.9/generated/torch.nn.attention.sdpa_kernel.html b/2.9/generated/torch.nn.attention.sdpa_kernel.html index 5230def9334..74625ed3262 100644 --- a/2.9/generated/torch.nn.attention.sdpa_kernel.html +++ b/2.9/generated/torch.nn.attention.sdpa_kernel.html @@ -4404,7 +4404,7 @@

                                        torch.nn.attention.sdpa_kernel#

                                        -torch.nn.attention.sdpa_kernel(backends, set_priority=False)[source]#
                                        +torch.nn.attention.sdpa_kernel(backends, set_priority=False)[source]#

                                        Context manager to select which backend to use for scaled dot product attention.
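A hedged sketch of backend selection: dispatch is restricted to the listed backends (with set_priority=True the list order becomes the priority order), and MATH serves as an always-available fallback here:

>>> import torch.nn.functional as F
>>> from torch.nn.attention import sdpa_kernel, SDPBackend
>>> q = k = v = torch.randn(2, 4, 8, 16)
>>> with sdpa_kernel([SDPBackend.FLASH_ATTENTION, SDPBackend.MATH]):
...     out = F.scaled_dot_product_attention(q, k, v)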

                                        Warning

                                        diff --git a/2.9/generated/torch.nn.factory_kwargs.html b/2.9/generated/torch.nn.factory_kwargs.html index 4401cc77e37..99b0a861578 100644 --- a/2.9/generated/torch.nn.factory_kwargs.html +++ b/2.9/generated/torch.nn.factory_kwargs.html @@ -4404,7 +4404,7 @@

                                        torch.nn.factory_kwargs#

                                        -torch.nn.factory_kwargs(kwargs)[source]#
                                        +torch.nn.factory_kwargs(kwargs)[source]#

                                        Return a canonicalized dict of factory kwargs.

                                        Given kwargs, returns a canonicalized dict of factory kwargs that can be directly passed to factory functions like torch.empty, or errors if unrecognized kwargs are present.
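A small sketch of threading device/dtype through to a factory function:

>>> import torch
>>> from torch.nn import factory_kwargs
>>> def make_buffer(n, device=None, dtype=None):
...     fk = factory_kwargs({'device': device, 'dtype': dtype})
...     return torch.empty(n, **fk)
>>> make_buffer(3, dtype=torch.float16).dtype
torch.float16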

                                        diff --git a/2.9/generated/torch.nn.functional.adaptive_avg_pool2d.html b/2.9/generated/torch.nn.functional.adaptive_avg_pool2d.html index 3747293f668..d24294f59cf 100644 --- a/2.9/generated/torch.nn.functional.adaptive_avg_pool2d.html +++ b/2.9/generated/torch.nn.functional.adaptive_avg_pool2d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.adaptive_avg_pool2d#

                                        -torch.nn.functional.adaptive_avg_pool2d(input, output_size)[source]#
                                        +torch.nn.functional.adaptive_avg_pool2d(input, output_size)[source]#

                                        Apply a 2D adaptive average pooling over an input signal composed of several input planes.

                                        See AdaptiveAvgPool2d for details and output shape.
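A one-line shape sketch; the output spatial size is exactly output_size regardless of the input's:

>>> import torch.nn.functional as F
>>> x = torch.randn(1, 64, 10, 9)
>>> F.adaptive_avg_pool2d(x, (5, 7)).shape
torch.Size([1, 64, 5, 7])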

                                        diff --git a/2.9/generated/torch.nn.functional.adaptive_avg_pool3d.html b/2.9/generated/torch.nn.functional.adaptive_avg_pool3d.html index 6f4aeabfde8..ff191f5409a 100644 --- a/2.9/generated/torch.nn.functional.adaptive_avg_pool3d.html +++ b/2.9/generated/torch.nn.functional.adaptive_avg_pool3d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.adaptive_avg_pool3d#

                                        -torch.nn.functional.adaptive_avg_pool3d(input, output_size)[source]#
                                        +torch.nn.functional.adaptive_avg_pool3d(input, output_size)[source]#

                                        Apply a 3D adaptive average pooling over an input signal composed of several input planes.

                                        See AdaptiveAvgPool3d for details and output shape.

                                        diff --git a/2.9/generated/torch.nn.functional.adaptive_max_pool1d.html b/2.9/generated/torch.nn.functional.adaptive_max_pool1d.html index 9cad6ad8f25..f5de05575a7 100644 --- a/2.9/generated/torch.nn.functional.adaptive_max_pool1d.html +++ b/2.9/generated/torch.nn.functional.adaptive_max_pool1d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.adaptive_max_pool1d#

                                        -torch.nn.functional.adaptive_max_pool1d(input, output_size, return_indices=False)[source]#
                                        +torch.nn.functional.adaptive_max_pool1d(input, output_size, return_indices=False)[source]#

                                        Applies a 1D adaptive max pooling over an input signal composed of several input planes.

                                        See AdaptiveMaxPool1d for details and output shape.

                                        diff --git a/2.9/generated/torch.nn.functional.adaptive_max_pool2d.html b/2.9/generated/torch.nn.functional.adaptive_max_pool2d.html index b6214326548..1eb4e49c0af 100644 --- a/2.9/generated/torch.nn.functional.adaptive_max_pool2d.html +++ b/2.9/generated/torch.nn.functional.adaptive_max_pool2d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.adaptive_max_pool2d#

                                        -torch.nn.functional.adaptive_max_pool2d(input, output_size, return_indices=False)[source]#
                                        +torch.nn.functional.adaptive_max_pool2d(input, output_size, return_indices=False)[source]#

                                        Applies a 2D adaptive max pooling over an input signal composed of several input planes.

                                        See AdaptiveMaxPool2d for details and output shape.

                                        diff --git a/2.9/generated/torch.nn.functional.adaptive_max_pool3d.html b/2.9/generated/torch.nn.functional.adaptive_max_pool3d.html index 9205d7e45f0..3f769e4a2a9 100644 --- a/2.9/generated/torch.nn.functional.adaptive_max_pool3d.html +++ b/2.9/generated/torch.nn.functional.adaptive_max_pool3d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.adaptive_max_pool3d#

                                        -torch.nn.functional.adaptive_max_pool3d(input, output_size, return_indices=False)[source]#
                                        +torch.nn.functional.adaptive_max_pool3d(input, output_size, return_indices=False)[source]#

                                        Applies a 3D adaptive max pooling over an input signal composed of several input planes.

                                        See AdaptiveMaxPool3d for details and output shape.

                                        diff --git a/2.9/generated/torch.nn.functional.affine_grid.html b/2.9/generated/torch.nn.functional.affine_grid.html index 66dc8a765ed..24c0e3ed7a1 100644 --- a/2.9/generated/torch.nn.functional.affine_grid.html +++ b/2.9/generated/torch.nn.functional.affine_grid.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.affine_grid#

                                        -torch.nn.functional.affine_grid(theta, size, align_corners=None)[source]#
                                        +torch.nn.functional.affine_grid(theta, size, align_corners=None)[source]#

                                        Generate 2D or 3D flow field (sampling grid), given a batch of affine matrices theta.
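A round-trip sketch pairing affine_grid with grid_sample (documented further below): an identity theta reproduces the input up to interpolation error:

>>> theta = torch.eye(2, 3).unsqueeze(0)     # identity affine transform
>>> grid = F.affine_grid(theta, size=(1, 1, 4, 4), align_corners=False)
>>> x = torch.randn(1, 1, 4, 4)
>>> y = F.grid_sample(x, grid, align_corners=False)
>>> torch.allclose(x, y, atol=1e-5)
True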

                                        Note

                                        diff --git a/2.9/generated/torch.nn.functional.alpha_dropout.html b/2.9/generated/torch.nn.functional.alpha_dropout.html index 148760daf18..025e22597a4 100644 --- a/2.9/generated/torch.nn.functional.alpha_dropout.html +++ b/2.9/generated/torch.nn.functional.alpha_dropout.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.alpha_dropout#

                                        -torch.nn.functional.alpha_dropout(input, p=0.5, training=False, inplace=False)[source]#
                                        +torch.nn.functional.alpha_dropout(input, p=0.5, training=False, inplace=False)[source]#

                                        Apply alpha dropout to the input.

                                        See AlphaDropout for details.

                                        diff --git a/2.9/generated/torch.nn.functional.batch_norm.html b/2.9/generated/torch.nn.functional.batch_norm.html index 60083c8d229..f3e26b52eaf 100644 --- a/2.9/generated/torch.nn.functional.batch_norm.html +++ b/2.9/generated/torch.nn.functional.batch_norm.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.batch_norm#

                                        -torch.nn.functional.batch_norm(input, running_mean, running_var, weight=None, bias=None, training=False, momentum=0.1, eps=1e-05)[source]#
                                        +torch.nn.functional.batch_norm(input, running_mean, running_var, weight=None, bias=None, training=False, momentum=0.1, eps=1e-05)[source]#

                                        Apply Batch Normalization for each channel across a batch of data.

                                        See BatchNorm1d, BatchNorm2d, BatchNorm3d for details.

                                        diff --git a/2.9/generated/torch.nn.functional.binary_cross_entropy.html b/2.9/generated/torch.nn.functional.binary_cross_entropy.html index 790ca201db5..5e196e38ed4 100644 --- a/2.9/generated/torch.nn.functional.binary_cross_entropy.html +++ b/2.9/generated/torch.nn.functional.binary_cross_entropy.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.binary_cross_entropy#

                                        -torch.nn.functional.binary_cross_entropy(input, target, weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.binary_cross_entropy(input, target, weight=None, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute Binary Cross Entropy between the target and input probabilities.

                                        See BCELoss for details.

                                        diff --git a/2.9/generated/torch.nn.functional.binary_cross_entropy_with_logits.html b/2.9/generated/torch.nn.functional.binary_cross_entropy_with_logits.html index 87036e1cee4..95e53658a13 100644 --- a/2.9/generated/torch.nn.functional.binary_cross_entropy_with_logits.html +++ b/2.9/generated/torch.nn.functional.binary_cross_entropy_with_logits.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.binary_cross_entropy_with_logits#

                                        -torch.nn.functional.binary_cross_entropy_with_logits(input, target, weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None)[source]#
                                        +torch.nn.functional.binary_cross_entropy_with_logits(input, target, weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None)[source]#

                                        Compute Binary Cross Entropy between target and input logits.

                                        See BCEWithLogitsLoss for details.
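A minimal sketch; targets are probabilities in [0, 1] (here hard 0/1 labels):

>>> import torch.nn.functional as F
>>> logits = torch.randn(4, requires_grad=True)   # raw scores, no sigmoid applied
>>> target = torch.empty(4).random_(2)            # random 0/1 labels
>>> F.binary_cross_entropy_with_logits(logits, target).backward()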

                                        diff --git a/2.9/generated/torch.nn.functional.celu.html b/2.9/generated/torch.nn.functional.celu.html index 96090b68297..04bc6870ca0 100644 --- a/2.9/generated/torch.nn.functional.celu.html +++ b/2.9/generated/torch.nn.functional.celu.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.celu#

-torch.nn.functional.celu(input, alpha=1., inplace=False) → Tensor[source]#
+torch.nn.functional.celu(input, alpha=1., inplace=False) → Tensor[source]#

Applies element-wise, \text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1)).

                                        See CELU for more details.

                                        diff --git a/2.9/generated/torch.nn.functional.cosine_embedding_loss.html b/2.9/generated/torch.nn.functional.cosine_embedding_loss.html index ea8a3648ec8..08cdf3d6232 100644 --- a/2.9/generated/torch.nn.functional.cosine_embedding_loss.html +++ b/2.9/generated/torch.nn.functional.cosine_embedding_loss.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.cosine_embedding_loss#

                                        -torch.nn.functional.cosine_embedding_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.cosine_embedding_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute the cosine embedding loss.

                                        See CosineEmbeddingLoss for details.

                                        diff --git a/2.9/generated/torch.nn.functional.cross_entropy.html b/2.9/generated/torch.nn.functional.cross_entropy.html index 94cd7cea1c4..b3189c43ccb 100644 --- a/2.9/generated/torch.nn.functional.cross_entropy.html +++ b/2.9/generated/torch.nn.functional.cross_entropy.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.cross_entropy#

                                        -torch.nn.functional.cross_entropy(input, target, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)[source]#
                                        +torch.nn.functional.cross_entropy(input, target, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)[source]#

                                        Compute the cross entropy loss between input logits and target.

                                        See CrossEntropyLoss for details.
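A minimal sketch with class-index targets and label smoothing:

>>> import torch.nn.functional as F
>>> logits = torch.randn(8, 5, requires_grad=True)   # (batch, num_classes)
>>> target = torch.randint(5, (8,))                  # class indices
>>> loss = F.cross_entropy(logits, target, label_smoothing=0.1)
>>> loss.backward()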

                                        diff --git a/2.9/generated/torch.nn.functional.ctc_loss.html b/2.9/generated/torch.nn.functional.ctc_loss.html index c2109de1e55..5b73d49e837 100644 --- a/2.9/generated/torch.nn.functional.ctc_loss.html +++ b/2.9/generated/torch.nn.functional.ctc_loss.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.ctc_loss#

                                        -torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reduction='mean', zero_infinity=False)[source]#
                                        +torch.nn.functional.ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reduction='mean', zero_infinity=False)[source]#

                                        Compute the Connectionist Temporal Classification loss.

                                        See CTCLoss for details.

                                        diff --git a/2.9/generated/torch.nn.functional.dropout.html b/2.9/generated/torch.nn.functional.dropout.html index 5561b0b64f6..d519808f6b2 100644 --- a/2.9/generated/torch.nn.functional.dropout.html +++ b/2.9/generated/torch.nn.functional.dropout.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.dropout#

                                        -torch.nn.functional.dropout(input, p=0.5, training=True, inplace=False)[source]#
                                        +torch.nn.functional.dropout(input, p=0.5, training=True, inplace=False)[source]#

                                        During training, randomly zeroes some elements of the input tensor with probability p.

                                        Uses samples from a Bernoulli distribution.

                                        See Dropout for details.
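A small sketch; surviving elements are rescaled by 1/(1-p), so the expected value of each element is preserved:

>>> import torch.nn.functional as F
>>> x = torch.ones(5)
>>> F.dropout(x, p=0.5, training=True)   # random; one possible outcome shown
tensor([2., 0., 2., 2., 0.])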

                                        diff --git a/2.9/generated/torch.nn.functional.dropout1d.html b/2.9/generated/torch.nn.functional.dropout1d.html index 470a0251b59..559ef8e381e 100644 --- a/2.9/generated/torch.nn.functional.dropout1d.html +++ b/2.9/generated/torch.nn.functional.dropout1d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.dropout1d#

                                        -torch.nn.functional.dropout1d(input, p=0.5, training=True, inplace=False)[source]#
                                        +torch.nn.functional.dropout1d(input, p=0.5, training=True, inplace=False)[source]#

                                        Randomly zero out entire channels (a channel is a 1D feature map).

For example, the j-th channel of the i-th sample in the batched input is a 1D tensor \text{input}[i, j] of the input tensor.
diff --git a/2.9/generated/torch.nn.functional.dropout2d.html b/2.9/generated/torch.nn.functional.dropout2d.html index d8e9227f0d3..c57ae40d4f7 100644 --- a/2.9/generated/torch.nn.functional.dropout2d.html +++ b/2.9/generated/torch.nn.functional.dropout2d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.dropout2d#

                                        -torch.nn.functional.dropout2d(input, p=0.5, training=True, inplace=False)[source]#
                                        +torch.nn.functional.dropout2d(input, p=0.5, training=True, inplace=False)[source]#

                                        Randomly zero out entire channels (a channel is a 2D feature map).

For example, the j-th channel of the i-th sample in the batched input is a 2D tensor \text{input}[i, j] of the input tensor.
diff --git a/2.9/generated/torch.nn.functional.dropout3d.html b/2.9/generated/torch.nn.functional.dropout3d.html index 7910bcb1f78..72370a86129 100644 --- a/2.9/generated/torch.nn.functional.dropout3d.html +++ b/2.9/generated/torch.nn.functional.dropout3d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.dropout3d#

                                        -torch.nn.functional.dropout3d(input, p=0.5, training=True, inplace=False)[source]#
                                        +torch.nn.functional.dropout3d(input, p=0.5, training=True, inplace=False)[source]#

                                        Randomly zero out entire channels (a channel is a 3D feature map).

For example, the j-th channel of the i-th sample in the batched input is a 3D tensor \text{input}[i, j] of the input tensor.
diff --git a/2.9/generated/torch.nn.functional.elu.html b/2.9/generated/torch.nn.functional.elu.html index a5d16a61317..4ed2ebc1ddc 100644 --- a/2.9/generated/torch.nn.functional.elu.html +++ b/2.9/generated/torch.nn.functional.elu.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.elu#

                                        -torch.nn.functional.elu(input, alpha=1.0, inplace=False)[source]#
                                        +torch.nn.functional.elu(input, alpha=1.0, inplace=False)[source]#

                                        Apply the Exponential Linear Unit (ELU) function element-wise.

                                        See ELU for more details.

                                        diff --git a/2.9/generated/torch.nn.functional.embedding.html b/2.9/generated/torch.nn.functional.embedding.html index ece8c0e1fde..991b9865a89 100644 --- a/2.9/generated/torch.nn.functional.embedding.html +++ b/2.9/generated/torch.nn.functional.embedding.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.embedding#

                                        -torch.nn.functional.embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False)[source]#
                                        +torch.nn.functional.embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False)[source]#

Generate a simple lookup table that looks up embeddings in a fixed dictionary of a fixed size.

                                        This module is often used to retrieve word embeddings using indices. The input to the module is a list of indices, and the embedding matrix, diff --git a/2.9/generated/torch.nn.functional.embedding_bag.html b/2.9/generated/torch.nn.functional.embedding_bag.html index 0e707db32a6..b0df51ce1ef 100644 --- a/2.9/generated/torch.nn.functional.embedding_bag.html +++ b/2.9/generated/torch.nn.functional.embedding_bag.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.embedding_bag#

                                        -torch.nn.functional.embedding_bag(input, weight, offsets=None, max_norm=None, norm_type=2, scale_grad_by_freq=False, mode='mean', sparse=False, per_sample_weights=None, include_last_offset=False, padding_idx=None)[source]#
                                        +torch.nn.functional.embedding_bag(input, weight, offsets=None, max_norm=None, norm_type=2, scale_grad_by_freq=False, mode='mean', sparse=False, per_sample_weights=None, include_last_offset=False, padding_idx=None)[source]#

                                        Compute sums, means or maxes of bags of embeddings.

                                        Calculation is done without instantiating the intermediate embeddings. See torch.nn.EmbeddingBag for more details.
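A two-bag sketch; offsets mark where each bag starts in the flat index list:

>>> weight = torch.randn(10, 3)                    # 10 embeddings of dim 3
>>> input = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9])
>>> offsets = torch.tensor([0, 4])                 # bags: [1,2,4,5] and [4,3,2,9]
>>> F.embedding_bag(input, weight, offsets, mode='sum').shape
torch.Size([2, 3])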

                                        diff --git a/2.9/generated/torch.nn.functional.feature_alpha_dropout.html b/2.9/generated/torch.nn.functional.feature_alpha_dropout.html index 41ec90a38b0..304e26d6adc 100644 --- a/2.9/generated/torch.nn.functional.feature_alpha_dropout.html +++ b/2.9/generated/torch.nn.functional.feature_alpha_dropout.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.feature_alpha_dropout#

                                        -torch.nn.functional.feature_alpha_dropout(input, p=0.5, training=False, inplace=False)[source]#
                                        +torch.nn.functional.feature_alpha_dropout(input, p=0.5, training=False, inplace=False)[source]#

                                        Randomly masks out entire channels (a channel is a feature map).

For example, the j-th channel of the i-th sample in the batch input is a tensor \text{input}[i, j] of the input tensor. Instead of
diff --git a/2.9/generated/torch.nn.functional.fold.html b/2.9/generated/torch.nn.functional.fold.html index 8c84d71fdd4..35fd1d7c294 100644 --- a/2.9/generated/torch.nn.functional.fold.html +++ b/2.9/generated/torch.nn.functional.fold.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.fold#

                                        -torch.nn.functional.fold(input, output_size, kernel_size, dilation=1, padding=0, stride=1)[source]#
                                        +torch.nn.functional.fold(input, output_size, kernel_size, dilation=1, padding=0, stride=1)[source]#

                                        Combine an array of sliding local blocks into a large containing tensor.
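A consistency sketch: with non-overlapping blocks, fold is the exact inverse of unfold:

>>> import torch.nn.functional as F
>>> x = torch.randn(1, 3, 4, 4)
>>> patches = F.unfold(x, kernel_size=2, stride=2)   # (1, 12, 4)
>>> recon = F.fold(patches, output_size=(4, 4), kernel_size=2, stride=2)
>>> torch.allclose(x, recon)
True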

                                        Warning

                                        diff --git a/2.9/generated/torch.nn.functional.fractional_max_pool2d.html b/2.9/generated/torch.nn.functional.fractional_max_pool2d.html index 42c56156b7f..fe2de2627ea 100644 --- a/2.9/generated/torch.nn.functional.fractional_max_pool2d.html +++ b/2.9/generated/torch.nn.functional.fractional_max_pool2d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.fractional_max_pool2d#

                                        -torch.nn.functional.fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#
                                        +torch.nn.functional.fractional_max_pool2d(input, kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#

                                        Applies 2D fractional max pooling over an input signal composed of several input planes.

                                        Fractional MaxPooling is described in detail in the paper Fractional MaxPooling by Ben Graham

The max-pooling operation is applied in kH \times kW regions by a stochastic
diff --git a/2.9/generated/torch.nn.functional.fractional_max_pool3d.html b/2.9/generated/torch.nn.functional.fractional_max_pool3d.html index 1c5f886a6ec..4b239699c80 100644 --- a/2.9/generated/torch.nn.functional.fractional_max_pool3d.html +++ b/2.9/generated/torch.nn.functional.fractional_max_pool3d.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.fractional_max_pool3d#

                                        -torch.nn.functional.fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#
                                        +torch.nn.functional.fractional_max_pool3d(input, kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#

                                        Applies 3D fractional max pooling over an input signal composed of several input planes.

                                        Fractional MaxPooling is described in detail in the paper Fractional MaxPooling by Ben Graham

The max-pooling operation is applied in kT \times kH \times kW regions by a stochastic
diff --git a/2.9/generated/torch.nn.functional.gaussian_nll_loss.html b/2.9/generated/torch.nn.functional.gaussian_nll_loss.html index f46d9e860f6..2aa7ef72099 100644 --- a/2.9/generated/torch.nn.functional.gaussian_nll_loss.html +++ b/2.9/generated/torch.nn.functional.gaussian_nll_loss.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.gaussian_nll_loss#

                                        -torch.nn.functional.gaussian_nll_loss(input, target, var, full=False, eps=1e-06, reduction='mean')[source]#
                                        +torch.nn.functional.gaussian_nll_loss(input, target, var, full=False, eps=1e-06, reduction='mean')[source]#

                                        Compute the Gaussian negative log likelihood loss.

                                        See GaussianNLLLoss for details.

                                        diff --git a/2.9/generated/torch.nn.functional.glu.html b/2.9/generated/torch.nn.functional.glu.html index f27ba848597..941ad8ec44d 100644 --- a/2.9/generated/torch.nn.functional.glu.html +++ b/2.9/generated/torch.nn.functional.glu.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.glu#

-torch.nn.functional.glu(input, dim=-1) → Tensor[source]#
+torch.nn.functional.glu(input, dim=-1) → Tensor[source]#

                                        The gated linear unit. Computes:

\text{GLU}(a, b) = a \otimes \sigma(b)
diff --git a/2.9/generated/torch.nn.functional.grid_sample.html b/2.9/generated/torch.nn.functional.grid_sample.html index 08fd12e5f09..a9d763fde8d 100644 --- a/2.9/generated/torch.nn.functional.grid_sample.html +++ b/2.9/generated/torch.nn.functional.grid_sample.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.grid_sample#

                                        -torch.nn.functional.grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corners=None)[source]#
                                        +torch.nn.functional.grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corners=None)[source]#

                                        Compute grid sample.

                                        Given an input and a flow-field grid, computes the output using input values and pixel locations from grid.

                                        diff --git a/2.9/generated/torch.nn.functional.group_norm.html b/2.9/generated/torch.nn.functional.group_norm.html index d52f47ee4a9..b7847129623 100644 --- a/2.9/generated/torch.nn.functional.group_norm.html +++ b/2.9/generated/torch.nn.functional.group_norm.html @@ -4404,7 +4404,7 @@

                                        torch.nn.functional.group_norm#

                                        -torch.nn.functional.group_norm(input, num_groups, weight=None, bias=None, eps=1e-05)[source]#
                                        +torch.nn.functional.group_norm(input, num_groups, weight=None, bias=None, eps=1e-05)[source]#

                                        Apply Group Normalization for last certain number of dimensions.

                                        See GroupNorm for details.
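
For example (a minimal sketch, assuming torch and torch.nn.functional as F; num_groups must evenly divide the channel count):

>>> x = torch.randn(2, 6, 4, 4)    # (N, C, H, W) with C=6
>>> weight = torch.ones(6)         # optional per-channel scale
>>> bias = torch.zeros(6)          # optional per-channel shift
>>> F.group_norm(x, num_groups=3, weight=weight, bias=bias).shape
torch.Size([2, 6, 4, 4])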

diff --git a/2.9/generated/torch.nn.functional.gumbel_softmax.html b/2.9/generated/torch.nn.functional.gumbel_softmax.html
index 20566939d7a..7eeba290916 100644
--- a/2.9/generated/torch.nn.functional.gumbel_softmax.html
+++ b/2.9/generated/torch.nn.functional.gumbel_softmax.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.gumbel_softmax#

                                        -torch.nn.functional.gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1)[source]#
                                        +torch.nn.functional.gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1)[source]#

                                        Sample from the Gumbel-Softmax distribution (Link 1 Link 2) and optionally discretize.

                                        Parameters
diff --git a/2.9/generated/torch.nn.functional.hardsigmoid.html b/2.9/generated/torch.nn.functional.hardsigmoid.html
index dd7c8d143d7..384c56a5cf7 100644
--- a/2.9/generated/torch.nn.functional.hardsigmoid.html
+++ b/2.9/generated/torch.nn.functional.hardsigmoid.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.hardsigmoid#

                                        -torch.nn.functional.hardsigmoid(input, inplace=False)[source]#
                                        +torch.nn.functional.hardsigmoid(input, inplace=False)[source]#

                                        Apply the Hardsigmoid function element-wise.

$\text{Hardsigmoid}(x) = \begin{cases} 0 & \text{if } x \le -3, \\ 1 & \text{if } x \ge +3, \\ x/6 + 1/2 & \text{otherwise} \end{cases}$

diff --git a/2.9/generated/torch.nn.functional.hardswish.html b/2.9/generated/torch.nn.functional.hardswish.html
index f95c5ec4225..32c8b689424 100644
--- a/2.9/generated/torch.nn.functional.hardswish.html
+++ b/2.9/generated/torch.nn.functional.hardswish.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.hardswish#

                                        -torch.nn.functional.hardswish(input, inplace=False)[source]#
                                        +torch.nn.functional.hardswish(input, inplace=False)[source]#

                                        Apply hardswish function, element-wise.

                                        Follows implementation as described in the paper: Searching for MobileNetV3.

diff --git a/2.9/generated/torch.nn.functional.hardtanh.html b/2.9/generated/torch.nn.functional.hardtanh.html
index 2e747cd6ac0..236fc8e44f9 100644
--- a/2.9/generated/torch.nn.functional.hardtanh.html
+++ b/2.9/generated/torch.nn.functional.hardtanh.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.hardtanh#

                                        -torch.nn.functional.hardtanh(input, min_val=-1., max_val=1., inplace=False) Tensor[source]#
                                        +torch.nn.functional.hardtanh(input, min_val=-1., max_val=1., inplace=False) Tensor[source]#

                                        Applies the HardTanh function element-wise. See Hardtanh for more details.
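
For example (a minimal sketch, assuming torch and torch.nn.functional as F):

>>> x = torch.tensor([-2.0, -0.5, 0.5, 2.0])
>>> F.hardtanh(x)                               # clamp to the default range [-1, 1]
tensor([-1.0000, -0.5000,  0.5000,  1.0000])
>>> F.hardtanh(x, min_val=-2.0, max_val=2.0)    # custom clamp range
tensor([-2.0000, -0.5000,  0.5000,  2.0000])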

diff --git a/2.9/generated/torch.nn.functional.hinge_embedding_loss.html b/2.9/generated/torch.nn.functional.hinge_embedding_loss.html
index f294eecdfbb..43a107d6b91 100644
--- a/2.9/generated/torch.nn.functional.hinge_embedding_loss.html
+++ b/2.9/generated/torch.nn.functional.hinge_embedding_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.hinge_embedding_loss#

                                        -torch.nn.functional.hinge_embedding_loss(input, target, margin=1.0, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.hinge_embedding_loss(input, target, margin=1.0, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute the hinge embedding loss.

                                        See HingeEmbeddingLoss for details.

diff --git a/2.9/generated/torch.nn.functional.huber_loss.html b/2.9/generated/torch.nn.functional.huber_loss.html
index 2f95d0a8897..57dadde7eb0 100644
--- a/2.9/generated/torch.nn.functional.huber_loss.html
+++ b/2.9/generated/torch.nn.functional.huber_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.huber_loss#

                                        -torch.nn.functional.huber_loss(input, target, reduction='mean', delta=1.0, weight=None)[source]#
                                        +torch.nn.functional.huber_loss(input, target, reduction='mean', delta=1.0, weight=None)[source]#

                                        Compute the Huber loss, with optional weighting.

                                        Function uses a squared term if the absolute element-wise error falls below delta and a delta-scaled L1 term otherwise.
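
A worked sketch (assuming torch and torch.nn.functional as F): with delta=1.0, the error 0.5 falls in the squared region (0.5 * 0.5**2 = 0.125) and the error 2.0 in the linear region (1.0 * (2.0 - 0.5) = 1.5), so the mean loss is 0.8125:

>>> input = torch.tensor([0.0, 2.0])
>>> target = torch.tensor([0.5, 0.0])
>>> F.huber_loss(input, target, delta=1.0)
tensor(0.8125)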

diff --git a/2.9/generated/torch.nn.functional.instance_norm.html b/2.9/generated/torch.nn.functional.instance_norm.html
index e5727a02e1b..3ade945385a 100644
--- a/2.9/generated/torch.nn.functional.instance_norm.html
+++ b/2.9/generated/torch.nn.functional.instance_norm.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.instance_norm#

                                        -torch.nn.functional.instance_norm(input, running_mean=None, running_var=None, weight=None, bias=None, use_input_stats=True, momentum=0.1, eps=1e-05)[source]#
                                        +torch.nn.functional.instance_norm(input, running_mean=None, running_var=None, weight=None, bias=None, use_input_stats=True, momentum=0.1, eps=1e-05)[source]#

                                        Apply Instance Normalization independently for each channel in every data sample within a batch.

                                        See InstanceNorm1d, InstanceNorm2d, InstanceNorm3d for details.

diff --git a/2.9/generated/torch.nn.functional.interpolate.html b/2.9/generated/torch.nn.functional.interpolate.html
index b3a0c7de06a..9ea93d021e5 100644
--- a/2.9/generated/torch.nn.functional.interpolate.html
+++ b/2.9/generated/torch.nn.functional.interpolate.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.interpolate#

                                        -torch.nn.functional.interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None, antialias=False)[source]#
                                        +torch.nn.functional.interpolate(input, size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None, antialias=False)[source]#

                                        Down/up samples the input.

                                        Tensor interpolated to either the given size or the given scale_factor
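
For example (a minimal sketch, assuming torch and torch.nn.functional as F; only one of size and scale_factor may be given):

>>> x = torch.randn(1, 3, 8, 8)
>>> F.interpolate(x, scale_factor=2, mode='nearest').shape
torch.Size([1, 3, 16, 16])
>>> F.interpolate(x, size=(5, 7), mode='bilinear', align_corners=False).shape
torch.Size([1, 3, 5, 7])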

diff --git a/2.9/generated/torch.nn.functional.kl_div.html b/2.9/generated/torch.nn.functional.kl_div.html
index 825ae4987c4..883a3c745b5 100644
--- a/2.9/generated/torch.nn.functional.kl_div.html
+++ b/2.9/generated/torch.nn.functional.kl_div.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.kl_div#

                                        -torch.nn.functional.kl_div(input, target, size_average=None, reduce=None, reduction='mean', log_target=False)[source]#
                                        +torch.nn.functional.kl_div(input, target, size_average=None, reduce=None, reduction='mean', log_target=False)[source]#

                                        Compute the KL Divergence loss.

                                        Refer - The Kullback-Leibler divergence Loss

                                        See KLDivLoss for details.
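
For example (a minimal sketch, assuming torch and torch.nn.functional as F): input is expected in log-space, and with the default log_target=False the target holds probabilities:

>>> input = F.log_softmax(torch.randn(2, 5), dim=1)   # log-probabilities
>>> target = F.softmax(torch.randn(2, 5), dim=1)      # probabilities
>>> loss = F.kl_div(input, target, reduction='batchmean')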

diff --git a/2.9/generated/torch.nn.functional.l1_loss.html b/2.9/generated/torch.nn.functional.l1_loss.html
index b6cacb7b4e1..2f3afd90518 100644
--- a/2.9/generated/torch.nn.functional.l1_loss.html
+++ b/2.9/generated/torch.nn.functional.l1_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.l1_loss#

                                        -torch.nn.functional.l1_loss(input, target, size_average=None, reduce=None, reduction='mean', weight=None)[source]#
                                        +torch.nn.functional.l1_loss(input, target, size_average=None, reduce=None, reduction='mean', weight=None)[source]#

                                        Compute the L1 loss, with optional weighting.

                                        Function that takes the mean element-wise absolute value difference.

                                        See L1Loss for details.

diff --git a/2.9/generated/torch.nn.functional.layer_norm.html b/2.9/generated/torch.nn.functional.layer_norm.html
index 80ac32d4d09..5965e8cf95e 100644
--- a/2.9/generated/torch.nn.functional.layer_norm.html
+++ b/2.9/generated/torch.nn.functional.layer_norm.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.layer_norm#

                                        -torch.nn.functional.layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-05)[source]#
                                        +torch.nn.functional.layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-05)[source]#

                                        Apply Layer Normalization for last certain number of dimensions.

                                        See LayerNorm for details.
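
For example (a minimal sketch, assuming torch and torch.nn.functional as F): normalized_shape names the trailing dimensions to normalize over:

>>> x = torch.randn(2, 5, 10)
>>> out = F.layer_norm(x, normalized_shape=(10,))  # normalize over the last dim
>>> out.shape
torch.Size([2, 5, 10])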

diff --git a/2.9/generated/torch.nn.functional.leaky_relu.html b/2.9/generated/torch.nn.functional.leaky_relu.html
index 816a4b6149a..0f31f9a0738 100644
--- a/2.9/generated/torch.nn.functional.leaky_relu.html
+++ b/2.9/generated/torch.nn.functional.leaky_relu.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.leaky_relu#

                                        -torch.nn.functional.leaky_relu(input, negative_slope=0.01, inplace=False) Tensor[source]#
                                        +torch.nn.functional.leaky_relu(input, negative_slope=0.01, inplace=False) Tensor[source]#

Applies element-wise, $\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)$

                                        See LeakyReLU for more details.

diff --git a/2.9/generated/torch.nn.functional.local_response_norm.html b/2.9/generated/torch.nn.functional.local_response_norm.html
index 3ca49ad88d1..6de39f4468f 100644
--- a/2.9/generated/torch.nn.functional.local_response_norm.html
+++ b/2.9/generated/torch.nn.functional.local_response_norm.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.local_response_norm#

                                        -torch.nn.functional.local_response_norm(input, size, alpha=0.0001, beta=0.75, k=1.0)[source]#
                                        +torch.nn.functional.local_response_norm(input, size, alpha=0.0001, beta=0.75, k=1.0)[source]#

                                        Apply local response normalization over an input signal.

                                        The input signal is composed of several input planes, where channels occupy the second dimension. Normalization is applied across channels.

diff --git a/2.9/generated/torch.nn.functional.log_softmax.html b/2.9/generated/torch.nn.functional.log_softmax.html
index 0ec3f489ac9..778f92188ad 100644
--- a/2.9/generated/torch.nn.functional.log_softmax.html
+++ b/2.9/generated/torch.nn.functional.log_softmax.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.log_softmax#

                                        -torch.nn.functional.log_softmax(input, dim=None, _stacklevel=3, dtype=None)[source]#
                                        +torch.nn.functional.log_softmax(input, dim=None, _stacklevel=3, dtype=None)[source]#

                                        Apply a softmax followed by a logarithm.

While mathematically equivalent to log(softmax(x)), doing these two operations separately is slower and numerically unstable. This function

diff --git a/2.9/generated/torch.nn.functional.lp_pool1d.html b/2.9/generated/torch.nn.functional.lp_pool1d.html
index 9010df60d4b..2c503169762 100644
--- a/2.9/generated/torch.nn.functional.lp_pool1d.html
+++ b/2.9/generated/torch.nn.functional.lp_pool1d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.lp_pool1d#

                                        -torch.nn.functional.lp_pool1d(input, norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                        +torch.nn.functional.lp_pool1d(input, norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                        Apply a 1D power-average pooling over an input signal composed of several input planes.

                                        If the sum of all inputs to the power of p is zero, the gradient is set to zero as well.

diff --git a/2.9/generated/torch.nn.functional.lp_pool2d.html b/2.9/generated/torch.nn.functional.lp_pool2d.html
index e2355fce751..2ccd2b2bdf9 100644
--- a/2.9/generated/torch.nn.functional.lp_pool2d.html
+++ b/2.9/generated/torch.nn.functional.lp_pool2d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.lp_pool2d#

                                        -torch.nn.functional.lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                        +torch.nn.functional.lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                        Apply a 2D power-average pooling over an input signal composed of several input planes.

                                        If the sum of all inputs to the power of p is zero, the gradient is set to zero as well.

diff --git a/2.9/generated/torch.nn.functional.lp_pool3d.html b/2.9/generated/torch.nn.functional.lp_pool3d.html
index ca92b04ad18..363b6298710 100644
--- a/2.9/generated/torch.nn.functional.lp_pool3d.html
+++ b/2.9/generated/torch.nn.functional.lp_pool3d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.lp_pool3d#

                                        -torch.nn.functional.lp_pool3d(input, norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                        +torch.nn.functional.lp_pool3d(input, norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                        Apply a 3D power-average pooling over an input signal composed of several input planes.

                                        If the sum of all inputs to the power of p is zero, the gradient is set to zero as well.

diff --git a/2.9/generated/torch.nn.functional.margin_ranking_loss.html b/2.9/generated/torch.nn.functional.margin_ranking_loss.html
index 94e94975abb..e1559b930c2 100644
--- a/2.9/generated/torch.nn.functional.margin_ranking_loss.html
+++ b/2.9/generated/torch.nn.functional.margin_ranking_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.margin_ranking_loss#

                                        -torch.nn.functional.margin_ranking_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.margin_ranking_loss(input1, input2, target, margin=0, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute the margin ranking loss.

                                        See MarginRankingLoss for details.

diff --git a/2.9/generated/torch.nn.functional.max_pool1d.html b/2.9/generated/torch.nn.functional.max_pool1d.html
index 4f73c77dcfb..c6571813d9a 100644
--- a/2.9/generated/torch.nn.functional.max_pool1d.html
+++ b/2.9/generated/torch.nn.functional.max_pool1d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.max_pool1d#

                                        -torch.nn.functional.max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#
                                        +torch.nn.functional.max_pool1d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#

                                        Applies a 1D max pooling over an input signal composed of several input planes.

diff --git a/2.9/generated/torch.nn.functional.max_pool2d.html b/2.9/generated/torch.nn.functional.max_pool2d.html
index 87f4bb9bd50..06d81f6b1a0 100644
--- a/2.9/generated/torch.nn.functional.max_pool2d.html
+++ b/2.9/generated/torch.nn.functional.max_pool2d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.max_pool2d#

                                        -torch.nn.functional.max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#
                                        +torch.nn.functional.max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#

                                        Applies a 2D max pooling over an input signal composed of several input planes.
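
For example (a minimal sketch, assuming torch and torch.nn.functional as F; stride defaults to kernel_size):

>>> x = torch.randn(1, 3, 32, 32)
>>> F.max_pool2d(x, kernel_size=2).shape
torch.Size([1, 3, 16, 16])
>>> F.max_pool2d(x, kernel_size=3, stride=2, padding=1).shape
torch.Size([1, 3, 16, 16])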

diff --git a/2.9/generated/torch.nn.functional.max_pool3d.html b/2.9/generated/torch.nn.functional.max_pool3d.html
index 27d7d11c186..f47031c342c 100644
--- a/2.9/generated/torch.nn.functional.max_pool3d.html
+++ b/2.9/generated/torch.nn.functional.max_pool3d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.max_pool3d#

                                        -torch.nn.functional.max_pool3d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#
                                        +torch.nn.functional.max_pool3d(input, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=False, return_indices=False)[source]#

                                        Applies a 3D max pooling over an input signal composed of several input planes.

diff --git a/2.9/generated/torch.nn.functional.max_unpool1d.html b/2.9/generated/torch.nn.functional.max_unpool1d.html
index 31ac489a455..381eae72d2b 100644
--- a/2.9/generated/torch.nn.functional.max_unpool1d.html
+++ b/2.9/generated/torch.nn.functional.max_unpool1d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.max_unpool1d#

                                        -torch.nn.functional.max_unpool1d(input, indices, kernel_size, stride=None, padding=0, output_size=None)[source]#
                                        +torch.nn.functional.max_unpool1d(input, indices, kernel_size, stride=None, padding=0, output_size=None)[source]#

                                        Compute a partial inverse of MaxPool1d.

                                        See MaxUnpool1d for details.
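
For example (a minimal sketch, assuming torch and torch.nn.functional as F): the indices returned by max_pool1d with return_indices=True tell the unpooling step where each maximum came from:

>>> x = torch.randn(1, 1, 8)
>>> pooled, indices = F.max_pool1d(x, kernel_size=2, return_indices=True)
>>> F.max_unpool1d(pooled, indices, kernel_size=2).shape
torch.Size([1, 1, 8])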

diff --git a/2.9/generated/torch.nn.functional.max_unpool2d.html b/2.9/generated/torch.nn.functional.max_unpool2d.html
index c61769886cb..565f5dc153b 100644
--- a/2.9/generated/torch.nn.functional.max_unpool2d.html
+++ b/2.9/generated/torch.nn.functional.max_unpool2d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.max_unpool2d#

                                        -torch.nn.functional.max_unpool2d(input, indices, kernel_size, stride=None, padding=0, output_size=None)[source]#
                                        +torch.nn.functional.max_unpool2d(input, indices, kernel_size, stride=None, padding=0, output_size=None)[source]#

                                        Compute a partial inverse of MaxPool2d.

                                        See MaxUnpool2d for details.

diff --git a/2.9/generated/torch.nn.functional.max_unpool3d.html b/2.9/generated/torch.nn.functional.max_unpool3d.html
index 4e89f977b5b..ad3199a40b2 100644
--- a/2.9/generated/torch.nn.functional.max_unpool3d.html
+++ b/2.9/generated/torch.nn.functional.max_unpool3d.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.max_unpool3d#

                                        -torch.nn.functional.max_unpool3d(input, indices, kernel_size, stride=None, padding=0, output_size=None)[source]#
                                        +torch.nn.functional.max_unpool3d(input, indices, kernel_size, stride=None, padding=0, output_size=None)[source]#

                                        Compute a partial inverse of MaxPool3d.

                                        See MaxUnpool3d for details.

diff --git a/2.9/generated/torch.nn.functional.mish.html b/2.9/generated/torch.nn.functional.mish.html
index f8d73ea736a..63ee4974f3e 100644
--- a/2.9/generated/torch.nn.functional.mish.html
+++ b/2.9/generated/torch.nn.functional.mish.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.mish#

                                        -torch.nn.functional.mish(input, inplace=False)[source]#
                                        +torch.nn.functional.mish(input, inplace=False)[source]#

                                        Apply the Mish function, element-wise.

                                        Mish: A Self Regularized Non-Monotonic Neural Activation Function.

diff --git a/2.9/generated/torch.nn.functional.mse_loss.html b/2.9/generated/torch.nn.functional.mse_loss.html
index 88b7ff434cc..80391fc23ab 100644
--- a/2.9/generated/torch.nn.functional.mse_loss.html
+++ b/2.9/generated/torch.nn.functional.mse_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.mse_loss#

                                        -torch.nn.functional.mse_loss(input, target, size_average=None, reduce=None, reduction='mean', weight=None)[source]#
                                        +torch.nn.functional.mse_loss(input, target, size_average=None, reduce=None, reduction='mean', weight=None)[source]#

                                        Compute the element-wise mean squared error, with optional weighting.

                                        See MSELoss for details.
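
For example (a minimal sketch, assuming torch and torch.nn.functional as F): the element-wise squared errors below are 0, 4, and 4:

>>> input = torch.tensor([1.0, 2.0, 3.0])
>>> target = torch.tensor([1.0, 0.0, 5.0])
>>> F.mse_loss(input, target)                    # mean: 8 / 3
tensor(2.6667)
>>> F.mse_loss(input, target, reduction='sum')
tensor(8.)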

diff --git a/2.9/generated/torch.nn.functional.multi_margin_loss.html b/2.9/generated/torch.nn.functional.multi_margin_loss.html
index 1ab2ea59fe4..9cf7f66b888 100644
--- a/2.9/generated/torch.nn.functional.multi_margin_loss.html
+++ b/2.9/generated/torch.nn.functional.multi_margin_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.multi_margin_loss#

                                        -torch.nn.functional.multi_margin_loss(input, target, p=1, margin=1.0, weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.multi_margin_loss(input, target, p=1, margin=1.0, weight=None, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute the multi margin loss, with optional weighting.

                                        See MultiMarginLoss for details.

diff --git a/2.9/generated/torch.nn.functional.multilabel_margin_loss.html b/2.9/generated/torch.nn.functional.multilabel_margin_loss.html
index 1a4da0f2ed4..3f0e9ed95f0 100644
--- a/2.9/generated/torch.nn.functional.multilabel_margin_loss.html
+++ b/2.9/generated/torch.nn.functional.multilabel_margin_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.multilabel_margin_loss#

                                        -torch.nn.functional.multilabel_margin_loss(input, target, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.multilabel_margin_loss(input, target, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute the multilabel margin loss.

                                        See MultiLabelMarginLoss for details.

diff --git a/2.9/generated/torch.nn.functional.multilabel_soft_margin_loss.html b/2.9/generated/torch.nn.functional.multilabel_soft_margin_loss.html
index c920ef53b6c..5a47ff0da3c 100644
--- a/2.9/generated/torch.nn.functional.multilabel_soft_margin_loss.html
+++ b/2.9/generated/torch.nn.functional.multilabel_soft_margin_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.multilabel_soft_margin_loss#

                                        -torch.nn.functional.multilabel_soft_margin_loss(input, target, weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.multilabel_soft_margin_loss(input, target, weight=None, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute the multilabel soft margin loss.

                                        See MultiLabelSoftMarginLoss for details.

diff --git a/2.9/generated/torch.nn.functional.nll_loss.html b/2.9/generated/torch.nn.functional.nll_loss.html
index 708b2cecb52..8681ff4184c 100644
--- a/2.9/generated/torch.nn.functional.nll_loss.html
+++ b/2.9/generated/torch.nn.functional.nll_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.nll_loss#

                                        -torch.nn.functional.nll_loss(input, target, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.nll_loss(input, target, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')[source]#

                                        Compute the negative log likelihood loss.

                                        See NLLLoss for details.
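
For example (a minimal sketch, assuming torch and torch.nn.functional as F): input holds log-probabilities, typically produced by log_softmax, and target holds class indices:

>>> log_probs = F.log_softmax(torch.randn(3, 5), dim=1)  # (N, C)
>>> target = torch.tensor([1, 0, 4])                     # one class index per sample
>>> loss = F.nll_loss(log_probs, target)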

diff --git a/2.9/generated/torch.nn.functional.normalize.html b/2.9/generated/torch.nn.functional.normalize.html
index 55b63129577..223119a0c3d 100644
--- a/2.9/generated/torch.nn.functional.normalize.html
+++ b/2.9/generated/torch.nn.functional.normalize.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.normalize#

                                        -torch.nn.functional.normalize(input, p=2.0, dim=1, eps=1e-12, out=None)[source]#
                                        +torch.nn.functional.normalize(input, p=2.0, dim=1, eps=1e-12, out=None)[source]#

Perform $L_p$ normalization of inputs over specified dimension.

For a tensor input of sizes $(n_0, ..., n_{dim}, ..., n_k)$, each $n_{dim}$-element vector $v$ along dimension dim is transformed as

diff --git a/2.9/generated/torch.nn.functional.pad.html b/2.9/generated/torch.nn.functional.pad.html
index 45c6d755e6c..3e14f8747d5 100644
--- a/2.9/generated/torch.nn.functional.pad.html
+++ b/2.9/generated/torch.nn.functional.pad.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.pad#

                                        -torch.nn.functional.pad(input, pad, mode='constant', value=None) Tensor[source]#
                                        +torch.nn.functional.pad(input, pad, mode='constant', value=None) Tensor[source]#

                                        Pads tensor.

                                        Padding size:

The padding size by which to pad some dimensions of input

diff --git a/2.9/generated/torch.nn.functional.poisson_nll_loss.html b/2.9/generated/torch.nn.functional.poisson_nll_loss.html
index 33624ec5e80..02a5b3b0fde 100644
--- a/2.9/generated/torch.nn.functional.poisson_nll_loss.html
+++ b/2.9/generated/torch.nn.functional.poisson_nll_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.poisson_nll_loss#

                                        -torch.nn.functional.poisson_nll_loss(input, target, log_input=True, full=False, size_average=None, eps=1e-08, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.poisson_nll_loss(input, target, log_input=True, full=False, size_average=None, eps=1e-08, reduce=None, reduction='mean')[source]#

                                        Compute the Poisson negative log likelihood loss.

                                        See PoissonNLLLoss for details.
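
For example (a minimal sketch, assuming torch and torch.nn.functional as F): with the default log_input=True, input is the log of the predicted Poisson rate:

>>> log_rate = torch.randn(4)                     # log of the predicted rate
>>> target = torch.tensor([1.0, 0.0, 3.0, 2.0])   # observed counts
>>> loss = F.poisson_nll_loss(log_rate, target)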

diff --git a/2.9/generated/torch.nn.functional.relu.html b/2.9/generated/torch.nn.functional.relu.html
index 1269fd3adbd..dddaefad6fe 100644
--- a/2.9/generated/torch.nn.functional.relu.html
+++ b/2.9/generated/torch.nn.functional.relu.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.relu#

                                        -torch.nn.functional.relu(input, inplace=False) Tensor[source]#
                                        +torch.nn.functional.relu(input, inplace=False) Tensor[source]#

                                        Applies the rectified linear unit function element-wise. See ReLU for more details.

diff --git a/2.9/generated/torch.nn.functional.relu6.html b/2.9/generated/torch.nn.functional.relu6.html
index 7dd7e7b3262..3bfc57dae92 100644
--- a/2.9/generated/torch.nn.functional.relu6.html
+++ b/2.9/generated/torch.nn.functional.relu6.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.relu6#

                                        -torch.nn.functional.relu6(input, inplace=False) Tensor[source]#
                                        +torch.nn.functional.relu6(input, inplace=False) Tensor[source]#

Applies the element-wise function $\text{ReLU6}(x) = \min(\max(0,x), 6)$.

                                        See ReLU6 for more details.

diff --git a/2.9/generated/torch.nn.functional.rms_norm.html b/2.9/generated/torch.nn.functional.rms_norm.html
index 389d97e3d68..f2c8fad3b08 100644
--- a/2.9/generated/torch.nn.functional.rms_norm.html
+++ b/2.9/generated/torch.nn.functional.rms_norm.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.rms_norm#

                                        -torch.nn.functional.rms_norm(input, normalized_shape, weight=None, eps=None)[source]#
                                        +torch.nn.functional.rms_norm(input, normalized_shape, weight=None, eps=None)[source]#

                                        Apply Root Mean Square Layer Normalization.

                                        See RMSNorm for details.

diff --git a/2.9/generated/torch.nn.functional.rrelu.html b/2.9/generated/torch.nn.functional.rrelu.html
index 9ea07f56e35..0ae73c13faa 100644
--- a/2.9/generated/torch.nn.functional.rrelu.html
+++ b/2.9/generated/torch.nn.functional.rrelu.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.rrelu#

                                        -torch.nn.functional.rrelu(input, lower=1. / 8, upper=1. / 3, training=False, inplace=False) Tensor[source]#
                                        +torch.nn.functional.rrelu(input, lower=1. / 8, upper=1. / 3, training=False, inplace=False) Tensor[source]#

                                        Randomized leaky ReLU.

                                        See RReLU for more details.

diff --git a/2.9/generated/torch.nn.functional.selu.html b/2.9/generated/torch.nn.functional.selu.html
index 888fc4b115b..e821e4738c3 100644
--- a/2.9/generated/torch.nn.functional.selu.html
+++ b/2.9/generated/torch.nn.functional.selu.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.selu#

                                        -torch.nn.functional.selu(input, inplace=False) Tensor[source]#
                                        +torch.nn.functional.selu(input, inplace=False) Tensor[source]#

Applies element-wise, $\text{SELU}(x) = scale * (\max(0,x) + \min(0, \alpha * (\exp(x) - 1)))$, with $\alpha = 1.6732632423543772848170429916717$ and

diff --git a/2.9/generated/torch.nn.functional.sigmoid.html b/2.9/generated/torch.nn.functional.sigmoid.html
index 8b925c3a654..971b0619919 100644
--- a/2.9/generated/torch.nn.functional.sigmoid.html
+++ b/2.9/generated/torch.nn.functional.sigmoid.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.sigmoid#

                                        -torch.nn.functional.sigmoid(input) Tensor[source]#
                                        +torch.nn.functional.sigmoid(input) Tensor[source]#

Applies the element-wise function $\text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}$

                                        See Sigmoid for more details.

diff --git a/2.9/generated/torch.nn.functional.silu.html b/2.9/generated/torch.nn.functional.silu.html
index ecf462f790c..6ca8e980b31 100644
--- a/2.9/generated/torch.nn.functional.silu.html
+++ b/2.9/generated/torch.nn.functional.silu.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.silu#

                                        -torch.nn.functional.silu(input, inplace=False)[source]#
                                        +torch.nn.functional.silu(input, inplace=False)[source]#

                                        Apply the Sigmoid Linear Unit (SiLU) function, element-wise.

                                        The SiLU function is also known as the swish function.

diff --git a/2.9/generated/torch.nn.functional.smooth_l1_loss.html b/2.9/generated/torch.nn.functional.smooth_l1_loss.html
index 3e2fc55c882..fdf3c2ce7e5 100644
--- a/2.9/generated/torch.nn.functional.smooth_l1_loss.html
+++ b/2.9/generated/torch.nn.functional.smooth_l1_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.smooth_l1_loss#

                                        -torch.nn.functional.smooth_l1_loss(input, target, size_average=None, reduce=None, reduction='mean', beta=1.0)[source]#
                                        +torch.nn.functional.smooth_l1_loss(input, target, size_average=None, reduce=None, reduction='mean', beta=1.0)[source]#

                                        Compute the Smooth L1 loss.

                                        Function uses a squared term if the absolute element-wise error falls below beta and an L1 term otherwise.
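
A worked sketch (assuming torch and torch.nn.functional as F): with beta=1.0, the error 0.5 is squared (0.5 * 0.5**2 / 1.0 = 0.125) while the error 3.0 is linear (3.0 - 0.5 = 2.5), giving a mean of 1.3125:

>>> input = torch.tensor([0.0, 3.0])
>>> target = torch.tensor([0.5, 0.0])
>>> F.smooth_l1_loss(input, target, beta=1.0)
tensor(1.3125)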

diff --git a/2.9/generated/torch.nn.functional.soft_margin_loss.html b/2.9/generated/torch.nn.functional.soft_margin_loss.html
index 6fe4ffbdd88..1b4909f407a 100644
--- a/2.9/generated/torch.nn.functional.soft_margin_loss.html
+++ b/2.9/generated/torch.nn.functional.soft_margin_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.soft_margin_loss#

                                        -torch.nn.functional.soft_margin_loss(input, target, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.soft_margin_loss(input, target, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute the soft margin loss.

                                        See SoftMarginLoss for details.

diff --git a/2.9/generated/torch.nn.functional.softmax.html b/2.9/generated/torch.nn.functional.softmax.html
index 1723995851d..a759d7921e0 100644
--- a/2.9/generated/torch.nn.functional.softmax.html
+++ b/2.9/generated/torch.nn.functional.softmax.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.softmax#

                                        -torch.nn.functional.softmax(input, dim=None, _stacklevel=3, dtype=None)[source]#
                                        +torch.nn.functional.softmax(input, dim=None, _stacklevel=3, dtype=None)[source]#

                                        Apply a softmax function.

                                        Softmax is defined as:

$\text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}$
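
For example (a minimal sketch, assuming torch and torch.nn.functional as F): each slice along dim is mapped to positive values that sum to 1:

>>> x = torch.randn(2, 4)
>>> probs = F.softmax(x, dim=1)
>>> probs.sum(dim=1)   # each row sums to 1, up to floating-point error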

diff --git a/2.9/generated/torch.nn.functional.softmin.html b/2.9/generated/torch.nn.functional.softmin.html
index 6fa5b9f9941..dfe9a1c3a29 100644
--- a/2.9/generated/torch.nn.functional.softmin.html
+++ b/2.9/generated/torch.nn.functional.softmin.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.softmin#

                                        -torch.nn.functional.softmin(input, dim=None, _stacklevel=3, dtype=None)[source]#
                                        +torch.nn.functional.softmin(input, dim=None, _stacklevel=3, dtype=None)[source]#

                                        Apply a softmin function.

Note that $\text{Softmin}(x) = \text{Softmax}(-x)$. See softmax definition for mathematical formula.

                                        See Softmin for more details.

diff --git a/2.9/generated/torch.nn.functional.softsign.html b/2.9/generated/torch.nn.functional.softsign.html
index 03928e8d2b5..c56b56ff7cc 100644
--- a/2.9/generated/torch.nn.functional.softsign.html
+++ b/2.9/generated/torch.nn.functional.softsign.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.softsign#

                                        -torch.nn.functional.softsign(input) Tensor[source]#
                                        +torch.nn.functional.softsign(input) Tensor[source]#

Applies element-wise, the function $\text{SoftSign}(x) = \frac{x}{1 + |x|}$

                                        See Softsign for more details.

diff --git a/2.9/generated/torch.nn.functional.tanh.html b/2.9/generated/torch.nn.functional.tanh.html
index ca11de8802a..6731ef49b01 100644
--- a/2.9/generated/torch.nn.functional.tanh.html
+++ b/2.9/generated/torch.nn.functional.tanh.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.tanh#

                                        -torch.nn.functional.tanh(input) Tensor[source]#
                                        +torch.nn.functional.tanh(input) Tensor[source]#

Applies element-wise, $\text{Tanh}(x) = \tanh(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}$

                                        See Tanh for more details.

diff --git a/2.9/generated/torch.nn.functional.tanhshrink.html b/2.9/generated/torch.nn.functional.tanhshrink.html
index 0b265f9d8a8..dc0db48f741 100644
--- a/2.9/generated/torch.nn.functional.tanhshrink.html
+++ b/2.9/generated/torch.nn.functional.tanhshrink.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.tanhshrink#

                                        -torch.nn.functional.tanhshrink(input) Tensor[source]#
                                        +torch.nn.functional.tanhshrink(input) Tensor[source]#

Applies element-wise, $\text{Tanhshrink}(x) = x - \text{Tanh}(x)$

                                        See Tanhshrink for more details.

diff --git a/2.9/generated/torch.nn.functional.threshold.html b/2.9/generated/torch.nn.functional.threshold.html
index e3264109b96..085130297d2 100644
--- a/2.9/generated/torch.nn.functional.threshold.html
+++ b/2.9/generated/torch.nn.functional.threshold.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.threshold#

                                        -torch.nn.functional.threshold(input, threshold, value, inplace=False)[source]#
                                        +torch.nn.functional.threshold(input, threshold, value, inplace=False)[source]#

                                        Apply a threshold to each element of the input Tensor.

                                        See Threshold for more details.

diff --git a/2.9/generated/torch.nn.functional.torch.nn.parallel.data_parallel.html b/2.9/generated/torch.nn.functional.torch.nn.parallel.data_parallel.html
index 7bfc516e151..d478ed63bc3 100644
--- a/2.9/generated/torch.nn.functional.torch.nn.parallel.data_parallel.html
+++ b/2.9/generated/torch.nn.functional.torch.nn.parallel.data_parallel.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.torch.nn.parallel.data_parallel#

                                        -torch.nn.parallel.data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None)[source]#
                                        +torch.nn.parallel.data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None)[source]#

                                        Evaluate module(input) in parallel across the GPUs given in device_ids.

                                        This is the functional version of the DataParallel module.

diff --git a/2.9/generated/torch.nn.functional.triplet_margin_loss.html b/2.9/generated/torch.nn.functional.triplet_margin_loss.html
index e8fc7e701b9..0f13342dab1 100644
--- a/2.9/generated/torch.nn.functional.triplet_margin_loss.html
+++ b/2.9/generated/torch.nn.functional.triplet_margin_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.triplet_margin_loss#

                                        -torch.nn.functional.triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean')[source]#
                                        +torch.nn.functional.triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean')[source]#

                                        Compute the triplet loss between given input tensors and a margin greater than 0.

                                        See TripletMarginLoss for details.

diff --git a/2.9/generated/torch.nn.functional.triplet_margin_with_distance_loss.html b/2.9/generated/torch.nn.functional.triplet_margin_with_distance_loss.html
index 6e8faa22774..30777fa65ed 100644
--- a/2.9/generated/torch.nn.functional.triplet_margin_with_distance_loss.html
+++ b/2.9/generated/torch.nn.functional.triplet_margin_with_distance_loss.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.triplet_margin_with_distance_loss#

                                        -torch.nn.functional.triplet_margin_with_distance_loss(anchor, positive, negative, *, distance_function=None, margin=1.0, swap=False, reduction='mean')[source]#
                                        +torch.nn.functional.triplet_margin_with_distance_loss(anchor, positive, negative, *, distance_function=None, margin=1.0, swap=False, reduction='mean')[source]#

                                        Compute the triplet margin loss for input tensors using a custom distance function.

                                        See TripletMarginWithDistanceLoss for details.

diff --git a/2.9/generated/torch.nn.functional.unfold.html b/2.9/generated/torch.nn.functional.unfold.html
index ea65e6528e8..87ba81c4aef 100644
--- a/2.9/generated/torch.nn.functional.unfold.html
+++ b/2.9/generated/torch.nn.functional.unfold.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.unfold#

                                        -torch.nn.functional.unfold(input, kernel_size, dilation=1, padding=0, stride=1)[source]#
                                        +torch.nn.functional.unfold(input, kernel_size, dilation=1, padding=0, stride=1)[source]#

                                        Extract sliding local blocks from a batched input tensor.

                                        Warning

diff --git a/2.9/generated/torch.nn.functional.upsample.html b/2.9/generated/torch.nn.functional.upsample.html
index 8c53ffe90f0..eb29f692708 100644
--- a/2.9/generated/torch.nn.functional.upsample.html
+++ b/2.9/generated/torch.nn.functional.upsample.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.upsample#

                                        -torch.nn.functional.upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None)[source]#
                                        +torch.nn.functional.upsample(input, size=None, scale_factor=None, mode='nearest', align_corners=None)[source]#

                                        Upsample input.

                                        Provided tensor is upsampled to either the given size or the given scale_factor

diff --git a/2.9/generated/torch.nn.functional.upsample_bilinear.html b/2.9/generated/torch.nn.functional.upsample_bilinear.html
index 50ac02f583d..ada7c11562f 100644
--- a/2.9/generated/torch.nn.functional.upsample_bilinear.html
+++ b/2.9/generated/torch.nn.functional.upsample_bilinear.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.upsample_bilinear#

                                        -torch.nn.functional.upsample_bilinear(input, size=None, scale_factor=None)[source]#
                                        +torch.nn.functional.upsample_bilinear(input, size=None, scale_factor=None)[source]#

                                        Upsamples the input, using bilinear upsampling.

                                        Warning

diff --git a/2.9/generated/torch.nn.functional.upsample_nearest.html b/2.9/generated/torch.nn.functional.upsample_nearest.html
index 6d60cf737fe..288b2fa7f72 100644
--- a/2.9/generated/torch.nn.functional.upsample_nearest.html
+++ b/2.9/generated/torch.nn.functional.upsample_nearest.html
@@ -4404,7 +4404,7 @@

                                        torch.nn.functional.upsample_nearest#

                                        -torch.nn.functional.upsample_nearest(input, size=None, scale_factor=None)[source]#
                                        +torch.nn.functional.upsample_nearest(input, size=None, scale_factor=None)[source]#

                                        Upsamples the input, using nearest neighbours’ pixel values.

                                        Warning

diff --git a/2.9/generated/torch.nn.modules.activation.CELU.html b/2.9/generated/torch.nn.modules.activation.CELU.html
index 81b861b09bd..c21e9cb23fa 100644
--- a/2.9/generated/torch.nn.modules.activation.CELU.html
+++ b/2.9/generated/torch.nn.modules.activation.CELU.html
@@ -4415,7 +4415,7 @@

                                        CELU#

                                        -class torch.nn.modules.activation.CELU(alpha=1.0, inplace=False)[source]#
                                        +class torch.nn.modules.activation.CELU(alpha=1.0, inplace=False)[source]#

                                        Applies the CELU function element-wise.

$\text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x/\alpha) - 1))$

@@ -4445,7 +4445,7 @@

                                        CELU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4456,7 +4456,7 @@

                                        CELU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.ELU.html b/2.9/generated/torch.nn.modules.activation.ELU.html
index b7d638fce97..c05242a896a 100644
--- a/2.9/generated/torch.nn.modules.activation.ELU.html
+++ b/2.9/generated/torch.nn.modules.activation.ELU.html
@@ -4415,7 +4415,7 @@

                                        ELU#

                                        -class torch.nn.modules.activation.ELU(alpha=1.0, inplace=False)[source]#
                                        +class torch.nn.modules.activation.ELU(alpha=1.0, inplace=False)[source]#

                                        Applies the Exponential Linear Unit (ELU) function, element-wise.

                                        Method described in the paper: Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs).

                                        @@ -4450,7 +4450,7 @@

                                        ELU#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4461,7 +4461,7 @@

                                        ELU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.GELU.html b/2.9/generated/torch.nn.modules.activation.GELU.html
index 047db6ce898..5573d18f17a 100644
--- a/2.9/generated/torch.nn.modules.activation.GELU.html
+++ b/2.9/generated/torch.nn.modules.activation.GELU.html
@@ -4415,7 +4415,7 @@

                                        GELU#

                                        -class torch.nn.modules.activation.GELU(approximate='none')[source]#
                                        +class torch.nn.modules.activation.GELU(approximate='none')[source]#

                                        Applies the Gaussian Error Linear Units function.

$\text{GELU}(x) = x * \Phi(x)$

@@ -4457,7 +4457,7 @@

                                        GELU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4468,7 +4468,7 @@

                                        GELU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.GLU.html b/2.9/generated/torch.nn.modules.activation.GLU.html
index 964b9927c96..a30b79a79bc 100644
--- a/2.9/generated/torch.nn.modules.activation.GLU.html
+++ b/2.9/generated/torch.nn.modules.activation.GLU.html
@@ -4415,7 +4415,7 @@

                                        GLU#

                                        -class torch.nn.modules.activation.GLU(dim=-1)[source]#
                                        +class torch.nn.modules.activation.GLU(dim=-1)[source]#

                                        Applies the gated linear unit function.

$\text{GLU}(a, b) = a \otimes \sigma(b)$ where $a$ is the first half of the input matrices and $b$ is the second half.

                                        @@ -4441,7 +4441,7 @@

                                        GLU#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4452,7 +4452,7 @@

                                        GLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Hardshrink.html b/2.9/generated/torch.nn.modules.activation.Hardshrink.html
index 417dbf93c95..97d389151fe 100644
--- a/2.9/generated/torch.nn.modules.activation.Hardshrink.html
+++ b/2.9/generated/torch.nn.modules.activation.Hardshrink.html
@@ -4415,7 +4415,7 @@

                                        Hardshrink#

                                        -class torch.nn.modules.activation.Hardshrink(lambd=0.5)[source]#
                                        +class torch.nn.modules.activation.Hardshrink(lambd=0.5)[source]#

                                        Applies the Hard Shrinkage (Hardshrink) function element-wise.

                                        Hardshrink is defined as:

                                        @@ -4447,7 +4447,7 @@

                                        Hardshrink
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4458,7 +4458,7 @@

                                        Hardshrink
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Run forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Hardsigmoid.html b/2.9/generated/torch.nn.modules.activation.Hardsigmoid.html
index 40950880704..d9747a319d2 100644
--- a/2.9/generated/torch.nn.modules.activation.Hardsigmoid.html
+++ b/2.9/generated/torch.nn.modules.activation.Hardsigmoid.html
@@ -4415,7 +4415,7 @@

                                        Hardsigmoid#

                                        -class torch.nn.modules.activation.Hardsigmoid(inplace=False)[source]#
                                        +class torch.nn.modules.activation.Hardsigmoid(inplace=False)[source]#

                                        Applies the Hardsigmoid function element-wise.

                                        Hardsigmoid is defined as:

                                        @@ -4446,7 +4446,7 @@

                                        Hardsigmoid
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Hardswish.html b/2.9/generated/torch.nn.modules.activation.Hardswish.html
index aa6d1e836a0..63a83f9b95d 100644
--- a/2.9/generated/torch.nn.modules.activation.Hardswish.html
+++ b/2.9/generated/torch.nn.modules.activation.Hardswish.html
@@ -4415,7 +4415,7 @@

                                        Hardswish#

                                        -class torch.nn.modules.activation.Hardswish(inplace=False)[source]#
                                        +class torch.nn.modules.activation.Hardswish(inplace=False)[source]#

                                        Applies the Hardswish function, element-wise.

                                        Method described in the paper: Searching for MobileNetV3.

                                        Hardswish is defined as:

                                        @@ -4447,7 +4447,7 @@

                                        Hardswish
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Hardtanh.html b/2.9/generated/torch.nn.modules.activation.Hardtanh.html
index e030e5c2873..d10300251f8 100644
--- a/2.9/generated/torch.nn.modules.activation.Hardtanh.html
+++ b/2.9/generated/torch.nn.modules.activation.Hardtanh.html
@@ -4415,7 +4415,7 @@

                                        Hardtanh#

                                        -class torch.nn.modules.activation.Hardtanh(min_val=-1.0, max_val=1.0, inplace=False, min_value=None, max_value=None)[source]#
                                        +class torch.nn.modules.activation.Hardtanh(min_val=-1.0, max_val=1.0, inplace=False, min_value=None, max_value=None)[source]#

                                        Applies the HardTanh function element-wise.

                                        HardTanh is defined as:

                                        @@ -4452,7 +4452,7 @@

                                        Hardtanh
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4463,7 +4463,7 @@

                                        Hardtanh
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.LeakyReLU.html b/2.9/generated/torch.nn.modules.activation.LeakyReLU.html
index 3ea30f831a5..98d2395651a 100644
--- a/2.9/generated/torch.nn.modules.activation.LeakyReLU.html
+++ b/2.9/generated/torch.nn.modules.activation.LeakyReLU.html
@@ -4415,7 +4415,7 @@

                                        LeakyReLU#

                                        -class torch.nn.modules.activation.LeakyReLU(negative_slope=0.01, inplace=False)[source]#
                                        +class torch.nn.modules.activation.LeakyReLU(negative_slope=0.01, inplace=False)[source]#

                                        Applies the LeakyReLU function element-wise.

$\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)$
@@ -4454,7 +4454,7 @@
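A minimal sketch of the slope behavior (values chosen for illustration, not part of the upstream docs):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.LeakyReLU(negative_slope=0.1)
>>> m(torch.tensor([-2.0, 3.0]))   # negative input scaled by 0.1
tensor([-0.2000,  3.0000])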

                                        LeakyReLU
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4465,7 +4465,7 @@

                                        LeakyReLU
                                        -forward(input)[source]#
                                        +forward(input)[source]#

Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.LogSigmoid.html b/2.9/generated/torch.nn.modules.activation.LogSigmoid.html
index 6fabbb1b13f..839b850be14 100644
--- a/2.9/generated/torch.nn.modules.activation.LogSigmoid.html
+++ b/2.9/generated/torch.nn.modules.activation.LogSigmoid.html
@@ -4415,7 +4415,7 @@

                                        LogSigmoid#

                                        -class torch.nn.modules.activation.LogSigmoid(*args, **kwargs)[source]#
                                        +class torch.nn.modules.activation.LogSigmoid(*args, **kwargs)[source]#

                                        Applies the Logsigmoid function element-wise.

$\text{LogSigmoid}(x) = \log\left(\frac{1}{1 + \exp(-x)}\right)$
@@ -4438,7 +4438,7 @@

                                        LogSigmoid
                                        -forward(input)[source]#
                                        +forward(input)[source]#

Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.LogSoftmax.html b/2.9/generated/torch.nn.modules.activation.LogSoftmax.html
index 2b32ba97d99..551ef110d95 100644
--- a/2.9/generated/torch.nn.modules.activation.LogSoftmax.html
+++ b/2.9/generated/torch.nn.modules.activation.LogSoftmax.html
@@ -4415,7 +4415,7 @@

                                        LogSoftmax#

                                        -class torch.nn.modules.activation.LogSoftmax(dim=None)[source]#
                                        +class torch.nn.modules.activation.LogSoftmax(dim=None)[source]#

Applies the $\log(\text{Softmax}(x))$ function to an n-dimensional input Tensor.

                                        The LogSoftmax formulation can be simplified as:

                                        @@ -4449,7 +4449,7 @@

                                        LogSoftmax
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4460,7 +4460,7 @@

                                        LogSoftmax
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Mish.html b/2.9/generated/torch.nn.modules.activation.Mish.html
index 76b10c57fea..6978df5cdb2 100644
--- a/2.9/generated/torch.nn.modules.activation.Mish.html
+++ b/2.9/generated/torch.nn.modules.activation.Mish.html
@@ -4415,7 +4415,7 @@

                                        Mish#

                                        -class torch.nn.modules.activation.Mish(inplace=False)[source]#
                                        +class torch.nn.modules.activation.Mish(inplace=False)[source]#

                                        Applies the Mish function, element-wise.

                                        Mish: A Self Regularized Non-Monotonic Neural Activation Function.

                                        @@ -4443,7 +4443,7 @@

                                        Mish#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4454,7 +4454,7 @@

                                        Mish#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.MultiheadAttention.html b/2.9/generated/torch.nn.modules.activation.MultiheadAttention.html
index fa34c3e1784..2c412e8084e 100644
--- a/2.9/generated/torch.nn.modules.activation.MultiheadAttention.html
+++ b/2.9/generated/torch.nn.modules.activation.MultiheadAttention.html
@@ -4415,7 +4415,7 @@

                                        MultiheadAttention#

                                        -class torch.nn.modules.activation.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.activation.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None)[source]#

                                        Allows the model to jointly attend to information from different representation subspaces.

This MultiheadAttention layer implements the original architecture described in the Attention Is All You Need paper. The
@@ -4478,7 +4478,7 @@

                                        MultiheadAttention
                                        -forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False)[source]#
                                        +forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False)[source]#

                                        Compute attention outputs using query, key, and value embeddings.

                                        Supports optional parameters for padding, masks and attention weights.
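A minimal self-attention sketch (dimensions are illustrative, not part of the upstream docs):

>>> import torch
>>> import torch.nn as nn
>>> mha = nn.MultiheadAttention(embed_dim=8, num_heads=2, batch_first=True)
>>> q = torch.randn(1, 5, 8)               # (batch, seq, embed)
>>> attn_out, attn_weights = mha(q, q, q)  # self-attention: query = key = value
>>> attn_out.shape, attn_weights.shape     # weights averaged over heads by default
(torch.Size([1, 5, 8]), torch.Size([1, 5, 5]))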

                                        @@ -4554,7 +4554,7 @@

                                        MultiheadAttention
                                        -merge_masks(attn_mask, key_padding_mask, query)[source]#
                                        +merge_masks(attn_mask, key_padding_mask, query)[source]#

                                        Determine mask type and combine masks if necessary.

If only one mask is provided, that mask and the corresponding mask type will be returned. If both masks are provided, they will be both expanded to shape (batch_size, num_heads, seq_len, seq_len), combined with logical or, and mask type 2 will be returned.
diff --git a/2.9/generated/torch.nn.modules.activation.PReLU.html b/2.9/generated/torch.nn.modules.activation.PReLU.html
index 63b9c6bcbd9..e02422b896c 100644
--- a/2.9/generated/torch.nn.modules.activation.PReLU.html
+++ b/2.9/generated/torch.nn.modules.activation.PReLU.html
@@ -4415,7 +4415,7 @@

                                        PReLU#

                                        -class torch.nn.modules.activation.PReLU(num_parameters=1, init=0.25, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.activation.PReLU(num_parameters=1, init=0.25, device=None, dtype=None)[source]#

                                        Applies the element-wise PReLU function.

$\text{PReLU}(x) = \max(0, x) + a * \min(0, x)$
@@ -4472,7 +4472,7 @@
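A minimal sketch of the learnable slope (illustrative values):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.PReLU()                        # one learnable slope, initialized to 0.25
>>> out = m(torch.tensor([-4.0, 4.0]))    # -4 * 0.25 = -1.0; positive values pass through
>>> out.detach()
tensor([-1.,  4.])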

                                        PReLU#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4483,7 +4483,7 @@

                                        PReLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        @@ -4494,7 +4494,7 @@

                                        PReLU#
                                        -reset_parameters()[source]#
                                        +reset_parameters()[source]#

                                        Resets parameters based on their initialization used in __init__.

diff --git a/2.9/generated/torch.nn.modules.activation.RReLU.html b/2.9/generated/torch.nn.modules.activation.RReLU.html
index 36cdc40f017..fa3496607cd 100644
--- a/2.9/generated/torch.nn.modules.activation.RReLU.html
+++ b/2.9/generated/torch.nn.modules.activation.RReLU.html
@@ -4415,7 +4415,7 @@

                                        RReLU#

                                        -class torch.nn.modules.activation.RReLU(lower=0.125, upper=0.3333333333333333, inplace=False)[source]#
                                        +class torch.nn.modules.activation.RReLU(lower=0.125, upper=0.3333333333333333, inplace=False)[source]#

                                        Applies the randomized leaky rectified linear unit function, element-wise.

                                        Method described in the paper: Empirical Evaluation of Rectified Activations in Convolutional Network.

                                        @@ -4455,7 +4455,7 @@

                                        RReLU#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4466,7 +4466,7 @@

                                        RReLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.ReLU.html b/2.9/generated/torch.nn.modules.activation.ReLU.html
index 619703e4049..03cdaa13b22 100644
--- a/2.9/generated/torch.nn.modules.activation.ReLU.html
+++ b/2.9/generated/torch.nn.modules.activation.ReLU.html
@@ -4415,7 +4415,7 @@

                                        ReLU#

                                        -class torch.nn.modules.activation.ReLU(inplace=False)[source]#
                                        +class torch.nn.modules.activation.ReLU(inplace=False)[source]#

                                        Applies the rectified linear unit function element-wise.

$\text{ReLU}(x) = (x)^+ = \max(0, x)$
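A minimal usage sketch (illustrative values):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.ReLU()
>>> m(torch.tensor([-1.0, 0.0, 2.0]))   # negatives clamp to zero
tensor([0., 0., 2.])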

                                        @@ -4446,7 +4446,7 @@

                                        ReLU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4457,7 +4457,7 @@

                                        ReLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.ReLU6.html b/2.9/generated/torch.nn.modules.activation.ReLU6.html
index 47b6f6d89ca..c5d009b7821 100644
--- a/2.9/generated/torch.nn.modules.activation.ReLU6.html
+++ b/2.9/generated/torch.nn.modules.activation.ReLU6.html
@@ -4415,7 +4415,7 @@

                                        ReLU6#

                                        -class torch.nn.modules.activation.ReLU6(inplace=False)[source]#
                                        +class torch.nn.modules.activation.ReLU6(inplace=False)[source]#

                                        Applies the ReLU6 function element-wise.

$\text{ReLU6}(x) = \min(\max(0, x), 6)$
@@ -4441,7 +4441,7 @@

                                        ReLU6#

                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.SELU.html b/2.9/generated/torch.nn.modules.activation.SELU.html
index ffb21e4c041..2fc696e49ca 100644
--- a/2.9/generated/torch.nn.modules.activation.SELU.html
+++ b/2.9/generated/torch.nn.modules.activation.SELU.html
@@ -4415,7 +4415,7 @@

                                        SELU#

                                        -class torch.nn.modules.activation.SELU(inplace=False)[source]#
                                        +class torch.nn.modules.activation.SELU(inplace=False)[source]#

                                        Applies the SELU function element-wise.

$\text{SELU}(x) = \text{scale} * (\max(0, x) + \min(0, \alpha * (\exp(x) - 1)))$
@@ -4451,7 +4451,7 @@

                                        SELU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4462,7 +4462,7 @@

                                        SELU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.SiLU.html b/2.9/generated/torch.nn.modules.activation.SiLU.html
index 7138ebbd7ae..3553b3c155d 100644
--- a/2.9/generated/torch.nn.modules.activation.SiLU.html
+++ b/2.9/generated/torch.nn.modules.activation.SiLU.html
@@ -4415,7 +4415,7 @@

                                        SiLU#

                                        -class torch.nn.modules.activation.SiLU(inplace=False)[source]#
                                        +class torch.nn.modules.activation.SiLU(inplace=False)[source]#

                                        Applies the Sigmoid Linear Unit (SiLU) function, element-wise.

                                        The SiLU function is also known as the swish function.

                                        @@ -4448,7 +4448,7 @@

                                        SiLU#
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4459,7 +4459,7 @@

                                        SiLU#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Sigmoid.html b/2.9/generated/torch.nn.modules.activation.Sigmoid.html
index 8686ec321c9..05d19718a83 100644
--- a/2.9/generated/torch.nn.modules.activation.Sigmoid.html
+++ b/2.9/generated/torch.nn.modules.activation.Sigmoid.html
@@ -4415,7 +4415,7 @@

                                        Sigmoid#

                                        -class torch.nn.modules.activation.Sigmoid(*args, **kwargs)[source]#
                                        +class torch.nn.modules.activation.Sigmoid(*args, **kwargs)[source]#

                                        Applies the Sigmoid function element-wise.

$\text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}$
@@ -4438,7 +4438,7 @@
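A minimal usage sketch (illustrative value):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Sigmoid()
>>> m(torch.tensor([0.0]))   # sigma(0) = 0.5
tensor([0.5000])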

                                        Sigmoid
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Softmax.html b/2.9/generated/torch.nn.modules.activation.Softmax.html
index c1e2d8f5c18..503ca6f9e67 100644
--- a/2.9/generated/torch.nn.modules.activation.Softmax.html
+++ b/2.9/generated/torch.nn.modules.activation.Softmax.html
@@ -4415,7 +4415,7 @@

                                        Softmax#

                                        -class torch.nn.modules.activation.Softmax(dim=None)[source]#
                                        +class torch.nn.modules.activation.Softmax(dim=None)[source]#

                                        Applies the Softmax function to an n-dimensional input Tensor.

Rescales the input so that the elements of the n-dimensional output Tensor lie in the range [0, 1] and sum to 1.
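A minimal sketch of the normalization property (illustrative shapes):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Softmax(dim=1)
>>> x = torch.randn(2, 3)
>>> torch.allclose(m(x).sum(dim=1), torch.ones(2))   # each row sums to 1
True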

                                        @@ -4460,7 +4460,7 @@

                                        Softmax
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4471,7 +4471,7 @@

                                        Softmax
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Softmax2d.html b/2.9/generated/torch.nn.modules.activation.Softmax2d.html
index 9f851dddf94..7c033838fa4 100644
--- a/2.9/generated/torch.nn.modules.activation.Softmax2d.html
+++ b/2.9/generated/torch.nn.modules.activation.Softmax2d.html
@@ -4415,7 +4415,7 @@

                                        Softmax2d#

                                        -class torch.nn.modules.activation.Softmax2d(*args, **kwargs)[source]#
                                        +class torch.nn.modules.activation.Softmax2d(*args, **kwargs)[source]#

                                        Applies SoftMax over features to each spatial location.

When given an image of Channels x Height x Width, it will apply Softmax to each location $(Channels, h_i, w_j)$

                                        @@ -4444,7 +4444,7 @@

                                        Softmax2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Softmin.html b/2.9/generated/torch.nn.modules.activation.Softmin.html
index ddd6f357206..c12b8ea6359 100644
--- a/2.9/generated/torch.nn.modules.activation.Softmin.html
+++ b/2.9/generated/torch.nn.modules.activation.Softmin.html
@@ -4415,7 +4415,7 @@

                                        Softmin#

                                        -class torch.nn.modules.activation.Softmin(dim=None)[source]#
                                        +class torch.nn.modules.activation.Softmin(dim=None)[source]#

                                        Applies the Softmin function to an n-dimensional input Tensor.

Rescales the input so that the elements of the n-dimensional output Tensor lie in the range [0, 1] and sum to 1.

                                        @@ -4452,7 +4452,7 @@

                                        Softmin
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4463,7 +4463,7 @@

                                        Softmin
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Softplus.html b/2.9/generated/torch.nn.modules.activation.Softplus.html
index f20792f437d..d2b15008e0b 100644
--- a/2.9/generated/torch.nn.modules.activation.Softplus.html
+++ b/2.9/generated/torch.nn.modules.activation.Softplus.html
@@ -4415,7 +4415,7 @@

                                        Softplus#

                                        -class torch.nn.modules.activation.Softplus(beta=1.0, threshold=20.0)[source]#
                                        +class torch.nn.modules.activation.Softplus(beta=1.0, threshold=20.0)[source]#

                                        Applies the Softplus function element-wise.

$\text{Softplus}(x) = \frac{1}{\beta} * \log(1 + \exp(\beta * x))$
@@ -4448,7 +4448,7 @@
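A minimal sketch of the formula at zero (illustrative value):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Softplus()            # beta=1.0, threshold=20.0 defaults
>>> m(torch.tensor([0.0]))       # log(1 + exp(0)) = log 2 ≈ 0.6931
tensor([0.6931])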

                                        Softplus
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4459,7 +4459,7 @@

                                        Softplus
                                        -forward(input)[source]#
                                        +forward(input)[source]#

Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Softshrink.html b/2.9/generated/torch.nn.modules.activation.Softshrink.html
index a0545d2e118..9df3a18b5d7 100644
--- a/2.9/generated/torch.nn.modules.activation.Softshrink.html
+++ b/2.9/generated/torch.nn.modules.activation.Softshrink.html
@@ -4415,7 +4415,7 @@

                                        Softshrink#

                                        -class torch.nn.modules.activation.Softshrink(lambd=0.5)[source]#
                                        +class torch.nn.modules.activation.Softshrink(lambd=0.5)[source]#

                                        Applies the soft shrinkage function element-wise.

$\text{SoftShrinkage}(x) = \begin{cases} x - \lambda, & \text{if } x > \lambda \\ x + \lambda, & \text{if } x < -\lambda \\ 0, & \text{otherwise} \end{cases}$
@@ -4446,7 +4446,7 @@
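A minimal sketch of the three cases (illustrative values):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Softshrink(lambd=0.5)
>>> m(torch.tensor([-1.0, 0.2, 1.0]))   # values inside [-0.5, 0.5] go to zero
tensor([-0.5000,  0.0000,  0.5000])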

                                        Softshrink
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4457,7 +4457,7 @@

                                        Softshrink
                                        -forward(input)[source]#
                                        +forward(input)[source]#

Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Softsign.html b/2.9/generated/torch.nn.modules.activation.Softsign.html
index eae88fede5f..db8a4dfcfad 100644
--- a/2.9/generated/torch.nn.modules.activation.Softsign.html
+++ b/2.9/generated/torch.nn.modules.activation.Softsign.html
@@ -4415,7 +4415,7 @@

                                        Softsign#

                                        -class torch.nn.modules.activation.Softsign(*args, **kwargs)[source]#
                                        +class torch.nn.modules.activation.Softsign(*args, **kwargs)[source]#

                                        Applies the element-wise Softsign function.

$\text{SoftSign}(x) = \frac{x}{1 + |x|}$
@@ -4438,7 +4438,7 @@

                                        Softsign
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Tanh.html b/2.9/generated/torch.nn.modules.activation.Tanh.html
index 065103ad2d6..220e9f2102a 100644
--- a/2.9/generated/torch.nn.modules.activation.Tanh.html
+++ b/2.9/generated/torch.nn.modules.activation.Tanh.html
@@ -4415,7 +4415,7 @@

                                        Tanh#

                                        -class torch.nn.modules.activation.Tanh(*args, **kwargs)[source]#
                                        +class torch.nn.modules.activation.Tanh(*args, **kwargs)[source]#

                                        Applies the Hyperbolic Tangent (Tanh) function element-wise.

                                        Tanh is defined as:

                                        @@ -4439,7 +4439,7 @@

                                        Tanh#
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Tanhshrink.html b/2.9/generated/torch.nn.modules.activation.Tanhshrink.html
index 4c4f3cbc575..2ade76d0256 100644
--- a/2.9/generated/torch.nn.modules.activation.Tanhshrink.html
+++ b/2.9/generated/torch.nn.modules.activation.Tanhshrink.html
@@ -4415,7 +4415,7 @@

                                        Tanhshrink#

                                        -class torch.nn.modules.activation.Tanhshrink(*args, **kwargs)[source]#
                                        +class torch.nn.modules.activation.Tanhshrink(*args, **kwargs)[source]#

                                        Applies the element-wise Tanhshrink function.

$\text{Tanhshrink}(x) = x - \tanh(x)$
@@ -4438,7 +4438,7 @@

                                        Tanhshrink
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.activation.Threshold.html b/2.9/generated/torch.nn.modules.activation.Threshold.html
index d57c5f3afb6..c3c2b08695c 100644
--- a/2.9/generated/torch.nn.modules.activation.Threshold.html
+++ b/2.9/generated/torch.nn.modules.activation.Threshold.html
@@ -4415,7 +4415,7 @@

                                        Threshold#

                                        -class torch.nn.modules.activation.Threshold(threshold, value, inplace=False)[source]#
                                        +class torch.nn.modules.activation.Threshold(threshold, value, inplace=False)[source]#

                                        Thresholds each element of the input Tensor.

                                        Threshold is defined as:

                                        @@ -4450,7 +4450,7 @@

                                        Threshold
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4461,7 +4461,7 @@

                                        Threshold
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.adaptive.AdaptiveLogSoftmaxWithLoss.html b/2.9/generated/torch.nn.modules.adaptive.AdaptiveLogSoftmaxWithLoss.html
index ed77874d97a..f23bc981085 100644
--- a/2.9/generated/torch.nn.modules.adaptive.AdaptiveLogSoftmaxWithLoss.html
+++ b/2.9/generated/torch.nn.modules.adaptive.AdaptiveLogSoftmaxWithLoss.html
@@ -4415,7 +4415,7 @@

                                        AdaptiveLogSoftmaxWithLoss#

                                        -class torch.nn.modules.adaptive.AdaptiveLogSoftmaxWithLoss(in_features, n_classes, cutoffs, div_value=4.0, head_bias=False, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.adaptive.AdaptiveLogSoftmaxWithLoss(in_features, n_classes, cutoffs, div_value=4.0, head_bias=False, device=None, dtype=None)[source]#

                                        Efficient softmax approximation.

As described in Efficient softmax approximation for GPUs by Edouard Grave, Armand Joulin, Moustapha Cissé, David Grangier, and Hervé Jégou.
@@ -4509,7 +4509,7 @@

                                        AdaptiveLogSoftmaxWithLoss
                                        -forward(input_, target_)[source]#
                                        +forward(input_, target_)[source]#

                                        Runs the forward pass.

                                        Return type
                                        @@ -4520,7 +4520,7 @@

                                        AdaptiveLogSoftmaxWithLoss
                                        -log_prob(input)[source]#
                                        +log_prob(input)[source]#

Compute log probabilities for all $\texttt{n\_classes}$.

                                        Parameters
                                        @@ -4546,7 +4546,7 @@

                                        AdaptiveLogSoftmaxWithLoss
                                        -predict(input)[source]#
                                        +predict(input)[source]#

                                        Return the class with the highest probability for each example in the input minibatch.

                                        This is equivalent to self.log_prob(input).argmax(dim=1), but is more efficient in some cases.
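A minimal sketch of predict (the sizes and cutoffs are illustrative, not from the upstream docs):

>>> import torch
>>> import torch.nn as nn
>>> asm = nn.AdaptiveLogSoftmaxWithLoss(in_features=16, n_classes=100, cutoffs=[10, 40])
>>> x = torch.randn(4, 16)
>>> asm.predict(x).shape        # one predicted class index per example
torch.Size([4])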

                                        @@ -4571,7 +4571,7 @@

                                        AdaptiveLogSoftmaxWithLoss
                                        -reset_parameters()[source]#
                                        +reset_parameters()[source]#

                                        Resets parameters based on their initialization used in __init__.

diff --git a/2.9/generated/torch.nn.modules.batchnorm.BatchNorm1d.html b/2.9/generated/torch.nn.modules.batchnorm.BatchNorm1d.html
index e5c9a7949c1..457932e85b1 100644
--- a/2.9/generated/torch.nn.modules.batchnorm.BatchNorm1d.html
+++ b/2.9/generated/torch.nn.modules.batchnorm.BatchNorm1d.html
@@ -4415,7 +4415,7 @@

                                        BatchNorm1d#

                                        -class torch.nn.modules.batchnorm.BatchNorm1d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.batchnorm.BatchNorm1d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        Applies Batch Normalization over a 2D or 3D input.
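A minimal shape sketch (illustrative sizes):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.BatchNorm1d(num_features=5)
>>> x = torch.randn(8, 5)        # (batch, features); (batch, features, length) also works
>>> m(x).shape
torch.Size([8, 5])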

Method described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
diff --git a/2.9/generated/torch.nn.modules.batchnorm.BatchNorm2d.html b/2.9/generated/torch.nn.modules.batchnorm.BatchNorm2d.html
index bf406a4c217..6a17e31be93 100644
--- a/2.9/generated/torch.nn.modules.batchnorm.BatchNorm2d.html
+++ b/2.9/generated/torch.nn.modules.batchnorm.BatchNorm2d.html
@@ -4415,7 +4415,7 @@

                                        BatchNorm2d#

                                        -class torch.nn.modules.batchnorm.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.batchnorm.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        Applies Batch Normalization over a 4D input.

A 4D input is a mini-batch of 2D inputs with an additional channel dimension. Method described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
diff --git a/2.9/generated/torch.nn.modules.batchnorm.BatchNorm3d.html b/2.9/generated/torch.nn.modules.batchnorm.BatchNorm3d.html
index 57a24dc3dea..710ee7e8c3c 100644
--- a/2.9/generated/torch.nn.modules.batchnorm.BatchNorm3d.html
+++ b/2.9/generated/torch.nn.modules.batchnorm.BatchNorm3d.html
@@ -4415,7 +4415,7 @@

                                        BatchNorm3d#

                                        -class torch.nn.modules.batchnorm.BatchNorm3d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.batchnorm.BatchNorm3d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        Applies Batch Normalization over a 5D input.

A 5D input is a mini-batch of 3D inputs with an additional channel dimension, as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
diff --git a/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm1d.html b/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm1d.html
index 694d15f8d6d..e370dae15c8 100644
--- a/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm1d.html
+++ b/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm1d.html
@@ -4415,7 +4415,7 @@

                                        LazyBatchNorm1d#

                                        -class torch.nn.modules.batchnorm.LazyBatchNorm1d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.batchnorm.LazyBatchNorm1d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.BatchNorm1d module with lazy initialization.

Lazy initialization of the num_features argument of BatchNorm1d, which is inferred from input.size(1).
@@ -4444,7 +4444,7 @@

                                        LazyBatchNorm1d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of BatchNorm1d

diff --git a/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm2d.html b/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm2d.html
index 623dcba2157..8c9bbcd18d0 100644
--- a/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm2d.html
+++ b/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm2d.html
@@ -4415,7 +4415,7 @@

                                        LazyBatchNorm2d#

                                        -class torch.nn.modules.batchnorm.LazyBatchNorm2d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.batchnorm.LazyBatchNorm2d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.BatchNorm2d module with lazy initialization.

Lazy initialization is done for the num_features argument of BatchNorm2d, which is inferred from input.size(1).
@@ -4444,7 +4444,7 @@

                                        LazyBatchNorm2d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of BatchNorm2d

diff --git a/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm3d.html b/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm3d.html
index ec86483aa93..b5e5136a65f 100644
--- a/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm3d.html
+++ b/2.9/generated/torch.nn.modules.batchnorm.LazyBatchNorm3d.html
@@ -4415,7 +4415,7 @@

                                        LazyBatchNorm3d#

                                        -class torch.nn.modules.batchnorm.LazyBatchNorm3d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.batchnorm.LazyBatchNorm3d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                        A torch.nn.BatchNorm3d module with lazy initialization.

Lazy initialization is done for the num_features argument of BatchNorm3d, which is inferred from input.size(1).
@@ -4444,7 +4444,7 @@

                                        LazyBatchNorm3d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of BatchNorm3d

diff --git a/2.9/generated/torch.nn.modules.batchnorm.SyncBatchNorm.html b/2.9/generated/torch.nn.modules.batchnorm.SyncBatchNorm.html
index b3b20731d1a..e7013d80475 100644
--- a/2.9/generated/torch.nn.modules.batchnorm.SyncBatchNorm.html
+++ b/2.9/generated/torch.nn.modules.batchnorm.SyncBatchNorm.html
@@ -4415,7 +4415,7 @@

                                        SyncBatchNorm#

                                        -class torch.nn.modules.batchnorm.SyncBatchNorm(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, process_group=None, device=None, dtype=None)[source]#
                                        +class torch.nn.modules.batchnorm.SyncBatchNorm(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, process_group=None, device=None, dtype=None)[source]#

Applies Batch Normalization over an N-dimensional input.

The N-D input is a mini-batch of [N-2]D inputs with an additional channel dimension, as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
@@ -4527,7 +4527,7 @@

                                        SyncBatchNorm
                                        -classmethod convert_sync_batchnorm(module, process_group=None)[source]#
                                        +classmethod convert_sync_batchnorm(module, process_group=None)[source]#

                                        Converts all BatchNorm*D layers in the model to torch.nn.SyncBatchNorm layers.
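A minimal conversion sketch (illustrative model; running the converted module forward in training mode additionally requires an initialized distributed process group):

>>> import torch
>>> import torch.nn as nn
>>> module = nn.Sequential(nn.Linear(20, 100), nn.BatchNorm1d(100))
>>> sync_module = nn.SyncBatchNorm.convert_sync_batchnorm(module)
>>> type(sync_module[1]).__name__     # BatchNorm1d swapped in place
'SyncBatchNorm'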

                                        Parameters
                                        @@ -4566,7 +4566,7 @@

                                        SyncBatchNorm
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.channelshuffle.ChannelShuffle.html b/2.9/generated/torch.nn.modules.channelshuffle.ChannelShuffle.html
index bdab1f37f27..c4c6b13fedb 100644
--- a/2.9/generated/torch.nn.modules.channelshuffle.ChannelShuffle.html
+++ b/2.9/generated/torch.nn.modules.channelshuffle.ChannelShuffle.html
@@ -4415,7 +4415,7 @@

                                        ChannelShuffle#

                                        -class torch.nn.modules.channelshuffle.ChannelShuffle(groups)[source]#
                                        +class torch.nn.modules.channelshuffle.ChannelShuffle(groups)[source]#

                                        Divides and rearranges the channels in a tensor.

This operation divides the channels in a tensor of shape $(N, C, *)$ into g groups as $(N, \frac{C}{g}, g, *)$ and shuffles them,
@@ -4451,7 +4451,7 @@
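A minimal sketch of the shuffle order (channel values chosen so the permutation is visible):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.ChannelShuffle(groups=2)
>>> x = torch.arange(4.).reshape(1, 4, 1, 1)   # channels [0, 1, 2, 3] as 1x1 maps
>>> m(x).flatten()                             # interleaved across the two groups
tensor([0., 2., 1., 3.])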

                                        ChannelShuffle
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        @@ -4462,7 +4462,7 @@

                                        ChannelShuffle
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.container.ModuleDict.html b/2.9/generated/torch.nn.modules.container.ModuleDict.html
index 59dc671f49b..cb1963d7f45 100644
--- a/2.9/generated/torch.nn.modules.container.ModuleDict.html
+++ b/2.9/generated/torch.nn.modules.container.ModuleDict.html
@@ -4415,7 +4415,7 @@

                                        ModuleDict#

                                        -class torch.nn.modules.container.ModuleDict(modules=None)[source]#
                                        +class torch.nn.modules.container.ModuleDict(modules=None)[source]#

                                        Holds submodules in a dictionary.

ModuleDict can be indexed like a regular Python dictionary, but modules it contains are properly registered, and will be visible by all Module methods.
@@ -4456,7 +4456,7 @@
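A minimal usage sketch (names and sizes are illustrative):

>>> import torch
>>> import torch.nn as nn
>>> layers = nn.ModuleDict({'fc': nn.Linear(4, 4), 'act': nn.ReLU()})
>>> x = torch.randn(2, 4)
>>> out = layers['act'](layers['fc'](x))   # submodules are registered and indexable
>>> sorted(layers.keys())
['act', 'fc']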

                                        ModuleDict
                                        -clear()[source]#
                                        +clear()[source]#

                                        Remove all items from the ModuleDict.

                                        @@ -4464,7 +4464,7 @@

                                        ModuleDict
                                        -items()[source]#
                                        +items()[source]#

                                        Return an iterable of the ModuleDict key/value pairs.

                                        Return type
                                        @@ -4475,7 +4475,7 @@

                                        ModuleDict
                                        -keys()[source]#
                                        +keys()[source]#

                                        Return an iterable of the ModuleDict keys.

                                        Return type
                                        @@ -4486,7 +4486,7 @@

                                        ModuleDict
                                        -pop(key)[source]#
                                        +pop(key)[source]#

                                        Remove key from the ModuleDict and return its module.

                                        Parameters
                                        @@ -4500,7 +4500,7 @@

                                        ModuleDict
                                        -update(modules)[source]#
                                        +update(modules)[source]#

                                        Update the ModuleDict with key-value pairs from a mapping, overwriting existing keys.

                                        Note

                                        @@ -4517,7 +4517,7 @@

                                        ModuleDict
                                        -values()[source]#
                                        +values()[source]#

                                        Return an iterable of the ModuleDict values.

                                        Return type
diff --git a/2.9/generated/torch.nn.modules.container.ModuleList.html b/2.9/generated/torch.nn.modules.container.ModuleList.html
index 27d903b1d70..a56c108725c 100644
--- a/2.9/generated/torch.nn.modules.container.ModuleList.html
+++ b/2.9/generated/torch.nn.modules.container.ModuleList.html
@@ -4415,7 +4415,7 @@

                                        ModuleList#

                                        -class torch.nn.modules.container.ModuleList(modules=None)[source]#
                                        +class torch.nn.modules.container.ModuleList(modules=None)[source]#

                                        Holds submodules in a list.

ModuleList can be indexed like a regular Python list, but modules it contains are properly registered, and will be visible by all Module methods.
@@ -4440,7 +4440,7 @@
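A minimal usage sketch (sizes are illustrative):

>>> import torch
>>> import torch.nn as nn
>>> blocks = nn.ModuleList([nn.Linear(4, 4) for _ in range(3)])
>>> blocks.append(nn.ReLU())          # modules added later are registered too
>>> x = torch.randn(2, 4)
>>> for layer in blocks:
...     x = layer(x)
>>> x.shape
torch.Size([2, 4])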

                                        ModuleList
                                        -append(module)[source]#
                                        +append(module)[source]#

                                        Append a given module to the end of the list.

                                        Parameters
                                        @@ -4454,7 +4454,7 @@

                                        ModuleList
                                        -extend(modules)[source]#
                                        +extend(modules)[source]#

                                        Append modules from a Python iterable to the end of the list.

                                        Parameters
                                        @@ -4468,7 +4468,7 @@

                                        ModuleList
                                        -insert(index, module)[source]#
                                        +insert(index, module)[source]#

                                        Insert a given module before a given index in the list.

                                        Parameters
diff --git a/2.9/generated/torch.nn.modules.container.ParameterDict.html b/2.9/generated/torch.nn.modules.container.ParameterDict.html
index 4be79b6eeb8..7b77a2db193 100644
--- a/2.9/generated/torch.nn.modules.container.ParameterDict.html
+++ b/2.9/generated/torch.nn.modules.container.ParameterDict.html
@@ -4415,7 +4415,7 @@

                                        ParameterDict#

                                        -class torch.nn.modules.container.ParameterDict(parameters=None)[source]#
                                        +class torch.nn.modules.container.ParameterDict(parameters=None)[source]#

                                        Holds parameters in a dictionary.

ParameterDict can be indexed like a regular Python dictionary, but Parameters it contains are properly registered, and will be visible by all Module methods.
@@ -4453,7 +4453,7 @@

                                        ParameterDict
                                        -clear()[source]#
                                        +clear()[source]#

                                        Remove all items from the ParameterDict.

                                        @@ -4461,7 +4461,7 @@

                                        ParameterDict
                                        -copy()[source]#
                                        +copy()[source]#

                                        Return a copy of this ParameterDict instance.

                                        Return type
                                        @@ -4472,7 +4472,7 @@

                                        ParameterDict
                                        -fromkeys(keys, default=None)[source]#
                                        +fromkeys(keys, default=None)[source]#

                                        Return a new ParameterDict with the keys provided.

                                        Parameters
                                        @@ -4489,7 +4489,7 @@

                                        ParameterDict
                                        -get(key, default=None)[source]#
                                        +get(key, default=None)[source]#

                                        Return the parameter associated with key if present. Otherwise return default if provided, None if not.

                                        Parameters
                                        @@ -4506,7 +4506,7 @@

                                        ParameterDict
                                        -items()[source]#
                                        +items()[source]#

                                        Return an iterable of the ParameterDict key/value pairs.

                                        Return type
                                        @@ -4517,7 +4517,7 @@

                                        ParameterDict
                                        -keys()[source]#
                                        +keys()[source]#

                                        Return an iterable of the ParameterDict keys.

                                        Return type
                                        @@ -4528,7 +4528,7 @@

                                        ParameterDict
                                        -pop(key)[source]#
                                        +pop(key)[source]#

                                        Remove key from the ParameterDict and return its parameter.

                                        Parameters
                                        @@ -4542,7 +4542,7 @@

                                        ParameterDict
                                        -popitem()[source]#
                                        +popitem()[source]#

                                        Remove and return the last inserted (key, parameter) pair from the ParameterDict.

                                        Return type
                                        @@ -4553,7 +4553,7 @@

                                        ParameterDict
                                        -setdefault(key, default=None)[source]#
                                        +setdefault(key, default=None)[source]#

Set the default for a key in the ParameterDict.

                                        If key is in the ParameterDict, return its value. If not, insert key with a parameter default and return default. @@ -4573,7 +4573,7 @@

                                        ParameterDict
                                        -update(parameters)[source]#
                                        +update(parameters)[source]#

                                        Update the ParameterDict with key-value pairs from parameters, overwriting existing keys.

                                        Note

                                        @@ -4591,7 +4591,7 @@

                                        ParameterDict
                                        -values()[source]#
                                        +values()[source]#

                                        Return an iterable of the ParameterDict values.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.container.ParameterList.html b/2.9/generated/torch.nn.modules.container.ParameterList.html index 7f566175dc7..1c68e673635 100644 --- a/2.9/generated/torch.nn.modules.container.ParameterList.html +++ b/2.9/generated/torch.nn.modules.container.ParameterList.html @@ -4415,7 +4415,7 @@

                                        ParameterList#

                                        -class torch.nn.modules.container.ParameterList(values=None)[source]#
                                        +class torch.nn.modules.container.ParameterList(values=None)[source]#

                                        Holds parameters in a list.

                                        ParameterList can be used like a regular Python list, but Tensors that are Parameter are properly registered, @@ -4445,7 +4445,7 @@
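A short list-style sketch (illustrative only; sizes are arbitrary):

>>> import torch
>>> import torch.nn as nn
>>> plist = nn.ParameterList([nn.Parameter(torch.randn(4, 4)) for _ in range(3)])
>>> _ = plist.append(nn.Parameter(torch.randn(4, 4)))   # append returns self
>>> len(plist)
4
>>> total = sum(p.sum() for p in plist)   # iterable like a regular list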

                                        ParameterList
                                        -append(value)[source]#
                                        +append(value)[source]#

                                        Append a given value at the end of the list.

                                        Parameters
                                        @@ -4459,7 +4459,7 @@

                                        ParameterList
                                        -extend(values)[source]#
                                        +extend(values)[source]#

                                        Append values from a Python iterable to the end of the list.

                                        Parameters
                                        @@ -4473,7 +4473,7 @@

                                        ParameterList
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Return the extra representation of the module.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.container.Sequential.html b/2.9/generated/torch.nn.modules.container.Sequential.html index 6975c3339b3..95b0789c8cb 100644 --- a/2.9/generated/torch.nn.modules.container.Sequential.html +++ b/2.9/generated/torch.nn.modules.container.Sequential.html @@ -4415,7 +4415,7 @@

                                        Sequential#

                                        -class torch.nn.modules.container.Sequential(*args: Module)[source]#
                                        +class torch.nn.modules.container.Sequential(*args: Module)[source]#
                                        class torch.nn.modules.container.Sequential(arg: OrderedDict[str, Module])

                                        A sequential container.

                                        @@ -4461,7 +4461,7 @@
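Both constructor forms listed above can be sketched as follows (layer sizes are illustrative):

>>> import torch.nn as nn
>>> from collections import OrderedDict
>>> model = nn.Sequential(
...     nn.Conv2d(1, 20, 5),
...     nn.ReLU(),
...     nn.Conv2d(20, 64, 5),
...     nn.ReLU(),
... )
>>> named = nn.Sequential(OrderedDict([
...     ('conv1', nn.Conv2d(1, 20, 5)),
...     ('relu1', nn.ReLU()),
... ]))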

                                        Sequential
                                        -append(module)[source]#
                                        +append(module)[source]#

                                        Append a given module to the end.

                                        Parameters
                                        @@ -4486,7 +4486,7 @@

                                        Sequential
                                        -extend(sequential)[source]#
                                        +extend(sequential)[source]#

                                        Extends the current Sequential container with layers from another Sequential container.

                                        Parameters
                                        @@ -4513,13 +4513,13 @@

                                        Sequential
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        -insert(index, module)[source]#
                                        +insert(index, module)[source]#

                                        Inserts a module into the Sequential container at the specified index.

                                        Parameters
                                        @@ -4547,7 +4547,7 @@

                                        Sequential
                                        -pop(key)[source]#
                                        +pop(key)[source]#

Remove the module at key from the container and return it.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.conv.Conv1d.html b/2.9/generated/torch.nn.modules.conv.Conv1d.html index f63edea6bdd..e10dd18a2c9 100644 --- a/2.9/generated/torch.nn.modules.conv.Conv1d.html +++ b/2.9/generated/torch.nn.modules.conv.Conv1d.html @@ -4415,7 +4415,7 @@

                                        Conv1d#

                                        -class torch.nn.modules.conv.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 1D convolution over an input signal composed of several input planes.
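For example (illustrative shapes): with 16 input channels, 33 output channels, kernel_size=3 and stride=2, a length-50 signal yields floor((50 - 3) / 2) + 1 = 24 output positions.

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Conv1d(16, 33, 3, stride=2)
>>> m(torch.randn(20, 16, 50)).shape
torch.Size([20, 33, 24])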

                                        In the simplest case, the output value of the layer with input size diff --git a/2.9/generated/torch.nn.modules.conv.Conv2d.html b/2.9/generated/torch.nn.modules.conv.Conv2d.html index aa8bf10e2b7..1fc250826a8 100644 --- a/2.9/generated/torch.nn.modules.conv.Conv2d.html +++ b/2.9/generated/torch.nn.modules.conv.Conv2d.html @@ -4415,7 +4415,7 @@

                                        Conv2d#

                                        -class torch.nn.modules.conv.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 2D convolution over an input signal composed of several input planes.

                                        In the simplest case, the output value of the layer with input size diff --git a/2.9/generated/torch.nn.modules.conv.Conv3d.html b/2.9/generated/torch.nn.modules.conv.Conv3d.html index 358746016a6..61ea01099aa 100644 --- a/2.9/generated/torch.nn.modules.conv.Conv3d.html +++ b/2.9/generated/torch.nn.modules.conv.Conv3d.html @@ -4415,7 +4415,7 @@

                                        Conv3d#

                                        -class torch.nn.modules.conv.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 3D convolution over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C_{in}, D, H, W) diff --git a/2.9/generated/torch.nn.modules.conv.ConvTranspose1d.html b/2.9/generated/torch.nn.modules.conv.ConvTranspose1d.html index 566effdd296..3e22b21f099 100644 --- a/2.9/generated/torch.nn.modules.conv.ConvTranspose1d.html +++ b/2.9/generated/torch.nn.modules.conv.ConvTranspose1d.html @@ -4415,7 +4415,7 @@

                                        ConvTranspose1d#

                                        -class torch.nn.modules.conv.ConvTranspose1d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.ConvTranspose1d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 1D transposed convolution operator over an input image composed of several input planes.

                                        This module can be seen as the gradient of Conv1d with respect to its input. diff --git a/2.9/generated/torch.nn.modules.conv.ConvTranspose2d.html b/2.9/generated/torch.nn.modules.conv.ConvTranspose2d.html index 7ca6eeb941c..1e5129905c4 100644 --- a/2.9/generated/torch.nn.modules.conv.ConvTranspose2d.html +++ b/2.9/generated/torch.nn.modules.conv.ConvTranspose2d.html @@ -4415,7 +4415,7 @@

                                        ConvTranspose2d#

                                        -class torch.nn.modules.conv.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 2D transposed convolution operator over an input image composed of several input planes.

                                        This module can be seen as the gradient of Conv2d with respect to its input. @@ -4593,7 +4593,7 @@
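Several input sizes can map to the same output size under a strided Conv2d, so forward() takes an optional output_size to pick the intended inverse. A sketch (illustrative shapes):

>>> import torch
>>> import torch.nn as nn
>>> input = torch.randn(1, 16, 12, 12)
>>> downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
>>> upsample = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
>>> h = downsample(input)
>>> h.size()
torch.Size([1, 16, 6, 6])
>>> upsample(h, output_size=input.size()).size()
torch.Size([1, 16, 12, 12])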

                                        ConvTranspose2d
                                        -forward(input, output_size=None)[source]#
                                        +forward(input, output_size=None)[source]#

                                        Performs the forward pass.

                                        Variables
                                        diff --git a/2.9/generated/torch.nn.modules.conv.ConvTranspose3d.html b/2.9/generated/torch.nn.modules.conv.ConvTranspose3d.html index 4b235811c85..79b15726ae9 100644 --- a/2.9/generated/torch.nn.modules.conv.ConvTranspose3d.html +++ b/2.9/generated/torch.nn.modules.conv.ConvTranspose3d.html @@ -4415,7 +4415,7 @@

                                        ConvTranspose3d#

                                        -class torch.nn.modules.conv.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.ConvTranspose3d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        Applies a 3D transposed convolution operator over an input image composed of several input planes. The transposed convolution operator multiplies each input value element-wise by a learnable kernel, diff --git a/2.9/generated/torch.nn.modules.conv.LazyConv1d.html b/2.9/generated/torch.nn.modules.conv.LazyConv1d.html index 88ceac416c8..cf1a23af925 100644 --- a/2.9/generated/torch.nn.modules.conv.LazyConv1d.html +++ b/2.9/generated/torch.nn.modules.conv.LazyConv1d.html @@ -4415,7 +4415,7 @@

                                        LazyConv1d#

                                        -class torch.nn.modules.conv.LazyConv1d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.LazyConv1d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.Conv1d module with lazy initialization of the in_channels argument.

                                        The in_channels argument of the Conv1d is inferred from the input.size(1). The attributes that will be lazily initialized are weight and bias.

                                        @@ -4446,7 +4446,7 @@
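A sketch of the lazy flow (illustrative shapes): the first forward call fixes in_channels, after which the module behaves like an ordinary Conv1d.

>>> import torch
>>> import torch.nn as nn
>>> m = nn.LazyConv1d(out_channels=33, kernel_size=3)
>>> output = m(torch.randn(20, 16, 50))   # infers in_channels=16 on first use
>>> m.weight.shape
torch.Size([33, 16, 3])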

                                        LazyConv1d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of Conv1d

                                        diff --git a/2.9/generated/torch.nn.modules.conv.LazyConv2d.html b/2.9/generated/torch.nn.modules.conv.LazyConv2d.html index bb81e1bcd10..f4e75cdc0bb 100644 --- a/2.9/generated/torch.nn.modules.conv.LazyConv2d.html +++ b/2.9/generated/torch.nn.modules.conv.LazyConv2d.html @@ -4415,7 +4415,7 @@

                                        LazyConv2d#

                                        -class torch.nn.modules.conv.LazyConv2d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.LazyConv2d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.Conv2d module with lazy initialization of the in_channels argument.

The in_channels argument of the Conv2d is inferred from the input.size(1). The attributes that will be lazily initialized are weight and bias.

                                        @@ -4446,7 +4446,7 @@

                                        LazyConv2d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of Conv2d

                                        diff --git a/2.9/generated/torch.nn.modules.conv.LazyConv3d.html b/2.9/generated/torch.nn.modules.conv.LazyConv3d.html index d1425bafeb0..dd3a4a0b973 100644 --- a/2.9/generated/torch.nn.modules.conv.LazyConv3d.html +++ b/2.9/generated/torch.nn.modules.conv.LazyConv3d.html @@ -4415,7 +4415,7 @@

                                        LazyConv3d#

                                        -class torch.nn.modules.conv.LazyConv3d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.LazyConv3d(out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.Conv3d module with lazy initialization of the in_channels argument.

The in_channels argument of the Conv3d is inferred from the input.size(1). @@ -4447,7 +4447,7 @@

                                        LazyConv3d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of Conv3d

                                        diff --git a/2.9/generated/torch.nn.modules.conv.LazyConvTranspose1d.html b/2.9/generated/torch.nn.modules.conv.LazyConvTranspose1d.html index 05db1129c10..c4bb8ce331c 100644 --- a/2.9/generated/torch.nn.modules.conv.LazyConvTranspose1d.html +++ b/2.9/generated/torch.nn.modules.conv.LazyConvTranspose1d.html @@ -4415,7 +4415,7 @@

                                        LazyConvTranspose1d#

                                        -class torch.nn.modules.conv.LazyConvTranspose1d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.LazyConvTranspose1d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.ConvTranspose1d module with lazy initialization of the in_channels argument.

The in_channels argument of the ConvTranspose1d is inferred from the input.size(1). @@ -4444,7 +4444,7 @@

                                        LazyConvTranspose1d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of ConvTranspose1d

                                        diff --git a/2.9/generated/torch.nn.modules.conv.LazyConvTranspose2d.html b/2.9/generated/torch.nn.modules.conv.LazyConvTranspose2d.html index fc16ece3626..11815addfda 100644 --- a/2.9/generated/torch.nn.modules.conv.LazyConvTranspose2d.html +++ b/2.9/generated/torch.nn.modules.conv.LazyConvTranspose2d.html @@ -4415,7 +4415,7 @@

                                        LazyConvTranspose2d#

                                        -class torch.nn.modules.conv.LazyConvTranspose2d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.LazyConvTranspose2d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.ConvTranspose2d module with lazy initialization of the in_channels argument.

                                        The in_channels argument of the ConvTranspose2d is inferred from the input.size(1). @@ -4444,7 +4444,7 @@

                                        LazyConvTranspose2d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of ConvTranspose2d

                                        diff --git a/2.9/generated/torch.nn.modules.conv.LazyConvTranspose3d.html b/2.9/generated/torch.nn.modules.conv.LazyConvTranspose3d.html index 1f92c153a9b..ab7b251d140 100644 --- a/2.9/generated/torch.nn.modules.conv.LazyConvTranspose3d.html +++ b/2.9/generated/torch.nn.modules.conv.LazyConvTranspose3d.html @@ -4415,7 +4415,7 @@

                                        LazyConvTranspose3d#

                                        -class torch.nn.modules.conv.LazyConvTranspose3d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#
                                        +class torch.nn.modules.conv.LazyConvTranspose3d(out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros', device=None, dtype=None)[source]#

                                        A torch.nn.ConvTranspose3d module with lazy initialization of the in_channels argument.

                                        The in_channels argument of the ConvTranspose3d is inferred from the input.size(1). @@ -4444,7 +4444,7 @@

                                        LazyConvTranspose3d
                                        -cls_to_become[source]#
                                        +cls_to_become[source]#

                                        alias of ConvTranspose3d

                                        diff --git a/2.9/generated/torch.nn.modules.distance.CosineSimilarity.html b/2.9/generated/torch.nn.modules.distance.CosineSimilarity.html index 30f53ca5de8..8b044bcfab2 100644 --- a/2.9/generated/torch.nn.modules.distance.CosineSimilarity.html +++ b/2.9/generated/torch.nn.modules.distance.CosineSimilarity.html @@ -4415,7 +4415,7 @@

                                        CosineSimilarity#

                                        -class torch.nn.modules.distance.CosineSimilarity(dim=1, eps=1e-08)[source]#
                                        +class torch.nn.modules.distance.CosineSimilarity(dim=1, eps=1e-08)[source]#

Returns cosine similarity between x_1 and x_2, computed along dim.

\text{similarity} = \dfrac{x_1 \cdot x_2}{\max(\Vert x_1 \Vert_2 \cdot \Vert x_2 \Vert_2, \epsilon)}. @@ -4447,7 +4447,7 @@
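A minimal sketch (illustrative shapes), reducing along dim=1 so one similarity is returned per row:

>>> import torch
>>> import torch.nn as nn
>>> cos = nn.CosineSimilarity(dim=1, eps=1e-6)
>>> cos(torch.randn(100, 128), torch.randn(100, 128)).shape
torch.Size([100])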

                                        CosineSimilarity
                                        -forward(x1, x2)[source]#
                                        +forward(x1, x2)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.distance.PairwiseDistance.html b/2.9/generated/torch.nn.modules.distance.PairwiseDistance.html index 95b8940288e..efc4801942c 100644 --- a/2.9/generated/torch.nn.modules.distance.PairwiseDistance.html +++ b/2.9/generated/torch.nn.modules.distance.PairwiseDistance.html @@ -4415,7 +4415,7 @@

                                        PairwiseDistance#

                                        -class torch.nn.modules.distance.PairwiseDistance(p=2.0, eps=1e-06, keepdim=False)[source]#
                                        +class torch.nn.modules.distance.PairwiseDistance(p=2.0, eps=1e-06, keepdim=False)[source]#

                                        Computes the pairwise distance between input vectors, or between columns of input matrices.

Distances are computed using p-norm, with constant eps added to avoid division by zero if p is negative, i.e.:

\mathrm{dist}(x, y) = \Vert x - y + \epsilon e \Vert_p,

where e is the vector of all ones.

                                        @@ -4455,7 +4455,7 @@
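A minimal sketch (illustrative shapes), one distance per row:

>>> import torch
>>> import torch.nn as nn
>>> pdist = nn.PairwiseDistance(p=2)
>>> pdist(torch.randn(100, 128), torch.randn(100, 128)).shape
torch.Size([100])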

                                        PairwiseDistance
                                        -forward(x1, x2)[source]#
                                        +forward(x1, x2)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.dropout.AlphaDropout.html b/2.9/generated/torch.nn.modules.dropout.AlphaDropout.html index 494b9cdaf35..6e145664e0b 100644 --- a/2.9/generated/torch.nn.modules.dropout.AlphaDropout.html +++ b/2.9/generated/torch.nn.modules.dropout.AlphaDropout.html @@ -4415,7 +4415,7 @@

                                        AlphaDropout#

                                        -class torch.nn.modules.dropout.AlphaDropout(p=0.5, inplace=False)[source]#
                                        +class torch.nn.modules.dropout.AlphaDropout(p=0.5, inplace=False)[source]#

                                        Applies Alpha Dropout over the input.

                                        Alpha Dropout is a type of Dropout that maintains the self-normalizing property. @@ -4454,7 +4454,7 @@

                                        AlphaDropout
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.dropout.Dropout.html b/2.9/generated/torch.nn.modules.dropout.Dropout.html index 09de67253a8..a79c9b4c260 100644 --- a/2.9/generated/torch.nn.modules.dropout.Dropout.html +++ b/2.9/generated/torch.nn.modules.dropout.Dropout.html @@ -4415,7 +4415,7 @@

                                        Dropout#

                                        -class torch.nn.modules.dropout.Dropout(p=0.5, inplace=False)[source]#
                                        +class torch.nn.modules.dropout.Dropout(p=0.5, inplace=False)[source]#

                                        During training, randomly zeroes some of the elements of the input tensor with probability p.

                                        The zeroed elements are chosen independently for each forward call and are sampled from a Bernoulli distribution.

                                        Each channel will be zeroed out independently on every forward call.

                                        @@ -4449,7 +4449,7 @@
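A minimal sketch (illustrative shapes); during training the surviving elements are scaled by 1/(1-p), and eval mode disables the zeroing:

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Dropout(p=0.2)
>>> output = m(torch.randn(20, 16))   # training mode: survivors scaled by 1/(1-p)
>>> m = m.eval()                      # eval mode: dropout becomes the identity
>>> torch.equal(m(torch.ones(4)), torch.ones(4))
True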

                                        Dropout
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.dropout.Dropout1d.html b/2.9/generated/torch.nn.modules.dropout.Dropout1d.html index 1c3fe84596e..3fbecb4e760 100644 --- a/2.9/generated/torch.nn.modules.dropout.Dropout1d.html +++ b/2.9/generated/torch.nn.modules.dropout.Dropout1d.html @@ -4415,7 +4415,7 @@

                                        Dropout1d#

                                        -class torch.nn.modules.dropout.Dropout1d(p=0.5, inplace=False)[source]#
                                        +class torch.nn.modules.dropout.Dropout1d(p=0.5, inplace=False)[source]#

                                        Randomly zero out entire channels.

A channel is a 1D feature map, e.g., the j-th channel of the i-th sample in the @@ -4455,7 +4455,7 @@

                                        Dropout1d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.dropout.Dropout2d.html b/2.9/generated/torch.nn.modules.dropout.Dropout2d.html index 8bae992823b..bc0ee29c357 100644 --- a/2.9/generated/torch.nn.modules.dropout.Dropout2d.html +++ b/2.9/generated/torch.nn.modules.dropout.Dropout2d.html @@ -4415,7 +4415,7 @@

                                        Dropout2d#

                                        -class torch.nn.modules.dropout.Dropout2d(p=0.5, inplace=False)[source]#
                                        +class torch.nn.modules.dropout.Dropout2d(p=0.5, inplace=False)[source]#

                                        Randomly zero out entire channels.

A channel is a 2D feature map, e.g., the j-th channel of the i-th sample in the @@ -4463,7 +4463,7 @@
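Unlike element-wise Dropout, whole feature maps are dropped together. A sketch (illustrative shapes):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Dropout2d(p=0.2)
>>> output = m(torch.randn(20, 16, 32, 32))   # zeroes entire 32x32 channel maps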

                                        Dropout2d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.dropout.Dropout3d.html b/2.9/generated/torch.nn.modules.dropout.Dropout3d.html index 07e58c226cd..103e1000416 100644 --- a/2.9/generated/torch.nn.modules.dropout.Dropout3d.html +++ b/2.9/generated/torch.nn.modules.dropout.Dropout3d.html @@ -4415,7 +4415,7 @@

                                        Dropout3d#

                                        -class torch.nn.modules.dropout.Dropout3d(p=0.5, inplace=False)[source]#
                                        +class torch.nn.modules.dropout.Dropout3d(p=0.5, inplace=False)[source]#

                                        Randomly zero out entire channels.

A channel is a 3D feature map, e.g., the j-th channel of the i-th sample in the @@ -4455,7 +4455,7 @@

                                        Dropout3d
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.dropout.FeatureAlphaDropout.html b/2.9/generated/torch.nn.modules.dropout.FeatureAlphaDropout.html index ba9da01f3b6..a644ecf5ed0 100644 --- a/2.9/generated/torch.nn.modules.dropout.FeatureAlphaDropout.html +++ b/2.9/generated/torch.nn.modules.dropout.FeatureAlphaDropout.html @@ -4415,7 +4415,7 @@

                                        FeatureAlphaDropout#

                                        -class torch.nn.modules.dropout.FeatureAlphaDropout(p=0.5, inplace=False)[source]#
                                        +class torch.nn.modules.dropout.FeatureAlphaDropout(p=0.5, inplace=False)[source]#

                                        Randomly masks out entire channels.

A channel is a feature map, e.g. the j-th channel of the i-th sample in the batch input @@ -4460,7 +4460,7 @@

                                        FeatureAlphaDropout
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.flatten.Flatten.html b/2.9/generated/torch.nn.modules.flatten.Flatten.html index d5fc610c31f..fe8cd1bbcda 100644 --- a/2.9/generated/torch.nn.modules.flatten.Flatten.html +++ b/2.9/generated/torch.nn.modules.flatten.Flatten.html @@ -4404,7 +4404,7 @@

                                        Flatten#

                                        -class torch.nn.modules.flatten.Flatten(start_dim=1, end_dim=-1)[source]#
                                        +class torch.nn.modules.flatten.Flatten(start_dim=1, end_dim=-1)[source]#

                                        Flattens a contiguous range of dims into a tensor.

                                        For use with Sequential, see torch.flatten() for details.

                                        @@ -4442,7 +4442,7 @@
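With the defaults start_dim=1 and end_dim=-1 the batch dimension is kept. A sketch (illustrative shapes), together with its Unflatten inverse documented further below:

>>> import torch
>>> import torch.nn as nn
>>> x = torch.randn(32, 1, 5, 5)
>>> flat = nn.Flatten()(x)
>>> flat.shape
torch.Size([32, 25])
>>> nn.Unflatten(1, (1, 5, 5))(flat).shape
torch.Size([32, 1, 5, 5])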

                                        Flatten
                                        -extra_repr()[source]#
                                        +extra_repr()[source]#

                                        Returns the extra representation of the module.

                                        Return type
                                        @@ -4453,7 +4453,7 @@

                                        Flatten
                                        -forward(input)[source]#
                                        +forward(input)[source]#

                                        Runs the forward pass.

                                        Return type
                                        diff --git a/2.9/generated/torch.nn.modules.flatten.Unflatten.html b/2.9/generated/torch.nn.modules.flatten.Unflatten.html index c860daba3d4..d8aa9992aaf 100644 --- a/2.9/generated/torch.nn.modules.flatten.Unflatten.html +++ b/2.9/generated/torch.nn.modules.flatten.Unflatten.html @@ -4404,7 +4404,7 @@

                                        Unflatten#

                                        -class torch.nn.modules.flatten.Unflatten(dim, unflattened_size)[source]#
                                        +class torch.nn.modules.flatten.Unflatten(dim, unflattened_size)[source]#

Unflattens a tensor dim, expanding it to a desired shape. For use with Sequential.

                                        • dim specifies the dimension of the input tensor to be unflattened, and it can @@ -4464,7 +4464,7 @@

                                          Unflatten
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Returns the extra representation of the module.

                                          Return type
                                          @@ -4475,7 +4475,7 @@

                                          Unflatten
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
                                          diff --git a/2.9/generated/torch.nn.modules.fold.Fold.html b/2.9/generated/torch.nn.modules.fold.Fold.html index ee6ff7dd623..78d9ca3ae3d 100644 --- a/2.9/generated/torch.nn.modules.fold.Fold.html +++ b/2.9/generated/torch.nn.modules.fold.Fold.html @@ -4415,7 +4415,7 @@

                                          Fold#

                                          -class torch.nn.modules.fold.Fold(output_size, kernel_size, dilation=1, padding=0, stride=1)[source]#
                                          +class torch.nn.modules.fold.Fold(output_size, kernel_size, dilation=1, padding=0, stride=1)[source]#

                                          Combines an array of sliding local blocks into a large containing tensor.

Consider a batched input tensor containing sliding local blocks, e.g., patches of images, of shape (N, C \times \prod(\text{kernel\_size}), L), @@ -4525,7 +4525,7 @@
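A sketch (illustrative shapes): folding 12 blocks of a 3-channel 2x2 kernel back into a 4x5 map, where L = (4-2+1) * (5-2+1) = 12.

>>> import torch
>>> import torch.nn as nn
>>> fold = nn.Fold(output_size=(4, 5), kernel_size=(2, 2))
>>> input = torch.randn(1, 3 * 2 * 2, 12)
>>> fold(input).size()
torch.Size([1, 3, 4, 5])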

                                          Fold#
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
                                          @@ -4536,7 +4536,7 @@

                                          Fold#
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
                                          diff --git a/2.9/generated/torch.nn.modules.fold.Unfold.html b/2.9/generated/torch.nn.modules.fold.Unfold.html index 706d1ac346c..10dce143553 100644 --- a/2.9/generated/torch.nn.modules.fold.Unfold.html +++ b/2.9/generated/torch.nn.modules.fold.Unfold.html @@ -4415,7 +4415,7 @@

                                          Unfold#

                                          -class torch.nn.modules.fold.Unfold(kernel_size, dilation=1, padding=0, stride=1)[source]#
                                          +class torch.nn.modules.fold.Unfold(kernel_size, dilation=1, padding=0, stride=1)[source]#

                                          Extracts sliding local blocks from a batched input tensor.

Consider a batched input tensor of shape (N, C, *), where N is the batch dimension, C is the channel dimension, @@ -4533,7 +4533,7 @@
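A sketch (illustrative shapes): a 2x3 kernel over a 3x4 spatial input yields L = 2 * 2 = 4 blocks, each flattened to C * 2 * 3 = 30 values.

>>> import torch
>>> import torch.nn as nn
>>> unfold = nn.Unfold(kernel_size=(2, 3))
>>> input = torch.randn(2, 5, 3, 4)
>>> unfold(input).size()
torch.Size([2, 30, 4])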

                                          Unfold
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
                                          @@ -4544,7 +4544,7 @@

                                          Unfold
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
                                          diff --git a/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm1d.html b/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm1d.html index 2e5a1977187..45b731b4a3d 100644 --- a/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm1d.html +++ b/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm1d.html @@ -4415,7 +4415,7 @@

                                          InstanceNorm1d#

                                          -class torch.nn.modules.instancenorm.InstanceNorm1d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.instancenorm.InstanceNorm1d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                                          Applies Instance Normalization.
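A minimal sketch (illustrative shapes; with the default affine=False the module has no learnable parameters):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.InstanceNorm1d(100)
>>> output = m(torch.randn(20, 100, 40))   # normalizes each (sample, channel) slice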

                                          This operation applies Instance Normalization over a 2D (unbatched) or 3D (batched) input as described in the paper diff --git a/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm2d.html b/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm2d.html index 554be72fd8e..a8e7dde9d3a 100644 --- a/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm2d.html +++ b/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm2d.html @@ -4415,7 +4415,7 @@

                                          InstanceNorm2d#

                                          -class torch.nn.modules.instancenorm.InstanceNorm2d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.instancenorm.InstanceNorm2d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                                          Applies Instance Normalization.

                                          This operation applies Instance Normalization over a 4D input (a mini-batch of 2D inputs diff --git a/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm3d.html b/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm3d.html index 57113c94b14..4c6c0fbb98d 100644 --- a/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm3d.html +++ b/2.9/generated/torch.nn.modules.instancenorm.InstanceNorm3d.html @@ -4415,7 +4415,7 @@

                                          InstanceNorm3d#

                                          -class torch.nn.modules.instancenorm.InstanceNorm3d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.instancenorm.InstanceNorm3d(num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False, device=None, dtype=None)[source]#

                                          Applies Instance Normalization.

                                          This operation applies Instance Normalization over a 5D input (a mini-batch of 3D inputs with additional channel dimension) as described in the paper diff --git a/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm1d.html b/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm1d.html index 2802617bdfb..10ebd22d97d 100644 --- a/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm1d.html +++ b/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm1d.html @@ -4415,7 +4415,7 @@

                                          LazyInstanceNorm1d#

                                          -class torch.nn.modules.instancenorm.LazyInstanceNorm1d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.instancenorm.LazyInstanceNorm1d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                          A torch.nn.InstanceNorm1d module with lazy initialization of the num_features argument.

                                          The num_features argument of the InstanceNorm1d is inferred from the input.size(1). The attributes that will be lazily initialized are weight, bias, running_mean and running_var.

                                          @@ -4447,7 +4447,7 @@

                                          LazyInstanceNorm1d
                                          -cls_to_become[source]#
                                          +cls_to_become[source]#

                                          alias of InstanceNorm1d

                                          diff --git a/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm2d.html b/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm2d.html index 34daff8cf30..60339d5e796 100644 --- a/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm2d.html +++ b/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm2d.html @@ -4415,7 +4415,7 @@

                                          LazyInstanceNorm2d#

                                          -class torch.nn.modules.instancenorm.LazyInstanceNorm2d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.instancenorm.LazyInstanceNorm2d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                          A torch.nn.InstanceNorm2d module with lazy initialization of the num_features argument.

                                          The num_features argument of the InstanceNorm2d is inferred from the input.size(1). The attributes that will be lazily initialized are weight, bias, @@ -4448,7 +4448,7 @@

                                          LazyInstanceNorm2d
                                          -cls_to_become[source]#
                                          +cls_to_become[source]#

                                          alias of InstanceNorm2d

                                          diff --git a/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm3d.html b/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm3d.html index 83e0b533494..eec3b3f2cdc 100644 --- a/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm3d.html +++ b/2.9/generated/torch.nn.modules.instancenorm.LazyInstanceNorm3d.html @@ -4415,7 +4415,7 @@

                                          LazyInstanceNorm3d#

                                          -class torch.nn.modules.instancenorm.LazyInstanceNorm3d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.instancenorm.LazyInstanceNorm3d(eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)[source]#

                                          A torch.nn.InstanceNorm3d module with lazy initialization of the num_features argument.

                                          The num_features argument of the InstanceNorm3d is inferred from the input.size(1). The attributes that will be lazily initialized are weight, bias, @@ -4448,7 +4448,7 @@

                                          LazyInstanceNorm3d
                                          -cls_to_become[source]#
                                          +cls_to_become[source]#

                                          alias of InstanceNorm3d

                                          diff --git a/2.9/generated/torch.nn.modules.lazy.LazyModuleMixin.html b/2.9/generated/torch.nn.modules.lazy.LazyModuleMixin.html index 89417416cc5..dd5aa7cec4c 100644 --- a/2.9/generated/torch.nn.modules.lazy.LazyModuleMixin.html +++ b/2.9/generated/torch.nn.modules.lazy.LazyModuleMixin.html @@ -4404,7 +4404,7 @@

                                          LazyModuleMixin#

                                          -class torch.nn.modules.lazy.LazyModuleMixin(*args, **kwargs)[source]#
                                          +class torch.nn.modules.lazy.LazyModuleMixin(*args, **kwargs)[source]#

                                          A mixin for modules that lazily initialize parameters, also known as “lazy modules”.

                                          Modules that lazily initialize parameters, or “lazy modules”, derive the shapes of their parameters from the first input(s) @@ -4510,7 +4510,7 @@
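A sketch of the lazy lifecycle using LazyLinear, one concrete module built on this mixin (shapes illustrative):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.LazyLinear(out_features=10)
>>> m.has_uninitialized_params()
True
>>> output = m(torch.randn(4, 8))   # first forward infers in_features=8
>>> m.weight.shape
torch.Size([10, 8])

After the first forward, a lazy module is typically converted to its non-lazy counterpart (see the cls_to_become entries above), so the mixin's methods are only meaningful before initialization.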

                                          LazyModuleMixin
                                          -has_uninitialized_params()[source]#
                                          +has_uninitialized_params()[source]#

                                          Check if a module has parameters that are not initialized.

                                          @@ -4518,7 +4518,7 @@

                                          LazyModuleMixin
                                          -initialize_parameters(*args, **kwargs)[source]#
                                          +initialize_parameters(*args, **kwargs)[source]#

                                          Initialize parameters according to the input batch properties.

                                          This adds an interface to isolate parameter initialization from the forward pass when doing parameter shape inference.

                                          diff --git a/2.9/generated/torch.nn.modules.linear.Bilinear.html b/2.9/generated/torch.nn.modules.linear.Bilinear.html index dfda6e9f4e1..81936525a5a 100644 --- a/2.9/generated/torch.nn.modules.linear.Bilinear.html +++ b/2.9/generated/torch.nn.modules.linear.Bilinear.html @@ -4415,7 +4415,7 @@

                                          Bilinear#

                                          -class torch.nn.modules.linear.Bilinear(in1_features, in2_features, out_features, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.linear.Bilinear(in1_features, in2_features, out_features, bias=True, device=None, dtype=None)[source]#

Applies a bilinear transformation to the incoming data: y = x_1^T A x_2 + b.
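A minimal sketch (illustrative shapes; the learned weight A has shape (out_features, in1_features, in2_features)):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Bilinear(20, 30, 40)
>>> m(torch.randn(128, 20), torch.randn(128, 30)).size()
torch.Size([128, 40])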

                                          Parameters
                                          @@ -4508,7 +4508,7 @@

                                          Bilinear
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
                                          @@ -4519,7 +4519,7 @@

                                          Bilinear
                                          -forward(input1, input2)[source]#
                                          +forward(input1, input2)[source]#

                                          Runs the forward pass.

                                          Return type
                                          @@ -4530,7 +4530,7 @@

                                          Bilinear
                                          -reset_parameters()[source]#
                                          +reset_parameters()[source]#

                                          Resets parameters based on their initialization used in __init__.

                                          diff --git a/2.9/generated/torch.nn.modules.linear.Identity.html b/2.9/generated/torch.nn.modules.linear.Identity.html index c55b58b6d98..05291807054 100644 --- a/2.9/generated/torch.nn.modules.linear.Identity.html +++ b/2.9/generated/torch.nn.modules.linear.Identity.html @@ -4415,7 +4415,7 @@

                                          Identity#

                                          -class torch.nn.modules.linear.Identity(*args, **kwargs)[source]#
                                          +class torch.nn.modules.linear.Identity(*args, **kwargs)[source]#

                                          A placeholder identity operator that is argument-insensitive.

                                          Parameters
                                          @@ -4442,7 +4442,7 @@

                                          Identity
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
                                          diff --git a/2.9/generated/torch.nn.modules.linear.LazyLinear.html b/2.9/generated/torch.nn.modules.linear.LazyLinear.html index d30338f3f8a..3915c88566d 100644 --- a/2.9/generated/torch.nn.modules.linear.LazyLinear.html +++ b/2.9/generated/torch.nn.modules.linear.LazyLinear.html @@ -4415,7 +4415,7 @@

                                          LazyLinear#

                                          -class torch.nn.modules.linear.LazyLinear(out_features, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.linear.LazyLinear(out_features, bias=True, device=None, dtype=None)[source]#

                                          A torch.nn.Linear module where in_features is inferred.

                                          In this module, the weight and bias are of torch.nn.UninitializedParameter class. They will be initialized after the first call to forward is done and the @@ -4490,13 +4490,13 @@

                                          LazyLinear
                                          -cls_to_become[source]#
                                          +cls_to_become[source]#

                                          alias of Linear

                                          -initialize_parameters(input)[source]#
                                          +initialize_parameters(input)[source]#

                                          Infers in_features based on input and initializes parameters.

                                          @@ -4504,7 +4504,7 @@

                                          LazyLinear
                                          -reset_parameters()[source]#
                                          +reset_parameters()[source]#

                                          Resets parameters based on their initialization used in __init__.

                                          diff --git a/2.9/generated/torch.nn.modules.linear.Linear.html b/2.9/generated/torch.nn.modules.linear.Linear.html index 0c20e6d7b6f..9884577c1ad 100644 --- a/2.9/generated/torch.nn.modules.linear.Linear.html +++ b/2.9/generated/torch.nn.modules.linear.Linear.html @@ -4415,7 +4415,7 @@

                                          Linear#

                                          -class torch.nn.modules.linear.Linear(in_features, out_features, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.linear.Linear(in_features, out_features, bias=True, device=None, dtype=None)[source]#

Applies an affine linear transformation to the incoming data: y = xA^T + b.

                                          This module supports TensorFloat32.

                                          On certain ROCm devices, when using float16 inputs this module will use different precision for backward.

                                          @@ -4506,7 +4506,7 @@
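A minimal sketch (illustrative shapes; weight is stored as (out_features, in_features), hence the transpose in the formula above):

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Linear(20, 30)
>>> m(torch.randn(128, 20)).size()
torch.Size([128, 30])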

                                          Linear
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
                                          @@ -4517,7 +4517,7 @@

                                          Linear
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
                                          @@ -4528,7 +4528,7 @@

                                          Linear
                                          -reset_parameters()[source]#
                                          +reset_parameters()[source]#

                                          Resets parameters based on their initialization used in __init__.

                                          diff --git a/2.9/generated/torch.nn.modules.loss.BCELoss.html b/2.9/generated/torch.nn.modules.loss.BCELoss.html index a394481d171..ee4ccb37c0e 100644 --- a/2.9/generated/torch.nn.modules.loss.BCELoss.html +++ b/2.9/generated/torch.nn.modules.loss.BCELoss.html @@ -4415,7 +4415,7 @@

                                          BCELoss#

                                          -class torch.nn.modules.loss.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')[source]#

                                          Creates a criterion that measures the Binary Cross Entropy between the target and the input probabilities:

The unreduced (i.e. with reduction set to 'none') loss can be described as:

\ell(x, y) = L = \{l_1, \dots, l_N\}^\top, \quad l_n = -w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log(1 - x_n) \right],

where N is the batch size.

                                          @@ -4490,7 +4490,7 @@
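A minimal sketch (illustrative shapes); inputs must already be probabilities in [0, 1], e.g. the output of a Sigmoid:

>>> import torch
>>> import torch.nn as nn
>>> m = nn.Sigmoid()
>>> loss = nn.BCELoss()
>>> input = torch.randn(3, requires_grad=True)
>>> target = torch.rand(3)
>>> loss(m(input), target).backward()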

                                          BCELoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
                                          diff --git a/2.9/generated/torch.nn.modules.loss.BCEWithLogitsLoss.html b/2.9/generated/torch.nn.modules.loss.BCEWithLogitsLoss.html index efd791e29ae..4b1ba72e261 100644 --- a/2.9/generated/torch.nn.modules.loss.BCEWithLogitsLoss.html +++ b/2.9/generated/torch.nn.modules.loss.BCEWithLogitsLoss.html @@ -4415,7 +4415,7 @@

                                          BCEWithLogitsLoss#

                                          -class torch.nn.modules.loss.BCEWithLogitsLoss(weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None)[source]#
                                          +class torch.nn.modules.loss.BCEWithLogitsLoss(weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None)[source]#

This loss combines a Sigmoid layer and the BCELoss in one single class. This version is more numerically stable than using a plain Sigmoid followed by a BCELoss as, by combining the operations into one layer,
@@ -4516,7 +4516,7 @@

                                          BCEWithLogitsLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
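A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.BCEWithLogitsLoss()
    input = torch.randn(3, requires_grad=True)   # raw logits; no explicit sigmoid needed
    target = torch.empty(3).random_(2)           # binary targets in {0, 1}
    loss(input, target).backward()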
diff --git a/2.9/generated/torch.nn.modules.loss.CTCLoss.html b/2.9/generated/torch.nn.modules.loss.CTCLoss.html
index cdd03a7188e..05065b4ff97 100644
--- a/2.9/generated/torch.nn.modules.loss.CTCLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.CTCLoss.html
@@ -4415,7 +4415,7 @@

                                          CTCLoss#

                                          -class torch.nn.modules.loss.CTCLoss(blank=0, reduction='mean', zero_infinity=False)[source]#
                                          +class torch.nn.modules.loss.CTCLoss(blank=0, reduction='mean', zero_infinity=False)[source]#

                                          The Connectionist Temporal Classification loss.

Calculates loss between a continuous (unsegmented) time series and a target sequence. CTCLoss sums over the probability of possible alignments of input to target, producing a loss value which is differentiable
@@ -4570,7 +4570,7 @@

                                          CTCLoss
                                          -forward(log_probs, targets, input_lengths, target_lengths)[source]#
                                          +forward(log_probs, targets, input_lengths, target_lengths)[source]#

                                          Runs the forward pass.

                                          Return type
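A minimal usage sketch showing the expected argument shapes; sizes are illustrative:

    import torch
    from torch import nn

    T, C, N, S = 50, 20, 16, 30   # input length, classes (0 is blank), batch, max target length
    log_probs = torch.randn(T, N, C).log_softmax(2).detach().requires_grad_()
    targets = torch.randint(1, C, (N, S), dtype=torch.long)   # class indices; 0 reserved for blank
    input_lengths = torch.full((N,), T, dtype=torch.long)
    target_lengths = torch.randint(10, S, (N,), dtype=torch.long)
    loss = nn.CTCLoss()(log_probs, targets, input_lengths, target_lengths)
    loss.backward()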
diff --git a/2.9/generated/torch.nn.modules.loss.CosineEmbeddingLoss.html b/2.9/generated/torch.nn.modules.loss.CosineEmbeddingLoss.html
index b1ae4f8e5f4..2198bb1ddfe 100644
--- a/2.9/generated/torch.nn.modules.loss.CosineEmbeddingLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.CosineEmbeddingLoss.html
@@ -4415,7 +4415,7 @@

                                          CosineEmbeddingLoss#

                                          -class torch.nn.modules.loss.CosineEmbeddingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.CosineEmbeddingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the loss given input tensors x_1, x_2 and a Tensor label y with values 1 or -1. Use (y=1) to maximize the cosine similarity of two inputs, and (y=-1) otherwise.
@@ -4473,7 +4473,7 @@

                                          CosineEmbeddingLoss
                                          -forward(input1, input2, target)[source]#
                                          +forward(input1, input2, target)[source]#

                                          Runs the forward pass.

                                          Return type
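A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.CosineEmbeddingLoss(margin=0.5)
    input1 = torch.randn(3, 5, requires_grad=True)
    input2 = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1., -1., 1.])   # 1: pull pair together, -1: push apart
    loss(input1, input2, target).backward()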
diff --git a/2.9/generated/torch.nn.modules.loss.CrossEntropyLoss.html b/2.9/generated/torch.nn.modules.loss.CrossEntropyLoss.html
index d9da0391155..77e18b96bd7 100644
--- a/2.9/generated/torch.nn.modules.loss.CrossEntropyLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.CrossEntropyLoss.html
@@ -4415,7 +4415,7 @@

                                          CrossEntropyLoss#

                                          -class torch.nn.modules.loss.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)[source]#
                                          +class torch.nn.modules.loss.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)[source]#

                                          This criterion computes the cross entropy loss between input logits and target.

It is useful when training a classification problem with C classes.
@@ -4598,7 +4598,7 @@

                                          CrossEntropyLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
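A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.CrossEntropyLoss()
    input = torch.randn(3, 5, requires_grad=True)          # raw logits for 5 classes
    target = torch.empty(3, dtype=torch.long).random_(5)   # class indices
    loss(input, target).backward()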
diff --git a/2.9/generated/torch.nn.modules.loss.GaussianNLLLoss.html b/2.9/generated/torch.nn.modules.loss.GaussianNLLLoss.html
index e3f9b70bcbd..2a3ab738282 100644
--- a/2.9/generated/torch.nn.modules.loss.GaussianNLLLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.GaussianNLLLoss.html
@@ -4415,7 +4415,7 @@

                                          GaussianNLLLoss#

                                          -class torch.nn.modules.loss.GaussianNLLLoss(*, full=False, eps=1e-06, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.GaussianNLLLoss(*, full=False, eps=1e-06, reduction='mean')[source]#

                                          Gaussian negative log likelihood loss.

The targets are treated as samples from Gaussian distributions with expectations and variances predicted by the neural network. For a
@@ -4491,7 +4491,7 @@

                                          GaussianNLLLoss
                                          -forward(input, target, var)[source]#
                                          +forward(input, target, var)[source]#

                                          Runs the forward pass.

                                          Return type
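A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.GaussianNLLLoss()
    input = torch.randn(5, 2, requires_grad=True)   # predicted means
    var = torch.ones(5, 2, requires_grad=True)      # predicted variances (must be positive)
    target = torch.randn(5, 2)                      # observed samples
    loss(input, target, var).backward()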
diff --git a/2.9/generated/torch.nn.modules.loss.HingeEmbeddingLoss.html b/2.9/generated/torch.nn.modules.loss.HingeEmbeddingLoss.html
index 1ffb193d15f..db89a16488b 100644
--- a/2.9/generated/torch.nn.modules.loss.HingeEmbeddingLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.HingeEmbeddingLoss.html
@@ -4415,7 +4415,7 @@

                                          HingeEmbeddingLoss#

                                          -class torch.nn.modules.loss.HingeEmbeddingLoss(margin=1.0, size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.HingeEmbeddingLoss(margin=1.0, size_average=None, reduce=None, reduction='mean')[source]#

Measures the loss given an input tensor x and a labels tensor y (containing 1 or -1). This is usually used for measuring whether two inputs are similar or
@@ -4469,7 +4469,7 @@

                                          HingeEmbeddingLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
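A minimal usage sketch; treating the input as pairwise distances is an illustrative assumption:

    import torch
    from torch import nn

    loss = nn.HingeEmbeddingLoss(margin=1.0)
    input = torch.randn(4, requires_grad=True).abs()   # e.g. pairwise distances
    target = torch.tensor([1., -1., 1., -1.])          # 1: similar pair, -1: dissimilar
    loss(input, target).backward()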
diff --git a/2.9/generated/torch.nn.modules.loss.HuberLoss.html b/2.9/generated/torch.nn.modules.loss.HuberLoss.html
index 0b8abc9cb71..edf5681c33a 100644
--- a/2.9/generated/torch.nn.modules.loss.HuberLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.HuberLoss.html
@@ -4415,7 +4415,7 @@

                                          HuberLoss#

                                          -class torch.nn.modules.loss.HuberLoss(reduction='mean', delta=1.0)[source]#
                                          +class torch.nn.modules.loss.HuberLoss(reduction='mean', delta=1.0)[source]#

Creates a criterion that uses a squared term if the absolute element-wise error falls below delta and a delta-scaled L1 term otherwise. This loss combines advantages of both L1Loss and MSELoss; the
@@ -4471,7 +4471,7 @@

                                          HuberLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
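A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.HuberLoss(delta=1.0)   # squared error below delta, scaled L1 above it
    input = torch.randn(3, 5, requires_grad=True)
    target = torch.randn(3, 5)
    loss(input, target).backward()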
diff --git a/2.9/generated/torch.nn.modules.loss.KLDivLoss.html b/2.9/generated/torch.nn.modules.loss.KLDivLoss.html
index 7512b4a3e68..69fd70ab13d 100644
--- a/2.9/generated/torch.nn.modules.loss.KLDivLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.KLDivLoss.html
@@ -4415,7 +4415,7 @@

                                          KLDivLoss#

                                          -class torch.nn.modules.loss.KLDivLoss(size_average=None, reduce=None, reduction='mean', log_target=False)[source]#
                                          +class torch.nn.modules.loss.KLDivLoss(size_average=None, reduce=None, reduction='mean', log_target=False)[source]#

                                          The Kullback-Leibler divergence loss.

For tensors of the same shape y_{\text{pred}}, y_{\text{true}}, where y_{\text{pred}} is the input and y_{\text{true}} is the
@@ -4498,7 +4498,7 @@

                                          KLDivLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
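A minimal usage sketch; note the input is expected in log-space:

    import torch
    import torch.nn.functional as F
    from torch import nn

    kl = nn.KLDivLoss(reduction="batchmean")
    input = F.log_softmax(torch.randn(3, 5, requires_grad=True), dim=1)   # log-probabilities
    target = F.softmax(torch.randn(3, 5), dim=1)                          # probabilities (log_target=False)
    kl(input, target).backward()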
diff --git a/2.9/generated/torch.nn.modules.loss.L1Loss.html b/2.9/generated/torch.nn.modules.loss.L1Loss.html
index 41623c230e6..da071862b8f 100644
--- a/2.9/generated/torch.nn.modules.loss.L1Loss.html
+++ b/2.9/generated/torch.nn.modules.loss.L1Loss.html
@@ -4415,7 +4415,7 @@

                                          L1Loss#

                                          -class torch.nn.modules.loss.L1Loss(size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.L1Loss(size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the mean absolute error (MAE) between each element in the input x and target y.

                                          The unreduced (i.e. with reduction set to 'none') loss can be described as:

                                          @@ -4477,7 +4477,7 @@

                                          L1Loss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
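A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.L1Loss()
    input = torch.randn(3, 5, requires_grad=True)
    target = torch.randn(3, 5)
    loss(input, target).backward()   # mean of |input - target|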
diff --git a/2.9/generated/torch.nn.modules.loss.MSELoss.html b/2.9/generated/torch.nn.modules.loss.MSELoss.html
index cf9cf1238da..47bff4bf7e1 100644
--- a/2.9/generated/torch.nn.modules.loss.MSELoss.html
+++ b/2.9/generated/torch.nn.modules.loss.MSELoss.html
@@ -4415,7 +4415,7 @@

                                          MSELoss#

                                          -class torch.nn.modules.loss.MSELoss(size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.MSELoss(size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the mean squared error (squared L2 norm) between each element in the input x and target y.

                                          The unreduced (i.e. with reduction set to 'none') loss can be described as:

                                          @@ -4474,7 +4474,7 @@

                                          MSELoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
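A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.MSELoss()
    input = torch.randn(3, 5, requires_grad=True)
    target = torch.randn(3, 5)
    loss(input, target).backward()   # mean of (input - target)^2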
diff --git a/2.9/generated/torch.nn.modules.loss.MarginRankingLoss.html b/2.9/generated/torch.nn.modules.loss.MarginRankingLoss.html
index ed242a96850..4e1b7958083 100644
--- a/2.9/generated/torch.nn.modules.loss.MarginRankingLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.MarginRankingLoss.html
@@ -4415,7 +4415,7 @@

                                          MarginRankingLoss#

                                          -class torch.nn.modules.loss.MarginRankingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.MarginRankingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the loss given inputs x1, x2, two 1D mini-batch or 0D Tensors, and a label 1D mini-batch or 0D Tensor y (containing 1 or -1).

                                          @@ -4467,7 +4467,7 @@

                                          MarginRankingLoss
                                          -forward(input1, input2, target)[source]#
                                          +forward(input1, input2, target)[source]#

                                          Runs the forward pass.

                                          Return type
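A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.MarginRankingLoss(margin=0.5)
    input1 = torch.randn(3, requires_grad=True)
    input2 = torch.randn(3, requires_grad=True)
    target = torch.tensor([1., 1., -1.])   # 1: input1 should rank higher; -1: input2
    loss(input1, input2, target).backward()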
diff --git a/2.9/generated/torch.nn.modules.loss.MultiLabelMarginLoss.html b/2.9/generated/torch.nn.modules.loss.MultiLabelMarginLoss.html
index f26b390a424..7a011799db4 100644
--- a/2.9/generated/torch.nn.modules.loss.MultiLabelMarginLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.MultiLabelMarginLoss.html
@@ -4415,7 +4415,7 @@

                                          MultiLabelMarginLoss#

                                          -class torch.nn.modules.loss.MultiLabelMarginLoss(size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.MultiLabelMarginLoss(size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) between input x (a 2D mini-batch Tensor) and output y (which is a 2D Tensor of target class indices).
@@ -4470,7 +4470,7 @@

                                          MultiLabelMarginLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
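A minimal usage sketch showing the target encoding; values are illustrative:

    import torch
    from torch import nn

    loss = nn.MultiLabelMarginLoss()
    x = torch.tensor([[0.1, 0.2, 0.4, 0.8]], requires_grad=True)
    y = torch.tensor([[3, 0, -1, 1]])   # target classes 3 and 0; -1 terminates the list
    loss(x, y).backward()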
diff --git a/2.9/generated/torch.nn.modules.loss.MultiLabelSoftMarginLoss.html b/2.9/generated/torch.nn.modules.loss.MultiLabelSoftMarginLoss.html
index 63988ae23cf..2e1beb346ba 100644
--- a/2.9/generated/torch.nn.modules.loss.MultiLabelSoftMarginLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.MultiLabelSoftMarginLoss.html
@@ -4415,7 +4415,7 @@

                                          MultiLabelSoftMarginLoss#

                                          -class torch.nn.modules.loss.MultiLabelSoftMarginLoss(weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.MultiLabelSoftMarginLoss(weight=None, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that optimizes a multi-label one-versus-all loss based on max-entropy, between input x and target y of size (N, C).
@@ -4460,7 +4460,7 @@

                                          MultiLabelSoftMarginLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
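A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.MultiLabelSoftMarginLoss()
    input = torch.randn(3, 5, requires_grad=True)   # one logit per class
    target = torch.empty(3, 5).random_(2)           # multi-hot labels in {0, 1}
    loss(input, target).backward()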
diff --git a/2.9/generated/torch.nn.modules.loss.MultiMarginLoss.html b/2.9/generated/torch.nn.modules.loss.MultiMarginLoss.html
index b82725baee5..ed1d61529bb 100644
--- a/2.9/generated/torch.nn.modules.loss.MultiMarginLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.MultiMarginLoss.html
@@ -4415,7 +4415,7 @@

                                          MultiMarginLoss#

                                          -class torch.nn.modules.loss.MultiMarginLoss(p=1, margin=1.0, weight=None, size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.MultiMarginLoss(p=1, margin=1.0, weight=None, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that optimizes a multi-class classification hinge loss (margin-based loss) between input x (a 2D mini-batch Tensor) and output y (which is a 1D tensor of target class indices,
@@ -4479,7 +4479,7 @@

                                          MultiMarginLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
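A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.MultiMarginLoss()
    x = torch.tensor([[0.1, 0.2, 0.4, 0.8]], requires_grad=True)
    y = torch.tensor([3])   # single target class index per sample
    loss(x, y).backward()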
diff --git a/2.9/generated/torch.nn.modules.loss.NLLLoss.html b/2.9/generated/torch.nn.modules.loss.NLLLoss.html
index 94ad623876c..b957f6d307b 100644
--- a/2.9/generated/torch.nn.modules.loss.NLLLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.NLLLoss.html
@@ -4415,7 +4415,7 @@

                                          NLLLoss#

                                          -class torch.nn.modules.loss.NLLLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.NLLLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')[source]#

                                          The negative log likelihood loss. It is useful to train a classification problem with C classes.

If provided, the optional argument weight should be a 1D Tensor assigning
@@ -4522,7 +4522,7 @@

                                          NLLLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
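A minimal usage sketch; note the input is expected to contain log-probabilities:

    import torch
    from torch import nn

    m = nn.LogSoftmax(dim=1)   # NLLLoss expects log-probabilities, not raw logits
    loss = nn.NLLLoss()
    input = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    loss(m(input), target).backward()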
diff --git a/2.9/generated/torch.nn.modules.loss.PoissonNLLLoss.html b/2.9/generated/torch.nn.modules.loss.PoissonNLLLoss.html
index 5c056893569..753b6c90c27 100644
--- a/2.9/generated/torch.nn.modules.loss.PoissonNLLLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.PoissonNLLLoss.html
@@ -4415,7 +4415,7 @@

                                          PoissonNLLLoss#

                                          -class torch.nn.modules.loss.PoissonNLLLoss(log_input=True, full=False, size_average=None, eps=1e-08, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.PoissonNLLLoss(log_input=True, full=False, size_average=None, eps=1e-08, reduce=None, reduction='mean')[source]#

                                          Negative log likelihood loss with Poisson distribution of target.

                                          The loss can be described as:

                                          @@ -4476,7 +4476,7 @@

                                          PoissonNLLLoss
                                          -forward(log_input, target)[source]#
                                          +forward(log_input, target)[source]#

                                          Runs the forward pass.

                                          Return type
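A minimal usage sketch; drawing the targets from a Poisson distribution is an illustrative assumption:

    import torch
    from torch import nn

    loss = nn.PoissonNLLLoss()                          # log_input=True by default
    log_input = torch.randn(5, 2, requires_grad=True)   # log of the predicted rate
    target = torch.poisson(torch.rand(5, 2) * 5)        # observed counts
    loss(log_input, target).backward()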
diff --git a/2.9/generated/torch.nn.modules.loss.SmoothL1Loss.html b/2.9/generated/torch.nn.modules.loss.SmoothL1Loss.html
index f6d048531d0..66c45d749f9 100644
--- a/2.9/generated/torch.nn.modules.loss.SmoothL1Loss.html
+++ b/2.9/generated/torch.nn.modules.loss.SmoothL1Loss.html
@@ -4415,7 +4415,7 @@

                                          SmoothL1Loss#

                                          -class torch.nn.modules.loss.SmoothL1Loss(size_average=None, reduce=None, reduction='mean', beta=1.0)[source]#
                                          +class torch.nn.modules.loss.SmoothL1Loss(size_average=None, reduce=None, reduction='mean', beta=1.0)[source]#

Creates a criterion that uses a squared term if the absolute element-wise error falls below beta and an L1 term otherwise. It is less sensitive to outliers than torch.nn.MSELoss and in some cases
@@ -4492,7 +4492,7 @@

                                          SmoothL1Loss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
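A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.SmoothL1Loss(beta=1.0)   # squared term below beta, L1 term above it
    input = torch.randn(3, 5, requires_grad=True)
    target = torch.randn(3, 5)
    loss(input, target).backward()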
diff --git a/2.9/generated/torch.nn.modules.loss.SoftMarginLoss.html b/2.9/generated/torch.nn.modules.loss.SoftMarginLoss.html
index be78229651e..8a160371111 100644
--- a/2.9/generated/torch.nn.modules.loss.SoftMarginLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.SoftMarginLoss.html
@@ -4415,7 +4415,7 @@

                                          SoftMarginLoss#

                                          -class torch.nn.modules.loss.SoftMarginLoss(size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.SoftMarginLoss(size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that optimizes a two-class classification logistic loss between input tensor x and target tensor y (containing 1 or -1).

                                          @@ -4454,7 +4454,7 @@

                                          SoftMarginLoss
                                          -forward(input, target)[source]#
                                          +forward(input, target)[source]#

                                          Runs the forward pass.

                                          Return type
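A minimal usage sketch; values are illustrative:

    import torch
    from torch import nn

    loss = nn.SoftMarginLoss()
    input = torch.randn(3, requires_grad=True)
    target = torch.tensor([1., -1., 1.])   # labels in {1, -1}
    loss(input, target).backward()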
diff --git a/2.9/generated/torch.nn.modules.loss.TripletMarginLoss.html b/2.9/generated/torch.nn.modules.loss.TripletMarginLoss.html
index 37917b3ccf5..9b3c7a960fb 100644
--- a/2.9/generated/torch.nn.modules.loss.TripletMarginLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.TripletMarginLoss.html
@@ -4415,7 +4415,7 @@

                                          TripletMarginLoss#

                                          -class torch.nn.modules.loss.TripletMarginLoss(margin=1.0, p=2.0, eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.TripletMarginLoss(margin=1.0, p=2.0, eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean')[source]#

Creates a criterion that measures the triplet loss given input tensors x1, x2, x3 and a margin with a value greater than 0. This is used for measuring a relative similarity between samples. A triplet
@@ -4483,7 +4483,7 @@

                                          TripletMarginLoss
                                          -forward(anchor, positive, negative)[source]#
                                          +forward(anchor, positive, negative)[source]#

                                          Runs the forward pass.

                                          Return type
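A minimal usage sketch; embedding sizes are illustrative:

    import torch
    from torch import nn

    triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2.0)
    anchor = torch.randn(100, 128, requires_grad=True)
    positive = torch.randn(100, 128, requires_grad=True)
    negative = torch.randn(100, 128, requires_grad=True)
    triplet_loss(anchor, positive, negative).backward()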
diff --git a/2.9/generated/torch.nn.modules.loss.TripletMarginWithDistanceLoss.html b/2.9/generated/torch.nn.modules.loss.TripletMarginWithDistanceLoss.html
index b81a4349c31..b48c9414193 100644
--- a/2.9/generated/torch.nn.modules.loss.TripletMarginWithDistanceLoss.html
+++ b/2.9/generated/torch.nn.modules.loss.TripletMarginWithDistanceLoss.html
@@ -4415,7 +4415,7 @@

                                          TripletMarginWithDistanceLoss#

                                          -class torch.nn.modules.loss.TripletMarginWithDistanceLoss(*, distance_function=None, margin=1.0, swap=False, reduction='mean')[source]#
                                          +class torch.nn.modules.loss.TripletMarginWithDistanceLoss(*, distance_function=None, margin=1.0, swap=False, reduction='mean')[source]#

Creates a criterion that measures the triplet loss given input tensors a, p, and n (representing anchor, positive, and negative examples, respectively), and a nonnegative,
@@ -4516,7 +4516,7 @@

                                          TripletMarginWithDistanceLoss
                                          -forward(anchor, positive, negative)[source]#
                                          +forward(anchor, positive, negative)[source]#

                                          Runs the forward pass.

                                          Return type
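A minimal usage sketch; the custom cosine distance is an illustrative choice:

    import torch
    import torch.nn.functional as F
    from torch import nn

    loss = nn.TripletMarginWithDistanceLoss(
        distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y))
    anchor = torch.randn(100, 128, requires_grad=True)
    positive = torch.randn(100, 128, requires_grad=True)
    negative = torch.randn(100, 128, requires_grad=True)
    loss(anchor, positive, negative).backward()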
diff --git a/2.9/generated/torch.nn.modules.module.register_module_backward_hook.html b/2.9/generated/torch.nn.modules.module.register_module_backward_hook.html
index 7501b595d64..084540f7348 100644
--- a/2.9/generated/torch.nn.modules.module.register_module_backward_hook.html
+++ b/2.9/generated/torch.nn.modules.module.register_module_backward_hook.html
@@ -4404,7 +4404,7 @@

                                          torch.nn.modules.module.register_module_backward_hook#

                                          -torch.nn.modules.module.register_module_backward_hook(hook)[source]#
                                          +torch.nn.modules.module.register_module_backward_hook(hook)[source]#

                                          Register a backward hook common to all the modules.

This function is deprecated in favor of torch.nn.modules.module.register_module_full_backward_hook()
diff --git a/2.9/generated/torch.nn.modules.module.register_module_buffer_registration_hook.html b/2.9/generated/torch.nn.modules.module.register_module_buffer_registration_hook.html
index a8981653858..4bb366c42d1 100644
--- a/2.9/generated/torch.nn.modules.module.register_module_buffer_registration_hook.html
+++ b/2.9/generated/torch.nn.modules.module.register_module_buffer_registration_hook.html
@@ -4404,7 +4404,7 @@

                                          torch.nn.modules.module.register_module_buffer_registration_hook#

                                          -torch.nn.modules.module.register_module_buffer_registration_hook(hook)[source]#
                                          +torch.nn.modules.module.register_module_buffer_registration_hook(hook)[source]#

                                          Register a buffer registration hook common to all modules.

                                          Warning

diff --git a/2.9/generated/torch.nn.modules.module.register_module_forward_hook.html b/2.9/generated/torch.nn.modules.module.register_module_forward_hook.html
index c05a7e4fc1d..6be3e22902d 100644
--- a/2.9/generated/torch.nn.modules.module.register_module_forward_hook.html
+++ b/2.9/generated/torch.nn.modules.module.register_module_forward_hook.html
@@ -4404,7 +4404,7 @@

                                          torch.nn.modules.module.register_module_forward_hook#

                                          -torch.nn.modules.module.register_module_forward_hook(hook, *, with_kwargs=False, always_call=False)[source]#
                                          +torch.nn.modules.module.register_module_forward_hook(hook, *, with_kwargs=False, always_call=False)[source]#

                                          Register a global forward hook for all the modules.
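A sketch of how this global hook could be used; the hook body is illustrative:

    import torch
    from torch import nn

    def log_shapes(module, args, output):
        # fires after every module's forward() anywhere in the process
        print(type(module).__name__, getattr(output, "shape", None))

    handle = nn.modules.module.register_module_forward_hook(log_shapes)
    nn.Linear(4, 2)(torch.randn(1, 4))   # hook fires here
    handle.remove()                      # deregister via the returned handle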

                                          Warning

diff --git a/2.9/generated/torch.nn.modules.module.register_module_forward_pre_hook.html b/2.9/generated/torch.nn.modules.module.register_module_forward_pre_hook.html
index 45d96642bc8..a9d605a9a5a 100644
--- a/2.9/generated/torch.nn.modules.module.register_module_forward_pre_hook.html
+++ b/2.9/generated/torch.nn.modules.module.register_module_forward_pre_hook.html
@@ -4404,7 +4404,7 @@

                                          torch.nn.modules.module.register_module_forward_pre_hook#

                                          -torch.nn.modules.module.register_module_forward_pre_hook(hook)[source]#
                                          +torch.nn.modules.module.register_module_forward_pre_hook(hook)[source]#

                                          Register a forward pre-hook common to all modules.

                                          Warning

diff --git a/2.9/generated/torch.nn.modules.module.register_module_full_backward_hook.html b/2.9/generated/torch.nn.modules.module.register_module_full_backward_hook.html
index f559196cb7e..21e705f5551 100644
--- a/2.9/generated/torch.nn.modules.module.register_module_full_backward_hook.html
+++ b/2.9/generated/torch.nn.modules.module.register_module_full_backward_hook.html
@@ -4404,7 +4404,7 @@

                                          torch.nn.modules.module.register_module_full_backward_hook#

                                          -torch.nn.modules.module.register_module_full_backward_hook(hook)[source]#
                                          +torch.nn.modules.module.register_module_full_backward_hook(hook)[source]#

                                          Register a backward hook common to all the modules.

                                          Warning

diff --git a/2.9/generated/torch.nn.modules.module.register_module_full_backward_pre_hook.html b/2.9/generated/torch.nn.modules.module.register_module_full_backward_pre_hook.html
index a507af67f04..9e24eaa28a9 100644
--- a/2.9/generated/torch.nn.modules.module.register_module_full_backward_pre_hook.html
+++ b/2.9/generated/torch.nn.modules.module.register_module_full_backward_pre_hook.html
@@ -4404,7 +4404,7 @@

                                          torch.nn.modules.module.register_module_full_backward_pre_hook#

                                          -torch.nn.modules.module.register_module_full_backward_pre_hook(hook)[source]#
                                          +torch.nn.modules.module.register_module_full_backward_pre_hook(hook)[source]#

                                          Register a backward pre-hook common to all the modules.

                                          Warning

diff --git a/2.9/generated/torch.nn.modules.module.register_module_module_registration_hook.html b/2.9/generated/torch.nn.modules.module.register_module_module_registration_hook.html
index e384467e0b4..7f62e20cb17 100644
--- a/2.9/generated/torch.nn.modules.module.register_module_module_registration_hook.html
+++ b/2.9/generated/torch.nn.modules.module.register_module_module_registration_hook.html
@@ -4404,7 +4404,7 @@

                                          torch.nn.modules.module.register_module_module_registration_hook#

                                          -torch.nn.modules.module.register_module_module_registration_hook(hook)[source]#
                                          +torch.nn.modules.module.register_module_module_registration_hook(hook)[source]#

                                          Register a module registration hook common to all modules.

                                          Warning

diff --git a/2.9/generated/torch.nn.modules.module.register_module_parameter_registration_hook.html b/2.9/generated/torch.nn.modules.module.register_module_parameter_registration_hook.html
index 96004ba03ea..93e3037c0a5 100644
--- a/2.9/generated/torch.nn.modules.module.register_module_parameter_registration_hook.html
+++ b/2.9/generated/torch.nn.modules.module.register_module_parameter_registration_hook.html
@@ -4404,7 +4404,7 @@

                                          torch.nn.modules.module.register_module_parameter_registration_hook#

                                          -torch.nn.modules.module.register_module_parameter_registration_hook(hook)[source]#
                                          +torch.nn.modules.module.register_module_parameter_registration_hook(hook)[source]#

                                          Register a parameter registration hook common to all modules.

                                          Warning

diff --git a/2.9/generated/torch.nn.modules.normalization.GroupNorm.html b/2.9/generated/torch.nn.modules.normalization.GroupNorm.html
index 745a0288e2a..42d4ddbfb27 100644
--- a/2.9/generated/torch.nn.modules.normalization.GroupNorm.html
+++ b/2.9/generated/torch.nn.modules.normalization.GroupNorm.html
@@ -4415,7 +4415,7 @@

                                          GroupNorm#

                                          -class torch.nn.modules.normalization.GroupNorm(num_groups, num_channels, eps=1e-05, affine=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.normalization.GroupNorm(num_groups, num_channels, eps=1e-05, affine=True, device=None, dtype=None)[source]#

                                          Applies Group Normalization over a mini-batch of inputs.

                                          This layer implements the operation as described in the paper Group Normalization
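A minimal usage sketch; shapes are illustrative:

    import torch
    from torch import nn

    input = torch.randn(20, 6, 10, 10)   # (N, C, H, W)
    m = nn.GroupNorm(3, 6)               # 6 channels split into 3 groups of 2
    output = m(input)                    # normalized per group, per sample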

diff --git a/2.9/generated/torch.nn.modules.normalization.LayerNorm.html b/2.9/generated/torch.nn.modules.normalization.LayerNorm.html
index 9778454527f..34bea248869 100644
--- a/2.9/generated/torch.nn.modules.normalization.LayerNorm.html
+++ b/2.9/generated/torch.nn.modules.normalization.LayerNorm.html
@@ -4415,7 +4415,7 @@

                                          LayerNorm#

                                          -class torch.nn.modules.normalization.LayerNorm(normalized_shape, eps=1e-05, elementwise_affine=True, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.normalization.LayerNorm(normalized_shape, eps=1e-05, elementwise_affine=True, bias=True, device=None, dtype=None)[source]#

                                          Applies Layer Normalization over a mini-batch of inputs.

                                          This layer implements the operation as described in the paper Layer Normalization
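A minimal usage sketch; shapes are illustrative:

    import torch
    from torch import nn

    batch, seq_len, dim = 20, 5, 10
    x = torch.randn(batch, seq_len, dim)
    layer_norm = nn.LayerNorm(dim)   # normalize over the last dimension
    y = layer_norm(x)                # zero mean, unit variance per position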

diff --git a/2.9/generated/torch.nn.modules.normalization.LocalResponseNorm.html b/2.9/generated/torch.nn.modules.normalization.LocalResponseNorm.html
index 92c5a3e30f5..501c6e3dd0a 100644
--- a/2.9/generated/torch.nn.modules.normalization.LocalResponseNorm.html
+++ b/2.9/generated/torch.nn.modules.normalization.LocalResponseNorm.html
@@ -4415,7 +4415,7 @@

                                          LocalResponseNorm#

                                          -class torch.nn.modules.normalization.LocalResponseNorm(size, alpha=0.0001, beta=0.75, k=1.0)[source]#
                                          +class torch.nn.modules.normalization.LocalResponseNorm(size, alpha=0.0001, beta=0.75, k=1.0)[source]#

                                          Applies local response normalization over an input signal.

                                          The input signal is composed of several input planes, where channels occupy the second dimension. Applies normalization across channels.

                                          @@ -4465,13 +4465,13 @@

                                          LocalResponseNorm
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
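A minimal usage sketch; shapes are illustrative:

    import torch
    from torch import nn

    lrn = nn.LocalResponseNorm(2)            # normalize over 2 neighboring channels
    signal_2d = torch.randn(32, 5, 24, 24)   # channels occupy the second dimension
    out = lrn(signal_2d)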
diff --git a/2.9/generated/torch.nn.modules.normalization.RMSNorm.html b/2.9/generated/torch.nn.modules.normalization.RMSNorm.html
index 79756d117fb..7c21ae4fe0d 100644
--- a/2.9/generated/torch.nn.modules.normalization.RMSNorm.html
+++ b/2.9/generated/torch.nn.modules.normalization.RMSNorm.html
@@ -4415,7 +4415,7 @@

                                          RMSNorm#

                                          -class torch.nn.modules.normalization.RMSNorm(normalized_shape, eps=None, elementwise_affine=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.normalization.RMSNorm(normalized_shape, eps=None, elementwise_affine=True, device=None, dtype=None)[source]#

                                          Applies Root Mean Square Layer Normalization over a mini-batch of inputs.

                                          This layer implements the operation as described in the paper Root Mean Square Layer Normalization

                                          @@ -4464,7 +4464,7 @@

                                          RMSNorm
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
                                          @@ -4475,7 +4475,7 @@

                                          RMSNorm
                                          -forward(x)[source]#
                                          +forward(x)[source]#

                                          Runs the forward pass.

                                          Return type
                                          @@ -4486,7 +4486,7 @@

                                          RMSNorm
                                          -reset_parameters()[source]#
                                          +reset_parameters()[source]#

                                          Resets parameters based on their initialization used in __init__.

diff --git a/2.9/generated/torch.nn.modules.padding.CircularPad1d.html b/2.9/generated/torch.nn.modules.padding.CircularPad1d.html
index 9fc3c86a916..514b5f0ea9b 100644
--- a/2.9/generated/torch.nn.modules.padding.CircularPad1d.html
+++ b/2.9/generated/torch.nn.modules.padding.CircularPad1d.html
@@ -4415,7 +4415,7 @@

                                          CircularPad1d#

                                          -class torch.nn.modules.padding.CircularPad1d(padding)[source]#
                                          +class torch.nn.modules.padding.CircularPad1d(padding)[source]#

                                          Pads the input tensor using circular padding of the input boundary.

Tensor values at the beginning of the dimension are used to pad the end, and values at the end are used to pad the beginning. If negative padding is
diff --git a/2.9/generated/torch.nn.modules.padding.CircularPad2d.html b/2.9/generated/torch.nn.modules.padding.CircularPad2d.html
index 2aa4f5b15c3..408a33596f8 100644
--- a/2.9/generated/torch.nn.modules.padding.CircularPad2d.html
+++ b/2.9/generated/torch.nn.modules.padding.CircularPad2d.html
@@ -4415,7 +4415,7 @@

                                          CircularPad2d#

                                          -class torch.nn.modules.padding.CircularPad2d(padding)[source]#
                                          +class torch.nn.modules.padding.CircularPad2d(padding)[source]#

                                          Pads the input tensor using circular padding of the input boundary.

Tensor values at the beginning of the dimension are used to pad the end, and values at the end are used to pad the beginning. If negative padding is
diff --git a/2.9/generated/torch.nn.modules.padding.CircularPad3d.html b/2.9/generated/torch.nn.modules.padding.CircularPad3d.html
index 2e4cda0df0f..f300991f830 100644
--- a/2.9/generated/torch.nn.modules.padding.CircularPad3d.html
+++ b/2.9/generated/torch.nn.modules.padding.CircularPad3d.html
@@ -4415,7 +4415,7 @@

                                          CircularPad3d#

                                          -class torch.nn.modules.padding.CircularPad3d(padding)[source]#
                                          +class torch.nn.modules.padding.CircularPad3d(padding)[source]#

                                          Pads the input tensor using circular padding of the input boundary.

Tensor values at the beginning of the dimension are used to pad the end, and values at the end are used to pad the beginning. If negative padding is
diff --git a/2.9/generated/torch.nn.modules.padding.ConstantPad1d.html b/2.9/generated/torch.nn.modules.padding.ConstantPad1d.html
index 8a494a1ccb3..da3726caaf0 100644
--- a/2.9/generated/torch.nn.modules.padding.ConstantPad1d.html
+++ b/2.9/generated/torch.nn.modules.padding.ConstantPad1d.html
@@ -4415,7 +4415,7 @@

                                          ConstantPad1d#

                                          -class torch.nn.modules.padding.ConstantPad1d(padding, value)[source]#
                                          +class torch.nn.modules.padding.ConstantPad1d(padding, value)[source]#

                                          Pads the input tensor boundaries with a constant value.

                                          For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.modules.padding.ConstantPad2d.html b/2.9/generated/torch.nn.modules.padding.ConstantPad2d.html
index 439ef2f6fc9..03b90e19728 100644
--- a/2.9/generated/torch.nn.modules.padding.ConstantPad2d.html
+++ b/2.9/generated/torch.nn.modules.padding.ConstantPad2d.html
@@ -4415,7 +4415,7 @@

                                          ConstantPad2d#

                                          -class torch.nn.modules.padding.ConstantPad2d(padding, value)[source]#
                                          +class torch.nn.modules.padding.ConstantPad2d(padding, value)[source]#

                                          Pads the input tensor boundaries with a constant value.

                                          For N-dimensional padding, use torch.nn.functional.pad().
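A minimal usage sketch; padding sizes and the fill value are illustrative:

    import torch
    from torch import nn

    m = nn.ConstantPad2d((1, 2, 0, 1), 3.5)   # (left, right, top, bottom), fill value 3.5
    x = torch.randn(1, 2, 2)                  # (C, H, W)
    y = m(x)                                  # y.shape == (1, 3, 5)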

diff --git a/2.9/generated/torch.nn.modules.padding.ConstantPad3d.html b/2.9/generated/torch.nn.modules.padding.ConstantPad3d.html
index cd4203345d4..28bc446b783 100644
--- a/2.9/generated/torch.nn.modules.padding.ConstantPad3d.html
+++ b/2.9/generated/torch.nn.modules.padding.ConstantPad3d.html
@@ -4415,7 +4415,7 @@

                                          ConstantPad3d#

                                          -class torch.nn.modules.padding.ConstantPad3d(padding, value)[source]#
                                          +class torch.nn.modules.padding.ConstantPad3d(padding, value)[source]#

                                          Pads the input tensor boundaries with a constant value.

                                          For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.modules.padding.ReflectionPad1d.html b/2.9/generated/torch.nn.modules.padding.ReflectionPad1d.html
index f842b92ab0b..7f764b18578 100644
--- a/2.9/generated/torch.nn.modules.padding.ReflectionPad1d.html
+++ b/2.9/generated/torch.nn.modules.padding.ReflectionPad1d.html
@@ -4415,7 +4415,7 @@

                                          ReflectionPad1d#

                                          -class torch.nn.modules.padding.ReflectionPad1d(padding)[source]#
                                          +class torch.nn.modules.padding.ReflectionPad1d(padding)[source]#

                                          Pads the input tensor using the reflection of the input boundary.

                                          For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.modules.padding.ReflectionPad2d.html b/2.9/generated/torch.nn.modules.padding.ReflectionPad2d.html
index 30d94e811b7..ddc90d45add 100644
--- a/2.9/generated/torch.nn.modules.padding.ReflectionPad2d.html
+++ b/2.9/generated/torch.nn.modules.padding.ReflectionPad2d.html
@@ -4415,7 +4415,7 @@

                                          ReflectionPad2d#

                                          -class torch.nn.modules.padding.ReflectionPad2d(padding)[source]#
                                          +class torch.nn.modules.padding.ReflectionPad2d(padding)[source]#

                                          Pads the input tensor using the reflection of the input boundary.

                                          For N-dimensional padding, use torch.nn.functional.pad().
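A minimal usage sketch; shapes are illustrative:

    import torch
    from torch import nn

    m = nn.ReflectionPad2d(1)   # pad must be smaller than the input along each padded dim
    x = torch.arange(9, dtype=torch.float).reshape(1, 1, 3, 3)
    y = m(x)                    # y.shape == (1, 1, 5, 5), borders mirrored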

diff --git a/2.9/generated/torch.nn.modules.padding.ReflectionPad3d.html b/2.9/generated/torch.nn.modules.padding.ReflectionPad3d.html
index 9b9cb9f617d..f14fb0e255a 100644
--- a/2.9/generated/torch.nn.modules.padding.ReflectionPad3d.html
+++ b/2.9/generated/torch.nn.modules.padding.ReflectionPad3d.html
@@ -4415,7 +4415,7 @@

                                          ReflectionPad3d#

                                          -class torch.nn.modules.padding.ReflectionPad3d(padding)[source]#
                                          +class torch.nn.modules.padding.ReflectionPad3d(padding)[source]#

                                          Pads the input tensor using the reflection of the input boundary.

                                          For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.modules.padding.ReplicationPad1d.html b/2.9/generated/torch.nn.modules.padding.ReplicationPad1d.html
index 4c7035cc4a1..a4052ae6a8d 100644
--- a/2.9/generated/torch.nn.modules.padding.ReplicationPad1d.html
+++ b/2.9/generated/torch.nn.modules.padding.ReplicationPad1d.html
@@ -4415,7 +4415,7 @@

                                          ReplicationPad1d#

                                          -class torch.nn.modules.padding.ReplicationPad1d(padding)[source]#
                                          +class torch.nn.modules.padding.ReplicationPad1d(padding)[source]#

                                          Pads the input tensor using replication of the input boundary.

                                          For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.modules.padding.ReplicationPad2d.html b/2.9/generated/torch.nn.modules.padding.ReplicationPad2d.html
index c1a0def2993..67edf416119 100644
--- a/2.9/generated/torch.nn.modules.padding.ReplicationPad2d.html
+++ b/2.9/generated/torch.nn.modules.padding.ReplicationPad2d.html
@@ -4415,7 +4415,7 @@

                                          ReplicationPad2d#

                                          -class torch.nn.modules.padding.ReplicationPad2d(padding)[source]#
                                          +class torch.nn.modules.padding.ReplicationPad2d(padding)[source]#

                                          Pads the input tensor using replication of the input boundary.

                                          For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.modules.padding.ReplicationPad3d.html b/2.9/generated/torch.nn.modules.padding.ReplicationPad3d.html
index d6c608462b0..ab1a0c451b1 100644
--- a/2.9/generated/torch.nn.modules.padding.ReplicationPad3d.html
+++ b/2.9/generated/torch.nn.modules.padding.ReplicationPad3d.html
@@ -4415,7 +4415,7 @@

                                          ReplicationPad3d#

                                          -class torch.nn.modules.padding.ReplicationPad3d(padding)[source]#
                                          +class torch.nn.modules.padding.ReplicationPad3d(padding)[source]#

                                          Pads the input tensor using replication of the input boundary.

                                          For N-dimensional padding, use torch.nn.functional.pad().

diff --git a/2.9/generated/torch.nn.modules.padding.ZeroPad1d.html b/2.9/generated/torch.nn.modules.padding.ZeroPad1d.html
index b9320272d62..98986e3a56c 100644
--- a/2.9/generated/torch.nn.modules.padding.ZeroPad1d.html
+++ b/2.9/generated/torch.nn.modules.padding.ZeroPad1d.html
@@ -4415,7 +4415,7 @@

                                          ZeroPad1d#

                                          -class torch.nn.modules.padding.ZeroPad1d(padding)[source]#
                                          +class torch.nn.modules.padding.ZeroPad1d(padding)[source]#

                                          Pads the input tensor boundaries with zero.

                                          For N-dimensional padding, use torch.nn.functional.pad().

                                          @@ -4462,7 +4462,7 @@

                                          ZeroPad1d
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.padding.ZeroPad2d.html b/2.9/generated/torch.nn.modules.padding.ZeroPad2d.html
index 79fd473ed49..eac635da19f 100644
--- a/2.9/generated/torch.nn.modules.padding.ZeroPad2d.html
+++ b/2.9/generated/torch.nn.modules.padding.ZeroPad2d.html
@@ -4415,7 +4415,7 @@

                                          ZeroPad2d#

                                          -class torch.nn.modules.padding.ZeroPad2d(padding)[source]#
                                          +class torch.nn.modules.padding.ZeroPad2d(padding)[source]#

                                          Pads the input tensor boundaries with zero.

                                          For N-dimensional padding, use torch.nn.functional.pad().

                                          @@ -4462,7 +4462,7 @@

                                          ZeroPad2d
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
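A minimal usage sketch; padding sizes are illustrative:

    import torch
    from torch import nn

    m = nn.ZeroPad2d((1, 1, 2, 0))   # (left, right, top, bottom)
    x = torch.randn(1, 1, 3, 3)
    y = m(x)                         # y.shape == (1, 1, 5, 5)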
diff --git a/2.9/generated/torch.nn.modules.padding.ZeroPad3d.html b/2.9/generated/torch.nn.modules.padding.ZeroPad3d.html
index 8dfccccbd8b..6716c9ab7f4 100644
--- a/2.9/generated/torch.nn.modules.padding.ZeroPad3d.html
+++ b/2.9/generated/torch.nn.modules.padding.ZeroPad3d.html
@@ -4415,7 +4415,7 @@

                                          ZeroPad3d#

                                          -class torch.nn.modules.padding.ZeroPad3d(padding)[source]#
                                          +class torch.nn.modules.padding.ZeroPad3d(padding)[source]#

                                          Pads the input tensor boundaries with zero.

                                          For N-dimensional padding, use torch.nn.functional.pad().

                                          @@ -4450,7 +4450,7 @@

                                          ZeroPad3d
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pixelshuffle.PixelShuffle.html b/2.9/generated/torch.nn.modules.pixelshuffle.PixelShuffle.html
index e91a564ac52..d18d880a9c2 100644
--- a/2.9/generated/torch.nn.modules.pixelshuffle.PixelShuffle.html
+++ b/2.9/generated/torch.nn.modules.pixelshuffle.PixelShuffle.html
@@ -4415,7 +4415,7 @@

                                          PixelShuffle#

                                          -class torch.nn.modules.pixelshuffle.PixelShuffle(upscale_factor)[source]#
                                          +class torch.nn.modules.pixelshuffle.PixelShuffle(upscale_factor)[source]#

                                          Rearrange elements in a tensor according to an upscaling factor.

Rearranges elements in a tensor of shape (*, C \times r^2, H, W) to a tensor of shape (*, C, H \times r, W \times r), where r is an upscale factor.

                                          @@ -4455,7 +4455,7 @@

                                          PixelShuffle
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
                                          @@ -4466,7 +4466,7 @@

                                          PixelShuffle
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
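A minimal usage sketch; shapes are illustrative:

    import torch
    from torch import nn

    pixel_shuffle = nn.PixelShuffle(3)   # upscale factor r = 3
    x = torch.randn(1, 9, 4, 4)          # C * r^2 = 9 input channels
    y = pixel_shuffle(x)                 # y.shape == (1, 1, 12, 12)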
diff --git a/2.9/generated/torch.nn.modules.pixelshuffle.PixelUnshuffle.html b/2.9/generated/torch.nn.modules.pixelshuffle.PixelUnshuffle.html
index 4baf3ffe858..18706139c36 100644
--- a/2.9/generated/torch.nn.modules.pixelshuffle.PixelUnshuffle.html
+++ b/2.9/generated/torch.nn.modules.pixelshuffle.PixelUnshuffle.html
@@ -4415,7 +4415,7 @@

                                          PixelUnshuffle#

                                          -class torch.nn.modules.pixelshuffle.PixelUnshuffle(downscale_factor)[source]#
                                          +class torch.nn.modules.pixelshuffle.PixelUnshuffle(downscale_factor)[source]#

                                          Reverse the PixelShuffle operation.

Reverses the PixelShuffle operation by rearranging elements in a tensor of shape (*, C, H \times r, W \times r) to a tensor of shape
@@ -4454,7 +4454,7 @@

                                          PixelUnshuffle
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
                                          @@ -4465,7 +4465,7 @@

                                          PixelUnshuffle
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool1d.html b/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool1d.html
index 46e32481298..a4020b31add 100644
--- a/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool1d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool1d.html
@@ -4415,7 +4415,7 @@

                                          AdaptiveAvgPool1d#

                                          -class torch.nn.modules.pooling.AdaptiveAvgPool1d(output_size)[source]#
                                          +class torch.nn.modules.pooling.AdaptiveAvgPool1d(output_size)[source]#

                                          Applies a 1D adaptive average pooling over an input signal composed of several input planes.

The output size is L_{out}, for any input size. The number of output features is equal to the number of input planes.
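A minimal sketch showing the fixed output length regardless of the input length:

import torch
import torch.nn as nn

pool = nn.AdaptiveAvgPool1d(5)           # L_out = 5
for L in (7, 32, 100):                   # any input length works
    y = pool(torch.randn(1, 3, L))
    print(y.shape)                       # torch.Size([1, 3, 5]) every time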

                                          @@ -4441,7 +4441,7 @@

                                          AdaptiveAvgPool1d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool2d.html b/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool2d.html
index c9b1d573f53..4c7576043fb 100644
--- a/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool2d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool2d.html
@@ -4415,7 +4415,7 @@

                                          AdaptiveAvgPool2d#

                                          -class torch.nn.modules.pooling.AdaptiveAvgPool2d(output_size)[source]#
                                          +class torch.nn.modules.pooling.AdaptiveAvgPool2d(output_size)[source]#

                                          Applies a 2D adaptive average pooling over an input signal composed of several input planes.

                                          The output is of size H x W, for any input size. The number of output features is equal to the number of input planes.
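A minimal sketch; output_size may be an int, a tuple, or contain None to keep that input dimension unchanged:

import torch
import torch.nn as nn

x = torch.randn(1, 64, 10, 9)
print(nn.AdaptiveAvgPool2d((5, 7))(x).shape)     # torch.Size([1, 64, 5, 7])
print(nn.AdaptiveAvgPool2d(7)(x).shape)          # torch.Size([1, 64, 7, 7])
print(nn.AdaptiveAvgPool2d((None, 7))(x).shape)  # torch.Size([1, 64, 10, 7])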

                                          @@ -4452,7 +4452,7 @@

                                          AdaptiveAvgPool2d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool3d.html b/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool3d.html
index d7b2b451174..0b5897d27ee 100644
--- a/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool3d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AdaptiveAvgPool3d.html
@@ -4415,7 +4415,7 @@

                                          AdaptiveAvgPool3d#

                                          -class torch.nn.modules.pooling.AdaptiveAvgPool3d(output_size)[source]#
                                          +class torch.nn.modules.pooling.AdaptiveAvgPool3d(output_size)[source]#

                                          Applies a 3D adaptive average pooling over an input signal composed of several input planes.

                                          The output is of size D x H x W, for any input size. The number of output features is equal to the number of input planes.

                                          @@ -4452,7 +4452,7 @@

                                          AdaptiveAvgPool3d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool1d.html b/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool1d.html
index 694b9e07200..1f979647912 100644
--- a/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool1d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool1d.html
@@ -4415,7 +4415,7 @@

                                          AdaptiveMaxPool1d#

                                          -class torch.nn.modules.pooling.AdaptiveMaxPool1d(output_size, return_indices=False)[source]#
                                          +class torch.nn.modules.pooling.AdaptiveMaxPool1d(output_size, return_indices=False)[source]#

                                          Applies a 1D adaptive max pooling over an input signal composed of several input planes.

The output size is L_{out}, for any input size. The number of output features is equal to the number of input planes.
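A minimal sketch; with return_indices=True the argmax positions that MaxUnpool1d consumes are returned alongside the values:

import torch
import torch.nn as nn

pool = nn.AdaptiveMaxPool1d(4, return_indices=True)
y, idx = pool(torch.randn(1, 2, 16))
print(y.shape, idx.shape)                # both torch.Size([1, 2, 4])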

                                          @@ -4445,7 +4445,7 @@

                                          AdaptiveMaxPool1d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

diff --git a/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool2d.html b/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool2d.html
index 792f0acbf9d..94c650cabea 100644
--- a/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool2d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool2d.html
@@ -4415,7 +4415,7 @@

                                          AdaptiveMaxPool2d#

                                          -class torch.nn.modules.pooling.AdaptiveMaxPool2d(output_size, return_indices=False)[source]#
                                          +class torch.nn.modules.pooling.AdaptiveMaxPool2d(output_size, return_indices=False)[source]#

                                          Applies a 2D adaptive max pooling over an input signal composed of several input planes.

The output is of size H_{out} \times W_{out}, for any input size. The number of output features is equal to the number of input planes.

                                          @@ -4457,7 +4457,7 @@

                                          AdaptiveMaxPool2d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

diff --git a/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool3d.html b/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool3d.html
index c510d7b0755..67743ad70ac 100644
--- a/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool3d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AdaptiveMaxPool3d.html
@@ -4415,7 +4415,7 @@

                                          AdaptiveMaxPool3d#

                                          -class torch.nn.modules.pooling.AdaptiveMaxPool3d(output_size, return_indices=False)[source]#
                                          +class torch.nn.modules.pooling.AdaptiveMaxPool3d(output_size, return_indices=False)[source]#

                                          Applies a 3D adaptive max pooling over an input signal composed of several input planes.

The output is of size D_{out} \times H_{out} \times W_{out}, for any input size. The number of output features is equal to the number of input planes.

                                          @@ -4457,7 +4457,7 @@

                                          AdaptiveMaxPool3d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

diff --git a/2.9/generated/torch.nn.modules.pooling.AvgPool1d.html b/2.9/generated/torch.nn.modules.pooling.AvgPool1d.html
index b42604a1a88..11d4010bd25 100644
--- a/2.9/generated/torch.nn.modules.pooling.AvgPool1d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AvgPool1d.html
@@ -4415,7 +4415,7 @@

                                          AvgPool1d#

                                          -class torch.nn.modules.pooling.AvgPool1d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True)[source]#
                                          +class torch.nn.modules.pooling.AvgPool1d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True)[source]#

                                          Applies a 1D average pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, L), output (N, C, L_{out}) and kernel_size k can be precisely described as:

out(N_i, C_j, l) = \frac{1}{k} \sum_{m=0}^{k-1} input(N_i, C_j, \text{stride} \times l + m)
@@ -4470,7 +4470,7 @@
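A minimal sketch of the AvgPool1d formula above, with kernel_size=3 and stride=2 so adjacent windows overlap by one element:

import torch
import torch.nn as nn

pool = nn.AvgPool1d(kernel_size=3, stride=2)
x = torch.tensor([[[1., 2., 3., 4., 5., 6., 7.]]])
print(pool(x))                           # tensor([[[2., 4., 6.]]]) -- the mean of each window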

                                          AvgPool1d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.AvgPool2d.html b/2.9/generated/torch.nn.modules.pooling.AvgPool2d.html
index 5b31a19d7de..a72204cbef7 100644
--- a/2.9/generated/torch.nn.modules.pooling.AvgPool2d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AvgPool2d.html
@@ -4415,7 +4415,7 @@

                                          AvgPool2d#

                                          -class torch.nn.modules.pooling.AvgPool2d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#
                                          +class torch.nn.modules.pooling.AvgPool2d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#

                                          Applies a 2D average pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, H, W), output (N, C, H_{out}, W_{out}) and kernel_size (kH, kW)
@@ -4484,7 +4484,7 @@

                                          AvgPool2d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.AvgPool3d.html b/2.9/generated/torch.nn.modules.pooling.AvgPool3d.html
index 9ef0bf6f29d..494c55929d5 100644
--- a/2.9/generated/torch.nn.modules.pooling.AvgPool3d.html
+++ b/2.9/generated/torch.nn.modules.pooling.AvgPool3d.html
@@ -4415,7 +4415,7 @@

                                          AvgPool3d#

                                          -class torch.nn.modules.pooling.AvgPool3d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#
                                          +class torch.nn.modules.pooling.AvgPool3d(kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None)[source]#

                                          Applies a 3D average pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, D, H, W), output (N, C, D_{out}, H_{out}, W_{out}) and kernel_size (kD, kH, kW)
@@ -4495,7 +4495,7 @@

                                          AvgPool3d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.FractionalMaxPool2d.html b/2.9/generated/torch.nn.modules.pooling.FractionalMaxPool2d.html
index 9c3e1ac56ca..03263badea2 100644
--- a/2.9/generated/torch.nn.modules.pooling.FractionalMaxPool2d.html
+++ b/2.9/generated/torch.nn.modules.pooling.FractionalMaxPool2d.html
@@ -4415,7 +4415,7 @@

                                          FractionalMaxPool2d#

                                          -class torch.nn.modules.pooling.FractionalMaxPool2d(kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#
                                          +class torch.nn.modules.pooling.FractionalMaxPool2d(kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#

                                          Applies a 2D fractional max pooling over an input signal composed of several input planes.

                                          Fractional MaxPooling is described in detail in the paper Fractional MaxPooling by Ben Graham

The max-pooling operation is applied in kH \times kW regions by a stochastic step size determined by the target output size.
diff --git a/2.9/generated/torch.nn.modules.pooling.FractionalMaxPool3d.html b/2.9/generated/torch.nn.modules.pooling.FractionalMaxPool3d.html
index f74e1e7d32b..39159076fbe 100644
--- a/2.9/generated/torch.nn.modules.pooling.FractionalMaxPool3d.html
+++ b/2.9/generated/torch.nn.modules.pooling.FractionalMaxPool3d.html
@@ -4415,7 +4415,7 @@

                                          FractionalMaxPool3d#

                                          -class torch.nn.modules.pooling.FractionalMaxPool3d(kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#
                                          +class torch.nn.modules.pooling.FractionalMaxPool3d(kernel_size, output_size=None, output_ratio=None, return_indices=False, _random_samples=None)[source]#

                                          Applies a 3D fractional max pooling over an input signal composed of several input planes.

                                          Fractional MaxPooling is described in detail in the paper Fractional MaxPooling by Ben Graham

The max-pooling operation is applied in kT \times kH \times kW regions by a stochastic step size determined by the target output size.
diff --git a/2.9/generated/torch.nn.modules.pooling.LPPool1d.html b/2.9/generated/torch.nn.modules.pooling.LPPool1d.html
index 9bb2f23608b..b772ad440e0 100644
--- a/2.9/generated/torch.nn.modules.pooling.LPPool1d.html
+++ b/2.9/generated/torch.nn.modules.pooling.LPPool1d.html
@@ -4415,7 +4415,7 @@

                                          LPPool1d#

                                          -class torch.nn.modules.pooling.LPPool1d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                          +class torch.nn.modules.pooling.LPPool1d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                          Applies a 1D power-average pooling over an input signal composed of several input planes.

On each window, the function computed is: f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}
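A minimal sketch of the window function above; with norm_type=2 each window yields the Euclidean norm of its elements (norm_type=1 gives the sum, and large norm_type approaches max pooling):

import torch
import torch.nn as nn

pool = nn.LPPool1d(norm_type=2, kernel_size=2, stride=2)
x = torch.tensor([[[3., 4., 6., 8.]]])
print(pool(x))                           # tensor([[[ 5., 10.]]]) -- sqrt(3^2+4^2), sqrt(6^2+8^2)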

                                          @@ -4466,7 +4466,7 @@

                                          LPPool1d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.LPPool2d.html b/2.9/generated/torch.nn.modules.pooling.LPPool2d.html
index 35bf88ef864..3d1073a6fe5 100644
--- a/2.9/generated/torch.nn.modules.pooling.LPPool2d.html
+++ b/2.9/generated/torch.nn.modules.pooling.LPPool2d.html
@@ -4415,7 +4415,7 @@

                                          LPPool2d#

                                          -class torch.nn.modules.pooling.LPPool2d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                          +class torch.nn.modules.pooling.LPPool2d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                          Applies a 2D power-average pooling over an input signal composed of several input planes.

On each window, the function computed is: f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}

                                          @@ -4479,7 +4479,7 @@

                                          LPPool2d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.LPPool3d.html b/2.9/generated/torch.nn.modules.pooling.LPPool3d.html
index f365da0d0b3..f9045cf17b0 100644
--- a/2.9/generated/torch.nn.modules.pooling.LPPool3d.html
+++ b/2.9/generated/torch.nn.modules.pooling.LPPool3d.html
@@ -4415,7 +4415,7 @@

                                          LPPool3d#

                                          -class torch.nn.modules.pooling.LPPool3d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#
                                          +class torch.nn.modules.pooling.LPPool3d(norm_type, kernel_size, stride=None, ceil_mode=False)[source]#

                                          Applies a 3D power-average pooling over an input signal composed of several input planes.

On each window, the function computed is: f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}

                                          @@ -4483,7 +4483,7 @@

                                          LPPool3d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.MaxPool1d.html b/2.9/generated/torch.nn.modules.pooling.MaxPool1d.html
index 7b5291bb0c9..d1fa4c5a413 100644
--- a/2.9/generated/torch.nn.modules.pooling.MaxPool1d.html
+++ b/2.9/generated/torch.nn.modules.pooling.MaxPool1d.html
@@ -4415,7 +4415,7 @@

                                          MaxPool1d#

                                          -class torch.nn.modules.pooling.MaxPool1d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#
                                          +class torch.nn.modules.pooling.MaxPool1d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#

                                          Applies a 1D max pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, L) and output (N, C, L_{out}) can be precisely described as:

out(N_i, C_j, k) = \max_{m=0, \dots, \text{kernel\_size}-1} input(N_i, C_j, \text{stride} \times k + m)
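A minimal sketch of the formula above; stride defaults to kernel_size, so windows do not overlap:

import torch
import torch.nn as nn

pool = nn.MaxPool1d(kernel_size=2)       # stride defaults to kernel_size
x = torch.tensor([[[1., 3., 2., 5., 4., 0.]]])
print(pool(x))                           # tensor([[[3., 5., 4.]]]) -- the max of each window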

                                          @@ -4474,7 +4474,7 @@

                                          MaxPool1d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

diff --git a/2.9/generated/torch.nn.modules.pooling.MaxPool2d.html b/2.9/generated/torch.nn.modules.pooling.MaxPool2d.html
index a30feb35c19..1ac07dae17e 100644
--- a/2.9/generated/torch.nn.modules.pooling.MaxPool2d.html
+++ b/2.9/generated/torch.nn.modules.pooling.MaxPool2d.html
@@ -4415,7 +4415,7 @@

                                          MaxPool2d#

                                          -class torch.nn.modules.pooling.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#
                                          +class torch.nn.modules.pooling.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#

                                          Applies a 2D max pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, H, W), output (N, C, H_{out}, W_{out}) and kernel_size (kH, kW)
@@ -4483,7 +4483,7 @@

                                          MaxPool2d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

diff --git a/2.9/generated/torch.nn.modules.pooling.MaxPool3d.html b/2.9/generated/torch.nn.modules.pooling.MaxPool3d.html
index 0317e640fbe..dc639514938 100644
--- a/2.9/generated/torch.nn.modules.pooling.MaxPool3d.html
+++ b/2.9/generated/torch.nn.modules.pooling.MaxPool3d.html
@@ -4415,7 +4415,7 @@

                                          MaxPool3d#

                                          -class torch.nn.modules.pooling.MaxPool3d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#
                                          +class torch.nn.modules.pooling.MaxPool3d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)[source]#

                                          Applies a 3D max pooling over an input signal composed of several input planes.

In the simplest case, the output value of the layer with input size (N, C, D, H, W), output (N, C, D_{out}, H_{out}, W_{out}) and kernel_size (kD, kH, kW)
@@ -4487,7 +4487,7 @@

                                          MaxPool3d
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

diff --git a/2.9/generated/torch.nn.modules.pooling.MaxUnpool1d.html b/2.9/generated/torch.nn.modules.pooling.MaxUnpool1d.html
index 80f67788b2b..35171b9cd9b 100644
--- a/2.9/generated/torch.nn.modules.pooling.MaxUnpool1d.html
+++ b/2.9/generated/torch.nn.modules.pooling.MaxUnpool1d.html
@@ -4415,7 +4415,7 @@

                                          MaxUnpool1d#

                                          -class torch.nn.modules.pooling.MaxUnpool1d(kernel_size, stride=None, padding=0)[source]#
                                          +class torch.nn.modules.pooling.MaxUnpool1d(kernel_size, stride=None, padding=0)[source]#

                                          Computes a partial inverse of MaxPool1d.

                                          MaxPool1d is not fully invertible, since the non-maximal values are lost.

MaxUnpool1d takes in as input the output of MaxPool1d including the indices of the maximal values and computes a partial inverse in which all non-maximal values are set to zero.
@@ -4482,7 +4482,7 @@
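A round-trip sketch for the MaxUnpool1d entry above: pool with return_indices=True, then scatter the maxima back with zeros elsewhere:

import torch
import torch.nn as nn

pool = nn.MaxPool1d(2, stride=2, return_indices=True)
unpool = nn.MaxUnpool1d(2, stride=2)
x = torch.tensor([[[1., 2., 3., 4., 5., 6., 7., 8.]]])
y, idx = pool(x)                         # y: [[[2., 4., 6., 8.]]]
print(unpool(y, idx))                    # tensor([[[0., 2., 0., 4., 0., 6., 0., 8.]]])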

                                          MaxUnpool1d
                                          -forward(input, indices, output_size=None)[source]#
                                          +forward(input, indices, output_size=None)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.MaxUnpool2d.html b/2.9/generated/torch.nn.modules.pooling.MaxUnpool2d.html
index d772a9e2bab..540012369d1 100644
--- a/2.9/generated/torch.nn.modules.pooling.MaxUnpool2d.html
+++ b/2.9/generated/torch.nn.modules.pooling.MaxUnpool2d.html
@@ -4415,7 +4415,7 @@

                                          MaxUnpool2d#

                                          -class torch.nn.modules.pooling.MaxUnpool2d(kernel_size, stride=None, padding=0)[source]#
                                          +class torch.nn.modules.pooling.MaxUnpool2d(kernel_size, stride=None, padding=0)[source]#

                                          Computes a partial inverse of MaxPool2d.

                                          MaxPool2d is not fully invertible, since the non-maximal values are lost.

MaxUnpool2d takes in as input the output of MaxPool2d including the indices of the maximal values and computes a partial inverse in which all non-maximal values are set to zero.
@@ -4494,7 +4494,7 @@

                                          MaxUnpool2d
                                          -forward(input, indices, output_size=None)[source]#
                                          +forward(input, indices, output_size=None)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.pooling.MaxUnpool3d.html b/2.9/generated/torch.nn.modules.pooling.MaxUnpool3d.html
index e4980b08547..d203a26526b 100644
--- a/2.9/generated/torch.nn.modules.pooling.MaxUnpool3d.html
+++ b/2.9/generated/torch.nn.modules.pooling.MaxUnpool3d.html
@@ -4415,7 +4415,7 @@

                                          MaxUnpool3d#

                                          -class torch.nn.modules.pooling.MaxUnpool3d(kernel_size, stride=None, padding=0)[source]#
                                          +class torch.nn.modules.pooling.MaxUnpool3d(kernel_size, stride=None, padding=0)[source]#

                                          Computes a partial inverse of MaxPool3d.

MaxPool3d is not fully invertible, since the non-maximal values are lost. MaxUnpool3d takes in as input the output of MaxPool3d including the indices of the maximal values and computes a partial inverse in which all non-maximal values are set to zero.
@@ -4480,7 +4480,7 @@

                                          MaxUnpool3d
                                          -forward(input, indices, output_size=None)[source]#
                                          +forward(input, indices, output_size=None)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.rnn.GRU.html b/2.9/generated/torch.nn.modules.rnn.GRU.html
index 19ecc8ca714..c91d83e8f48 100644
--- a/2.9/generated/torch.nn.modules.rnn.GRU.html
+++ b/2.9/generated/torch.nn.modules.rnn.GRU.html
@@ -4415,7 +4415,7 @@

                                          GRU#

                                          -class torch.nn.modules.rnn.GRU(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.rnn.GRU(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None)[source]#

Apply a multi-layer gated recurrent unit (GRU) RNN to an input sequence. For each element in the input sequence, each layer computes the following function:

r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{t-1} + b_{hr})
z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{t-1} + b_{hz})
n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{t-1} + b_{hn}))
h_t = (1 - z_t) \odot n_t + z_t \odot h_{t-1}
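A minimal shape sketch of the GRU just described, using the default batch_first=False layout (seq_len, batch, feature):

import torch
import torch.nn as nn

gru = nn.GRU(input_size=10, hidden_size=20, num_layers=2)
x = torch.randn(5, 3, 10)                # (seq_len, batch, input_size)
h0 = torch.randn(2, 3, 20)               # (num_layers, batch, hidden_size)
out, hn = gru(x, h0)
print(out.shape, hn.shape)               # torch.Size([5, 3, 20]) torch.Size([2, 3, 20])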

diff --git a/2.9/generated/torch.nn.modules.rnn.GRUCell.html b/2.9/generated/torch.nn.modules.rnn.GRUCell.html
index 05d024cc368..8ca76fc29e8 100644
--- a/2.9/generated/torch.nn.modules.rnn.GRUCell.html
+++ b/2.9/generated/torch.nn.modules.rnn.GRUCell.html
@@ -4415,7 +4415,7 @@

                                          GRUCell#

                                          -class torch.nn.modules.rnn.GRUCell(input_size, hidden_size, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.rnn.GRUCell(input_size, hidden_size, bias=True, device=None, dtype=None)[source]#

                                          A gated recurrent unit (GRU) cell.

\begin{array}{ll} r = \sigma(W_{ir} x + b_{ir} + W_{hr} h + b_{hr}) \\ z = \sigma(W_{iz} x + b_{iz} + W_{hz} h + b_{hz}) \\ n = \tanh(W_{in} x + b_{in} + r \odot (W_{hn} h + b_{hn})) \\ h' = (1 - z) \odot n + z \odot h \end{array}
diff --git a/2.9/generated/torch.nn.modules.rnn.LSTM.html b/2.9/generated/torch.nn.modules.rnn.LSTM.html
index cda8ac5e560..a083a465b32 100644
--- a/2.9/generated/torch.nn.modules.rnn.LSTM.html
+++ b/2.9/generated/torch.nn.modules.rnn.LSTM.html
@@ -4415,7 +4415,7 @@

                                          LSTM#

                                          -class torch.nn.modules.rnn.LSTM(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, proj_size=0, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.rnn.LSTM(input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, proj_size=0, device=None, dtype=None)[source]#

Apply a multi-layer long short-term memory (LSTM) RNN to an input sequence. For each element in the input sequence, each layer computes the following function:

i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi})
f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf})
g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg})
o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho})
c_t = f_t \odot c_{t-1} + i_t \odot g_t
h_t = o_t \odot \tanh(c_t)
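A minimal shape sketch; unlike GRU, LSTM threads both a hidden state and a cell state through the sequence:

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2)
x = torch.randn(5, 3, 10)                        # (seq_len, batch, input_size)
h0, c0 = torch.randn(2, 3, 20), torch.randn(2, 3, 20)
out, (hn, cn) = lstm(x, (h0, c0))
print(out.shape, hn.shape, cn.shape)             # [5, 3, 20], [2, 3, 20], [2, 3, 20]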

diff --git a/2.9/generated/torch.nn.modules.rnn.LSTMCell.html b/2.9/generated/torch.nn.modules.rnn.LSTMCell.html
index 3f888c8bc21..e0dd8d4a394 100644
--- a/2.9/generated/torch.nn.modules.rnn.LSTMCell.html
+++ b/2.9/generated/torch.nn.modules.rnn.LSTMCell.html
@@ -4415,7 +4415,7 @@

                                          LSTMCell#

                                          -class torch.nn.modules.rnn.LSTMCell(input_size, hidden_size, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.rnn.LSTMCell(input_size, hidden_size, bias=True, device=None, dtype=None)[source]#

                                          A long short-term memory (LSTM) cell.

\begin{array}{ll} i = \sigma(W_{ii} x + b_{ii} + W_{hi} h + b_{hi}) \\ f = \sigma(W_{if} x + b_{if} + W_{hf} h + b_{hf}) \\ g = \tanh(W_{ig} x + b_{ig} + W_{hg} h + b_{hg}) \\ o = \sigma(W_{io} x + b_{io} + W_{ho} h + b_{ho}) \\ c' = f \odot c + i \odot g \\ h' = o \odot \tanh(c') \end{array}
diff --git a/2.9/generated/torch.nn.modules.rnn.RNN.html b/2.9/generated/torch.nn.modules.rnn.RNN.html
index d6af8b47008..50a601ce068 100644
--- a/2.9/generated/torch.nn.modules.rnn.RNN.html
+++ b/2.9/generated/torch.nn.modules.rnn.RNN.html
@@ -4415,7 +4415,7 @@

                                          RNN#

                                          -class torch.nn.modules.rnn.RNN(input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.rnn.RNN(input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0.0, bidirectional=False, device=None, dtype=None)[source]#

Apply a multi-layer Elman RNN with \tanh or \text{ReLU} non-linearity to an input sequence. For each element in the input sequence, each layer computes the following function:

h_t = \tanh(x_t W_{ih}^T + b_{ih} + h_{t-1} W_{hh}^T + b_{hh})

                                          @@ -4598,7 +4598,7 @@

RNN

-forward(input: Tensor, hx: Optional[Tensor] = None) → tuple[torch.Tensor, torch.Tensor][source]#
+forward(input: Tensor, hx: Optional[Tensor] = None) → tuple[torch.Tensor, torch.Tensor][source]#
forward(input: PackedSequence, hx: Optional[Tensor] = None) → tuple[torch.nn.utils.rnn.PackedSequence, torch.Tensor]

                                          Runs the forward pass.

diff --git a/2.9/generated/torch.nn.modules.rnn.RNNBase.html b/2.9/generated/torch.nn.modules.rnn.RNNBase.html
index 44d302256e4..dc470f59fe4 100644
--- a/2.9/generated/torch.nn.modules.rnn.RNNBase.html
+++ b/2.9/generated/torch.nn.modules.rnn.RNNBase.html
@@ -4415,7 +4415,7 @@

                                          RNNBase#

                                          -class torch.nn.modules.rnn.RNNBase(mode, input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, proj_size=0, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.rnn.RNNBase(mode, input_size, hidden_size, num_layers=1, bias=True, batch_first=False, dropout=0.0, bidirectional=False, proj_size=0, device=None, dtype=None)[source]#

                                          Base class for RNN modules (RNN, LSTM, GRU).

                                          Implements aspects of RNNs shared by the RNN, LSTM, and GRU classes, such as module initialization and utility methods for parameter storage management.

                                          @@ -4431,7 +4431,7 @@

                                          RNNBase
                                          -flatten_parameters()[source]#
                                          +flatten_parameters()[source]#

                                          Reset parameter data pointer so that they can use faster code paths.

                                          Right now, this works only if the module is on the GPU and cuDNN is enabled. Otherwise, it’s a no-op.

diff --git a/2.9/generated/torch.nn.modules.rnn.RNNCell.html b/2.9/generated/torch.nn.modules.rnn.RNNCell.html
index 5bbc30ac66f..f0697223057 100644
--- a/2.9/generated/torch.nn.modules.rnn.RNNCell.html
+++ b/2.9/generated/torch.nn.modules.rnn.RNNCell.html
@@ -4415,7 +4415,7 @@

                                          RNNCell#

                                          -class torch.nn.modules.rnn.RNNCell(input_size, hidden_size, bias=True, nonlinearity='tanh', device=None, dtype=None)[source]#
                                          +class torch.nn.modules.rnn.RNNCell(input_size, hidden_size, bias=True, nonlinearity='tanh', device=None, dtype=None)[source]#

                                          An Elman RNN cell with tanh or ReLU non-linearity.

h' = \tanh(W_{ih} x + b_{ih} + W_{hh} h + b_{hh})

                                          If nonlinearity is ‘relu’, then ReLU is used in place of tanh.
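A minimal sketch stepping a single RNNCell over a sequence by hand, which is what the multi-layer RNN module automates:

import torch
import torch.nn as nn

cell = nn.RNNCell(input_size=10, hidden_size=20)
x = torch.randn(6, 3, 10)                # (seq_len, batch, input_size)
h = torch.zeros(3, 20)
for t in range(x.size(0)):               # one tanh update per time step
    h = cell(x[t], h)
print(h.shape)                           # torch.Size([3, 20])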

diff --git a/2.9/generated/torch.nn.modules.sparse.Embedding.html b/2.9/generated/torch.nn.modules.sparse.Embedding.html
index 07979b085cb..1605509e214 100644
--- a/2.9/generated/torch.nn.modules.sparse.Embedding.html
+++ b/2.9/generated/torch.nn.modules.sparse.Embedding.html
@@ -4415,7 +4415,7 @@

                                          Embedding#

                                          -class torch.nn.modules.sparse.Embedding(num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.sparse.Embedding(num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, device=None, dtype=None)[source]#

                                          A simple lookup table that stores embeddings of a fixed dictionary and size.

This module is often used to store word embeddings and retrieve them using indices. The input to the module is a list of indices, and the output is the corresponding word embeddings.
@@ -4523,7 +4523,7 @@
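A minimal lookup sketch for the Embedding entry above; each integer index selects one row of the weight matrix:

import torch
import torch.nn as nn

emb = nn.Embedding(num_embeddings=10, embedding_dim=3)
idx = torch.tensor([[1, 2, 4], [4, 3, 9]])   # any LongTensor of indices
print(emb(idx).shape)                        # torch.Size([2, 3, 3]) -- one 3-dim vector per index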

                                          Embedding
                                          -classmethod from_pretrained(embeddings, freeze=True, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False)[source]#
                                          +classmethod from_pretrained(embeddings, freeze=True, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False)[source]#

                                          Create Embedding instance from given 2-dimensional FloatTensor.

                                          Parameters
diff --git a/2.9/generated/torch.nn.modules.sparse.EmbeddingBag.html b/2.9/generated/torch.nn.modules.sparse.EmbeddingBag.html
index 49791446272..c7d7994b061 100644
--- a/2.9/generated/torch.nn.modules.sparse.EmbeddingBag.html
+++ b/2.9/generated/torch.nn.modules.sparse.EmbeddingBag.html
@@ -4415,7 +4415,7 @@

                                          EmbeddingBag#

                                          -class torch.nn.modules.sparse.EmbeddingBag(num_embeddings, embedding_dim, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='mean', sparse=False, _weight=None, include_last_offset=False, padding_idx=None, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.sparse.EmbeddingBag(num_embeddings, embedding_dim, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='mean', sparse=False, _weight=None, include_last_offset=False, padding_idx=None, device=None, dtype=None)[source]#

                                          Compute sums or means of ‘bags’ of embeddings, without instantiating the intermediate embeddings.

                                          For bags of constant length, no per_sample_weights, no indices equal to padding_idx, and with 2D inputs, this class

                                          @@ -4496,7 +4496,7 @@

                                          EmbeddingBag
                                          -forward(input, offsets=None, per_sample_weights=None)[source]#
                                          +forward(input, offsets=None, per_sample_weights=None)[source]#

                                          Forward pass of EmbeddingBag.

                                          Parameters
                                          @@ -4536,7 +4536,7 @@

                                          EmbeddingBag
                                          -classmethod from_pretrained(embeddings, freeze=True, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='mean', sparse=False, include_last_offset=False, padding_idx=None)[source]#
                                          +classmethod from_pretrained(embeddings, freeze=True, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, mode='mean', sparse=False, include_last_offset=False, padding_idx=None)[source]#

                                          Create EmbeddingBag instance from given 2-dimensional FloatTensor.

                                          Parameters
diff --git a/2.9/generated/torch.nn.modules.transformer.Transformer.html b/2.9/generated/torch.nn.modules.transformer.Transformer.html
index b49714de42c..33a97ac3f3b 100644
--- a/2.9/generated/torch.nn.modules.transformer.Transformer.html
+++ b/2.9/generated/torch.nn.modules.transformer.Transformer.html
@@ -4415,7 +4415,7 @@

                                          Transformer#

                                          -class torch.nn.modules.transformer.Transformer(d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, activation=<function relu>, custom_encoder=None, custom_decoder=None, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.transformer.Transformer(d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, activation=<function relu>, custom_encoder=None, custom_decoder=None, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#

                                          A basic transformer layer.

This Transformer layer implements the original Transformer architecture described in the Attention Is All You Need paper. The
@@ -4459,7 +4459,7 @@

Transformer

                                          -forward(src, tgt, src_mask=None, tgt_mask=None, memory_mask=None, src_key_padding_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, src_is_causal=None, tgt_is_causal=None, memory_is_causal=False)[source]#
                                          +forward(src, tgt, src_mask=None, tgt_mask=None, memory_mask=None, src_key_padding_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, src_is_causal=None, tgt_is_causal=None, memory_is_causal=False)[source]#

                                          Take in and process masked source/target sequences.

                                          Note

                                          @@ -4548,7 +4548,7 @@

                                          Transformer
                                          -static generate_square_subsequent_mask(sz, device=None, dtype=None)[source]#
                                          +static generate_square_subsequent_mask(sz, device=None, dtype=None)[source]#

                                          Generate a square causal mask for the sequence.

                                          The masked positions are filled with float(‘-inf’). Unmasked positions are filled with float(0.0).
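A minimal sketch of the mask layout, where position i may attend only to positions at or before i:

import torch
import torch.nn as nn

mask = nn.Transformer.generate_square_subsequent_mask(3)
print(mask)
# tensor([[0., -inf, -inf],
#         [0., 0., -inf],
#         [0., 0., 0.]])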

diff --git a/2.9/generated/torch.nn.modules.transformer.TransformerDecoder.html b/2.9/generated/torch.nn.modules.transformer.TransformerDecoder.html
index 65703376420..9bcc196d9d3 100644
--- a/2.9/generated/torch.nn.modules.transformer.TransformerDecoder.html
+++ b/2.9/generated/torch.nn.modules.transformer.TransformerDecoder.html
@@ -4415,7 +4415,7 @@

                                          TransformerDecoder#

                                          -class torch.nn.modules.transformer.TransformerDecoder(decoder_layer, num_layers, norm=None)[source]#
                                          +class torch.nn.modules.transformer.TransformerDecoder(decoder_layer, num_layers, norm=None)[source]#

                                          TransformerDecoder is a stack of N decoder layers.

This TransformerDecoder layer implements the original architecture described in the Attention Is All You Need paper. The
@@ -4449,7 +4449,7 @@

                                          TransformerDecoder
                                          -forward(tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_is_causal=None, memory_is_causal=False)[source]#
                                          +forward(tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_is_causal=None, memory_is_causal=False)[source]#

                                          Pass the inputs (and mask) through the decoder layer in turn.

                                          Parameters
diff --git a/2.9/generated/torch.nn.modules.transformer.TransformerDecoderLayer.html b/2.9/generated/torch.nn.modules.transformer.TransformerDecoderLayer.html
index b85c3f70eb6..10a01e6fa27 100644
--- a/2.9/generated/torch.nn.modules.transformer.TransformerDecoderLayer.html
+++ b/2.9/generated/torch.nn.modules.transformer.TransformerDecoderLayer.html
@@ -4415,7 +4415,7 @@

                                          TransformerDecoderLayer#

                                          -class torch.nn.modules.transformer.TransformerDecoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.transformer.TransformerDecoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#

                                          TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network.

This TransformerDecoderLayer implements the original architecture described in the Attention Is All You Need paper. The
@@ -4465,7 +4465,7 @@

                                          TransformerDecoderLayer
                                          -forward(tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_is_causal=False, memory_is_causal=False)[source]#
                                          +forward(tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_is_causal=False, memory_is_causal=False)[source]#

                                          Pass the inputs (and mask) through the decoder layer.

                                          Parameters
diff --git a/2.9/generated/torch.nn.modules.transformer.TransformerEncoder.html b/2.9/generated/torch.nn.modules.transformer.TransformerEncoder.html
index 6265e0b10d9..56dff0e4300 100644
--- a/2.9/generated/torch.nn.modules.transformer.TransformerEncoder.html
+++ b/2.9/generated/torch.nn.modules.transformer.TransformerEncoder.html
@@ -4415,7 +4415,7 @@

                                          TransformerEncoder#

                                          -class torch.nn.modules.transformer.TransformerEncoder(encoder_layer, num_layers, norm=None, enable_nested_tensor=True, mask_check=True)[source]#
                                          +class torch.nn.modules.transformer.TransformerEncoder(encoder_layer, num_layers, norm=None, enable_nested_tensor=True, mask_check=True)[source]#

                                          TransformerEncoder is a stack of N encoder layers.

This TransformerEncoder layer implements the original architecture described in the Attention Is All You Need paper. The
@@ -4451,7 +4451,7 @@

                                          TransformerEncoder
                                          -forward(src, mask=None, src_key_padding_mask=None, is_causal=None)[source]#
                                          +forward(src, mask=None, src_key_padding_mask=None, is_causal=None)[source]#

                                          Pass the input through the encoder layers in turn.

                                          Parameters
diff --git a/2.9/generated/torch.nn.modules.transformer.TransformerEncoderLayer.html b/2.9/generated/torch.nn.modules.transformer.TransformerEncoderLayer.html
index cb9ea667573..f07419bc80f 100644
--- a/2.9/generated/torch.nn.modules.transformer.TransformerEncoderLayer.html
+++ b/2.9/generated/torch.nn.modules.transformer.TransformerEncoderLayer.html
@@ -4415,7 +4415,7 @@

                                          TransformerEncoderLayer#

                                          -class torch.nn.modules.transformer.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#
                                          +class torch.nn.modules.transformer.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=<function relu>, layer_norm_eps=1e-05, batch_first=False, norm_first=False, bias=True, device=None, dtype=None)[source]#

                                          TransformerEncoderLayer is made up of self-attn and feedforward network.

This TransformerEncoderLayer implements the original architecture described in the Attention Is All You Need paper. The
@@ -4495,7 +4495,7 @@

                                          TransformerEncoderLayer
                                          -forward(src, src_mask=None, src_key_padding_mask=None, is_causal=False)[source]#
                                          +forward(src, src_mask=None, src_key_padding_mask=None, is_causal=False)[source]#

                                          Pass the input through the encoder layer.

                                          Parameters
diff --git a/2.9/generated/torch.nn.modules.upsampling.Upsample.html b/2.9/generated/torch.nn.modules.upsampling.Upsample.html
index 80d124c42dc..63e14f7ed72 100644
--- a/2.9/generated/torch.nn.modules.upsampling.Upsample.html
+++ b/2.9/generated/torch.nn.modules.upsampling.Upsample.html
@@ -4415,7 +4415,7 @@

                                          Upsample#

                                          -class torch.nn.modules.upsampling.Upsample(size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None)[source]#
                                          +class torch.nn.modules.upsampling.Upsample(size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None)[source]#

                                          Upsamples a given multi-channel 1D (temporal), 2D (spatial) or 3D (volumetric) data.

The input data is assumed to be of the form minibatch x channels x [optional depth] x [optional height] x width.
@@ -4540,7 +4540,7 @@
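A minimal sketch for the Upsample entry above, with nearest and bilinear modes chosen for illustration:

import torch
import torch.nn as nn

x = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2)
up = nn.Upsample(scale_factor=2, mode='nearest')
print(up(x).shape)                       # torch.Size([1, 1, 4, 4])
print(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)(x).shape)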

                                          Upsample
                                          -extra_repr()[source]#
                                          +extra_repr()[source]#

                                          Return the extra representation of the module.

                                          Return type
                                          @@ -4551,7 +4551,7 @@

                                          Upsample
                                          -forward(input)[source]#
                                          +forward(input)[source]#

                                          Runs the forward pass.

                                          Return type
diff --git a/2.9/generated/torch.nn.modules.upsampling.UpsamplingBilinear2d.html b/2.9/generated/torch.nn.modules.upsampling.UpsamplingBilinear2d.html
index f060b5c866f..810e3d1cd20 100644
--- a/2.9/generated/torch.nn.modules.upsampling.UpsamplingBilinear2d.html
+++ b/2.9/generated/torch.nn.modules.upsampling.UpsamplingBilinear2d.html
@@ -4415,7 +4415,7 @@

                                          UpsamplingBilinear2d#

                                          -class torch.nn.modules.upsampling.UpsamplingBilinear2d(size=None, scale_factor=None)[source]#
                                          +class torch.nn.modules.upsampling.UpsamplingBilinear2d(size=None, scale_factor=None)[source]#

                                          Applies a 2D bilinear upsampling to an input signal composed of several input channels.

To specify the scale, it takes either the size or the scale_factor as its constructor argument.

diff --git a/2.9/generated/torch.nn.modules.upsampling.UpsamplingNearest2d.html b/2.9/generated/torch.nn.modules.upsampling.UpsamplingNearest2d.html
index 75482bb9b74..861828e1540 100644
--- a/2.9/generated/torch.nn.modules.upsampling.UpsamplingNearest2d.html
+++ b/2.9/generated/torch.nn.modules.upsampling.UpsamplingNearest2d.html
@@ -4415,7 +4415,7 @@

                                          UpsamplingNearest2d#

                                          -class torch.nn.modules.upsampling.UpsamplingNearest2d(size=None, scale_factor=None)[source]#
                                          +class torch.nn.modules.upsampling.UpsamplingNearest2d(size=None, scale_factor=None)[source]#

                                          Applies a 2D nearest neighbor upsampling to an input signal composed of several input channels.

To specify the scale, it takes either the size or the scale_factor as its constructor argument.

diff --git a/2.9/generated/torch.nn.parallel.DistributedDataParallel.html b/2.9/generated/torch.nn.parallel.DistributedDataParallel.html
index 9b4b0356d16..7bae2b2568d 100644
--- a/2.9/generated/torch.nn.parallel.DistributedDataParallel.html
+++ b/2.9/generated/torch.nn.parallel.DistributedDataParallel.html
@@ -4404,7 +4404,7 @@

                                          DistributedDataParallel#

                                          -class torch.nn.parallel.DistributedDataParallel(module, device_ids=None, output_device=None, dim=0, broadcast_buffers=True, init_sync=True, process_group=None, bucket_cap_mb=None, find_unused_parameters=False, check_reduction=False, gradient_as_bucket_view=False, static_graph=False, delay_all_reduce_named_params=None, param_to_hook_all_reduce=None, mixed_precision=None, device_mesh=None, skip_all_reduce_unused_params=False)[source]#
                                          +class torch.nn.parallel.DistributedDataParallel(module, device_ids=None, output_device=None, dim=0, broadcast_buffers=True, init_sync=True, process_group=None, bucket_cap_mb=None, find_unused_parameters=False, check_reduction=False, gradient_as_bucket_view=False, static_graph=False, delay_all_reduce_named_params=None, param_to_hook_all_reduce=None, mixed_precision=None, device_mesh=None, skip_all_reduce_unused_params=False)[source]#

                                          Implement distributed data parallelism based on torch.distributed at module level.

This container provides data parallelism by synchronizing gradients across each model replica. The devices to synchronize across are
@@ -4744,7 +4744,7 @@

                                          DistributedDataParallel
                                          -join(divide_by_initial_world_size=True, enable=True, throw_on_early_termination=False)[source]#
                                          +join(divide_by_initial_world_size=True, enable=True, throw_on_early_termination=False)[source]#

                                          Context manager for training with uneven inputs across processes in DDP.

This context manager will keep track of already-joined DDP processes, and “shadow” the forward and backward passes by inserting collective
@@ -4840,7 +4840,7 @@

                                          DistributedDataParallel
                                          -join_hook(**kwargs)[source]#
                                          +join_hook(**kwargs)[source]#

                                          DDP join hook enables training on uneven inputs by mirroring communications in forward and backward passes.

                                          Parameters
                                          @@ -4869,7 +4869,7 @@

                                          DistributedDataParallel
                                          -no_sync()[source]#
                                          +no_sync()[source]#

                                          Context manager to disable gradient synchronizations across DDP processes.

Within this context, gradients will be accumulated on module variables, which will later be synchronized in the first forward-backward pass after exiting the context.
@@ -4891,7 +4891,7 @@
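A minimal gradient-accumulation sketch for the no_sync entry above; assumes the script is launched under torchrun so the process-group environment variables are already set:

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

dist.init_process_group("gloo")          # backend chosen for a CPU-only illustration
model = DDP(nn.Linear(4, 4))
x1, x2 = torch.randn(2, 4), torch.randn(2, 4)
with model.no_sync():
    model(x1).sum().backward()           # gradients accumulate locally, no all-reduce
model(x2).sum().backward()               # first backward outside the context synchronizes
dist.destroy_process_group()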

                                          DistributedDataParallel
                                          -register_comm_hook(state, hook)[source]#
                                          +register_comm_hook(state, hook)[source]#

                                          Register communication hook for user-defined DDP aggregation of gradients across multiple workers.

This hook would be very useful for researchers to try out new ideas. For example, this hook can be used to implement several algorithms like GossipGrad
diff --git a/2.9/generated/torch.nn.parameter.Buffer.html b/2.9/generated/torch.nn.parameter.Buffer.html
index 794789c438f..2c07dbf9248 100644
--- a/2.9/generated/torch.nn.parameter.Buffer.html
+++ b/2.9/generated/torch.nn.parameter.Buffer.html
@@ -4404,7 +4404,7 @@

                                          Buffer#

                                          -class torch.nn.parameter.Buffer(data=None, *, persistent=True)[source]#
                                          +class torch.nn.parameter.Buffer(data=None, *, persistent=True)[source]#

                                          A kind of Tensor that should not be considered a model parameter. For example, BatchNorm’s running_mean is not a parameter, but is part of the module’s state.

Buffers are Tensor subclasses that have a diff --git a/2.9/generated/torch.nn.parameter.Parameter.html b/2.9/generated/torch.nn.parameter.Parameter.html index c4bfb982c6d..15e60e7c05d 100644 --- a/2.9/generated/torch.nn.parameter.Parameter.html +++ b/2.9/generated/torch.nn.parameter.Parameter.html @@ -4404,7 +4404,7 @@
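A short sketch (Normalizer is a hypothetical module name):

    import torch
    import torch.nn as nn
    from torch.nn.parameter import Buffer

    class Normalizer(nn.Module):
        def __init__(self):
            super().__init__()
            # assigning a Buffer to a module attribute registers it automatically
            self.running_mean = Buffer(torch.zeros(10))

    m = Normalizer()
    print([name for name, _ in m.named_buffers()])  # ['running_mean']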

                                          Parameter#

                                          -class torch.nn.parameter.Parameter(data=None, requires_grad=True)[source]#
                                          +class torch.nn.parameter.Parameter(data=None, requires_grad=True)[source]#

                                          A kind of Tensor that is to be considered a module parameter.

Parameters are Tensor subclasses that have a very special property when used with Modules - when they're diff --git a/2.9/generated/torch.nn.parameter.UninitializedBuffer.html b/2.9/generated/torch.nn.parameter.UninitializedBuffer.html index 591230c2627..2f705366c3b 100644 --- a/2.9/generated/torch.nn.parameter.UninitializedBuffer.html +++ b/2.9/generated/torch.nn.parameter.UninitializedBuffer.html @@ -4404,7 +4404,7 @@
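A short sketch (Scaler is a hypothetical module name):

    import torch
    import torch.nn as nn

    class Scaler(nn.Module):
        def __init__(self):
            super().__init__()
            # Parameters assigned as attributes appear in module.parameters()
            self.scale = nn.Parameter(torch.ones(3))

    m = Scaler()
    print([name for name, _ in m.named_parameters()])  # ['scale']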

                                          UninitializedBuffer#

                                          -class torch.nn.parameter.UninitializedBuffer(requires_grad=False, device=None, dtype=None, persistent=True)[source]#
                                          +class torch.nn.parameter.UninitializedBuffer(requires_grad=False, device=None, dtype=None, persistent=True)[source]#

                                          A buffer that is not initialized.

Uninitialized Buffer is a special case of torch.Tensor where the shape of the data is still unknown.

                                          diff --git a/2.9/generated/torch.nn.parameter.UninitializedParameter.html b/2.9/generated/torch.nn.parameter.UninitializedParameter.html index 370a4583fc1..a1541f91ee1 100644 --- a/2.9/generated/torch.nn.parameter.UninitializedParameter.html +++ b/2.9/generated/torch.nn.parameter.UninitializedParameter.html @@ -4404,7 +4404,7 @@

                                          UninitializedParameter#

                                          -class torch.nn.parameter.UninitializedParameter(requires_grad=True, device=None, dtype=None)[source]#
                                          +class torch.nn.parameter.UninitializedParameter(requires_grad=True, device=None, dtype=None)[source]#

                                          A parameter that is not initialized.

                                          Uninitialized Parameters are a special case of torch.nn.Parameter where the shape of the data is still unknown.
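A sketch using a lazy module, which is how uninitialized parameters typically show up in practice:

    import torch
    import torch.nn as nn

    layer = nn.LazyLinear(4)                   # weight is an UninitializedParameter
    print(nn.parameter.is_lazy(layer.weight))  # True
    layer(torch.randn(2, 8))                   # first forward infers in_features=8
    print(layer.weight.shape)                  # torch.Size([4, 8])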

                                          @@ -4419,7 +4419,7 @@

                                          UninitializedParameter
                                          -cls_to_become[source]#
                                          +cls_to_become[source]#

                                          alias of Parameter

                                          diff --git a/2.9/generated/torch.nn.parameter.is_lazy.html b/2.9/generated/torch.nn.parameter.is_lazy.html index 00424d97c04..b5527ad77d8 100644 --- a/2.9/generated/torch.nn.parameter.is_lazy.html +++ b/2.9/generated/torch.nn.parameter.is_lazy.html @@ -4404,7 +4404,7 @@

                                          torch.nn.parameter.is_lazy#

                                          -torch.nn.parameter.is_lazy(param)[source]#
                                          +torch.nn.parameter.is_lazy(param)[source]#

                                          Returns whether param is an UninitializedParameter or UninitializedBuffer.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.clip_grad.clip_grad_norm.html b/2.9/generated/torch.nn.utils.clip_grad.clip_grad_norm.html index 9edaa333a16..8d4ed4f7345 100644 --- a/2.9/generated/torch.nn.utils.clip_grad.clip_grad_norm.html +++ b/2.9/generated/torch.nn.utils.clip_grad.clip_grad_norm.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.clip_grad.clip_grad_norm#

                                          -torch.nn.utils.clip_grad.clip_grad_norm(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#
                                          +torch.nn.utils.clip_grad.clip_grad_norm(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#

                                          Clip the gradient norm of an iterable of parameters.

                                          Warning

                                          diff --git a/2.9/generated/torch.nn.utils.clip_grad.clip_grad_norm_.html b/2.9/generated/torch.nn.utils.clip_grad.clip_grad_norm_.html index ba0b103d56f..7734f7a9b77 100644 --- a/2.9/generated/torch.nn.utils.clip_grad.clip_grad_norm_.html +++ b/2.9/generated/torch.nn.utils.clip_grad.clip_grad_norm_.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.clip_grad.clip_grad_norm_#

                                          -torch.nn.utils.clip_grad.clip_grad_norm_(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#
                                          +torch.nn.utils.clip_grad.clip_grad_norm_(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#

                                          Clip the gradient norm of an iterable of parameters.

                                          The norm is computed over the norms of the individual gradients of all parameters, as if the norms of the individual gradients were concatenated into a single vector. diff --git a/2.9/generated/torch.nn.utils.clip_grad.clip_grad_value_.html b/2.9/generated/torch.nn.utils.clip_grad.clip_grad_value_.html index 4607c68bcce..f58924b3d2b 100644 --- a/2.9/generated/torch.nn.utils.clip_grad.clip_grad_value_.html +++ b/2.9/generated/torch.nn.utils.clip_grad.clip_grad_value_.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.clip_grad.clip_grad_value_#

                                          -torch.nn.utils.clip_grad.clip_grad_value_(parameters, clip_value, foreach=None)[source]#
                                          +torch.nn.utils.clip_grad.clip_grad_value_(parameters, clip_value, foreach=None)[source]#

Clip the gradients of an iterable of parameters at the specified value.

                                          Gradients are modified in-place.

                                          diff --git a/2.9/generated/torch.nn.utils.clip_grad_norm.html b/2.9/generated/torch.nn.utils.clip_grad_norm.html index 2e5aa227946..4ae113dca55 100644 --- a/2.9/generated/torch.nn.utils.clip_grad_norm.html +++ b/2.9/generated/torch.nn.utils.clip_grad_norm.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.clip_grad_norm#

                                          -torch.nn.utils.clip_grad_norm(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#
                                          +torch.nn.utils.clip_grad_norm(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#

                                          Clip the gradient norm of an iterable of parameters.

                                          Warning

                                          diff --git a/2.9/generated/torch.nn.utils.clip_grad_norm_.html b/2.9/generated/torch.nn.utils.clip_grad_norm_.html index 1c0134c4bdb..feb28052b56 100644 --- a/2.9/generated/torch.nn.utils.clip_grad_norm_.html +++ b/2.9/generated/torch.nn.utils.clip_grad_norm_.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.clip_grad_norm_#

                                          -torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#
                                          +torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#

                                          Clip the gradient norm of an iterable of parameters.

                                          The norm is computed over the norms of the individual gradients of all parameters, as if the norms of the individual gradients were concatenated into a single vector. diff --git a/2.9/generated/torch.nn.utils.clip_grad_value_.html b/2.9/generated/torch.nn.utils.clip_grad_value_.html index f5e5718e8b2..f8d83342e2c 100644 --- a/2.9/generated/torch.nn.utils.clip_grad_value_.html +++ b/2.9/generated/torch.nn.utils.clip_grad_value_.html @@ -4404,7 +4404,7 @@
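A minimal sketch of the usual training-loop placement:

    import torch
    import torch.nn as nn
    from torch.nn.utils import clip_grad_norm_

    model = nn.Linear(10, 10)
    model(torch.randn(4, 10)).sum().backward()
    # returns the total norm *before* clipping; grads are rescaled in-place
    total_norm = clip_grad_norm_(model.parameters(), max_norm=1.0)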

                                          torch.nn.utils.clip_grad_value_#

                                          -torch.nn.utils.clip_grad_value_(parameters, clip_value, foreach=None)[source]#
                                          +torch.nn.utils.clip_grad_value_(parameters, clip_value, foreach=None)[source]#

Clip the gradients of an iterable of parameters at the specified value.

                                          Gradients are modified in-place.
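A minimal sketch:

    import torch
    import torch.nn as nn
    from torch.nn.utils import clip_grad_value_

    model = nn.Linear(10, 10)
    model(torch.randn(4, 10)).sum().backward()
    clip_grad_value_(model.parameters(), clip_value=0.5)
    # every gradient entry is now clamped to [-0.5, 0.5]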

                                          diff --git a/2.9/generated/torch.nn.utils.clip_grads_with_norm_.html b/2.9/generated/torch.nn.utils.clip_grads_with_norm_.html index d8d4f5f2fad..8edcf6e0843 100644 --- a/2.9/generated/torch.nn.utils.clip_grads_with_norm_.html +++ b/2.9/generated/torch.nn.utils.clip_grads_with_norm_.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.clip_grads_with_norm_#

                                          -torch.nn.utils.clip_grads_with_norm_(parameters, max_norm, total_norm, foreach=None)[source]#
                                          +torch.nn.utils.clip_grads_with_norm_(parameters, max_norm, total_norm, foreach=None)[source]#

                                          Scale the gradients of an iterable of parameters given a pre-calculated total norm and desired max norm.

                                          The gradients will be scaled by the following calculation

                                          diff --git a/2.9/generated/torch.nn.utils.convert_conv2d_weight_memory_format.html b/2.9/generated/torch.nn.utils.convert_conv2d_weight_memory_format.html index 34ec112563d..5a2204bdea9 100644 --- a/2.9/generated/torch.nn.utils.convert_conv2d_weight_memory_format.html +++ b/2.9/generated/torch.nn.utils.convert_conv2d_weight_memory_format.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.convert_conv2d_weight_memory_format#

                                          -torch.nn.utils.convert_conv2d_weight_memory_format(module, memory_format)[source]#
                                          +torch.nn.utils.convert_conv2d_weight_memory_format(module, memory_format)[source]#

                                          Convert memory_format of nn.Conv2d.weight to memory_format.

The conversion recursively applies to nested nn.Module, including module. Note that it only changes the memory_format, but not the semantics of each dimension. diff --git a/2.9/generated/torch.nn.utils.convert_conv3d_weight_memory_format.html b/2.9/generated/torch.nn.utils.convert_conv3d_weight_memory_format.html index 34f9f8ec348..e96373b13f1 100644 --- a/2.9/generated/torch.nn.utils.convert_conv3d_weight_memory_format.html +++ b/2.9/generated/torch.nn.utils.convert_conv3d_weight_memory_format.html @@ -4404,7 +4404,7 @@
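A minimal sketch:

    import torch
    import torch.nn as nn
    from torch.nn.utils import convert_conv2d_weight_memory_format

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU())
    # convert every nested nn.Conv2d weight to channels_last
    convert_conv2d_weight_memory_format(model, torch.channels_last)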

                                          torch.nn.utils.convert_conv3d_weight_memory_format#

                                          -torch.nn.utils.convert_conv3d_weight_memory_format(module, memory_format)[source]#
                                          +torch.nn.utils.convert_conv3d_weight_memory_format(module, memory_format)[source]#

Convert memory_format of nn.Conv3d.weight to memory_format. The conversion recursively applies to nested nn.Module, including module. Note that it only changes the memory_format, but not the semantics of each dimension. diff --git a/2.9/generated/torch.nn.utils.convert_parameters.parameters_to_vector.html b/2.9/generated/torch.nn.utils.convert_parameters.parameters_to_vector.html index 2f11f7fc760..cce3c58d363 100644 --- a/2.9/generated/torch.nn.utils.convert_parameters.parameters_to_vector.html +++ b/2.9/generated/torch.nn.utils.convert_parameters.parameters_to_vector.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.convert_parameters.parameters_to_vector#

                                          -torch.nn.utils.convert_parameters.parameters_to_vector(parameters)[source]#
                                          +torch.nn.utils.convert_parameters.parameters_to_vector(parameters)[source]#

                                          Flatten an iterable of parameters into a single vector.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.convert_parameters.vector_to_parameters.html b/2.9/generated/torch.nn.utils.convert_parameters.vector_to_parameters.html index 29511e27123..d88aea02e4b 100644 --- a/2.9/generated/torch.nn.utils.convert_parameters.vector_to_parameters.html +++ b/2.9/generated/torch.nn.utils.convert_parameters.vector_to_parameters.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.convert_parameters.vector_to_parameters#

                                          -torch.nn.utils.convert_parameters.vector_to_parameters(vec, parameters)[source]#
                                          +torch.nn.utils.convert_parameters.vector_to_parameters(vec, parameters)[source]#

                                          Copy slices of a vector into an iterable of parameters.

                                          Parameters
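A round-trip sketch using both conversion helpers:

    import torch.nn as nn
    from torch.nn.utils import parameters_to_vector, vector_to_parameters

    model = nn.Linear(3, 2)
    vec = parameters_to_vector(model.parameters())     # 1-D tensor, 3*2 + 2 = 8 entries
    vector_to_parameters(vec * 0, model.parameters())  # write zeros back in-place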
                                          diff --git a/2.9/generated/torch.nn.utils.fuse_conv_bn_eval.html b/2.9/generated/torch.nn.utils.fuse_conv_bn_eval.html index 152313bd6ac..3b8e1adec9b 100644 --- a/2.9/generated/torch.nn.utils.fuse_conv_bn_eval.html +++ b/2.9/generated/torch.nn.utils.fuse_conv_bn_eval.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.fuse_conv_bn_eval#

                                          -torch.nn.utils.fuse_conv_bn_eval(conv, bn, transpose=False)[source]#
                                          +torch.nn.utils.fuse_conv_bn_eval(conv, bn, transpose=False)[source]#

                                          Fuse a convolutional module and a BatchNorm module into a single, new convolutional module.

                                          Parameters
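A minimal sketch; both modules must be in eval mode:

    import torch.nn as nn
    from torch.nn.utils import fuse_conv_bn_eval

    conv = nn.Conv2d(3, 8, 3).eval()
    bn = nn.BatchNorm2d(8).eval()
    fused = fuse_conv_bn_eval(conv, bn)  # single Conv2d, same output in eval mode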
                                          diff --git a/2.9/generated/torch.nn.utils.fuse_conv_bn_weights.html b/2.9/generated/torch.nn.utils.fuse_conv_bn_weights.html index 7a4254cb405..862c81c5850 100644 --- a/2.9/generated/torch.nn.utils.fuse_conv_bn_weights.html +++ b/2.9/generated/torch.nn.utils.fuse_conv_bn_weights.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.fuse_conv_bn_weights#

                                          -torch.nn.utils.fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b, transpose=False)[source]#
                                          +torch.nn.utils.fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b, transpose=False)[source]#

                                          Fuse convolutional module parameters and BatchNorm module parameters into new convolutional module parameters.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.fuse_linear_bn_eval.html b/2.9/generated/torch.nn.utils.fuse_linear_bn_eval.html index c80d2c63f48..d9d0a4c5744 100644 --- a/2.9/generated/torch.nn.utils.fuse_linear_bn_eval.html +++ b/2.9/generated/torch.nn.utils.fuse_linear_bn_eval.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.fuse_linear_bn_eval#

                                          -torch.nn.utils.fuse_linear_bn_eval(linear, bn)[source]#
                                          +torch.nn.utils.fuse_linear_bn_eval(linear, bn)[source]#

                                          Fuse a linear module and a BatchNorm module into a single, new linear module.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.fuse_linear_bn_weights.html b/2.9/generated/torch.nn.utils.fuse_linear_bn_weights.html index 392fb583de2..618b98f26e8 100644 --- a/2.9/generated/torch.nn.utils.fuse_linear_bn_weights.html +++ b/2.9/generated/torch.nn.utils.fuse_linear_bn_weights.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.fuse_linear_bn_weights#

                                          -torch.nn.utils.fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b)[source]#
                                          +torch.nn.utils.fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b)[source]#

                                          Fuse linear module parameters and BatchNorm module parameters into new linear module parameters.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.fusion.fuse_conv_bn_eval.html b/2.9/generated/torch.nn.utils.fusion.fuse_conv_bn_eval.html index 875782f11a1..973abd80a50 100644 --- a/2.9/generated/torch.nn.utils.fusion.fuse_conv_bn_eval.html +++ b/2.9/generated/torch.nn.utils.fusion.fuse_conv_bn_eval.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.fusion.fuse_conv_bn_eval#

                                          -torch.nn.utils.fusion.fuse_conv_bn_eval(conv, bn, transpose=False)[source]#
                                          +torch.nn.utils.fusion.fuse_conv_bn_eval(conv, bn, transpose=False)[source]#

                                          Fuse a convolutional module and a BatchNorm module into a single, new convolutional module.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.fusion.fuse_conv_bn_weights.html b/2.9/generated/torch.nn.utils.fusion.fuse_conv_bn_weights.html index 93227ec5481..d3da2ef0017 100644 --- a/2.9/generated/torch.nn.utils.fusion.fuse_conv_bn_weights.html +++ b/2.9/generated/torch.nn.utils.fusion.fuse_conv_bn_weights.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.fusion.fuse_conv_bn_weights#

                                          -torch.nn.utils.fusion.fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b, transpose=False)[source]#
                                          +torch.nn.utils.fusion.fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b, transpose=False)[source]#

                                          Fuse convolutional module parameters and BatchNorm module parameters into new convolutional module parameters.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.fusion.fuse_linear_bn_eval.html b/2.9/generated/torch.nn.utils.fusion.fuse_linear_bn_eval.html index 3991b0f735f..855a1945f7f 100644 --- a/2.9/generated/torch.nn.utils.fusion.fuse_linear_bn_eval.html +++ b/2.9/generated/torch.nn.utils.fusion.fuse_linear_bn_eval.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.fusion.fuse_linear_bn_eval#

                                          -torch.nn.utils.fusion.fuse_linear_bn_eval(linear, bn)[source]#
                                          +torch.nn.utils.fusion.fuse_linear_bn_eval(linear, bn)[source]#

                                          Fuse a linear module and a BatchNorm module into a single, new linear module.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.fusion.fuse_linear_bn_weights.html b/2.9/generated/torch.nn.utils.fusion.fuse_linear_bn_weights.html index 1f0efbbcd96..95ad4aa1bd5 100644 --- a/2.9/generated/torch.nn.utils.fusion.fuse_linear_bn_weights.html +++ b/2.9/generated/torch.nn.utils.fusion.fuse_linear_bn_weights.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.fusion.fuse_linear_bn_weights#

                                          -torch.nn.utils.fusion.fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b)[source]#
                                          +torch.nn.utils.fusion.fuse_linear_bn_weights(linear_w, linear_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b)[source]#

                                          Fuse linear module parameters and BatchNorm module parameters into new linear module parameters.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.get_total_norm.html b/2.9/generated/torch.nn.utils.get_total_norm.html index a9f3c5f2c59..8df10aed1d7 100644 --- a/2.9/generated/torch.nn.utils.get_total_norm.html +++ b/2.9/generated/torch.nn.utils.get_total_norm.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.get_total_norm#

                                          -torch.nn.utils.get_total_norm(tensors, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#
                                          +torch.nn.utils.get_total_norm(tensors, norm_type=2.0, error_if_nonfinite=False, foreach=None)[source]#

                                          Compute the norm of an iterable of tensors.

                                          The norm is computed over the norms of the individual tensors, as if the norms of the individual tensors were concatenated into a single vector.
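A sketch pairing this with clip_grads_with_norm_; the two calls together behave like clip_grad_norm_:

    import torch
    import torch.nn as nn
    from torch.nn.utils import get_total_norm, clip_grads_with_norm_

    model = nn.Linear(10, 10)
    model(torch.randn(4, 10)).sum().backward()
    grads = [p.grad for p in model.parameters()]
    total_norm = get_total_norm(grads)                          # norm over all grads
    clip_grads_with_norm_(model.parameters(), 1.0, total_norm)  # scale in-place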

                                          diff --git a/2.9/generated/torch.nn.utils.init.skip_init.html b/2.9/generated/torch.nn.utils.init.skip_init.html index 43515ad1d45..35a929746fc 100644 --- a/2.9/generated/torch.nn.utils.init.skip_init.html +++ b/2.9/generated/torch.nn.utils.init.skip_init.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.init.skip_init#

                                          -torch.nn.utils.init.skip_init(module_cls, *args, **kwargs)[source]#
                                          +torch.nn.utils.init.skip_init(module_cls, *args, **kwargs)[source]#

                                          Given a module class object and args / kwargs, instantiate the module without initializing parameters / buffers.

                                          This can be useful if initialization is slow or if custom initialization will be performed, making the default initialization unnecessary. There are some caveats to this, due to diff --git a/2.9/generated/torch.nn.utils.memory_format.convert_conv2d_weight_memory_format.html b/2.9/generated/torch.nn.utils.memory_format.convert_conv2d_weight_memory_format.html index 86d2e3e4a35..2da0bde2fb1 100644 --- a/2.9/generated/torch.nn.utils.memory_format.convert_conv2d_weight_memory_format.html +++ b/2.9/generated/torch.nn.utils.memory_format.convert_conv2d_weight_memory_format.html @@ -4415,7 +4415,7 @@
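A minimal sketch:

    import torch.nn as nn
    from torch.nn.utils import skip_init

    layer = skip_init(nn.Linear, 10, 5)  # memory allocated, values left uninitialized
    nn.init.orthogonal_(layer.weight)    # apply the custom init afterwards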

                                          torch.nn.utils.memory_format.convert_conv2d_weight_memory_format#

                                          -torch.nn.utils.memory_format.convert_conv2d_weight_memory_format(module, memory_format)[source]#
                                          +torch.nn.utils.memory_format.convert_conv2d_weight_memory_format(module, memory_format)[source]#

                                          Convert memory_format of nn.Conv2d.weight to memory_format.

                                          The conversion recursively applies to nested nn.Module, including module. Note that it only changes the memory_format, but not the semantics of each dimensions. diff --git a/2.9/generated/torch.nn.utils.memory_format.convert_conv3d_weight_memory_format.html b/2.9/generated/torch.nn.utils.memory_format.convert_conv3d_weight_memory_format.html index 239542a57bd..647a9f89184 100644 --- a/2.9/generated/torch.nn.utils.memory_format.convert_conv3d_weight_memory_format.html +++ b/2.9/generated/torch.nn.utils.memory_format.convert_conv3d_weight_memory_format.html @@ -4415,7 +4415,7 @@

                                          torch.nn.utils.memory_format.convert_conv3d_weight_memory_format#

                                          -torch.nn.utils.memory_format.convert_conv3d_weight_memory_format(module, memory_format)[source]#
                                          +torch.nn.utils.memory_format.convert_conv3d_weight_memory_format(module, memory_format)[source]#

                                          Convert memory_format of nn.Conv3d.weight to memory_format The conversion recursively applies to nested nn.Module, including module. Note that it only changes the memory_format, but not the semantics of each dimensions. diff --git a/2.9/generated/torch.nn.utils.parameters_to_vector.html b/2.9/generated/torch.nn.utils.parameters_to_vector.html index 1873af6f852..642270973bd 100644 --- a/2.9/generated/torch.nn.utils.parameters_to_vector.html +++ b/2.9/generated/torch.nn.utils.parameters_to_vector.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.parameters_to_vector#

                                          -torch.nn.utils.parameters_to_vector(parameters)[source]#
                                          +torch.nn.utils.parameters_to_vector(parameters)[source]#

                                          Flatten an iterable of parameters into a single vector.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.parametrizations.orthogonal.html b/2.9/generated/torch.nn.utils.parametrizations.orthogonal.html index eba2225f5cd..c0392a500c7 100644 --- a/2.9/generated/torch.nn.utils.parametrizations.orthogonal.html +++ b/2.9/generated/torch.nn.utils.parametrizations.orthogonal.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.parametrizations.orthogonal#

                                          -torch.nn.utils.parametrizations.orthogonal(module, name='weight', orthogonal_map=None, *, use_trivialization=True)[source]#
                                          +torch.nn.utils.parametrizations.orthogonal(module, name='weight', orthogonal_map=None, *, use_trivialization=True)[source]#

                                          Apply an orthogonal or unitary parametrization to a matrix or a batch of matrices.

Letting \(\mathbb{K}\) be \(\mathbb{R}\) or \(\mathbb{C}\), the parametrized matrix \(Q \in \mathbb{K}^{m \times n}\) is orthogonal as
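A minimal sketch:

    import torch
    import torch.nn as nn
    from torch.nn.utils.parametrizations import orthogonal

    layer = orthogonal(nn.Linear(5, 5))  # parametrizes layer.weight
    Q = layer.weight
    print(torch.allclose(Q @ Q.T, torch.eye(5), atol=1e-5))  # True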

                                          diff --git a/2.9/generated/torch.nn.utils.parametrizations.spectral_norm.html b/2.9/generated/torch.nn.utils.parametrizations.spectral_norm.html index c7cf0e63304..6a9582fb116 100644 --- a/2.9/generated/torch.nn.utils.parametrizations.spectral_norm.html +++ b/2.9/generated/torch.nn.utils.parametrizations.spectral_norm.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.parametrizations.spectral_norm#

                                          -torch.nn.utils.parametrizations.spectral_norm(module, name='weight', n_power_iterations=1, eps=1e-12, dim=None)[source]#
                                          +torch.nn.utils.parametrizations.spectral_norm(module, name='weight', n_power_iterations=1, eps=1e-12, dim=None)[source]#

                                          Apply spectral normalization to a parameter in the given module.

\(\mathbf{W}_{SN} = \dfrac{\mathbf{W}}{\sigma(\mathbf{W})}, \quad \sigma(\mathbf{W}) = \max_{\mathbf{h} \colon \mathbf{h} \neq 0} \dfrac{\|\mathbf{W} \mathbf{h}\|_2}{\|\mathbf{h}\|_2}\) diff --git a/2.9/generated/torch.nn.utils.parametrizations.weight_norm.html b/2.9/generated/torch.nn.utils.parametrizations.weight_norm.html index 4e5eda508ec..e44b4c1bceb 100644 --- a/2.9/generated/torch.nn.utils.parametrizations.weight_norm.html +++ b/2.9/generated/torch.nn.utils.parametrizations.weight_norm.html @@ -4404,7 +4404,7 @@
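A minimal sketch:

    import torch
    import torch.nn as nn
    from torch.nn.utils.parametrizations import spectral_norm

    layer = spectral_norm(nn.Linear(20, 40))
    # largest singular value of the parametrized weight is approximately 1
    print(torch.linalg.matrix_norm(layer.weight, ord=2))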

                                          torch.nn.utils.parametrizations.weight_norm#

                                          -torch.nn.utils.parametrizations.weight_norm(module, name='weight', dim=0)[source]#
                                          +torch.nn.utils.parametrizations.weight_norm(module, name='weight', dim=0)[source]#

                                          Apply weight normalization to a parameter in the given module.

\(\mathbf{w} = g \dfrac{\mathbf{v}}{\|\mathbf{v}\|}\) diff --git a/2.9/generated/torch.nn.utils.parametrize.ParametrizationList.html b/2.9/generated/torch.nn.utils.parametrize.ParametrizationList.html index 6e8cc67c36b..0cd7f8f1191 100644 --- a/2.9/generated/torch.nn.utils.parametrize.ParametrizationList.html +++ b/2.9/generated/torch.nn.utils.parametrize.ParametrizationList.html @@ -4404,7 +4404,7 @@
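A minimal sketch; with the parametrization API, the magnitude g and direction v are stored as original0 and original1:

    import torch.nn as nn
    from torch.nn.utils.parametrizations import weight_norm

    layer = weight_norm(nn.Linear(20, 40), name="weight")
    print(layer.parametrizations.weight.original0.shape)  # g: torch.Size([40, 1])
    print(layer.parametrizations.weight.original1.shape)  # v: torch.Size([40, 20])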

                                          ParametrizationList#

                                          -class torch.nn.utils.parametrize.ParametrizationList(modules, original, unsafe=False)[source]#
                                          +class torch.nn.utils.parametrize.ParametrizationList(modules, original, unsafe=False)[source]#

                                          A sequential container that holds and manages the original parameters or buffers of a parametrized torch.nn.Module.

                                          It is the type of module.parametrizations[tensor_name] when module[tensor_name] has been parametrized with register_parametrization().

                                          @@ -4432,7 +4432,7 @@

                                          ParametrizationList
                                          -right_inverse(value)[source]#
                                          +right_inverse(value)[source]#

                                          Call the right_inverse methods of the parametrizations in the inverse registration order.

                                          Then, it stores the result in self.original if right_inverse outputs one tensor or in self.original0, self.original1, … if it outputs several.

                                          diff --git a/2.9/generated/torch.nn.utils.parametrize.cached.html b/2.9/generated/torch.nn.utils.parametrize.cached.html index 26092e1e36c..4f3084504c8 100644 --- a/2.9/generated/torch.nn.utils.parametrize.cached.html +++ b/2.9/generated/torch.nn.utils.parametrize.cached.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.parametrize.cached#

                                          -torch.nn.utils.parametrize.cached()[source]#
                                          +torch.nn.utils.parametrize.cached()[source]#

                                          Context manager that enables the caching system within parametrizations registered with register_parametrization().

                                          The value of the parametrized objects is computed and cached the first time they are required when this context manager is active. The cached values are diff --git a/2.9/generated/torch.nn.utils.parametrize.is_parametrized.html b/2.9/generated/torch.nn.utils.parametrize.is_parametrized.html index 94af1eb4a2b..25b864fff82 100644 --- a/2.9/generated/torch.nn.utils.parametrize.is_parametrized.html +++ b/2.9/generated/torch.nn.utils.parametrize.is_parametrized.html @@ -4404,7 +4404,7 @@
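A sketch, reusing the orthogonal parametrization as an example:

    import torch
    import torch.nn as nn
    import torch.nn.utils.parametrize as parametrize
    from torch.nn.utils.parametrizations import orthogonal

    layer = orthogonal(nn.Linear(5, 5))
    x1, x2 = torch.randn(2, 5), torch.randn(2, 5)
    with parametrize.cached():
        # the orthogonal weight is computed once and reused for both calls
        y1, y2 = layer(x1), layer(x2)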

                                          torch.nn.utils.parametrize.is_parametrized#

                                          -torch.nn.utils.parametrize.is_parametrized(module, tensor_name=None)[source]#
                                          +torch.nn.utils.parametrize.is_parametrized(module, tensor_name=None)[source]#

                                          Determine if a module has a parametrization.

                                          Parameters
                                          diff --git a/2.9/generated/torch.nn.utils.parametrize.register_parametrization.html b/2.9/generated/torch.nn.utils.parametrize.register_parametrization.html index 25396abe8d8..e92089aed90 100644 --- a/2.9/generated/torch.nn.utils.parametrize.register_parametrization.html +++ b/2.9/generated/torch.nn.utils.parametrize.register_parametrization.html @@ -4404,7 +4404,7 @@

                                          torch.nn.utils.parametrize.register_parametrization#

                                          -torch.nn.utils.parametrize.register_parametrization(module, tensor_name, parametrization, *, unsafe=False)[source]#
                                          +torch.nn.utils.parametrize.register_parametrization(module, tensor_name, parametrization, *, unsafe=False)[source]#

                                          Register a parametrization to a tensor in a module.

                                          Assume that tensor_name="weight" for simplicity. When accessing module.weight, the module will return the parametrized version parametrization(module.weight). diff --git a/2.9/generated/torch.nn.utils.parametrize.remove_parametrizations.html b/2.9/generated/torch.nn.utils.parametrize.remove_parametrizations.html index beb829a337f..01e1c789271 100644 --- a/2.9/generated/torch.nn.utils.parametrize.remove_parametrizations.html +++ b/2.9/generated/torch.nn.utils.parametrize.remove_parametrizations.html @@ -4404,7 +4404,7 @@
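A sketch with a symmetric parametrization (the Symmetric module here is illustrative):

    import torch
    import torch.nn as nn
    import torch.nn.utils.parametrize as parametrize

    class Symmetric(nn.Module):
        def forward(self, X):
            # build a symmetric matrix from the upper triangle of X
            return X.triu() + X.triu(1).transpose(-1, -2)

    layer = nn.Linear(4, 4)
    parametrize.register_parametrization(layer, "weight", Symmetric())
    print(torch.allclose(layer.weight, layer.weight.T))  # True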

                                          torch.nn.utils.parametrize.remove_parametrizations#

                                          -torch.nn.utils.parametrize.remove_parametrizations(module, tensor_name, leave_parametrized=True)[source]#
                                          +torch.nn.utils.parametrize.remove_parametrizations(module, tensor_name, leave_parametrized=True)[source]#

                                          Remove the parametrizations on a tensor in a module.

                                          • If leave_parametrized=True, module[tensor_name] will be set to diff --git a/2.9/generated/torch.nn.utils.parametrize.transfer_parametrizations_and_params.html b/2.9/generated/torch.nn.utils.parametrize.transfer_parametrizations_and_params.html index 87ee2cdbd26..6df053a78bc 100644 --- a/2.9/generated/torch.nn.utils.parametrize.transfer_parametrizations_and_params.html +++ b/2.9/generated/torch.nn.utils.parametrize.transfer_parametrizations_and_params.html @@ -4404,7 +4404,7 @@

                                            torch.nn.utils.parametrize.transfer_parametrizations_and_params#

                                            -torch.nn.utils.parametrize.transfer_parametrizations_and_params(from_module, to_module, tensor_name=None)[source]#
                                            +torch.nn.utils.parametrize.transfer_parametrizations_and_params(from_module, to_module, tensor_name=None)[source]#

                                            Transfer parametrizations and the parameters they parametrize from from_module to to_module.

                                            If tensor_name is specified, only transfers the specified parameter, otherwise transfers all parametrized parameters. If those parameters do not exist in to_module, it will create them. diff --git a/2.9/generated/torch.nn.utils.parametrize.type_before_parametrizations.html b/2.9/generated/torch.nn.utils.parametrize.type_before_parametrizations.html index 24b091d9e32..4e66352d840 100644 --- a/2.9/generated/torch.nn.utils.parametrize.type_before_parametrizations.html +++ b/2.9/generated/torch.nn.utils.parametrize.type_before_parametrizations.html @@ -4404,7 +4404,7 @@

                                            torch.nn.utils.parametrize.type_before_parametrizations#

                                            -torch.nn.utils.parametrize.type_before_parametrizations(module)[source]#
                                            +torch.nn.utils.parametrize.type_before_parametrizations(module)[source]#

Return the module type before parametrizations were applied; if the module is not parametrized, return its current type.

                                            Parameters
                                            diff --git a/2.9/generated/torch.nn.utils.prune.BasePruningMethod.html b/2.9/generated/torch.nn.utils.prune.BasePruningMethod.html index d6aeade2dbd..32eaa13679e 100644 --- a/2.9/generated/torch.nn.utils.prune.BasePruningMethod.html +++ b/2.9/generated/torch.nn.utils.prune.BasePruningMethod.html @@ -4404,13 +4404,13 @@

                                            BasePruningMethod#

                                            -class torch.nn.utils.prune.BasePruningMethod[source]#
                                            +class torch.nn.utils.prune.BasePruningMethod[source]#

                                            Abstract base class for creation of new pruning techniques.

Provides a skeleton for customization, requiring subclasses to override methods such as compute_mask() and apply().
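A sketch of a custom method (EveryOtherPruning is a hypothetical name):

    import torch.nn as nn
    import torch.nn.utils.prune as prune

    class EveryOtherPruning(prune.BasePruningMethod):
        PRUNING_TYPE = "unstructured"

        def compute_mask(self, t, default_mask):
            mask = default_mask.clone()
            mask.view(-1)[::2] = 0  # zero out every other entry
            return mask

    layer = nn.Linear(8, 4)
    EveryOtherPruning.apply(layer, "weight")  # adds weight_orig and weight_mask
    print(layer.weight)  # pruned view, recomputed on each forward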

                                            -classmethod apply(module, name, *args, importance_scores=None, **kwargs)[source]#
                                            +classmethod apply(module, name, *args, importance_scores=None, **kwargs)[source]#

                                            Add pruning on the fly and reparametrization of a tensor.

                                            Adds the forward pre-hook that enables pruning on the fly and the reparametrization of a tensor in terms of the original tensor @@ -4437,7 +4437,7 @@

                                            BasePruningMethod
                                            -apply_mask(module)[source]#
                                            +apply_mask(module)[source]#

                                            Simply handles the multiplication between the parameter being pruned and the generated mask.

                                            Fetches the mask and the original tensor from the module and returns the pruned version of the tensor.

                                            @@ -4456,7 +4456,7 @@

                                            BasePruningMethod
                                            -abstract compute_mask(t, default_mask)[source]#
                                            +abstract compute_mask(t, default_mask)[source]#

Compute and return a mask for the input tensor t.

                                            Starting from a base default_mask (which should be a mask of ones if the tensor has not been pruned yet), generate a random mask to @@ -4484,7 +4484,7 @@

                                            BasePruningMethod
                                            -prune(t, default_mask=None, importance_scores=None)[source]#
                                            +prune(t, default_mask=None, importance_scores=None)[source]#

Compute and return a pruned version of the input tensor t.

                                            According to the pruning rule specified in compute_mask().

                                            @@ -4511,7 +4511,7 @@

                                            BasePruningMethod
                                            -remove(module)[source]#
                                            +remove(module)[source]#

                                            Remove the pruning reparameterization from a module.

                                            The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list. diff --git a/2.9/generated/torch.nn.utils.prune.CustomFromMask.html b/2.9/generated/torch.nn.utils.prune.CustomFromMask.html index be62482c644..1a4c670dcb2 100644 --- a/2.9/generated/torch.nn.utils.prune.CustomFromMask.html +++ b/2.9/generated/torch.nn.utils.prune.CustomFromMask.html @@ -4404,10 +4404,10 @@

                                            CustomFromMask#

                                            -class torch.nn.utils.prune.CustomFromMask(mask)[source]#
                                            +class torch.nn.utils.prune.CustomFromMask(mask)[source]#
                                            -classmethod apply(module, name, mask)[source]#
                                            +classmethod apply(module, name, mask)[source]#

                                            Add pruning on the fly and reparametrization of a tensor.

                                            Adds the forward pre-hook that enables pruning on the fly and the reparametrization of a tensor in terms of the original tensor @@ -4425,7 +4425,7 @@

                                            CustomFromMask
                                            -apply_mask(module)[source]#
                                            +apply_mask(module)[source]#

                                            Simply handles the multiplication between the parameter being pruned and the generated mask.

                                            Fetches the mask and the original tensor from the module and returns the pruned version of the tensor.

                                            @@ -4444,7 +4444,7 @@

                                            CustomFromMask
                                            -prune(t, default_mask=None, importance_scores=None)[source]#
                                            +prune(t, default_mask=None, importance_scores=None)[source]#

Compute and return a pruned version of the input tensor t.

                                            According to the pruning rule specified in compute_mask().

                                            @@ -4471,7 +4471,7 @@

                                            CustomFromMask
                                            -remove(module)[source]#
                                            +remove(module)[source]#

                                            Remove the pruning reparameterization from a module.

                                            The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list. diff --git a/2.9/generated/torch.nn.utils.prune.Identity.html b/2.9/generated/torch.nn.utils.prune.Identity.html index a047fffe477..c6e23bf5b8a 100644 --- a/2.9/generated/torch.nn.utils.prune.Identity.html +++ b/2.9/generated/torch.nn.utils.prune.Identity.html @@ -4404,11 +4404,11 @@

                                            Identity#

                                            -class torch.nn.utils.prune.Identity[source]#
                                            +class torch.nn.utils.prune.Identity[source]#

                                            Utility pruning method that does not prune any units but generates the pruning parametrization with a mask of ones.

                                            -classmethod apply(module, name)[source]#
                                            +classmethod apply(module, name)[source]#

                                            Add pruning on the fly and reparametrization of a tensor.

                                            Adds the forward pre-hook that enables pruning on the fly and the reparametrization of a tensor in terms of the original tensor @@ -4426,7 +4426,7 @@

                                            Identity
                                            -apply_mask(module)[source]#
                                            +apply_mask(module)[source]#

                                            Simply handles the multiplication between the parameter being pruned and the generated mask.

                                            Fetches the mask and the original tensor from the module and returns the pruned version of the tensor.

                                            @@ -4445,7 +4445,7 @@

                                            Identity
                                            -prune(t, default_mask=None, importance_scores=None)[source]#
                                            +prune(t, default_mask=None, importance_scores=None)[source]#

Compute and return a pruned version of the input tensor t.

                                            According to the pruning rule specified in compute_mask().

                                            @@ -4472,7 +4472,7 @@

                                            Identity
                                            -remove(module)[source]#
                                            +remove(module)[source]#

                                            Remove the pruning reparameterization from a module.

                                            The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list. diff --git a/2.9/generated/torch.nn.utils.prune.L1Unstructured.html b/2.9/generated/torch.nn.utils.prune.L1Unstructured.html index 1063690a3ab..f94954c4828 100644 --- a/2.9/generated/torch.nn.utils.prune.L1Unstructured.html +++ b/2.9/generated/torch.nn.utils.prune.L1Unstructured.html @@ -4404,7 +4404,7 @@

                                            L1Unstructured#

                                            -class torch.nn.utils.prune.L1Unstructured(amount)[source]#
                                            +class torch.nn.utils.prune.L1Unstructured(amount)[source]#

                                            Prune (currently unpruned) units in a tensor by zeroing out the ones with the lowest L1-norm.

                                            Parameters
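A sketch using the functional wrapper, which applies this method under the hood:

    import torch.nn as nn
    import torch.nn.utils.prune as prune

    layer = nn.Linear(8, 4)
    prune.l1_unstructured(layer, name="weight", amount=0.5)  # drop lowest-|w| half
    print(float(layer.weight_mask.mean()))  # ~0.5 of the entries survive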
                                            @@ -4416,7 +4416,7 @@

                                            L1Unstructured
                                            -classmethod apply(module, name, amount, importance_scores=None)[source]#
                                            +classmethod apply(module, name, amount, importance_scores=None)[source]#

                                            Add pruning on the fly and reparametrization of a tensor.

                                            Adds the forward pre-hook that enables pruning on the fly and the reparametrization of a tensor in terms of the original tensor @@ -4443,7 +4443,7 @@

                                            L1Unstructured
                                            -apply_mask(module)[source]#
                                            +apply_mask(module)[source]#

                                            Simply handles the multiplication between the parameter being pruned and the generated mask.

                                            Fetches the mask and the original tensor from the module and returns the pruned version of the tensor.

                                            @@ -4462,7 +4462,7 @@

                                            L1Unstructured
                                            -prune(t, default_mask=None, importance_scores=None)[source]#
                                            +prune(t, default_mask=None, importance_scores=None)[source]#

Compute and return a pruned version of the input tensor t.

                                            According to the pruning rule specified in compute_mask().

                                            @@ -4489,7 +4489,7 @@

                                            L1Unstructured
                                            -remove(module)[source]#
                                            +remove(module)[source]#

                                            Remove the pruning reparameterization from a module.

                                            The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list. diff --git a/2.9/generated/torch.nn.utils.prune.LnStructured.html b/2.9/generated/torch.nn.utils.prune.LnStructured.html index 4659b170e16..be7c7f27b9e 100644 --- a/2.9/generated/torch.nn.utils.prune.LnStructured.html +++ b/2.9/generated/torch.nn.utils.prune.LnStructured.html @@ -4404,7 +4404,7 @@

                                            LnStructured#

                                            -class torch.nn.utils.prune.LnStructured(amount, n, dim=-1)[source]#
                                            +class torch.nn.utils.prune.LnStructured(amount, n, dim=-1)[source]#

                                            Prune entire (currently unpruned) channels in a tensor based on their Ln-norm.

                                            Parameters
                                            @@ -4422,7 +4422,7 @@

                                            LnStructured
                                            -classmethod apply(module, name, amount, n, dim, importance_scores=None)[source]#
                                            +classmethod apply(module, name, amount, n, dim, importance_scores=None)[source]#

                                            Add pruning on the fly and reparametrization of a tensor.

                                            Adds the forward pre-hook that enables pruning on the fly and the reparametrization of a tensor in terms of the original tensor @@ -4453,7 +4453,7 @@

                                            LnStructured
                                            -apply_mask(module)[source]#
                                            +apply_mask(module)[source]#

                                            Simply handles the multiplication between the parameter being pruned and the generated mask.

                                            Fetches the mask and the original tensor from the module and returns the pruned version of the tensor.

                                            @@ -4472,7 +4472,7 @@

                                            LnStructured
                                            -compute_mask(t, default_mask)[source]#
                                            +compute_mask(t, default_mask)[source]#

Compute and return a mask for the input tensor t.

                                            Starting from a base default_mask (which should be a mask of ones if the tensor has not been pruned yet), generate a mask to apply on @@ -4501,7 +4501,7 @@

                                            LnStructured
                                            -prune(t, default_mask=None, importance_scores=None)[source]#
                                            +prune(t, default_mask=None, importance_scores=None)[source]#

Compute and return a pruned version of the input tensor t.

                                            According to the pruning rule specified in compute_mask().

                                            @@ -4528,7 +4528,7 @@

                                            LnStructured
                                            -remove(module)[source]#
                                            +remove(module)[source]#

                                            Remove the pruning reparameterization from a module.

                                            The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list. diff --git a/2.9/generated/torch.nn.utils.prune.PruningContainer.html b/2.9/generated/torch.nn.utils.prune.PruningContainer.html index 1f38198a0f5..c31bdfd9f2e 100644 --- a/2.9/generated/torch.nn.utils.prune.PruningContainer.html +++ b/2.9/generated/torch.nn.utils.prune.PruningContainer.html @@ -4404,7 +4404,7 @@

                                            PruningContainer#

                                            -class torch.nn.utils.prune.PruningContainer(*args)[source]#
                                            +class torch.nn.utils.prune.PruningContainer(*args)[source]#

                                            Container holding a sequence of pruning methods for iterative pruning.

                                            Keeps track of the order in which pruning methods are applied and handles combining successive pruning calls.

                                            @@ -4412,7 +4412,7 @@

                                            PruningContainer
                                            -add_pruning_method(method)[source]#
                                            +add_pruning_method(method)[source]#

                                            Add a child pruning method to the container.

                                            Parameters
                                            @@ -4424,7 +4424,7 @@

                                            PruningContainer
                                            -classmethod apply(module, name, *args, importance_scores=None, **kwargs)[source]#
                                            +classmethod apply(module, name, *args, importance_scores=None, **kwargs)[source]#

                                            Add pruning on the fly and reparametrization of a tensor.

                                            Adds the forward pre-hook that enables pruning on the fly and the reparametrization of a tensor in terms of the original tensor @@ -4451,7 +4451,7 @@

                                            PruningContainer
                                            -apply_mask(module)[source]#
                                            +apply_mask(module)[source]#

                                            Simply handles the multiplication between the parameter being pruned and the generated mask.

                                            Fetches the mask and the original tensor from the module and returns the pruned version of the tensor.

                                            @@ -4470,7 +4470,7 @@

                                            PruningContainer
                                            -compute_mask(t, default_mask)[source]#
                                            +compute_mask(t, default_mask)[source]#

Apply the latest method by computing the new partial masks and returning their combination with the default_mask.

                                            The new partial mask should be computed on the entries or channels that were not zeroed out by the default_mask. @@ -4505,7 +4505,7 @@

                                            PruningContainer
                                            -prune(t, default_mask=None, importance_scores=None)[source]#
                                            +prune(t, default_mask=None, importance_scores=None)[source]#

Compute and return a pruned version of the input tensor t.

                                            According to the pruning rule specified in compute_mask().

                                            @@ -4532,7 +4532,7 @@

                                            PruningContainer
                                            -remove(module)[source]#
                                            +remove(module)[source]#

                                            Remove the pruning reparameterization from a module.

The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list.

diff --git a/2.9/generated/torch.nn.utils.prune.RandomStructured.html b/2.9/generated/torch.nn.utils.prune.RandomStructured.html
index dbaf4bd0c2b..f55e4945d59 100644
--- a/2.9/generated/torch.nn.utils.prune.RandomStructured.html
+++ b/2.9/generated/torch.nn.utils.prune.RandomStructured.html
@@ -4404,7 +4404,7 @@

                                            RandomStructured#

                                            -class torch.nn.utils.prune.RandomStructured(amount, dim=-1)[source]#
                                            +class torch.nn.utils.prune.RandomStructured(amount, dim=-1)[source]#

                                            Prune entire (currently unpruned) channels in a tensor at random.

                                            Parameters
                                            @@ -4420,7 +4420,7 @@

                                            RandomStructured
                                            -classmethod apply(module, name, amount, dim=-1)[source]#
                                            +classmethod apply(module, name, amount, dim=-1)[source]#

                                            Add pruning on the fly and reparametrization of a tensor.

Adds the forward pre-hook that enables pruning on the fly and the reparametrization of a tensor in terms of the original tensor and the pruning mask.

@@ -4444,7 +4444,7 @@

                                            RandomStructured
                                            -apply_mask(module)[source]#
                                            +apply_mask(module)[source]#

                                            Simply handles the multiplication between the parameter being pruned and the generated mask.

                                            Fetches the mask and the original tensor from the module and returns the pruned version of the tensor.

                                            @@ -4463,7 +4463,7 @@

                                            RandomStructured
                                            -compute_mask(t, default_mask)[source]#
                                            +compute_mask(t, default_mask)[source]#

Compute and return a mask for the input tensor t.

Starting from a base default_mask (which should be a mask of ones if the tensor has not been pruned yet), generate a random mask to apply on top of the default_mask by randomly zeroing out channels along the specified dim of the tensor.

@@ -4492,7 +4492,7 @@

                                            RandomStructured
                                            -prune(t, default_mask=None, importance_scores=None)[source]#
                                            +prune(t, default_mask=None, importance_scores=None)[source]#

Compute and return a pruned version of input tensor t, according to the pruning rule specified in compute_mask().

                                            @@ -4519,7 +4519,7 @@

                                            RandomStructured
                                            -remove(module)[source]#
                                            +remove(module)[source]#

                                            Remove the pruning reparameterization from a module.

The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list.

diff --git a/2.9/generated/torch.nn.utils.prune.RandomUnstructured.html b/2.9/generated/torch.nn.utils.prune.RandomUnstructured.html
index 216705dee84..70dec03c27b 100644
--- a/2.9/generated/torch.nn.utils.prune.RandomUnstructured.html
+++ b/2.9/generated/torch.nn.utils.prune.RandomUnstructured.html
@@ -4404,7 +4404,7 @@

                                            RandomUnstructured#

                                            -class torch.nn.utils.prune.RandomUnstructured(amount)[source]#
                                            +class torch.nn.utils.prune.RandomUnstructured(amount)[source]#

                                            Prune (currently unpruned) units in a tensor at random.

                                            Parameters
                                            @@ -4420,7 +4420,7 @@

                                            RandomUnstructured
                                            -classmethod apply(module, name, amount)[source]#
                                            +classmethod apply(module, name, amount)[source]#

                                            Add pruning on the fly and reparametrization of a tensor.

Adds the forward pre-hook that enables pruning on the fly and the reparametrization of a tensor in terms of the original tensor and the pruning mask.

@@ -4442,7 +4442,7 @@

                                            RandomUnstructured
                                            -apply_mask(module)[source]#
                                            +apply_mask(module)[source]#

                                            Simply handles the multiplication between the parameter being pruned and the generated mask.

                                            Fetches the mask and the original tensor from the module and returns the pruned version of the tensor.

                                            @@ -4461,7 +4461,7 @@

                                            RandomUnstructured
                                            -prune(t, default_mask=None, importance_scores=None)[source]#
                                            +prune(t, default_mask=None, importance_scores=None)[source]#

Compute and return a pruned version of input tensor t, according to the pruning rule specified in compute_mask().

                                            @@ -4488,7 +4488,7 @@

                                            RandomUnstructured
                                            -remove(module)[source]#
                                            +remove(module)[source]#

                                            Remove the pruning reparameterization from a module.

The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list.

diff --git a/2.9/generated/torch.nn.utils.prune.custom_from_mask.html b/2.9/generated/torch.nn.utils.prune.custom_from_mask.html
index 825ae9890a0..610bb8aac1a 100644
--- a/2.9/generated/torch.nn.utils.prune.custom_from_mask.html
+++ b/2.9/generated/torch.nn.utils.prune.custom_from_mask.html
@@ -4404,7 +4404,7 @@

                                            torch.nn.utils.prune.custom_from_mask#

                                            -torch.nn.utils.prune.custom_from_mask(module, name, mask)[source]#
                                            +torch.nn.utils.prune.custom_from_mask(module, name, mask)[source]#

                                            Prune tensor corresponding to parameter called name in module by applying the pre-computed mask in mask.

Modifies module in place (and also returns the modified module) by:
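For illustration, a minimal sketch (the mask shape must match the parameter; values here are illustrative):

import torch
import torch.nn.utils.prune as prune

m = torch.nn.Linear(3, 2)
mask = torch.tensor([[0., 1., 1.], [1., 0., 0.]])  # pre-computed binary mask
prune.custom_from_mask(m, name="weight", mask=mask)
print(m.weight)  # entries where mask == 0 are zeroed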

diff --git a/2.9/generated/torch.nn.utils.prune.global_unstructured.html b/2.9/generated/torch.nn.utils.prune.global_unstructured.html
index 457d8f02326..f779e67655a 100644
--- a/2.9/generated/torch.nn.utils.prune.global_unstructured.html
+++ b/2.9/generated/torch.nn.utils.prune.global_unstructured.html
@@ -4404,7 +4404,7 @@

                                              torch.nn.utils.prune.global_unstructured#

                                              -torch.nn.utils.prune.global_unstructured(parameters, pruning_method, importance_scores=None, **kwargs)[source]#
                                              +torch.nn.utils.prune.global_unstructured(parameters, pruning_method, importance_scores=None, **kwargs)[source]#

                                              Globally prunes tensors corresponding to all parameters in parameters by applying the specified pruning_method.

                                              Modifies modules in place by:
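For illustration, a minimal sketch of a global pruning call (model and layer names are illustrative):

import torch
import torch.nn.utils.prune as prune

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 2))
parameters_to_prune = [(model[0], "weight"), (model[1], "weight")]
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2,  # prune 20% of all weights, pooled across both layers
)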

diff --git a/2.9/generated/torch.nn.utils.prune.identity.html b/2.9/generated/torch.nn.utils.prune.identity.html
index e20f5c4edde..6dbc09b3b53 100644
--- a/2.9/generated/torch.nn.utils.prune.identity.html
+++ b/2.9/generated/torch.nn.utils.prune.identity.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.prune.identity#

                                                -torch.nn.utils.prune.identity(module, name)[source]#
                                                +torch.nn.utils.prune.identity(module, name)[source]#

                                                Apply pruning reparametrization without pruning any units.

Applies pruning reparametrization to the tensor corresponding to the parameter called name in module without actually pruning any units.

diff --git a/2.9/generated/torch.nn.utils.prune.is_pruned.html b/2.9/generated/torch.nn.utils.prune.is_pruned.html
index 0f452be6ffe..60c5f327dab 100644
--- a/2.9/generated/torch.nn.utils.prune.is_pruned.html
+++ b/2.9/generated/torch.nn.utils.prune.is_pruned.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.prune.is_pruned#

                                                -torch.nn.utils.prune.is_pruned(module)[source]#
                                                +torch.nn.utils.prune.is_pruned(module)[source]#

                                                Check if a module is pruned by looking for pruning pre-hooks.

Check whether module is pruned by looking for forward_pre_hooks in its modules that inherit from the BasePruningMethod class.

diff --git a/2.9/generated/torch.nn.utils.prune.l1_unstructured.html b/2.9/generated/torch.nn.utils.prune.l1_unstructured.html
index a1057fae0e3..bc47a481b65 100644
--- a/2.9/generated/torch.nn.utils.prune.l1_unstructured.html
+++ b/2.9/generated/torch.nn.utils.prune.l1_unstructured.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.prune.l1_unstructured#

                                                -torch.nn.utils.prune.l1_unstructured(module, name, amount, importance_scores=None)[source]#
                                                +torch.nn.utils.prune.l1_unstructured(module, name, amount, importance_scores=None)[source]#

                                                Prune tensor by removing units with the lowest L1-norm.
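A minimal usage sketch (not taken from the patched page):

import torch
import torch.nn.utils.prune as prune

m = torch.nn.Linear(4, 2)
prune.l1_unstructured(m, name="weight", amount=0.5)
# Half of the weight entries (smallest |w|) are zeroed; the mask lives in
# m.weight_mask and the original values in m.weight_orig.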

Prunes tensor corresponding to parameter called name in module by removing the specified amount of (currently unpruned) units with the lowest L1-norm.

diff --git a/2.9/generated/torch.nn.utils.prune.ln_structured.html b/2.9/generated/torch.nn.utils.prune.ln_structured.html
index 67c9fcb77dd..5562fb8bb34 100644
--- a/2.9/generated/torch.nn.utils.prune.ln_structured.html
+++ b/2.9/generated/torch.nn.utils.prune.ln_structured.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.prune.ln_structured#

                                                -torch.nn.utils.prune.ln_structured(module, name, amount, n, dim, importance_scores=None)[source]#
                                                +torch.nn.utils.prune.ln_structured(module, name, amount, n, dim, importance_scores=None)[source]#

                                                Prune tensor by removing channels with the lowest Ln-norm along the specified dimension.

Prunes tensor corresponding to parameter called name in module by removing the specified amount of (currently unpruned) channels along the specified dim with the lowest Ln-norm.

diff --git a/2.9/generated/torch.nn.utils.prune.random_structured.html b/2.9/generated/torch.nn.utils.prune.random_structured.html
index 0e6c3aacef7..6dc275a37da 100644
--- a/2.9/generated/torch.nn.utils.prune.random_structured.html
+++ b/2.9/generated/torch.nn.utils.prune.random_structured.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.prune.random_structured#

                                                -torch.nn.utils.prune.random_structured(module, name, amount, dim)[source]#
                                                +torch.nn.utils.prune.random_structured(module, name, amount, dim)[source]#

                                                Prune tensor by removing random channels along the specified dimension.

Prunes tensor corresponding to parameter called name in module by removing the specified amount of (currently unpruned) channels along the specified dim, selected at random.

diff --git a/2.9/generated/torch.nn.utils.prune.random_unstructured.html b/2.9/generated/torch.nn.utils.prune.random_unstructured.html
index 99b417b223a..dc531018c0c 100644
--- a/2.9/generated/torch.nn.utils.prune.random_unstructured.html
+++ b/2.9/generated/torch.nn.utils.prune.random_unstructured.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.prune.random_unstructured#

                                                -torch.nn.utils.prune.random_unstructured(module, name, amount)[source]#
                                                +torch.nn.utils.prune.random_unstructured(module, name, amount)[source]#

                                                Prune tensor by removing random (currently unpruned) units.

Prunes tensor corresponding to parameter called name in module by removing the specified amount of (currently unpruned) units selected at random.

diff --git a/2.9/generated/torch.nn.utils.prune.remove.html b/2.9/generated/torch.nn.utils.prune.remove.html
index 6bc965a733a..8695b4b3275 100644
--- a/2.9/generated/torch.nn.utils.prune.remove.html
+++ b/2.9/generated/torch.nn.utils.prune.remove.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.prune.remove#

                                                -torch.nn.utils.prune.remove(module, name)[source]#
                                                +torch.nn.utils.prune.remove(module, name)[source]#

                                                Remove the pruning reparameterization from a module and the pruning method from the forward hook.
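A minimal sketch of making a pruning permanent (not taken from the patched page):

import torch
import torch.nn.utils.prune as prune

m = torch.nn.Linear(4, 2)
prune.l1_unstructured(m, name="weight", amount=0.5)
prune.remove(m, "weight")   # make pruning permanent
print(prune.is_pruned(m))   # False: hooks and '_orig'/'_mask' entries are gone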

The pruned parameter named name remains permanently pruned, and the parameter named name+'_orig' is removed from the parameter list. Similarly, the buffer named name+'_mask' is removed from the buffers.

diff --git a/2.9/generated/torch.nn.utils.remove_spectral_norm.html b/2.9/generated/torch.nn.utils.remove_spectral_norm.html
index dd99a804f72..3f02b416e7c 100644
--- a/2.9/generated/torch.nn.utils.remove_spectral_norm.html
+++ b/2.9/generated/torch.nn.utils.remove_spectral_norm.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.remove_spectral_norm#

                                                -torch.nn.utils.remove_spectral_norm(module, name='weight')[source]#
                                                +torch.nn.utils.remove_spectral_norm(module, name='weight')[source]#

                                                Remove the spectral normalization reparameterization from a module.

                                                Parameters
diff --git a/2.9/generated/torch.nn.utils.remove_weight_norm.html b/2.9/generated/torch.nn.utils.remove_weight_norm.html
index 5cdf70138ea..3d7a44c51dd 100644
--- a/2.9/generated/torch.nn.utils.remove_weight_norm.html
+++ b/2.9/generated/torch.nn.utils.remove_weight_norm.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.remove_weight_norm#

                                                -torch.nn.utils.remove_weight_norm(module, name='weight')[source]#
                                                +torch.nn.utils.remove_weight_norm(module, name='weight')[source]#

                                                Remove the weight normalization reparameterization from a module.

                                                Parameters
diff --git a/2.9/generated/torch.nn.utils.rnn.PackedSequence.html b/2.9/generated/torch.nn.utils.rnn.PackedSequence.html
index f12f2e13f45..0f7a44e6f7b 100644
--- a/2.9/generated/torch.nn.utils.rnn.PackedSequence.html
+++ b/2.9/generated/torch.nn.utils.rnn.PackedSequence.html
@@ -4404,7 +4404,7 @@

                                                PackedSequence#

                                                -class torch.nn.utils.rnn.PackedSequence(data, batch_sizes=None, sorted_indices=None, unsorted_indices=None)[source]#
                                                +class torch.nn.utils.rnn.PackedSequence(data, batch_sizes=None, sorted_indices=None, unsorted_indices=None)[source]#

                                                Holds the data and list of batch_sizes of a packed sequence.

                                                All RNN modules accept packed sequences as inputs.

                                                @@ -4476,7 +4476,7 @@

                                                PackedSequence
                                                -is_pinned()[source]#
                                                +is_pinned()[source]#

Return true if self.data is stored in pinned memory.

                                                Return type
                                                @@ -4493,7 +4493,7 @@

                                                PackedSequence
                                                -to(dtype: dtype, non_blocking: bool = ..., copy: bool = ...) Self[source]#
                                                +to(dtype: dtype, non_blocking: bool = ..., copy: bool = ...) Self[source]#
                                                to(device: Optional[Union[str, device, int]] = ..., dtype: Optional[dtype] = ..., non_blocking: bool = ..., copy: bool = ...) Self
                                                diff --git a/2.9/generated/torch.nn.utils.rnn.invert_permutation.html b/2.9/generated/torch.nn.utils.rnn.invert_permutation.html index 90a61b937ca..a7b1fa07f05 100644 --- a/2.9/generated/torch.nn.utils.rnn.invert_permutation.html +++ b/2.9/generated/torch.nn.utils.rnn.invert_permutation.html @@ -4404,7 +4404,7 @@

                                                torch.nn.utils.rnn.invert_permutation#

                                                -torch.nn.utils.rnn.invert_permutation(permutation)[source]#
                                                +torch.nn.utils.rnn.invert_permutation(permutation)[source]#

                                                Returns the inverse of permutation.

                                                This is useful for converting between sorted and unsorted indices in a PackedSequence.

diff --git a/2.9/generated/torch.nn.utils.rnn.pack_padded_sequence.html b/2.9/generated/torch.nn.utils.rnn.pack_padded_sequence.html
index 5ddd37cd72f..c846b804d70 100644
--- a/2.9/generated/torch.nn.utils.rnn.pack_padded_sequence.html
+++ b/2.9/generated/torch.nn.utils.rnn.pack_padded_sequence.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.rnn.pack_padded_sequence#

                                                -torch.nn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=False, enforce_sorted=True)[source]#
                                                +torch.nn.utils.rnn.pack_padded_sequence(input, lengths, batch_first=False, enforce_sorted=True)[source]#

                                                Packs a Tensor containing padded sequences of variable length.
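A minimal sketch (not taken from the patched page; shapes are illustrative):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

padded = torch.zeros(5, 3, 10)      # T x B x * with batch_first=False
lengths = torch.tensor([5, 3, 2])   # descending when enforce_sorted=True
packed = pack_padded_sequence(padded, lengths)
print(packed.batch_sizes)           # tensor([3, 3, 2, 1, 1])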

input can be of size T x B x * (if batch_first is False) or B x T x * (if batch_first is True), where T is the length of the longest sequence and B is the batch size.

diff --git a/2.9/generated/torch.nn.utils.rnn.pack_sequence.html b/2.9/generated/torch.nn.utils.rnn.pack_sequence.html
index fdfdf215040..01a26c48d9e 100644
--- a/2.9/generated/torch.nn.utils.rnn.pack_sequence.html
+++ b/2.9/generated/torch.nn.utils.rnn.pack_sequence.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.rnn.pack_sequence#

                                                -torch.nn.utils.rnn.pack_sequence(sequences, enforce_sorted=True)[source]#
                                                +torch.nn.utils.rnn.pack_sequence(sequences, enforce_sorted=True)[source]#

                                                Packs a list of variable length Tensors.

Consecutively calls pad_sequence and pack_padded_sequence.

sequences should be a list of Tensors of size L x *, where L is the length of a sequence and * is any number of trailing dimensions.

diff --git a/2.9/generated/torch.nn.utils.rnn.pad_packed_sequence.html b/2.9/generated/torch.nn.utils.rnn.pad_packed_sequence.html
index c9e0fe9d984..3aaaef713f0 100644
--- a/2.9/generated/torch.nn.utils.rnn.pad_packed_sequence.html
+++ b/2.9/generated/torch.nn.utils.rnn.pad_packed_sequence.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.rnn.pad_packed_sequence#

                                                -torch.nn.utils.rnn.pad_packed_sequence(sequence, batch_first=False, padding_value=0.0, total_length=None)[source]#
                                                +torch.nn.utils.rnn.pad_packed_sequence(sequence, batch_first=False, padding_value=0.0, total_length=None)[source]#

                                                Pad a packed batch of variable length sequences.

                                                It is an inverse operation to pack_padded_sequence().
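Continuing the pack_padded_sequence sketch above, a minimal round trip:

from torch.nn.utils.rnn import pad_packed_sequence

unpacked, lengths = pad_packed_sequence(packed)
print(unpacked.shape)   # torch.Size([5, 3, 10]), i.e. T x B x *
print(lengths)          # tensor([5, 3, 2])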

The returned Tensor’s data will be of size T x B x * (if batch_first is False) or B x T x * (if batch_first is True), where T is the length of the longest sequence and B is the batch size.

diff --git a/2.9/generated/torch.nn.utils.rnn.pad_sequence.html b/2.9/generated/torch.nn.utils.rnn.pad_sequence.html
index d37f31c654c..0cc153ec033 100644
--- a/2.9/generated/torch.nn.utils.rnn.pad_sequence.html
+++ b/2.9/generated/torch.nn.utils.rnn.pad_sequence.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.rnn.pad_sequence#

                                                -torch.nn.utils.rnn.pad_sequence(sequences, batch_first=False, padding_value=0.0, padding_side='right')[source]#
                                                +torch.nn.utils.rnn.pad_sequence(sequences, batch_first=False, padding_value=0.0, padding_side='right')[source]#

                                                Pad a list of variable length Tensors with padding_value.
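A minimal sketch (not taken from the patched page):

import torch
from torch.nn.utils.rnn import pad_sequence

a, b, c = torch.ones(5, 4), torch.ones(3, 4), torch.ones(1, 4)
out = pad_sequence([a, b, c])                       # T x B x *: 5 x 3 x 4
out_bf = pad_sequence([a, b, c], batch_first=True)  # B x T x *: 3 x 5 x 4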

pad_sequence stacks a list of Tensors along a new dimension, and pads them to equal length. sequences can be a list of sequences with size L x *, where L is the length of a sequence and * is any number of trailing dimensions.

diff --git a/2.9/generated/torch.nn.utils.rnn.unpack_sequence.html b/2.9/generated/torch.nn.utils.rnn.unpack_sequence.html
index 626d3c2c50f..e3efa21d0af 100644
--- a/2.9/generated/torch.nn.utils.rnn.unpack_sequence.html
+++ b/2.9/generated/torch.nn.utils.rnn.unpack_sequence.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.rnn.unpack_sequence#

                                                -torch.nn.utils.rnn.unpack_sequence(packed_sequences)[source]#
                                                +torch.nn.utils.rnn.unpack_sequence(packed_sequences)[source]#

                                                Unpack PackedSequence into a list of variable length Tensors.

                                                packed_sequences should be a PackedSequence object.

                                                Example

diff --git a/2.9/generated/torch.nn.utils.rnn.unpad_sequence.html b/2.9/generated/torch.nn.utils.rnn.unpad_sequence.html
index 44e75703644..cde3cb68e69 100644
--- a/2.9/generated/torch.nn.utils.rnn.unpad_sequence.html
+++ b/2.9/generated/torch.nn.utils.rnn.unpad_sequence.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.rnn.unpad_sequence#

                                                -torch.nn.utils.rnn.unpad_sequence(padded_sequences, lengths, batch_first=False)[source]#
                                                +torch.nn.utils.rnn.unpad_sequence(padded_sequences, lengths, batch_first=False)[source]#

                                                Unpad padded Tensor into a list of variable length Tensors.

unpad_sequence unstacks a padded Tensor into a list of variable length Tensors.

                                                Example

diff --git a/2.9/generated/torch.nn.utils.skip_init.html b/2.9/generated/torch.nn.utils.skip_init.html
index d612f975b22..b13258dc9be 100644
--- a/2.9/generated/torch.nn.utils.skip_init.html
+++ b/2.9/generated/torch.nn.utils.skip_init.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.skip_init#

                                                -torch.nn.utils.skip_init(module_cls, *args, **kwargs)[source]#
                                                +torch.nn.utils.skip_init(module_cls, *args, **kwargs)[source]#

                                                Given a module class object and args / kwargs, instantiate the module without initializing parameters / buffers.
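A minimal sketch (not taken from the patched page):

import torch
from torch.nn.utils import skip_init

# Parameters are allocated but left uninitialized, so the default init
# is skipped; apply a custom init afterwards.
m = skip_init(torch.nn.Linear, 10, 5)
torch.nn.init.orthogonal_(m.weight)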

This can be useful if initialization is slow or if custom initialization will be performed, making the default initialization unnecessary. There are some caveats to this, due to the way this function is implemented.

diff --git a/2.9/generated/torch.nn.utils.spectral_norm.html b/2.9/generated/torch.nn.utils.spectral_norm.html
index 721b613ee6f..5a8adabcfba 100644
--- a/2.9/generated/torch.nn.utils.spectral_norm.html
+++ b/2.9/generated/torch.nn.utils.spectral_norm.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.spectral_norm#

                                                -torch.nn.utils.spectral_norm(module, name='weight', n_power_iterations=1, eps=1e-12, dim=None)[source]#
                                                +torch.nn.utils.spectral_norm(module, name='weight', n_power_iterations=1, eps=1e-12, dim=None)[source]#

                                                Apply spectral normalization to a parameter in the given module.
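A minimal usage sketch (the normalization itself is given by the formula below):

import torch
from torch.nn.utils import spectral_norm

m = spectral_norm(torch.nn.Linear(20, 40))
print(m.weight_u.shape)   # power-iteration vector: torch.Size([40])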

\mathbf{W}_{SN} = \dfrac{\mathbf{W}}{\sigma(\mathbf{W})}, \qquad \sigma(\mathbf{W}) = \max_{\mathbf{h}: \mathbf{h} \neq 0} \dfrac{\|\mathbf{W}\mathbf{h}\|_2}{\|\mathbf{h}\|_2}

diff --git a/2.9/generated/torch.nn.utils.spectral_norm.remove_spectral_norm.html b/2.9/generated/torch.nn.utils.spectral_norm.remove_spectral_norm.html
index 28387b940f4..5ce5ab91a73 100644
--- a/2.9/generated/torch.nn.utils.spectral_norm.remove_spectral_norm.html
+++ b/2.9/generated/torch.nn.utils.spectral_norm.remove_spectral_norm.html
@@ -4415,7 +4415,7 @@

                                                torch.nn.utils.spectral_norm.remove_spectral_norm#

                                                -torch.nn.utils.spectral_norm.remove_spectral_norm(module, name='weight')[source]#
                                                +torch.nn.utils.spectral_norm.remove_spectral_norm(module, name='weight')[source]#

                                                Remove the spectral normalization reparameterization from a module.

                                                Parameters
diff --git a/2.9/generated/torch.nn.utils.spectral_norm.spectral_norm.html b/2.9/generated/torch.nn.utils.spectral_norm.spectral_norm.html
index 47ca01ca70b..0fe42c4a251 100644
--- a/2.9/generated/torch.nn.utils.spectral_norm.spectral_norm.html
+++ b/2.9/generated/torch.nn.utils.spectral_norm.spectral_norm.html
@@ -4415,7 +4415,7 @@

                                                torch.nn.utils.spectral_norm.spectral_norm#

                                                -torch.nn.utils.spectral_norm.spectral_norm(module, name='weight', n_power_iterations=1, eps=1e-12, dim=None)[source]#
                                                +torch.nn.utils.spectral_norm.spectral_norm(module, name='weight', n_power_iterations=1, eps=1e-12, dim=None)[source]#

                                                Apply spectral normalization to a parameter in the given module.

\mathbf{W}_{SN} = \dfrac{\mathbf{W}}{\sigma(\mathbf{W})}, \qquad \sigma(\mathbf{W}) = \max_{\mathbf{h}: \mathbf{h} \neq 0} \dfrac{\|\mathbf{W}\mathbf{h}\|_2}{\|\mathbf{h}\|_2}

diff --git a/2.9/generated/torch.nn.utils.stateless.functional_call.html b/2.9/generated/torch.nn.utils.stateless.functional_call.html
index c31bcc28440..f32d7f64f3d 100644
--- a/2.9/generated/torch.nn.utils.stateless.functional_call.html
+++ b/2.9/generated/torch.nn.utils.stateless.functional_call.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.stateless.functional_call#

                                                -torch.nn.utils.stateless.functional_call(module, parameters_and_buffers, args=None, kwargs=None, *, tie_weights=True, strict=False)[source]#
                                                +torch.nn.utils.stateless.functional_call(module, parameters_and_buffers, args=None, kwargs=None, *, tie_weights=True, strict=False)[source]#

                                                Perform a functional call on the module by replacing the module parameters and buffers with the provided ones.
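A minimal sketch (not taken from the patched page):

import torch
from torch.nn.utils.stateless import functional_call

m = torch.nn.Linear(3, 3)
new_params = {name: torch.randn_like(p) for name, p in m.named_parameters()}
out = functional_call(m, new_params, (torch.randn(1, 3),))
# m's own parameters are untouched; the call used new_params instead.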

                                                Warning

diff --git a/2.9/generated/torch.nn.utils.vector_to_parameters.html b/2.9/generated/torch.nn.utils.vector_to_parameters.html
index 3c5147a442d..0d36ba49d7d 100644
--- a/2.9/generated/torch.nn.utils.vector_to_parameters.html
+++ b/2.9/generated/torch.nn.utils.vector_to_parameters.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.vector_to_parameters#

                                                -torch.nn.utils.vector_to_parameters(vec, parameters)[source]#
                                                +torch.nn.utils.vector_to_parameters(vec, parameters)[source]#

                                                Copy slices of a vector into an iterable of parameters.

                                                Parameters
diff --git a/2.9/generated/torch.nn.utils.weight_norm.html b/2.9/generated/torch.nn.utils.weight_norm.html
index ffd2ba522f1..ab7a78f8c09 100644
--- a/2.9/generated/torch.nn.utils.weight_norm.html
+++ b/2.9/generated/torch.nn.utils.weight_norm.html
@@ -4404,7 +4404,7 @@

                                                torch.nn.utils.weight_norm#

                                                -torch.nn.utils.weight_norm(module, name='weight', dim=0)[source]#
                                                +torch.nn.utils.weight_norm(module, name='weight', dim=0)[source]#

                                                Apply weight normalization to a parameter in the given module.
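A minimal usage sketch (the decomposition is given by the formula below):

import torch
from torch.nn.utils import weight_norm

m = weight_norm(torch.nn.Linear(20, 40), name="weight")
print(m.weight_g.shape)   # magnitude g:  torch.Size([40, 1])
print(m.weight_v.shape)   # direction v:  torch.Size([40, 20])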

\mathbf{w} = g \dfrac{\mathbf{v}}{\|\mathbf{v}\|}

diff --git a/2.9/generated/torch.nn.utils.weight_norm.remove_weight_norm.html b/2.9/generated/torch.nn.utils.weight_norm.remove_weight_norm.html
index 0b235b7af1e..34f4cc02264 100644
--- a/2.9/generated/torch.nn.utils.weight_norm.remove_weight_norm.html
+++ b/2.9/generated/torch.nn.utils.weight_norm.remove_weight_norm.html
@@ -4415,7 +4415,7 @@

                                                torch.nn.utils.weight_norm.remove_weight_norm#

                                                -torch.nn.utils.weight_norm.remove_weight_norm(module, name='weight')[source]#
                                                +torch.nn.utils.weight_norm.remove_weight_norm(module, name='weight')[source]#

                                                Remove the weight normalization reparameterization from a module.

                                                Parameters
diff --git a/2.9/generated/torch.nn.utils.weight_norm.weight_norm.html b/2.9/generated/torch.nn.utils.weight_norm.weight_norm.html
index 5b5fa98bb44..1a66edc062a 100644
--- a/2.9/generated/torch.nn.utils.weight_norm.weight_norm.html
+++ b/2.9/generated/torch.nn.utils.weight_norm.weight_norm.html
@@ -4415,7 +4415,7 @@

                                                torch.nn.utils.weight_norm.weight_norm#

                                                -torch.nn.utils.weight_norm.weight_norm(module, name='weight', dim=0)[source]#
                                                +torch.nn.utils.weight_norm.weight_norm(module, name='weight', dim=0)[source]#

                                                Apply weight normalization to a parameter in the given module.

\mathbf{w} = g \dfrac{\mathbf{v}}{\|\mathbf{v}\|}

diff --git a/2.9/generated/torch.no_grad.html b/2.9/generated/torch.no_grad.html
index 2c9f77aef1e..3f70b7d8ec0 100644
--- a/2.9/generated/torch.no_grad.html
+++ b/2.9/generated/torch.no_grad.html
@@ -4404,7 +4404,7 @@

                                                no_grad#

                                                -class torch.no_grad(orig_func=None)[source]#
                                                +class torch.no_grad(orig_func=None)[source]#

                                                Context-manager that disables gradient calculation.
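A minimal sketch of both the context-manager and decorator forms:

import torch

x = torch.tensor([1.0], requires_grad=True)
with torch.no_grad():
    y = x * 2
print(y.requires_grad)           # False

@torch.no_grad()
def doubler(t):
    return t * 2
print(doubler(x).requires_grad)  # False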

Disabling gradient calculation is useful for inference, when you are sure that you will not call Tensor.backward(). It will reduce memory consumption for computations that would otherwise have requires_grad=True.

diff --git a/2.9/generated/torch.norm.html b/2.9/generated/torch.norm.html
index c670f11f814..f8496d7fdc5 100644
--- a/2.9/generated/torch.norm.html
+++ b/2.9/generated/torch.norm.html
@@ -4404,7 +4404,7 @@

                                                torch.norm#

                                                -torch.norm(input, p='fro', dim=None, keepdim=False, out=None, dtype=None)[source]#
                                                +torch.norm(input, p='fro', dim=None, keepdim=False, out=None, dtype=None)[source]#

                                                Returns the matrix norm or vector norm of a given tensor.

                                                Warning

diff --git a/2.9/generated/torch.optim.ASGD.html b/2.9/generated/torch.optim.ASGD.html
index 236142206a7..e26a9acb26a 100644
--- a/2.9/generated/torch.optim.ASGD.html
+++ b/2.9/generated/torch.optim.ASGD.html
@@ -4404,7 +4404,7 @@

                                                ASGD#

                                                -class torch.optim.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0, foreach=None, maximize=False, differentiable=False, capturable=False)[source]#
                                                +class torch.optim.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0, foreach=None, maximize=False, differentiable=False, capturable=False)[source]#

                                                Implements Averaged Stochastic Gradient Descent.

                                                It has been proposed in Acceleration of stochastic approximation by averaging.

                                                @@ -4443,7 +4443,7 @@

                                                ASGD#
                                                -add_param_group(param_group)[source]#
                                                +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.
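For illustration, a minimal sketch (the two-part model and its hyperparameters are illustrative):

import torch

head = torch.nn.Linear(16, 2)
backbone = torch.nn.Linear(16, 16)
opt = torch.optim.ASGD(head.parameters(), lr=0.01)
# Later, unfreeze the backbone and give it its own learning rate.
opt.add_param_group({"params": backbone.parameters(), "lr": 1e-4})
print(len(opt.param_groups))  # 2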

                                                @@ -4457,7 +4457,7 @@

                                                ASGD#
                                                -load_state_dict(state_dict)[source]#
                                                +load_state_dict(state_dict)[source]#

                                                Load the optimizer state.

                                                Parameters
                                                @@ -4510,7 +4510,7 @@

                                                ASGD#
                                                -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                @@ -4544,7 +4544,7 @@

                                                ASGD#
                                                -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                @@ -4581,7 +4581,7 @@

                                                ASGD#
                                                -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                Register a state dict post-hook which will be called after state_dict() is called.

                                                It should have the following signature:

                                                hook(optimizer, state_dict) -> state_dict or None
                                                @@ -4613,7 +4613,7 @@ 

                                                ASGD#
                                                -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                Register a state dict pre-hook which will be called before state_dict() is called.

                                                It should have the following signature:

                                                hook(optimizer) -> None
                                                @@ -4645,7 +4645,7 @@ 

                                                ASGD#
                                                -register_step_post_hook(hook)[source]#
                                                +register_step_post_hook(hook)[source]#

                                                Register an optimizer step post hook which will be called after optimizer step.

                                                It should have the following signature:

                                                hook(optimizer, args, kwargs) -> None
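For illustration, a minimal sketch of registering and removing such a hook (the parameter and hook body are illustrative):

import torch

p = torch.nn.Parameter(torch.zeros(2))
opt = torch.optim.ASGD([p], lr=0.01)

def post_hook(optimizer, args, kwargs):
    print("step finished")

handle = opt.register_step_post_hook(post_hook)
p.grad = torch.ones(2)
opt.step()        # prints "step finished"
handle.remove()   # detach the hook again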
                                                @@ -4668,7 +4668,7 @@ 

                                                ASGD#
                                                -register_step_pre_hook(hook)[source]#
                                                +register_step_pre_hook(hook)[source]#

                                                Register an optimizer step pre hook which will be called before optimizer step.

                                                It should have the following signature:

                                                hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                @@ -4693,7 +4693,7 @@ 

                                                ASGD#
                                                -state_dict()[source]#
                                                +state_dict()[source]#

                                                Return the state of the optimizer as a dict.

                                                It contains two entries:
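The two entries are state (per-parameter optimizer state) and param_groups (the hyperparameter groups). A minimal checkpointing sketch (the file name is illustrative):

import torch

p = torch.nn.Parameter(torch.zeros(2))
opt = torch.optim.ASGD([p], lr=0.01)
sd = opt.state_dict()
print(sorted(sd.keys()))         # ['param_groups', 'state']
torch.save(sd, "asgd_state.pt")  # later: opt.load_state_dict(torch.load("asgd_state.pt"))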

                                                  @@ -4756,7 +4756,7 @@

                                                  ASGD#
                                                  -step(closure=None)[source]#
                                                  +step(closure=None)[source]#

                                                  Perform a single optimization step.

                                                  Parameters
                                                  @@ -4768,7 +4768,7 @@

                                                  ASGD#
                                                  -zero_grad(set_to_none=True)[source]#
                                                  +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                  Parameters
diff --git a/2.9/generated/torch.optim.Adadelta.html b/2.9/generated/torch.optim.Adadelta.html
index 98629e8e7ce..1efc2eee4be 100644
--- a/2.9/generated/torch.optim.Adadelta.html
+++ b/2.9/generated/torch.optim.Adadelta.html
@@ -4404,7 +4404,7 @@

                                                  Adadelta#

                                                  -class torch.optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0, foreach=None, *, capturable=False, maximize=False, differentiable=False)[source]#
                                                  +class torch.optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0, foreach=None, *, capturable=False, maximize=False, differentiable=False)[source]#

                                                  Implements Adadelta algorithm.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \rho \text{ (decay)},\ \lambda \text{ (weight decay)} \\
&\textbf{initialize}: v_0 \leftarrow 0 \text{ (square avg)},\ u_0 \leftarrow 0 \text{ (accumulate variables)} \\
&\textbf{for } t = 1 \textbf{ to } \ldots \textbf{ do} \\
&\qquad g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\qquad \textbf{if } \lambda \neq 0: \ g_t \leftarrow g_t + \lambda \theta_{t-1} \\
&\qquad v_t \leftarrow v_{t-1} \rho + g_t^2 (1 - \rho) \\
&\qquad \Delta x_t \leftarrow \dfrac{\sqrt{u_{t-1} + \epsilon}}{\sqrt{v_t + \epsilon}}\, g_t \\
&\qquad u_t \leftarrow u_{t-1} \rho + \Delta x_t^2 (1 - \rho) \\
&\qquad \theta_t \leftarrow \theta_{t-1} - \gamma \Delta x_t \\
&\textbf{return } \theta_t
\end{aligned}

@@ -4492,7 +4492,7 @@

                                                  Adadelta
                                                  -add_param_group(param_group)[source]#
                                                  +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                  @@ -4506,7 +4506,7 @@

                                                  Adadelta
                                                  -load_state_dict(state_dict)[source]#
                                                  +load_state_dict(state_dict)[source]#

                                                  Load the optimizer state.

                                                  Parameters
                                                  @@ -4559,7 +4559,7 @@

                                                  Adadelta
                                                  -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                  +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                  Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                  @@ -4593,7 +4593,7 @@

                                                  Adadelta
                                                  -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                  +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                  Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                  @@ -4630,7 +4630,7 @@

                                                  Adadelta
                                                  -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                  +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                  Register a state dict post-hook which will be called after state_dict() is called.

                                                  It should have the following signature:

                                                  hook(optimizer, state_dict) -> state_dict or None
                                                  @@ -4662,7 +4662,7 @@ 

                                                  Adadelta
                                                  -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                  +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                  Register a state dict pre-hook which will be called before state_dict() is called.

                                                  It should have the following signature:

                                                  hook(optimizer) -> None
                                                  @@ -4694,7 +4694,7 @@ 

                                                  Adadelta
                                                  -register_step_post_hook(hook)[source]#
                                                  +register_step_post_hook(hook)[source]#

                                                  Register an optimizer step post hook which will be called after optimizer step.

                                                  It should have the following signature:

                                                  hook(optimizer, args, kwargs) -> None
                                                  @@ -4717,7 +4717,7 @@ 

                                                  Adadelta
                                                  -register_step_pre_hook(hook)[source]#
                                                  +register_step_pre_hook(hook)[source]#

                                                  Register an optimizer step pre hook which will be called before optimizer step.

                                                  It should have the following signature:

                                                  hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                  @@ -4742,7 +4742,7 @@ 

                                                  Adadelta
                                                  -state_dict()[source]#
                                                  +state_dict()[source]#

                                                  Return the state of the optimizer as a dict.

                                                  It contains two entries:

                                                    @@ -4805,7 +4805,7 @@

                                                    Adadelta
                                                    -step(closure=None)[source]#
                                                    +step(closure=None)[source]#

                                                    Perform a single optimization step.

                                                    Parameters
                                                    @@ -4817,7 +4817,7 @@

                                                    Adadelta
                                                    -zero_grad(set_to_none=True)[source]#
                                                    +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                    Parameters
diff --git a/2.9/generated/torch.optim.Adafactor.html b/2.9/generated/torch.optim.Adafactor.html
index a31b6366185..5bcb3f656b8 100644
--- a/2.9/generated/torch.optim.Adafactor.html
+++ b/2.9/generated/torch.optim.Adafactor.html
@@ -4634,7 +4634,7 @@

                                                    Adafactor
                                                    -add_param_group(param_group)[source]#
                                                    +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                    @@ -4648,7 +4648,7 @@

                                                    Adafactor
                                                    -load_state_dict(state_dict)[source]#
                                                    +load_state_dict(state_dict)[source]#

                                                    Load the optimizer state.

                                                    Parameters
                                                    @@ -4701,7 +4701,7 @@

                                                    Adafactor
                                                    -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                    +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                    Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                    @@ -4735,7 +4735,7 @@

                                                    Adafactor
                                                    -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                    +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                    Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                    @@ -4772,7 +4772,7 @@

                                                    Adafactor
                                                    -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                    +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                    Register a state dict post-hook which will be called after state_dict() is called.

                                                    It should have the following signature:

                                                    hook(optimizer, state_dict) -> state_dict or None
                                                    @@ -4804,7 +4804,7 @@ 

                                                    Adafactor
                                                    -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                    +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                    Register a state dict pre-hook which will be called before state_dict() is called.

                                                    It should have the following signature:

                                                    hook(optimizer) -> None
                                                    @@ -4836,7 +4836,7 @@ 

                                                    Adafactor
                                                    -register_step_post_hook(hook)[source]#
                                                    +register_step_post_hook(hook)[source]#

                                                    Register an optimizer step post hook which will be called after optimizer step.

                                                    It should have the following signature:

                                                    hook(optimizer, args, kwargs) -> None
                                                    @@ -4859,7 +4859,7 @@ 

                                                    Adafactor
                                                    -register_step_pre_hook(hook)[source]#
                                                    +register_step_pre_hook(hook)[source]#

                                                    Register an optimizer step pre hook which will be called before optimizer step.

                                                    It should have the following signature:

                                                    hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                    @@ -4884,7 +4884,7 @@ 

                                                    Adafactor
                                                    -state_dict()[source]#
                                                    +state_dict()[source]#

                                                    Return the state of the optimizer as a dict.

                                                    It contains two entries:

                                                      @@ -4947,7 +4947,7 @@

                                                      Adafactor
                                                      -step(closure=None)[source]#
                                                      +step(closure=None)[source]#

                                                      Perform a single optimization step.

                                                      Parameters
                                                      @@ -4959,7 +4959,7 @@

                                                      Adafactor
                                                      -zero_grad(set_to_none=True)[source]#
                                                      +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                      Parameters
diff --git a/2.9/generated/torch.optim.Adagrad.html b/2.9/generated/torch.optim.Adagrad.html
index 89475d24d16..ec50b73da99 100644
--- a/2.9/generated/torch.optim.Adagrad.html
+++ b/2.9/generated/torch.optim.Adagrad.html
@@ -4404,7 +4404,7 @@

                                                      Adagrad#

                                                      -class torch.optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10, foreach=None, *, maximize=False, differentiable=False, fused=None)[source]#
                                                      +class torch.optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10, foreach=None, *, maximize=False, differentiable=False, fused=None)[source]#

Implements the Adagrad algorithm.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \lambda \text{ (weight decay)},\\
&\hspace{13mm} \tau \text{ (initial accumulator value)},\ \eta \text{ (lr decay)}\\
&\textbf{initialize}: \mathit{state\_sum}_0 \leftarrow \tau\\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do}\\
&\hspace{5mm} g_t \leftarrow \nabla_\theta f_t(\theta_{t-1})\\
&\hspace{5mm} \tilde{\gamma} \leftarrow \gamma / \big(1 + (t-1)\eta\big)\\
&\hspace{5mm} \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda\theta_{t-1}\\
&\hspace{5mm} \mathit{state\_sum}_t \leftarrow \mathit{state\_sum}_{t-1} + g_t^2\\
&\hspace{5mm} \theta_t \leftarrow \theta_{t-1} - \tilde{\gamma}\, \frac{g_t}{\sqrt{\mathit{state\_sum}_t} + \epsilon}\\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4475,7 +4475,7 @@
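A minimal usage sketch with placeholder data and shapes:

import torch

model = torch.nn.Linear(10, 2)
opt = torch.optim.Adagrad(model.parameters(), lr=0.01, lr_decay=1e-4)

x, y = torch.randn(8, 10), torch.randn(8, 2)
for _ in range(5):
    opt.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()
    opt.step()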

                                                      Adagrad
                                                      -add_param_group(param_group)[source]#
                                                      +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses; a sketch of that pattern follows.
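A sketch of the fine-tuning pattern just described, unfreezing a backbone mid-training under its own learning rate (module names are illustrative):

import torch

backbone = torch.nn.Linear(10, 10)
head = torch.nn.Linear(10, 2)
for p in backbone.parameters():
    p.requires_grad_(False)  # start with the backbone frozen

opt = torch.optim.Adagrad(head.parameters(), lr=0.01)

# ... later, unfreeze the backbone and register it as a new param group ...
for p in backbone.parameters():
    p.requires_grad_(True)
opt.add_param_group({"params": backbone.parameters(), "lr": 1e-3})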

                                                      @@ -4489,7 +4489,7 @@

                                                      Adagrad
                                                      -load_state_dict(state_dict)[source]#
                                                      +load_state_dict(state_dict)[source]#

                                                      Load the optimizer state.

                                                      Parameters
                                                      @@ -4542,7 +4542,7 @@

                                                      Adagrad
                                                      -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                      +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                      Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                      @@ -4576,7 +4576,7 @@

                                                      Adagrad
                                                      -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                      +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                      Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                      @@ -4613,7 +4613,7 @@

                                                      Adagrad
                                                      -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                      +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                      Register a state dict post-hook which will be called after state_dict() is called.

                                                      It should have the following signature:

                                                      hook(optimizer, state_dict) -> state_dict or None
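A sketch of a post hook that augments the assembled dict; if the hook returns a state_dict, that returned dict is the one handed back to the caller (the extra key is purely illustrative):

import torch

opt = torch.optim.Adagrad(torch.nn.Linear(4, 1).parameters())

def tag_state_dict(optimizer, state_dict):
    # Runs after state_dict() is assembled; the returned dict replaces it.
    state_dict["_note"] = "checkpointed with Adagrad"  # illustrative extra entry
    return state_dict

opt.register_state_dict_post_hook(tag_state_dict)
assert "_note" in opt.state_dict()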
                                                      @@ -4645,7 +4645,7 @@ 

                                                      Adagrad
                                                      -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                      +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                      Register a state dict pre-hook which will be called before state_dict() is called.

                                                      It should have the following signature:

                                                      hook(optimizer) -> None
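The pre-hook counterpart receives only the optimizer and can mutate it in place before serialization starts; a sketch with a hypothetical bookkeeping attribute:

import torch

opt = torch.optim.Adagrad(torch.nn.Linear(4, 1).parameters())
opt.saves_requested = 0  # hypothetical attribute, not part of the Optimizer API

def count_saves(optimizer):
    # Runs before state_dict() is built; must return None.
    optimizer.saves_requested += 1

opt.register_state_dict_pre_hook(count_saves)
_ = opt.state_dict()  # triggers the pre-hook first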
                                                      @@ -4677,7 +4677,7 @@ 

                                                      Adagrad
                                                      -register_step_post_hook(hook)[source]#
                                                      +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after the optimizer step.

                                                      It should have the following signature:

                                                      hook(optimizer, args, kwargs) -> None
                                                      @@ -4700,7 +4700,7 @@ 

                                                      Adagrad
                                                      -register_step_pre_hook(hook)[source]#
                                                      +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before the optimizer step.

                                                      It should have the following signature:

                                                      hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                      @@ -4725,13 +4725,13 @@ 

                                                      Adagrad
                                                      -share_memory()[source]#
                                                      +share_memory()[source]#

                                                      Calls tensor.share_memory_() on the state sum tensors.
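This supports the Hogwild-style multiprocessing pattern; a hedged sketch of the intended call order (the spawned worker is a placeholder and left commented out):

import torch
import torch.multiprocessing as mp

model = torch.nn.Linear(4, 1)
model.share_memory()            # move parameters into shared memory
opt = torch.optim.Adagrad(model.parameters(), lr=0.01)
opt.share_memory()              # move the state sum tensors into shared memory too

# Workers spawned afterwards would all update the same shared state, e.g.:
# mp.spawn(train_worker, args=(model, opt), nprocs=2)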

                                                      -state_dict()[source]#
                                                      +state_dict()[source]#

                                                      Return the state of the optimizer as a dict.

                                                      It contains two entries:

                                                        @@ -4794,7 +4794,7 @@

                                                        Adagrad
                                                        -step(closure=None)[source]#
                                                        +step(closure=None)[source]#

                                                        Perform a single optimization step.

                                                        Parameters
                                                        @@ -4806,7 +4806,7 @@

                                                        Adagrad
                                                        -zero_grad(set_to_none=True)[source]#
                                                        +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                        Parameters
                                                        diff --git a/2.9/generated/torch.optim.Adam.html b/2.9/generated/torch.optim.Adam.html index def4b55f086..8e772de0e95 100644 --- a/2.9/generated/torch.optim.Adam.html +++ b/2.9/generated/torch.optim.Adam.html @@ -4404,7 +4404,7 @@

                                                        Adam#

                                                        -class torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, *, foreach=None, maximize=False, capturable=False, differentiable=False, fused=None, decoupled_weight_decay=False)[source]#
                                                        +class torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, *, foreach=None, maximize=False, capturable=False, differentiable=False, fused=None, decoupled_weight_decay=False)[source]#

Implements the Adam algorithm.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\\
&\hspace{13mm} \lambda \text{ (weight decay)},\ \mathit{amsgrad},\ \mathit{maximize},\ \epsilon \text{ (epsilon)}\\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ v_0 \leftarrow 0 \text{ (second moment)},\ v_0^{max} \leftarrow 0\\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do}\\
&\hspace{5mm} \textbf{if}\ \mathit{maximize}:\ g_t \leftarrow -\nabla_\theta f_t(\theta_{t-1})\ \ \textbf{else}\ \ g_t \leftarrow \nabla_\theta f_t(\theta_{t-1})\\
&\hspace{5mm} \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda\theta_{t-1}\\
&\hspace{5mm} m_t \leftarrow \beta_1 m_{t-1} + (1-\beta_1)\, g_t\\
&\hspace{5mm} v_t \leftarrow \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2\\
&\hspace{5mm} \widehat{m}_t \leftarrow m_t / (1-\beta_1^t)\\
&\hspace{5mm} \textbf{if}\ \mathit{amsgrad}:\ v_t^{max} \leftarrow \max(v_{t-1}^{max}, v_t),\quad \widehat{v}_t \leftarrow v_t^{max} / (1-\beta_2^t)\\
&\hspace{5mm} \textbf{else}:\ \widehat{v}_t \leftarrow v_t / (1-\beta_2^t)\\
&\hspace{5mm} \theta_t \leftarrow \theta_{t-1} - \gamma\, \widehat{m}_t / \big(\sqrt{\widehat{v}_t} + \epsilon\big)\\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4515,7 +4515,7 @@
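A minimal usage sketch with placeholder data; amsgrad=True enables the max-tracking branch shown above:

import torch

model = torch.nn.Linear(10, 2)
opt = torch.optim.Adam(model.parameters(), lr=1e-3,
                       betas=(0.9, 0.999), amsgrad=True)

x, y = torch.randn(8, 10), torch.randn(8, 2)
opt.zero_grad()
torch.nn.functional.mse_loss(model(x), y).backward()
opt.step()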

                                                        Adam#
                                                        -add_param_group(param_group)[source]#
                                                        +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                        @@ -4529,7 +4529,7 @@

                                                        Adam#
                                                        -load_state_dict(state_dict)[source]#
                                                        +load_state_dict(state_dict)[source]#

                                                        Load the optimizer state.

                                                        Parameters
                                                        @@ -4582,7 +4582,7 @@

                                                        Adam#
                                                        -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                        +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                        Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                        @@ -4616,7 +4616,7 @@

                                                        Adam#
                                                        -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                        +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                        Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                        @@ -4653,7 +4653,7 @@

                                                        Adam#
                                                        -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                        +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                        Register a state dict post-hook which will be called after state_dict() is called.

                                                        It should have the following signature:

                                                        hook(optimizer, state_dict) -> state_dict or None
                                                        @@ -4685,7 +4685,7 @@ 

                                                        Adam#
                                                        -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                        +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                        Register a state dict pre-hook which will be called before state_dict() is called.

                                                        It should have the following signature:

                                                        hook(optimizer) -> None
                                                        @@ -4717,7 +4717,7 @@ 

                                                        Adam#
                                                        -register_step_post_hook(hook)[source]#
                                                        +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after the optimizer step.

                                                        It should have the following signature:

                                                        hook(optimizer, args, kwargs) -> None
                                                        @@ -4740,7 +4740,7 @@ 

                                                        Adam#
                                                        -register_step_pre_hook(hook)[source]#
                                                        +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before the optimizer step.

                                                        It should have the following signature:

                                                        hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                        @@ -4765,7 +4765,7 @@ 

                                                        Adam#
                                                        -state_dict()[source]#
                                                        +state_dict()[source]#

                                                        Return the state of the optimizer as a dict.

                                                        It contains two entries:

                                                          @@ -4828,7 +4828,7 @@

                                                          Adam#
                                                          -step(closure=None)[source]#
                                                          +step(closure=None)[source]#

                                                          Perform a single optimization step.

                                                          Parameters
                                                          @@ -4840,7 +4840,7 @@

                                                          Adam#
                                                          -zero_grad(set_to_none=True)[source]#
                                                          +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                          Parameters
                                                          diff --git a/2.9/generated/torch.optim.AdamW.html b/2.9/generated/torch.optim.AdamW.html index c52b80698b8..025cacf7a5d 100644 --- a/2.9/generated/torch.optim.AdamW.html +++ b/2.9/generated/torch.optim.AdamW.html @@ -4404,7 +4404,7 @@

                                                          AdamW#

                                                          -class torch.optim.AdamW(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False, *, maximize=False, foreach=None, capturable=False, differentiable=False, fused=None)[source]#
                                                          +class torch.optim.AdamW(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False, *, maximize=False, foreach=None, capturable=False, differentiable=False, fused=None)[source]#

Implements the AdamW algorithm, in which weight decay does not accumulate in the momentum or variance estimates.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \epsilon \text{ (epsilon)},\\
&\hspace{13mm} \lambda \text{ (weight decay)},\ \mathit{amsgrad},\ \mathit{maximize}\\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ v_0 \leftarrow 0 \text{ (second moment)},\ v_0^{max} \leftarrow 0\\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do}\\
&\hspace{5mm} \textbf{if}\ \mathit{maximize}:\ g_t \leftarrow -\nabla_\theta f_t(\theta_{t-1})\ \ \textbf{else}\ \ g_t \leftarrow \nabla_\theta f_t(\theta_{t-1})\\
&\hspace{5mm} \theta_t \leftarrow \theta_{t-1} - \gamma\lambda\theta_{t-1}\\
&\hspace{5mm} m_t \leftarrow \beta_1 m_{t-1} + (1-\beta_1)\, g_t\\
&\hspace{5mm} v_t \leftarrow \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2\\
&\hspace{5mm} \widehat{m}_t \leftarrow m_t / (1-\beta_1^t)\\
&\hspace{5mm} \textbf{if}\ \mathit{amsgrad}:\ v_t^{max} \leftarrow \max(v_{t-1}^{max}, v_t),\quad \widehat{v}_t \leftarrow v_t^{max} / (1-\beta_2^t)\\
&\hspace{5mm} \textbf{else}:\ \widehat{v}_t \leftarrow v_t / (1-\beta_2^t)\\
&\hspace{5mm} \theta_t \leftarrow \theta_t - \gamma\, \widehat{m}_t / \big(\sqrt{\widehat{v}_t} + \epsilon\big)\\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4512,7 +4512,7 @@
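A minimal sketch with placeholder data; note in the update above that the decay is applied straight to the weights rather than folded into g_t as plain Adam would do:

import torch

model = torch.nn.Linear(10, 2)
opt = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)

x, y = torch.randn(8, 10), torch.randn(8, 2)
opt.zero_grad()
torch.nn.functional.mse_loss(model(x), y).backward()
opt.step()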

                                                          AdamW#

                                                          -add_param_group(param_group)[source]#
                                                          +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                          @@ -4526,7 +4526,7 @@

                                                          AdamW#
                                                          -load_state_dict(state_dict)[source]#
                                                          +load_state_dict(state_dict)[source]#

                                                          Load the optimizer state.

                                                          Parameters
                                                          @@ -4579,7 +4579,7 @@

                                                          AdamW#
                                                          -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                          +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                          Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                          @@ -4613,7 +4613,7 @@

                                                          AdamW#
                                                          -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                          +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                          Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                          @@ -4650,7 +4650,7 @@

                                                          AdamW#
                                                          -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                          +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                          Register a state dict post-hook which will be called after state_dict() is called.

                                                          It should have the following signature:

                                                          hook(optimizer, state_dict) -> state_dict or None
                                                          @@ -4682,7 +4682,7 @@ 

                                                          AdamW#
                                                          -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                          +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                          Register a state dict pre-hook which will be called before state_dict() is called.

                                                          It should have the following signature:

                                                          hook(optimizer) -> None
                                                          @@ -4714,7 +4714,7 @@ 

                                                          AdamW#
                                                          -register_step_post_hook(hook)[source]#
                                                          +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after the optimizer step.

                                                          It should have the following signature:

                                                          hook(optimizer, args, kwargs) -> None
                                                          @@ -4737,7 +4737,7 @@ 

                                                          AdamW#
                                                          -register_step_pre_hook(hook)[source]#
                                                          +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before the optimizer step.

                                                          It should have the following signature:

                                                          hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                          @@ -4762,7 +4762,7 @@ 

                                                          AdamW#
                                                          -state_dict()[source]#
                                                          +state_dict()[source]#

                                                          Return the state of the optimizer as a dict.

                                                          It contains two entries:

                                                            @@ -4825,7 +4825,7 @@

                                                            AdamW#
                                                            -step(closure=None)[source]#
                                                            +step(closure=None)[source]#

                                                            Perform a single optimization step.

                                                            Parameters
                                                            @@ -4837,7 +4837,7 @@

                                                            AdamW#
                                                            -zero_grad(set_to_none=True)[source]#
                                                            +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                            Parameters
                                                            diff --git a/2.9/generated/torch.optim.Adamax.html b/2.9/generated/torch.optim.Adamax.html index f7f8395e43e..c666244ef04 100644 --- a/2.9/generated/torch.optim.Adamax.html +++ b/2.9/generated/torch.optim.Adamax.html @@ -4404,7 +4404,7 @@

                                                            Adamax#

                                                            -class torch.optim.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, foreach=None, *, maximize=False, differentiable=False, capturable=False)[source]#
                                                            +class torch.optim.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, foreach=None, *, maximize=False, differentiable=False, capturable=False)[source]#

Implements the Adamax algorithm (a variant of Adam based on the infinity norm).

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\\
&\hspace{13mm} \lambda \text{ (weight decay)},\ \epsilon \text{ (epsilon)}\\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ u_0 \leftarrow 0 \text{ (infinity norm)}\\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do}\\
&\hspace{5mm} g_t \leftarrow \nabla_\theta f_t(\theta_{t-1})\\
&\hspace{5mm} \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda\theta_{t-1}\\
&\hspace{5mm} m_t \leftarrow \beta_1 m_{t-1} + (1-\beta_1)\, g_t\\
&\hspace{5mm} u_t \leftarrow \max(\beta_2 u_{t-1},\ |g_t| + \epsilon)\\
&\hspace{5mm} \theta_t \leftarrow \theta_{t-1} - \dfrac{\gamma\, m_t}{(1-\beta_1^t)\, u_t}\\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4465,7 +4465,7 @@
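A brief construction sketch; the infinity-norm accumulator u_t above takes the place of Adam's second-moment estimate:

import torch

model = torch.nn.Linear(10, 2)
opt = torch.optim.Adamax(model.parameters(), lr=2e-3, betas=(0.9, 0.999))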

                                                            Adamax
                                                            -add_param_group(param_group)[source]#
                                                            +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                            @@ -4479,7 +4479,7 @@

                                                            Adamax
                                                            -load_state_dict(state_dict)[source]#
                                                            +load_state_dict(state_dict)[source]#

                                                            Load the optimizer state.

                                                            Parameters
                                                            @@ -4532,7 +4532,7 @@

                                                            Adamax
                                                            -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                            +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                            Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                            @@ -4566,7 +4566,7 @@

                                                            Adamax
                                                            -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                            +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                            Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                            @@ -4603,7 +4603,7 @@

                                                            Adamax
                                                            -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                            +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                            Register a state dict post-hook which will be called after state_dict() is called.

                                                            It should have the following signature:

                                                            hook(optimizer, state_dict) -> state_dict or None
                                                            @@ -4635,7 +4635,7 @@ 

                                                            Adamax
                                                            -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                            +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                            Register a state dict pre-hook which will be called before state_dict() is called.

                                                            It should have the following signature:

                                                            hook(optimizer) -> None
                                                            @@ -4667,7 +4667,7 @@ 

                                                            Adamax
                                                            -register_step_post_hook(hook)[source]#
                                                            +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after the optimizer step.

                                                            It should have the following signature:

                                                            hook(optimizer, args, kwargs) -> None
                                                            @@ -4690,7 +4690,7 @@ 

                                                            Adamax
                                                            -register_step_pre_hook(hook)[source]#
                                                            +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before the optimizer step.

                                                            It should have the following signature:

                                                            hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                            @@ -4715,7 +4715,7 @@ 

                                                            Adamax
                                                            -state_dict()[source]#
                                                            +state_dict()[source]#

                                                            Return the state of the optimizer as a dict.

                                                            It contains two entries:

                                                              @@ -4778,7 +4778,7 @@

                                                              Adamax
                                                              -step(closure=None)[source]#
                                                              +step(closure=None)[source]#

Perform a single optimization step.

                                                              Parameters
                                                              @@ -4790,7 +4790,7 @@

                                                              Adamax
                                                              -zero_grad(set_to_none=True)[source]#
                                                              +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                              Parameters
                                                              diff --git a/2.9/generated/torch.optim.LBFGS.html b/2.9/generated/torch.optim.LBFGS.html index 25569566e45..ac5f8733070 100644 --- a/2.9/generated/torch.optim.LBFGS.html +++ b/2.9/generated/torch.optim.LBFGS.html @@ -4404,7 +4404,7 @@

                                                              LBFGS#

                                                              -class torch.optim.LBFGS(params, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=None)[source]#
                                                              +class torch.optim.LBFGS(params, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=None)[source]#

Implements the L-BFGS algorithm.

                                                              Heavily inspired by minFunc.
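Unlike the other optimizers on these pages, LBFGS.step() requires a closure that re-evaluates the model, because the closure may be called multiple times per optimization step; a minimal sketch with placeholder data:

import torch

model = torch.nn.Linear(10, 1)
opt = torch.optim.LBFGS(model.parameters(), lr=1.0, max_iter=20)
x, y = torch.randn(32, 10), torch.randn(32, 1)

def closure():
    # LBFGS may evaluate this several times per step(), so it must
    # clear gradients, recompute the loss, and backpropagate itself.
    opt.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()
    return loss

for _ in range(5):
    opt.step(closure)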

                                                              @@ -4443,7 +4443,7 @@

                                                              LBFGS#

                                                              -add_param_group(param_group)[source]#
                                                              +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                              @@ -4457,7 +4457,7 @@

                                                              LBFGS#
                                                              -load_state_dict(state_dict)[source]#
                                                              +load_state_dict(state_dict)[source]#

                                                              Load the optimizer state.

                                                              Parameters
                                                              @@ -4510,7 +4510,7 @@

                                                              LBFGS#
                                                              -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                              +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                              Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                              @@ -4544,7 +4544,7 @@

                                                              LBFGS#
                                                              -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                              +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                              Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                              @@ -4581,7 +4581,7 @@

                                                              LBFGS#
                                                              -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                              +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                              Register a state dict post-hook which will be called after state_dict() is called.

                                                              It should have the following signature:

                                                              hook(optimizer, state_dict) -> state_dict or None
                                                              @@ -4613,7 +4613,7 @@ 

                                                              LBFGS#
                                                              -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                              +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                              Register a state dict pre-hook which will be called before state_dict() is called.

                                                              It should have the following signature:

                                                              hook(optimizer) -> None
                                                              @@ -4645,7 +4645,7 @@ 

                                                              LBFGS#
                                                              -register_step_post_hook(hook)[source]#
                                                              +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after the optimizer step.

                                                              It should have the following signature:

                                                              hook(optimizer, args, kwargs) -> None
                                                              @@ -4668,7 +4668,7 @@ 

                                                              LBFGS#
                                                              -register_step_pre_hook(hook)[source]#
                                                              +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before the optimizer step.

                                                              It should have the following signature:

                                                              hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                              @@ -4693,7 +4693,7 @@ 

                                                              LBFGS#
                                                              -state_dict()[source]#
                                                              +state_dict()[source]#

                                                              Return the state of the optimizer as a dict.

                                                              It contains two entries:

                                                                @@ -4756,7 +4756,7 @@

                                                                LBFGS#
                                                                -step(closure)[source]#
                                                                +step(closure)[source]#

                                                                Perform a single optimization step.

                                                                Parameters
                                                                @@ -4768,7 +4768,7 @@

                                                                LBFGS#
                                                                -zero_grad(set_to_none=True)[source]#
                                                                +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                                Parameters
                                                                diff --git a/2.9/generated/torch.optim.Muon.html b/2.9/generated/torch.optim.Muon.html index 7dbe61f2bf8..18d6273aee2 100644 --- a/2.9/generated/torch.optim.Muon.html +++ b/2.9/generated/torch.optim.Muon.html @@ -4500,7 +4500,7 @@

                                                                Muon#
                                                                -add_param_group(param_group)[source]#
                                                                +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                @@ -4514,7 +4514,7 @@

                                                                Muon#
                                                                -load_state_dict(state_dict)[source]#
                                                                +load_state_dict(state_dict)[source]#

                                                                Load the optimizer state.

                                                                Parameters
                                                                @@ -4567,7 +4567,7 @@

                                                                Muon#
                                                                -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                @@ -4601,7 +4601,7 @@

                                                                Muon#
                                                                -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                @@ -4638,7 +4638,7 @@

                                                                Muon#
                                                                -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                Register a state dict post-hook which will be called after state_dict() is called.

                                                                It should have the following signature:

                                                                hook(optimizer, state_dict) -> state_dict or None
                                                                @@ -4670,7 +4670,7 @@ 

                                                                Muon#
                                                                -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                Register a state dict pre-hook which will be called before state_dict() is called.

                                                                It should have the following signature:

                                                                hook(optimizer) -> None
                                                                @@ -4702,7 +4702,7 @@ 

                                                                Muon#
                                                                -register_step_post_hook(hook)[source]#
                                                                +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after the optimizer step.

                                                                It should have the following signature:

                                                                hook(optimizer, args, kwargs) -> None
                                                                @@ -4725,7 +4725,7 @@ 

                                                                Muon#
                                                                -register_step_pre_hook(hook)[source]#
                                                                +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before the optimizer step.

                                                                It should have the following signature:

                                                                hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                @@ -4750,7 +4750,7 @@ 

                                                                Muon#
                                                                -state_dict()[source]#
                                                                +state_dict()[source]#

                                                                Return the state of the optimizer as a dict.

                                                                It contains two entries:

                                                                -zero_grad(set_to_none=True)[source]#
                                                                +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                                Parameters
                                                                diff --git a/2.9/generated/torch.optim.NAdam.html b/2.9/generated/torch.optim.NAdam.html index c82d59acae7..13571d004bd 100644 --- a/2.9/generated/torch.optim.NAdam.html +++ b/2.9/generated/torch.optim.NAdam.html @@ -4404,7 +4404,7 @@

                                                                NAdam#

                                                                -class torch.optim.NAdam(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, momentum_decay=0.004, decoupled_weight_decay=False, *, foreach=None, maximize=False, capturable=False, differentiable=False)[source]#
                                                                +class torch.optim.NAdam(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, momentum_decay=0.004, decoupled_weight_decay=False, *, foreach=None, maximize=False, capturable=False, differentiable=False)[source]#

Implements the NAdam algorithm.

\begin{aligned}
&\textbf{input}: \gamma_t \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\\
&\hspace{13mm} \lambda \text{ (weight decay)},\ \psi \text{ (momentum decay)},\ \mathit{decoupled\_weight\_decay},\ \mathit{maximize}\\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ v_0 \leftarrow 0 \text{ (second moment)}\\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do}\\
&\hspace{5mm} \textbf{if}\ \mathit{maximize}:\ g_t \leftarrow -\nabla_\theta f_t(\theta_{t-1})\ \ \textbf{else}\ \ g_t \leftarrow \nabla_\theta f_t(\theta_{t-1})\\
&\hspace{5mm} \theta_t \leftarrow \theta_{t-1}\\
&\hspace{5mm} \textbf{if}\ \lambda \neq 0:\\
&\hspace{10mm} \textbf{if}\ \mathit{decoupled\_weight\_decay}:\ \theta_t \leftarrow \theta_{t-1} - \gamma\lambda\theta_{t-1}\\
&\hspace{10mm} \textbf{else}:\ g_t \leftarrow g_t + \lambda\theta_{t-1}\\
&\hspace{5mm} \mu_t \leftarrow \beta_1 \big(1 - \tfrac{1}{2}\, 0.96^{t\psi}\big),\quad \mu_{t+1} \leftarrow \beta_1 \big(1 - \tfrac{1}{2}\, 0.96^{(t+1)\psi}\big)\\
&\hspace{5mm} m_t \leftarrow \beta_1 m_{t-1} + (1-\beta_1)\, g_t\\
&\hspace{5mm} v_t \leftarrow \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2\\
&\hspace{5mm} \widehat{m}_t \leftarrow \mu_{t+1}\, m_t / \Big(1 - \textstyle\prod_{i=1}^{t+1} \mu_i\Big) + (1-\mu_t)\, g_t / \Big(1 - \textstyle\prod_{i=1}^{t} \mu_i\Big)\\
&\hspace{5mm} \widehat{v}_t \leftarrow v_t / (1-\beta_2^t)\\
&\hspace{5mm} \theta_t \leftarrow \theta_t - \gamma\, \widehat{m}_t / \big(\sqrt{\widehat{v}_t} + \epsilon\big)\\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4496,7 +4496,7 @@
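A brief construction sketch; momentum_decay is the ψ above, and decoupled_weight_decay switches the weight-decay branch from the L2-style to the AdamW-style update:

import torch

model = torch.nn.Linear(10, 2)
opt = torch.optim.NAdam(model.parameters(), lr=2e-3,
                        momentum_decay=4e-3, decoupled_weight_decay=True)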

                                                                NAdam#

                                                                -add_param_group(param_group)[source]#
                                                                +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                @@ -4510,7 +4510,7 @@

                                                                NAdam#
                                                                -load_state_dict(state_dict)[source]#
                                                                +load_state_dict(state_dict)[source]#

                                                                Load the optimizer state.

                                                                Parameters
                                                                @@ -4563,7 +4563,7 @@

                                                                NAdam#
                                                                -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                @@ -4597,7 +4597,7 @@

                                                                NAdam#
                                                                -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                @@ -4634,7 +4634,7 @@

                                                                NAdam#
                                                                -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                Register a state dict post-hook which will be called after state_dict() is called.

                                                                It should have the following signature:

                                                                hook(optimizer, state_dict) -> state_dict or None
                                                                @@ -4666,7 +4666,7 @@ 

                                                                NAdam#
                                                                -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                Register a state dict pre-hook which will be called before state_dict() is called.

                                                                It should have the following signature:

                                                                hook(optimizer) -> None
                                                                @@ -4698,7 +4698,7 @@ 

                                                                NAdam#
                                                                -register_step_post_hook(hook)[source]#
                                                                +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after the optimizer step.

                                                                It should have the following signature:

                                                                hook(optimizer, args, kwargs) -> None
                                                                @@ -4721,7 +4721,7 @@ 

                                                                NAdam#
                                                                -register_step_pre_hook(hook)[source]#
                                                                +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before the optimizer step.

                                                                It should have the following signature:

                                                                hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                @@ -4746,7 +4746,7 @@ 

                                                                NAdam#
                                                                -state_dict()[source]#
                                                                +state_dict()[source]#

                                                                Return the state of the optimizer as a dict.

                                                                It contains two entries:

                                                                  @@ -4809,7 +4809,7 @@

                                                                  NAdam#
                                                                  -step(closure=None)[source]#
                                                                  +step(closure=None)[source]#

                                                                  Perform a single optimization step.

                                                                  Parameters
                                                                  @@ -4821,7 +4821,7 @@

                                                                  NAdam#
                                                                  -zero_grad(set_to_none=True)[source]#
                                                                  +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                                  Parameters
                                                                  diff --git a/2.9/generated/torch.optim.Optimizer.add_param_group.html b/2.9/generated/torch.optim.Optimizer.add_param_group.html index b4247cf3dbe..db22ca45b3f 100644 --- a/2.9/generated/torch.optim.Optimizer.add_param_group.html +++ b/2.9/generated/torch.optim.Optimizer.add_param_group.html @@ -4404,7 +4404,7 @@

                                                                  torch.optim.Optimizer.add_param_group#

                                                                  -Optimizer.add_param_group(param_group)[source]#
                                                                  +Optimizer.add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                  diff --git a/2.9/generated/torch.optim.Optimizer.load_state_dict.html b/2.9/generated/torch.optim.Optimizer.load_state_dict.html index 8ff7118df55..8c24d9fae0e 100644 --- a/2.9/generated/torch.optim.Optimizer.load_state_dict.html +++ b/2.9/generated/torch.optim.Optimizer.load_state_dict.html @@ -4404,7 +4404,7 @@

                                                                  torch.optim.Optimizer.load_state_dict#

                                                                  -Optimizer.load_state_dict(state_dict)[source]#
                                                                  +Optimizer.load_state_dict(state_dict)[source]#

                                                                  Load the optimizer state.

                                                                  Parameters
                                                                  diff --git a/2.9/generated/torch.optim.Optimizer.register_load_state_dict_post_hook.html b/2.9/generated/torch.optim.Optimizer.register_load_state_dict_post_hook.html index 482bee0591d..6ded46c62ee 100644 --- a/2.9/generated/torch.optim.Optimizer.register_load_state_dict_post_hook.html +++ b/2.9/generated/torch.optim.Optimizer.register_load_state_dict_post_hook.html @@ -4404,7 +4404,7 @@

                                                                  torch.optim.Optimizer.register_load_state_dict_post_hook#

                                                                  -Optimizer.register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                  +Optimizer.register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                  Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

diff --git a/2.9/generated/torch.optim.Optimizer.register_load_state_dict_pre_hook.html b/2.9/generated/torch.optim.Optimizer.register_load_state_dict_pre_hook.html
index 4d43bf4fff5..f3f15e32a03 100644
--- a/2.9/generated/torch.optim.Optimizer.register_load_state_dict_pre_hook.html
+++ b/2.9/generated/torch.optim.Optimizer.register_load_state_dict_pre_hook.html
@@ -4404,7 +4404,7 @@

                                                                  torch.optim.Optimizer.register_load_state_dict_pre_hook#

                                                                  -Optimizer.register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                  +Optimizer.register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                  Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:
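The signature line falls outside this hunk; per the published docs it is hook(optimizer, state_dict) -> state_dict or None. A sketch of a pre-hook that passes the incoming state through unchanged (hook name is illustrative):

>>> def pre_hook(optimizer, state_dict):
...     # inspect or migrate the incoming state here; returning it or None is allowed
...     return state_dict
>>> optimizer.register_load_state_dict_pre_hook(pre_hook)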

diff --git a/2.9/generated/torch.optim.Optimizer.register_state_dict_post_hook.html b/2.9/generated/torch.optim.Optimizer.register_state_dict_post_hook.html
index 7e24c296146..88ea73dacc8 100644
--- a/2.9/generated/torch.optim.Optimizer.register_state_dict_post_hook.html
+++ b/2.9/generated/torch.optim.Optimizer.register_state_dict_post_hook.html
@@ -4404,7 +4404,7 @@

                                                                  torch.optim.Optimizer.register_state_dict_post_hook#

                                                                  -Optimizer.register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                  +Optimizer.register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                  Register a state dict post-hook which will be called after state_dict() is called.

                                                                  It should have the following signature:

                                                                  hook(optimizer, state_dict) -> state_dict or None
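A sketch matching that signature; the extra key it adds is purely illustrative:

>>> def post_hook(optimizer, state_dict):
...     state_dict["_note"] = "augmented by post-hook"  # hypothetical extra entry
...     return state_dict
>>> optimizer.register_state_dict_post_hook(post_hook)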
                                                                  diff --git a/2.9/generated/torch.optim.Optimizer.register_state_dict_pre_hook.html b/2.9/generated/torch.optim.Optimizer.register_state_dict_pre_hook.html
                                                                  index c5149699a5e..1d294cd356d 100644
                                                                  --- a/2.9/generated/torch.optim.Optimizer.register_state_dict_pre_hook.html
                                                                  +++ b/2.9/generated/torch.optim.Optimizer.register_state_dict_pre_hook.html
                                                                  @@ -4404,7 +4404,7 @@
                                                                   

                                                                  torch.optim.Optimizer.register_state_dict_pre_hook#

                                                                  -Optimizer.register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                  +Optimizer.register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                  Register a state dict pre-hook which will be called before state_dict() is called.

                                                                  It should have the following signature:

                                                                  hook(optimizer) -> None
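A sketch matching that signature, logging just before serialization:

>>> def pre_hook(optimizer) -> None:
...     print("serializing", len(optimizer.param_groups), "param groups")
>>> optimizer.register_state_dict_pre_hook(pre_hook)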
                                                                  diff --git a/2.9/generated/torch.optim.Optimizer.register_step_post_hook.html b/2.9/generated/torch.optim.Optimizer.register_step_post_hook.html
                                                                  index 4a74fd08caf..002310257be 100644
                                                                  --- a/2.9/generated/torch.optim.Optimizer.register_step_post_hook.html
                                                                  +++ b/2.9/generated/torch.optim.Optimizer.register_step_post_hook.html
                                                                  @@ -4404,7 +4404,7 @@
                                                                   

                                                                  torch.optim.Optimizer.register_step_post_hook#

                                                                  -Optimizer.register_step_post_hook(hook)[source]#
                                                                  +Optimizer.register_step_post_hook(hook)[source]#

Register an optimizer step post-hook, which will be called after the optimizer step.

                                                                  It should have the following signature:

                                                                  hook(optimizer, args, kwargs) -> None
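For example, a post-hook that counts completed steps (a sketch; the counter list is illustrative):

>>> steps_taken = []
>>> def post_hook(optimizer, args, kwargs) -> None:
...     steps_taken.append(1)
>>> optimizer.register_step_post_hook(post_hook)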
                                                                  diff --git a/2.9/generated/torch.optim.Optimizer.register_step_pre_hook.html b/2.9/generated/torch.optim.Optimizer.register_step_pre_hook.html
                                                                  index 286727ddffa..55ff334aab0 100644
                                                                  --- a/2.9/generated/torch.optim.Optimizer.register_step_pre_hook.html
                                                                  +++ b/2.9/generated/torch.optim.Optimizer.register_step_pre_hook.html
                                                                  @@ -4404,7 +4404,7 @@
                                                                   

                                                                  torch.optim.Optimizer.register_step_pre_hook#

                                                                  -Optimizer.register_step_pre_hook(hook)[source]#
                                                                  +Optimizer.register_step_pre_hook(hook)[source]#

Register an optimizer step pre-hook, which will be called before the optimizer step.

                                                                  It should have the following signature:

                                                                  hook(optimizer, args, kwargs) -> None or modified args and kwargs
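A sketch of a pre-hook that only observes and leaves args and kwargs untouched:

>>> def pre_hook(optimizer, args, kwargs):
...     print("lr:", optimizer.param_groups[0]["lr"])
...     return None  # None keeps the original args and kwargs
>>> optimizer.register_step_pre_hook(pre_hook)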
                                                                  diff --git a/2.9/generated/torch.optim.Optimizer.state_dict.html b/2.9/generated/torch.optim.Optimizer.state_dict.html
                                                                  index 9d224d7e4a6..853b4d72731 100644
                                                                  --- a/2.9/generated/torch.optim.Optimizer.state_dict.html
                                                                  +++ b/2.9/generated/torch.optim.Optimizer.state_dict.html
                                                                  @@ -4404,7 +4404,7 @@
                                                                   

                                                                  torch.optim.Optimizer.state_dict#

                                                                  -Optimizer.state_dict()[source]#
                                                                  +Optimizer.state_dict()[source]#

                                                                  Return the state of the optimizer as a dict.

                                                                  It contains two entries:
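The entry list is truncated by this hunk; for orientation (the published Optimizer.state_dict docs name the two entries state and param_groups), a quick inspection sketch:

>>> sd = optimizer.state_dict()
>>> sorted(sd.keys())
['param_groups', 'state']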

diff --git a/2.9/generated/torch.optim.Optimizer.step.html b/2.9/generated/torch.optim.Optimizer.step.html
index 722de44ce8f..adad6a6b73b 100644
--- a/2.9/generated/torch.optim.Optimizer.step.html
+++ b/2.9/generated/torch.optim.Optimizer.step.html
@@ -4404,7 +4404,7 @@

                                                                    torch.optim.Optimizer.step#

-Optimizer.step(closure: None = None) -> None[source]#
+Optimizer.step(closure: None = None) -> None[source]#
Optimizer.step(closure: Callable[[], float]) -> float

Perform a single optimization step to update parameters.
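A sketch of both call forms from the overloads above; model, loss_fn, input and target are placeholders:

>>> optimizer.step()  # closure-free form, returns None
>>> def closure():
...     optimizer.zero_grad()
...     loss = loss_fn(model(input), target)
...     loss.backward()
...     return loss.item()
>>> optimizer.step(closure)  # closure form, returns the loss as a float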

diff --git a/2.9/generated/torch.optim.Optimizer.zero_grad.html b/2.9/generated/torch.optim.Optimizer.zero_grad.html
index a790d31318a..5b85afb3c5f 100644
--- a/2.9/generated/torch.optim.Optimizer.zero_grad.html
+++ b/2.9/generated/torch.optim.Optimizer.zero_grad.html
@@ -4404,7 +4404,7 @@

                                                                    torch.optim.Optimizer.zero_grad#

                                                                    -Optimizer.zero_grad(set_to_none=True)[source]#
                                                                    +Optimizer.zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                    Parameters
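(The set_to_none parameter description is cut off by this hunk; per the signature above it defaults to True, which resets .grad to None rather than zero-filling.) A sketch of both modes:

>>> optimizer.zero_grad()                    # grads become None (default)
>>> optimizer.zero_grad(set_to_none=False)   # grads become zero-filled tensors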
diff --git a/2.9/generated/torch.optim.RAdam.html b/2.9/generated/torch.optim.RAdam.html
index fb3c5c8161e..634bce5c123 100644
--- a/2.9/generated/torch.optim.RAdam.html
+++ b/2.9/generated/torch.optim.RAdam.html
@@ -4404,7 +4404,7 @@

                                                                    RAdam#

                                                                    -class torch.optim.RAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, decoupled_weight_decay=False, *, foreach=None, maximize=False, capturable=False, differentiable=False)[source]#
                                                                    +class torch.optim.RAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, decoupled_weight_decay=False, *, foreach=None, maximize=False, capturable=False, differentiable=False)[source]#

                                                                    Implements RAdam algorithm.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \lambda \text{ (weight decay)},\\
&\hspace{12mm} \textit{maximize},\ \epsilon \text{ (epsilon)},\ \textit{decoupled\_weight\_decay} \\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ v_0 \leftarrow 0 \text{ (second moment)},\ \rho_{\infty} \leftarrow 2/(1-\beta_2) - 1 \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad \textbf{if}\ \textit{maximize}: g_t \leftarrow -\nabla_{\theta} f_t(\theta_{t-1})\ \textbf{else}\ g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \theta_t \leftarrow \theta_{t-1} \\
&\quad \textbf{if}\ \lambda \neq 0 \\
&\qquad \textbf{if}\ \textit{decoupled\_weight\_decay}: \theta_t \leftarrow \theta_t - \gamma \lambda \theta_t\ \textbf{else}\ g_t \leftarrow g_t + \lambda \theta_t \\
&\quad m_t \leftarrow \beta_1 m_{t-1} + (1-\beta_1) g_t \\
&\quad v_t \leftarrow \beta_2 v_{t-1} + (1-\beta_2) g_t^2 \\
&\quad \widehat{m_t} \leftarrow m_t / (1-\beta_1^t) \\
&\quad \rho_t \leftarrow \rho_{\infty} - 2 t \beta_2^t / (1-\beta_2^t) \\
&\quad \textbf{if}\ \rho_t > 5 \\
&\qquad l_t \leftarrow \sqrt{1-\beta_2^t} \,/\, (\sqrt{v_t} + \epsilon) \\
&\qquad r_t \leftarrow \sqrt{\frac{(\rho_t-4)(\rho_t-2)\rho_{\infty}}{(\rho_{\infty}-4)(\rho_{\infty}-2)\rho_t}} \\
&\qquad \theta_t \leftarrow \theta_t - \gamma \widehat{m_t} r_t l_t \\
&\quad \textbf{else}: \theta_t \leftarrow \theta_t - \gamma \widehat{m_t} \\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4522,7 +4522,7 @@
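Following the RAdam listing above, a minimal construction sketch using the signature shown (model is a placeholder):

>>> optimizer = torch.optim.RAdam(model.parameters(), lr=1e-3,
...                               decoupled_weight_decay=True)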

                                                                    RAdam#

                                                                    -add_param_group(param_group)[source]#
                                                                    +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                    @@ -4536,7 +4536,7 @@

                                                                    RAdam#
                                                                    -load_state_dict(state_dict)[source]#
                                                                    +load_state_dict(state_dict)[source]#

                                                                    Load the optimizer state.

                                                                    Parameters
                                                                    @@ -4589,7 +4589,7 @@

                                                                    RAdam#
                                                                    -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                    +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                    Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                    @@ -4623,7 +4623,7 @@

                                                                    RAdam#
                                                                    -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                    +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                    Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                    @@ -4660,7 +4660,7 @@

                                                                    RAdam#
                                                                    -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                    +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                    Register a state dict post-hook which will be called after state_dict() is called.

                                                                    It should have the following signature:

                                                                    hook(optimizer, state_dict) -> state_dict or None
                                                                    @@ -4692,7 +4692,7 @@ 

                                                                    RAdam#
                                                                    -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                    +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                    Register a state dict pre-hook which will be called before state_dict() is called.

                                                                    It should have the following signature:

                                                                    hook(optimizer) -> None
                                                                    @@ -4724,7 +4724,7 @@ 

                                                                    RAdam#
                                                                    -register_step_post_hook(hook)[source]#
                                                                    +register_step_post_hook(hook)[source]#

Register an optimizer step post-hook, which will be called after the optimizer step.

                                                                    It should have the following signature:

                                                                    hook(optimizer, args, kwargs) -> None
                                                                    @@ -4747,7 +4747,7 @@ 

                                                                    RAdam#
                                                                    -register_step_pre_hook(hook)[source]#
                                                                    +register_step_pre_hook(hook)[source]#

Register an optimizer step pre-hook, which will be called before the optimizer step.

                                                                    It should have the following signature:

                                                                    hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                    @@ -4772,7 +4772,7 @@ 

                                                                    RAdam#
                                                                    -state_dict()[source]#
                                                                    +state_dict()[source]#

                                                                    Return the state of the optimizer as a dict.

                                                                    It contains two entries:

                                                                      @@ -4835,7 +4835,7 @@

                                                                      RAdam#
                                                                      -step(closure=None)[source]#
                                                                      +step(closure=None)[source]#

                                                                      Perform a single optimization step.

                                                                      Parameters
                                                                      @@ -4847,7 +4847,7 @@

                                                                      RAdam#
                                                                      -zero_grad(set_to_none=True)[source]#
                                                                      +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                      Parameters
diff --git a/2.9/generated/torch.optim.RMSprop.html b/2.9/generated/torch.optim.RMSprop.html
index c070096d784..9a32adfda8b 100644
--- a/2.9/generated/torch.optim.RMSprop.html
+++ b/2.9/generated/torch.optim.RMSprop.html
@@ -4404,7 +4404,7 @@

                                                                      RMSprop#

                                                                      -class torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False, capturable=False, foreach=None, maximize=False, differentiable=False)[source]#
                                                                      +class torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False, capturable=False, foreach=None, maximize=False, differentiable=False)[source]#

                                                                      Implements RMSprop algorithm.

\begin{aligned}
&\textbf{input}: \alpha \text{ (alpha)},\ \gamma \text{ (lr)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \lambda \text{ (weight decay)},\\
&\hspace{12mm} \mu \text{ (momentum)},\ \textit{centered},\ \epsilon \text{ (epsilon)} \\
&\textbf{initialize}: v_0 \leftarrow 0 \text{ (square average)},\ b_0 \leftarrow 0 \text{ (buffer)},\ g^{ave}_0 \leftarrow 0 \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda \theta_{t-1} \\
&\quad v_t \leftarrow \alpha v_{t-1} + (1-\alpha) g_t^2 \\
&\quad \tilde{v}_t \leftarrow v_t \\
&\quad \textbf{if}\ \textit{centered} \\
&\qquad g^{ave}_t \leftarrow \alpha g^{ave}_{t-1} + (1-\alpha) g_t \\
&\qquad \tilde{v}_t \leftarrow \tilde{v}_t - (g^{ave}_t)^2 \\
&\quad \textbf{if}\ \mu > 0 \\
&\qquad b_t \leftarrow \mu b_{t-1} + g_t / (\sqrt{\tilde{v}_t} + \epsilon) \\
&\qquad \theta_t \leftarrow \theta_{t-1} - \gamma b_t \\
&\quad \textbf{else}: \theta_t \leftarrow \theta_{t-1} - \gamma g_t / (\sqrt{\tilde{v}_t} + \epsilon) \\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4516,7 +4516,7 @@
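Following the listing above, a minimal usage sketch in the style of the other optimizer pages (model, input, target and loss_fn are placeholders):

>>> optimizer = torch.optim.RMSprop(model.parameters(), lr=0.01, momentum=0.9)
>>> optimizer.zero_grad()
>>> loss_fn(model(input), target).backward()
>>> optimizer.step()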

                                                                      RMSprop
                                                                      -add_param_group(param_group)[source]#
                                                                      +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                      @@ -4530,7 +4530,7 @@

                                                                      RMSprop
                                                                      -load_state_dict(state_dict)[source]#
                                                                      +load_state_dict(state_dict)[source]#

                                                                      Load the optimizer state.

                                                                      Parameters
                                                                      @@ -4583,7 +4583,7 @@

                                                                      RMSprop
                                                                      -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                      +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                      Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                      @@ -4617,7 +4617,7 @@

                                                                      RMSprop
                                                                      -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                      +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                      Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                      @@ -4654,7 +4654,7 @@

                                                                      RMSprop
                                                                      -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                      +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                      Register a state dict post-hook which will be called after state_dict() is called.

                                                                      It should have the following signature:

                                                                      hook(optimizer, state_dict) -> state_dict or None
                                                                      @@ -4686,7 +4686,7 @@ 

                                                                      RMSprop
                                                                      -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                      +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                      Register a state dict pre-hook which will be called before state_dict() is called.

                                                                      It should have the following signature:

                                                                      hook(optimizer) -> None
                                                                      @@ -4718,7 +4718,7 @@ 

                                                                      RMSprop
                                                                      -register_step_post_hook(hook)[source]#
                                                                      +register_step_post_hook(hook)[source]#

Register an optimizer step post-hook, which will be called after the optimizer step.

                                                                      It should have the following signature:

                                                                      hook(optimizer, args, kwargs) -> None
                                                                      @@ -4741,7 +4741,7 @@ 

                                                                      RMSprop
                                                                      -register_step_pre_hook(hook)[source]#
                                                                      +register_step_pre_hook(hook)[source]#

Register an optimizer step pre-hook, which will be called before the optimizer step.

                                                                      It should have the following signature:

                                                                      hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                      @@ -4766,7 +4766,7 @@ 

                                                                      RMSprop
                                                                      -state_dict()[source]#
                                                                      +state_dict()[source]#

                                                                      Return the state of the optimizer as a dict.

                                                                      It contains two entries:

                                                                        @@ -4829,7 +4829,7 @@

                                                                        RMSprop
                                                                        -step(closure=None)[source]#
                                                                        +step(closure=None)[source]#

                                                                        Perform a single optimization step.

                                                                        Parameters
                                                                        @@ -4841,7 +4841,7 @@

                                                                        RMSprop
                                                                        -zero_grad(set_to_none=True)[source]#
                                                                        +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                        Parameters
diff --git a/2.9/generated/torch.optim.Rprop.html b/2.9/generated/torch.optim.Rprop.html
index 239f21a9a7d..7957c16a54a 100644
--- a/2.9/generated/torch.optim.Rprop.html
+++ b/2.9/generated/torch.optim.Rprop.html
@@ -4404,7 +4404,7 @@

                                                                        Rprop#

                                                                        -class torch.optim.Rprop(params, lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50), *, capturable=False, foreach=None, maximize=False, differentiable=False)[source]#
                                                                        +class torch.optim.Rprop(params, lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50), *, capturable=False, foreach=None, maximize=False, differentiable=False)[source]#

                                                                        Implements the resilient backpropagation algorithm.

\begin{aligned}
&\textbf{input}: \theta_0 \in \mathbf{R}^d \text{ (params)},\ f(\theta) \text{ (objective)},\ \eta_{+/-} \text{ (etaplus, etaminus)},\ \Gamma_{max/min} \text{ (step sizes)} \\
&\textbf{initialize}: g^0_{prev} \leftarrow 0,\ \eta_0 \leftarrow \text{lr (learning rate)} \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \textbf{for}\ i = 0, 1, \ldots, d-1\ \textbf{do} \\
&\qquad \textbf{if}\ g^i_{prev} g^i_t > 0:\ \eta^i_t \leftarrow \min(\eta^i_{t-1} \eta_{+}, \Gamma_{max}) \\
&\qquad \textbf{else if}\ g^i_{prev} g^i_t < 0:\ \eta^i_t \leftarrow \max(\eta^i_{t-1} \eta_{-}, \Gamma_{min}),\ g^i_t \leftarrow 0 \\
&\qquad \textbf{else}:\ \eta^i_t \leftarrow \eta^i_{t-1} \\
&\quad \theta_t \leftarrow \theta_{t-1} - \eta_t \,\mathrm{sign}(g_t) \\
&\quad g_{prev} \leftarrow g_t \\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4473,7 +4473,7 @@
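Following the listing above, a construction sketch using the defaults from the signature (model is a placeholder); the per-parameter step sizes stay clamped to the step_sizes range:

>>> optimizer = torch.optim.Rprop(model.parameters(), lr=0.01,
...                               etas=(0.5, 1.2), step_sizes=(1e-6, 50))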

                                                                        Rprop#

                                                                        -add_param_group(param_group)[source]#
                                                                        +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                        @@ -4487,7 +4487,7 @@

                                                                        Rprop#
                                                                        -load_state_dict(state_dict)[source]#
                                                                        +load_state_dict(state_dict)[source]#

                                                                        Load the optimizer state.

                                                                        Parameters
                                                                        @@ -4540,7 +4540,7 @@

                                                                        Rprop#
                                                                        -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                        +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                        Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                        @@ -4574,7 +4574,7 @@

                                                                        Rprop#
                                                                        -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                        +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                        Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                        @@ -4611,7 +4611,7 @@

                                                                        Rprop#
                                                                        -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                        +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                        Register a state dict post-hook which will be called after state_dict() is called.

                                                                        It should have the following signature:

                                                                        hook(optimizer, state_dict) -> state_dict or None
                                                                        @@ -4643,7 +4643,7 @@ 

                                                                        Rprop#
                                                                        -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                        +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                        Register a state dict pre-hook which will be called before state_dict() is called.

                                                                        It should have the following signature:

                                                                        hook(optimizer) -> None
                                                                        @@ -4675,7 +4675,7 @@ 

                                                                        Rprop#
                                                                        -register_step_post_hook(hook)[source]#
                                                                        +register_step_post_hook(hook)[source]#

Register an optimizer step post-hook, which will be called after the optimizer step.

                                                                        It should have the following signature:

                                                                        hook(optimizer, args, kwargs) -> None
                                                                        @@ -4698,7 +4698,7 @@ 

                                                                        Rprop#
                                                                        -register_step_pre_hook(hook)[source]#
                                                                        +register_step_pre_hook(hook)[source]#

Register an optimizer step pre-hook, which will be called before the optimizer step.

                                                                        It should have the following signature:

                                                                        hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                        @@ -4723,7 +4723,7 @@ 

                                                                        Rprop#
                                                                        -state_dict()[source]#
                                                                        +state_dict()[source]#

                                                                        Return the state of the optimizer as a dict.

                                                                        It contains two entries:

                                                                          @@ -4786,7 +4786,7 @@

                                                                          Rprop#
                                                                          -step(closure=None)[source]#
                                                                          +step(closure=None)[source]#

                                                                          Perform a single optimization step.

                                                                          Parameters
                                                                          @@ -4798,7 +4798,7 @@

                                                                          Rprop#
                                                                          -zero_grad(set_to_none=True)[source]#
                                                                          +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                          Parameters
diff --git a/2.9/generated/torch.optim.SGD.html b/2.9/generated/torch.optim.SGD.html
index 0e0388ff677..55d11553655 100644
--- a/2.9/generated/torch.optim.SGD.html
+++ b/2.9/generated/torch.optim.SGD.html
@@ -4404,7 +4404,7 @@

                                                                          SGD#

                                                                          -class torch.optim.SGD(params, lr=0.001, momentum=0, dampening=0, weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False, fused=None)[source]#
                                                                          +class torch.optim.SGD(params, lr=0.001, momentum=0, dampening=0, weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False, fused=None)[source]#

                                                                          Implements stochastic gradient descent (optionally with momentum).

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \lambda \text{ (weight decay)},\\
&\hspace{12mm} \mu \text{ (momentum)},\ \tau \text{ (dampening)},\ \textit{nesterov},\ \textit{maximize} \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad \textbf{if}\ \textit{maximize}: g_t \leftarrow -\nabla_{\theta} f_t(\theta_{t-1})\ \textbf{else}\ g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda \theta_{t-1} \\
&\quad \textbf{if}\ \mu \neq 0 \\
&\qquad \textbf{if}\ t > 1:\ b_t \leftarrow \mu b_{t-1} + (1-\tau) g_t\ \textbf{else}\ b_t \leftarrow g_t \\
&\qquad \textbf{if}\ \textit{nesterov}:\ g_t \leftarrow g_t + \mu b_t\ \textbf{else}\ g_t \leftarrow b_t \\
&\quad \theta_t \leftarrow \theta_{t-1} - \gamma g_t \\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4518,7 +4518,7 @@
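Following the listing above, a minimal usage sketch (model, input, target and loss_fn are placeholders):

>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> optimizer.zero_grad()
>>> loss_fn(model(input), target).backward()
>>> optimizer.step()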

                                                                          SGD#

                                                                          -add_param_group(param_group)[source]#
                                                                          +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                          @@ -4532,7 +4532,7 @@

                                                                          SGD#
                                                                          -load_state_dict(state_dict)[source]#
                                                                          +load_state_dict(state_dict)[source]#

                                                                          Load the optimizer state.

                                                                          Parameters
                                                                          @@ -4585,7 +4585,7 @@

                                                                          SGD#
                                                                          -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                          +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                          Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                          @@ -4619,7 +4619,7 @@

                                                                          SGD#
                                                                          -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                          +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                          Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                          @@ -4656,7 +4656,7 @@

                                                                          SGD#
                                                                          -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                          +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                          Register a state dict post-hook which will be called after state_dict() is called.

                                                                          It should have the following signature:

                                                                          hook(optimizer, state_dict) -> state_dict or None
                                                                          @@ -4688,7 +4688,7 @@ 

                                                                          SGD#
                                                                          -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                          +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                          Register a state dict pre-hook which will be called before state_dict() is called.

                                                                          It should have the following signature:

                                                                          hook(optimizer) -> None
                                                                          @@ -4720,7 +4720,7 @@ 

                                                                          SGD#
                                                                          -register_step_post_hook(hook)[source]#
                                                                          +register_step_post_hook(hook)[source]#

Register an optimizer step post-hook, which will be called after the optimizer step.

                                                                          It should have the following signature:

                                                                          hook(optimizer, args, kwargs) -> None
                                                                          @@ -4743,7 +4743,7 @@ 

                                                                          SGD#
                                                                          -register_step_pre_hook(hook)[source]#
                                                                          +register_step_pre_hook(hook)[source]#

Register an optimizer step pre-hook, which will be called before the optimizer step.

                                                                          It should have the following signature:

                                                                          hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                          @@ -4768,7 +4768,7 @@ 

                                                                          SGD#
                                                                          -state_dict()[source]#
                                                                          +state_dict()[source]#

                                                                          Return the state of the optimizer as a dict.

                                                                          It contains two entries:

                                                                            @@ -4831,7 +4831,7 @@

                                                                            SGD#
                                                                            -step(closure=None)[source]#
                                                                            +step(closure=None)[source]#

                                                                            Perform a single optimization step.

                                                                            Parameters
                                                                            @@ -4843,7 +4843,7 @@

                                                                            SGD#
                                                                            -zero_grad(set_to_none=True)[source]#
                                                                            +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                            Parameters
diff --git a/2.9/generated/torch.optim.SparseAdam.html b/2.9/generated/torch.optim.SparseAdam.html
index df88602daec..475b0ab70c0 100644
--- a/2.9/generated/torch.optim.SparseAdam.html
+++ b/2.9/generated/torch.optim.SparseAdam.html
@@ -4404,7 +4404,7 @@

                                                                            SparseAdam#

                                                                            -class torch.optim.SparseAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, maximize=False)[source]#
                                                                            +class torch.optim.SparseAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, maximize=False)[source]#

SparseAdam implements a masked version of the Adam algorithm suitable for sparse gradients. Currently, due to implementation constraints (explained below), SparseAdam is only intended for a narrow subset of use cases, specifically
@@ -4463,7 +4463,7 @@

                                                                            SparseAdam
                                                                            -add_param_group(param_group)[source]#
                                                                            +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                            @@ -4477,7 +4477,7 @@

                                                                            SparseAdam
                                                                            -load_state_dict(state_dict)[source]#
                                                                            +load_state_dict(state_dict)[source]#

                                                                            Load the optimizer state.

                                                                            Parameters
                                                                            @@ -4530,7 +4530,7 @@

                                                                            SparseAdam
                                                                            -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                            +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                            Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                            @@ -4564,7 +4564,7 @@

                                                                            SparseAdam
                                                                            -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                            +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                            Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                            @@ -4601,7 +4601,7 @@

                                                                            SparseAdam
                                                                            -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                            +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                            Register a state dict post-hook which will be called after state_dict() is called.

                                                                            It should have the following signature:

                                                                            hook(optimizer, state_dict) -> state_dict or None
                                                                            @@ -4633,7 +4633,7 @@ 

                                                                            SparseAdam
                                                                            -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                            +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                            Register a state dict pre-hook which will be called before state_dict() is called.

                                                                            It should have the following signature:

                                                                            hook(optimizer) -> None
                                                                            @@ -4665,7 +4665,7 @@ 

                                                                            SparseAdam
                                                                            -register_step_post_hook(hook)[source]#
                                                                            +register_step_post_hook(hook)[source]#

Register an optimizer step post-hook, which will be called after the optimizer step.

                                                                            It should have the following signature:

                                                                            hook(optimizer, args, kwargs) -> None
                                                                            @@ -4688,7 +4688,7 @@ 

                                                                            SparseAdam
                                                                            -register_step_pre_hook(hook)[source]#
                                                                            +register_step_pre_hook(hook)[source]#

Register an optimizer step pre-hook, which will be called before the optimizer step.

                                                                            It should have the following signature:

                                                                            hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                            @@ -4713,7 +4713,7 @@ 

                                                                            SparseAdam
                                                                            -state_dict()[source]#
                                                                            +state_dict()[source]#

                                                                            Return the state of the optimizer as a dict.

                                                                            It contains two entries:

                                                                              @@ -4776,7 +4776,7 @@

                                                                              SparseAdam
                                                                              -step(closure=None)[source]#
                                                                              +step(closure=None)[source]#

                                                                              Perform a single optimization step.

                                                                              Parameters
                                                                              @@ -4788,7 +4788,7 @@

                                                                              SparseAdam
                                                                              -zero_grad(set_to_none=True)[source]#
                                                                              +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                              Parameters
diff --git a/2.9/generated/torch.optim.adadelta.Adadelta.html b/2.9/generated/torch.optim.adadelta.Adadelta.html
index 73a11e41b82..c3dc7fa3e3e 100644
--- a/2.9/generated/torch.optim.adadelta.Adadelta.html
+++ b/2.9/generated/torch.optim.adadelta.Adadelta.html
@@ -4415,7 +4415,7 @@

                                                                              Adadelta#

                                                                              -class torch.optim.adadelta.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0, foreach=None, *, capturable=False, maximize=False, differentiable=False)[source]#
                                                                              +class torch.optim.adadelta.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0, foreach=None, *, capturable=False, maximize=False, differentiable=False)[source]#

                                                                              Implements Adadelta algorithm.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \rho \text{ (decay)},\ \lambda \text{ (weight decay)} \\
&\textbf{initialize}: v_0 \leftarrow 0 \text{ (square avg)},\ u_0 \leftarrow 0 \text{ (accumulate variables)} \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda \theta_{t-1} \\
&\quad v_t \leftarrow \rho v_{t-1} + (1-\rho) g_t^2 \\
&\quad \Delta x_t \leftarrow \frac{\sqrt{u_{t-1} + \epsilon}}{\sqrt{v_t + \epsilon}} g_t \\
&\quad u_t \leftarrow \rho u_{t-1} + (1-\rho) \Delta x_t^2 \\
&\quad \theta_t \leftarrow \theta_{t-1} - \gamma \Delta x_t \\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4503,7 +4503,7 @@
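Following the listing above, a construction sketch from the signature shown (model is a placeholder); note that lr scales the delta before it is applied:

>>> optimizer = torch.optim.adadelta.Adadelta(model.parameters(), lr=1.0, rho=0.9)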

                                                                              Adadelta
                                                                              -add_param_group(param_group)[source]#
                                                                              +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                              @@ -4517,7 +4517,7 @@

                                                                              Adadelta
                                                                              -load_state_dict(state_dict)[source]#
                                                                              +load_state_dict(state_dict)[source]#

                                                                              Load the optimizer state.

                                                                              Parameters
                                                                              @@ -4570,7 +4570,7 @@

                                                                              Adadelta
                                                                              -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                              +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                              Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                              @@ -4604,7 +4604,7 @@

                                                                              Adadelta
                                                                              -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                              +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                              Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                              @@ -4641,7 +4641,7 @@

                                                                              Adadelta
                                                                              -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                              +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                              Register a state dict post-hook which will be called after state_dict() is called.

                                                                              It should have the following signature:

                                                                              hook(optimizer, state_dict) -> state_dict or None
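A sketch of a post-hook following this signature; the hook name and the added key are illustrative:

import torch

opt = torch.optim.SGD([torch.nn.Parameter(torch.randn(2))], lr=0.1)

def tag_checkpoint(optimizer, state_dict):
    # Per the signature above, returning a dict replaces the one
    # state_dict() hands back; returning None keeps in-place edits.
    state_dict["note"] = "tagged by post-hook"
    return state_dict

opt.register_state_dict_post_hook(tag_checkpoint)
print(opt.state_dict()["note"])  # 'tagged by post-hook'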
                                                                              @@ -4673,7 +4673,7 @@ 

                                                                              Adadelta
                                                                              -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                              +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                              Register a state dict pre-hook which will be called before state_dict() is called.

                                                                              It should have the following signature:

                                                                              hook(optimizer) -> None
                                                                              @@ -4705,7 +4705,7 @@ 

                                                                              Adadelta
                                                                              -register_step_post_hook(hook)[source]#
                                                                              +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after an optimizer step.

                                                                              It should have the following signature:

                                                                              hook(optimizer, args, kwargs) -> None
                                                                              @@ -4728,7 +4728,7 @@ 

                                                                              Adadelta
                                                                              -register_step_pre_hook(hook)[source]#
                                                                              +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before an optimizer step.

                                                                              It should have the following signature:

                                                                              hook(optimizer, args, kwargs) -> None or modified args and kwargs
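A sketch registering both step hooks with these signatures; the hooks here only log, and returning None from the pre-hook leaves args and kwargs unchanged:

import torch

opt = torch.optim.SGD([torch.nn.Parameter(torch.randn(2))], lr=0.1)

def pre(optimizer, args, kwargs):
    print("about to step")

def post(optimizer, args, kwargs):
    print("step finished")

opt.register_step_pre_hook(pre)
opt.register_step_post_hook(post)
opt.step()  # prints both messages around the update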
                                                                              @@ -4753,7 +4753,7 @@ 

                                                                              Adadelta
                                                                              -state_dict()[source]#
                                                                              +state_dict()[source]#

                                                                              Return the state of the optimizer as a dict.

                                                                              It contains two entries:

                                                                                @@ -4816,7 +4816,7 @@

                                                                                Adadelta
                                                                                -step(closure=None)[source]#
                                                                                +step(closure=None)[source]#

                                                                                Perform a single optimization step.

                                                                                Parameters
                                                                                @@ -4828,7 +4828,7 @@

                                                                                Adadelta
                                                                                -zero_grad(set_to_none=True)[source]#
                                                                                +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                                Parameters
diff --git a/2.9/generated/torch.optim.adadelta.adadelta.html b/2.9/generated/torch.optim.adadelta.adadelta.html
index f3f7c292555..34e00240883 100644
--- a/2.9/generated/torch.optim.adadelta.adadelta.html
+++ b/2.9/generated/torch.optim.adadelta.adadelta.html
@@ -4415,7 +4415,7 @@

                                                                                torch.optim.adadelta.adadelta#

                                                                                -torch.optim.adadelta.adadelta(params, grads, square_avgs, acc_deltas, state_steps, capturable=False, foreach=None, differentiable=False, has_complex=False, *, lr, rho, eps, weight_decay, maximize)[source]#
                                                                                +torch.optim.adadelta.adadelta(params, grads, square_avgs, acc_deltas, state_steps, capturable=False, foreach=None, differentiable=False, has_complex=False, *, lr, rho, eps, weight_decay, maximize)[source]#

Functional API that performs the Adadelta algorithm computation.

                                                                                See Adadelta for details.
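A hedged sketch of driving the functional form directly, with the caller owning the per-parameter state tensors; shapes and hyperparameters are illustrative, and the update is applied in place:

import torch
from torch.optim.adadelta import adadelta

p = torch.randn(3, requires_grad=True)
p.sum().backward()

square_avgs = [torch.zeros_like(p)]   # caller-managed running state
acc_deltas = [torch.zeros_like(p)]
state_steps = [torch.tensor(0.0)]

with torch.no_grad():  # parameters are mutated in place
    adadelta([p], [p.grad], square_avgs, acc_deltas, state_steps,
             lr=1.0, rho=0.9, eps=1e-6, weight_decay=0.0, maximize=False)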

diff --git a/2.9/generated/torch.optim.adagrad.Adagrad.html b/2.9/generated/torch.optim.adagrad.Adagrad.html
index e2ee9f73e8e..0337d4c5d07 100644
--- a/2.9/generated/torch.optim.adagrad.Adagrad.html
+++ b/2.9/generated/torch.optim.adagrad.Adagrad.html
@@ -4415,7 +4415,7 @@

                                                                                Adagrad#

                                                                                -class torch.optim.adagrad.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10, foreach=None, *, maximize=False, differentiable=False, fused=None)[source]#
                                                                                +class torch.optim.adagrad.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10, foreach=None, *, maximize=False, differentiable=False, fused=None)[source]#

Implements the Adagrad algorithm.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \lambda \text{ (weight decay)},\ \tau \text{ (initial accumulator value)},\ \eta \text{ (lr decay)} \\
&\textbf{initialize}: \mathit{state\_sum}_0 \leftarrow \tau \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \tilde{\gamma} \leftarrow \gamma / (1 + (t - 1)\eta) \\
&\quad \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda \theta_{t-1} \\
&\quad \mathit{state\_sum}_t \leftarrow \mathit{state\_sum}_{t-1} + g_t^2 \\
&\quad \theta_t \leftarrow \theta_{t-1} - \tilde{\gamma}\, \frac{g_t}{\sqrt{\mathit{state\_sum}_t} + \epsilon} \\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4486,7 +4486,7 @@
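A minimal usage sketch via the torch.optim alias; the model and data are illustrative, and the keyword values restate the defaults above:

import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.01, lr_decay=0,
                                weight_decay=0, initial_accumulator_value=0,
                                eps=1e-10)
model(torch.randn(4, 10)).sum().backward()
optimizer.step()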

                                                                                Adagrad
                                                                                -add_param_group(param_group)[source]#
                                                                                +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                @@ -4500,7 +4500,7 @@

                                                                                Adagrad
                                                                                -load_state_dict(state_dict)[source]#
                                                                                +load_state_dict(state_dict)[source]#

                                                                                Load the optimizer state.

                                                                                Parameters
                                                                                @@ -4553,7 +4553,7 @@

                                                                                Adagrad
                                                                                -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                @@ -4587,7 +4587,7 @@

                                                                                Adagrad
                                                                                -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                @@ -4624,7 +4624,7 @@

                                                                                Adagrad
                                                                                -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                Register a state dict post-hook which will be called after state_dict() is called.

                                                                                It should have the following signature:

                                                                                hook(optimizer, state_dict) -> state_dict or None
                                                                                @@ -4656,7 +4656,7 @@ 

                                                                                Adagrad
                                                                                -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                It should have the following signature:

                                                                                hook(optimizer) -> None
                                                                                @@ -4688,7 +4688,7 @@ 

                                                                                Adagrad
                                                                                -register_step_post_hook(hook)[source]#
                                                                                +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after an optimizer step.

                                                                                It should have the following signature:

                                                                                hook(optimizer, args, kwargs) -> None
                                                                                @@ -4711,7 +4711,7 @@ 

                                                                                Adagrad
                                                                                -register_step_pre_hook(hook)[source]#
                                                                                +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before an optimizer step.

                                                                                It should have the following signature:

                                                                                hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                @@ -4736,13 +4736,13 @@ 

                                                                                Adagrad
                                                                                -share_memory()[source]#
                                                                                +share_memory()[source]#

Call tensor.share_memory_() on the state sum tensors.
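This enables Hogwild-style training, where workers update the same shared state; a hedged sketch, assuming Adagrad's state sums exist at construction time (worker function and sizes are illustrative):

import torch
import torch.multiprocessing as mp

def worker(model, optimizer):
    # Each process updates the same shared parameters and state sums.
    model(torch.randn(2, 4)).sum().backward()
    optimizer.step()

if __name__ == "__main__":
    model = torch.nn.Linear(4, 1)
    model.share_memory()          # share parameters across processes
    opt = torch.optim.Adagrad(model.parameters(), lr=0.01)
    opt.share_memory()            # share the state sum tensors too
    p = mp.Process(target=worker, args=(model, opt))
    p.start()
    p.join()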

                                                                                -state_dict()[source]#
                                                                                +state_dict()[source]#

                                                                                Return the state of the optimizer as a dict.

                                                                                It contains two entries:

                                                                                  @@ -4805,7 +4805,7 @@

                                                                                  Adagrad
                                                                                  -step(closure=None)[source]#
                                                                                  +step(closure=None)[source]#

                                                                                  Perform a single optimization step.

                                                                                  Parameters
                                                                                  @@ -4817,7 +4817,7 @@

                                                                                  Adagrad
                                                                                  -zero_grad(set_to_none=True)[source]#
                                                                                  +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                                  Parameters
diff --git a/2.9/generated/torch.optim.adagrad.adagrad.html b/2.9/generated/torch.optim.adagrad.adagrad.html
index 0c4c70edb30..ac215b7e756 100644
--- a/2.9/generated/torch.optim.adagrad.adagrad.html
+++ b/2.9/generated/torch.optim.adagrad.adagrad.html
@@ -4415,7 +4415,7 @@

                                                                                  torch.optim.adagrad.adagrad#

                                                                                  -torch.optim.adagrad.adagrad(params, grads, state_sums, state_steps, fused=None, grad_scale=None, found_inf=None, has_sparse_grad=False, foreach=None, differentiable=False, has_complex=False, *, lr, weight_decay, lr_decay, eps, maximize)[source]#
                                                                                  +torch.optim.adagrad.adagrad(params, grads, state_sums, state_steps, fused=None, grad_scale=None, found_inf=None, has_sparse_grad=False, foreach=None, differentiable=False, has_complex=False, *, lr, weight_decay, lr_decay, eps, maximize)[source]#

Functional API that performs the Adagrad algorithm computation.

                                                                                  See Adagrad for details.

diff --git a/2.9/generated/torch.optim.adam.Adam.html b/2.9/generated/torch.optim.adam.Adam.html
index 91f32bc60e8..852ca18d131 100644
--- a/2.9/generated/torch.optim.adam.Adam.html
+++ b/2.9/generated/torch.optim.adam.Adam.html
@@ -4415,7 +4415,7 @@

                                                                                  Adam#

                                                                                  -class torch.optim.adam.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, *, foreach=None, maximize=False, capturable=False, differentiable=False, fused=None, decoupled_weight_decay=False)[source]#
                                                                                  +class torch.optim.adam.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, *, foreach=None, maximize=False, capturable=False, differentiable=False, fused=None, decoupled_weight_decay=False)[source]#

Implements the Adam algorithm.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)}, \\
&\hspace{12mm} \lambda \text{ (weight decay)},\ \mathit{amsgrad},\ \mathit{maximize},\ \epsilon \text{ (epsilon)} \\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ v_0 \leftarrow 0 \text{ (second moment)},\ v_0^{\max} \leftarrow 0 \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad \textbf{if}\ \mathit{maximize}:\ g_t \leftarrow -\nabla_{\theta} f_t(\theta_{t-1})\ \textbf{else}\ g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda \theta_{t-1} \\
&\quad m_t \leftarrow \beta_1 m_{t-1} + (1 - \beta_1) g_t \\
&\quad v_t \leftarrow \beta_2 v_{t-1} + (1 - \beta_2) g_t^2 \\
&\quad \widehat{m_t} \leftarrow m_t / (1 - \beta_1^t) \\
&\quad \textbf{if}\ \mathit{amsgrad}:\ v_t^{\max} \leftarrow \max(v_{t-1}^{\max}, v_t),\quad \widehat{v_t} \leftarrow v_t^{\max} / (1 - \beta_2^t) \\
&\quad \textbf{else}:\ \widehat{v_t} \leftarrow v_t / (1 - \beta_2^t) \\
&\quad \theta_t \leftarrow \theta_{t-1} - \gamma\, \widehat{m_t} / (\sqrt{\widehat{v_t}} + \epsilon) \\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4526,7 +4526,7 @@
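A minimal training-loop sketch via the torch.optim alias; the model, data, and loss are illustrative:

import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3,
                             betas=(0.9, 0.999), eps=1e-8)
for _ in range(3):
    optimizer.zero_grad()
    loss = model(torch.randn(4, 10)).pow(2).mean()
    loss.backward()
    optimizer.step()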

                                                                                  Adam#
                                                                                  -add_param_group(param_group)[source]#
                                                                                  +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                  @@ -4540,7 +4540,7 @@

                                                                                  Adam#
                                                                                  -load_state_dict(state_dict)[source]#
                                                                                  +load_state_dict(state_dict)[source]#

                                                                                  Load the optimizer state.

                                                                                  Parameters
                                                                                  @@ -4593,7 +4593,7 @@

                                                                                  Adam#
                                                                                  -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                  +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                  Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                  @@ -4627,7 +4627,7 @@

                                                                                  Adam#
                                                                                  -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                  +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                  Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                  @@ -4664,7 +4664,7 @@

                                                                                  Adam#
                                                                                  -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                  +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                  Register a state dict post-hook which will be called after state_dict() is called.

                                                                                  It should have the following signature:

                                                                                  hook(optimizer, state_dict) -> state_dict or None
                                                                                  @@ -4696,7 +4696,7 @@ 

                                                                                  Adam#
                                                                                  -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                  +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                  Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                  It should have the following signature:

                                                                                  hook(optimizer) -> None
                                                                                  @@ -4728,7 +4728,7 @@ 

                                                                                  Adam#
                                                                                  -register_step_post_hook(hook)[source]#
                                                                                  +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after an optimizer step.

                                                                                  It should have the following signature:

                                                                                  hook(optimizer, args, kwargs) -> None
                                                                                  @@ -4751,7 +4751,7 @@ 

                                                                                  Adam#
                                                                                  -register_step_pre_hook(hook)[source]#
                                                                                  +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before an optimizer step.

                                                                                  It should have the following signature:

                                                                                  hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                  @@ -4776,7 +4776,7 @@ 

                                                                                  Adam#
                                                                                  -state_dict()[source]#
                                                                                  +state_dict()[source]#

                                                                                  Return the state of the optimizer as a dict.

                                                                                  It contains two entries:

                                                                                    @@ -4839,7 +4839,7 @@

                                                                                    Adam#
                                                                                    -step(closure=None)[source]#
                                                                                    +step(closure=None)[source]#

                                                                                    Perform a single optimization step.

                                                                                    Parameters
                                                                                    @@ -4851,7 +4851,7 @@

                                                                                    Adam#
                                                                                    -zero_grad(set_to_none=True)[source]#
                                                                                    +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                                    Parameters
diff --git a/2.9/generated/torch.optim.adam.adam.html b/2.9/generated/torch.optim.adam.adam.html
index 156e2d5294f..0a4cbfcbfa8 100644
--- a/2.9/generated/torch.optim.adam.adam.html
+++ b/2.9/generated/torch.optim.adam.adam.html
@@ -4415,7 +4415,7 @@

                                                                                    torch.optim.adam.adam#

                                                                                    -torch.optim.adam.adam(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach=None, capturable=False, differentiable=False, fused=None, grad_scale=None, found_inf=None, has_complex=False, decoupled_weight_decay=False, *, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)[source]#
                                                                                    +torch.optim.adam.adam(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach=None, capturable=False, differentiable=False, fused=None, grad_scale=None, found_inf=None, has_complex=False, decoupled_weight_decay=False, *, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)[source]#

Functional API that performs the Adam algorithm computation.

                                                                                    See Adam for details.

diff --git a/2.9/generated/torch.optim.adamax.Adamax.html b/2.9/generated/torch.optim.adamax.Adamax.html
index 9c079e96bfd..cfc5c7f95ef 100644
--- a/2.9/generated/torch.optim.adamax.Adamax.html
+++ b/2.9/generated/torch.optim.adamax.Adamax.html
@@ -4415,7 +4415,7 @@

                                                                                    Adamax#

                                                                                    -class torch.optim.adamax.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, foreach=None, *, maximize=False, differentiable=False, capturable=False)[source]#
                                                                                    +class torch.optim.adamax.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, foreach=None, *, maximize=False, differentiable=False, capturable=False)[source]#

Implements the Adamax algorithm (a variant of Adam based on the infinity norm).

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \lambda \text{ (weight decay)},\ \epsilon \text{ (epsilon)} \\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ u_0 \leftarrow 0 \text{ (infinity norm)} \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \textbf{if}\ \lambda \neq 0:\ g_t \leftarrow g_t + \lambda \theta_{t-1} \\
&\quad m_t \leftarrow \beta_1 m_{t-1} + (1 - \beta_1) g_t \\
&\quad u_t \leftarrow \max(\beta_2 u_{t-1}, |g_t| + \epsilon) \\
&\quad \theta_t \leftarrow \theta_{t-1} - \frac{\gamma m_t}{(1 - \beta_1^t)\, u_t} \\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4476,7 +4476,7 @@
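A minimal usage sketch; the model and data are illustrative, and lr restates the 0.002 default from the signature above:

import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adamax(model.parameters(), lr=2e-3,
                               betas=(0.9, 0.999))
model(torch.randn(4, 10)).sum().backward()
optimizer.step()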

                                                                                    Adamax
                                                                                    -add_param_group(param_group)[source]#
                                                                                    +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                    @@ -4490,7 +4490,7 @@

                                                                                    Adamax
                                                                                    -load_state_dict(state_dict)[source]#
                                                                                    +load_state_dict(state_dict)[source]#

                                                                                    Load the optimizer state.

                                                                                    Parameters
                                                                                    @@ -4543,7 +4543,7 @@

                                                                                    Adamax
                                                                                    -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                    +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                    Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                    @@ -4577,7 +4577,7 @@

                                                                                    Adamax
                                                                                    -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                    +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                    Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                    @@ -4614,7 +4614,7 @@

                                                                                    Adamax
                                                                                    -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                    +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                    Register a state dict post-hook which will be called after state_dict() is called.

                                                                                    It should have the following signature:

                                                                                    hook(optimizer, state_dict) -> state_dict or None
                                                                                    @@ -4646,7 +4646,7 @@ 

                                                                                    Adamax
                                                                                    -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                    +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                    Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                    It should have the following signature:

                                                                                    hook(optimizer) -> None
                                                                                    @@ -4678,7 +4678,7 @@ 

                                                                                    Adamax
                                                                                    -register_step_post_hook(hook)[source]#
                                                                                    +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after an optimizer step.

                                                                                    It should have the following signature:

                                                                                    hook(optimizer, args, kwargs) -> None
                                                                                    @@ -4701,7 +4701,7 @@ 

                                                                                    Adamax
                                                                                    -register_step_pre_hook(hook)[source]#
                                                                                    +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before an optimizer step.

                                                                                    It should have the following signature:

                                                                                    hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                    @@ -4726,7 +4726,7 @@ 

                                                                                    Adamax
                                                                                    -state_dict()[source]#
                                                                                    +state_dict()[source]#

                                                                                    Return the state of the optimizer as a dict.

                                                                                    It contains two entries:

                                                                                      @@ -4789,7 +4789,7 @@

                                                                                      Adamax
                                                                                      -step(closure=None)[source]#
                                                                                      +step(closure=None)[source]#

Perform a single optimization step.

                                                                                      Parameters
                                                                                      @@ -4801,7 +4801,7 @@

                                                                                      Adamax
                                                                                      -zero_grad(set_to_none=True)[source]#
                                                                                      +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                                      Parameters
diff --git a/2.9/generated/torch.optim.adamax.adamax.html b/2.9/generated/torch.optim.adamax.adamax.html
index 6a07d89246c..cfb21bc43e9 100644
--- a/2.9/generated/torch.optim.adamax.adamax.html
+++ b/2.9/generated/torch.optim.adamax.adamax.html
@@ -4415,7 +4415,7 @@

                                                                                      torch.optim.adamax.adamax#

                                                                                      -torch.optim.adamax.adamax(params, grads, exp_avgs, exp_infs, state_steps, foreach=None, maximize=False, differentiable=False, capturable=False, has_complex=False, *, eps, beta1, beta2, lr, weight_decay)[source]#
                                                                                      +torch.optim.adamax.adamax(params, grads, exp_avgs, exp_infs, state_steps, foreach=None, maximize=False, differentiable=False, capturable=False, has_complex=False, *, eps, beta1, beta2, lr, weight_decay)[source]#

Functional API that performs the Adamax algorithm computation.

                                                                                      See Adamax for details.

diff --git a/2.9/generated/torch.optim.adamw.AdamW.html b/2.9/generated/torch.optim.adamw.AdamW.html
index 5a66aa3e0be..4d702dc7175 100644
--- a/2.9/generated/torch.optim.adamw.AdamW.html
+++ b/2.9/generated/torch.optim.adamw.AdamW.html
@@ -4415,7 +4415,7 @@

                                                                                      AdamW#

                                                                                      -class torch.optim.adamw.AdamW(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False, *, maximize=False, foreach=None, capturable=False, differentiable=False, fused=None)[source]#
                                                                                      +class torch.optim.adamw.AdamW(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False, *, maximize=False, foreach=None, capturable=False, differentiable=False, fused=None)[source]#

Implements the AdamW algorithm, in which weight decay does not accumulate in either the momentum or the variance.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \epsilon \text{ (epsilon)}, \\
&\hspace{12mm} \lambda \text{ (weight decay)},\ \mathit{amsgrad},\ \mathit{maximize} \\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ v_0 \leftarrow 0 \text{ (second moment)},\ v_0^{\max} \leftarrow 0 \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad \textbf{if}\ \mathit{maximize}:\ g_t \leftarrow -\nabla_{\theta} f_t(\theta_{t-1})\ \textbf{else}\ g_t \leftarrow \nabla_{\theta} f_t(\theta_{t-1}) \\
&\quad \theta_t \leftarrow \theta_{t-1} - \gamma \lambda \theta_{t-1} \\
&\quad m_t \leftarrow \beta_1 m_{t-1} + (1 - \beta_1) g_t \\
&\quad v_t \leftarrow \beta_2 v_{t-1} + (1 - \beta_2) g_t^2 \\
&\quad \widehat{m_t} \leftarrow m_t / (1 - \beta_1^t) \\
&\quad \textbf{if}\ \mathit{amsgrad}:\ v_t^{\max} \leftarrow \max(v_{t-1}^{\max}, v_t),\quad \widehat{v_t} \leftarrow v_t^{\max} / (1 - \beta_2^t) \\
&\quad \textbf{else}:\ \widehat{v_t} \leftarrow v_t / (1 - \beta_2^t) \\
&\quad \theta_t \leftarrow \theta_t - \gamma\, \widehat{m_t} / (\sqrt{\widehat{v_t}} + \epsilon) \\
&\textbf{return}\ \theta_t
\end{aligned}

@@ -4523,7 +4523,7 @@
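A minimal sketch; the practical difference from Adam is visible in the listing above, where weight decay shrinks the parameters directly instead of entering the gradient (the model is illustrative):

import torch

model = torch.nn.Linear(10, 2)
# weight_decay is decoupled: applied to the weights, not the gradient.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)
model(torch.randn(4, 10)).sum().backward()
optimizer.step()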

                                                                                      AdamW#

                                                                                      -add_param_group(param_group)[source]#
                                                                                      +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                      @@ -4537,7 +4537,7 @@

                                                                                      AdamW#
                                                                                      -load_state_dict(state_dict)[source]#
                                                                                      +load_state_dict(state_dict)[source]#

                                                                                      Load the optimizer state.

                                                                                      Parameters
                                                                                      @@ -4590,7 +4590,7 @@

                                                                                      AdamW#
                                                                                      -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                      +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                      Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                      @@ -4624,7 +4624,7 @@

                                                                                      AdamW#
                                                                                      -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                      +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                      Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                      @@ -4661,7 +4661,7 @@

                                                                                      AdamW#
                                                                                      -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                      +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                      Register a state dict post-hook which will be called after state_dict() is called.

                                                                                      It should have the following signature:

                                                                                      hook(optimizer, state_dict) -> state_dict or None
                                                                                      @@ -4693,7 +4693,7 @@ 

                                                                                      AdamW#
                                                                                      -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                      +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                      Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                      It should have the following signature:

                                                                                      hook(optimizer) -> None
                                                                                      @@ -4725,7 +4725,7 @@ 

                                                                                      AdamW#
                                                                                      -register_step_post_hook(hook)[source]#
                                                                                      +register_step_post_hook(hook)[source]#

Register an optimizer step post hook which will be called after an optimizer step.

                                                                                      It should have the following signature:

                                                                                      hook(optimizer, args, kwargs) -> None
                                                                                      @@ -4748,7 +4748,7 @@ 

                                                                                      AdamW#
                                                                                      -register_step_pre_hook(hook)[source]#
                                                                                      +register_step_pre_hook(hook)[source]#

Register an optimizer step pre hook which will be called before an optimizer step.

                                                                                      It should have the following signature:

                                                                                      hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                      @@ -4773,7 +4773,7 @@ 

                                                                                      AdamW#
                                                                                      -state_dict()[source]#
                                                                                      +state_dict()[source]#

                                                                                      Return the state of the optimizer as a dict.

                                                                                      It contains two entries:

                                                                                        @@ -4836,7 +4836,7 @@

                                                                                        AdamW#
                                                                                        -step(closure=None)[source]#
                                                                                        +step(closure=None)[source]#

                                                                                        Perform a single optimization step.

                                                                                        Parameters
                                                                                        @@ -4848,7 +4848,7 @@

                                                                                        AdamW#
                                                                                        -zero_grad(set_to_none=True)[source]#
                                                                                        +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                                        Parameters
diff --git a/2.9/generated/torch.optim.adamw.adamw.html b/2.9/generated/torch.optim.adamw.adamw.html
index 6b46418a96a..86d1fd2f183 100644
--- a/2.9/generated/torch.optim.adamw.adamw.html
+++ b/2.9/generated/torch.optim.adamw.adamw.html
@@ -4415,7 +4415,7 @@

                                                                                        torch.optim.adamw.adamw#

                                                                                        -torch.optim.adamw.adamw(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach=None, capturable=False, differentiable=False, fused=None, grad_scale=None, found_inf=None, has_complex=False, *, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)[source]#
                                                                                        +torch.optim.adamw.adamw(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach=None, capturable=False, differentiable=False, fused=None, grad_scale=None, found_inf=None, has_complex=False, *, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)[source]#

Functional API that performs the AdamW algorithm computation.

                                                                                        See AdamW for details.

diff --git a/2.9/generated/torch.optim.asgd.ASGD.html b/2.9/generated/torch.optim.asgd.ASGD.html
index e2d51a4d10c..34a21582934 100644
--- a/2.9/generated/torch.optim.asgd.ASGD.html
+++ b/2.9/generated/torch.optim.asgd.ASGD.html
@@ -4415,7 +4415,7 @@

                                                                                        ASGD#

                                                                                        -class torch.optim.asgd.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0, foreach=None, maximize=False, differentiable=False, capturable=False)[source]#
                                                                                        +class torch.optim.asgd.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0, foreach=None, maximize=False, differentiable=False, capturable=False)[source]#

                                                                                        Implements Averaged Stochastic Gradient Descent.

                                                                                        It has been proposed in Acceleration of stochastic approximation by averaging.
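A minimal usage sketch; the model and data are illustrative, and the keyword values restate the defaults from the signature above (t0 is the point, in steps, at which averaging begins):

import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.ASGD(model.parameters(), lr=0.01, lambd=1e-4,
                             alpha=0.75, t0=1e6)
model(torch.randn(4, 10)).sum().backward()
optimizer.step()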

                                                                                        @@ -4454,7 +4454,7 @@

                                                                                        ASGD#
                                                                                        -add_param_group(param_group)[source]#
                                                                                        +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                        @@ -4468,7 +4468,7 @@

                                                                                        ASGD#
                                                                                        -load_state_dict(state_dict)[source]#
                                                                                        +load_state_dict(state_dict)[source]#

                                                                                        Load the optimizer state.

                                                                                        Parameters
                                                                                        @@ -4521,7 +4521,7 @@

                                                                                        ASGD#
                                                                                        -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                        +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                        Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                        @@ -4555,7 +4555,7 @@

                                                                                        ASGD#
                                                                                        -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                        +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                        Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                        @@ -4592,7 +4592,7 @@

                                                                                        ASGD#
                                                                                        -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                        +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                        Register a state dict post-hook which will be called after state_dict() is called.

                                                                                        It should have the following signature:

                                                                                        hook(optimizer, state_dict) -> state_dict or None
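For example, a minimal post-hook matching that signature (the 'note' key is purely illustrative):

>>> def add_note(optimizer, state_dict):
...     state_dict['note'] = 'asgd-run-1'  # attach run metadata
...     return state_dict
>>> optimizer.register_state_dict_post_hook(add_note)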
                                                                                        @@ -4624,7 +4624,7 @@ 

                                                                                        ASGD#
                                                                                        -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                        +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                        Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                        It should have the following signature:

                                                                                        hook(optimizer) -> None
                                                                                        @@ -4656,7 +4656,7 @@ 

                                                                                        ASGD#
                                                                                        -register_step_post_hook(hook)[source]#
                                                                                        +register_step_post_hook(hook)[source]#

                                                                                        Register an optimizer step post hook which will be called after optimizer step.

                                                                                        It should have the following signature:

                                                                                        hook(optimizer, args, kwargs) -> None
                                                                                        @@ -4679,7 +4679,7 @@ 

                                                                                        ASGD#
                                                                                        -register_step_pre_hook(hook)[source]#
                                                                                        +register_step_pre_hook(hook)[source]#

                                                                                        Register an optimizer step pre hook which will be called before optimizer step.

                                                                                        It should have the following signature:

                                                                                        hook(optimizer, args, kwargs) -> None or modified args and kwargs
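A minimal sketch of such a pre-hook (illustrative only; it logs and returns None, leaving args and kwargs unchanged):

>>> def log_step(optimizer, args, kwargs):
...     print('about to step')
>>> handle = optimizer.register_step_pre_hook(log_step)
>>> # ... training ...
>>> handle.remove()  # the returned handle removes the hook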
                                                                                        @@ -4704,7 +4704,7 @@ 

                                                                                        ASGD#
                                                                                        -state_dict()[source]#
                                                                                        +state_dict()[source]#

                                                                                        Return the state of the optimizer as a dict.

                                                                                        It contains two entries:

                                                                                          @@ -4767,7 +4767,7 @@

                                                                                          ASGD#
                                                                                          -step(closure=None)[source]#
                                                                                          +step(closure=None)[source]#

                                                                                          Perform a single optimization step.

                                                                                          Parameters
                                                                                          @@ -4779,7 +4779,7 @@

                                                                                          ASGD#
                                                                                          -zero_grad(set_to_none=True)[source]#
                                                                                          +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                                          Parameters
diff --git a/2.9/generated/torch.optim.asgd.asgd.html b/2.9/generated/torch.optim.asgd.asgd.html
index b678ace4967..3ca59e9ec1a 100644
--- a/2.9/generated/torch.optim.asgd.asgd.html
+++ b/2.9/generated/torch.optim.asgd.asgd.html
@@ -4415,7 +4415,7 @@

                                                                                          torch.optim.asgd.asgd#

                                                                                          -torch.optim.asgd.asgd(params, grads, axs, mus, etas, state_steps, foreach=None, maximize=False, differentiable=False, capturable=False, has_complex=False, *, lambd, lr, t0, alpha, weight_decay)[source]#
                                                                                          +torch.optim.asgd.asgd(params, grads, axs, mus, etas, state_steps, foreach=None, maximize=False, differentiable=False, capturable=False, has_complex=False, *, lambd, lr, t0, alpha, weight_decay)[source]#

Functional API that performs the ASGD algorithm computation.

                                                                                          See ASGD for details.

diff --git a/2.9/generated/torch.optim.lbfgs.LBFGS.html b/2.9/generated/torch.optim.lbfgs.LBFGS.html
index 956b293361f..efdc8c16349 100644
--- a/2.9/generated/torch.optim.lbfgs.LBFGS.html
+++ b/2.9/generated/torch.optim.lbfgs.LBFGS.html
@@ -4415,7 +4415,7 @@

                                                                                          LBFGS#

                                                                                          -class torch.optim.lbfgs.LBFGS(params, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=None)[source]#
                                                                                          +class torch.optim.lbfgs.LBFGS(params, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=None)[source]#

                                                                                          Implements L-BFGS algorithm.

                                                                                          Heavily inspired by minFunc.
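Because L-BFGS may re-evaluate the objective several times per step, step() takes a closure; a minimal sketch (model, loss_fn, input, and target are placeholders):

>>> optimizer = torch.optim.LBFGS(model.parameters(), lr=1, max_iter=20)
>>> def closure():
...     optimizer.zero_grad()
...     loss = loss_fn(model(input), target)
...     loss.backward()
...     return loss
>>> optimizer.step(closure)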

                                                                                          @@ -4454,7 +4454,7 @@

                                                                                          LBFGS#

                                                                                          -add_param_group(param_group)[source]#
                                                                                          +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                          @@ -4468,7 +4468,7 @@

                                                                                          LBFGS#
                                                                                          -load_state_dict(state_dict)[source]#
                                                                                          +load_state_dict(state_dict)[source]#

                                                                                          Load the optimizer state.

                                                                                          Parameters
                                                                                          @@ -4521,7 +4521,7 @@

                                                                                          LBFGS#
                                                                                          -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                          +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                          Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                          @@ -4555,7 +4555,7 @@

                                                                                          LBFGS#
                                                                                          -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                          +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                          Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                          @@ -4592,7 +4592,7 @@

                                                                                          LBFGS#
                                                                                          -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                          +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                          Register a state dict post-hook which will be called after state_dict() is called.

                                                                                          It should have the following signature:

                                                                                          hook(optimizer, state_dict) -> state_dict or None
                                                                                          @@ -4624,7 +4624,7 @@ 

                                                                                          LBFGS#
                                                                                          -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                          +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                          Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                          It should have the following signature:

                                                                                          hook(optimizer) -> None
                                                                                          @@ -4656,7 +4656,7 @@ 

                                                                                          LBFGS#
                                                                                          -register_step_post_hook(hook)[source]#
                                                                                          +register_step_post_hook(hook)[source]#

                                                                                          Register an optimizer step post hook which will be called after optimizer step.

                                                                                          It should have the following signature:

                                                                                          hook(optimizer, args, kwargs) -> None
                                                                                          @@ -4679,7 +4679,7 @@ 

                                                                                          LBFGS#
                                                                                          -register_step_pre_hook(hook)[source]#
                                                                                          +register_step_pre_hook(hook)[source]#

                                                                                          Register an optimizer step pre hook which will be called before optimizer step.

                                                                                          It should have the following signature:

                                                                                          hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                          @@ -4704,7 +4704,7 @@ 

                                                                                          LBFGS#
                                                                                          -state_dict()[source]#
                                                                                          +state_dict()[source]#

                                                                                          Return the state of the optimizer as a dict.

                                                                                          It contains two entries:

                                                                                            @@ -4767,7 +4767,7 @@

                                                                                            LBFGS#
                                                                                            -step(closure)[source]#
                                                                                            +step(closure)[source]#

                                                                                            Perform a single optimization step.

                                                                                            Parameters
                                                                                            @@ -4779,7 +4779,7 @@

                                                                                            LBFGS#
                                                                                            -zero_grad(set_to_none=True)[source]#
                                                                                            +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                                            Parameters
diff --git a/2.9/generated/torch.optim.lr_scheduler.ChainedScheduler.html b/2.9/generated/torch.optim.lr_scheduler.ChainedScheduler.html
index d2712aa599a..af8380a7176 100644
--- a/2.9/generated/torch.optim.lr_scheduler.ChainedScheduler.html
+++ b/2.9/generated/torch.optim.lr_scheduler.ChainedScheduler.html
@@ -4404,7 +4404,7 @@

                                                                                            ChainedScheduler#

                                                                                            -class torch.optim.lr_scheduler.ChainedScheduler(schedulers, optimizer=None)[source]#
                                                                                            +class torch.optim.lr_scheduler.ChainedScheduler(schedulers, optimizer=None)[source]#

                                                                                            Chains a list of learning rate schedulers.

                                                                                            Takes in a sequence of chainable learning rate schedulers and calls their step() functions consecutively in just one call to step().
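For illustration, a sketch chaining two schedulers over the same optimizer (train_one_epoch is a placeholder):

>>> from torch.optim.lr_scheduler import ChainedScheduler, ConstantLR, ExponentialLR
>>> scheduler1 = ConstantLR(optimizer, factor=0.1, total_iters=2)
>>> scheduler2 = ExponentialLR(optimizer, gamma=0.9)
>>> scheduler = ChainedScheduler([scheduler1, scheduler2], optimizer=optimizer)
>>> for epoch in range(100):
...     train_one_epoch()  # placeholder
...     scheduler.step()   # steps both chained schedulers in one call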

                                                                                            @@ -4437,7 +4437,7 @@

                                                                                            ChainedScheduler
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4448,7 +4448,7 @@

                                                                                            ChainedScheduler
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute learning rate using chainable form of the scheduler.

                                                                                            Return type
                                                                                            @@ -4459,7 +4459,7 @@

                                                                                            ChainedScheduler
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4471,7 +4471,7 @@

                                                                                            ChainedScheduler
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

It contains an entry for every variable in self.__dict__ which is not the optimizer.
@@ -4485,7 +4485,7 @@

                                                                                            ChainedScheduler
                                                                                            -step()[source]#
                                                                                            +step()[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.ConstantLR.html b/2.9/generated/torch.optim.lr_scheduler.ConstantLR.html
index 991c225b2b6..a6c2dcd1afa 100644
--- a/2.9/generated/torch.optim.lr_scheduler.ConstantLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.ConstantLR.html
@@ -4404,7 +4404,7 @@

                                                                                            ConstantLR#

                                                                                            -class torch.optim.lr_scheduler.ConstantLR(optimizer, factor=0.3333333333333333, total_iters=5, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.ConstantLR(optimizer, factor=0.3333333333333333, total_iters=5, last_epoch=-1)[source]#

                                                                                            Multiply the learning rate of each parameter group by a small constant factor.

The multiplication is done until the number of epochs reaches a pre-defined milestone: total_iters. Notice that such multiplication of the small constant factor can
@@ -4439,7 +4439,7 @@

                                                                                            ConstantLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4450,7 +4450,7 @@

                                                                                            ConstantLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the learning rate of each parameter group.

                                                                                            Return type
                                                                                            @@ -4461,7 +4461,7 @@

                                                                                            ConstantLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4473,7 +4473,7 @@

                                                                                            ConstantLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4486,7 +4486,7 @@

                                                                                            ConstantLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.CosineAnnealingLR.html b/2.9/generated/torch.optim.lr_scheduler.CosineAnnealingLR.html
index 0de1ca8086a..9efc5ba2072 100644
--- a/2.9/generated/torch.optim.lr_scheduler.CosineAnnealingLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.CosineAnnealingLR.html
@@ -4404,7 +4404,7 @@

                                                                                            CosineAnnealingLR#

                                                                                            -class torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max, eta_min=0.0, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max, eta_min=0.0, last_epoch=-1)[source]#

                                                                                            Set the learning rate of each parameter group using a cosine annealing schedule.

                                                                                            The learning rate is updated recursively using:
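For reference, the recursion realizes the standard SGDR closed form (a sketch, taking \eta_{max} as the initial lr):

\eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right)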

                                                                                            @@ -4452,7 +4452,7 @@

                                                                                            CosineAnnealingLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4463,7 +4463,7 @@

                                                                                            CosineAnnealingLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Retrieve the learning rate of each parameter group.

                                                                                            Return type
                                                                                            @@ -4474,7 +4474,7 @@

                                                                                            CosineAnnealingLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4486,7 +4486,7 @@

                                                                                            CosineAnnealingLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4499,7 +4499,7 @@

                                                                                            CosineAnnealingLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.CosineAnnealingWarmRestarts.html b/2.9/generated/torch.optim.lr_scheduler.CosineAnnealingWarmRestarts.html
index 057a7e1cc8d..387dc5a4e66 100644
--- a/2.9/generated/torch.optim.lr_scheduler.CosineAnnealingWarmRestarts.html
+++ b/2.9/generated/torch.optim.lr_scheduler.CosineAnnealingWarmRestarts.html
@@ -4404,7 +4404,7 @@

                                                                                            CosineAnnealingWarmRestarts#

                                                                                            -class torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0, T_mult=1, eta_min=0.0, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0, T_mult=1, eta_min=0.0, last_epoch=-1)[source]#

                                                                                            Set the learning rate of each parameter group using a cosine annealing schedule.

The \eta_{max} is set to the initial lr, T_{cur} is the number of epochs since the last restart and T_{i} is the number
@@ -4442,7 +4442,7 @@

                                                                                            CosineAnnealingWarmRestarts
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4453,7 +4453,7 @@

                                                                                            CosineAnnealingWarmRestarts
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the initial learning rate.

                                                                                            Return type
                                                                                            @@ -4464,7 +4464,7 @@

                                                                                            CosineAnnealingWarmRestarts
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4476,7 +4476,7 @@

                                                                                            CosineAnnealingWarmRestarts
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4489,7 +4489,7 @@

                                                                                            CosineAnnealingWarmRestarts
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Step could be called after every batch update.

                                                                                            Example

                                                                                            >>> scheduler = CosineAnnealingWarmRestarts(optimizer, T_0, T_mult)
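>>> # a hedged sketch of per-batch stepping (dataloader and train_step
>>> # are placeholders); fractional epoch values drive the restarts
>>> iters = len(dataloader)
>>> for epoch in range(20):
...     for i, batch in enumerate(dataloader):
...         train_step(batch)
...         scheduler.step(epoch + i / iters)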
                                                                                            diff --git a/2.9/generated/torch.optim.lr_scheduler.CyclicLR.html b/2.9/generated/torch.optim.lr_scheduler.CyclicLR.html
                                                                                            index 52c068e10ad..e9bc39c7aa4 100644
                                                                                            --- a/2.9/generated/torch.optim.lr_scheduler.CyclicLR.html
                                                                                            +++ b/2.9/generated/torch.optim.lr_scheduler.CyclicLR.html
                                                                                            @@ -4404,7 +4404,7 @@
                                                                                             

                                                                                            CyclicLR#

                                                                                            -class torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr, max_lr, step_size_up=2000, step_size_down=None, mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', cycle_momentum=True, base_momentum=0.8, max_momentum=0.9, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr, max_lr, step_size_up=2000, step_size_down=None, mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', cycle_momentum=True, base_momentum=0.8, max_momentum=0.9, last_epoch=-1)[source]#

                                                                                            Sets the learning rate of each parameter group according to cyclical learning rate policy (CLR).

The policy cycles the learning rate between two boundaries with a constant frequency, as detailed in the paper Cyclical Learning Rates for Training Neural Networks.
@@ -4500,7 +4500,7 @@
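A minimal per-batch usage sketch (train_batch and data_loader are placeholders; CyclicLR is typically stepped after every batch):

>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.01, max_lr=0.1)
>>> for epoch in range(10):
...     for batch in data_loader:
...         train_batch(batch)
...         scheduler.step()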

                                                                                            CyclicLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4511,7 +4511,7 @@

                                                                                            CyclicLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Calculate the learning rate at batch index.

                                                                                            This function treats self.last_epoch as the last batch index.

If self.cycle_momentum is True, this function has a side effect of
@@ -4525,7 +4525,7 @@

                                                                                            CyclicLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            @@ -4533,7 +4533,7 @@

                                                                                            CyclicLR
                                                                                            -scale_fn(x)[source]#
                                                                                            +scale_fn(x)[source]#

                                                                                            Get the scaling policy.

                                                                                            Return type
                                                                                            @@ -4544,7 +4544,7 @@

                                                                                            CyclicLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

It contains an entry for every variable in self.__dict__ which is not the optimizer.
@@ -4560,7 +4560,7 @@

                                                                                            CyclicLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.ExponentialLR.html b/2.9/generated/torch.optim.lr_scheduler.ExponentialLR.html
index 0d6582ac030..01a9c08cb78 100644
--- a/2.9/generated/torch.optim.lr_scheduler.ExponentialLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.ExponentialLR.html
@@ -4404,7 +4404,7 @@

                                                                                            ExponentialLR#

                                                                                            -class torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma, last_epoch=-1)[source]#

                                                                                            Decays the learning rate of each parameter group by gamma every epoch.

                                                                                            When last_epoch=-1, sets initial lr as lr.
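Worked numbers for illustration (model is a placeholder): with an initial lr of 0.1 and gamma=0.9, the lr after epoch t is 0.1 * 0.9**t:

>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
>>> scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
>>> scheduler.step()  # lr becomes 0.1 * 0.9 = 0.09
>>> scheduler.step()  # lr becomes 0.1 * 0.9**2 = 0.081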

                                                                                            @@ -4427,7 +4427,7 @@

                                                                                            ExponentialLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4438,7 +4438,7 @@

                                                                                            ExponentialLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the learning rate of each parameter group.

                                                                                            Return type
                                                                                            @@ -4449,7 +4449,7 @@

                                                                                            ExponentialLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4461,7 +4461,7 @@

                                                                                            ExponentialLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4474,7 +4474,7 @@

                                                                                            ExponentialLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.LRScheduler.html b/2.9/generated/torch.optim.lr_scheduler.LRScheduler.html
index 74f453e8991..8933d4445b4 100644
--- a/2.9/generated/torch.optim.lr_scheduler.LRScheduler.html
+++ b/2.9/generated/torch.optim.lr_scheduler.LRScheduler.html
@@ -4404,13 +4404,13 @@

                                                                                            LRScheduler#

                                                                                            -class torch.optim.lr_scheduler.LRScheduler(optimizer, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.LRScheduler(optimizer, last_epoch=-1)[source]#

                                                                                            Adjusts the learning rate during optimization.
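Custom schedules subclass LRScheduler and override get_lr(); a minimal sketch (HalvingLR is a hypothetical name):

>>> from torch.optim.lr_scheduler import LRScheduler
>>> class HalvingLR(LRScheduler):
...     def get_lr(self):
...         # halve every 10 epochs, starting from each group's base lr
...         return [base_lr * 0.5 ** (self.last_epoch // 10)
...                 for base_lr in self.base_lrs]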

                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4421,7 +4421,7 @@

                                                                                            LRScheduler
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute learning rate using chainable form of the scheduler.

                                                                                            Return type
                                                                                            @@ -4432,7 +4432,7 @@

                                                                                            LRScheduler
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4444,7 +4444,7 @@

                                                                                            LRScheduler
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4457,7 +4457,7 @@

                                                                                            LRScheduler
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.LambdaLR.html b/2.9/generated/torch.optim.lr_scheduler.LambdaLR.html
index 878c9efe0b8..8b88dd1ed86 100644
--- a/2.9/generated/torch.optim.lr_scheduler.LambdaLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.LambdaLR.html
@@ -4404,7 +4404,7 @@

                                                                                            LambdaLR#

                                                                                            -class torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch=-1)[source]#

                                                                                            Sets the initial learning rate.

                                                                                            The learning rate of each parameter group is set to the initial lr times a given function. When last_epoch=-1, sets initial lr as lr.
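For example, an exponential-style decay expressed as a lambda (the factor 0.95 is illustrative):

>>> scheduler = torch.optim.lr_scheduler.LambdaLR(
...     optimizer, lr_lambda=lambda epoch: 0.95 ** epoch)
>>> # each group's lr becomes initial_lr * 0.95**last_epoch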

                                                                                            @@ -4441,7 +4441,7 @@

                                                                                            LambdaLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4452,7 +4452,7 @@

                                                                                            LambdaLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute learning rate.

                                                                                            Return type
                                                                                            @@ -4463,7 +4463,7 @@

                                                                                            LambdaLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            When saving or loading the scheduler, please make sure to also save or load the state of the optimizer.
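A sketch of that round trip (the checkpoint path and dict keys are placeholders):

>>> ckpt = {'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict()}
>>> torch.save(ckpt, 'checkpoint.pt')
>>> state = torch.load('checkpoint.pt')
>>> optimizer.load_state_dict(state['optimizer'])
>>> scheduler.load_state_dict(state['scheduler'])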

                                                                                            @@ -4476,7 +4476,7 @@

                                                                                            LambdaLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

It contains an entry for every variable in self.__dict__ which is not the optimizer.
@@ -4492,7 +4492,7 @@

                                                                                            LambdaLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.LinearLR.html b/2.9/generated/torch.optim.lr_scheduler.LinearLR.html
index b26dab09693..9c8f681f8cd 100644
--- a/2.9/generated/torch.optim.lr_scheduler.LinearLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.LinearLR.html
@@ -4404,7 +4404,7 @@

                                                                                            LinearLR#

                                                                                            -class torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.3333333333333333, end_factor=1.0, total_iters=5, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.3333333333333333, end_factor=1.0, total_iters=5, last_epoch=-1)[source]#

Decays the learning rate of each parameter group by linearly changing a small multiplicative factor.

The multiplication is done until the number of epochs reaches a pre-defined milestone: total_iters. Notice that such decay can happen simultaneously with other changes to the learning rate from outside this scheduler.
@@ -4442,7 +4442,7 @@
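A short sketch of the ramp described above; the layer, base lr, and factors are illustrative assumptions:

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LinearLR

optimizer = SGD(torch.nn.Linear(4, 2).parameters(), lr=0.05)
# Ramps the factor from 0.25 to 1.0 over 4 epochs: lr goes 0.0125 -> 0.05.
scheduler = LinearLR(optimizer, start_factor=0.25, end_factor=1.0, total_iters=4)

for epoch in range(6):
    optimizer.step()          # training steps would go here
    scheduler.step()
    print(epoch, scheduler.get_last_lr())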

                                                                                            LinearLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4453,7 +4453,7 @@

                                                                                            LinearLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the learning rate.

                                                                                            Return type
                                                                                            @@ -4464,7 +4464,7 @@

                                                                                            LinearLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4476,7 +4476,7 @@

                                                                                            LinearLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4489,7 +4489,7 @@

                                                                                            LinearLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.MultiStepLR.html b/2.9/generated/torch.optim.lr_scheduler.MultiStepLR.html
index a73370df55e..953f0f280eb 100644
--- a/2.9/generated/torch.optim.lr_scheduler.MultiStepLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.MultiStepLR.html
@@ -4404,7 +4404,7 @@

                                                                                            MultiStepLR#

                                                                                            -class torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1, last_epoch=-1)[source]#

Decays the learning rate of each parameter group by gamma once the number of epochs reaches one of the milestones.

Notice that such decay can happen simultaneously with other changes to the learning rate from outside this scheduler. When last_epoch=-1, sets the initial lr as lr.
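A sketch of the milestone behavior; the layer, base lr, and milestones are illustrative assumptions:

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR

optimizer = SGD(torch.nn.Linear(4, 2).parameters(), lr=0.1)
# lr = 0.1 for epochs [0, 30), 0.01 for [30, 80), 0.001 afterwards.
scheduler = MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)

for epoch in range(100):
    optimizer.step()          # training steps would go here
    scheduler.step()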

                                                                                            @@ -4434,7 +4434,7 @@

                                                                                            MultiStepLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4445,7 +4445,7 @@

                                                                                            MultiStepLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the learning rate of each parameter group.

                                                                                            Return type
                                                                                            @@ -4456,7 +4456,7 @@

                                                                                            MultiStepLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4468,7 +4468,7 @@

                                                                                            MultiStepLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4481,7 +4481,7 @@

                                                                                            MultiStepLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.MultiplicativeLR.html b/2.9/generated/torch.optim.lr_scheduler.MultiplicativeLR.html
index 329d6c6a429..7c1d4e4d590 100644
--- a/2.9/generated/torch.optim.lr_scheduler.MultiplicativeLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.MultiplicativeLR.html
@@ -4404,7 +4404,7 @@

                                                                                            MultiplicativeLR#

                                                                                            -class torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda, last_epoch=-1)[source]#

                                                                                            Multiply the learning rate of each parameter group by the factor given in the specified function.

When last_epoch=-1, sets the initial lr as lr.
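For illustration, a constant per-epoch factor of 0.95 (the layer, base lr, and factor are illustrative assumptions):

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiplicativeLR

optimizer = SGD(torch.nn.Linear(4, 2).parameters(), lr=0.1)
# Each epoch, the current lr is multiplied by whatever the lambda returns.
scheduler = MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 0.95)

for epoch in range(5):
    optimizer.step()
    scheduler.step()
    print(scheduler.get_last_lr())   # 0.095, 0.09025, ...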

                                                                                            @@ -4430,7 +4430,7 @@

                                                                                            MultiplicativeLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4441,7 +4441,7 @@

                                                                                            MultiplicativeLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the learning rate of each parameter group.

                                                                                            Return type
                                                                                            @@ -4452,7 +4452,7 @@

                                                                                            MultiplicativeLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4464,7 +4464,7 @@

                                                                                            MultiplicativeLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

It contains an entry for every variable in self.__dict__ which is not the optimizer.
@@ -4479,7 +4479,7 @@

                                                                                            MultiplicativeLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.OneCycleLR.html b/2.9/generated/torch.optim.lr_scheduler.OneCycleLR.html
index 79e925945ad..cbe39a54734 100644
--- a/2.9/generated/torch.optim.lr_scheduler.OneCycleLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.OneCycleLR.html
@@ -4404,7 +4404,7 @@

                                                                                            OneCycleLR#

                                                                                            -class torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, total_steps=None, epochs=None, steps_per_epoch=None, pct_start=0.3, anneal_strategy='cos', cycle_momentum=True, base_momentum=0.85, max_momentum=0.95, div_factor=25.0, final_div_factor=10000.0, three_phase=False, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, total_steps=None, epochs=None, steps_per_epoch=None, pct_start=0.3, anneal_strategy='cos', cycle_momentum=True, base_momentum=0.85, max_momentum=0.95, div_factor=25.0, final_div_factor=10000.0, three_phase=False, last_epoch=-1)[source]#

                                                                                            Sets the learning rate of each parameter group according to the 1cycle learning rate policy.

The 1cycle policy anneals the learning rate from an initial learning rate to some maximum learning rate and then from that maximum learning rate to some minimum learning rate much lower than the initial learning rate.
@@ -4503,7 +4503,7 @@
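A sketch of the per-batch usage (OneCycleLR is stepped after every batch, not every epoch; the layer and hyperparameters are illustrative assumptions):

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import OneCycleLR

optimizer = SGD(torch.nn.Linear(4, 2).parameters(), lr=0.01)
# total_steps is derived here from epochs * steps_per_epoch = 500.
scheduler = OneCycleLR(optimizer, max_lr=0.1, epochs=10, steps_per_epoch=50)

for epoch in range(10):
    for batch in range(50):
        optimizer.step()      # forward/backward would go here
        scheduler.step()      # called once per batch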

                                                                                            OneCycleLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4514,7 +4514,7 @@

                                                                                            OneCycleLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the learning rate of each parameter group.

                                                                                            Return type
                                                                                            @@ -4525,7 +4525,7 @@

                                                                                            OneCycleLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4537,7 +4537,7 @@

                                                                                            OneCycleLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4550,7 +4550,7 @@

                                                                                            OneCycleLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.PolynomialLR.html b/2.9/generated/torch.optim.lr_scheduler.PolynomialLR.html
index c20025d33d9..30b8631cfcf 100644
--- a/2.9/generated/torch.optim.lr_scheduler.PolynomialLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.PolynomialLR.html
@@ -4404,7 +4404,7 @@

                                                                                            PolynomialLR#

                                                                                            -class torch.optim.lr_scheduler.PolynomialLR(optimizer, total_iters=5, power=1.0, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.PolynomialLR(optimizer, total_iters=5, power=1.0, last_epoch=-1)[source]#

                                                                                            Decays the learning rate of each parameter group using a polynomial function in the given total_iters.

When last_epoch=-1, sets the initial lr as lr.
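A small sketch of the polynomial decay (the layer, base lr, and power are illustrative assumptions):

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import PolynomialLR

optimizer = SGD(torch.nn.Linear(4, 2).parameters(), lr=0.1)
# With power=2.0 the lr follows 0.1 * (1 - t/4)^2, reaching zero at t=4.
scheduler = PolynomialLR(optimizer, total_iters=4, power=2.0)

for epoch in range(4):
    optimizer.step()
    scheduler.step()
    print(scheduler.get_last_lr())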

                                                                                            @@ -4433,7 +4433,7 @@

                                                                                            PolynomialLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4444,7 +4444,7 @@

                                                                                            PolynomialLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the learning rate.

                                                                                            Return type
                                                                                            @@ -4455,7 +4455,7 @@

                                                                                            PolynomialLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4467,7 +4467,7 @@

                                                                                            PolynomialLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4480,7 +4480,7 @@

                                                                                            PolynomialLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html b/2.9/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html
index 3988472549e..1013b17cc95 100644
--- a/2.9/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html
+++ b/2.9/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html
@@ -4404,7 +4404,7 @@

                                                                                            ReduceLROnPlateau#

                                                                                            -class torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)[source]#
                                                                                            +class torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)[source]#

                                                                                            Reduce learning rate when a metric has stopped improving.

Models often benefit from reducing the learning rate by a factor of 2-10 once learning stagnates. This scheduler reads a metrics quantity and, if no improvement is seen for a patience number of epochs, the learning rate is reduced.
@@ -4463,7 +4463,7 @@
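Unlike the other schedulers here, step() takes the tracked metric as an argument. A sketch (the layer, hyperparameters, and stand-in loss are illustrative assumptions):

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = torch.nn.Linear(4, 2)
optimizer = SGD(model.parameters(), lr=0.1)
# Halve the lr once the tracked metric fails to improve for 5 epochs.
scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)

for epoch in range(20):
    optimizer.step()                 # training would go here
    val_loss = 1.0 / (epoch + 1)     # stand-in for a real validation loss
    scheduler.step(val_loss)         # the metric is required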

                                                                                            ReduceLROnPlateau
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4474,7 +4474,7 @@

                                                                                            ReduceLROnPlateau
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute learning rate using chainable form of the scheduler.

                                                                                            Return type
                                                                                            @@ -4485,7 +4485,7 @@

                                                                                            ReduceLROnPlateau
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            @@ -4493,7 +4493,7 @@

                                                                                            ReduceLROnPlateau
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4506,7 +4506,7 @@

                                                                                            ReduceLROnPlateau
                                                                                            -step(metrics, epoch=None)[source]#
                                                                                            +step(metrics, epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.SequentialLR.html b/2.9/generated/torch.optim.lr_scheduler.SequentialLR.html
index cbb862a09d3..321f06c6627 100644
--- a/2.9/generated/torch.optim.lr_scheduler.SequentialLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.SequentialLR.html
@@ -4404,7 +4404,7 @@

                                                                                            SequentialLR#

                                                                                            -class torch.optim.lr_scheduler.SequentialLR(optimizer, schedulers, milestones, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.SequentialLR(optimizer, schedulers, milestones, last_epoch=-1)[source]#

                                                                                            Contains a list of schedulers expected to be called sequentially during the optimization process.

                                                                                            Specifically, the schedulers will be called according to the milestone points, which should provide exact intervals by which each scheduler should be called at a given epoch.
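For example, a warmup phase handed over to exponential decay at epoch 5 (the layer, schedulers, and hyperparameters are illustrative assumptions):

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import SequentialLR, LinearLR, ExponentialLR

optimizer = SGD(torch.nn.Linear(4, 2).parameters(), lr=0.1)
warmup = LinearLR(optimizer, start_factor=0.1, total_iters=5)
decay = ExponentialLR(optimizer, gamma=0.9)
# Run the warmup scheduler for epochs 0-4, then switch to decay at epoch 5.
scheduler = SequentialLR(optimizer, schedulers=[warmup, decay], milestones=[5])

for epoch in range(20):
    optimizer.step()
    scheduler.step()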

                                                                                            @@ -4443,7 +4443,7 @@

                                                                                            SequentialLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4454,7 +4454,7 @@

                                                                                            SequentialLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute learning rate using chainable form of the scheduler.

                                                                                            Return type
                                                                                            @@ -4465,7 +4465,7 @@

                                                                                            SequentialLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4477,14 +4477,14 @@

                                                                                            SequentialLR
                                                                                            -recursive_undo(sched=None)[source]#
                                                                                            +recursive_undo(sched=None)[source]#

Recursively undo any step performed by the initialization of schedulers.

                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

It contains an entry for every variable in self.__dict__ which is not the optimizer.
@@ -4498,7 +4498,7 @@

                                                                                            SequentialLR
                                                                                            -step()[source]#
                                                                                            +step()[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.lr_scheduler.StepLR.html b/2.9/generated/torch.optim.lr_scheduler.StepLR.html
index 902454f78a8..c749692c40e 100644
--- a/2.9/generated/torch.optim.lr_scheduler.StepLR.html
+++ b/2.9/generated/torch.optim.lr_scheduler.StepLR.html
@@ -4404,7 +4404,7 @@

                                                                                            StepLR#

                                                                                            -class torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma=0.1, last_epoch=-1)[source]#
                                                                                            +class torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma=0.1, last_epoch=-1)[source]#

                                                                                            Decays the learning rate of each parameter group by gamma every step_size epochs.

Notice that such decay can happen simultaneously with other changes to the learning rate from outside this scheduler. When last_epoch=-1, sets the initial lr as lr.
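A sketch of the stepwise decay (the layer, base lr, step_size, and gamma are illustrative assumptions):

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR

optimizer = SGD(torch.nn.Linear(4, 2).parameters(), lr=0.1)
# lr = 0.1 for epochs [0, 30), 0.01 for [30, 60), 0.001 for [60, 90), ...
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

for epoch in range(90):
    optimizer.step()          # training steps would go here
    scheduler.step()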

                                                                                            @@ -4435,7 +4435,7 @@

                                                                                            StepLR
                                                                                            -get_last_lr()[source]#
                                                                                            +get_last_lr()[source]#

                                                                                            Return last computed learning rate by current scheduler.

                                                                                            Return type
                                                                                            @@ -4446,7 +4446,7 @@

                                                                                            StepLR
                                                                                            -get_lr()[source]#
                                                                                            +get_lr()[source]#

                                                                                            Compute the learning rate of each parameter group.

                                                                                            Return type
                                                                                            @@ -4457,7 +4457,7 @@

                                                                                            StepLR
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the scheduler’s state.

                                                                                            Parameters
                                                                                            @@ -4469,7 +4469,7 @@

                                                                                            StepLR
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the scheduler as a dict.

                                                                                            It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                            @@ -4482,7 +4482,7 @@

                                                                                            StepLR
                                                                                            -step(epoch=None)[source]#
                                                                                            +step(epoch=None)[source]#

                                                                                            Perform a step.

diff --git a/2.9/generated/torch.optim.nadam.NAdam.html b/2.9/generated/torch.optim.nadam.NAdam.html
index 78819683bb1..ed5ba59d7ff 100644
--- a/2.9/generated/torch.optim.nadam.NAdam.html
+++ b/2.9/generated/torch.optim.nadam.NAdam.html
@@ -4415,7 +4415,7 @@

                                                                                            NAdam#

                                                                                            -class torch.optim.nadam.NAdam(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, momentum_decay=0.004, decoupled_weight_decay=False, *, foreach=None, maximize=False, capturable=False, differentiable=False)[source]#
                                                                                            +class torch.optim.nadam.NAdam(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, momentum_decay=0.004, decoupled_weight_decay=False, *, foreach=None, maximize=False, capturable=False, differentiable=False)[source]#

Implements the NAdam algorithm.

\begin{aligned}
&\textbf{input}: \gamma_t \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\\
&\hspace{12mm}\lambda \text{ (weight decay)},\ \psi \text{ (momentum decay)},\ \textit{decoupled\_weight\_decay},\ \textit{maximize}\\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ v_0 \leftarrow 0 \text{ (second moment)}\\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do}\\
&\hspace{5mm}\textbf{if}\ \textit{maximize}: g_t \leftarrow -\nabla_\theta f_t(\theta_{t-1})\ \textbf{else}\ g_t \leftarrow \nabla_\theta f_t(\theta_{t-1})\\
&\hspace{5mm}\theta_t \leftarrow \theta_{t-1}\\
&\hspace{5mm}\textbf{if}\ \lambda \neq 0:\\
&\hspace{10mm}\textbf{if}\ \textit{decoupled\_weight\_decay}: \theta_t \leftarrow \theta_{t-1} - \gamma\lambda\theta_{t-1}\\
&\hspace{10mm}\textbf{else}: g_t \leftarrow g_t + \lambda\theta_{t-1}\\
&\hspace{5mm}\mu_t \leftarrow \beta_1\left(1 - \tfrac{1}{2}\,0.96^{\,t\psi}\right),\quad \mu_{t+1} \leftarrow \beta_1\left(1 - \tfrac{1}{2}\,0.96^{\,(t+1)\psi}\right)\\
&\hspace{5mm}m_t \leftarrow \beta_1 m_{t-1} + (1-\beta_1)\,g_t\\
&\hspace{5mm}v_t \leftarrow \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2\\
&\hspace{5mm}\widehat{m_t} \leftarrow \mu_{t+1} m_t \big/ \left(1 - \textstyle\prod_{i=1}^{t+1}\mu_i\right) + (1-\mu_t)\,g_t \big/ \left(1 - \textstyle\prod_{i=1}^{t}\mu_i\right)\\
&\hspace{5mm}\widehat{v_t} \leftarrow v_t / (1 - \beta_2^t)\\
&\hspace{5mm}\theta_t \leftarrow \theta_t - \gamma\,\widehat{m_t} \big/ \left(\sqrt{\widehat{v_t}} + \epsilon\right)\\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4507,7 +4507,7 @@
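A minimal training-loop sketch using this optimizer (the model, lr, and dummy loss are illustrative assumptions):

import torch
from torch.optim import NAdam

model = torch.nn.Linear(10, 1)
optimizer = NAdam(model.parameters(), lr=2e-3, momentum_decay=4e-3)

for _ in range(100):
    optimizer.zero_grad()
    loss = model(torch.randn(8, 10)).pow(2).mean()   # dummy objective
    loss.backward()
    optimizer.step()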

                                                                                            NAdam#

                                                                                            -add_param_group(param_group)[source]#
                                                                                            +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.
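A sketch of that fine-tuning pattern (the backbone/head modules and learning rates are illustrative assumptions):

import torch
from torch.optim import NAdam

backbone = torch.nn.Linear(16, 16)
head = torch.nn.Linear(16, 2)

# Optimize only the head at first; the backbone stays frozen.
optimizer = NAdam(head.parameters(), lr=2e-3)

# Later in training, unfreeze the backbone with its own smaller lr.
optimizer.add_param_group({"params": backbone.parameters(), "lr": 2e-4})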

                                                                                            @@ -4521,7 +4521,7 @@

                                                                                            NAdam#
                                                                                            -load_state_dict(state_dict)[source]#
                                                                                            +load_state_dict(state_dict)[source]#

                                                                                            Load the optimizer state.

                                                                                            Parameters
                                                                                            @@ -4574,7 +4574,7 @@

                                                                                            NAdam#
                                                                                            -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                            +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                            Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                            @@ -4608,7 +4608,7 @@

                                                                                            NAdam#
                                                                                            -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                            +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                            Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                            @@ -4645,7 +4645,7 @@

                                                                                            NAdam#
                                                                                            -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                            +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                            Register a state dict post-hook which will be called after state_dict() is called.

                                                                                            It should have the following signature:

                                                                                            hook(optimizer, state_dict) -> state_dict or None
                                                                                            @@ -4677,7 +4677,7 @@ 

                                                                                            NAdam#
                                                                                            -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                            +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                            Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                            It should have the following signature:

                                                                                            hook(optimizer) -> None
                                                                                            @@ -4709,7 +4709,7 @@ 

                                                                                            NAdam#
                                                                                            -register_step_post_hook(hook)[source]#
                                                                                            +register_step_post_hook(hook)[source]#

                                                                                            Register an optimizer step post hook which will be called after optimizer step.

                                                                                            It should have the following signature:

                                                                                            hook(optimizer, args, kwargs) -> None
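A sketch of a post hook matching that signature (the counter and layer are illustrative assumptions):

import torch
from torch.optim import NAdam

optimizer = NAdam(torch.nn.Linear(4, 2).parameters())
counter = {"steps": 0}

def count_steps(optimizer, args, kwargs):
    # Runs after every optimizer.step(); a post hook returns None.
    counter["steps"] += 1

handle = optimizer.register_step_post_hook(count_steps)
optimizer.step()
print(counter["steps"])   # 1
handle.remove()           # detach the hook via the returned handle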
                                                                                            @@ -4732,7 +4732,7 @@ 

                                                                                            NAdam#
                                                                                            -register_step_pre_hook(hook)[source]#
                                                                                            +register_step_pre_hook(hook)[source]#

                                                                                            Register an optimizer step pre hook which will be called before optimizer step.

                                                                                            It should have the following signature:

                                                                                            hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                            @@ -4757,7 +4757,7 @@ 

                                                                                            NAdam#
                                                                                            -state_dict()[source]#
                                                                                            +state_dict()[source]#

                                                                                            Return the state of the optimizer as a dict.

                                                                                            It contains two entries:

                                                                                              @@ -4820,7 +4820,7 @@

                                                                                              NAdam#
                                                                                              -step(closure=None)[source]#
                                                                                              +step(closure=None)[source]#

                                                                                              Perform a single optimization step.

                                                                                              Parameters
                                                                                              @@ -4832,7 +4832,7 @@

                                                                                              NAdam#
                                                                                              -zero_grad(set_to_none=True)[source]#
                                                                                              +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensor objects.

                                                                                              Parameters
diff --git a/2.9/generated/torch.optim.nadam.nadam.html b/2.9/generated/torch.optim.nadam.nadam.html
index 64cae11a71c..3d55dd4a7fb 100644
--- a/2.9/generated/torch.optim.nadam.nadam.html
+++ b/2.9/generated/torch.optim.nadam.nadam.html
@@ -4415,7 +4415,7 @@

                                                                                              torch.optim.nadam.nadam#

                                                                                              -torch.optim.nadam.nadam(params, grads, exp_avgs, exp_avg_sqs, mu_products, state_steps, decoupled_weight_decay=False, foreach=None, capturable=False, differentiable=False, has_complex=False, maximize=False, *, beta1, beta2, lr, weight_decay, momentum_decay, eps)[source]#
                                                                                              +torch.optim.nadam.nadam(params, grads, exp_avgs, exp_avg_sqs, mu_products, state_steps, decoupled_weight_decay=False, foreach=None, capturable=False, differentiable=False, has_complex=False, maximize=False, *, beta1, beta2, lr, weight_decay, momentum_decay, eps)[source]#

                                                                                              Functional API that performs NAdam algorithm computation.

                                                                                              See NAdam for details.

diff --git a/2.9/generated/torch.optim.radam.RAdam.html b/2.9/generated/torch.optim.radam.RAdam.html
index 2d9fef653e7..8810c1950d8 100644
--- a/2.9/generated/torch.optim.radam.RAdam.html
+++ b/2.9/generated/torch.optim.radam.RAdam.html
@@ -4415,7 +4415,7 @@

                                                                                              RAdam#

                                                                                              -class torch.optim.radam.RAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, decoupled_weight_decay=False, *, foreach=None, maximize=False, capturable=False, differentiable=False)[source]#
                                                                                              +class torch.optim.radam.RAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, decoupled_weight_decay=False, *, foreach=None, maximize=False, capturable=False, differentiable=False)[source]#

Implements the RAdam algorithm.

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\ \beta_1, \beta_2 \text{ (betas)},\ \theta_0 \text{ (params)},\ f(\theta) \text{ (objective)},\ \lambda \text{ (weight decay)},\ \textit{maximize},\\
&\hspace{12mm}\epsilon \text{ (epsilon)},\ \textit{decoupled\_weight\_decay}\\
&\textbf{initialize}: m_0 \leftarrow 0 \text{ (first moment)},\ v_0 \leftarrow 0 \text{ (second moment)},\ \rho_\infty \leftarrow 2/(1-\beta_2) - 1\\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do}\\
&\hspace{5mm}\textbf{if}\ \textit{maximize}: g_t \leftarrow -\nabla_\theta f_t(\theta_{t-1})\ \textbf{else}\ g_t \leftarrow \nabla_\theta f_t(\theta_{t-1})\\
&\hspace{5mm}\theta_t \leftarrow \theta_{t-1}\\
&\hspace{5mm}\textbf{if}\ \lambda \neq 0:\\
&\hspace{10mm}\textbf{if}\ \textit{decoupled\_weight\_decay}: \theta_t \leftarrow \theta_t - \gamma\lambda\theta_t\\
&\hspace{10mm}\textbf{else}: g_t \leftarrow g_t + \lambda\theta_t\\
&\hspace{5mm}m_t \leftarrow \beta_1 m_{t-1} + (1-\beta_1)\,g_t\\
&\hspace{5mm}v_t \leftarrow \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2\\
&\hspace{5mm}\widehat{m_t} \leftarrow m_t / (1 - \beta_1^t)\\
&\hspace{5mm}\rho_t \leftarrow \rho_\infty - 2 t \beta_2^t / (1 - \beta_2^t)\\
&\hspace{5mm}\textbf{if}\ \rho_t > 5:\\
&\hspace{10mm}l_t \leftarrow \sqrt{1 - \beta_2^t} \big/ \left(\sqrt{v_t} + \epsilon\right)\\
&\hspace{10mm}r_t \leftarrow \sqrt{\frac{(\rho_t - 4)(\rho_t - 2)\,\rho_\infty}{(\rho_\infty - 4)(\rho_\infty - 2)\,\rho_t}}\\
&\hspace{10mm}\theta_t \leftarrow \theta_t - \gamma\,\widehat{m_t}\, r_t\, l_t\\
&\hspace{5mm}\textbf{else}: \theta_t \leftarrow \theta_t - \gamma\,\widehat{m_t}\\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4533,7 +4533,7 @@
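A minimal usage sketch, exercising the decoupled_weight_decay flag from the signature above (the model, lr, and dummy loss are illustrative assumptions):

import torch
from torch.optim import RAdam

model = torch.nn.Linear(10, 1)
# decoupled_weight_decay=True applies the weight decay AdamW-style.
optimizer = RAdam(model.parameters(), lr=1e-3, weight_decay=0.01,
                  decoupled_weight_decay=True)

for _ in range(100):
    optimizer.zero_grad()
    loss = model(torch.randn(8, 10)).pow(2).mean()   # dummy objective
    loss.backward()
    optimizer.step()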

                                                                                              RAdam#

                                                                                              -add_param_group(param_group)[source]#
                                                                                              +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                              @@ -4547,7 +4547,7 @@

                                                                                              RAdam#
                                                                                              -load_state_dict(state_dict)[source]#
                                                                                              +load_state_dict(state_dict)[source]#

                                                                                              Load the optimizer state.

                                                                                              Parameters
                                                                                              @@ -4600,7 +4600,7 @@

                                                                                              RAdam#
                                                                                              -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                              +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                              Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                              @@ -4634,7 +4634,7 @@

                                                                                              RAdam#
                                                                                              -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                              +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                              Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                              @@ -4671,7 +4671,7 @@

                                                                                              RAdam#
                                                                                              -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                              +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                              Register a state dict post-hook which will be called after state_dict() is called.

                                                                                              It should have the following signature:

                                                                                              hook(optimizer, state_dict) -> state_dict or None
                                                                                              @@ -4703,7 +4703,7 @@ 

                                                                                              RAdam#
                                                                                              -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                              +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                              Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                              It should have the following signature:

                                                                                              hook(optimizer) -> None
                                                                                              @@ -4735,7 +4735,7 @@ 

                                                                                              RAdam#
                                                                                              -register_step_post_hook(hook)[source]#
                                                                                              +register_step_post_hook(hook)[source]#

                                                                                              Register an optimizer step post hook which will be called after optimizer step.

                                                                                              It should have the following signature:

                                                                                              hook(optimizer, args, kwargs) -> None
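A sketch that counts completed steps; args and kwargs are whatever was passed to step():

import torch

model = torch.nn.Linear(2, 2)
opt = torch.optim.RAdam(model.parameters())
steps = {"n": 0}

def count_steps(optimizer, args, kwargs):
    steps["n"] += 1

opt.register_step_post_hook(count_steps)
model(torch.randn(1, 2)).sum().backward()
opt.step()  # steps["n"] is now 1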
                                                                                              @@ -4758,7 +4758,7 @@ 

                                                                                              RAdam#
                                                                                              -register_step_pre_hook(hook)[source]#
                                                                                              +register_step_pre_hook(hook)[source]#

                                                                                              Register an optimizer step pre hook which will be called before optimizer step.

                                                                                              It should have the following signature:

                                                                                              hook(optimizer, args, kwargs) -> None or modified args and kwargs
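A sketch that logs the current learning rate before each step; returning None leaves args and kwargs untouched:

import torch

opt = torch.optim.RAdam(torch.nn.Linear(2, 2).parameters(), lr=1e-3)

def log_lr(optimizer, args, kwargs):
    # Return an (args, kwargs) tuple instead to modify what step() receives.
    print(optimizer.param_groups[0]["lr"])

opt.register_step_pre_hook(log_lr)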
                                                                                              @@ -4783,7 +4783,7 @@ 

                                                                                              RAdam#
                                                                                              -state_dict()[source]#
                                                                                              +state_dict()[source]#

                                                                                              Return the state of the optimizer as a dict.

                                                                                              It contains two entries:
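The entry names are truncated out of this hunk; assuming the usual state and param_groups keys, a quick inspection looks like:

import torch

opt = torch.optim.RAdam(torch.nn.Linear(2, 2).parameters())
sd = opt.state_dict()
print(sorted(sd.keys()))  # expected: ['param_groups', 'state'] (assumption)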

                                                                                                @@ -4846,7 +4846,7 @@

                                                                                                RAdam#
                                                                                                -step(closure=None)[source]#
                                                                                                +step(closure=None)[source]#

                                                                                                Perform a single optimization step.

                                                                                                Parameters
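A sketch of the optional closure argument, a callable that re-evaluates the model and returns the loss:

import torch

model = torch.nn.Linear(2, 2)
opt = torch.optim.RAdam(model.parameters())
x, y = torch.randn(4, 2), torch.randn(4, 2)

def closure():
    opt.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()
    return loss

opt.step(closure)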
                                                                                                @@ -4858,7 +4858,7 @@

                                                                                                RAdam#
                                                                                                -zero_grad(set_to_none=True)[source]#
                                                                                                +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                                                                Parameters
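A sketch of the set_to_none switch; the default (True) replaces .grad with None rather than zero-filling it, which saves memory:

import torch

opt = torch.optim.RAdam(torch.nn.Linear(2, 2).parameters())
opt.zero_grad()                   # grads become None (default set_to_none=True)
opt.zero_grad(set_to_none=False)  # grads become zero-filled tensors instead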
diff --git a/2.9/generated/torch.optim.radam.radam.html b/2.9/generated/torch.optim.radam.radam.html
index 3d151221298..118a1667f61 100644
--- a/2.9/generated/torch.optim.radam.radam.html
+++ b/2.9/generated/torch.optim.radam.radam.html
@@ -4415,7 +4415,7 @@

                                                                                                torch.optim.radam.radam#

                                                                                                -torch.optim.radam.radam(params, grads, exp_avgs, exp_avg_sqs, state_steps, decoupled_weight_decay=False, foreach=None, differentiable=False, capturable=False, has_complex=False, maximize=False, *, beta1, beta2, lr, weight_decay, eps)[source]#
                                                                                                +torch.optim.radam.radam(params, grads, exp_avgs, exp_avg_sqs, state_steps, decoupled_weight_decay=False, foreach=None, differentiable=False, capturable=False, has_complex=False, maximize=False, *, beta1, beta2, lr, weight_decay, eps)[source]#

                                                                                                Functional API that performs RAdam algorithm computation.

                                                                                                See RAdam for details.

diff --git a/2.9/generated/torch.optim.rmsprop.RMSprop.html b/2.9/generated/torch.optim.rmsprop.RMSprop.html
index 8d1b3272ce7..d4449bccfae 100644
--- a/2.9/generated/torch.optim.rmsprop.RMSprop.html
+++ b/2.9/generated/torch.optim.rmsprop.RMSprop.html
@@ -4415,7 +4415,7 @@

                                                                                                RMSprop#

                                                                                                -class torch.optim.rmsprop.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False, capturable=False, foreach=None, maximize=False, differentiable=False)[source]#
                                                                                                +class torch.optim.rmsprop.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False, capturable=False, foreach=None, maximize=False, differentiable=False)[source]#

                                                                                                Implements RMSprop algorithm.

\begin{aligned}
&\textbf{input}: \alpha \text{ (alpha)},\; \gamma \text{ (lr)},\; \theta_0 \text{ (params)},\; f(\theta) \text{ (objective)},\; \lambda \text{ (weight decay)},\; \mu \text{ (momentum)},\; \mathit{centered},\; \epsilon \text{ (epsilon)} \\
&\textbf{initialize}: v_0 \leftarrow 0 \text{ (square average)},\; b_0 \leftarrow 0 \text{ (buffer)},\; g^{ave}_0 \leftarrow 0 \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad g_t \leftarrow \nabla_\theta f_t(\theta_{t-1}) \\
&\quad \textbf{if}\ \lambda \neq 0:\; g_t \leftarrow g_t + \lambda\theta_{t-1} \\
&\quad v_t \leftarrow \alpha v_{t-1} + (1-\alpha) g_t^2 \\
&\quad \tilde{v}_t \leftarrow v_t \\
&\quad \textbf{if}\ \mathit{centered}:\; g^{ave}_t \leftarrow g^{ave}_{t-1}\alpha + (1-\alpha) g_t,\;\; \tilde{v}_t \leftarrow \tilde{v}_t - \big(g^{ave}_t\big)^2 \\
&\quad \textbf{if}\ \mu > 0:\; b_t \leftarrow \mu b_{t-1} + g_t / (\sqrt{\tilde{v}_t} + \epsilon),\;\; \theta_t \leftarrow \theta_{t-1} - \gamma b_t \\
&\quad \textbf{else}:\; \theta_t \leftarrow \theta_{t-1} - \gamma g_t / (\sqrt{\tilde{v}_t} + \epsilon) \\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4527,7 +4527,7 @@
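A minimal construction sketch using the documented defaults plus centered momentum:

import torch

model = torch.nn.Linear(10, 2)
opt = torch.optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99,
                          momentum=0.9, centered=True)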

                                                                                                RMSprop
                                                                                                -add_param_group(param_group)[source]#
                                                                                                +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.
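For instance (a sketch; the backbone/head split is illustrative), a previously frozen head can be handed to an existing optimizer with its own hyperparameters:

import torch

backbone, head = torch.nn.Linear(4, 4), torch.nn.Linear(4, 2)
opt = torch.optim.RMSprop(backbone.parameters(), lr=1e-2)
opt.add_param_group({"params": head.parameters(), "lr": 1e-3})
print(len(opt.param_groups))  # 2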

                                                                                                @@ -4541,7 +4541,7 @@

                                                                                                RMSprop
                                                                                                -load_state_dict(state_dict)[source]#
                                                                                                +load_state_dict(state_dict)[source]#

                                                                                                Load the optimizer state.

                                                                                                Parameters
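The canonical round trip, sketched (the checkpoint path is illustrative):

import torch

model = torch.nn.Linear(4, 2)
opt = torch.optim.RMSprop(model.parameters(), lr=1e-2)
torch.save(opt.state_dict(), "opt.pt")      # illustrative path
opt.load_state_dict(torch.load("opt.pt"))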
                                                                                                @@ -4594,7 +4594,7 @@

                                                                                                RMSprop
                                                                                                -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                                +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                                Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                                @@ -4628,7 +4628,7 @@

                                                                                                RMSprop
                                                                                                -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                                +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                                Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                                @@ -4665,7 +4665,7 @@

                                                                                                RMSprop
                                                                                                -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                                +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                                Register a state dict post-hook which will be called after state_dict() is called.

                                                                                                It should have the following signature:

                                                                                                hook(optimizer, state_dict) -> state_dict or None
                                                                                                @@ -4697,7 +4697,7 @@ 

                                                                                                RMSprop
                                                                                                -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                                +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                                Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                                It should have the following signature:

                                                                                                hook(optimizer) -> None
                                                                                                @@ -4729,7 +4729,7 @@ 

                                                                                                RMSprop
                                                                                                -register_step_post_hook(hook)[source]#
                                                                                                +register_step_post_hook(hook)[source]#

                                                                                                Register an optimizer step post hook which will be called after optimizer step.

                                                                                                It should have the following signature:

                                                                                                hook(optimizer, args, kwargs) -> None
                                                                                                @@ -4752,7 +4752,7 @@ 

                                                                                                RMSprop
                                                                                                -register_step_pre_hook(hook)[source]#
                                                                                                +register_step_pre_hook(hook)[source]#

                                                                                                Register an optimizer step pre hook which will be called before optimizer step.

                                                                                                It should have the following signature:

                                                                                                hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                                @@ -4777,7 +4777,7 @@ 

                                                                                                RMSprop
                                                                                                -state_dict()[source]#
                                                                                                +state_dict()[source]#

                                                                                                Return the state of the optimizer as a dict.

                                                                                                It contains two entries:

                                                                                                  @@ -4840,7 +4840,7 @@

                                                                                                  RMSprop
                                                                                                  -step(closure=None)[source]#
                                                                                                  +step(closure=None)[source]#

                                                                                                  Perform a single optimization step.

                                                                                                  Parameters
                                                                                                  @@ -4852,7 +4852,7 @@

                                                                                                  RMSprop
                                                                                                  -zero_grad(set_to_none=True)[source]#
                                                                                                  +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                                                                  Parameters
diff --git a/2.9/generated/torch.optim.rmsprop.rmsprop.html b/2.9/generated/torch.optim.rmsprop.rmsprop.html
index ffd96773e58..e1bb38edf24 100644
--- a/2.9/generated/torch.optim.rmsprop.rmsprop.html
+++ b/2.9/generated/torch.optim.rmsprop.rmsprop.html
@@ -4415,7 +4415,7 @@

                                                                                                  torch.optim.rmsprop.rmsprop#

                                                                                                  -torch.optim.rmsprop.rmsprop(params, grads, square_avgs, grad_avgs, momentum_buffer_list, state_steps, foreach=None, maximize=False, differentiable=False, capturable=False, has_complex=False, *, lr, alpha, eps, weight_decay, momentum, centered)[source]#
                                                                                                  +torch.optim.rmsprop.rmsprop(params, grads, square_avgs, grad_avgs, momentum_buffer_list, state_steps, foreach=None, maximize=False, differentiable=False, capturable=False, has_complex=False, *, lr, alpha, eps, weight_decay, momentum, centered)[source]#

                                                                                                  Functional API that performs rmsprop algorithm computation.

See RMSprop for details.

diff --git a/2.9/generated/torch.optim.rprop.Rprop.html b/2.9/generated/torch.optim.rprop.Rprop.html
index 996f70d6cee..8ab391d426f 100644
--- a/2.9/generated/torch.optim.rprop.Rprop.html
+++ b/2.9/generated/torch.optim.rprop.Rprop.html
@@ -4415,7 +4415,7 @@

                                                                                                  Rprop#

                                                                                                  -class torch.optim.rprop.Rprop(params, lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50), *, capturable=False, foreach=None, maximize=False, differentiable=False)[source]#
                                                                                                  +class torch.optim.rprop.Rprop(params, lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50), *, capturable=False, foreach=None, maximize=False, differentiable=False)[source]#

                                                                                                  Implements the resilient backpropagation algorithm.

\begin{aligned}
&\textbf{input}: \theta_0 \in \mathbf{R}^d \text{ (params)},\; f(\theta) \text{ (objective)},\; \eta_{+/-} \text{ (etaplus, etaminus)},\; \Gamma_{max/min} \text{ (step sizes)} \\
&\textbf{initialize}: g^0_{prev} \leftarrow 0,\; \eta_0 \leftarrow \text{lr (learning rate)} \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad g_t \leftarrow \nabla_\theta f_t(\theta_{t-1}) \\
&\quad \textbf{for}\ i = 0, 1, \ldots, d-1\ \textbf{do} \\
&\qquad \textbf{if}\ g^i_{prev} g^i_t > 0:\; \eta^i_t \leftarrow \min(\eta^i_{t-1}\eta_{+}, \Gamma_{max}) \\
&\qquad \textbf{else if}\ g^i_{prev} g^i_t < 0:\; \eta^i_t \leftarrow \max(\eta^i_{t-1}\eta_{-}, \Gamma_{min}),\;\; g^i_t \leftarrow 0 \\
&\qquad \textbf{else}:\; \eta^i_t \leftarrow \eta^i_{t-1} \\
&\quad \theta_t \leftarrow \theta_{t-1} - \eta_t\,\mathrm{sign}(g_t) \\
&\quad g_{prev} \leftarrow g_t \\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4484,7 +4484,7 @@
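A construction sketch mirroring the defaults above; note that Rprop is designed for full-batch gradients:

import torch

model = torch.nn.Linear(4, 2)
opt = torch.optim.Rprop(model.parameters(), lr=0.01,
                        etas=(0.5, 1.2), step_sizes=(1e-6, 50))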

                                                                                                  Rprop#

                                                                                                  -add_param_group(param_group)[source]#
                                                                                                  +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                                  @@ -4498,7 +4498,7 @@

                                                                                                  Rprop#
                                                                                                  -load_state_dict(state_dict)[source]#
                                                                                                  +load_state_dict(state_dict)[source]#

                                                                                                  Load the optimizer state.

                                                                                                  Parameters
                                                                                                  @@ -4551,7 +4551,7 @@

                                                                                                  Rprop#
                                                                                                  -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                                  +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                                  Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                                  @@ -4585,7 +4585,7 @@

                                                                                                  Rprop#
                                                                                                  -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                                  +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                                  Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                                  @@ -4622,7 +4622,7 @@

                                                                                                  Rprop#
                                                                                                  -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                                  +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                                  Register a state dict post-hook which will be called after state_dict() is called.

                                                                                                  It should have the following signature:

                                                                                                  hook(optimizer, state_dict) -> state_dict or None
                                                                                                  @@ -4654,7 +4654,7 @@ 

                                                                                                  Rprop#
                                                                                                  -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                                  +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                                  Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                                  It should have the following signature:

                                                                                                  hook(optimizer) -> None
                                                                                                  @@ -4686,7 +4686,7 @@ 

                                                                                                  Rprop#
                                                                                                  -register_step_post_hook(hook)[source]#
                                                                                                  +register_step_post_hook(hook)[source]#

                                                                                                  Register an optimizer step post hook which will be called after optimizer step.

                                                                                                  It should have the following signature:

                                                                                                  hook(optimizer, args, kwargs) -> None
                                                                                                  @@ -4709,7 +4709,7 @@ 

                                                                                                  Rprop#
                                                                                                  -register_step_pre_hook(hook)[source]#
                                                                                                  +register_step_pre_hook(hook)[source]#

                                                                                                  Register an optimizer step pre hook which will be called before optimizer step.

                                                                                                  It should have the following signature:

                                                                                                  hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                                  @@ -4734,7 +4734,7 @@ 

                                                                                                  Rprop#
                                                                                                  -state_dict()[source]#
                                                                                                  +state_dict()[source]#

                                                                                                  Return the state of the optimizer as a dict.

                                                                                                  It contains two entries:

                                                                                                    @@ -4797,7 +4797,7 @@

                                                                                                    Rprop#
                                                                                                    -step(closure=None)[source]#
                                                                                                    +step(closure=None)[source]#

                                                                                                    Perform a single optimization step.

                                                                                                    Parameters
                                                                                                    @@ -4809,7 +4809,7 @@

                                                                                                    Rprop#
                                                                                                    -zero_grad(set_to_none=True)[source]#
                                                                                                    +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                                                                    Parameters
diff --git a/2.9/generated/torch.optim.rprop.rprop.html b/2.9/generated/torch.optim.rprop.rprop.html
index acbfddb03cf..51365c17644 100644
--- a/2.9/generated/torch.optim.rprop.rprop.html
+++ b/2.9/generated/torch.optim.rprop.rprop.html
@@ -4415,7 +4415,7 @@

                                                                                                    torch.optim.rprop.rprop#

                                                                                                    -torch.optim.rprop.rprop(params, grads, prevs, step_sizes, state_steps, foreach=None, capturable=False, maximize=False, differentiable=False, has_complex=False, *, step_size_min, step_size_max, etaminus, etaplus)[source]#
                                                                                                    +torch.optim.rprop.rprop(params, grads, prevs, step_sizes, state_steps, foreach=None, capturable=False, maximize=False, differentiable=False, has_complex=False, *, step_size_min, step_size_max, etaminus, etaplus)[source]#

                                                                                                    Functional API that performs rprop algorithm computation.

                                                                                                    See Rprop for details.

diff --git a/2.9/generated/torch.optim.sgd.SGD.html b/2.9/generated/torch.optim.sgd.SGD.html
index efc666c8501..51f657d34ae 100644
--- a/2.9/generated/torch.optim.sgd.SGD.html
+++ b/2.9/generated/torch.optim.sgd.SGD.html
@@ -4415,7 +4415,7 @@

                                                                                                    SGD#

                                                                                                    -class torch.optim.sgd.SGD(params, lr=0.001, momentum=0, dampening=0, weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False, fused=None)[source]#
                                                                                                    +class torch.optim.sgd.SGD(params, lr=0.001, momentum=0, dampening=0, weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False, fused=None)[source]#

                                                                                                    Implements stochastic gradient descent (optionally with momentum).

\begin{aligned}
&\textbf{input}: \gamma \text{ (lr)},\; \theta_0 \text{ (params)},\; f(\theta) \text{ (objective)},\; \lambda \text{ (weight decay)},\; \mu \text{ (momentum)},\; \tau \text{ (dampening)},\; \mathit{nesterov},\; \mathit{maximize} \\
&\textbf{for}\ t = 1\ \textbf{to}\ \ldots\ \textbf{do} \\
&\quad \textbf{if}\ \mathit{maximize}:\; g_t \leftarrow -\nabla_\theta f_t(\theta_{t-1})\quad \textbf{else}:\; g_t \leftarrow \nabla_\theta f_t(\theta_{t-1}) \\
&\quad \textbf{if}\ \lambda \neq 0:\; g_t \leftarrow g_t + \lambda\theta_{t-1} \\
&\quad \textbf{if}\ \mu \neq 0: \\
&\qquad \textbf{if}\ t > 1:\; b_t \leftarrow \mu b_{t-1} + (1-\tau) g_t\quad \textbf{else}:\; b_t \leftarrow g_t \\
&\qquad \textbf{if}\ \mathit{nesterov}:\; g_t \leftarrow g_t + \mu b_t\quad \textbf{else}:\; g_t \leftarrow b_t \\
&\quad \theta_t \leftarrow \theta_{t-1} - \gamma g_t \\
&\textbf{return}\ \theta_t
\end{aligned}
@@ -4529,7 +4529,7 @@
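The classic usage sketch with momentum:

import torch

model = torch.nn.Linear(4, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
x, y = torch.randn(8, 4), torch.randn(8, 2)
opt.zero_grad()
torch.nn.functional.mse_loss(model(x), y).backward()
opt.step()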

                                                                                                    SGD#

                                                                                                    -add_param_group(param_group)[source]#
                                                                                                    +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                                    @@ -4543,7 +4543,7 @@

                                                                                                    SGD#
                                                                                                    -load_state_dict(state_dict)[source]#
                                                                                                    +load_state_dict(state_dict)[source]#

                                                                                                    Load the optimizer state.

                                                                                                    Parameters
                                                                                                    @@ -4596,7 +4596,7 @@

                                                                                                    SGD#
                                                                                                    -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                                    +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                                    Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                                    @@ -4630,7 +4630,7 @@

                                                                                                    SGD#
                                                                                                    -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                                    +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                                    Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                                    @@ -4667,7 +4667,7 @@

                                                                                                    SGD#
                                                                                                    -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                                    +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                                    Register a state dict post-hook which will be called after state_dict() is called.

                                                                                                    It should have the following signature:

                                                                                                    hook(optimizer, state_dict) -> state_dict or None
                                                                                                    @@ -4699,7 +4699,7 @@ 

                                                                                                    SGD#
                                                                                                    -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                                    +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                                    Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                                    It should have the following signature:

                                                                                                    hook(optimizer) -> None
                                                                                                    @@ -4731,7 +4731,7 @@ 

                                                                                                    SGD#
                                                                                                    -register_step_post_hook(hook)[source]#
                                                                                                    +register_step_post_hook(hook)[source]#

                                                                                                    Register an optimizer step post hook which will be called after optimizer step.

                                                                                                    It should have the following signature:

                                                                                                    hook(optimizer, args, kwargs) -> None
                                                                                                    @@ -4754,7 +4754,7 @@ 

                                                                                                    SGD#
                                                                                                    -register_step_pre_hook(hook)[source]#
                                                                                                    +register_step_pre_hook(hook)[source]#

                                                                                                    Register an optimizer step pre hook which will be called before optimizer step.

                                                                                                    It should have the following signature:

                                                                                                    hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                                    @@ -4779,7 +4779,7 @@ 

                                                                                                    SGD#
                                                                                                    -state_dict()[source]#
                                                                                                    +state_dict()[source]#

                                                                                                    Return the state of the optimizer as a dict.

                                                                                                    It contains two entries:

                                                                                                      @@ -4842,7 +4842,7 @@

                                                                                                      SGD#
                                                                                                      -step(closure=None)[source]#
                                                                                                      +step(closure=None)[source]#

                                                                                                      Perform a single optimization step.

                                                                                                      Parameters
                                                                                                      @@ -4854,7 +4854,7 @@

                                                                                                      SGD#
                                                                                                      -zero_grad(set_to_none=True)[source]#
                                                                                                      +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                                                                      Parameters
diff --git a/2.9/generated/torch.optim.sgd.sgd.html b/2.9/generated/torch.optim.sgd.sgd.html
index 8a902e5ed35..93006fbdee5 100644
--- a/2.9/generated/torch.optim.sgd.sgd.html
+++ b/2.9/generated/torch.optim.sgd.sgd.html
@@ -4415,7 +4415,7 @@

                                                                                                      torch.optim.sgd.sgd#

                                                                                                      -torch.optim.sgd.sgd(params, d_p_list, momentum_buffer_list, has_sparse_grad=False, foreach=None, fused=None, grad_scale=None, found_inf=None, *, weight_decay, momentum, lr, dampening, nesterov, maximize)[source]#
                                                                                                      +torch.optim.sgd.sgd(params, d_p_list, momentum_buffer_list, has_sparse_grad=False, foreach=None, fused=None, grad_scale=None, found_inf=None, *, weight_decay, momentum, lr, dampening, nesterov, maximize)[source]#

                                                                                                      Functional API that performs SGD algorithm computation.

                                                                                                      See SGD for details.
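A hedged sketch of the functional entry point; the class API is the supported interface, and the keyword-only arguments mirror the signature shown above. The tensors here are stand-ins:

import torch
from torch.optim.sgd import sgd

w = torch.randn(3)    # parameter data (updated in place)
g = torch.randn(3)    # its gradient
buf = [None]          # momentum buffer, created on first use
sgd([w], [g], buf, weight_decay=0.0, momentum=0.9, lr=0.1,
    dampening=0.0, nesterov=False, maximize=False)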

diff --git a/2.9/generated/torch.optim.sparse_adam.SparseAdam.html b/2.9/generated/torch.optim.sparse_adam.SparseAdam.html
index b31ab96eb06..d481a021fd7 100644
--- a/2.9/generated/torch.optim.sparse_adam.SparseAdam.html
+++ b/2.9/generated/torch.optim.sparse_adam.SparseAdam.html
@@ -4415,7 +4415,7 @@

                                                                                                      SparseAdam#

                                                                                                      -class torch.optim.sparse_adam.SparseAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, maximize=False)[source]#
                                                                                                      +class torch.optim.sparse_adam.SparseAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, maximize=False)[source]#

SparseAdam implements a masked version of the Adam algorithm suitable for sparse gradients. Currently, due to implementation constraints (explained below), SparseAdam is only intended for a narrow subset of use cases, specifically
@@ -4474,7 +4474,7 @@
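That narrow use case is layers that emit sparse gradients, such as an embedding created with sparse=True; a sketch:

import torch

emb = torch.nn.Embedding(10, 4, sparse=True)   # produces sparse gradients
opt = torch.optim.SparseAdam(emb.parameters(), lr=1e-3)
emb(torch.tensor([1, 2, 3])).sum().backward()
opt.step()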

                                                                                                      SparseAdam
                                                                                                      -add_param_group(param_group)[source]#
                                                                                                      +add_param_group(param_group)[source]#

Add a param group to the Optimizer's param_groups.

This can be useful when fine-tuning a pre-trained network, as frozen layers can be made trainable and added to the Optimizer as training progresses.

                                                                                                      @@ -4488,7 +4488,7 @@

                                                                                                      SparseAdam
                                                                                                      -load_state_dict(state_dict)[source]#
                                                                                                      +load_state_dict(state_dict)[source]#

                                                                                                      Load the optimizer state.

                                                                                                      Parameters
                                                                                                      @@ -4541,7 +4541,7 @@

                                                                                                      SparseAdam
                                                                                                      -register_load_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                                      +register_load_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                                      Register a load_state_dict post-hook which will be called after load_state_dict() is called. It should have the following signature:

                                                                                                      @@ -4575,7 +4575,7 @@

                                                                                                      SparseAdam
                                                                                                      -register_load_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                                      +register_load_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                                      Register a load_state_dict pre-hook which will be called before load_state_dict() is called. It should have the following signature:

                                                                                                      @@ -4612,7 +4612,7 @@

                                                                                                      SparseAdam
                                                                                                      -register_state_dict_post_hook(hook, prepend=False)[source]#
                                                                                                      +register_state_dict_post_hook(hook, prepend=False)[source]#

                                                                                                      Register a state dict post-hook which will be called after state_dict() is called.

                                                                                                      It should have the following signature:

                                                                                                      hook(optimizer, state_dict) -> state_dict or None
                                                                                                      @@ -4644,7 +4644,7 @@ 

                                                                                                      SparseAdam
                                                                                                      -register_state_dict_pre_hook(hook, prepend=False)[source]#
                                                                                                      +register_state_dict_pre_hook(hook, prepend=False)[source]#

                                                                                                      Register a state dict pre-hook which will be called before state_dict() is called.

                                                                                                      It should have the following signature:

                                                                                                      hook(optimizer) -> None
                                                                                                      @@ -4676,7 +4676,7 @@ 

                                                                                                      SparseAdam
                                                                                                      -register_step_post_hook(hook)[source]#
                                                                                                      +register_step_post_hook(hook)[source]#

                                                                                                      Register an optimizer step post hook which will be called after optimizer step.

                                                                                                      It should have the following signature:

                                                                                                      hook(optimizer, args, kwargs) -> None
                                                                                                      @@ -4699,7 +4699,7 @@ 

                                                                                                      SparseAdam
                                                                                                      -register_step_pre_hook(hook)[source]#
                                                                                                      +register_step_pre_hook(hook)[source]#

                                                                                                      Register an optimizer step pre hook which will be called before optimizer step.

                                                                                                      It should have the following signature:

                                                                                                      hook(optimizer, args, kwargs) -> None or modified args and kwargs
                                                                                                      @@ -4724,7 +4724,7 @@ 

                                                                                                      SparseAdam
                                                                                                      -state_dict()[source]#
                                                                                                      +state_dict()[source]#

                                                                                                      Return the state of the optimizer as a dict.

                                                                                                      It contains two entries:

                                                                                                        @@ -4787,7 +4787,7 @@

                                                                                                        SparseAdam
                                                                                                        -step(closure=None)[source]#
                                                                                                        +step(closure=None)[source]#

                                                                                                        Perform a single optimization step.

                                                                                                        Parameters
                                                                                                        @@ -4799,7 +4799,7 @@

                                                                                                        SparseAdam
                                                                                                        -zero_grad(set_to_none=True)[source]#
                                                                                                        +zero_grad(set_to_none=True)[source]#

Reset the gradients of all optimized torch.Tensors.

                                                                                                        Parameters
diff --git a/2.9/generated/torch.optim.swa_utils.AveragedModel.html b/2.9/generated/torch.optim.swa_utils.AveragedModel.html
index 57261475995..512251422b3 100644
--- a/2.9/generated/torch.optim.swa_utils.AveragedModel.html
+++ b/2.9/generated/torch.optim.swa_utils.AveragedModel.html
@@ -4404,7 +4404,7 @@

                                                                                                        AveragedModel#

                                                                                                        -class torch.optim.swa_utils.AveragedModel(model, device=None, avg_fn=None, multi_avg_fn=None, use_buffers=False)[source]#
                                                                                                        +class torch.optim.swa_utils.AveragedModel(model, device=None, avg_fn=None, multi_avg_fn=None, use_buffers=False)[source]#

                                                                                                        Implements averaged model for Stochastic Weight Averaging (SWA) and Exponential Moving Average (EMA).

Stochastic Weight Averaging was proposed in Averaging Weights Leads to Wider Optima and Better Generalization by Pavel Izmailov, Dmitrii
@@ -4493,7 +4493,7 @@
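A condensed sketch of SWA-style averaging (the schedule and step counts here are illustrative):

import torch

model = torch.nn.Linear(4, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
swa_model = torch.optim.swa_utils.AveragedModel(model)
for step in range(100):
    opt.zero_grad()
    model(torch.randn(8, 4)).sum().backward()
    opt.step()
    if step >= 50:                        # start averaging late in training
        swa_model.update_parameters(model)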

                                                                                                        AveragedModel
                                                                                                        -add_module(name, module)[source]#
                                                                                                        +add_module(name, module)[source]#

                                                                                                        Add a child module to the current module.

                                                                                                        The module can be accessed as an attribute using the given name.
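Equivalent to assigning the module as an attribute, sketched:

import torch

class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.add_module("proj", torch.nn.Linear(4, 4))  # same effect as self.proj = ...

net = Net()
print(net.proj)  # registered child, reachable under the given name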

                                                                                                        @@ -4509,7 +4509,7 @@

                                                                                                        AveragedModel
                                                                                                        -apply(fn)[source]#
                                                                                                        +apply(fn)[source]#

                                                                                                        Apply fn recursively to every submodule (as returned by .children()) as well as self.

                                                                                                        Typical use includes initializing the parameters of a model (see also torch.nn.init).
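The standard initialization sketch:

import torch

@torch.no_grad()
def init_weights(m):
    if isinstance(m, torch.nn.Linear):
        m.weight.fill_(1.0)

net = torch.nn.Sequential(torch.nn.Linear(2, 2), torch.nn.Linear(2, 2))
net.apply(init_weights)  # visits both Linear children, then net itself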

                                                                                                        @@ -4551,7 +4551,7 @@

                                                                                                        AveragedModel
                                                                                                        -bfloat16()[source]#
                                                                                                        +bfloat16()[source]#

                                                                                                        Casts all floating point parameters and buffers to bfloat16 datatype.

                                                                                                        Note

                                                                                                        @@ -4569,7 +4569,7 @@

                                                                                                        AveragedModel
                                                                                                        -buffers(recurse=True)[source]#
                                                                                                        +buffers(recurse=True)[source]#

                                                                                                        Return an iterator over module buffers.

                                                                                                        Parameters
                                                                                                        @@ -4595,7 +4595,7 @@

                                                                                                        AveragedModel
                                                                                                        -children()[source]#
                                                                                                        +children()[source]#

                                                                                                        Return an iterator over immediate children modules.

                                                                                                        Yields
                                                                                                        @@ -4609,7 +4609,7 @@

                                                                                                        AveragedModel
                                                                                                        -compile(*args, **kwargs)[source]#
                                                                                                        +compile(*args, **kwargs)[source]#

                                                                                                        Compile this Module’s forward using torch.compile().

                                                                                                        This Module’s __call__ method is compiled and all arguments are passed as-is to torch.compile().
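A minimal sketch of what that looks like in practice (illustrative only):

import torch
from torch import nn

model = nn.Linear(4, 4)
model.compile()                  # arguments are forwarded to torch.compile()
out = model(torch.randn(2, 4))   # first call triggers compilation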

                                                                                                        @@ -4618,7 +4618,7 @@

                                                                                                        AveragedModel
                                                                                                        -cpu()[source]#
                                                                                                        +cpu()[source]#

                                                                                                        Move all model parameters and buffers to the CPU.

                                                                                                        Note

                                                                                                        @@ -4636,7 +4636,7 @@

                                                                                                        AveragedModel
                                                                                                        -cuda(device=None)[source]#
                                                                                                        +cuda(device=None)[source]#

                                                                                                        Move all model parameters and buffers to the GPU.

This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will live on GPU while being optimized.
@@ -4661,7 +4661,7 @@

                                                                                                        AveragedModel
                                                                                                        -double()[source]#
                                                                                                        +double()[source]#

                                                                                                        Casts all floating point parameters and buffers to double datatype.

                                                                                                        Note

                                                                                                        @@ -4679,7 +4679,7 @@

                                                                                                        AveragedModel
                                                                                                        -eval()[source]#
                                                                                                        +eval()[source]#

                                                                                                        Set the module in evaluation mode.

This has an effect only on certain modules. See the documentation of particular modules for details of their behaviors in training/evaluation mode, if they are affected, e.g. Dropout, BatchNorm, etc.
@@ -4700,7 +4700,7 @@

                                                                                                        AveragedModel
                                                                                                        -extra_repr()[source]#
                                                                                                        +extra_repr()[source]#

                                                                                                        Return the extra representation of the module.

To print customized extra information, you should re-implement this method in your own modules. Both single-line and multi-line strings are acceptable.
@@ -4714,7 +4714,7 @@

                                                                                                        AveragedModel
                                                                                                        -float()[source]#
                                                                                                        +float()[source]#

                                                                                                        Casts all floating point parameters and buffers to float datatype.

                                                                                                        Note

                                                                                                        @@ -4732,13 +4732,13 @@

                                                                                                        AveragedModel
                                                                                                        -forward(*args, **kwargs)[source]#
                                                                                                        +forward(*args, **kwargs)[source]#

                                                                                                        Forward pass.

                                                                                                        -get_buffer(target)[source]#
                                                                                                        +get_buffer(target)[source]#

                                                                                                        Return the buffer given by target if it exists, otherwise throw an error.

See the docstring for get_submodule for a more detailed explanation of this method’s functionality as well as how to correctly specify target.
@@ -4765,7 +4765,7 @@

                                                                                                        AveragedModel
                                                                                                        -get_extra_state()[source]#
                                                                                                        +get_extra_state()[source]#

                                                                                                        Return any extra state to include in the module’s state_dict.

Implement this and a corresponding set_extra_state() for your module if you need to store extra state. This function is called when building the module’s state_dict().
@@ -4786,7 +4786,7 @@

                                                                                                        AveragedModel
                                                                                                        -get_parameter(target)[source]#
                                                                                                        +get_parameter(target)[source]#

                                                                                                        Return the parameter given by target if it exists, otherwise throw an error.

See the docstring for get_submodule for a more detailed explanation of this method’s functionality as well as how to correctly specify target.
@@ -4813,7 +4813,7 @@

                                                                                                        AveragedModel
                                                                                                        -get_submodule(target)[source]#
                                                                                                        +get_submodule(target)[source]#

                                                                                                        Return the submodule given by target if it exists, otherwise throw an error.

                                                                                                        For example, let’s say you have an nn.Module A that looks like this:
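(The page's own diagram is cut off in this hunk; the following is an illustrative reconstruction with hypothetical names, not the original example.)

import torch.nn as nn

# Hypothetical nesting: A holds net_b, which holds a Linear named "linear".
A = nn.Module()
A.net_b = nn.Module()
A.net_b.linear = nn.Linear(3, 3)

sub = A.get_submodule("net_b.linear")  # dotted path, resolved from the root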

                                                                                                        @@ -4862,7 +4862,7 @@

                                                                                                        AveragedModel
                                                                                                        -half()[source]#
                                                                                                        +half()[source]#

                                                                                                        Casts all floating point parameters and buffers to half datatype.

                                                                                                        Note

                                                                                                        @@ -4880,7 +4880,7 @@

                                                                                                        AveragedModel
                                                                                                        -ipu(device=None)[source]#
                                                                                                        +ipu(device=None)[source]#

                                                                                                        Move all model parameters and buffers to the IPU.

This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will live on IPU while being optimized.
@@ -4905,7 +4905,7 @@

                                                                                                        AveragedModel
                                                                                                        -load_state_dict(state_dict, strict=True, assign=False)[source]#
                                                                                                        +load_state_dict(state_dict, strict=True, assign=False)[source]#

                                                                                                        Copy parameters and buffers from state_dict into this module and its descendants.
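A small round-trip sketch (illustrative, with throwaway modules):

import torch
from torch import nn

src = nn.Linear(3, 3)
dst = nn.Linear(3, 3)
# With strict=True the key sets must match exactly.
result = dst.load_state_dict(src.state_dict(), strict=True)
assert not result.missing_keys and not result.unexpected_keys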

If strict is True, then the keys of state_dict must exactly match the keys returned by this module’s state_dict() function.
@@ -4960,7 +4960,7 @@

                                                                                                        AveragedModel
                                                                                                        -modules()[source]#
                                                                                                        +modules()[source]#

                                                                                                        Return an iterator over all modules in the network.

                                                                                                        Yields
                                                                                                        @@ -4992,7 +4992,7 @@

                                                                                                        AveragedModel
                                                                                                        -mtia(device=None)[source]#
                                                                                                        +mtia(device=None)[source]#

                                                                                                        Move all model parameters and buffers to the MTIA.

This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will live on MTIA while being optimized.
@@ -5017,7 +5017,7 @@

                                                                                                        AveragedModel
                                                                                                        -named_buffers(prefix='', recurse=True, remove_duplicate=True)[source]#
                                                                                                        +named_buffers(prefix='', recurse=True, remove_duplicate=True)[source]#

                                                                                                        Return an iterator over module buffers, yielding both the name of the buffer as well as the buffer itself.

                                                                                                        Parameters
                                                                                                        @@ -5046,7 +5046,7 @@

                                                                                                        AveragedModel
                                                                                                        -named_children()[source]#
                                                                                                        +named_children()[source]#

                                                                                                        Return an iterator over immediate children modules, yielding both the name of the module as well as the module itself.

                                                                                                        Yields
                                                                                                        @@ -5066,7 +5066,7 @@

                                                                                                        AveragedModel
                                                                                                        -named_modules(memo=None, prefix='', remove_duplicate=True)[source]#
                                                                                                        +named_modules(memo=None, prefix='', remove_duplicate=True)[source]#

                                                                                                        Return an iterator over all modules in the network, yielding both the name of the module as well as the module itself.

                                                                                                        Parameters
                                                                                                        @@ -5103,7 +5103,7 @@

                                                                                                        AveragedModel
                                                                                                        -named_parameters(prefix='', recurse=True, remove_duplicate=True)[source]#
                                                                                                        +named_parameters(prefix='', recurse=True, remove_duplicate=True)[source]#

                                                                                                        Return an iterator over module parameters, yielding both the name of the parameter as well as the parameter itself.

                                                                                                        Parameters
                                                                                                        @@ -5133,7 +5133,7 @@

                                                                                                        AveragedModel
                                                                                                        -parameters(recurse=True)[source]#
                                                                                                        +parameters(recurse=True)[source]#

                                                                                                        Return an iterator over module parameters.

                                                                                                        This is typically passed to an optimizer.

                                                                                                        @@ -5160,7 +5160,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_backward_hook(hook)[source]#
                                                                                                        +register_backward_hook(hook)[source]#

                                                                                                        Register a backward hook on the module.

                                                                                                        This function is deprecated in favor of register_full_backward_hook() and the behavior of this function will change in future versions.

                                                                                                        @@ -5177,7 +5177,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_buffer(name, tensor, persistent=True)[source]#
                                                                                                        +register_buffer(name, tensor, persistent=True)[source]#

                                                                                                        Add a buffer to the module.
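For example (an illustrative module, not from the page):

import torch
from torch import nn

class RunningStats(nn.Module):
    def __init__(self):
        super().__init__()
        # Saved in state_dict and moved with .to()/.cuda(), but never trained.
        self.register_buffer("running_mean", torch.zeros(8))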

This is typically used to register a buffer that should not be considered a model parameter. For example, BatchNorm’s running_mean is not a parameter, but is part of the module’s state.
@@ -5209,7 +5209,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_forward_hook(hook, *, prepend=False, with_kwargs=False, always_call=False)[source]#
                                                                                                        +register_forward_hook(hook, *, prepend=False, with_kwargs=False, always_call=False)[source]#

                                                                                                        Register a forward hook on the module.

                                                                                                        The hook will be called every time after forward() has computed an output.
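A minimal sketch of attaching and removing such a hook (the hook body is illustrative):

import torch
from torch import nn

def log_output(module, args, output):
    # Returning None keeps the module's output unchanged.
    print(type(module).__name__, tuple(output.shape))

layer = nn.Linear(4, 2)
handle = layer.register_forward_hook(log_output)
layer(torch.randn(1, 4))  # hook fires after forward() computes the output
handle.remove()           # detach the hook when it is no longer needed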

If with_kwargs is False or not specified, the input contains only the positional arguments given to the module.
@@ -5260,7 +5260,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_forward_pre_hook(hook, *, prepend=False, with_kwargs=False)[source]#
                                                                                                        +register_forward_pre_hook(hook, *, prepend=False, with_kwargs=False)[source]#

                                                                                                        Register a forward pre-hook on the module.

                                                                                                        The hook will be called every time before forward() is invoked.

If with_kwargs is False or not specified, the input contains only the positional arguments given to the module.
@@ -5310,7 +5310,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_full_backward_hook(hook, prepend=False)[source]#
                                                                                                        +register_full_backward_hook(hook, prepend=False)[source]#

                                                                                                        Register a backward hook on the module.

                                                                                                        The hook will be called every time the gradients with respect to a module are computed, and its firing rules are as follows:

                                                                                                        @@ -5367,7 +5367,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_full_backward_pre_hook(hook, prepend=False)[source]#
                                                                                                        +register_full_backward_pre_hook(hook, prepend=False)[source]#

                                                                                                        Register a backward pre-hook on the module.

                                                                                                        The hook will be called every time the gradients for the module are computed. The hook should have the following signature:
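(The signature block itself appears cut off in this hunk; per the upstream docstring it is likely:)

hook(module, grad_output) -> tuple[Tensor] or None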

                                                                                                        @@ -5413,7 +5413,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_load_state_dict_post_hook(hook)[source]#
                                                                                                        +register_load_state_dict_post_hook(hook)[source]#

                                                                                                        Register a post-hook to be run after module’s load_state_dict() is called.

It should have the following signature:

                                                                                                        hook(module, incompatible_keys) -> None

                                                                                                        @@ -5443,7 +5443,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_load_state_dict_pre_hook(hook)[source]#
                                                                                                        +register_load_state_dict_pre_hook(hook)[source]#

                                                                                                        Register a pre-hook to be run before module’s load_state_dict() is called.

It should have the following signature:

hook(module, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) -> None

                                                                                                        @@ -5459,7 +5459,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_module(name, module)[source]#
                                                                                                        +register_module(name, module)[source]#

                                                                                                        Alias for add_module().

                                                                                                        @@ -5467,7 +5467,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_parameter(name, param)[source]#
                                                                                                        +register_parameter(name, param)[source]#

                                                                                                        Add a parameter to the module.

The parameter can be accessed as an attribute using the given name.
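For example (illustrative):

import torch
from torch import nn

m = nn.Module()
m.register_parameter("scale", nn.Parameter(torch.ones(1)))
print(m.scale)  # the parameter is now an attribute named "scale"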

                                                                                                        @@ -5486,7 +5486,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_state_dict_post_hook(hook)[source]#
                                                                                                        +register_state_dict_post_hook(hook)[source]#

                                                                                                        Register a post-hook for the state_dict() method.

It should have the following signature:

                                                                                                        hook(module, state_dict, prefix, local_metadata) -> None

                                                                                                        @@ -5497,7 +5497,7 @@

                                                                                                        AveragedModel
                                                                                                        -register_state_dict_pre_hook(hook)[source]#
                                                                                                        +register_state_dict_pre_hook(hook)[source]#

                                                                                                        Register a pre-hook for the state_dict() method.

It should have the following signature:

                                                                                                        hook(module, prefix, keep_vars) -> None

                                                                                                        @@ -5509,7 +5509,7 @@

                                                                                                        AveragedModel
                                                                                                        -requires_grad_(requires_grad=True)[source]#
                                                                                                        +requires_grad_(requires_grad=True)[source]#

                                                                                                        Change if autograd should record operations on parameters in this module.

                                                                                                        This method sets the parameters’ requires_grad attributes in-place.
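A common use is freezing part of a model for finetuning, e.g. (illustrative):

import torch
from torch import nn

backbone = nn.Linear(8, 8).requires_grad_(False)  # frozen: no grads recorded
head = nn.Linear(8, 2)                            # stays trainable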

                                                                                                        @@ -5533,7 +5533,7 @@

                                                                                                        AveragedModel
                                                                                                        -set_extra_state(state)[source]#
                                                                                                        +set_extra_state(state)[source]#

                                                                                                        Set extra state contained in the loaded state_dict.

This function is called from load_state_dict() to handle any extra state found within the state_dict. Implement this function and a corresponding get_extra_state() for your module if you need to store extra state within its state_dict.
@@ -5548,7 +5548,7 @@

                                                                                                        AveragedModel
                                                                                                        -set_submodule(target, module, strict=False)[source]#
                                                                                                        +set_submodule(target, module, strict=False)[source]#

                                                                                                        Set the submodule given by target if it exists, otherwise throw an error.

                                                                                                        Note

                                                                                                        @@ -5606,7 +5606,7 @@

                                                                                                        AveragedModel
                                                                                                        -share_memory()[source]#
                                                                                                        +share_memory()[source]#

                                                                                                        See torch.Tensor.share_memory_().

                                                                                                        Return type
                                                                                                        @@ -5617,7 +5617,7 @@

                                                                                                        AveragedModel
                                                                                                        -state_dict(*args, destination=None, prefix='', keep_vars=False)[source]#
                                                                                                        +state_dict(*args, destination=None, prefix='', keep_vars=False)[source]#

                                                                                                        Return a dictionary containing references to the whole state of the module.

Both parameters and persistent buffers (e.g. running averages) are included. Keys are corresponding parameter and buffer names.
@@ -5670,27 +5670,27 @@

                                                                                                        AveragedModel
                                                                                                        -to(*args, **kwargs)[source]#
                                                                                                        +to(*args, **kwargs)[source]#

                                                                                                        Move and/or cast the parameters and buffers.

                                                                                                        This can be called as

                                                                                                        -to(device=None, dtype=None, non_blocking=False)[source]
                                                                                                        +to(device=None, dtype=None, non_blocking=False)[source]
                                                                                                        -to(dtype, non_blocking=False)[source]
                                                                                                        +to(dtype, non_blocking=False)[source]
                                                                                                        -to(tensor, non_blocking=False)[source]
                                                                                                        +to(tensor, non_blocking=False)[source]
                                                                                                        -to(memory_format=torch.channels_last)[source]
                                                                                                        +to(memory_format=torch.channels_last)[source]

Its signature is similar to torch.Tensor.to(), but only accepts floating point or complex dtypes. In addition, this method will only cast the floating point or complex parameters and buffers to dtype (if given).
@@ -5769,7 +5769,7 @@

                                                                                                        AveragedModel
                                                                                                        -to_empty(*, device, recurse=True)[source]#
                                                                                                        +to_empty(*, device, recurse=True)[source]#

                                                                                                        Move the parameters and buffers to the specified device without copying storage.

                                                                                                        Parameters
                                                                                                        @@ -5791,7 +5791,7 @@

                                                                                                        AveragedModel
                                                                                                        -train(mode=True)[source]#
                                                                                                        +train(mode=True)[source]#

                                                                                                        Set the module in training mode.

This has an effect only on certain modules. See the documentation of particular modules for details of their behaviors in training/evaluation mode, if they are affected, e.g. Dropout, BatchNorm, etc.
@@ -5813,7 +5813,7 @@

                                                                                                        AveragedModel
                                                                                                        -type(dst_type)[source]#
                                                                                                        +type(dst_type)[source]#

                                                                                                        Casts all parameters and buffers to dst_type.

                                                                                                        Note

                                                                                                        @@ -5834,7 +5834,7 @@

                                                                                                        AveragedModel
                                                                                                        -update_parameters(model)[source]#
                                                                                                        +update_parameters(model)[source]#

                                                                                                        Update model parameters.
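Taken together with the constructor above, a typical SWA loop is roughly the following sketch (stand-in data, not the page's example):

import torch
from torch import nn
from torch.optim.swa_utils import AveragedModel

model = nn.Linear(10, 2)
swa_model = AveragedModel(model)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.MSELoss()
data = [(torch.randn(4, 10), torch.randn(4, 2))]  # stand-in for a DataLoader

for epoch in range(5):
    for x, y in data:
        optimizer.zero_grad()
        loss_fn(model(x), y).backward()
        optimizer.step()
    if epoch >= 2:  # start folding weights into the average after a warm-up
        swa_model.update_parameters(model)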

                                                                                                        @@ -5842,7 +5842,7 @@

                                                                                                        AveragedModel
                                                                                                        -xpu(device=None)[source]#
                                                                                                        +xpu(device=None)[source]#

                                                                                                        Move all model parameters and buffers to the XPU.

This also makes associated parameters and buffers different objects. So it should be called before constructing the optimizer if the module will live on XPU while being optimized.
@@ -5867,7 +5867,7 @@

                                                                                                        AveragedModel
                                                                                                        -zero_grad(set_to_none=True)[source]#
                                                                                                        +zero_grad(set_to_none=True)[source]#

                                                                                                        Reset gradients of all model parameters.

                                                                                                        See similar function under torch.optim.Optimizer for more context.

diff --git a/2.9/generated/torch.optim.swa_utils.SWALR.html b/2.9/generated/torch.optim.swa_utils.SWALR.html
index b90dbe9ff50..362461be137 100644
--- a/2.9/generated/torch.optim.swa_utils.SWALR.html
+++ b/2.9/generated/torch.optim.swa_utils.SWALR.html
@@ -4404,7 +4404,7 @@

                                                                                                        SWALR#

                                                                                                        -class torch.optim.swa_utils.SWALR(optimizer, swa_lr, anneal_epochs=10, anneal_strategy='cos', last_epoch=-1)[source]#
                                                                                                        +class torch.optim.swa_utils.SWALR(optimizer, swa_lr, anneal_epochs=10, anneal_strategy='cos', last_epoch=-1)[source]#

                                                                                                        Anneals the learning rate in each parameter group to a fixed value.

This learning rate scheduler is meant to be used with the Stochastic Weight Averaging (SWA) method (see torch.optim.swa_utils.AveragedModel).
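A minimal sketch of pairing SWALR with an optimizer (values are illustrative):

import torch
from torch import nn
from torch.optim.swa_utils import SWALR

model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
# Anneal each group's lr from 0.5 down to a fixed 0.05 over 5 steps.
scheduler = SWALR(optimizer, swa_lr=0.05, anneal_epochs=5, anneal_strategy="cos")
for _ in range(5):
    # ... one epoch of optimizer.step() calls would go here ...
    scheduler.step()
print(scheduler.get_last_lr())  # approaches [0.05]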

                                                                                                        @@ -4447,7 +4447,7 @@

                                                                                                        SWALR#

                                                                                                        -get_last_lr()[source]#
                                                                                                        +get_last_lr()[source]#

                                                                                                        Return last computed learning rate by current scheduler.

                                                                                                        Return type
                                                                                                        @@ -4458,13 +4458,13 @@

                                                                                                        SWALR#
                                                                                                        -get_lr()[source]#
                                                                                                        +get_lr()[source]#

                                                                                                        Get learning rate.

                                                                                                        -load_state_dict(state_dict)[source]#
                                                                                                        +load_state_dict(state_dict)[source]#

                                                                                                        Load the scheduler’s state.

                                                                                                        Parameters
                                                                                                        @@ -4476,7 +4476,7 @@

                                                                                                        SWALR#
                                                                                                        -state_dict()[source]#
                                                                                                        +state_dict()[source]#

                                                                                                        Return the state of the scheduler as a dict.

                                                                                                        It contains an entry for every variable in self.__dict__ which is not the optimizer.

                                                                                                        @@ -4489,7 +4489,7 @@

                                                                                                        SWALR#
                                                                                                        -step(epoch=None)[source]#
                                                                                                        +step(epoch=None)[source]#

                                                                                                        Perform a step.

diff --git a/2.9/generated/torch.pca_lowrank.html b/2.9/generated/torch.pca_lowrank.html
index bf9b2e4fd04..1f2ce047f06 100644
--- a/2.9/generated/torch.pca_lowrank.html
+++ b/2.9/generated/torch.pca_lowrank.html
@@ -4404,7 +4404,7 @@

                                                                                                        torch.pca_lowrank#

                                                                                                        -torch.pca_lowrank(A, q=None, center=True, niter=2)[source]#
                                                                                                        +torch.pca_lowrank(A, q=None, center=True, niter=2)[source]#

Performs linear Principal Component Analysis (PCA) on a low-rank matrix, batches of such matrices, or a sparse matrix.
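For example (an illustrative call on random data):

import torch

A = torch.randn(100, 20)            # 100 samples, 20 features
U, S, V = torch.pca_lowrank(A, q=5)
proj = A @ V[:, :3]                 # project onto the first 3 principal directions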

This function returns a namedtuple (U, S, V) which is the nearly optimal approximation of a singular value decomposition of a centered matrix A such that A ≈ U diag(S) Vᵀ.
diff --git a/2.9/generated/torch.quasirandom.SobolEngine.html b/2.9/generated/torch.quasirandom.SobolEngine.html
index 319630d32fa..add6ca82752 100644
--- a/2.9/generated/torch.quasirandom.SobolEngine.html
+++ b/2.9/generated/torch.quasirandom.SobolEngine.html
@@ -4404,7 +4404,7 @@

                                                                                                        SobolEngine#

                                                                                                        -class torch.quasirandom.SobolEngine(dimension, scramble=False, seed=None)[source]#
                                                                                                        +class torch.quasirandom.SobolEngine(dimension, scramble=False, seed=None)[source]#

                                                                                                        The torch.quasirandom.SobolEngine is an engine for generating (scrambled) Sobol sequences. Sobol sequences are an example of low discrepancy quasi-random sequences.
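For example (illustrative):

import torch

engine = torch.quasirandom.SobolEngine(dimension=3, scramble=True, seed=0)
points = engine.draw(8)        # shape (8, 3), values in [0, 1)
more = engine.draw_base2(3)    # 2**3 additional points, shape (8, 3)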

                                                                                                        @@ -4446,7 +4446,7 @@

                                                                                                        SobolEngine
                                                                                                        -draw(n=1, out=None, dtype=None)[source]#
                                                                                                        +draw(n=1, out=None, dtype=None)[source]#

Function to draw a sequence of n points from a Sobol sequence. Note that the samples are dependent on the previous samples. The size of the result is (n, dimension).

                                                                                                        @@ -4469,7 +4469,7 @@

                                                                                                        SobolEngine
                                                                                                        -draw_base2(m, out=None, dtype=None)[source]#
                                                                                                        +draw_base2(m, out=None, dtype=None)[source]#

Function to draw a sequence of 2**m points from a Sobol sequence. Note that the samples are dependent on the previous samples. The size of the result is (2**m, dimension).

                                                                                                        @@ -4491,7 +4491,7 @@

                                                                                                        SobolEngine
                                                                                                        -fast_forward(n)[source]#
                                                                                                        +fast_forward(n)[source]#

                                                                                                        Function to fast-forward the state of the SobolEngine by n steps. This is equivalent to drawing n samples without using the samples.

                                                                                                        @@ -4504,7 +4504,7 @@

                                                                                                        SobolEngine
                                                                                                        -reset()[source]#
                                                                                                        +reset()[source]#

                                                                                                        Function to reset the SobolEngine to base state.

diff --git a/2.9/generated/torch.save.html b/2.9/generated/torch.save.html
index 9ad99a8a228..a5f97ef484f 100644
--- a/2.9/generated/torch.save.html
+++ b/2.9/generated/torch.save.html
@@ -4404,7 +4404,7 @@

                                                                                                        torch.save#

                                                                                                        -torch.save(obj, f, pickle_module=pickle, pickle_protocol=2, _use_new_zipfile_serialization=True)[source]#
                                                                                                        +torch.save(obj, f, pickle_module=pickle, pickle_protocol=2, _use_new_zipfile_serialization=True)[source]#

                                                                                                        Saves an object to a disk file.

                                                                                                        See also: Saving and loading tensors

                                                                                                        See Layout Control for more advanced tools to manipulate a checkpoint.
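A minimal round trip (illustrative path):

import torch

t = torch.arange(4)
torch.save(t, "tensor.pt")        # zipfile-based serialization by default
loaded = torch.load("tensor.pt")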

diff --git a/2.9/generated/torch.seed.html b/2.9/generated/torch.seed.html
index 43afb9068c3..1dd013dea75 100644
--- a/2.9/generated/torch.seed.html
+++ b/2.9/generated/torch.seed.html
@@ -4404,7 +4404,7 @@

                                                                                                        torch.seed#

                                                                                                        -torch.seed()[source]#
                                                                                                        +torch.seed()[source]#

                                                                                                        Sets the seed for generating random numbers to a non-deterministic random number on all devices. Returns a 64 bit number used to seed the RNG.
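For example (illustrative):

import torch

s = torch.seed()      # non-deterministic reseed; returns the 64-bit seed used
torch.manual_seed(s)  # the returned value can reproduce the same stream later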

diff --git a/2.9/generated/torch.set_default_device.html b/2.9/generated/torch.set_default_device.html
index 3bd5a11a807..8a4644e6a29 100644
--- a/2.9/generated/torch.set_default_device.html
+++ b/2.9/generated/torch.set_default_device.html
@@ -4404,7 +4404,7 @@

                                                                                                        torch.set_default_device#

                                                                                                        -torch.set_default_device(device)[source]#
                                                                                                        +torch.set_default_device(device)[source]#

Sets the default torch.Tensor to be allocated on device. This does not affect factory function calls which are called with an explicit device argument. Factory calls will be performed as if they were passed device as an argument.
diff --git a/2.9/generated/torch.set_default_dtype.html b/2.9/generated/torch.set_default_dtype.html
index f1b18e9f13b..bf4aec7a5bb 100644
--- a/2.9/generated/torch.set_default_dtype.html
+++ b/2.9/generated/torch.set_default_dtype.html
@@ -4404,7 +4404,7 @@

                                                                                                        torch.set_default_dtype#

                                                                                                        -torch.set_default_dtype(d, /)[source]#
                                                                                                        +torch.set_default_dtype(d, /)[source]#

                                                                                                        Sets the default floating point dtype to d. Supports floating point dtype as inputs. Other dtypes will cause torch to raise an exception.
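For example (illustrative):

import torch

torch.set_default_dtype(torch.float64)
print(torch.tensor([1.2, 3.4]).dtype)  # torch.float64: Python floats now infer float64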

When PyTorch is initialized its default floating point dtype is torch.float32, and the intent of set_default_dtype(torch.float64) is to facilitate NumPy-like type inference.
diff --git a/2.9/generated/torch.set_default_tensor_type.html b/2.9/generated/torch.set_default_tensor_type.html
index c79466df002..11e78998f5e 100644
--- a/2.9/generated/torch.set_default_tensor_type.html
+++ b/2.9/generated/torch.set_default_tensor_type.html
@@ -4404,7 +4404,7 @@

                                                                                                        torch.set_default_tensor_type#

                                                                                                        -torch.set_default_tensor_type(t, /)[source]#
                                                                                                        +torch.set_default_tensor_type(t, /)[source]#

                                                                                                        Warning

This function is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives.
diff --git a/2.9/generated/torch.set_deterministic_debug_mode.html b/2.9/generated/torch.set_deterministic_debug_mode.html
index b914f5392bf..a9a45378912 100644
--- a/2.9/generated/torch.set_deterministic_debug_mode.html
+++ b/2.9/generated/torch.set_deterministic_debug_mode.html
@@ -4404,7 +4404,7 @@

                                                                                                        torch.set_deterministic_debug_mode#

                                                                                                        -torch.set_deterministic_debug_mode(debug_mode)[source]#
                                                                                                        +torch.set_deterministic_debug_mode(debug_mode)[source]#

                                                                                                        Sets the debug mode for deterministic operations.

                                                                                                        Note
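A minimal sketch of switching modes; the string and integer forms of debug_mode are interchangeable:

>>> import torch
>>> torch.set_deterministic_debug_mode("warn")
>>> torch.get_deterministic_debug_mode()
1
>>> torch.set_deterministic_debug_mode("default")  # back to the default mode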

                                                                                                        diff --git a/2.9/generated/torch.set_float32_matmul_precision.html b/2.9/generated/torch.set_float32_matmul_precision.html index df5922a1847..ed78761d17c 100644 --- a/2.9/generated/torch.set_float32_matmul_precision.html +++ b/2.9/generated/torch.set_float32_matmul_precision.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.set_float32_matmul_precision#

                                                                                                        -torch.set_float32_matmul_precision(precision)[source]#
                                                                                                        +torch.set_float32_matmul_precision(precision)[source]#

                                                                                                        Sets the internal precision of float32 matrix multiplications.

                                                                                                        Running float32 matrix multiplications in lower precision may significantly increase performance, and in some programs the loss of precision has a negligible impact.
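For example, to trade matmul precision for speed on supported hardware:

>>> import torch
>>> torch.set_float32_matmul_precision("high")
>>> torch.get_float32_matmul_precision()
'high'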

                                                                                                        diff --git a/2.9/generated/torch.set_printoptions.html b/2.9/generated/torch.set_printoptions.html index 67916e58f85..80d55e11827 100644 --- a/2.9/generated/torch.set_printoptions.html +++ b/2.9/generated/torch.set_printoptions.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.set_printoptions#

                                                                                                        -torch.set_printoptions(precision=None, threshold=None, edgeitems=None, linewidth=None, profile=None, sci_mode=None)[source]#
                                                                                                        +torch.set_printoptions(precision=None, threshold=None, edgeitems=None, linewidth=None, profile=None, sci_mode=None)[source]#

                                                                                                        Set options for printing. Items shamelessly taken from NumPy

                                                                                                        Parameters
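A short sketch of the effect on tensor printing:

>>> import torch
>>> torch.set_printoptions(precision=2, sci_mode=False)
>>> torch.tensor([1.23456, 0.000123])
tensor([1.23, 0.00])
>>> torch.set_printoptions(profile="default")  # restore the defaults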
                                                                                                        diff --git a/2.9/generated/torch.set_rng_state.html b/2.9/generated/torch.set_rng_state.html index c0ba356caf0..fa6b7848dca 100644 --- a/2.9/generated/torch.set_rng_state.html +++ b/2.9/generated/torch.set_rng_state.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.set_rng_state#

                                                                                                        -torch.set_rng_state(new_state)[source]#
                                                                                                        +torch.set_rng_state(new_state)[source]#

                                                                                                        Sets the random number generator state.

                                                                                                        Note
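This applies to the default (CPU) generator; a typical use is replaying a random sequence:

>>> import torch
>>> state = torch.get_rng_state()  # capture the current state
>>> a = torch.rand(3)
>>> torch.set_rng_state(state)     # rewind the generator
>>> torch.equal(a, torch.rand(3))
True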

                                                                                                        diff --git a/2.9/generated/torch.set_warn_always.html b/2.9/generated/torch.set_warn_always.html index a41abc5c7df..5e5b864207d 100644 --- a/2.9/generated/torch.set_warn_always.html +++ b/2.9/generated/torch.set_warn_always.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.set_warn_always#

                                                                                                        -torch.set_warn_always(b, /)[source]#
                                                                                                        +torch.set_warn_always(b, /)[source]#

When this flag is False (default) then some PyTorch warnings may only appear once per process. This helps avoid excessive warning information. Setting it to True causes these warnings to always appear, which may be helpful when debugging.

diff --git a/2.9/generated/torch.signal.windows.bartlett.html b/2.9/generated/torch.signal.windows.bartlett.html index 98199a07e50..21dd958aed1 100644 --- a/2.9/generated/torch.signal.windows.bartlett.html +++ b/2.9/generated/torch.signal.windows.bartlett.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.bartlett#

                                                                                                        -torch.signal.windows.bartlett(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.bartlett(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes the Bartlett window.

                                                                                                        The Bartlett window is defined as follows:

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.blackman.html b/2.9/generated/torch.signal.windows.blackman.html index 9b78355fa25..d69fcd23b7f 100644 --- a/2.9/generated/torch.signal.windows.blackman.html +++ b/2.9/generated/torch.signal.windows.blackman.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.blackman#

                                                                                                        -torch.signal.windows.blackman(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.blackman(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes the Blackman window.

                                                                                                        The Blackman window is defined as follows:

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.cosine.html b/2.9/generated/torch.signal.windows.cosine.html index 3e6c11ac0ed..8db517d5055 100644 --- a/2.9/generated/torch.signal.windows.cosine.html +++ b/2.9/generated/torch.signal.windows.cosine.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.cosine#

                                                                                                        -torch.signal.windows.cosine(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.cosine(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes a window with a simple cosine waveform, following the same implementation as SciPy. This window is also known as the sine window.

                                                                                                        The cosine window is defined as follows:

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.exponential.html b/2.9/generated/torch.signal.windows.exponential.html index 412f98cf6f6..2598ae1844c 100644 --- a/2.9/generated/torch.signal.windows.exponential.html +++ b/2.9/generated/torch.signal.windows.exponential.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.exponential#

                                                                                                        -torch.signal.windows.exponential(M, *, center=None, tau=1.0, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.exponential(M, *, center=None, tau=1.0, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

Computes a window with an exponential waveform. Also known as the Poisson window.

                                                                                                        The exponential window is defined as follows:
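For example (center is typically combined with periodic windows, i.e. sym=False):

>>> import torch
>>> torch.signal.windows.exponential(10).shape
torch.Size([10])
>>> torch.signal.windows.exponential(10, center=0.0, tau=2.0, sym=False).shape
torch.Size([10])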

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.gaussian.html b/2.9/generated/torch.signal.windows.gaussian.html index 80d77f84520..388b36aeae9 100644 --- a/2.9/generated/torch.signal.windows.gaussian.html +++ b/2.9/generated/torch.signal.windows.gaussian.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.gaussian#

                                                                                                        -torch.signal.windows.gaussian(M, *, std=1.0, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.gaussian(M, *, std=1.0, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

Computes a window with a Gaussian waveform.

The Gaussian window is defined as follows:

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.general_cosine.html b/2.9/generated/torch.signal.windows.general_cosine.html index 08007fd0c60..da6643594c1 100644 --- a/2.9/generated/torch.signal.windows.general_cosine.html +++ b/2.9/generated/torch.signal.windows.general_cosine.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.general_cosine#

                                                                                                        -torch.signal.windows.general_cosine(M, *, a, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.general_cosine(M, *, a, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes the general cosine window.

                                                                                                        The general cosine window is defined as follows:
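As a sketch, the Blackman window should be recoverable from its three cosine coefficients:

>>> import torch
>>> w = torch.signal.windows.general_cosine(10, a=[0.42, 0.5, 0.08])
>>> torch.allclose(w, torch.signal.windows.blackman(10))
True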

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.general_hamming.html b/2.9/generated/torch.signal.windows.general_hamming.html index df0085c8dcc..00976db665e 100644 --- a/2.9/generated/torch.signal.windows.general_hamming.html +++ b/2.9/generated/torch.signal.windows.general_hamming.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.general_hamming#

                                                                                                        -torch.signal.windows.general_hamming(M, *, alpha=0.54, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.general_hamming(M, *, alpha=0.54, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes the general Hamming window.

                                                                                                        The general Hamming window is defined as follows:

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.hamming.html b/2.9/generated/torch.signal.windows.hamming.html index 0b2affed45c..6715b8b769e 100644 --- a/2.9/generated/torch.signal.windows.hamming.html +++ b/2.9/generated/torch.signal.windows.hamming.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.hamming#

                                                                                                        -torch.signal.windows.hamming(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.hamming(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes the Hamming window.

                                                                                                        The Hamming window is defined as follows:

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.hann.html b/2.9/generated/torch.signal.windows.hann.html index 7186a108d46..5dd8a9b1f62 100644 --- a/2.9/generated/torch.signal.windows.hann.html +++ b/2.9/generated/torch.signal.windows.hann.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.hann#

                                                                                                        -torch.signal.windows.hann(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.hann(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes the Hann window.

                                                                                                        The Hann window is defined as follows:
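For instance, a symmetric length-5 Hann window:

>>> import torch
>>> torch.signal.windows.hann(5)
tensor([0.0000, 0.5000, 1.0000, 0.5000, 0.0000])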

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.kaiser.html b/2.9/generated/torch.signal.windows.kaiser.html index 93380e6a330..acad40a5c9d 100644 --- a/2.9/generated/torch.signal.windows.kaiser.html +++ b/2.9/generated/torch.signal.windows.kaiser.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.kaiser#

                                                                                                        -torch.signal.windows.kaiser(M, *, beta=12.0, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.kaiser(M, *, beta=12.0, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes the Kaiser window.

                                                                                                        The Kaiser window is defined as follows:
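For example, beta controls the trade-off between main-lobe width and side-lobe level:

>>> import torch
>>> torch.signal.windows.kaiser(10, beta=8.0).shape
torch.Size([10])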

                                                                                                        diff --git a/2.9/generated/torch.signal.windows.nuttall.html b/2.9/generated/torch.signal.windows.nuttall.html index 7468b4e261a..53733fa4472 100644 --- a/2.9/generated/torch.signal.windows.nuttall.html +++ b/2.9/generated/torch.signal.windows.nuttall.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.signal.windows.nuttall#

                                                                                                        -torch.signal.windows.nuttall(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#
                                                                                                        +torch.signal.windows.nuttall(M, *, sym=True, dtype=None, layout=torch.strided, device=None, requires_grad=False)[source]#

                                                                                                        Computes the minimum 4-term Blackman-Harris window according to Nuttall.

w_n = 1 - 0.36358 \cos(z_n) + 0.48917 \cos(2 z_n) - 0.13659 \cos(3 z_n) + 0.01064 \cos(4 z_n)

diff --git a/2.9/generated/torch.sparse.as_sparse_gradcheck.html b/2.9/generated/torch.sparse.as_sparse_gradcheck.html index 1ca1d3a17b0..e043b4abc17 100644 --- a/2.9/generated/torch.sparse.as_sparse_gradcheck.html +++ b/2.9/generated/torch.sparse.as_sparse_gradcheck.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sparse.as_sparse_gradcheck#

                                                                                                        -torch.sparse.as_sparse_gradcheck(gradcheck)[source]#
                                                                                                        +torch.sparse.as_sparse_gradcheck(gradcheck)[source]#

Decorate a function to extend gradcheck for sparse tensors.

Decorator for torch.autograd.gradcheck or its functools.partial variants that extends the gradcheck function with support to input functions that operate on or return sparse tensors.

diff --git a/2.9/generated/torch.sparse.check_sparse_tensor_invariants.html b/2.9/generated/torch.sparse.check_sparse_tensor_invariants.html index 931cab66a37..a1689faddfb 100644 --- a/2.9/generated/torch.sparse.check_sparse_tensor_invariants.html +++ b/2.9/generated/torch.sparse.check_sparse_tensor_invariants.html @@ -4404,7 +4404,7 @@

                                                                                                        check_sparse_tensor_invariants#

                                                                                                        -class torch.sparse.check_sparse_tensor_invariants(enable=True)[source]#
                                                                                                        +class torch.sparse.check_sparse_tensor_invariants(enable=True)[source]#

                                                                                                        A tool to control checking sparse tensor invariants.

The following options exist to manage sparse tensor invariants checking in sparse tensor construction:

                                                                                                        @@ -4447,14 +4447,14 @@

                                                                                                        check_sparse_tensor_invariants
                                                                                                        -static disable()[source]#
                                                                                                        +static disable()[source]#

                                                                                                        Disable sparse tensor invariants checking in sparse tensor constructors.

                                                                                                        See torch.sparse.check_sparse_tensor_invariants.enable() for more information.

                                                                                                        -static enable()[source]#
                                                                                                        +static enable()[source]#

                                                                                                        Enable sparse tensor invariants checking in sparse tensor constructors.

                                                                                                        Note

                                                                                                        @@ -4473,7 +4473,7 @@

                                                                                                        check_sparse_tensor_invariants
                                                                                                        -static is_enabled()[source]#
                                                                                                        +static is_enabled()[source]#

                                                                                                        Return True if the sparse tensor invariants checking is enabled.

                                                                                                        Note
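A minimal sketch of the class used as a context manager (checking is off by default):

>>> import torch
>>> torch.sparse.check_sparse_tensor_invariants.is_enabled()
False
>>> with torch.sparse.check_sparse_tensor_invariants():
...     print(torch.sparse.check_sparse_tensor_invariants.is_enabled())
True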

                                                                                                        diff --git a/2.9/generated/torch.sparse.sum.html b/2.9/generated/torch.sparse.sum.html index 4f7d1297330..78740f4c973 100644 --- a/2.9/generated/torch.sparse.sum.html +++ b/2.9/generated/torch.sparse.sum.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sparse.sum#

                                                                                                        -torch.sparse.sum(input, dim=None, dtype=None)[source]#
                                                                                                        +torch.sparse.sum(input, dim=None, dtype=None)[source]#

                                                                                                        Return the sum of each row of the given sparse tensor.

Returns the sum of each row of the sparse tensor input in the given dimensions dim. If dim is a list of dimensions, reduce over all of them.

diff --git a/2.9/generated/torch.split.html b/2.9/generated/torch.split.html index 819bdec6232..8b10763e59b 100644 --- a/2.9/generated/torch.split.html +++ b/2.9/generated/torch.split.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.split#

                                                                                                        -torch.split(tensor, split_size_or_sections, dim=0)[source]#
                                                                                                        +torch.split(tensor, split_size_or_sections, dim=0)[source]#

                                                                                                        Splits the tensor into chunks. Each chunk is a view of the original tensor.

If split_size_or_sections is an integer type, then tensor will be split into equally sized chunks (if possible). Last chunk will be smaller if the tensor size along the given dimension dim is not divisible by split_size.

diff --git a/2.9/generated/torch.stft.html b/2.9/generated/torch.stft.html index 2c8e74c2bf3..a0e5d3fc21a 100644 --- a/2.9/generated/torch.stft.html +++ b/2.9/generated/torch.stft.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.stft#

                                                                                                        -torch.stft(input, n_fft, hop_length=None, win_length=None, window=None, center=True, pad_mode='reflect', normalized=False, onesided=None, return_complex=None, align_to_window=None)[source]#
                                                                                                        +torch.stft(input, n_fft, hop_length=None, win_length=None, window=None, center=True, pad_mode='reflect', normalized=False, onesided=None, return_complex=None, align_to_window=None)[source]#

                                                                                                        Short-time Fourier transform (STFT).

                                                                                                        Warning
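A typical call returns a complex spectrogram of shape (n_fft // 2 + 1, n_frames):

>>> import torch
>>> x = torch.randn(1000)
>>> spec = torch.stft(x, n_fft=256, hop_length=64,
...                   window=torch.hann_window(256), return_complex=True)
>>> spec.shape
torch.Size([129, 16])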

                                                                                                        diff --git a/2.9/generated/torch.svd_lowrank.html b/2.9/generated/torch.svd_lowrank.html index 1702efa47ff..1eb1f1722be 100644 --- a/2.9/generated/torch.svd_lowrank.html +++ b/2.9/generated/torch.svd_lowrank.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.svd_lowrank#

                                                                                                        -torch.svd_lowrank(A, q=6, niter=2, M=None)[source]#
                                                                                                        +torch.svd_lowrank(A, q=6, niter=2, M=None)[source]#

Return the singular value decomposition (U, S, V) of a matrix, batches of matrices, or a sparse matrix A such that A \approx U \operatorname{diag}(S) V^{\text{H}}. In case M is given, then SVD is computed for the matrix A - M.

diff --git a/2.9/generated/torch.sym_float.html b/2.9/generated/torch.sym_float.html index e8fe4a4eb5e..8cb28c42915 100644 --- a/2.9/generated/torch.sym_float.html +++ b/2.9/generated/torch.sym_float.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sym_float#

                                                                                                        -torch.sym_float(a)[source]#
                                                                                                        +torch.sym_float(a)[source]#

                                                                                                        SymInt-aware utility for float casting.

                                                                                                        Parameters
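On plain Python numbers this behaves like the builtin cast; under tracing it propagates symbolic floats instead:

>>> import torch
>>> torch.sym_float(3)
3.0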
                                                                                                        diff --git a/2.9/generated/torch.sym_fresh_size.html b/2.9/generated/torch.sym_fresh_size.html index 36a46b7ce9d..74a225d3885 100644 --- a/2.9/generated/torch.sym_fresh_size.html +++ b/2.9/generated/torch.sym_fresh_size.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sym_fresh_size#

                                                                                                        -torch.sym_fresh_size(expr)[source]#
                                                                                                        +torch.sym_fresh_size(expr)[source]#
                                                                                                        diff --git a/2.9/generated/torch.sym_int.html b/2.9/generated/torch.sym_int.html index 900065b199d..b9b046b4447 100644 --- a/2.9/generated/torch.sym_int.html +++ b/2.9/generated/torch.sym_int.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sym_int#

                                                                                                        -torch.sym_int(a)[source]#
                                                                                                        +torch.sym_int(a)[source]#

                                                                                                        SymInt-aware utility for int casting.

                                                                                                        Parameters
                                                                                                        diff --git a/2.9/generated/torch.sym_ite.html b/2.9/generated/torch.sym_ite.html index 7ee6024f88f..95c0ef4ffd2 100644 --- a/2.9/generated/torch.sym_ite.html +++ b/2.9/generated/torch.sym_ite.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sym_ite#

                                                                                                        -torch.sym_ite(b, t, f)[source]#
                                                                                                        +torch.sym_ite(b, t, f)[source]#

SymInt-aware utility for the ternary operator (t if b else f).
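On concrete (non-symbolic) inputs this reduces to the plain conditional:

>>> import torch
>>> torch.sym_ite(True, 5, 9)
5
>>> torch.sym_ite(False, 5, 9)
9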

                                                                                                        diff --git a/2.9/generated/torch.sym_max.html b/2.9/generated/torch.sym_max.html index 75628ca171c..d1120070dc8 100644 --- a/2.9/generated/torch.sym_max.html +++ b/2.9/generated/torch.sym_max.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sym_max#

                                                                                                        -torch.sym_max(a, b)[source]#
                                                                                                        +torch.sym_max(a, b)[source]#

SymInt-aware utility for max which avoids branching on a < b. Unlike builtins.max(), this only works for int/float, and it always promotes to float if any argument is float (unlike builtins.max, which will faithfully preserve the type of the input argument).

diff --git a/2.9/generated/torch.sym_min.html b/2.9/generated/torch.sym_min.html index 82e7ad1e2fc..597b758028c 100644 --- a/2.9/generated/torch.sym_min.html +++ b/2.9/generated/torch.sym_min.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sym_min#

                                                                                                        -torch.sym_min(a, b)[source]#
                                                                                                        +torch.sym_min(a, b)[source]#

                                                                                                        SymInt-aware utility for min().

                                                                                                        diff --git a/2.9/generated/torch.sym_not.html b/2.9/generated/torch.sym_not.html index 0bb545211c9..0504ac39c1c 100644 --- a/2.9/generated/torch.sym_not.html +++ b/2.9/generated/torch.sym_not.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sym_not#

                                                                                                        -torch.sym_not(a)[source]#
                                                                                                        +torch.sym_not(a)[source]#

                                                                                                        SymInt-aware utility for logical negation.

                                                                                                        Parameters
                                                                                                        diff --git a/2.9/generated/torch.sym_sum.html b/2.9/generated/torch.sym_sum.html index 25e7dc8db15..48c2e179d48 100644 --- a/2.9/generated/torch.sym_sum.html +++ b/2.9/generated/torch.sym_sum.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.sym_sum#

                                                                                                        -torch.sym_sum(args)[source]#
                                                                                                        +torch.sym_sum(args)[source]#

                                                                                                        N-ary add which is faster to compute for long lists than iterated binary addition. Only does something special for integers.
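On concrete integers this reduces to an ordinary sum:

>>> import torch
>>> torch.sym_sum([1, 2, 3, 4])
10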

                                                                                                        diff --git a/2.9/generated/torch.tensordot.html b/2.9/generated/torch.tensordot.html index 8d4d826b266..c887f0f0992 100644 --- a/2.9/generated/torch.tensordot.html +++ b/2.9/generated/torch.tensordot.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.tensordot#

                                                                                                        -torch.tensordot(a, b, dims=2, out=None)[source]#
                                                                                                        +torch.tensordot(a, b, dims=2, out=None)[source]#

                                                                                                        Returns a contraction of a and b over multiple dimensions.

                                                                                                        tensordot implements a generalized matrix product.
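For example, contracting two dimensions at once:

>>> import torch
>>> a = torch.arange(60.).reshape(3, 4, 5)
>>> b = torch.arange(24.).reshape(4, 3, 2)
>>> torch.tensordot(a, b, dims=([1, 0], [0, 1])).shape
torch.Size([5, 2])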

                                                                                                        diff --git a/2.9/generated/torch.unique.html b/2.9/generated/torch.unique.html index 8c53871f727..fc05d186219 100644 --- a/2.9/generated/torch.unique.html +++ b/2.9/generated/torch.unique.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.unique#

                                                                                                        -torch.unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None) tuple[Tensor, Tensor, Tensor][source]#
                                                                                                        +torch.unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None) tuple[Tensor, Tensor, Tensor][source]#

                                                                                                        Returns the unique elements of the input tensor.

                                                                                                        Note
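For instance:

>>> import torch
>>> values, counts = torch.unique(torch.tensor([1, 3, 2, 3]), return_counts=True)
>>> values
tensor([1, 2, 3])
>>> counts
tensor([1, 1, 2])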

                                                                                                        diff --git a/2.9/generated/torch.unique_consecutive.html b/2.9/generated/torch.unique_consecutive.html index 6f6bb035b10..0979b85158d 100644 --- a/2.9/generated/torch.unique_consecutive.html +++ b/2.9/generated/torch.unique_consecutive.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.unique_consecutive#

                                                                                                        -torch.unique_consecutive(*args, **kwargs)[source]#
                                                                                                        +torch.unique_consecutive(*args, **kwargs)[source]#

                                                                                                        Eliminates all but the first element from every consecutive group of equivalent elements.

                                                                                                        Note
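Unlike torch.unique(), only adjacent duplicates are collapsed:

>>> import torch
>>> torch.unique_consecutive(torch.tensor([1, 1, 2, 2, 3, 1, 1, 2]))
tensor([1, 2, 3, 1, 2])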

                                                                                                        diff --git a/2.9/generated/torch.unravel_index.html b/2.9/generated/torch.unravel_index.html index e95c5974179..469000b60e9 100644 --- a/2.9/generated/torch.unravel_index.html +++ b/2.9/generated/torch.unravel_index.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.unravel_index#

                                                                                                        -torch.unravel_index(indices, shape)[source]#
                                                                                                        +torch.unravel_index(indices, shape)[source]#

                                                                                                        Converts a tensor of flat indices into a tuple of coordinate tensors that index into an arbitrary tensor of the specified shape.
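For example, flat index 4 in a (3, 2) shape maps to row 2, column 0:

>>> import torch
>>> torch.unravel_index(torch.tensor(4), (3, 2))
(tensor(2), tensor(0))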

                                                                                                        diff --git a/2.9/generated/torch.use_deterministic_algorithms.html b/2.9/generated/torch.use_deterministic_algorithms.html index e1b7c8f4629..e16f7832e35 100644 --- a/2.9/generated/torch.use_deterministic_algorithms.html +++ b/2.9/generated/torch.use_deterministic_algorithms.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.use_deterministic_algorithms#

                                                                                                        -torch.use_deterministic_algorithms(mode, *, warn_only=False)[source]#
                                                                                                        +torch.use_deterministic_algorithms(mode, *, warn_only=False)[source]#

Sets whether PyTorch operations must use “deterministic” algorithms. That is, algorithms which, given the same input, and when run on the same software and hardware, always produce the same output.

diff --git a/2.9/generated/torch.utils.generate_methods_for_privateuse1_backend.html b/2.9/generated/torch.utils.generate_methods_for_privateuse1_backend.html index 7de3798f02a..ff570145776 100644 --- a/2.9/generated/torch.utils.generate_methods_for_privateuse1_backend.html +++ b/2.9/generated/torch.utils.generate_methods_for_privateuse1_backend.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.utils.generate_methods_for_privateuse1_backend#

                                                                                                        -torch.utils.generate_methods_for_privateuse1_backend(for_tensor=True, for_module=True, for_packed_sequence=True, for_storage=False, unsupported_dtype=None)[source]#
                                                                                                        +torch.utils.generate_methods_for_privateuse1_backend(for_tensor=True, for_module=True, for_packed_sequence=True, for_storage=False, unsupported_dtype=None)[source]#

Automatically generate attributes and methods for the custom backend after renaming the privateuse1 backend.

                                                                                                        In the default scenario, storage-related methods will not be generated automatically.

This is intended for when you implement kernels for various torch operations and register them to the PrivateUse1 dispatch key.

diff --git a/2.9/generated/torch.utils.get_cpp_backtrace.html b/2.9/generated/torch.utils.get_cpp_backtrace.html index 0227a946a13..c26da595adf 100644 --- a/2.9/generated/torch.utils.get_cpp_backtrace.html +++ b/2.9/generated/torch.utils.get_cpp_backtrace.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.utils.get_cpp_backtrace#

                                                                                                        -torch.utils.get_cpp_backtrace(frames_to_skip=0, maximum_number_of_frames=64)[source]#
                                                                                                        +torch.utils.get_cpp_backtrace(frames_to_skip=0, maximum_number_of_frames=64)[source]#

                                                                                                        Return a string containing the C++ stack trace of the current thread.

                                                                                                        Parameters
                                                                                                        diff --git a/2.9/generated/torch.utils.rename_privateuse1_backend.html b/2.9/generated/torch.utils.rename_privateuse1_backend.html index 766518e9cab..c58fa5d04bb 100644 --- a/2.9/generated/torch.utils.rename_privateuse1_backend.html +++ b/2.9/generated/torch.utils.rename_privateuse1_backend.html @@ -4404,7 +4404,7 @@

                                                                                                        torch.utils.rename_privateuse1_backend#

                                                                                                        -torch.utils.rename_privateuse1_backend(backend_name)[source]#
                                                                                                        +torch.utils.rename_privateuse1_backend(backend_name)[source]#

                                                                                                        Rename the privateuse1 backend device to make it more convenient to use as a device name within PyTorch APIs.

                                                                                                        The steps are:

                                                                                                          diff --git a/2.9/generated/torch.utils.set_module.html b/2.9/generated/torch.utils.set_module.html index 92f79e2a110..c7bee044c3b 100644 --- a/2.9/generated/torch.utils.set_module.html +++ b/2.9/generated/torch.utils.set_module.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.utils.set_module#

                                                                                                          -torch.utils.set_module(obj, mod)[source]#
                                                                                                          +torch.utils.set_module(obj, mod)[source]#

Set the module attribute on a Python object for nicer printing.

                                                                                                          diff --git a/2.9/generated/torch.utils.swap_tensors.html b/2.9/generated/torch.utils.swap_tensors.html index 5d462e23f12..2dce16ecc1e 100644 --- a/2.9/generated/torch.utils.swap_tensors.html +++ b/2.9/generated/torch.utils.swap_tensors.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.utils.swap_tensors#

                                                                                                          -torch.utils.swap_tensors(t1, t2)[source]#
                                                                                                          +torch.utils.swap_tensors(t1, t2)[source]#

                                                                                                          This function swaps the content of the two Tensor objects. At a high level, this will make t1 have the content of t2 while preserving its identity.
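A small sketch; after the swap t1 carries t2's payload (and vice versa):

>>> import torch
>>> t1, t2 = torch.tensor([1, 2]), torch.tensor([3, 4, 5])
>>> torch.utils.swap_tensors(t1, t2)
>>> t1
tensor([3, 4, 5])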

                                                                                                          diff --git a/2.9/generated/torch.vmap.html b/2.9/generated/torch.vmap.html index 82ed04ba6a7..195ddb971f6 100644 --- a/2.9/generated/torch.vmap.html +++ b/2.9/generated/torch.vmap.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.vmap#

                                                                                                          -torch.vmap(func, in_dims=0, out_dims=0, randomness='error', *, chunk_size=None)[source]#
                                                                                                          +torch.vmap(func, in_dims=0, out_dims=0, randomness='error', *, chunk_size=None)[source]#

vmap is the vectorizing map; vmap(func) returns a new function that maps func over some dimension of the inputs. Semantically, vmap pushes the map into PyTorch operations called by func, effectively vectorizing those operations.

diff --git a/2.9/generated/torch.xpu.Event.html b/2.9/generated/torch.xpu.Event.html index c68e3521cce..e6487b7af2b 100644 --- a/2.9/generated/torch.xpu.Event.html +++ b/2.9/generated/torch.xpu.Event.html @@ -4404,7 +4404,7 @@

                                                                                                          Event#

                                                                                                          -class torch.xpu.Event(enable_timing=False)[source]#
                                                                                                          +class torch.xpu.Event(enable_timing=False)[source]#

Wrapper around an XPU event.

                                                                                                          XPU events are synchronization markers that can be used to monitor the device’s progress, and to synchronize XPU streams.

                                                                                                          @@ -4419,7 +4419,7 @@

                                                                                                          Event#

                                                                                                          -elapsed_time(end_event)[source]#
                                                                                                          +elapsed_time(end_event)[source]#

                                                                                                          Return the time elapsed.

                                                                                                          Time reported in milliseconds after the event was recorded and before the end_event was recorded.

                                                                                                          @@ -4427,7 +4427,7 @@

                                                                                                          Event#
                                                                                                          -query()[source]#
                                                                                                          +query()[source]#

                                                                                                          Check if all work currently captured by event has completed.

                                                                                                          Returns
                                                                                                          @@ -4442,7 +4442,7 @@

                                                                                                          Event#
                                                                                                          -record(stream=None)[source]#
                                                                                                          +record(stream=None)[source]#

                                                                                                          Record the event in a given stream.

                                                                                                          Uses torch.xpu.current_stream() if no stream is specified. The stream’s device must match the event’s device.

                                                                                                          @@ -4452,7 +4452,7 @@

                                                                                                          Event#
                                                                                                          -synchronize()[source]#
                                                                                                          +synchronize()[source]#

                                                                                                          Wait for the event to complete.

                                                                                                          Waits until the completion of all work currently captured in this event. This prevents the CPU thread from proceeding until the event completes.

                                                                                                          @@ -4462,7 +4462,7 @@

                                                                                                          Event#
                                                                                                          -wait(stream=None)[source]#
                                                                                                          +wait(stream=None)[source]#

                                                                                                          Make all future work submitted to the given stream wait for this event.

                                                                                                          Use torch.xpu.current_stream() if no stream is specified.

                                                                                                          diff --git a/2.9/generated/torch.xpu.Stream.html b/2.9/generated/torch.xpu.Stream.html index fd3551111d0..8db0ca06a8b 100644 --- a/2.9/generated/torch.xpu.Stream.html +++ b/2.9/generated/torch.xpu.Stream.html @@ -4404,7 +4404,7 @@

                                                                                                          Stream#

                                                                                                          -class torch.xpu.Stream(device=None, priority=0, **kwargs)[source]#
                                                                                                          +class torch.xpu.Stream(device=None, priority=0, **kwargs)[source]#

Wrapper around an XPU stream.

An XPU stream is a linear sequence of execution that belongs to a specific device, independent from other streams. It supports the with statement as a context manager, ensuring that operators within the block run on the corresponding stream.

@@ -4426,7 +4426,7 @@

                                                                                                          Stream
                                                                                                          -query()[source]#
                                                                                                          +query()[source]#

                                                                                                          Check if all the work submitted has been completed.

                                                                                                          Returns
                                                                                                          @@ -4440,7 +4440,7 @@

                                                                                                          Stream
                                                                                                          -record_event(event=None)[source]#
                                                                                                          +record_event(event=None)[source]#

                                                                                                          Record an event.

                                                                                                          Parameters
                                                                                                          @@ -4455,7 +4455,7 @@

                                                                                                          Stream
                                                                                                          -synchronize()[source]#
                                                                                                          +synchronize()[source]#

                                                                                                          Wait for all the kernels in this stream to complete.

                                                                                                          @@ -4463,7 +4463,7 @@

                                                                                                          Stream
                                                                                                          -wait_event(event)[source]#
                                                                                                          +wait_event(event)[source]#

                                                                                                          Make all future work submitted to the stream wait for an event.

                                                                                                          Parameters
                                                                                                          @@ -4474,7 +4474,7 @@

                                                                                                          Stream
                                                                                                          -wait_stream(stream)[source]#
                                                                                                          +wait_stream(stream)[source]#

                                                                                                          Synchronize with another stream.

                                                                                                          All future work submitted to this stream will wait until all kernels submitted to a given stream at the time of call complete.
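A hedged sketch tying these Stream methods together (assumes an XPU device; the tensor work is illustrative):

>>> s = torch.xpu.Stream()
>>> with torch.xpu.stream(s):
...     y = torch.randn(4, device="xpu") + 1
>>> ev = s.record_event()                      # record an event on s
>>> torch.xpu.current_stream().wait_stream(s)  # current stream waits for s
>>> s.synchronize()                            # block the host until s drains
>>> s.query()
True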

                                                                                                          diff --git a/2.9/generated/torch.xpu.StreamContext.html b/2.9/generated/torch.xpu.StreamContext.html index ec453509868..928c1481033 100644 --- a/2.9/generated/torch.xpu.StreamContext.html +++ b/2.9/generated/torch.xpu.StreamContext.html @@ -4404,7 +4404,7 @@

                                                                                                          StreamContext#

                                                                                                          -class torch.xpu.StreamContext(stream)[source]#
                                                                                                          +class torch.xpu.StreamContext(stream)[source]#

                                                                                                          Context-manager that selects a given stream.

                                                                                                          All XPU kernels queued within its context will be enqueued on a selected stream.
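For illustration only, a direct use of StreamContext; torch.xpu.stream() (documented below) is the usual convenience wrapper:

>>> s = torch.xpu.Stream()
>>> with torch.xpu.StreamContext(s):
...     z = torch.ones(4, device="xpu")  # enqueued on s, not the default stream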

                                                                                                          diff --git a/2.9/generated/torch.xpu.current_device.html b/2.9/generated/torch.xpu.current_device.html index 6f08990ac17..0c44ee88594 100644 --- a/2.9/generated/torch.xpu.current_device.html +++ b/2.9/generated/torch.xpu.current_device.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.current_device#

                                                                                                          -torch.xpu.current_device()[source]#
                                                                                                          +torch.xpu.current_device()[source]#

                                                                                                          Return the index of a currently selected device.

                                                                                                          Return type
                                                                                                          diff --git a/2.9/generated/torch.xpu.current_stream.html b/2.9/generated/torch.xpu.current_stream.html index 61ef847d779..9ebf6284f5e 100644 --- a/2.9/generated/torch.xpu.current_stream.html +++ b/2.9/generated/torch.xpu.current_stream.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.current_stream#

                                                                                                          -torch.xpu.current_stream(device=None)[source]#
                                                                                                          +torch.xpu.current_stream(device=None)[source]#

                                                                                                          Return the currently selected Stream for a given device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.device.html b/2.9/generated/torch.xpu.device.html index 51dd20e229f..81fc7b64649 100644 --- a/2.9/generated/torch.xpu.device.html +++ b/2.9/generated/torch.xpu.device.html @@ -4404,7 +4404,7 @@

                                                                                                          device#

                                                                                                          -class torch.xpu.device(device)[source]#
                                                                                                          +class torch.xpu.device(device)[source]#

                                                                                                          Context-manager that changes the selected device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.device_count.html b/2.9/generated/torch.xpu.device_count.html index 199b7574a73..0f6bf09c61e 100644 --- a/2.9/generated/torch.xpu.device_count.html +++ b/2.9/generated/torch.xpu.device_count.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.device_count#

                                                                                                          -torch.xpu.device_count()[source]#
                                                                                                          +torch.xpu.device_count()[source]#

Return the number of XPU devices available.

                                                                                                          Return type
                                                                                                          diff --git a/2.9/generated/torch.xpu.device_of.html b/2.9/generated/torch.xpu.device_of.html index 72bff2451f2..50d8e1b4c4e 100644 --- a/2.9/generated/torch.xpu.device_of.html +++ b/2.9/generated/torch.xpu.device_of.html @@ -4404,7 +4404,7 @@

                                                                                                          device_of#

                                                                                                          -class torch.xpu.device_of(obj)[source]#
                                                                                                          +class torch.xpu.device_of(obj)[source]#

Context-manager that changes the current device to that of a given object.

You can use both tensors and storages as arguments. If a given object is not allocated on an XPU, this is a no-op.
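A small sketch of both context managers, assuming at least one XPU device is present:

>>> with torch.xpu.device(0):             # select device 0 inside the block
...     a = torch.empty(2, device="xpu")
>>> with torch.xpu.device_of(a):          # switch to a's device; no-op for CPU tensors
...     b = torch.zeros_like(a)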

                                                                                                          diff --git a/2.9/generated/torch.xpu.get_arch_list.html b/2.9/generated/torch.xpu.get_arch_list.html index 7dbcb6dbf3c..e16a37d7d7e 100644 --- a/2.9/generated/torch.xpu.get_arch_list.html +++ b/2.9/generated/torch.xpu.get_arch_list.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.get_arch_list#

                                                                                                          -torch.xpu.get_arch_list()[source]#
                                                                                                          +torch.xpu.get_arch_list()[source]#

Return the list of XPU architectures this library was compiled for.

                                                                                                          Return type
                                                                                                          diff --git a/2.9/generated/torch.xpu.get_device_capability.html b/2.9/generated/torch.xpu.get_device_capability.html index 7955c458d79..8e282cdf6a6 100644 --- a/2.9/generated/torch.xpu.get_device_capability.html +++ b/2.9/generated/torch.xpu.get_device_capability.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.get_device_capability#

                                                                                                          -torch.xpu.get_device_capability(device=None)[source]#
                                                                                                          +torch.xpu.get_device_capability(device=None)[source]#

                                                                                                          Get the xpu capability of a device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.get_device_name.html b/2.9/generated/torch.xpu.get_device_name.html index 5f12bb1e275..c8ea6fba7f7 100644 --- a/2.9/generated/torch.xpu.get_device_name.html +++ b/2.9/generated/torch.xpu.get_device_name.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.get_device_name#

                                                                                                          -torch.xpu.get_device_name(device=None)[source]#
                                                                                                          +torch.xpu.get_device_name(device=None)[source]#

                                                                                                          Get the name of a device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.get_device_properties.html b/2.9/generated/torch.xpu.get_device_properties.html index c92887f4853..4e13bac7b42 100644 --- a/2.9/generated/torch.xpu.get_device_properties.html +++ b/2.9/generated/torch.xpu.get_device_properties.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.get_device_properties#

                                                                                                          -torch.xpu.get_device_properties(device=None)[source]#
                                                                                                          +torch.xpu.get_device_properties(device=None)[source]#

                                                                                                          Get the properties of a device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.get_gencode_flags.html b/2.9/generated/torch.xpu.get_gencode_flags.html index 2e9ff9d28f5..b60fc1c4acf 100644 --- a/2.9/generated/torch.xpu.get_gencode_flags.html +++ b/2.9/generated/torch.xpu.get_gencode_flags.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.get_gencode_flags#

                                                                                                          -torch.xpu.get_gencode_flags()[source]#
                                                                                                          +torch.xpu.get_gencode_flags()[source]#

Return the XPU AOT (ahead-of-time) build flags this library was compiled with.

                                                                                                          Return type
                                                                                                          diff --git a/2.9/generated/torch.xpu.get_rng_state.html b/2.9/generated/torch.xpu.get_rng_state.html index e4e8918d88f..91d7a56e622 100644 --- a/2.9/generated/torch.xpu.get_rng_state.html +++ b/2.9/generated/torch.xpu.get_rng_state.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.get_rng_state#

                                                                                                          -torch.xpu.get_rng_state(device='xpu')[source]#
                                                                                                          +torch.xpu.get_rng_state(device='xpu')[source]#

                                                                                                          Return the random number generator state of the specified GPU as a ByteTensor.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.get_rng_state_all.html b/2.9/generated/torch.xpu.get_rng_state_all.html index ebb03bc9eeb..8b942a5feda 100644 --- a/2.9/generated/torch.xpu.get_rng_state_all.html +++ b/2.9/generated/torch.xpu.get_rng_state_all.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.get_rng_state_all#

                                                                                                          -torch.xpu.get_rng_state_all()[source]#
                                                                                                          +torch.xpu.get_rng_state_all()[source]#

Return a list of ByteTensors representing the random number states of all devices.

                                                                                                          Return type
                                                                                                          diff --git a/2.9/generated/torch.xpu.get_stream_from_external.html b/2.9/generated/torch.xpu.get_stream_from_external.html index 1040439c32e..9ce6c4100a4 100644 --- a/2.9/generated/torch.xpu.get_stream_from_external.html +++ b/2.9/generated/torch.xpu.get_stream_from_external.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.get_stream_from_external#

                                                                                                          -torch.xpu.get_stream_from_external(data_ptr, device=None)[source]#
                                                                                                          +torch.xpu.get_stream_from_external(data_ptr, device=None)[source]#

                                                                                                          Return a Stream from an external SYCL queue.

This function is used to wrap a SYCL queue created in other libraries in order to facilitate data exchange and multi-library interactions.

                                                                                                          diff --git a/2.9/generated/torch.xpu.init.html b/2.9/generated/torch.xpu.init.html index 83b4fb9d95f..7305c975cd9 100644 --- a/2.9/generated/torch.xpu.init.html +++ b/2.9/generated/torch.xpu.init.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.init#

                                                                                                          -torch.xpu.init()[source]#
                                                                                                          +torch.xpu.init()[source]#

Initialize PyTorch’s XPU state. This is a Python API for lazy initialization that avoids initializing XPU until the first time it is accessed. Does nothing if the XPU state is
diff --git a/2.9/generated/torch.xpu.initial_seed.html b/2.9/generated/torch.xpu.initial_seed.html index 099dfaf269f..6bb91eeb552 100644 --- a/2.9/generated/torch.xpu.initial_seed.html +++ b/2.9/generated/torch.xpu.initial_seed.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.initial_seed#

                                                                                                          -torch.xpu.initial_seed()[source]#
                                                                                                          +torch.xpu.initial_seed()[source]#

                                                                                                          Return the current random seed of the current GPU.

                                                                                                          Warning

                                                                                                          diff --git a/2.9/generated/torch.xpu.is_available.html b/2.9/generated/torch.xpu.is_available.html index 68fb5968390..16d021639db 100644 --- a/2.9/generated/torch.xpu.is_available.html +++ b/2.9/generated/torch.xpu.is_available.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.is_available#

                                                                                                          -torch.xpu.is_available()[source]#
                                                                                                          +torch.xpu.is_available()[source]#

                                                                                                          Return a bool indicating if XPU is currently available.

                                                                                                          Return type
                                                                                                          diff --git a/2.9/generated/torch.xpu.is_initialized.html b/2.9/generated/torch.xpu.is_initialized.html index fa100a07af4..d3b7c2c2f7e 100644 --- a/2.9/generated/torch.xpu.is_initialized.html +++ b/2.9/generated/torch.xpu.is_initialized.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.is_initialized#

                                                                                                          -torch.xpu.is_initialized()[source]#
                                                                                                          +torch.xpu.is_initialized()[source]#

                                                                                                          Return whether PyTorch’s XPU state has been initialized.

                                                                                                          diff --git a/2.9/generated/torch.xpu.manual_seed.html b/2.9/generated/torch.xpu.manual_seed.html index 4043b182b5e..9f048925a8c 100644 --- a/2.9/generated/torch.xpu.manual_seed.html +++ b/2.9/generated/torch.xpu.manual_seed.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.manual_seed#

                                                                                                          -torch.xpu.manual_seed(seed)[source]#
                                                                                                          +torch.xpu.manual_seed(seed)[source]#

                                                                                                          Set the seed for generating random numbers for the current GPU.

                                                                                                          It’s safe to call this function if XPU is not available; in that case, it is silently ignored.

                                                                                                          diff --git a/2.9/generated/torch.xpu.manual_seed_all.html b/2.9/generated/torch.xpu.manual_seed_all.html index d22d3fce9aa..ca164755683 100644 --- a/2.9/generated/torch.xpu.manual_seed_all.html +++ b/2.9/generated/torch.xpu.manual_seed_all.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.manual_seed_all#

                                                                                                          -torch.xpu.manual_seed_all(seed)[source]#
                                                                                                          +torch.xpu.manual_seed_all(seed)[source]#

                                                                                                          Set the seed for generating random numbers on all GPUs.

                                                                                                          It’s safe to call this function if XPU is not available; in that case, it is silently ignored.
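A hedged reproducibility sketch combining the RNG helpers above (the seed value is arbitrary):

>>> torch.xpu.manual_seed_all(42)           # safe even if XPU is unavailable
>>> if torch.xpu.is_available():
...     state = torch.xpu.get_rng_state()   # ByteTensor snapshot of the generator
...     torch.xpu.set_rng_state(state)      # restore later for bit-exact reruns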

                                                                                                          diff --git a/2.9/generated/torch.xpu.memory.empty_cache.html b/2.9/generated/torch.xpu.memory.empty_cache.html index d03bd6877de..ebdded7f133 100644 --- a/2.9/generated/torch.xpu.memory.empty_cache.html +++ b/2.9/generated/torch.xpu.memory.empty_cache.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.empty_cache#

                                                                                                          -torch.xpu.memory.empty_cache()[source]#
                                                                                                          +torch.xpu.memory.empty_cache()[source]#

Release all unoccupied cached memory currently held by the caching allocator so that they can be used in other XPU applications.

                                                                                                          diff --git a/2.9/generated/torch.xpu.memory.max_memory_allocated.html b/2.9/generated/torch.xpu.memory.max_memory_allocated.html index c56e44fa9b3..789574726a0 100644 --- a/2.9/generated/torch.xpu.memory.max_memory_allocated.html +++ b/2.9/generated/torch.xpu.memory.max_memory_allocated.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.max_memory_allocated#

                                                                                                          -torch.xpu.memory.max_memory_allocated(device=None)[source]#
                                                                                                          +torch.xpu.memory.max_memory_allocated(device=None)[source]#

                                                                                                          Return the maximum GPU memory occupied by tensors in bytes for a given device.

By default, this returns the peak allocated memory since the beginning of this program. reset_peak_memory_stats() can be used to
diff --git a/2.9/generated/torch.xpu.memory.max_memory_reserved.html b/2.9/generated/torch.xpu.memory.max_memory_reserved.html index 3aba7dccb58..d6828e858ee 100644 --- a/2.9/generated/torch.xpu.memory.max_memory_reserved.html +++ b/2.9/generated/torch.xpu.memory.max_memory_reserved.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.max_memory_reserved#

                                                                                                          -torch.xpu.memory.max_memory_reserved(device=None)[source]#
                                                                                                          +torch.xpu.memory.max_memory_reserved(device=None)[source]#

                                                                                                          Return the maximum GPU memory managed by the caching allocator in bytes for a given device.

By default, this returns the peak cached memory since the beginning of this program. reset_peak_memory_stats() can be used to reset
diff --git a/2.9/generated/torch.xpu.memory.mem_get_info.html b/2.9/generated/torch.xpu.memory.mem_get_info.html index 8341255b0a8..a6ea6ec92de 100644 --- a/2.9/generated/torch.xpu.memory.mem_get_info.html +++ b/2.9/generated/torch.xpu.memory.mem_get_info.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.mem_get_info#

                                                                                                          -torch.xpu.memory.mem_get_info(device=None)[source]#
                                                                                                          +torch.xpu.memory.mem_get_info(device=None)[source]#

                                                                                                          Return the global free and total GPU memory for a given device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.memory.memory_allocated.html b/2.9/generated/torch.xpu.memory.memory_allocated.html index 520881a1c4c..f7c92534148 100644 --- a/2.9/generated/torch.xpu.memory.memory_allocated.html +++ b/2.9/generated/torch.xpu.memory.memory_allocated.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.memory_allocated#

                                                                                                          -torch.xpu.memory.memory_allocated(device=None)[source]#
                                                                                                          +torch.xpu.memory.memory_allocated(device=None)[source]#

                                                                                                          Return the current GPU memory occupied by tensors in bytes for a given device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.memory.memory_reserved.html b/2.9/generated/torch.xpu.memory.memory_reserved.html index b5e16ad7376..59e718749f4 100644 --- a/2.9/generated/torch.xpu.memory.memory_reserved.html +++ b/2.9/generated/torch.xpu.memory.memory_reserved.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.memory_reserved#

                                                                                                          -torch.xpu.memory.memory_reserved(device=None)[source]#
                                                                                                          +torch.xpu.memory.memory_reserved(device=None)[source]#

                                                                                                          Return the current GPU memory managed by the caching allocator in bytes for a given device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.memory.memory_stats.html b/2.9/generated/torch.xpu.memory.memory_stats.html index 6577067ebb6..baa016795dd 100644 --- a/2.9/generated/torch.xpu.memory.memory_stats.html +++ b/2.9/generated/torch.xpu.memory.memory_stats.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.memory_stats#

                                                                                                          -torch.xpu.memory.memory_stats(device=None)[source]#
                                                                                                          +torch.xpu.memory.memory_stats(device=None)[source]#

                                                                                                          Return a dictionary of XPU memory allocator statistics for a given device.

                                                                                                          The return value of this function is a dictionary of statistics, each of which is a non-negative integer.

                                                                                                          diff --git a/2.9/generated/torch.xpu.memory.memory_stats_as_nested_dict.html b/2.9/generated/torch.xpu.memory.memory_stats_as_nested_dict.html index ba5a6ad6bc4..70c5a16f960 100644 --- a/2.9/generated/torch.xpu.memory.memory_stats_as_nested_dict.html +++ b/2.9/generated/torch.xpu.memory.memory_stats_as_nested_dict.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.memory_stats_as_nested_dict#

                                                                                                          -torch.xpu.memory.memory_stats_as_nested_dict(device=None)[source]#
                                                                                                          +torch.xpu.memory.memory_stats_as_nested_dict(device=None)[source]#

                                                                                                          Return the result of memory_stats() as a nested dictionary.

                                                                                                          Return type
                                                                                                          diff --git a/2.9/generated/torch.xpu.memory.reset_accumulated_memory_stats.html b/2.9/generated/torch.xpu.memory.reset_accumulated_memory_stats.html index 0737f08b7f5..6cd9093a07d 100644 --- a/2.9/generated/torch.xpu.memory.reset_accumulated_memory_stats.html +++ b/2.9/generated/torch.xpu.memory.reset_accumulated_memory_stats.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.reset_accumulated_memory_stats#

                                                                                                          -torch.xpu.memory.reset_accumulated_memory_stats(device=None)[source]#
                                                                                                          +torch.xpu.memory.reset_accumulated_memory_stats(device=None)[source]#

                                                                                                          Reset the “accumulated” (historical) stats tracked by the XPU memory allocator.

                                                                                                          See memory_stats() for details. Accumulated stats correspond to the “allocated” and “freed” keys in each individual stat dict.

                                                                                                          diff --git a/2.9/generated/torch.xpu.memory.reset_peak_memory_stats.html b/2.9/generated/torch.xpu.memory.reset_peak_memory_stats.html index 4da8260331b..91db45db915 100644 --- a/2.9/generated/torch.xpu.memory.reset_peak_memory_stats.html +++ b/2.9/generated/torch.xpu.memory.reset_peak_memory_stats.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.memory.reset_peak_memory_stats#

                                                                                                          -torch.xpu.memory.reset_peak_memory_stats(device=None)[source]#
                                                                                                          +torch.xpu.memory.reset_peak_memory_stats(device=None)[source]#

                                                                                                          Reset the “peak” stats tracked by the XPU memory allocator.

                                                                                                          See memory_stats() for details. Peak stats correspond to the “peak” key in each individual stat dict.
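A sketch of a typical measurement loop over the memory helpers above, assuming an XPU device:

>>> torch.xpu.memory.reset_peak_memory_stats()
>>> t = torch.empty(1024, 1024, device="xpu")
>>> torch.xpu.memory.memory_allocated()      # bytes currently held by tensors
>>> torch.xpu.memory.max_memory_allocated()  # peak since the reset above
>>> del t
>>> torch.xpu.memory.empty_cache()           # return cached blocks to the driver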

                                                                                                          diff --git a/2.9/generated/torch.xpu.seed.html b/2.9/generated/torch.xpu.seed.html index fe9482071ff..aae1a87248a 100644 --- a/2.9/generated/torch.xpu.seed.html +++ b/2.9/generated/torch.xpu.seed.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.seed#

                                                                                                          -torch.xpu.seed()[source]#
                                                                                                          +torch.xpu.seed()[source]#

                                                                                                          Set the seed for generating random numbers to a random number for the current GPU.

                                                                                                          It’s safe to call this function if XPU is not available; in that case, it is silently ignored.

                                                                                                          diff --git a/2.9/generated/torch.xpu.seed_all.html b/2.9/generated/torch.xpu.seed_all.html index 7099756abc6..b780f4368d4 100644 --- a/2.9/generated/torch.xpu.seed_all.html +++ b/2.9/generated/torch.xpu.seed_all.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.seed_all#

                                                                                                          -torch.xpu.seed_all()[source]#
                                                                                                          +torch.xpu.seed_all()[source]#

                                                                                                          Set the seed for generating random numbers to a random number on all GPUs.

                                                                                                          It’s safe to call this function if XPU is not available; in that case, it is silently ignored.

                                                                                                          diff --git a/2.9/generated/torch.xpu.set_device.html b/2.9/generated/torch.xpu.set_device.html index 8d3e3726cd2..caa77834cf6 100644 --- a/2.9/generated/torch.xpu.set_device.html +++ b/2.9/generated/torch.xpu.set_device.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.set_device#

                                                                                                          -torch.xpu.set_device(device)[source]#
                                                                                                          +torch.xpu.set_device(device)[source]#

                                                                                                          Set the current device.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.set_rng_state.html b/2.9/generated/torch.xpu.set_rng_state.html index 15d16f01d17..38bd3b2b2dd 100644 --- a/2.9/generated/torch.xpu.set_rng_state.html +++ b/2.9/generated/torch.xpu.set_rng_state.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.set_rng_state#

                                                                                                          -torch.xpu.set_rng_state(new_state, device='xpu')[source]#
                                                                                                          +torch.xpu.set_rng_state(new_state, device='xpu')[source]#

                                                                                                          Set the random number generator state of the specified GPU.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.set_rng_state_all.html b/2.9/generated/torch.xpu.set_rng_state_all.html index f76abf775b4..84ba3c72a73 100644 --- a/2.9/generated/torch.xpu.set_rng_state_all.html +++ b/2.9/generated/torch.xpu.set_rng_state_all.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.set_rng_state_all#

                                                                                                          -torch.xpu.set_rng_state_all(new_states)[source]#
                                                                                                          +torch.xpu.set_rng_state_all(new_states)[source]#

                                                                                                          Set the random number generator state of all devices.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.set_stream.html b/2.9/generated/torch.xpu.set_stream.html index d02d3ee6e7d..2a191492211 100644 --- a/2.9/generated/torch.xpu.set_stream.html +++ b/2.9/generated/torch.xpu.set_stream.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.set_stream#

                                                                                                          -torch.xpu.set_stream(stream)[source]#
                                                                                                          +torch.xpu.set_stream(stream)[source]#
Set the current stream. This is a wrapper API to set the stream.

                                                                                                          Usage of this function is discouraged in favor of the stream context manager.

                                                                                                          diff --git a/2.9/generated/torch.xpu.stream.html b/2.9/generated/torch.xpu.stream.html index 0e223d602cb..3c0f300f2ab 100644 --- a/2.9/generated/torch.xpu.stream.html +++ b/2.9/generated/torch.xpu.stream.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.stream#

                                                                                                          -torch.xpu.stream(stream)[source]#
                                                                                                          +torch.xpu.stream(stream)[source]#

Wrapper around the context-manager StreamContext that selects a given stream.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/generated/torch.xpu.synchronize.html b/2.9/generated/torch.xpu.synchronize.html index 904521033b1..f645ef15ff7 100644 --- a/2.9/generated/torch.xpu.synchronize.html +++ b/2.9/generated/torch.xpu.synchronize.html @@ -4404,7 +4404,7 @@

                                                                                                          torch.xpu.synchronize#

                                                                                                          -torch.xpu.synchronize(device=None)[source]#
                                                                                                          +torch.xpu.synchronize(device=None)[source]#

Wait for all kernels in all streams on an XPU device to complete.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/hub.html b/2.9/hub.html index 0e33f719470..0c39fd565fb 100644 --- a/2.9/hub.html +++ b/2.9/hub.html @@ -4469,7 +4469,7 @@

Loading models from Hub

                                                                                                          -torch.hub.list(github, force_reload=False, skip_validation=False, trust_repo=None, verbose=True)[source]#
                                                                                                          +torch.hub.list(github, force_reload=False, skip_validation=False, trust_repo=None, verbose=True)[source]#

                                                                                                          List all callable entrypoints available in the repo specified by github.

                                                                                                          Parameters
                                                                                                          @@ -4522,7 +4522,7 @@

                                                                                                          Loading models from Hub
                                                                                                          -torch.hub.help(github, model, force_reload=False, skip_validation=False, trust_repo=None)[source]#
                                                                                                          +torch.hub.help(github, model, force_reload=False, skip_validation=False, trust_repo=None)[source]#

                                                                                                          Show the docstring of entrypoint model.

                                                                                                          Parameters
                                                                                                          @@ -4567,7 +4567,7 @@

                                                                                                          Loading models from Hub
                                                                                                          -torch.hub.load(repo_or_dir, model, *args, source='github', trust_repo=None, force_reload=False, verbose=True, skip_validation=False, **kwargs)[source]#
                                                                                                          +torch.hub.load(repo_or_dir, model, *args, source='github', trust_repo=None, force_reload=False, verbose=True, skip_validation=False, **kwargs)[source]#

                                                                                                          Load a model from a github repo or a local directory.

Note: Loading a model is the typical use case, but this can also be used for loading other objects such as tokenizers, loss functions, etc.
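A hedged end-to-end sketch; 'pytorch/vision' and 'resnet18' are the stock example entrypoints, not requirements of the API:

>>> entrypoints = torch.hub.list('pytorch/vision')
>>> print(torch.hub.help('pytorch/vision', 'resnet18'))
>>> model = torch.hub.load('pytorch/vision', 'resnet18', weights=None)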

                                                                                                          @@ -4641,7 +4641,7 @@

                                                                                                          Loading models from Hub
                                                                                                          -torch.hub.download_url_to_file(url, dst, hash_prefix=None, progress=True)[source]#
                                                                                                          +torch.hub.download_url_to_file(url, dst, hash_prefix=None, progress=True)[source]#

                                                                                                          Download object at the given URL to a local path.

                                                                                                          Parameters
                                                                                                          @@ -4666,7 +4666,7 @@

                                                                                                          Loading models from Hub
                                                                                                          -torch.hub.load_state_dict_from_url(url, model_dir=None, map_location=None, progress=True, check_hash=False, file_name=None, weights_only=False)[source]#
                                                                                                          +torch.hub.load_state_dict_from_url(url, model_dir=None, map_location=None, progress=True, check_hash=False, file_name=None, weights_only=False)[source]#

                                                                                                          Loads the Torch serialized object at the given URL.

If the downloaded file is a zip file, it will be automatically decompressed.
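A minimal sketch; the URL is hypothetical and the model is defined elsewhere:

>>> url = 'https://example.com/weights.pth'  # hypothetical checkpoint URL
>>> sd = torch.hub.load_state_dict_from_url(url, map_location='cpu')
>>> model.load_state_dict(sd)                # model defined elsewhere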

                                                                                                          @@ -4729,7 +4729,7 @@

                                                                                                          Where are my downloaded models saved?
                                                                                                          -torch.hub.get_dir()[source]#
                                                                                                          +torch.hub.get_dir()[source]#

                                                                                                          Get the Torch Hub cache directory used for storing downloaded models & weights.

If set_dir() is not called, the default path is $TORCH_HOME/hub, where the environment variable $TORCH_HOME defaults to $XDG_CACHE_HOME/torch.
@@ -4745,7 +4745,7 @@

                                                                                                          Where are my downloaded models saved?
                                                                                                          -torch.hub.set_dir(d)[source]#
                                                                                                          +torch.hub.set_dir(d)[source]#

                                                                                                          Optionally set the Torch Hub directory used to save downloaded models & weights.

                                                                                                          Parameters
                                                                                                          diff --git a/2.9/library.html b/2.9/library.html index ebd25f55752..4dd20c7482a 100644 --- a/2.9/library.html +++ b/2.9/library.html @@ -4409,7 +4409,7 @@

                                                                                                          Testing custom ops
                                                                                                          -torch.library.opcheck(op, args, kwargs=None, *, test_utils=('test_schema', 'test_autograd_registration', 'test_faketensor', 'test_aot_dispatch_dynamic'), raise_exception=True, atol=None, rtol=None)[source]#
                                                                                                          +torch.library.opcheck(op, args, kwargs=None, *, test_utils=('test_schema', 'test_autograd_registration', 'test_faketensor', 'test_aot_dispatch_dynamic'), raise_exception=True, atol=None, rtol=None)[source]#

                                                                                                          Given an operator and some sample arguments, tests if the operator is registered correctly.

That is, when you use the torch.library/TORCH_LIBRARY APIs to create a
@@ -4527,7 +4527,7 @@
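A hedged sketch of a typical opcheck call; mylib::numpy_sin is the hypothetical custom op defined in the next section:

>>> torch.library.opcheck(
...     torch.ops.mylib.numpy_sin.default,
...     args=(torch.randn(3),),
...     test_utils=('test_schema', 'test_faketensor'),
... )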

Creating new custom ops in Python
Use torch.library.custom_op() to create new custom ops.

                                                                                                          -torch.library.custom_op(name, fn=None, /, *, mutates_args, device_types=None, schema=None, tags=None)[source]#
                                                                                                          +torch.library.custom_op(name, fn=None, /, *, mutates_args, device_types=None, schema=None, tags=None)[source]#

Wraps a function into a custom operator.

Reasons why you may want to create a custom op include:
- Wrapping a third-party library or custom kernel to work with PyTorch
@@ -4625,7 +4625,7 @@
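A minimal sketch close to the stock docs pattern; mylib::numpy_sin is an illustrative name and the CPU-only NumPy body is for demonstration:

>>> import numpy as np
>>> @torch.library.custom_op("mylib::numpy_sin", mutates_args=())
... def numpy_sin(x: torch.Tensor) -> torch.Tensor:
...     return torch.from_numpy(np.sin(x.cpu().numpy()))
>>> numpy_sin(torch.randn(3)).shape
torch.Size([3])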

                                                                                                          Creating new custom ops in Python
                                                                                                          -torch.library.triton_op(name, fn=None, /, *, mutates_args, schema=None)[source]#
                                                                                                          +torch.library.triton_op(name, fn=None, /, *, mutates_args, schema=None)[source]#

                                                                                                          Create a custom operator whose implementation is backed by 1+ triton kernels.

This is a more structured way of using triton kernels with PyTorch. Prefer using triton kernels with no torch.library custom operator wrappers
@@ -4719,7 +4719,7 @@

                                                                                                          Creating new custom ops in Python
                                                                                                          -torch.library.wrap_triton(triton_kernel, /)[source]#
                                                                                                          +torch.library.wrap_triton(triton_kernel, /)[source]#

                                                                                                          Allows capture of a triton kernel into a graph via make_fx or non-strict torch.export.

These technologies perform Dispatcher-based tracing (via
@@ -4792,7 +4792,7 @@
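A hedged sketch combining triton_op with wrap_triton (requires triton and a supported GPU; the kernel, op name, and block size are illustrative):

>>> import triton
>>> import triton.language as tl
>>> @triton.jit
... def add_kernel(x_ptr, y_ptr, out_ptr, n, BLOCK: tl.constexpr):
...     offs = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
...     mask = offs < n
...     tl.store(out_ptr + offs,
...              tl.load(x_ptr + offs, mask=mask) + tl.load(y_ptr + offs, mask=mask),
...              mask=mask)
>>> @torch.library.triton_op("mylib::add", mutates_args={})
... def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
...     out = torch.empty_like(x)
...     n = out.numel()
...     grid = (triton.cdiv(n, 1024),)
...     torch.library.wrap_triton(add_kernel)[grid](x, y, out, n, BLOCK=1024)
...     return out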

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.register_kernel(op, device_types, func=None, /, *, lib=None)[source]#
                                                                                                          +torch.library.register_kernel(op, device_types, func=None, /, *, lib=None)[source]#

                                                                                                          Register an implementation for a device type for this operator.

                                                                                                          Some valid device_types are: “cpu”, “cuda”, “xla”, “mps”, “ipu”, “xpu”. This API may be used as a decorator.
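A sketch of the decorator form, adding a CPU kernel to the hypothetical mylib::numpy_sin op from above:

>>> @torch.library.register_kernel("mylib::numpy_sin", "cpu")
... def _(x):
...     import numpy as np
...     return torch.from_numpy(np.sin(x.numpy()))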

                                                                                                          @@ -4841,7 +4841,7 @@

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.register_autocast(op, device_type, cast_inputs, /, *, lib=None)[source]#
                                                                                                          +torch.library.register_autocast(op, device_type, cast_inputs, /, *, lib=None)[source]#

                                                                                                          Register an autocast dispatch rule for this custom op.

Valid device_type values include: “cpu” and “cuda”.
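A one-line sketch; mylib::my_matmul is a hypothetical op assumed to be already defined:

    import torch

    # Under CUDA autocast, cast this op's floating-point inputs to float16.
    torch.library.register_autocast("mylib::my_matmul", "cuda", torch.float16)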

                                                                                                          @@ -4883,7 +4883,7 @@

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.register_autograd(op, backward, /, *, setup_context=None, lib=None)[source]#
                                                                                                          +torch.library.register_autograd(op, backward, /, *, setup_context=None, lib=None)[source]#

                                                                                                          Register a backward formula for this custom op.

In order for an operator to work with autograd, you need to register a backward formula:
@@ -4967,7 +4967,7 @@
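A sketch for the mylib::numpy_sin op from the custom_op sketch above:

    import torch

    def setup_context(ctx, inputs, output):
        x, = inputs
        ctx.save_for_backward(x)

    def backward(ctx, grad):
        x, = ctx.saved_tensors
        return grad * x.cos()   # d/dx sin(x) = cos(x)

    torch.library.register_autograd(
        "mylib::numpy_sin", backward, setup_context=setup_context
    )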

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.register_fake(op, func=None, /, *, lib=None, _stacklevel=1, allow_override=False)[source]#
                                                                                                          +torch.library.register_fake(op, func=None, /, *, lib=None, _stacklevel=1, allow_override=False)[source]#

                                                                                                          Register a FakeTensor implementation (“fake impl”) for this operator.

Also sometimes known as a “meta kernel” or “abstract impl”.

A “FakeTensor implementation” specifies the behavior of this operator on
@@ -5062,7 +5062,7 @@
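A sketch, again assuming mylib::numpy_sin; a fake impl only describes output metadata (shape, dtype, device), never real values:

    import torch

    @torch.library.register_fake("mylib::numpy_sin")
    def _(x):
        return torch.empty_like(x)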

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.register_vmap(op, func=None, /, *, lib=None)[source]#
                                                                                                          +torch.library.register_vmap(op, func=None, /, *, lib=None)[source]#

                                                                                                          Register a vmap implementation to support torch.vmap() for this custom op.

                                                                                                          This API may be used as a decorator (see examples).

In order for an operator to work with torch.vmap(), you may need to register a
@@ -5140,14 +5140,14 @@
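A sketch for the pointwise mylib::numpy_sin op from earlier, where batching is trivial; the rule returns the output and its batch dim:

    import torch

    @torch.library.register_vmap("mylib::numpy_sin")
    def _(info, in_dims, x):
        x_bdim, = in_dims
        # Pointwise op: apply directly; the batch dim stays where it was.
        return numpy_sin(x), x_bdim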

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.impl_abstract(qualname, func=None, *, lib=None, _stacklevel=1)[source]#
                                                                                                          +torch.library.impl_abstract(qualname, func=None, *, lib=None, _stacklevel=1)[source]#

                                                                                                          This API was renamed to torch.library.register_fake() in PyTorch 2.4. Please use that instead.

                                                                                                          -torch.library.get_ctx()[source]#
                                                                                                          +torch.library.get_ctx()[source]#

                                                                                                          get_ctx() returns the current AbstractImplCtx object.

Calling get_ctx() is only valid inside of a fake impl (see torch.library.register_fake() for more usage details).

                                                                                                          @@ -5160,7 +5160,7 @@

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.register_torch_dispatch(op, torch_dispatch_class, func=None, /, *, lib=None)[source]#
                                                                                                          +torch.library.register_torch_dispatch(op, torch_dispatch_class, func=None, /, *, lib=None)[source]#

                                                                                                          Registers a torch_dispatch rule for the given operator and torch_dispatch_class.

This allows for open registration to specify the behavior between the operator and the torch_dispatch_class without needing to modify the torch_dispatch_class
@@ -5204,7 +5204,7 @@

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.infer_schema(prototype_function, /, *, mutates_args, op_name=None)[source]#
                                                                                                          +torch.library.infer_schema(prototype_function, /, *, mutates_args, op_name=None)[source]#

                                                                                                          Parses the schema of a given function with type hints. The schema is inferred from the function’s type hints, and can be used to define a new operator.

                                                                                                          We make the following assumptions:

                                                                                                          @@ -5255,7 +5255,7 @@

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -class torch._library.custom_ops.CustomOpDef(namespace, name, schema, fn, tags=None)[source]#
                                                                                                          +class torch._library.custom_ops.CustomOpDef(namespace, name, schema, fn, tags=None)[source]#

                                                                                                          CustomOpDef is a wrapper around a function that turns it into a custom op.

                                                                                                          It has various methods for registering additional behavior for this custom op.

                                                                                                          @@ -5265,7 +5265,7 @@

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -set_kernel_enabled(device_type, enabled=True)[source]#
                                                                                                          +set_kernel_enabled(device_type, enabled=True)[source]#

                                                                                                          Disable or re-enable an already registered kernel for this custom operator.

                                                                                                          If the kernel is already disabled/enabled, this is a no-op.
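A sketch, assuming numpy_sin is the CustomOpDef returned by the earlier custom_op sketch; in current releases this works as a context manager:

    with numpy_sin.set_kernel_enabled("cpu", enabled=False):
        pass  # inside this block the registered CPU kernel is disabled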

                                                                                                          @@ -5307,7 +5307,7 @@

                                                                                                          Extending custom ops (created from Python or C++)
                                                                                                          -torch.library.get_kernel(op, dispatch_key)[source]#
                                                                                                          +torch.library.get_kernel(op, dispatch_key)[source]#

                                                                                                          Returns the computed kernel for a given operator and dispatch key.

This function retrieves the kernel that would be executed for a given operator and dispatch key combination. The returned SafeKernelFunction
@@ -5387,7 +5387,7 @@

Low-level APIs

                                                                                                          -class torch.library.Library(ns, kind, dispatch_key='')[source]#
                                                                                                          +class torch.library.Library(ns, kind, dispatch_key='')[source]#

A class to create libraries that can be used to register new operators or override operators in existing libraries from Python. A user can optionally pass in a dispatch key name if they only want to register
@@ -5408,7 +5408,7 @@
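A sketch of the define/impl flow; the "mylib2" namespace and the scale op are illustrative:

    import torch

    lib = torch.library.Library("mylib2", "DEF")
    lib.define("scale(Tensor x, float alpha) -> Tensor")
    lib.impl("scale", lambda x, alpha: x * alpha, "CompositeExplicitAutograd")

    out = torch.ops.mylib2.scale(torch.ones(2), 3.0)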

                                                                                                          Low-level APIs
                                                                                                          -define(schema, alias_analysis='', *, tags=())[source]#
                                                                                                          +define(schema, alias_analysis='', *, tags=())[source]#

                                                                                                          Defines a new operator and its semantics in the ns namespace.

                                                                                                          Parameters
                                                                                                          @@ -5435,7 +5435,7 @@

                                                                                                          Low-level APIs
                                                                                                          -fallback(fn, dispatch_key='', *, with_keyset=False)[source]#
                                                                                                          +fallback(fn, dispatch_key='', *, with_keyset=False)[source]#

                                                                                                          Registers the function implementation as the fallback for the given key.

                                                                                                          This function only works for a library with global namespace (“_”).
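A sketch under the assumption that the fallback callable receives the op followed by its arguments; PrivateUse1 is the customary key for out-of-tree backends:

    import torch

    lib = torch.library.Library("_", "IMPL")

    def not_implemented(op, *args, **kwargs):
        raise NotImplementedError(f"no kernel for {op}")

    lib.fallback(not_implemented, "PrivateUse1")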

                                                                                                          @@ -5462,7 +5462,7 @@

                                                                                                          Low-level APIs
                                                                                                          -impl(op_name, fn, dispatch_key='', *, with_keyset=False, allow_override=False)[source]#
                                                                                                          +impl(op_name, fn, dispatch_key='', *, with_keyset=False, allow_override=False)[source]#

                                                                                                          Registers the function implementation for an operator defined in the library.

                                                                                                          Parameters
                                                                                                          @@ -5495,13 +5495,13 @@

                                                                                                          Low-level APIs
                                                                                                          -torch.library.fallthrough_kernel()[source]#
                                                                                                          +torch.library.fallthrough_kernel()[source]#

                                                                                                          A dummy function to pass to Library.impl in order to register a fallthrough.
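For example, a sketch that makes one aten op fall through the Autocast key instead of being handled there:

    import torch

    my_lib = torch.library.Library("aten", "IMPL")
    my_lib.impl("div.Tensor", torch.library.fallthrough_kernel, "Autocast")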

                                                                                                          -torch.library.define(qualname, schema, *, lib=None, tags=())[source]#
                                                                                                          +torch.library.define(qualname, schema, *, lib=None, tags=())[source]#
                                                                                                          torch.library.define(lib, schema, alias_analysis='')

                                                                                                          Defines a new operator.
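A sketch of the qualname form; mylib::clamp01 is made up, and the op still needs implementations (see torch.library.impl below) before it can be called:

    import torch

    torch.library.define("mylib::clamp01", "(Tensor x) -> Tensor")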

                                                                                                          @@ -5558,7 +5558,7 @@

                                                                                                          Low-level APIs
                                                                                                          -torch.library.impl(lib, name, dispatch_key='')[source]#
                                                                                                          +torch.library.impl(lib, name, dispatch_key='')[source]#
torch.library.impl(qualname: str, types: Union[str, Sequence[str]], func: Literal[None] = None, *, lib: Optional[Library] = None) → Callable[[Callable[..., object]], None]
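A sketch continuing the mylib::clamp01 definition from the previous sketch:

    import torch

    @torch.library.impl("mylib::clamp01", "cpu")
    def _(x):
        return x.clamp(0.0, 1.0)

    print(torch.ops.mylib.clamp01(torch.tensor([-1.0, 0.5, 2.0])))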
diff --git a/2.9/mobile_optimizer.html b/2.9/mobile_optimizer.html
index 9894b26c73a..ed2e6ef67d7 100644
--- a/2.9/mobile_optimizer.html
+++ b/2.9/mobile_optimizer.html
@@ -4405,7 +4405,7 @@

                                                                                                          torch.utils.mobile_optimizer
                                                                                                          -torch.utils.mobile_optimizer.optimize_for_mobile(script_module, optimization_blocklist=None, preserved_methods=None, backend='CPU')[source]#
                                                                                                          +torch.utils.mobile_optimizer.optimize_for_mobile(script_module, optimization_blocklist=None, preserved_methods=None, backend='CPU')[source]#

Optimize a TorchScript module for mobile deployment.
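A minimal sketch with an illustrative toy model:

    import torch
    from torch.utils.mobile_optimizer import optimize_for_mobile

    class TinyNet(torch.nn.Module):          # hypothetical model
        def __init__(self):
            super().__init__()
            self.fc = torch.nn.Linear(4, 2)
        def forward(self, x):
            return torch.relu(self.fc(x))

    scripted = torch.jit.script(TinyNet().eval())
    mobile_module = optimize_for_mobile(scripted)   # defaults to the CPU backend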

                                                                                                          Parameters
diff --git a/2.9/model_zoo.html b/2.9/model_zoo.html
index 7c7935e2107..5e522f96f25 100644
--- a/2.9/model_zoo.html
+++ b/2.9/model_zoo.html
@@ -4397,7 +4397,7 @@

torch.utils.model_zoo
Moved to torch.hub.

                                                                                                          -torch.utils.model_zoo.load_url(url, model_dir=None, map_location=None, progress=True, check_hash=False, file_name=None, weights_only=False)[source]#
                                                                                                          +torch.utils.model_zoo.load_url(url, model_dir=None, map_location=None, progress=True, check_hash=False, file_name=None, weights_only=False)[source]#

                                                                                                          Loads the Torch serialized object at the given URL.

If the downloaded file is a zip file, it will be automatically decompressed.
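A quick sketch; the URL is illustrative (any torch-serialized checkpoint URL works), and check_hash assumes the filename embeds a hash prefix:

    import torch.utils.model_zoo as model_zoo

    state_dict = model_zoo.load_url(
        "https://download.pytorch.org/models/resnet18-f37072fd.pth",
        check_hash=True,
    )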

diff --git a/2.9/module_tracker.html b/2.9/module_tracker.html
index 972e82ad439..33672365493 100644
--- a/2.9/module_tracker.html
+++ b/2.9/module_tracker.html
@@ -4398,7 +4398,7 @@
It can be used within other tracking tools to easily associate measured quantities with user-friendly names. This is used in particular in the FlopCounterMode today.

                                                                                                          -class torch.utils.module_tracker.ModuleTracker[source]#
                                                                                                          +class torch.utils.module_tracker.ModuleTracker[source]#

ModuleTracker is a context manager that tracks the nn.Module hierarchy during execution so that other systems can query which Module is currently being executed (or whose backward is being executed).
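A sketch of querying the tracker from inside a forward hook; the toy network is illustrative:

    import torch
    from torch.utils.module_tracker import ModuleTracker

    net = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.ReLU())
    tracker = ModuleTracker()

    def hook(module, inputs):
        # tracker.parents names every module currently on the call stack
        print(sorted(tracker.parents))

    net[1].register_forward_pre_hook(hook)
    with tracker:
        net(torch.randn(2, 4))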

diff --git a/2.9/monitor.html b/2.9/monitor.html
index 66b19732dcf..350a04a914e 100644
--- a/2.9/monitor.html
+++ b/2.9/monitor.html
@@ -4571,7 +4571,7 @@

                                                                                                          torch.monitor
                                                                                                          -class torch.monitor.TensorboardEventHandler(writer)[source]#
                                                                                                          +class torch.monitor.TensorboardEventHandler(writer)[source]#

                                                                                                          TensorboardEventHandler is an event handler that will write known events to the provided SummaryWriter.

This currently only supports torch.monitor.Stat events which are logged
@@ -4587,7 +4587,7 @@

                                                                                                          torch.monitor
                                                                                                          -__init__(writer)[source]#
                                                                                                          +__init__(writer)[source]#

                                                                                                          Constructs the TensorboardEventHandler.
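A minimal sketch; the log directory name is arbitrary:

    from torch.monitor import TensorboardEventHandler, register_event_handler
    from torch.utils.tensorboard import SummaryWriter

    writer = SummaryWriter("runs/example")
    register_event_handler(TensorboardEventHandler(writer))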

diff --git a/2.9/multiprocessing.html b/2.9/multiprocessing.html
index 6e99b7d8574..097d66ec088 100644
--- a/2.9/multiprocessing.html
+++ b/2.9/multiprocessing.html
@@ -1643,19 +1643,19 @@

                                                                                                          Strategy management#

                                                                                                          -torch.multiprocessing.get_all_sharing_strategies()[source]#
                                                                                                          +torch.multiprocessing.get_all_sharing_strategies()[source]#

Return a set of sharing strategies supported on the current system.

                                                                                                          -torch.multiprocessing.get_sharing_strategy()[source]#
                                                                                                          +torch.multiprocessing.get_sharing_strategy()[source]#

                                                                                                          Return the current strategy for sharing CPU tensors.

                                                                                                          -torch.multiprocessing.set_sharing_strategy(new_strategy)[source]#
                                                                                                          +torch.multiprocessing.set_sharing_strategy(new_strategy)[source]#

                                                                                                          Set the strategy for sharing CPU tensors.
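A quick sketch of the strategy-management trio (the available set is platform dependent):

    import torch.multiprocessing as mp

    print(mp.get_all_sharing_strategies())  # e.g. {'file_descriptor', 'file_system'} on Linux
    mp.set_sharing_strategy("file_system")
    assert mp.get_sharing_strategy() == "file_system"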

                                                                                                          Parameters
                                                                                                          @@ -1794,7 +1794,7 @@

                                                                                                          Spawning subprocesses
                                                                                                          -torch.multiprocessing.spawn.spawn(fn, args=(), nprocs=1, join=True, daemon=False, start_method='spawn')[source]#
                                                                                                          +torch.multiprocessing.spawn.spawn(fn, args=(), nprocs=1, join=True, daemon=False, start_method='spawn')[source]#

                                                                                                          Spawns nprocs processes that run fn with args.

If one of the processes exits with a non-zero exit status, the remaining processes are killed and an exception is raised with the
@@ -1831,11 +1831,11 @@
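A minimal sketch; note that fn always receives the process rank as its first argument, and the __main__ guard is required with the spawn start method:

    import torch.multiprocessing as mp

    def worker(rank, greeting):
        print(f"rank {rank}: {greeting}")

    if __name__ == "__main__":
        mp.spawn(worker, args=("hello",), nprocs=2)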

                                                                                                          Spawning subprocesses
                                                                                                          -class torch.multiprocessing.SpawnContext[source]#
                                                                                                          +class torch.multiprocessing.SpawnContext[source]#

                                                                                                          Returned by spawn() when called with join=False.

                                                                                                          -join(timeout=None, grace_period=None)[source]#
                                                                                                          +join(timeout=None, grace_period=None)[source]#

                                                                                                          Join one or more processes within spawn context.

Attempt to join one or more processes in this spawn context. If one of them exited with a non-zero exit status, this function
diff --git a/2.9/named_tensor.html b/2.9/named_tensor.html
index b41b6017ccb..56fc908b8f5 100644
--- a/2.9/named_tensor.html
+++ b/2.9/named_tensor.html
@@ -4663,7 +4663,7 @@

                                                                                                          Named tensor API reference
                                                                                                          -rename(*names, **rename_map)[source]#
                                                                                                          +rename(*names, **rename_map)[source]#

                                                                                                          Renames dimension names of self.

                                                                                                          There are two main usages:

self.rename(**rename_map) returns a view on tensor that has dims
@@ -4696,13 +4696,13 @@
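A sketch of both usages:

    import torch

    imgs = torch.randn(2, 3, names=("N", "C"))
    renamed = imgs.rename(C="channels")   # keyword form: remap one dim name
    dropped = imgs.rename(None)           # positional form: drop all names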

                                                                                                          Named tensor API reference
                                                                                                          -rename_(*names, **rename_map)[source]#
                                                                                                          +rename_(*names, **rename_map)[source]#

                                                                                                          In-place version of rename().

                                                                                                          -refine_names(*names)[source]#
                                                                                                          +refine_names(*names)[source]#

                                                                                                          Refines the dimension names of self according to names.

Refining is a special case of renaming that “lifts” unnamed dimensions. A None dim can be refined to have any name; a named dim can only be
@@ -4784,7 +4784,7 @@
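A two-line sketch of the lifting behavior:

    import torch

    x = torch.randn(3, 2)              # fully unnamed
    named = x.refine_names("N", "C")   # lifts the unnamed dims to ("N", "C")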

                                                                                                          Named tensor API reference
                                                                                                          -align_to(*names)[source]#
                                                                                                          +align_to(*names)[source]#

                                                                                                          Permutes the dimensions of the self tensor to match the order specified in names, adding size-one dims for any new names.

All of the dims of self must be named in order to use this method.
diff --git a/2.9/nested.html b/2.9/nested.html
index 59082aef576..f8669effc21 100644
--- a/2.9/nested.html
+++ b/2.9/nested.html
@@ -4833,7 +4833,7 @@

                                                                                                          Data dependent operation within torch.compile

                                                                                                          Detailed Docs for Construction and Conversion Functions#

                                                                                                          -torch.nested.nested_tensor(tensor_list, *, dtype=None, layout=None, device=None, requires_grad=False, pin_memory=False)[source]#
                                                                                                          +torch.nested.nested_tensor(tensor_list, *, dtype=None, layout=None, device=None, requires_grad=False, pin_memory=False)[source]#

Constructs a nested tensor with no autograd history (also known as a “leaf tensor”, see Autograd mechanics) from tensor_list, a list of tensors.
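A minimal sketch using the jagged layout:

    import torch

    nt = torch.nested.nested_tensor(
        [torch.randn(2, 5), torch.randn(3, 5)],   # ragged in the first dim
        layout=torch.jagged,
    )
    # nt holds 2 batch components of shapes (2, 5) and (3, 5)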

                                                                                                          @@ -4873,7 +4873,7 @@

                                                                                                          Data dependent operation within torch.compile
                                                                                                          -torch.nested.nested_tensor_from_jagged(values, offsets=None, lengths=None, jagged_dim=None, min_seqlen=None, max_seqlen=None)[source]#
                                                                                                          +torch.nested.nested_tensor_from_jagged(values, offsets=None, lengths=None, jagged_dim=None, min_seqlen=None, max_seqlen=None)[source]#

Constructs a jagged layout nested tensor from the given jagged components. The jagged layout consists of a required values buffer with the jagged dimension packed into a single dimension. The offsets / lengths metadata determines how this dimension is split into batch elements
@@ -4949,7 +4949,7 @@
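A sketch of the packed-values-plus-offsets construction:

    import torch

    values = torch.randn(5, 8)           # jagged dim packed: 2 + 3 = 5 rows
    offsets = torch.tensor([0, 2, 5])
    nt = torch.nested.nested_tensor_from_jagged(values, offsets=offsets)
    # batch of 2: components of shape (2, 8) and (3, 8)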

                                                                                                          Data dependent operation within torch.compile
                                                                                                          -torch.nested.as_nested_tensor(ts, dtype=None, device=None, layout=None)[source]#
                                                                                                          +torch.nested.as_nested_tensor(ts, dtype=None, device=None, layout=None)[source]#

                                                                                                          Constructs a nested tensor preserving autograd history from a tensor or a list / tuple of tensors.

If a nested tensor is passed, it will be returned directly unless the device / dtype / layout
@@ -5054,7 +5054,7 @@

                                                                                                          Data dependent operation within torch.compile
                                                                                                          -torch.nested.masked_select(tensor, mask)[source]#
                                                                                                          +torch.nested.masked_select(tensor, mask)[source]#

Constructs a nested tensor given a strided tensor input and a strided mask; the resulting jagged layout nested tensor will retain values where the mask is True. The dimensionality of the mask is preserved and is represented with the offsets; this is unlike masked_select() where the output is collapsed to a 1D tensor.
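A two-line sketch:

    import torch

    t = torch.randn(2, 4)
    nt = torch.nested.masked_select(t, t > 0)
    # jagged layout: each batch row keeps only its positive entries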

                                                                                                          @@ -5090,7 +5090,7 @@

                                                                                                          Data dependent operation within torch.compile
                                                                                                          -torch.nested.narrow(tensor, dim, start, length, layout=torch.strided)[source]#
                                                                                                          +torch.nested.narrow(tensor, dim, start, length, layout=torch.strided)[source]#

Constructs a nested tensor (which might be a view) from tensor, a strided tensor. This follows similar semantics to torch.Tensor.narrow, where in the dim-th dimension the new nested tensor shows only the elements in the interval [start, start+length). As nested representations
diff --git a/2.9/nn.attention.flex_attention.html b/2.9/nn.attention.flex_attention.html
index 6936c2af1aa..5ae65e3dd27 100644
--- a/2.9/nn.attention.flex_attention.html
+++ b/2.9/nn.attention.flex_attention.html
@@ -4405,7 +4405,7 @@

                                                                                                          Created On: Jul 16, 2024 | Last Updated On: Sep 08, 2025

                                                                                                          -torch.nn.attention.flex_attention.flex_attention(query, key, value, score_mod=None, block_mask=None, scale=None, enable_gqa=False, return_lse=False, kernel_options=None, *, return_aux=None)[source]#
                                                                                                          +torch.nn.attention.flex_attention.flex_attention(query, key, value, score_mod=None, block_mask=None, scale=None, enable_gqa=False, return_lse=False, kernel_options=None, *, return_aux=None)[source]#

                                                                                                          This function implements scaled dot product attention with an arbitrary attention score modification function.
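A small sketch of a relative-position score_mod; flex_attention runs in eager mode but is intended to be compiled for real workloads:

    import torch
    from torch.nn.attention.flex_attention import flex_attention

    def rel_bias(score, b, h, q_idx, kv_idx):
        # score_mod signature: (score, batch, head, q_idx, kv_idx) -> new score
        return score + (q_idx - kv_idx)

    q = torch.randn(1, 2, 16, 8)   # (batch, heads, seq, head_dim)
    k = torch.randn(1, 2, 16, 8)
    v = torch.randn(1, 2, 16, 8)
    out = flex_attention(q, k, v, score_mod=rel_bias)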

This function computes the scaled dot product attention between query, key, and value tensors with a user-defined attention score modification function. The attention score modification function will be applied after the attention
@@ -4493,7 +4493,7 @@

                                                                                                          -class torch.nn.attention.flex_attention.AuxOutput(lse=None, max_scores=None)[source]#
                                                                                                          +class torch.nn.attention.flex_attention.AuxOutput(lse=None, max_scores=None)[source]#

                                                                                                          Auxiliary outputs from flex_attention operation.

                                                                                                          Fields will be None if not requested, or contain the tensor if requested.

                                                                                                          @@ -4502,7 +4502,7 @@
                                                                                                          -class torch.nn.attention.flex_attention.AuxRequest(lse=False, max_scores=False)[source]#
                                                                                                          +class torch.nn.attention.flex_attention.AuxRequest(lse=False, max_scores=False)[source]#

                                                                                                          Request which auxiliary outputs to compute from flex_attention.

                                                                                                          Each field is a boolean indicating whether that auxiliary output should be computed.

                                                                                                          @@ -4513,7 +4513,7 @@

                                                                                                          BlockMask Utilities#

                                                                                                          -torch.nn.attention.flex_attention.create_block_mask(mask_mod, B, H, Q_LEN, KV_LEN, device='cuda', BLOCK_SIZE=128, _compile=False)[source]#
                                                                                                          +torch.nn.attention.flex_attention.create_block_mask(mask_mod, B, H, Q_LEN, KV_LEN, device='cuda', BLOCK_SIZE=128, _compile=False)[source]#

                                                                                                          This function creates a block mask tuple from a mask_mod function.
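A causal-mask sketch; sequence lengths are kept at multiples of the default BLOCK_SIZE of 128:

    import torch
    from torch.nn.attention.flex_attention import create_block_mask

    def causal(b, h, q_idx, kv_idx):
        return q_idx >= kv_idx

    mask = create_block_mask(causal, B=1, H=1, Q_LEN=256, KV_LEN=256, device="cpu")
    print(mask.sparsity())   # percentage of blocks that can be skipped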

                                                                                                          Parameters
                                                                                                          @@ -4556,7 +4556,7 @@

                                                                                                          BlockMask Utilities
                                                                                                          -torch.nn.attention.flex_attention.create_mask(mod_fn, B, H, Q_LEN, KV_LEN, device='cuda')[source]#
                                                                                                          +torch.nn.attention.flex_attention.create_mask(mod_fn, B, H, Q_LEN, KV_LEN, device='cuda')[source]#

                                                                                                          This function creates a mask tensor from a mod_fn function.

                                                                                                          Parameters
                                                                                                          @@ -4580,7 +4580,7 @@

                                                                                                          BlockMask Utilities
                                                                                                          -torch.nn.attention.flex_attention.create_nested_block_mask(mask_mod, B, H, q_nt, kv_nt=None, BLOCK_SIZE=128, _compile=False)[source]#
                                                                                                          +torch.nn.attention.flex_attention.create_nested_block_mask(mask_mod, B, H, q_nt, kv_nt=None, BLOCK_SIZE=128, _compile=False)[source]#

                                                                                                          This function creates a nested tensor compatible block mask tuple from a mask_mod function. The returned BlockMask will be on the device specified by the input nested tensor.

                                                                                                          @@ -4652,7 +4652,7 @@

                                                                                                          BlockMask Utilities
                                                                                                          -torch.nn.attention.flex_attention.and_masks(*mask_mods)[source]#
                                                                                                          +torch.nn.attention.flex_attention.and_masks(*mask_mods)[source]#

                                                                                                          Returns a mask_mod that’s the intersection of provided mask_mods

                                                                                                          Return type
                                                                                                          @@ -4663,7 +4663,7 @@

                                                                                                          BlockMask Utilities
                                                                                                          -torch.nn.attention.flex_attention.or_masks(*mask_mods)[source]#
                                                                                                          +torch.nn.attention.flex_attention.or_masks(*mask_mods)[source]#

                                                                                                          Returns a mask_mod that’s the union of provided mask_mods
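A sketch composing two mask_mods with and_masks (or_masks composes them as a union in the same way):

    from torch.nn.attention.flex_attention import and_masks

    def causal(b, h, q_idx, kv_idx):
        return q_idx >= kv_idx

    def local_window(b, h, q_idx, kv_idx):
        return q_idx - kv_idx <= 64

    sliding_causal = and_masks(causal, local_window)   # both must hold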

                                                                                                          Return type
                                                                                                          @@ -4674,7 +4674,7 @@

                                                                                                          BlockMask Utilities
                                                                                                          -torch.nn.attention.flex_attention.noop_mask(batch, head, token_q, token_kv)[source]#
                                                                                                          +torch.nn.attention.flex_attention.noop_mask(batch, head, token_q, token_kv)[source]#

                                                                                                          Returns a noop mask_mod

                                                                                                          Return type
                                                                                                          @@ -4688,7 +4688,7 @@

                                                                                                          BlockMask Utilities#

                                                                                                          -class torch.nn.attention.flex_attention.FlexKernelOptions[source]#
                                                                                                          +class torch.nn.attention.flex_attention.FlexKernelOptions[source]#

                                                                                                          Options for controlling the behavior of FlexAttention kernels.

These options are passed to the underlying Triton kernels to control performance and numerical behavior. Most users will not need to specify these options as the
@@ -4857,7 +4857,7 @@

                                                                                                          FlexKernelOptions#

                                                                                                          -class torch.nn.attention.flex_attention.BlockMask(seq_lengths, kv_num_blocks, kv_indices, full_kv_num_blocks, full_kv_indices, q_num_blocks, q_indices, full_q_num_blocks, full_q_indices, BLOCK_SIZE, mask_mod)[source]#
                                                                                                          +class torch.nn.attention.flex_attention.BlockMask(seq_lengths, kv_num_blocks, kv_indices, full_kv_num_blocks, full_kv_indices, q_num_blocks, q_indices, full_q_num_blocks, full_q_indices, BLOCK_SIZE, mask_mod)[source]#

BlockMask is our format for representing a block-sparse attention mask. It is somewhat of a cross between BCSR and a non-sparse format.

                                                                                                          Basics

                                                                                                          @@ -4909,7 +4909,7 @@

                                                                                                          BlockMask
                                                                                                          -as_tuple(flatten=True)[source]#
                                                                                                          +as_tuple(flatten=True)[source]#

                                                                                                          Returns a tuple of the attributes of the BlockMask.

                                                                                                          Parameters
                                                                                                          @@ -4920,7 +4920,7 @@

                                                                                                          BlockMask
                                                                                                          -classmethod from_kv_blocks(kv_num_blocks, kv_indices, full_kv_num_blocks=None, full_kv_indices=None, BLOCK_SIZE=128, mask_mod=None, seq_lengths=None, compute_q_blocks=True)[source]#
                                                                                                          +classmethod from_kv_blocks(kv_num_blocks, kv_indices, full_kv_num_blocks=None, full_kv_indices=None, BLOCK_SIZE=128, mask_mod=None, seq_lengths=None, compute_q_blocks=True)[source]#

                                                                                                          Creates a BlockMask instance from key-value block information.

                                                                                                          Parameters
                                                                                                          @@ -4985,7 +4985,7 @@

                                                                                                          BlockMask
                                                                                                          -numel()[source]#
                                                                                                          +numel()[source]#

                                                                                                          Returns the number of elements (not accounting for sparsity) in the mask.

                                                                                                          @@ -5011,7 +5011,7 @@

                                                                                                          BlockMask
                                                                                                          -sparsity()[source]#
                                                                                                          +sparsity()[source]#

                                                                                                          Computes the percentage of blocks that are sparse (i.e. not computed)

                                                                                                          Return type
                                                                                                          @@ -5022,7 +5022,7 @@

                                                                                                          BlockMask
                                                                                                          -to(device)[source]#
                                                                                                          +to(device)[source]#

                                                                                                          Moves the BlockMask to the specified device.

                                                                                                          Parameters
                                                                                                          @@ -5048,7 +5048,7 @@

                                                                                                          BlockMask
                                                                                                          -to_dense()[source]#
                                                                                                          +to_dense()[source]#

                                                                                                          Returns a dense block that is equivalent to the block mask.

                                                                                                          Return type
                                                                                                          @@ -5059,7 +5059,7 @@

                                                                                                          BlockMask
                                                                                                          -to_string(grid_size=(20, 20), limit=4)[source]#
                                                                                                          +to_string(grid_size=(20, 20), limit=4)[source]#

                                                                                                          Returns a string representation of the block mask. Quite nifty.

                                                                                                          If grid_size is -1, prints out an uncompressed version. Warning, it can be quite big!

diff --git a/2.9/nn.init.html b/2.9/nn.init.html
index f59562b73cf..83212a6eda7 100644
--- a/2.9/nn.init.html
+++ b/2.9/nn.init.html
@@ -4402,7 +4402,7 @@

                                                                                                          -torch.nn.init.calculate_gain(nonlinearity, param=None)[source]#
                                                                                                          +torch.nn.init.calculate_gain(nonlinearity, param=None)[source]#

                                                                                                          Return the recommended gain value for the given nonlinearity function.

                                                                                                          The values are as follows:

                    @@ -4490,7 +4490,7 @@
                    -torch.nn.init.uniform_(tensor, a=0.0, b=1.0, generator=None)[source]#
                    +torch.nn.init.uniform_(tensor, a=0.0, b=1.0, generator=None)[source]#

                    Fill the input Tensor with values drawn from the uniform distribution.

\mathcal{U}(a, b).

                    @@ -4515,7 +4515,7 @@
                    -torch.nn.init.normal_(tensor, mean=0.0, std=1.0, generator=None)[source]#
                    +torch.nn.init.normal_(tensor, mean=0.0, std=1.0, generator=None)[source]#

                    Fill the input Tensor with values drawn from the normal distribution.

\mathcal{N}(\text{mean}, \text{std}^2).

                    @@ -4540,7 +4540,7 @@
                    -torch.nn.init.constant_(tensor, val)[source]#
                    +torch.nn.init.constant_(tensor, val)[source]#

Fill the input Tensor with the value val.

                    Parameters
                    @@ -4562,7 +4562,7 @@
                    -torch.nn.init.ones_(tensor)[source]#
                    +torch.nn.init.ones_(tensor)[source]#

                    Fill the input Tensor with the scalar value 1.

                    Parameters
                    @@ -4581,7 +4581,7 @@
                    -torch.nn.init.zeros_(tensor)[source]#
                    +torch.nn.init.zeros_(tensor)[source]#

                    Fill the input Tensor with the scalar value 0.

                    Parameters
                    @@ -4600,7 +4600,7 @@
                    -torch.nn.init.eye_(tensor)[source]#
                    +torch.nn.init.eye_(tensor)[source]#

                    Fill the 2-dimensional input Tensor with the identity matrix.

                    Preserves the identity of the inputs in Linear layers, where as many inputs are preserved as possible.

                    @@ -4621,7 +4621,7 @@
                    -torch.nn.init.dirac_(tensor, groups=1)[source]#
                    +torch.nn.init.dirac_(tensor, groups=1)[source]#

                    Fill the {3, 4, 5}-dimensional input Tensor with the Dirac delta function.

Preserves the identity of the inputs in Convolutional layers, where as many input channels are preserved as possible. In case
@@ -4648,7 +4648,7 @@

                    -torch.nn.init.xavier_uniform_(tensor, gain=1.0, generator=None)[source]#
                    +torch.nn.init.xavier_uniform_(tensor, gain=1.0, generator=None)[source]#

                    Fill the input Tensor with values using a Xavier uniform distribution.

The method is described in Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010).
@@ -4686,7 +4686,7 @@
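A sketch of the pattern shared by the init functions in this section, pairing calculate_gain with an initializer:

    import torch

    w = torch.empty(3, 5)
    gain = torch.nn.init.calculate_gain("relu")
    torch.nn.init.xavier_uniform_(w, gain=gain)   # fills w in place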

                    -torch.nn.init.xavier_normal_(tensor, gain=1.0, generator=None)[source]#
                    +torch.nn.init.xavier_normal_(tensor, gain=1.0, generator=None)[source]#

                    Fill the input Tensor with values using a Xavier normal distribution.

The method is described in Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010). The resulting tensor
@@ -4723,7 +4723,7 @@

                    -torch.nn.init.kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu', generator=None)[source]#
                    +torch.nn.init.kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu', generator=None)[source]#

                    Fill the input Tensor with values using a Kaiming uniform distribution.

The method is described in Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. (2015).
@@ -4777,7 +4777,7 @@

                    -torch.nn.init.kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu', generator=None)[source]#
                    +torch.nn.init.kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu', generator=None)[source]#

                    Fill the input Tensor with values using a Kaiming normal distribution.

The method is described in Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. (2015).
@@ -4834,7 +4834,7 @@

                    -torch.nn.init.trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0, generator=None)[source]#
                    +torch.nn.init.trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0, generator=None)[source]#

                    Fill the input Tensor with values drawn from a truncated normal distribution.

The values are effectively drawn from the normal distribution \mathcal{N}(\text{mean}, \text{std}^2) @@ -4865,7 +4865,7 @@
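A minimal usage sketch with the default bounds:

import torch

w = torch.empty(3, 5)
# Values outside [a, b] = [-2, 2] are redrawn until they fall inside the interval.
torch.nn.init.trunc_normal_(w, mean=0.0, std=1.0, a=-2.0, b=2.0)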

                    -torch.nn.init.orthogonal_(tensor, gain=1, generator=None)[source]#
                    +torch.nn.init.orthogonal_(tensor, gain=1, generator=None)[source]#

                    Fill the input Tensor with a (semi) orthogonal matrix.

                    Described in Exact solutions to the nonlinear dynamics of learning in deep linear neural networks - Saxe, A. et al. (2013). The input tensor must have @@ -4892,7 +4892,7 @@

                    -torch.nn.init.sparse_(tensor, sparsity, std=0.01, generator=None)[source]#
                    +torch.nn.init.sparse_(tensor, sparsity, std=0.01, generator=None)[source]#

                    Fill the 2D input Tensor as a sparse matrix.

The non-zero elements will be drawn from the normal distribution \mathcal{N}(0, 0.01), as described in Deep learning via diff --git a/2.9/notes/serialization.html b/2.9/notes/serialization.html index 37ae3aaef16..d95585fba90 100644 --- a/2.9/notes/serialization.html +++ b/2.9/notes/serialization.html @@ -1984,7 +1984,7 @@

                    Environment Variables
                    -torch.serialization.register_package(priority, tagger, deserializer)[source]#
                    +torch.serialization.register_package(priority, tagger, deserializer)[source]#

                    Registers callables for tagging and deserializing storage objects with an associated priority. Tagging associates a device with a storage object at save time while deserializing moves a storage object to an appropriate device at load time. tagger and deserializer @@ -2025,7 +2025,7 @@
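A minimal sketch for a hypothetical 'ipu' device (the device name, the priority value, and the pass-through deserializer are illustrative assumptions):

import torch

def ipu_tagger(storage):
    # Hypothetical: tag storages that live on a custom 'ipu' device.
    if storage.device.type == 'ipu':
        return 'ipu'

def ipu_deserializer(storage, location):
    if location.startswith('ipu'):
        # A real implementation would move the storage onto the device here.
        return storage

torch.serialization.register_package(11, ipu_tagger, ipu_deserializer)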

                    Environment Variables
                    -torch.serialization.get_crc32_options()[source]#
                    +torch.serialization.get_crc32_options()[source]#

                    Get whether torch.save() computes and writes crc32 for each record.

                    Defaults to True.

                    @@ -2037,7 +2037,7 @@

                    Environment Variables
                    -torch.serialization.set_crc32_options(compute_crc32)[source]#
                    +torch.serialization.set_crc32_options(compute_crc32)[source]#

                    Set whether torch.save() computes and writes crc32 for each record.

                    Note

                    @@ -2054,7 +2054,7 @@

                    Environment Variables
                    -torch.serialization.get_default_load_endianness()[source]#
                    +torch.serialization.get_default_load_endianness()[source]#

Get the fallback byte order for loading files.

If a byte-order mark is not present in the saved checkpoint, this byte order is used as the fallback. @@ -2071,7 +2071,7 @@

                    Environment Variables
                    -torch.serialization.set_default_load_endianness(endianness)[source]#
                    +torch.serialization.set_default_load_endianness(endianness)[source]#

Set the fallback byte order for loading files.

If a byte-order mark is not present in the saved checkpoint, this byte order is used as the fallback. @@ -2085,7 +2085,7 @@

                    Environment Variables
                    -torch.serialization.get_default_mmap_options()[source]#
                    +torch.serialization.get_default_mmap_options()[source]#

                    Get default mmap options for torch.load() with mmap=True.

                    Defaults to mmap.MAP_PRIVATE.

                    @@ -2100,7 +2100,7 @@

                    Environment Variables
                    -torch.serialization.set_default_mmap_options(flags)[source]#
                    +torch.serialization.set_default_mmap_options(flags)[source]#

Context manager or function that sets the default mmap options for torch.load() with mmap=True to flags.

                    For now, only either mmap.MAP_PRIVATE or mmap.MAP_SHARED are supported. Please open an issue if you need any other option to be added here.
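Used as a context manager, a minimal sketch ('model.pt' is a stand-in path):

import mmap
import torch

with torch.serialization.set_default_mmap_options(mmap.MAP_SHARED):
    state_dict = torch.load('model.pt', mmap=True)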

                    @@ -2117,7 +2117,7 @@

                    Environment Variables
                    -torch.serialization.add_safe_globals(safe_globals)[source]#
                    +torch.serialization.add_safe_globals(safe_globals)[source]#

Marks the given globals as safe for weights_only load. For example, functions added to this list can be called during unpickling, and classes can be instantiated and have their state set.
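A minimal sketch ('checkpoint.pt' and RunConfig are stand-ins for a real checkpoint and a class pickled inside it):

import torch

class RunConfig:
    pass

torch.serialization.add_safe_globals([RunConfig])
obj = torch.load('checkpoint.pt', weights_only=True)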

                    @@ -2152,7 +2152,7 @@

                    Environment Variables
                    -torch.serialization.clear_safe_globals()[source]#
                    +torch.serialization.clear_safe_globals()[source]#

                    Clears the list of globals that are safe for weights_only load.

                    @@ -2160,7 +2160,7 @@

                    Environment Variables
                    -torch.serialization.get_safe_globals()[source]#
                    +torch.serialization.get_safe_globals()[source]#

                    Returns the list of user-added globals that are safe for weights_only load.

                    Return type
                    @@ -2171,7 +2171,7 @@

                    Environment Variables
                    -torch.serialization.get_unsafe_globals_in_checkpoint(f)[source]#
                    +torch.serialization.get_unsafe_globals_in_checkpoint(f)[source]#

                    Returns a list of strings of functions/classes in a torch.save object that are not safe for weights_only.

                    For a given function or class f, the corresponding string will be of the form {f.__module__}.{f.__name__}.

                    @@ -2199,7 +2199,7 @@

                    Environment Variables
                    -class torch.serialization.safe_globals(safe_globals)[source]#
                    +class torch.serialization.safe_globals(safe_globals)[source]#

                    Context-manager that adds certain globals as safe for weights_only load.

                    Parameters
                    @@ -2227,7 +2227,7 @@

                    Environment Variables
                    -class torch.serialization.skip_data(materialize_fake_tensors=False)[source]#
                    +class torch.serialization.skip_data(materialize_fake_tensors=False)[source]#

                    Context-manager that skips writing/reading storage bytes for torch.save / torch.load calls.

                    For the save path, storages will still be saved, but the space that their bytes would usually be written to will be empty space. The storage bytes can then be populated in a separate pass.
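A minimal sketch of the save path:

import torch

t = torch.randn(2, 3)
with torch.serialization.skip_data():
    # Metadata is written, but the space for the storage bytes is left empty.
    torch.save(t, 'skeleton.pt')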

                    diff --git a/2.9/onnx.html b/2.9/onnx.html index 8d8a21aa44f..924b3e478aa 100644 --- a/2.9/onnx.html +++ b/2.9/onnx.html @@ -4459,7 +4459,7 @@

                    Contributing / Developing#

                    -torch.onnx.export(model, args=(), f=None, *, kwargs=None, verbose=None, input_names=None, output_names=None, opset_version=None, dynamo=True, external_data=True, dynamic_shapes=None, custom_translation_table=None, report=False, optimize=True, verify=False, profile=False, dump_exported_program=False, artifacts_dir='.', fallback=False, export_params=True, keep_initializers_as_inputs=False, dynamic_axes=None, training=<TrainingMode.EVAL: 0>, operator_export_type=<OperatorExportTypes.ONNX: 0>, do_constant_folding=True, custom_opsets=None, export_modules_as_functions=False, autograd_inlining=True)[source]
                    +torch.onnx.export(model, args=(), f=None, *, kwargs=None, verbose=None, input_names=None, output_names=None, opset_version=None, dynamo=True, external_data=True, dynamic_shapes=None, custom_translation_table=None, report=False, optimize=True, verify=False, profile=False, dump_exported_program=False, artifacts_dir='.', fallback=False, export_params=True, keep_initializers_as_inputs=False, dynamic_axes=None, training=<TrainingMode.EVAL: 0>, operator_export_type=<OperatorExportTypes.ONNX: 0>, do_constant_folding=True, custom_opsets=None, export_modules_as_functions=False, autograd_inlining=True)[source]

                    Exports a model into ONNX format.

                    Setting dynamo=True enables the new ONNX export logic which is based on torch.export.ExportedProgram and a more modern @@ -4656,7 +4656,7 @@
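A minimal sketch (the module is a stand-in; with dynamo=True the call returns an ONNX program object that can be saved to disk):

import torch

class Scale(torch.nn.Module):
    def forward(self, x):
        return 2.0 * x.relu()

onnx_program = torch.onnx.export(Scale(), (torch.randn(2, 3),), dynamo=True)
onnx_program.save('scale.onnx')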

                    Functions
                    -torch.onnx.is_in_onnx_export()[source]
                    +torch.onnx.is_in_onnx_export()[source]

                    Returns whether it is in the middle of ONNX export.

                    Return type
                    @@ -4698,7 +4698,7 @@

                    Deprecated APIs
                    -torch.onnx.register_custom_op_symbolic(symbolic_name, symbolic_fn, opset_version)[source]#
                    +torch.onnx.register_custom_op_symbolic(symbolic_name, symbolic_fn, opset_version)[source]#

                    Registers a symbolic function for a custom operator.

                    When the user registers symbolic for custom/contrib ops, it is highly recommended to add shape inference for that operator via setType API, @@ -4721,7 +4721,7 @@
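A minimal sketch (the namespace and op name are hypothetical; the symbolic maps the custom op onto a standard ONNX Relu node):

import torch.onnx

def custom_relu_symbolic(g, input):
    return g.op('Relu', input)

torch.onnx.register_custom_op_symbolic(
    'mynamespace::custom_relu', custom_relu_symbolic, opset_version=9)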

                    Deprecated APIs
                    -torch.onnx.unregister_custom_op_symbolic(symbolic_name, opset_version)[source]#
                    +torch.onnx.unregister_custom_op_symbolic(symbolic_name, opset_version)[source]#

                    Unregisters symbolic_name.

                    See “Custom Operators” in the module documentation for an example usage.

                    @@ -4737,7 +4737,7 @@

                    Deprecated APIs
                    -torch.onnx.select_model_mode_for_export(model, mode)[source]#
                    +torch.onnx.select_model_mode_for_export(model, mode)[source]#

                    A context manager to temporarily set the training mode of model to mode, resetting it when we exit the with-block.

                    diff --git a/2.9/onnx_export.html b/2.9/onnx_export.html index 700af9a9564..5c6ef738022 100644 --- a/2.9/onnx_export.html +++ b/2.9/onnx_export.html @@ -4618,7 +4618,7 @@

API Reference#

                    -torch.onnx.export(model, args=(), f=None, *, kwargs=None, verbose=None, input_names=None, output_names=None, opset_version=None, dynamo=True, external_data=True, dynamic_shapes=None, custom_translation_table=None, report=False, optimize=True, verify=False, profile=False, dump_exported_program=False, artifacts_dir='.', fallback=False, export_params=True, keep_initializers_as_inputs=False, dynamic_axes=None, training=<TrainingMode.EVAL: 0>, operator_export_type=<OperatorExportTypes.ONNX: 0>, do_constant_folding=True, custom_opsets=None, export_modules_as_functions=False, autograd_inlining=True)[source]#
                    +torch.onnx.export(model, args=(), f=None, *, kwargs=None, verbose=None, input_names=None, output_names=None, opset_version=None, dynamo=True, external_data=True, dynamic_shapes=None, custom_translation_table=None, report=False, optimize=True, verify=False, profile=False, dump_exported_program=False, artifacts_dir='.', fallback=False, export_params=True, keep_initializers_as_inputs=False, dynamic_axes=None, training=<TrainingMode.EVAL: 0>, operator_export_type=<OperatorExportTypes.ONNX: 0>, do_constant_folding=True, custom_opsets=None, export_modules_as_functions=False, autograd_inlining=True)[source]#

                    Exports a model into ONNX format.

                    Setting dynamo=True enables the new ONNX export logic which is based on torch.export.ExportedProgram and a more modern @@ -4827,7 +4827,7 @@

                    API Reference
                    -apply_weights(state_dict)[source]#
                    +apply_weights(state_dict)[source]#

                    Apply the weights from the specified state dict to the ONNX model.

                    Use this method to replace FakeTensors or other weights.

                    @@ -4839,7 +4839,7 @@

                    API Reference
                    -call_reference(*args, **kwargs)[source]#
                    +call_reference(*args, **kwargs)[source]#

                    Run the ONNX model using the reference backend.

                    Return type
                    @@ -4850,7 +4850,7 @@

                    API Reference
                    -compute_values(value_names, args=(), kwargs=None)[source]#
                    +compute_values(value_names, args=(), kwargs=None)[source]#

                    Compute the values of the specified names in the ONNX model.

                    This method is used to compute the values of the specified names in the ONNX model. The values are returned as a dictionary mapping names to tensors.

                    @@ -4869,7 +4869,7 @@

                    API Reference
                    -initialize_inference_session(initializer=<function _ort_session_initializer>)[source]#
                    +initialize_inference_session(initializer=<function _ort_session_initializer>)[source]#

                    Initialize the ONNX Runtime inference session.

                    Parameters
                    @@ -4888,7 +4888,7 @@

                    API Reference
                    -optimize()[source]#
                    +optimize()[source]#

                    Optimize the ONNX model.

                    This method optimizes the ONNX model by performing constant folding and eliminating redundancies in the graph. The optimization is done in-place.

                    @@ -4898,7 +4898,7 @@

                    API Reference
                    -release()[source]#
                    +release()[source]#

                    Release the inference session.

                    You may call this method to release the resources used by the inference session.

                    @@ -4907,7 +4907,7 @@

                    API Reference
                    -save(destination, *, include_initializers=True, keep_initializers_as_inputs=False, external_data=None)[source]#
                    +save(destination, *, include_initializers=True, keep_initializers_as_inputs=False, external_data=None)[source]#

                    Save the ONNX model to the specified destination.

                    When external_data is True or the model is larger than 2GB, the weights are saved as external data in a separate file.

                    @@ -4949,7 +4949,7 @@

                    API Reference
                    -torch.onnx.is_in_onnx_export()[source]#
                    +torch.onnx.is_in_onnx_export()[source]#

                    Returns whether it is in the middle of ONNX export.

                    Return type
                    diff --git a/2.9/onnx_ops.html b/2.9/onnx_ops.html index b886991108b..7fb74fa1404 100644 --- a/2.9/onnx_ops.html +++ b/2.9/onnx_ops.html @@ -4413,7 +4413,7 @@

Symbolic Operators

                    -torch.onnx.ops.symbolic(domain_op, /, inputs, attrs=None, *, dtype, shape, version=None, metadata_props=None)[source]#
                    +torch.onnx.ops.symbolic(domain_op, /, inputs, attrs=None, *, dtype, shape, version=None, metadata_props=None)[source]#

                    Create a symbolic FX operator to represent an arbitrary ONNX operator.

                    This function is used to create a symbolic operator with a single output. To create an operator with multiple outputs, use symbolic_multi_out().
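A minimal sketch inside a module's forward (the com.example domain and op name are hypothetical):

import torch

class WithCustomOp(torch.nn.Module):
    def forward(self, x):
        if torch.onnx.is_in_onnx_export():
            # dtype and shape declare the single output's type and shape.
            return torch.onnx.ops.symbolic(
                'com.example::CustomOp', (x,),
                dtype=x.dtype, shape=x.shape, version=1)
        return x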

                    @@ -4473,7 +4473,7 @@

                    Symbolic Operators
                    -torch.onnx.ops.symbolic_multi_out(domain_op, /, inputs, attrs=None, *, dtypes, shapes, version=None, metadata_props=None)[source]#
                    +torch.onnx.ops.symbolic_multi_out(domain_op, /, inputs, attrs=None, *, dtypes, shapes, version=None, metadata_props=None)[source]#

                    Create a symbolic FX operator to represent an arbitrary ONNX operator with multiple outputs.

                    You may use if torch.onnx.is_in_onnx_export() to conditionally enable the symbolic logic only during torch.onnx.export().

                    @@ -4599,7 +4599,7 @@

                    ONNX Operators
                    -torch.onnx.ops.rotary_embedding(X, cos_cache, sin_cache, position_ids=None, *, interleaved=False, num_heads=0, rotary_embedding_dim=0)[source]#
                    +torch.onnx.ops.rotary_embedding(X, cos_cache, sin_cache, position_ids=None, *, interleaved=False, num_heads=0, rotary_embedding_dim=0)[source]#

                    RotaryEmbedding op in ONNX.

                    https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html

                    RotaryEmbedding is the implementation of rotary positional embeddings (RoPE) based on the paper https://arxiv.org/pdf/2104.09864. @@ -4652,7 +4652,7 @@

                    ONNX Operators
                    -torch.onnx.ops.attention(Q, K, V, attn_mask=None, past_key=None, past_value=None, *, is_causal=False, kv_num_heads=0, q_num_heads=0, qk_matmul_output_mode=0, scale=None, softcap=0.0, softmax_precision=None)[source]#
                    +torch.onnx.ops.attention(Q, K, V, attn_mask=None, past_key=None, past_value=None, *, is_causal=False, kv_num_heads=0, q_num_heads=0, qk_matmul_output_mode=0, scale=None, softcap=0.0, softmax_precision=None)[source]#

                    Attention op in ONNX.

                    https://onnx.ai/onnx/operators/onnx__Attention.html

                    Computes scaled dot product attention on query, key and value tensors, using an optional attention mask if passed.

                    @@ -4774,7 +4774,7 @@

                    ONNX to ATen Decomposition Table
                    -torch.onnx.ops.aten_decompositions()[source]#
                    +torch.onnx.ops.aten_decompositions()[source]#

                    Return the ONNX to ATen decomp table.

                    Return type
                    diff --git a/2.9/onnx_verification.html b/2.9/onnx_verification.html index 0f56861417e..873d5a6592f 100644 --- a/2.9/onnx_verification.html +++ b/2.9/onnx_verification.html @@ -4406,7 +4406,7 @@

                    A set of tools to verify the correctness of ONNX models.

                    -torch.onnx.verification.verify_onnx_program(onnx_program, args=None, kwargs=None, compare_intermediates=False)[source]#
                    +torch.onnx.verification.verify_onnx_program(onnx_program, args=None, kwargs=None, compare_intermediates=False)[source]#

                    Verify the ONNX model by comparing the values with the expected values from ExportedProgram.
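A minimal sketch (the model and inputs are stand-ins; the call returns a list of VerificationInfo records, described below):

import torch

model = torch.nn.Linear(4, 2)
onnx_program = torch.onnx.export(model, (torch.randn(1, 4),), dynamo=True)
results = torch.onnx.verification.verify_onnx_program(onnx_program)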

                    Parameters
                    @@ -4453,7 +4453,7 @@
                    -asdict()[source]#
                    +asdict()[source]#

                    Convert the VerificationInfo object to a dictionary.

                    Returns
                    @@ -4467,7 +4467,7 @@
                    -classmethod from_tensors(name, expected, actual)[source]#
                    +classmethod from_tensors(name, expected, actual)[source]#

                    Create a VerificationInfo object from two tensors.

                    Parameters
                    diff --git a/2.9/optim.html b/2.9/optim.html index 7ea8f596e03..85210cd665c 100644 --- a/2.9/optim.html +++ b/2.9/optim.html @@ -4510,7 +4510,7 @@

                    optimizer.step(

                    Base class#

                    -class torch.optim.Optimizer(params, defaults)[source]#
                    +class torch.optim.Optimizer(params, defaults)[source]#

                    Base class for all optimizers.

                    Warning

                    @@ -5272,13 +5272,13 @@

                    Putting it all together: EMA
                    -torch.optim.swa_utils.get_ema_multi_avg_fn(decay=0.999)[source]#
                    +torch.optim.swa_utils.get_ema_multi_avg_fn(decay=0.999)[source]#

                    Get the function applying exponential moving average (EMA) across multiple params.

                    -torch.optim.swa_utils.update_bn(loader, model, device=None)[source]#
                    +torch.optim.swa_utils.update_bn(loader, model, device=None)[source]#

                    Update BatchNorm running_mean, running_var buffers in the model.

                    It performs one pass over data in loader to estimate the activation statistics for BatchNorm layers in the model.
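A minimal sketch combining the two helpers (the model and the in-memory loader are stand-ins for a real network and DataLoader):

import torch
from torch.optim.swa_utils import AveragedModel, get_ema_multi_avg_fn, update_bn

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.BatchNorm1d(8))
ema_model = AveragedModel(model, multi_avg_fn=get_ema_multi_avg_fn(0.999))
loader = [torch.randn(16, 8) for _ in range(4)]
# After training/averaging, recompute the BatchNorm statistics in one pass.
update_bn(loader, ema_model)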

                    diff --git a/2.9/package.html b/2.9/package.html index 835b51265ce..aae708e3e62 100644 --- a/2.9/package.html +++ b/2.9/package.html @@ -5071,7 +5071,7 @@

API Reference#

                    -class torch.package.PackagingError(dependency_graph, debug=False)[source]#
                    +class torch.package.PackagingError(dependency_graph, debug=False)[source]#

                    This exception is raised when there is an issue with exporting a package. PackageExporter will attempt to gather up all the errors and present them to you at once.

                    @@ -5081,14 +5081,14 @@

                    API Reference
                    -class torch.package.EmptyMatchError[source]#
                    +class torch.package.EmptyMatchError[source]#

                    This is an exception that is thrown when a mock or extern is marked as allow_empty=False, and is not matched with any module during packaging.

                    -class torch.package.PackageExporter(f, importer=<torch.package.importer._SysImporter object>, debug=False)[source]#
                    +class torch.package.PackageExporter(f, importer=<torch.package.importer._SysImporter object>, debug=False)[source]#

                    Exporters allow you to write packages of code, pickled Python data, and arbitrary binary and text resources into a self-contained package.

                    Imports can load this code in a hermetic way, such that code is loaded @@ -5112,7 +5112,7 @@

                    API Reference
                    -__init__(f, importer=<torch.package.importer._SysImporter object>, debug=False)[source]#
                    +__init__(f, importer=<torch.package.importer._SysImporter object>, debug=False)[source]#

                    Create an exporter.

                    Parameters
                    @@ -5129,7 +5129,7 @@

                    API Reference
                    -add_dependency(module_name, dependencies=True)[source]#
                    +add_dependency(module_name, dependencies=True)[source]#

                    Given a module, add it to the dependency graph according to patterns specified by the user.

                    @@ -5138,7 +5138,7 @@

                    API Reference
                    -all_paths(src, dst)[source]#
                    +all_paths(src, dst)[source]#
Return a dot representation of the subgraph that has all paths from src to dst.

                    @@ -5156,7 +5156,7 @@

                    API Reference
                    -close()[source]#
                    +close()[source]#

                    Write the package to the filesystem. Any calls after close() are now invalid. It is preferable to use resource guard syntax instead:

                    with PackageExporter("file.zip") as e:
                    @@ -5167,7 +5167,7 @@ 

                    API Reference
                    -denied_modules()[source]#
                    +denied_modules()[source]#

                    Return all modules that are currently denied.

                    Returns
                    @@ -5182,7 +5182,7 @@

                    API Reference
                    -deny(include, *, exclude=())[source]#
                    +deny(include, *, exclude=())[source]#

Blocklist modules whose names match the given glob patterns from the list of modules the package can import. If a dependency on any matching packages is found, a PackagingError is raised.

                    @@ -5198,7 +5198,7 @@

                    API Reference
                    -dependency_graph_string()[source]#
                    +dependency_graph_string()[source]#

Returns a digraph string representation of the dependencies in the package.

                    Returns
                    @@ -5212,7 +5212,7 @@

                    API Reference
                    -extern(include, *, exclude=(), allow_empty=True)[source]#
                    +extern(include, *, exclude=(), allow_empty=True)[source]#

                    Include module in the list of external modules the package can import. This will prevent dependency discovery from saving it in the package. The importer will load an external module directly from the standard import system. @@ -5237,7 +5237,7 @@
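A minimal sketch of a typical split, externing torch while packaging a pickled model ('linear.pt' is a stand-in path):

import torch
from torch.package import PackageExporter

model = torch.nn.Linear(4, 2)
with PackageExporter('linear.pt') as exporter:
    # torch is imported from the environment at load time, not packaged.
    exporter.extern('torch.**')
    exporter.save_pickle('model', 'model.pkl', model)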

                    API Reference
                    -externed_modules()[source]#
                    +externed_modules()[source]#

                    Return all modules that are currently externed.

                    Returns
                    @@ -5252,7 +5252,7 @@

                    API Reference
                    -get_rdeps(module_name)[source]#
                    +get_rdeps(module_name)[source]#

                    Return a list of all modules which depend on the module module_name.

                    Returns
                    @@ -5266,7 +5266,7 @@

                    API Reference
                    -get_unique_id()[source]#
                    +get_unique_id()[source]#

                    Get an id. This id is guaranteed to only be handed out once for this package.

                    Return type
                    @@ -5277,7 +5277,7 @@

                    API Reference
                    -intern(include, *, exclude=(), allow_empty=True)[source]#
                    +intern(include, *, exclude=(), allow_empty=True)[source]#

                    Specify modules that should be packaged. A module must match some intern pattern in order to be included in the package and have its dependencies processed recursively.

                    @@ -5297,7 +5297,7 @@

                    API Reference
                    -interned_modules()[source]#
                    +interned_modules()[source]#

                    Return all modules that are currently interned.

                    Returns
                    @@ -5312,7 +5312,7 @@

                    API Reference
                    -mock(include, *, exclude=(), allow_empty=True)[source]#
                    +mock(include, *, exclude=(), allow_empty=True)[source]#

Replace some required modules with a mock implementation. Mocked modules will return a fake object for any attribute accessed from them. Because we copy file-by-file, the dependency resolution will sometimes find files that are imported by model files but whose functionality is never used @@ -5348,7 +5348,7 @@

                    API Reference
                    -mocked_modules()[source]#
                    +mocked_modules()[source]#

                    Return all modules that are currently mocked.

                    Returns
                    @@ -5363,7 +5363,7 @@

                    API Reference
                    -register_extern_hook(hook)[source]#
                    +register_extern_hook(hook)[source]#

                    Registers an extern hook on the exporter.

                    The hook will be called each time a module matches against an extern() pattern. It should have the following signature:

                    @@ -5384,7 +5384,7 @@

                    API Reference
                    -register_intern_hook(hook)[source]#
                    +register_intern_hook(hook)[source]#

                    Registers an intern hook on the exporter.

                    The hook will be called each time a module matches against an intern() pattern. It should have the following signature:

                    @@ -5405,7 +5405,7 @@

                    API Reference
                    -register_mock_hook(hook)[source]#
                    +register_mock_hook(hook)[source]#

                    Registers a mock hook on the exporter.

                    The hook will be called each time a module matches against a mock() pattern. It should have the following signature:

                    @@ -5426,7 +5426,7 @@

                    API Reference
                    -save_binary(package, resource, binary)[source]#
                    +save_binary(package, resource, binary)[source]#

                    Save raw bytes to the package.

                    Parameters
                    @@ -5441,7 +5441,7 @@

                    API Reference
                    -save_module(module_name, dependencies=True)[source]#
                    +save_module(module_name, dependencies=True)[source]#

                    Save the code for module into the package. Code for the module is resolved using the importers path to find the module object, and then using its __file__ attribute to find the source code.

                    @@ -5457,7 +5457,7 @@

                    API Reference
                    -save_pickle(package, resource, obj, dependencies=True, pickle_protocol=3)[source]#
                    +save_pickle(package, resource, obj, dependencies=True, pickle_protocol=3)[source]#

                    Save a python object to the archive using pickle. Equivalent to torch.save() but saving into the archive rather than a stand-alone file. Standard pickle does not save the code, only the objects. If dependencies is true, this method will also scan the pickled objects for which modules are required @@ -5480,7 +5480,7 @@

                    API Reference
                    -save_source_file(module_name, file_or_directory, dependencies=True)[source]#
                    +save_source_file(module_name, file_or_directory, dependencies=True)[source]#

                    Adds the local file system file_or_directory to the source package to provide the code for module_name.

                    @@ -5498,7 +5498,7 @@

                    API Reference
                    -save_source_string(module_name, src, is_package=False, dependencies=True)[source]#
                    +save_source_string(module_name, src, is_package=False, dependencies=True)[source]#

                    Adds src as the source code for module_name in the exported package.

                    Parameters
                    @@ -5515,7 +5515,7 @@

                    API Reference
                    -save_text(package, resource, text)[source]#
                    +save_text(package, resource, text)[source]#

                    Save text data to the package.

                    Parameters
                    @@ -5532,7 +5532,7 @@

                    API Reference
                    -class torch.package.PackageImporter(file_or_buffer, module_allowed=<function PackageImporter.<lambda>>)[source]#
                    +class torch.package.PackageImporter(file_or_buffer, module_allowed=<function PackageImporter.<lambda>>)[source]#

                    Importers allow you to load code written to packages by PackageExporter. Code is loaded in a hermetic way, using files from the package rather than the normal python import system. This allows @@ -5547,7 +5547,7 @@

                    API Reference
                    -__init__(file_or_buffer, module_allowed=<function PackageImporter.<lambda>>)[source]#
                    +__init__(file_or_buffer, module_allowed=<function PackageImporter.<lambda>>)[source]#

Open file_or_buffer for importing. This checks that the imported package only requires modules allowed by module_allowed.

                    @@ -5568,7 +5568,7 @@

                    API Reference
                    -file_structure(*, include='**', exclude=())[source]#
                    +file_structure(*, include='**', exclude=())[source]#

                    Returns a file structure representation of package’s zipfile.

                    Parameters
                    @@ -5590,7 +5590,7 @@

                    API Reference
                    -id()[source]#
                    +id()[source]#

                    Returns internal identifier that torch.package uses to distinguish PackageImporter instances. Looks like:

                    <torch_package_0>
                    @@ -5600,7 +5600,7 @@ 

                    API Reference
                    -import_module(name, package=None)[source]#
                    +import_module(name, package=None)[source]#

                    Load a module from the package if it hasn’t already been loaded, and then return the module. Modules are loaded locally to the importer and will appear in self.modules rather than sys.modules.

                    @@ -5622,7 +5622,7 @@

                    API Reference
                    -load_binary(package, resource)[source]#
                    +load_binary(package, resource)[source]#

                    Load raw bytes.

                    Parameters
                    @@ -5642,7 +5642,7 @@

                    API Reference
                    -load_pickle(package, resource, map_location=None)[source]#
                    +load_pickle(package, resource, map_location=None)[source]#

                    Unpickles the resource from the package, loading any modules that are needed to construct the objects using import_module().
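A minimal sketch, loading the archive written in the exporter sketch above:

from torch.package import PackageImporter

importer = PackageImporter('linear.pt')
model = importer.load_pickle('model', 'model.pkl')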

                    @@ -5664,7 +5664,7 @@

                    API Reference
                    -load_text(package, resource, encoding='utf-8', errors='strict')[source]#
                    +load_text(package, resource, encoding='utf-8', errors='strict')[source]#

                    Load a string.

                    Parameters
                    @@ -5686,7 +5686,7 @@

                    API Reference
                    -python_version()[source]#
                    +python_version()[source]#

                    Returns the version of python that was used to create this package.

                    Note: this function is experimental and not Forward Compatible. The plan is to move this into a lock file later on.

                    @@ -5701,7 +5701,7 @@

                    API Reference
                    -class torch.package.Directory(name, is_dir)[source]#
                    +class torch.package.Directory(name, is_dir)[source]#

                    A file structure representation. Organized as Directory nodes that have lists of their Directory children. Directories for a package are created by calling PackageImporter.file_structure().

                    @@ -5709,7 +5709,7 @@

                    API Reference
                    -has_file(filename)[source]#
                    +has_file(filename)[source]#

                    Checks if a file is present in a Directory.

                    Parameters
                    diff --git a/2.9/profiler.html b/2.9/profiler.html index 18a2cdfd020..9184ce5a9f6 100644 --- a/2.9/profiler.html +++ b/2.9/profiler.html @@ -4408,7 +4408,7 @@

                    torch.profiler#

                    -class torch.profiler._KinetoProfile(*, activities=None, record_shapes=False, profile_memory=False, with_stack=False, with_flops=False, with_modules=False, experimental_config=None, execution_trace_observer=None, acc_events=False, custom_trace_id_callback=None)[source]#
                    +class torch.profiler._KinetoProfile(*, activities=None, record_shapes=False, profile_memory=False, with_stack=False, with_flops=False, with_modules=False, experimental_config=None, execution_trace_observer=None, acc_events=False, custom_trace_id_callback=None)[source]#

Low-level profiler that wraps the autograd profiler.

                    Parameters
                    @@ -4451,7 +4451,7 @@

                    API Reference
                    -add_metadata(key, value)[source]#
                    +add_metadata(key, value)[source]#

Adds user-defined metadata with a string key and a string value into the trace file.

                    @@ -4460,7 +4460,7 @@

                    API Reference
                    -add_metadata_json(key, value)[source]#
                    +add_metadata_json(key, value)[source]#

Adds user-defined metadata with a string key and a valid JSON value into the trace file.

                    @@ -4469,14 +4469,14 @@

                    API Reference
                    -events()[source]#
                    +events()[source]#

Returns the list of unaggregated profiler events, to be used in the trace callback or after the profiling is finished.

                    -export_chrome_trace(path)[source]#
                    +export_chrome_trace(path)[source]#

Exports the collected trace in Chrome JSON format. If kineto is enabled, only the last cycle in the schedule is exported.

                    @@ -4485,7 +4485,7 @@

                    API Reference
                    -export_memory_timeline(path, device=None)[source]#
                    +export_memory_timeline(path, device=None)[source]#

                    Export memory event information from the profiler collected tree for a given device, and export a timeline plot. There are 3 exportable files using export_memory_timeline, each controlled by the @@ -4510,7 +4510,7 @@

                    API Reference
                    -export_stacks(path, metric='self_cpu_time_total')[source]#
                    +export_stacks(path, metric='self_cpu_time_total')[source]#

Save stack traces to a file.

                    Parameters
                    @@ -4524,7 +4524,7 @@

                    API Reference
                    -key_averages(group_by_input_shape=False, group_by_stack_n=0, group_by_overload_name=False)[source]#
                    +key_averages(group_by_input_shape=False, group_by_stack_n=0, group_by_overload_name=False)[source]#

                    Averages events, grouping them by operator name and (optionally) input shapes, stack and overload name.

                    @@ -4538,7 +4538,7 @@

                    API Reference
                    -preset_metadata_json(key, value)[source]#
                    +preset_metadata_json(key, value)[source]#

Preset user-defined metadata before the profiler is started; it is added into the trace file later. Metadata is in the format of a string key and a valid JSON value.

                    @@ -4548,7 +4548,7 @@

                    API Reference
                    -toggle_collection_dynamic(enable, activities)[source]#
                    +toggle_collection_dynamic(enable, activities)[source]#

Toggle collection of activities on/off at any point of collection. Currently supports toggling Torch ops (CPU) and the CUDA activity supported in Kineto.

                    @@ -4581,7 +4581,7 @@

                    API Reference
                    -class torch.profiler.profile(*, activities=None, schedule=None, on_trace_ready=None, record_shapes=False, profile_memory=False, with_stack=False, with_flops=False, with_modules=False, experimental_config=None, execution_trace_observer=None, acc_events=False, use_cuda=None, custom_trace_id_callback=None)[source]#
                    +class torch.profiler.profile(*, activities=None, schedule=None, on_trace_ready=None, record_shapes=False, profile_memory=False, with_stack=False, with_flops=False, with_modules=False, experimental_config=None, execution_trace_observer=None, acc_events=False, use_cuda=None, custom_trace_id_callback=None)[source]#

                    Profiler context manager.

                    Parameters
                    @@ -4708,13 +4708,13 @@

                    API Reference
                    -get_trace_id()[source]#
                    +get_trace_id()[source]#

                    Returns the current trace ID.

                    -set_custom_trace_id_callback(callback)[source]#
                    +set_custom_trace_id_callback(callback)[source]#

                    Sets a callback to be called when a new trace ID is generated.

                    @@ -4722,7 +4722,7 @@

                    API Reference
                    -step()[source]#
                    +step()[source]#

                    Signals the profiler that the next profiling step has started.

                    @@ -4732,7 +4732,7 @@

                    API Reference
                    -class torch.profiler.ProfilerAction(value)[source]#
                    +class torch.profiler.ProfilerAction(value)[source]#

Profiler actions that can be taken at the specified intervals.

                    @@ -4755,7 +4755,7 @@

                    API Reference
                    -torch.profiler.schedule(*, wait, warmup, active, repeat=0, skip_first=0, skip_first_wait=0)[source]#
                    +torch.profiler.schedule(*, wait, warmup, active, repeat=0, skip_first=0, skip_first_wait=0)[source]#

Returns a callable that can be used as the profiler schedule argument. The profiler will skip the first skip_first steps, then wait for wait steps, then do the warmup for the next warmup steps, then do the active recording for the next active steps, and then repeat the cycle starting with wait steps. @@ -4776,7 +4776,7 @@
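A minimal sketch (the step counts and the './log' directory are arbitrary; tensorboard_trace_handler is described next):

import torch
from torch.profiler import profile, schedule, tensorboard_trace_handler

my_schedule = schedule(skip_first=1, wait=1, warmup=1, active=2, repeat=2)
with profile(schedule=my_schedule,
             on_trace_ready=tensorboard_trace_handler('./log')) as prof:
    for _ in range(8):
        torch.matmul(torch.randn(64, 64), torch.randn(64, 64))
        prof.step()  # advance the schedule once per iteration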

                    API Reference
                    -torch.profiler.tensorboard_trace_handler(dir_name, worker_name=None, use_gzip=False)[source]#
                    +torch.profiler.tensorboard_trace_handler(dir_name, worker_name=None, use_gzip=False)[source]#

Outputs tracing files to the directory dir_name, which can then be delivered directly to TensorBoard as logdir. worker_name should be unique for each worker in a distributed scenario, @@ -4790,13 +4790,13 @@

                    API Reference#

                    -torch.profiler.itt.is_available()[source]#
                    +torch.profiler.itt.is_available()[source]#

Check whether the ITT feature is available.

                    -torch.profiler.itt.mark(msg)[source]#
                    +torch.profiler.itt.mark(msg)[source]#

                    Describe an instantaneous event that occurred at some point.

                    Parameters
                    @@ -4807,7 +4807,7 @@

                    Intel Instrumentation and Tracing Technology APIs
                    -torch.profiler.itt.range_push(msg)[source]#
                    +torch.profiler.itt.range_push(msg)[source]#

Pushes a range onto a stack of nested range spans. Returns the zero-based depth of the range that is started.

                    @@ -4819,7 +4819,7 @@

                    Intel Instrumentation and Tracing Technology APIs
                    -torch.profiler.itt.range_pop()[source]#
                    +torch.profiler.itt.range_pop()[source]#

Pops a range off a stack of nested range spans. Returns the zero-based depth of the range that is ended.

                    diff --git a/2.9/quantization.html b/2.9/quantization.html index d0af97e0c97..72b039cb0c1 100644 --- a/2.9/quantization.html +++ b/2.9/quantization.html @@ -4414,17 +4414,17 @@

                    Quantization API Reference (Kept since APIs are still public)
                    -torch.ao.ns.fx.utils.compute_sqnr(x, y)[source]#
                    +torch.ao.ns.fx.utils.compute_sqnr(x, y)[source]#
                    -torch.ao.ns.fx.utils.compute_normalized_l2_error(x, y)[source]#
                    +torch.ao.ns.fx.utils.compute_normalized_l2_error(x, y)[source]#
                    -torch.ao.ns.fx.utils.compute_cosine_similarity(x, y)[source]#
                    +torch.ao.ns.fx.utils.compute_cosine_similarity(x, y)[source]#

                    diff --git a/2.9/random.html b/2.9/random.html index a119f1015a4..7b534b4e926 100644 --- a/2.9/random.html +++ b/2.9/random.html @@ -4396,7 +4396,7 @@

                    Created On: Aug 07, 2019 | Last Updated On: Jun 18, 2025

                    -torch.random.fork_rng(devices=None, enabled=True, _caller='fork_rng', _devices_kw='devices', device_type='cuda')[source]#
                    +torch.random.fork_rng(devices=None, enabled=True, _caller='fork_rng', _devices_kw='devices', device_type='cuda')[source]#

                    Forks the RNG, so that when you return, the RNG is reset to the state that it was previously in.
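A minimal sketch:

import torch

torch.manual_seed(7)
with torch.random.fork_rng():
    noisy = torch.randn(3)   # consumes RNG state only inside the fork
sample = torch.randn(3)      # unaffected by what happened in the block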

                    @@ -4422,7 +4422,7 @@
                    -torch.random.get_rng_state()[source]#
                    +torch.random.get_rng_state()[source]#

                    Returns the random number generator state as a torch.ByteTensor.

                    Note

                    @@ -4438,7 +4438,7 @@
                    -torch.random.initial_seed()[source]#
                    +torch.random.initial_seed()[source]#

                    Returns the initial seed for generating random numbers as a Python long.

                    @@ -4454,7 +4454,7 @@
                    -torch.random.manual_seed(seed)[source]#
                    +torch.random.manual_seed(seed)[source]#

                    Sets the seed for generating random numbers on all devices. Returns a torch.Generator object.

                    @@ -4472,7 +4472,7 @@
                    -torch.random.seed()[source]#
                    +torch.random.seed()[source]#

                    Sets the seed for generating random numbers to a non-deterministic random number on all devices. Returns a 64 bit number used to seed the RNG.

                    @@ -4484,7 +4484,7 @@
                    -torch.random.set_rng_state(new_state)[source]#
                    +torch.random.set_rng_state(new_state)[source]#

                    Sets the random number generator state.

                    Note

                    diff --git a/2.9/rpc.html b/2.9/rpc.html index d34bb06506d..d583a2669c7 100644 --- a/2.9/rpc.html +++ b/2.9/rpc.html @@ -4477,7 +4477,7 @@

                    Basics
                    -torch.distributed.rpc.init_rpc(name, backend=None, rank=-1, world_size=None, rpc_backend_options=None)[source]#
                    +torch.distributed.rpc.init_rpc(name, backend=None, rank=-1, world_size=None, rpc_backend_options=None)[source]#

                    Initializes RPC primitives such as the local RPC agent and distributed autograd, which immediately makes the current process ready to send and receive RPCs.

                    @@ -4520,7 +4520,7 @@

                    Basics
                    -torch.distributed.rpc.rpc_sync(to, func, args=None, kwargs=None, timeout=-1.0)[source]#
                    +torch.distributed.rpc.rpc_sync(to, func, args=None, kwargs=None, timeout=-1.0)[source]#

                    Make a blocking RPC call to run function func on worker to. RPC messages are sent and received in parallel to execution of Python code. This method is thread-safe.
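A minimal sketch (the worker names assume a two-process group already initialized with rpc.init_rpc):

import torch
import torch.distributed.rpc as rpc

# On "worker0", after rpc.init_rpc("worker0", rank=0, world_size=2):
ret = rpc.rpc_sync('worker1', torch.add, args=(torch.ones(2), 3))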

                    @@ -4595,7 +4595,7 @@

                    Basics
                    -torch.distributed.rpc.rpc_async(to, func, args=None, kwargs=None, timeout=-1.0)[source]#
                    +torch.distributed.rpc.rpc_async(to, func, args=None, kwargs=None, timeout=-1.0)[source]#

                    Make a non-blocking RPC call to run function func on worker to. RPC messages are sent and received in parallel to execution of Python code. This method is thread-safe. This method will immediately return a @@ -4692,7 +4692,7 @@

                    Basics
                    -torch.distributed.rpc.remote(to, func, args=None, kwargs=None, timeout=-1.0)[source]#
                    +torch.distributed.rpc.remote(to, func, args=None, kwargs=None, timeout=-1.0)[source]#

                    Make a remote call to run func on worker to and return an RRef to the result value immediately. Worker to will be the owner of the returned @@ -4801,7 +4801,7 @@

                    Basics
                    -torch.distributed.rpc.get_worker_info(worker_name=None)[source]#
                    +torch.distributed.rpc.get_worker_info(worker_name=None)[source]#

                    Get WorkerInfo of a given worker name. Use this WorkerInfo to avoid passing an expensive string on every invocation.

                    @@ -4820,7 +4820,7 @@

                    Basics
                    -torch.distributed.rpc.shutdown(graceful=True, timeout=0)[source]#
                    +torch.distributed.rpc.shutdown(graceful=True, timeout=0)[source]#

                    Perform a shutdown of the RPC agent, and then destroy the RPC agent. This stops the local agent from accepting outstanding requests, and shuts down the RPC framework by terminating all RPC threads. If graceful=True, @@ -4901,7 +4901,7 @@

                    Basics
                    -torch.distributed.rpc.functions.async_execution(fn)[source]#
                    +torch.distributed.rpc.functions.async_execution(fn)[source]#

                    A decorator for a function indicating that the return value of the function is guaranteed to be a Future object and this function can run asynchronously on the RPC callee. More specifically, the @@ -5142,7 +5142,7 @@

                    TensorPipe Backend
                    -class torch.distributed.rpc.TensorPipeRpcBackendOptions(*, num_worker_threads=16, rpc_timeout=60.0, init_method='env://', device_maps=None, devices=None, _transports=None, _channels=None)[source]#
                    +class torch.distributed.rpc.TensorPipeRpcBackendOptions(*, num_worker_threads=16, rpc_timeout=60.0, init_method='env://', device_maps=None, devices=None, _transports=None, _channels=None)[source]#

                    The backend options for TensorPipeAgent, derived from RpcBackendOptions.

                    @@ -5214,7 +5214,7 @@

                    TensorPipe Backend
                    -set_device_map(to, device_map)[source]#
                    +set_device_map(to, device_map)[source]#

                    Set device mapping between each RPC caller and callee pair. This function can be called multiple times to incrementally add device placement configurations.

                    @@ -5265,7 +5265,7 @@

                    TensorPipe Backend
                    -set_devices(devices)[source]#
                    +set_devices(devices)[source]#

                    Set local devices used by the TensorPipe RPC agent. When processing CUDA RPC requests, the TensorPipe RPC agent will properly synchronize CUDA streams for all devices in this List.

                    @@ -5612,7 +5612,7 @@

                    TensorPipe Backend
                    -class torch.distributed.nn.api.remote_module.RemoteModule(*args, **kwargs)[source]#
                    +class torch.distributed.nn.api.remote_module.RemoteModule(*args, **kwargs)[source]#

                    A RemoteModule instance can only be created after RPC initialization.

                    It creates a user-specified module on a specified remote node. @@ -5693,7 +5693,7 @@
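A minimal sketch (assumes RPC is initialized and a peer named "worker1" exists):

import torch
from torch.distributed.nn.api.remote_module import RemoteModule

remote_linear = RemoteModule('worker1/cpu', torch.nn.Linear, args=(20, 30))
out = remote_linear.forward(torch.randn(128, 20))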

                    TensorPipe Backend
                    -get_module_rref()[source]#
                    +get_module_rref()[source]#

                    Return an RRef (RRef[nn.Module]) pointing to the remote module.

                    Return type
                    @@ -5704,7 +5704,7 @@

                    TensorPipe Backend
                    -remote_parameters(recurse=True)[source]#
                    +remote_parameters(recurse=True)[source]#

Return a list of RRefs pointing to the remote module's parameters.

                    This can typically be used in conjunction with DistributedOptimizer.

                    @@ -5785,7 +5785,7 @@

                    Distributed Autograd Framework
                    -class torch.distributed.autograd.context[source]#
                    +class torch.distributed.autograd.context[source]#

                    Context object to wrap forward and backward passes when using distributed autograd. The context_id generated in the with statement is required to uniquely identify a distributed backward pass diff --git a/2.9/storage.html b/2.9/storage.html index 1637202861f..49d6db6a08d 100644 --- a/2.9/storage.html +++ b/2.9/storage.html @@ -4504,52 +4504,52 @@

                    Special cases
                    -class torch.UntypedStorage(*args, **kwargs)[source]#
                    +class torch.UntypedStorage(*args, **kwargs)[source]#
                    -bfloat16()[source]#
                    +bfloat16()[source]#

                    Casts this storage to bfloat16 type.

                    -bool()[source]#
                    +bool()[source]#

                    Casts this storage to bool type.

                    -byte()[source]#
                    +byte()[source]#

                    Casts this storage to byte type.

                    -byteswap(dtype)[source]#
                    +byteswap(dtype)[source]#

                    Swap bytes in underlying data.

                    -char()[source]#
                    +char()[source]#

                    Casts this storage to char type.

                    -clone()[source]#
                    +clone()[source]#

                    Return a copy of this storage.

                    -complex_double()[source]#
                    +complex_double()[source]#

                    Casts this storage to complex double type.

                    -complex_float()[source]#
                    +complex_float()[source]#

                    Casts this storage to complex float type.

                    @@ -4560,13 +4560,13 @@

                    Special cases
                    -cpu()[source]#
                    +cpu()[source]#

                    Return a CPU copy of this storage if it’s not already on the CPU.

                    -cuda(device=None, non_blocking=False)[source]#
                    +cuda(device=None, non_blocking=False)[source]#

                    Returns a copy of this object in CUDA memory.

                    If this object is already in CUDA memory and on the correct device, then no copy is performed and the original object is returned.

                    @@ -4597,7 +4597,7 @@

                    Special cases
                    -double()[source]#
                    +double()[source]#

                    Casts this storage to double type.

                    @@ -4621,31 +4621,31 @@

                    Special cases
                    -float()[source]#
                    +float()[source]#

                    Casts this storage to float type.

                    -float8_e4m3fn()[source]#
                    +float8_e4m3fn()[source]#

Casts this storage to float8_e4m3fn type.

                    -float8_e4m3fnuz()[source]#
                    +float8_e4m3fnuz()[source]#

Casts this storage to float8_e4m3fnuz type.

                    -float8_e5m2()[source]#
                    +float8_e5m2()[source]#

Casts this storage to float8_e5m2 type.

                    -float8_e5m2fnuz()[source]#
                    +float8_e5m2fnuz()[source]#

Casts this storage to float8_e5m2fnuz type.

                    @@ -4679,7 +4679,7 @@

                    Special cases
                    -get_device()[source]#
                    +get_device()[source]#
                    Return type

                    int

                    @@ -4689,13 +4689,13 @@

                    Special cases
                    -half()[source]#
                    +half()[source]#

                    Casts this storage to half type.

                    -hpu(device=None, non_blocking=False)[source]#
                    +hpu(device=None, non_blocking=False)[source]#

                    Returns a copy of this object in HPU memory.

                    If this object is already in HPU memory and on the correct device, then no copy is performed and the original object is returned.

                    @@ -4716,7 +4716,7 @@

                    Special cases
                    -int()[source]#
                    +int()[source]#

                    Casts this storage to int type.

                    @@ -4732,7 +4732,7 @@

                    Special cases
                    -is_pinned(device='cuda')[source]#
                    +is_pinned(device='cuda')[source]#

                    Determine whether the CPU storage is already pinned on device.

                    Parameters
                    @@ -4762,13 +4762,13 @@

                    Special cases
                    -long()[source]#
                    +long()[source]#

                    Casts this storage to long type.

                    -mps()[source]#
                    +mps()[source]#

                    Return a MPS copy of this storage if it’s not already on the MPS.

                    @@ -4784,7 +4784,7 @@

                    Special cases
                    -pin_memory(device='cuda')[source]#
                    +pin_memory(device='cuda')[source]#

                    Copy the CPU storage to pinned memory, if it’s not already pinned.

                    Parameters
                    @@ -4809,7 +4809,7 @@
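A small sketch combining is_pinned() and pin_memory() from above (assumes a CUDA-enabled build, since pinning is only meaningful with an accelerator present):

import torch

s = torch.UntypedStorage(64)      # ordinary pageable CPU storage
print(s.is_pinned())              # False
pinned = s.pin_memory()           # copies into page-locked memory
print(pinned.is_pinned())         # True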

                    Special cases
                    -share_memory_(*args, **kwargs)[source]#
                    +share_memory_(*args, **kwargs)[source]#

                    Moves the storage to shared memory.

This is a no-op for storages already in shared memory and for CUDA storages, which do not need to be moved for sharing across processes.

@@ -4844,13 +4844,13 @@
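A sketch of the in-place move (names are illustrative only):

import torch

s = torch.UntypedStorage(8)
s.share_memory_()                 # moves the storage into shared memory in place
print(s.is_shared())              # True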

                    Special cases
                    -short()[source]#
                    +short()[source]#

                    Casts this storage to short type.

                    -size()[source]#
                    +size()[source]#
                    Return type

                    int

                    @@ -4860,20 +4860,20 @@

                    Special cases
                    -to(*, device, non_blocking=False)[source]#
                    +to(*, device, non_blocking=False)[source]#

                    -tolist()[source]#
                    +tolist()[source]#

                    Return a list containing the elements of this storage.

                    -type(dtype=None, non_blocking=False)[source]#
                    +type(dtype=None, non_blocking=False)[source]#
                    Return type

                    Union[_StorageBase, TypedStorage]

                    @@ -4883,7 +4883,7 @@

                    Special cases
                    -untyped()[source]#
                    +untyped()[source]#

                    @@ -4918,67 +4918,67 @@

Legacy Typed Storage

torch.Tensor views.

                    -class torch.TypedStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.TypedStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    -bfloat16()[source]#
                    +bfloat16()[source]#

                    Casts this storage to bfloat16 type.

                    -bool()[source]#
                    +bool()[source]#

                    Casts this storage to bool type.

                    -byte()[source]#
                    +byte()[source]#

                    Casts this storage to byte type.

                    -char()[source]#
                    +char()[source]#

                    Casts this storage to char type.

                    -clone()[source]#
                    +clone()[source]#

                    Return a copy of this storage.

                    -complex_double()[source]#
                    +complex_double()[source]#

                    Casts this storage to complex double type.

                    -complex_float()[source]#
                    +complex_float()[source]#

                    Casts this storage to complex float type.

                    -copy_(source, non_blocking=None)[source]#
                    +copy_(source, non_blocking=None)[source]#
                    -cpu()[source]#
                    +cpu()[source]#

                    Return a CPU copy of this storage if it’s not already on the CPU.

                    -cuda(device=None, non_blocking=False)[source]#
                    +cuda(device=None, non_blocking=False)[source]#

                    Returns a copy of this object in CUDA memory.

                    If this object is already in CUDA memory and on the correct device, then no copy is performed and the original object is returned.

                    @@ -4999,7 +4999,7 @@

                    Legacy Typed Storage
                    -data_ptr()[source]#
                    +data_ptr()[source]#

                    @@ -5009,7 +5009,7 @@

                    Legacy Typed Storage
                    -double()[source]#
                    +double()[source]#

                    Casts this storage to double type.

                    @@ -5020,7 +5020,7 @@

                    Legacy Typed Storage
                    -element_size()[source]#
                    +element_size()[source]#

                    @@ -5032,47 +5032,47 @@

                    Legacy Typed Storage
                    -fill_(value)[source]#
                    +fill_(value)[source]#

                    -float()[source]#
                    +float()[source]#

                    Casts this storage to float type.

                    -float8_e4m3fn()[source]#
                    +float8_e4m3fn()[source]#

                    Casts this storage to float8_e4m3fn type

                    -float8_e4m3fnuz()[source]#
                    +float8_e4m3fnuz()[source]#

                    Casts this storage to float8_e4m3fnuz type

                    -float8_e5m2()[source]#
                    +float8_e5m2()[source]#

                    Casts this storage to float8_e5m2 type

                    -float8_e5m2fnuz()[source]#
                    +float8_e5m2fnuz()[source]#

                    Casts this storage to float8_e5m2fnuz type

                    -classmethod from_buffer(*args, **kwargs)[source]#
                    +classmethod from_buffer(*args, **kwargs)[source]#
                    -classmethod from_file(filename, shared=False, size=0) Storage[source]#
                    +classmethod from_file(filename, shared=False, size=0) Storage[source]#

                    Creates a CPU storage backed by a memory-mapped file.

If shared is True, then memory is shared between all processes. All changes are written to the file. If shared is False, then the changes on

@@ -5095,7 +5095,7 @@
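A hedged sketch of the memory-mapping behavior, shown with torch.UntypedStorage.from_file, which takes the same arguments; the file path is hypothetical:

import torch

path = "/tmp/example_storage.bin"
with open(path, "wb") as f:
    f.write(bytes(32))                               # pre-size the backing file
s = torch.UntypedStorage.from_file(path, shared=True, size=32)
s[0] = 255                                           # with shared=True, this lands in the file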

                    Legacy Typed Storage
                    -get_device()[source]#
                    +get_device()[source]#
                    Return type

                    int

                    @@ -5105,13 +5105,13 @@

                    Legacy Typed Storage
                    -half()[source]#
                    +half()[source]#

                    Casts this storage to half type.

                    -hpu(device=None, non_blocking=False)[source]#
                    +hpu(device=None, non_blocking=False)[source]#

                    Returns a copy of this object in HPU memory.

                    If this object is already in HPU memory and on the correct device, then no copy is performed and the original object is returned.

                    @@ -5132,7 +5132,7 @@

                    Legacy Typed Storage
                    -int()[source]#
                    +int()[source]#

                    Casts this storage to int type.

                    @@ -5148,7 +5148,7 @@

                    Legacy Typed Storage
                    -is_pinned(device='cuda')[source]#
                    +is_pinned(device='cuda')[source]#

                    Determine whether the CPU TypedStorage is already pinned on device.

                    Parameters
                    @@ -5163,7 +5163,7 @@

                    Legacy Typed Storage
                    -is_shared()[source]#
                    +is_shared()[source]#

                    @@ -5173,23 +5173,23 @@

                    Legacy Typed Storage
                    -long()[source]#
                    +long()[source]#

                    Casts this storage to long type.

                    -nbytes()[source]#
                    +nbytes()[source]#
                    -pickle_storage_type()[source]#
                    +pickle_storage_type()[source]#
                    -pin_memory(device='cuda')[source]#
                    +pin_memory(device='cuda')[source]#

                    Copy the CPU TypedStorage to pinned memory, if it’s not already pinned.

                    Parameters
                    @@ -5204,34 +5204,34 @@

                    Legacy Typed Storage
                    -resizable()[source]#
                    +resizable()[source]#

                    -resize_(size)[source]#
                    +resize_(size)[source]#
                    -share_memory_()[source]#
                    +share_memory_()[source]#

                    See torch.UntypedStorage.share_memory_()

                    -short()[source]#
                    +short()[source]#

                    Casts this storage to short type.

                    -size()[source]#
                    +size()[source]#
                    -to(*, device, non_blocking=False)[source]#
                    +to(*, device, non_blocking=False)[source]#

                    Returns a copy of this object in device memory.

                    If this object is already on the correct device, then no copy is performed and the original object is returned.

                    @@ -5252,13 +5252,13 @@

                    Legacy Typed Storage
                    -tolist()[source]#
                    +tolist()[source]#

                    Return a list containing the elements of this storage.

                    -type(dtype=None, non_blocking=False)[source]#
                    +type(dtype=None, non_blocking=False)[source]#

                    Returns the type if dtype is not provided, else casts this object to the specified type.

If this is already of the correct type, no copy is performed and the

@@ -5283,7 +5283,7 @@

                    Legacy Typed Storage
                    -untyped()[source]#
                    +untyped()[source]#

                    Return the internal torch.UntypedStorage.

                    @@ -5291,7 +5291,7 @@

                    Legacy Typed Storage
                    -class torch.DoubleStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.DoubleStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5303,7 +5303,7 @@

                    Legacy Typed Storage
                    -class torch.FloatStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.FloatStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5315,7 +5315,7 @@

                    Legacy Typed Storage
                    -class torch.HalfStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.HalfStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5327,7 +5327,7 @@

                    Legacy Typed Storage
                    -class torch.LongStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.LongStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5339,7 +5339,7 @@

                    Legacy Typed Storage
                    -class torch.IntStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.IntStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5351,7 +5351,7 @@

                    Legacy Typed Storage
                    -class torch.ShortStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.ShortStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5363,7 +5363,7 @@

                    Legacy Typed Storage
                    -class torch.CharStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.CharStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5375,7 +5375,7 @@

                    Legacy Typed Storage
                    -class torch.ByteStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.ByteStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5387,7 +5387,7 @@

                    Legacy Typed Storage
                    -class torch.BoolStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.BoolStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5399,7 +5399,7 @@

                    Legacy Typed Storage
                    -class torch.BFloat16Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.BFloat16Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5411,7 +5411,7 @@

                    Legacy Typed Storage
                    -class torch.ComplexDoubleStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.ComplexDoubleStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5423,7 +5423,7 @@

                    Legacy Typed Storage
                    -class torch.ComplexFloatStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.ComplexFloatStorage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5435,7 +5435,7 @@

                    Legacy Typed Storage
                    -class torch.QUInt8Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.QUInt8Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5447,7 +5447,7 @@

                    Legacy Typed Storage
                    -class torch.QInt8Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.QInt8Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5459,7 +5459,7 @@

                    Legacy Typed Storage
                    -class torch.QInt32Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.QInt32Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5471,7 +5471,7 @@

                    Legacy Typed Storage
                    -class torch.QUInt4x2Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.QUInt4x2Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    @@ -5483,7 +5483,7 @@

                    Legacy Typed Storage
                    -class torch.QUInt2x4Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
                    +class torch.QUInt2x4Storage(*args, wrap_storage=None, dtype=None, device=None, _internal=False)[source]#
diff --git a/2.9/tensorboard.html b/2.9/tensorboard.html
index 93061b88dcf..8cd78993b56 100644
--- a/2.9/tensorboard.html
+++ b/2.9/tensorboard.html
@@ -4455,7 +4455,7 @@

                    -class torch.utils.tensorboard.writer.SummaryWriter(log_dir=None, comment='', purge_step=None, max_queue=10, flush_secs=120, filename_suffix='')[source]#
                    +class torch.utils.tensorboard.writer.SummaryWriter(log_dir=None, comment='', purge_step=None, max_queue=10, flush_secs=120, filename_suffix='')[source]#

                    Writes entries directly to event files in the log_dir to be consumed by TensorBoard.

The SummaryWriter class provides a high-level API to create an event file in a given directory and add summaries and events to it. The class updates the

@@ -4464,7 +4464,7 @@

training.
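A minimal end-to-end sketch (requires the tensorboard package; the log directory name is hypothetical):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="runs/demo")
for step in range(100):
    writer.add_scalar("loss/train", 1.0 / (step + 1), global_step=step)
writer.flush()                                   # make sure pending events reach disk
writer.close()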

                    -__init__(log_dir=None, comment='', purge_step=None, max_queue=10, flush_secs=120, filename_suffix='')[source]#
                    +__init__(log_dir=None, comment='', purge_step=None, max_queue=10, flush_secs=120, filename_suffix='')[source]#

                    Create a SummaryWriter that will write out events and summaries to the event file.

                    Parameters
                    @@ -4511,7 +4511,7 @@
                    -add_scalar(tag, scalar_value, global_step=None, walltime=None, new_style=False, double_precision=False)[source]#
                    +add_scalar(tag, scalar_value, global_step=None, walltime=None, new_style=False, double_precision=False)[source]#

                    Add scalar data to summary.

                    Parameters
                    @@ -4541,7 +4541,7 @@
                    -add_scalars(main_tag, tag_scalar_dict, global_step=None, walltime=None)[source]#
                    +add_scalars(main_tag, tag_scalar_dict, global_step=None, walltime=None)[source]#

                    Add many scalar data to summary.

                    Parameters
                    @@ -4573,7 +4573,7 @@
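Reusing the writer from the earlier sketch, a single call that logs several related curves (tag names are illustrative):

writer.add_scalars("loss", {"train": 0.42, "val": 0.61}, global_step=0)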
                    -add_histogram(tag, values, global_step=None, bins='tensorflow', walltime=None, max_bins=None)[source]#
                    +add_histogram(tag, values, global_step=None, bins='tensorflow', walltime=None, max_bins=None)[source]#

                    Add histogram to summary.

                    Parameters
                    @@ -4604,7 +4604,7 @@
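A one-line sketch, again assuming the writer from the earlier example:

import torch

writer.add_histogram("fc1/weight", torch.randn(1000), global_step=0, bins="tensorflow")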
                    -add_image(tag, img_tensor, global_step=None, walltime=None, dataformats='CHW')[source]#
                    +add_image(tag, img_tensor, global_step=None, walltime=None, dataformats='CHW')[source]#

                    Add image data to summary.

                    Note that this requires the pillow package.

                    @@ -4652,7 +4652,7 @@
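A sketch using a random CHW image (requires pillow; assumes the writer from above):

import torch

img = torch.rand(3, 64, 64)                       # CHW float image with values in [0, 1]
writer.add_image("samples/random", img, global_step=0, dataformats="CHW")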
                    -add_images(tag, img_tensor, global_step=None, walltime=None, dataformats='NCHW')[source]#
                    +add_images(tag, img_tensor, global_step=None, walltime=None, dataformats='NCHW')[source]#

                    Add batched image data to summary.

                    Note that this requires the pillow package.

                    @@ -4693,7 +4693,7 @@
                    -add_figure(tag, figure, global_step=None, close=True, walltime=None)[source]#
                    +add_figure(tag, figure, global_step=None, close=True, walltime=None)[source]#

                    Render matplotlib figure into an image and add it to summary.

                    Note that this requires the matplotlib package.

                    @@ -4712,7 +4712,7 @@
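A sketch with a throwaway matplotlib figure (figure contents are illustrative; assumes the writer from above):

import matplotlib.pyplot as plt

fig = plt.figure()
plt.plot([0, 1, 2], [0, 1, 4])
writer.add_figure("curves/example", fig, global_step=0)   # closes fig unless close=False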
                    -add_video(tag, vid_tensor, global_step=None, fps=4, walltime=None)[source]#
                    +add_video(tag, vid_tensor, global_step=None, fps=4, walltime=None)[source]#

                    Add video data to summary.

                    Note that this requires the moviepy package.

                    @@ -4735,7 +4735,7 @@
                    -add_audio(tag, snd_tensor, global_step=None, sample_rate=44100, walltime=None)[source]#
                    +add_audio(tag, snd_tensor, global_step=None, sample_rate=44100, walltime=None)[source]#

                    Add audio data to summary.

                    Parameters
                    @@ -4757,7 +4757,7 @@
                    -add_text(tag, text_string, global_step=None, walltime=None)[source]#
                    +add_text(tag, text_string, global_step=None, walltime=None)[source]#

                    Add text data to summary.

                    Parameters
                    @@ -4779,7 +4779,7 @@
                    -add_graph(model, input_to_model=None, verbose=False, use_strict_trace=True)[source]#
                    +add_graph(model, input_to_model=None, verbose=False, use_strict_trace=True)[source]#

                    Add graph data to summary.

                    Parameters
                    @@ -4798,7 +4798,7 @@
                    -add_embedding(mat, metadata=None, label_img=None, global_step=None, tag='default', metadata_header=None)[source]#
                    +add_embedding(mat, metadata=None, label_img=None, global_step=None, tag='default', metadata_header=None)[source]#

                    Add embedding projector data to summary.

                    Parameters
                    @@ -4847,7 +4847,7 @@
                    -add_pr_curve(tag, labels, predictions, global_step=None, num_thresholds=127, weights=None, walltime=None)[source]#
                    +add_pr_curve(tag, labels, predictions, global_step=None, num_thresholds=127, weights=None, walltime=None)[source]#

                    Add precision recall curve.

Plotting a precision-recall curve lets you understand your model’s performance under different threshold settings. With this function,

@@ -4882,7 +4882,7 @@
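A sketch with synthetic labels and scores (assumes the writer from the earlier example):

import torch

labels = torch.randint(0, 2, (100,))              # ground-truth binary labels
predictions = torch.rand(100)                     # predicted probabilities in [0, 1]
writer.add_pr_curve("pr/val", labels, predictions, global_step=0)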

                    -add_custom_scalars(layout)[source]#
                    +add_custom_scalars(layout)[source]#

                    Create special chart by collecting charts tags in ‘scalars’.

                    NOTE: This function can only be called once for each SummaryWriter() object.

                    Because it only provides metadata to tensorboard, the function can be called before or after the training loop.
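A sketch of the layout dictionary it expects; the category, chart title, and tags are illustrative:

layout = {
    "Losses": {
        "train vs val": ["Multiline", ["loss/train", "loss/val"]],
    },
}
writer.add_custom_scalars(layout)                 # call at most once per writer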

                    @@ -4906,7 +4906,7 @@
                    -add_mesh(tag, vertices, colors=None, faces=None, config_dict=None, global_step=None, walltime=None)[source]#
                    +add_mesh(tag, vertices, colors=None, faces=None, config_dict=None, global_step=None, walltime=None)[source]#

                    Add meshes or 3D point clouds to TensorBoard.

The visualization is based on Three.js, so it allows users to interact with the rendered object. Besides the basic definitions

@@ -4964,7 +4964,7 @@

                    -add_hparams(hparam_dict, metric_dict, hparam_domain_discrete=None, run_name=None, global_step=None)[source]#
                    +add_hparams(hparam_dict, metric_dict, hparam_domain_discrete=None, run_name=None, global_step=None)[source]#

                    Add a set of hyperparameters to be compared in TensorBoard.

                    Parameters
                    @@ -5000,7 +5000,7 @@
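A sketch with hypothetical hyperparameter and metric names:

writer.add_hparams(
    {"lr": 0.1, "batch_size": 32},                # hyperparameters for this run
    {"hparam/accuracy": 0.91},                    # metrics observed with them
)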
                    -flush()[source]#
                    +flush()[source]#

                    Flushes the event file to disk.

                    Call this method to make sure that all pending events have been written to disk.

                    @@ -5008,7 +5008,7 @@
                    -close()[source]#
                    +close()[source]#
diff --git a/2.9/testing.html b/2.9/testing.html
index cb9d3ad6b8e..b1a80177d72 100644
--- a/2.9/testing.html
+++ b/2.9/testing.html
@@ -4396,7 +4396,7 @@

                    Created On: May 07, 2021 | Last Updated On: Jun 10, 2025

                    -torch.testing.assert_close(actual, expected, *, allow_subclasses=True, rtol=None, atol=None, equal_nan=False, check_device=True, check_dtype=True, check_layout=True, check_stride=False, msg=None)[source]#
                    +torch.testing.assert_close(actual, expected, *, allow_subclasses=True, rtol=None, atol=None, equal_nan=False, check_device=True, check_dtype=True, check_layout=True, check_stride=False, msg=None)[source]#

                    Asserts that actual and expected are close.

                    If actual and expected are strided, non-quantized, real-valued, and finite, they are considered close if

                    @@ -4676,7 +4676,7 @@
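For strided real-valued tensors the default criterion is |actual - expected| <= atol + rtol * |expected|; a small sketch of typical use:

import torch
from torch.testing import assert_close

expected = torch.tensor([1.0, 2.0, 3.0])
actual = expected + 1e-6
assert_close(actual, expected)     # passes: within the default float32 tolerances
# assert_close(actual, expected, rtol=0, atol=1e-9)  # would raise AssertionError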
                    -torch.testing.make_tensor(*shape, dtype, device, low=None, high=None, requires_grad=False, noncontiguous=False, exclude_zero=False, memory_format=None)[source]#
                    +torch.testing.make_tensor(*shape, dtype, device, low=None, high=None, requires_grad=False, noncontiguous=False, exclude_zero=False, memory_format=None)[source]#

                    Creates a tensor with the given shape, device, and dtype, and filled with values uniformly drawn from [low, high).

If low or high are specified and are outside the range of the dtype’s representable

@@ -4777,7 +4777,7 @@
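A quick sketch (the shape and bounds shown are arbitrary):

import torch
from torch.testing import make_tensor

t = make_tensor((3, 4), dtype=torch.float32, device="cpu", low=-1.0, high=1.0)
assert t.shape == (3, 4) and t.min() >= -1.0 and t.max() < 1.0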

                    -torch.testing.assert_allclose(actual, expected, rtol=None, atol=None, equal_nan=True, msg='')[source]#
                    +torch.testing.assert_allclose(actual, expected, rtol=None, atol=None, equal_nan=True, msg='')[source]#

                    Warning

torch.testing.assert_allclose() is deprecated since 1.12 and will be removed in a future release.

diff --git a/2.9/torch.compiler_aot_inductor.html b/2.9/torch.compiler_aot_inductor.html
index 4bdcd44a9d1..7f75b0eac54 100644
--- a/2.9/torch.compiler_aot_inductor.html
+++ b/2.9/torch.compiler_aot_inductor.html
@@ -4595,7 +4595,7 @@

                    Troubleshooting#

                    -torch._inductor.aoti_compile_and_package(exported_program, _deprecated_unused_args=None, _deprecated_unused_kwargs=None, *, package_path=None, inductor_configs=None)[source]#
                    +torch._inductor.aoti_compile_and_package(exported_program, _deprecated_unused_args=None, _deprecated_unused_kwargs=None, *, package_path=None, inductor_configs=None)[source]#

                    Compiles the exported program with AOTInductor, and packages it into a .pt2 artifact specified by the input package_path. To load the package, you can call torch._inductor.aoti_load_package(package_path).
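A compact sketch covering this function together with aoti_load_package below (the module and package path are hypothetical; compilation needs a working C++ toolchain):

import torch
from torch.export import export

class M(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x)

ep = export(M(), (torch.randn(4),))
pkg = torch._inductor.aoti_compile_and_package(ep, package_path="/tmp/m.pt2")
runner = torch._inductor.aoti_load_package(pkg)
print(runner(torch.randn(4)))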

                    @@ -4645,7 +4645,7 @@

                    API Reference
                    -torch._inductor.aoti_load_package(path, run_single_threaded=False, device_index=-1)[source]#
                    +torch._inductor.aoti_load_package(path, run_single_threaded=False, device_index=-1)[source]#

                    Loads the model from the PT2 package.

                    If multiple models were packaged into the PT2, this will load the default model. To load a specific model, you can directly call the load API

diff --git a/2.9/torch.html b/2.9/torch.html
index 014443e8070..4598478ba4b 100644
--- a/2.9/torch.html
+++ b/2.9/torch.html
@@ -6159,13 +6159,13 @@

                    Utilities#

                    -class torch.SymInt(node)[source]#
                    +class torch.SymInt(node)[source]#

                    Like an int (including magic methods), but redirects all operations on the wrapped node. This is used in particular to symbolically record operations in the symbolic shape workflow.
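SymInts are normally produced by tracing with dynamic shapes rather than constructed directly; a hedged sketch via torch.export:

import torch
from torch.export import Dim, export

class M(torch.nn.Module):
    def forward(self, x):
        return x * x.shape[0]        # x.shape[0] is a torch.SymInt while tracing

ep = export(M(), (torch.randn(4, 3),), dynamic_shapes={"x": {0: Dim("batch")}})
print(ep)                            # the printed graph carries a symbolic size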

                    -as_integer_ratio()[source]#
                    +as_integer_ratio()[source]#

                    Represent this int as an exact integer ratio

                    Return type
                    @@ -6178,13 +6178,13 @@

                    Symbolic Numbers
                    -class torch.SymFloat(node)[source]#
                    +class torch.SymFloat(node)[source]#

                    Like a float (including magic methods), but redirects all operations on the wrapped node. This is used in particular to symbolically record operations in the symbolic shape workflow.

                    -as_integer_ratio()[source]#
                    +as_integer_ratio()[source]#

                    Represent this float as an exact integer ratio

                    Return type
                    @@ -6195,7 +6195,7 @@

                    Symbolic Numbers
                    -conjugate()[source]#
                    +conjugate()[source]#

                    Returns the complex conjugate of the float.

                    Return type
                    @@ -6206,7 +6206,7 @@

                    Symbolic Numbers
                    -hex()[source]#
                    +hex()[source]#

                    Returns the hexadecimal representation of the float.

                    Return type
                    @@ -6217,7 +6217,7 @@

                    Symbolic Numbers
                    -is_integer()[source]#
                    +is_integer()[source]#

                    Return True if the float is an integer.

                    @@ -6225,7 +6225,7 @@

                    Symbolic Numbers
                    -class torch.SymBool(node)[source]#
                    +class torch.SymBool(node)[source]#

                    Like a bool (including magic methods), but redirects all operations on the wrapped node. This is used in particular to symbolically record operations in the symbolic shape workflow.

diff --git a/2.9/torch.overrides.html b/2.9/torch.overrides.html
index c2f3fb8aec1..bc69099e1e1 100644
--- a/2.9/torch.overrides.html
+++ b/2.9/torch.overrides.html
@@ -4401,7 +4401,7 @@

                    Functions#

                    -torch.overrides.get_ignored_functions()[source]#
                    +torch.overrides.get_ignored_functions()[source]#

                    Return public functions that cannot be overridden by __torch_function__.

                    Returns
                    @@ -4424,7 +4424,7 @@

                    Functions
                    -torch.overrides.get_overridable_functions()[source]#
                    +torch.overrides.get_overridable_functions()[source]#

                    List functions that are overridable via __torch_function__

                    Returns
                    @@ -4439,7 +4439,7 @@
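A small sketch, paired with resolve_name() documented just below:

import torch
from torch.overrides import get_overridable_functions, resolve_name

funcs = get_overridable_functions()       # dict: namespace -> list of functions
print(len(funcs[torch]))                  # how many torch.* functions are overridable
print(resolve_name(torch.add))            # 'torch.add'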

                    Functions
                    -torch.overrides.resolve_name(f)[source]#
                    +torch.overrides.resolve_name(f)[source]#

                    Get a human readable string name for a function passed to __torch_function__

                    @@ -4458,7 +4458,7 @@

                    Functions
                    -torch.overrides.get_testing_overrides()[source]#
                    +torch.overrides.get_testing_overrides()[source]#

                    Return a dict containing dummy overrides for all overridable functions

                    Returns
                    @@ -4482,7 +4482,7 @@

                    Functions
                    -torch.overrides.handle_torch_function(public_api, relevant_args, *args, **kwargs)[source]#
                    +torch.overrides.handle_torch_function(public_api, relevant_args, *args, **kwargs)[source]#

                    Implement a function with checks for __torch_function__ overrides.

                    See torch::autograd::handle_torch_function for the equivalent of this function in the C++ implementation.
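Most user code never calls this directly; torch functions invoke it when an argument defines __torch_function__. A hedged sketch with a hypothetical tensor-like class:

import torch

class LoggingTensorLike:
    """Minimal tensor-like that logs every torch function applied to it."""

    def __init__(self, t):
        self.t = t

    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        kwargs = kwargs or {}
        print("intercepted:", func.__name__)
        unwrapped = [a.t if isinstance(a, LoggingTensorLike) else a for a in args]
        return func(*unwrapped, **kwargs)

torch.add(LoggingTensorLike(torch.ones(2)), 1)   # prints "intercepted: add"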

                    @@ -4545,7 +4545,7 @@

                    Functions
                    -torch.overrides.is_tensor_like(inp)[source]#
                    +torch.overrides.is_tensor_like(inp)[source]#

                    Returns True if the passed-in input is a Tensor-like.

                    Currently, this occurs whenever there’s a __torch_function__ attribute on the type of the input.

                    @@ -4579,7 +4579,7 @@

                    Functions
                    -torch.overrides.is_tensor_method_or_property(func)[source]#
                    +torch.overrides.is_tensor_method_or_property(func)[source]#

                    Returns True if the function passed in is a handler for a method or property belonging to torch.Tensor, as passed into __torch_function__.

                    @@ -4609,7 +4609,7 @@

                    Functions
                    -torch.overrides.wrap_torch_function(dispatcher)[source]#
                    +torch.overrides.wrap_torch_function(dispatcher)[source]#

                    Wraps a given function with __torch_function__ -related functionality.

                    Parameters
diff --git a/2.9/torch_cuda_memory.html b/2.9/torch_cuda_memory.html
index cc5b745fa4f..32dc1c79efe 100644
--- a/2.9/torch_cuda_memory.html
+++ b/2.9/torch_cuda_memory.html
@@ -4458,7 +4458,7 @@

Allocator State History

Snapshot API Reference#

                    -torch.cuda.memory._record_memory_history(enabled='all', context='all', stacks='all', max_entries=9223372036854775807, device=None, clear_history=False, compile_context=False, global_record_annotations=False)[source]#
                    +torch.cuda.memory._record_memory_history(enabled='all', context='all', stacks='all', max_entries=9223372036854775807, device=None, clear_history=False, compile_context=False, global_record_annotations=False)[source]#

                    Enable recording of stack traces associated with memory allocations, so you can tell what allocated any piece of memory in torch.cuda.memory._snapshot().
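A hedged workflow sketch combining this with _dump_snapshot() below (requires a CUDA device; the output filename is arbitrary):

import torch

torch.cuda.memory._record_memory_history(max_entries=100000)    # start recording
x = torch.randn(1024, 1024, device="cuda")                      # produce some allocations
torch.cuda.memory._dump_snapshot("snapshot.pickle")             # inspect at pytorch.org/memory_viz
torch.cuda.memory._record_memory_history(enabled=None)          # stop recording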

                    @@ -4539,7 +4539,7 @@

                    Latency impact
                    -torch.cuda.memory._snapshot(device=None)[source]#
                    +torch.cuda.memory._snapshot(device=None)[source]#

                    Save a snapshot of CUDA memory state at the time it was called.

                    The state is represented as a dictionary with the following structure.

                    class Snapshot(TypedDict):
                    @@ -4622,7 +4622,7 @@ 

                    Latency impact
                    -torch.cuda.memory._dump_snapshot(filename='dump_snapshot.pickle')[source]#
                    +torch.cuda.memory._dump_snapshot(filename='dump_snapshot.pickle')[source]#

                    Save a pickled version of the torch.memory._snapshot() dictionary to a file.

                    This file can be opened by the interactive snapshot viewer at pytorch.org/memory_viz

Snapshot file sizes scale with max_entries and stack trace depth per entry,

diff --git a/replace_github_links.sh b/replace_github_links.sh
index 7ca5e217f84..126fc2edc29 100755
--- a/replace_github_links.sh
+++ b/replace_github_links.sh
@@ -1,18 +1,17 @@
 #!/bin/bash
-# Replaces GitHub links from v2.7.1 to v2.7.0 in all html files in a directory
+# Replaces GitHub links from v2.9.1 to v2.9.0 in all html files in a directory
 #
 # Usage:
 #   ./replace_github_links.sh directory
 #
 # Example (from the root directory)
-#   ./replace_github_links.sh 2.7
+#   ./replace_github_links.sh 2.9

 if [ "$1" == "" ]; then
   echo "Incorrect usage. Correct Usage: replace_github_links.sh <directory>"
   exit 1
 fi

-find $1 -name "*.html" -print0 | xargs -0 sed -i 's|github.com/pytorch/pytorch/blob/v2.7.1|github.com/pytorch/pytorch/blob/v2.7.0|g'
-
-echo "Replaced v2.7.1 with v2.7.0 in GitHub links in $1 directory"
+find $1 -name "*.html" -print0 | xargs -0 sed -i 's|github.com/pytorch/pytorch/blob/v2.9.1|github.com/pytorch/pytorch/blob/v2.9.0|g'
+echo "Replaced v2.9.1 with v2.9.0 in GitHub links in $1 directory"