diff --git a/.tether/man/Layer.txt b/.tether/man/Layer.txt
index c0622237c..eee664282 100644
--- a/.tether/man/Layer.txt
+++ b/.tether/man/Layer.txt
@@ -1,6 +1,6 @@
Help on class Layer in module keras.src.layers.layer:
-class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation.Operation, keras.src.saving.keras_saveable.KerasSaveable)
+class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation.Operation)
| Layer(*args, **kwargs)
|
| This is the class from which all layers inherit.
@@ -243,6 +243,7 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation.
| regularizer=None,
| constraint=None,
| aggregation='none',
+ | overwrite_with_gradient=False,
| name=None
| )
| Add a weight variable to the layer.
@@ -274,6 +275,9 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation.
| the type of multi-replica aggregation to be used for this
| variable when writing custom data parallel training loops.
| Defaults to `"none"`.
+ | overwrite_with_gradient: Boolean, whether to overwrite the variable
+ | with the computed gradient. This is useful for float8 training.
+ | Defaults to `False`.
| name: String name of the variable. Useful for debugging purposes.
|
| build(self, input_shape)
@@ -448,6 +452,12 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation.
| ref_var.assign(value)
| ```
|
+ | symbolic_call(
+ | self,
+ | *args,
+ | **kwargs
+ | )
+ |
| ----------------------------------------------------------------------
| Static methods defined here:
|
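The new `overwrite_with_gradient` argument above is easiest to see at its call site. A minimal, hypothetical sketch of a custom layer using it (the float8 semantics are only as summarized in the docstring; the layer and variable names here are illustrative):

```python
import keras

class ScaledIdentity(keras.layers.Layer):
    """Illustrative layer with a scale variable flagged for gradient overwrite."""

    def build(self, input_shape):
        # With overwrite_with_gradient=True, the variable is overwritten with
        # its computed "gradient" rather than updated by the optimizer, the
        # pattern used for float8 scaling factors (per the docstring above).
        self.scale = self.add_weight(
            shape=(),
            initializer="ones",
            trainable=True,
            overwrite_with_gradient=True,
            name="scale",
        )

    def call(self, inputs):
        return inputs * self.scale
```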
diff --git a/.tether/man/activation_elu.txt b/.tether/man/activation_elu.txt
index d2be33c3f..13a23b8be 100644
--- a/.tether/man/activation_elu.txt
+++ b/.tether/man/activation_elu.txt
@@ -19,6 +19,7 @@ and the information that is propagated to the next layer.
Args:
x: Input tensor.
+    alpha: A scalar, slope of negative section. Defaults to `1.0`.
Reference:
diff --git a/.tether/man/activation_sparse_sigmoid.txt b/.tether/man/activation_sparse_sigmoid.txt
new file mode 100644
index 000000000..6de7b64d3
--- /dev/null
+++ b/.tether/man/activation_sparse_sigmoid.txt
@@ -0,0 +1,18 @@
+__signature__
+keras.activations.sparse_sigmoid(x)
+__doc__
+Sparse sigmoid activation function.
+
+It is defined as
+
+`f(x) = 0` for `x <= -1`,
+`f(x) = 0.5 * (x + 1)` for `-1 < x < 1`,
+`f(x) = 1` for `x >= 1`.
+
+Args:
+ x: Input tensor.
+
+Reference:
+
+- [M. Blondel, A. F. T. Martins, V. Niculae, 2019](https://arxiv.org/pdf/1901.02324)
+
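The piecewise definition above can be sanity-checked with a short sketch built on `keras.ops` (an illustration, not the upstream implementation):

```python
import keras
from keras import ops

def sparse_sigmoid_sketch(x):
    # 0 for x <= -1, 0.5 * (x + 1) for -1 < x < 1, 1 for x >= 1:
    # equivalent to clipping the middle branch into [0, 1].
    return ops.clip(0.5 * (ops.convert_to_tensor(x) + 1.0), 0.0, 1.0)

x = ops.convert_to_tensor([-2.0, -0.5, 0.0, 0.5, 2.0])
print(sparse_sigmoid_sketch(x))             # [0.0, 0.25, 0.5, 0.75, 1.0]
print(keras.activations.sparse_sigmoid(x))  # should agree
```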
diff --git a/.tether/man/callback_early_stopping.txt b/.tether/man/callback_early_stopping.txt
index f4e735529..286c062af 100644
--- a/.tether/man/callback_early_stopping.txt
+++ b/.tether/man/callback_early_stopping.txt
@@ -1,6 +1,6 @@
Help on class EarlyStopping in module keras.src.callbacks.early_stopping:
-class EarlyStopping(keras.src.callbacks.callback.Callback)
+class EarlyStopping(keras.src.callbacks.monitor_callback.MonitorCallback)
| EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto', baseline=None, restore_best_weights=False, start_from_epoch=0)
|
| Stop training when a monitored metric has stopped improving.
@@ -60,6 +60,7 @@ class EarlyStopping(keras.src.callbacks.callback.Callback)
|
| Method resolution order:
| EarlyStopping
+ | keras.src.callbacks.monitor_callback.MonitorCallback
| keras.src.callbacks.callback.Callback
| builtins.object
|
diff --git a/.tether/man/callback_lambda.txt b/.tether/man/callback_lambda.txt
index 44cdc12f8..030982b12 100644
--- a/.tether/man/callback_lambda.txt
+++ b/.tether/man/callback_lambda.txt
@@ -13,8 +13,8 @@ class LambdaCallback(keras.src.callbacks.callback.Callback)
| `epoch`, `logs`
| - `on_train_begin` and `on_train_end` expect one positional argument:
| `logs`
- | - `on_train_batch_begin` and `on_train_batch_end` expect two positional
- | arguments: `batch`, `logs`
+ | - `on_train_batch_begin` and `on_train_batch_end` expect a positional
+ | argument `batch` and a keyword argument `logs`
| - See `Callback` class definition for the full list of functions and their
| expected arguments.
|
@@ -79,3 +79,4 @@ class LambdaCallback(keras.src.callbacks.callback.Callback)
| )
| Initialize self. See help(type(self)) for accurate signature.
|
+
diff --git a/.tether/man/callback_model_checkpoint.txt b/.tether/man/callback_model_checkpoint.txt
index c79cb11f0..13bbc3b37 100644
--- a/.tether/man/callback_model_checkpoint.txt
+++ b/.tether/man/callback_model_checkpoint.txt
@@ -1,6 +1,6 @@
Help on class ModelCheckpoint in module keras.src.callbacks.model_checkpoint:
-class ModelCheckpoint(keras.src.callbacks.callback.Callback)
+class ModelCheckpoint(keras.src.callbacks.monitor_callback.MonitorCallback)
| ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch', initial_value_threshold=None)
|
| Callback to save the Keras model or model weights at some frequency.
@@ -95,9 +95,8 @@ class ModelCheckpoint(keras.src.callbacks.callback.Callback)
| decision to overwrite the current save file is made based on either
| the maximization or the minimization of the monitored quantity.
| For `val_acc`, this should be `"max"`, for `val_loss` this should be
- | `"min"`, etc. In `"auto"` mode, the mode is set to `"max"` if the
- | quantities monitored are `"acc"` or start with `"fmeasure"` and are
- | set to `"min"` for the rest of the quantities.
+ | `"min"`, etc. In `"auto"` mode, the direction is automatically
+ | inferred from the name of the monitored quantity.
| save_weights_only: if `True`, then only the model's weights will be
| saved (`model.save_weights(filepath)`), else the full model is
| saved (`model.save(filepath)`).
@@ -116,6 +115,7 @@ class ModelCheckpoint(keras.src.callbacks.callback.Callback)
|
| Method resolution order:
| ModelCheckpoint
+ | keras.src.callbacks.monitor_callback.MonitorCallback
| keras.src.callbacks.callback.Callback
| builtins.object
|
diff --git a/.tether/man/callback_reduce_lr_on_plateau.txt b/.tether/man/callback_reduce_lr_on_plateau.txt
index 33451653d..ab2d51828 100644
--- a/.tether/man/callback_reduce_lr_on_plateau.txt
+++ b/.tether/man/callback_reduce_lr_on_plateau.txt
@@ -1,6 +1,6 @@
Help on class ReduceLROnPlateau in module keras.src.callbacks.reduce_lr_on_plateau:
-class ReduceLROnPlateau(keras.src.callbacks.callback.Callback)
+class ReduceLROnPlateau(keras.src.callbacks.monitor_callback.MonitorCallback)
| ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.0, **kwargs)
|
| Reduce learning rate when a metric has stopped improving.
@@ -39,6 +39,7 @@ class ReduceLROnPlateau(keras.src.callbacks.callback.Callback)
|
| Method resolution order:
| ReduceLROnPlateau
+ | keras.src.callbacks.monitor_callback.MonitorCallback
| keras.src.callbacks.callback.Callback
| builtins.object
|
diff --git a/.tether/man/config_is_nnx_enabled.txt b/.tether/man/config_is_nnx_enabled.txt
new file mode 100644
index 000000000..658046298
--- /dev/null
+++ b/.tether/man/config_is_nnx_enabled.txt
@@ -0,0 +1,9 @@
+__signature__
+keras.config.is_nnx_enabled()
+__doc__
+Checks whether NNX specific features are enabled for the JAX backend.
+
+Returns:
+ bool: `True` if NNX backend features are enabled, `False` otherwise.
+ Defaults to `False`.
+
diff --git a/.tether/man/config_max_epochs.txt b/.tether/man/config_max_epochs.txt
new file mode 100644
index 000000000..945af4d2e
--- /dev/null
+++ b/.tether/man/config_max_epochs.txt
@@ -0,0 +1,13 @@
+__signature__
+keras.config.max_epochs()
+__doc__
+Get the maximum number of epochs for any call to fit.
+
+Retrieves the limit on the number of epochs set by
+`keras.config.set_max_epochs` or the `KERAS_MAX_EPOCHS` environment
+variable.
+
+Returns:
+ The integer limit on the number of epochs or `None`, if no limit has
+ been set.
+
diff --git a/.tether/man/config_max_steps_per_epoch.txt b/.tether/man/config_max_steps_per_epoch.txt
new file mode 100644
index 000000000..de4b03408
--- /dev/null
+++ b/.tether/man/config_max_steps_per_epoch.txt
@@ -0,0 +1,13 @@
+__signature__
+keras.config.max_steps_per_epoch()
+__doc__
+Get the maximum number of steps for any call to fit/evaluate/predict.
+
+Retrieves the limit on the number of steps per epoch set by
+`keras.config.set_max_steps_per_epoch` or the `KERAS_MAX_STEPS_PER_EPOCH`
+environment variable.
+
+Returns:
+    The integer limit on the number of steps per epoch or `None`, if no
+    limit has been set.
+
diff --git a/.tether/man/config_set_max_epochs.txt b/.tether/man/config_set_max_epochs.txt
new file mode 100644
index 000000000..0845c02a9
--- /dev/null
+++ b/.tether/man/config_set_max_epochs.txt
@@ -0,0 +1,13 @@
+__signature__
+keras.config.set_max_epochs(max_epochs)
+__doc__
+Limit the maximum number of epochs for any call to fit.
+
+This will cap the number of epochs for any training run using `model.fit()`.
+This is purely for debugging, and can also be set via the `KERAS_MAX_EPOCHS`
+environment variable to quickly run a script without modifying its source.
+
+Args:
+ max_epochs: The integer limit on the number of epochs or `None`. If
+ `None`, no limit is applied.
+
diff --git a/.tether/man/config_set_max_steps_per_epoch.txt b/.tether/man/config_set_max_steps_per_epoch.txt
new file mode 100644
index 000000000..f0b060b19
--- /dev/null
+++ b/.tether/man/config_set_max_steps_per_epoch.txt
@@ -0,0 +1,14 @@
+__signature__
+keras.config.set_max_steps_per_epoch(max_steps_per_epoch)
+__doc__
+Limit the maximum number of steps for any call to fit/evaluate/predict.
+
+This will cap the number of steps for a single epoch of a call to `fit()`,
+`evaluate()`, or `predict()`. This is purely for debugging, and can also be
+set via the `KERAS_MAX_STEPS_PER_EPOCH` environment variable to quickly run
+a script without modifying its source.
+
+Args:
+    max_steps_per_epoch: The integer limit on the number of steps per
+        epoch or `None`. If `None`, no limit is applied.
+
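For illustration, a minimal sketch of how these debugging caps might be used together (the values are arbitrary; the functions are those listed in the signatures above):

```python
import keras

# Cap any fit() call at 2 epochs and 10 steps per epoch while debugging.
# The same limits can be set via the KERAS_MAX_EPOCHS and
# KERAS_MAX_STEPS_PER_EPOCH environment variables.
keras.config.set_max_epochs(2)
keras.config.set_max_steps_per_epoch(10)

print(keras.config.max_epochs())           # 2
print(keras.config.max_steps_per_epoch())  # 10

# Remove the limits again.
keras.config.set_max_epochs(None)
keras.config.set_max_steps_per_epoch(None)
```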
diff --git a/.tether/man/keras.activations.txt b/.tether/man/keras.activations.txt
index 48851e264..064021d14 100644
--- a/.tether/man/keras.activations.txt
+++ b/.tether/man/keras.activations.txt
@@ -31,6 +31,7 @@ softmax(x, axis=-1)
softplus(x)
softsign(x)
sparse_plus(x)
+sparse_sigmoid(x)
sparsemax(x, axis=-1)
squareplus(x, b=4)
swish(x)
diff --git a/.tether/man/keras.applications.txt b/.tether/man/keras.applications.txt
index 9f9c401ce..bb8eb26c9 100644
--- a/.tether/man/keras.applications.txt
+++ b/.tether/man/keras.applications.txt
@@ -1,4 +1,4 @@
-convnext: Module(keras.api.applications.convnext)
+convnext: Module(keras.applications.convnext)
ConvNeXtBase(
include_top=True,
include_preprocessing=True,
@@ -54,7 +54,7 @@ ConvNeXtXLarge(
classifier_activation='softmax',
name='convnext_xlarge'
)
-densenet: Module(keras.api.applications.densenet)
+densenet: Module(keras.applications.densenet)
DenseNet121(
include_top=True,
weights='imagenet',
@@ -85,8 +85,8 @@ DenseNet201(
classifier_activation='softmax',
name='densenet201'
)
-efficientnet: Module(keras.api.applications.efficientnet)
-efficientnet_v2: Module(keras.api.applications.efficientnet_v2)
+efficientnet: Module(keras.applications.efficientnet)
+efficientnet_v2: Module(keras.applications.efficientnet_v2)
EfficientNetB0(
include_top=True,
weights='imagenet',
@@ -244,9 +244,9 @@ EfficientNetV2S(
include_preprocessing=True,
name='efficientnetv2-s'
)
-imagenet_utils: Module(keras.api.applications.imagenet_utils)
-inception_resnet_v2: Module(keras.api.applications.inception_resnet_v2)
-inception_v3: Module(keras.api.applications.inception_v3)
+imagenet_utils: Module(keras.applications.imagenet_utils)
+inception_resnet_v2: Module(keras.applications.inception_resnet_v2)
+inception_v3: Module(keras.applications.inception_v3)
InceptionResNetV2(
include_top=True,
weights='imagenet',
@@ -267,7 +267,7 @@ InceptionV3(
classifier_activation='softmax',
name='inception_v3'
)
-mobilenet: Module(keras.api.applications.mobilenet)
+mobilenet: Module(keras.applications.mobilenet)
MobileNet(
input_shape=None,
alpha=1.0,
@@ -281,8 +281,8 @@ MobileNet(
classifier_activation='softmax',
name=None
)
-mobilenet_v2: Module(keras.api.applications.mobilenet_v2)
-mobilenet_v3: Module(keras.api.applications.mobilenet_v3)
+mobilenet_v2: Module(keras.applications.mobilenet_v2)
+mobilenet_v3: Module(keras.applications.mobilenet_v3)
MobileNetV2(
input_shape=None,
alpha=1.0,
@@ -322,7 +322,7 @@ MobileNetV3Small(
include_preprocessing=True,
name='MobileNetV3Small'
)
-nasnet: Module(keras.api.applications.nasnet)
+nasnet: Module(keras.applications.nasnet)
NASNetLarge(
input_shape=None,
include_top=True,
@@ -343,8 +343,8 @@ NASNetMobile(
classifier_activation='softmax',
name='nasnet_mobile'
)
-resnet: Module(keras.api.applications.resnet)
-resnet_v2: Module(keras.api.applications.resnet_v2)
+resnet: Module(keras.applications.resnet)
+resnet_v2: Module(keras.applications.resnet_v2)
ResNet101(
include_top=True,
weights='imagenet',
@@ -385,7 +385,7 @@ ResNet152V2(
classifier_activation='softmax',
name='resnet152v2'
)
-resnet50: Module(keras.api.applications.resnet50)
+resnet50: Module(keras.applications.resnet50)
ResNet50(
include_top=True,
weights='imagenet',
@@ -406,7 +406,7 @@ ResNet50V2(
classifier_activation='softmax',
name='resnet50v2'
)
-vgg16: Module(keras.api.applications.vgg16)
+vgg16: Module(keras.applications.vgg16)
VGG16(
include_top=True,
weights='imagenet',
@@ -417,7 +417,7 @@ VGG16(
classifier_activation='softmax',
name='vgg16'
)
-vgg19: Module(keras.api.applications.vgg19)
+vgg19: Module(keras.applications.vgg19)
VGG19(
include_top=True,
weights='imagenet',
@@ -428,7 +428,7 @@ VGG19(
classifier_activation='softmax',
name='vgg19'
)
-xception: Module(keras.api.applications.xception)
+xception: Module(keras.applications.xception)
Xception(
include_top=True,
weights='imagenet',
diff --git a/.tether/man/keras.config.txt b/.tether/man/keras.config.txt
index ffbe8f439..451ef0afc 100644
--- a/.tether/man/keras.config.txt
+++ b/.tether/man/keras.config.txt
@@ -12,10 +12,15 @@ floatx()
image_data_format()
is_flash_attention_enabled()
is_interactive_logging_enabled()
+is_nnx_enabled()
is_traceback_filtering_enabled()
+max_epochs()
+max_steps_per_epoch()
set_backend(backend)
set_dtype_policy(policy)
set_epsilon(value)
set_floatx(value)
set_image_data_format(data_format)
+set_max_epochs(max_epochs)
+set_max_steps_per_epoch(max_steps_per_epoch)
diff --git a/.tether/man/keras.datasets.txt b/.tether/man/keras.datasets.txt
index f771e1b50..79bb80818 100644
--- a/.tether/man/keras.datasets.txt
+++ b/.tether/man/keras.datasets.txt
@@ -1,9 +1,9 @@
-boston_housing: Module(keras.api.datasets.boston_housing)
-california_housing: Module(keras.api.datasets.california_housing)
-cifar10: Module(keras.api.datasets.cifar10)
-cifar100: Module(keras.api.datasets.cifar100)
-fashion_mnist: Module(keras.api.datasets.fashion_mnist)
-imdb: Module(keras.api.datasets.imdb)
-mnist: Module(keras.api.datasets.mnist)
-reuters: Module(keras.api.datasets.reuters)
+boston_housing: Module(keras.datasets.boston_housing)
+california_housing: Module(keras.datasets.california_housing)
+cifar10: Module(keras.datasets.cifar10)
+cifar100: Module(keras.datasets.cifar100)
+fashion_mnist: Module(keras.datasets.fashion_mnist)
+imdb: Module(keras.datasets.imdb)
+mnist: Module(keras.datasets.mnist)
+reuters: Module(keras.datasets.reuters)
diff --git a/.tether/man/keras.distribution.txt b/.tether/man/keras.distribution.txt
index ee66b1b01..67af628b9 100644
--- a/.tether/man/keras.distribution.txt
+++ b/.tether/man/keras.distribution.txt
@@ -21,6 +21,7 @@ ModelParallel(
*,
layout_map=None,
batch_dim_name=None,
+ auto_shard_dataset=True,
**kwargs
)
set_distribution(value)
diff --git a/.tether/man/keras.layers.txt b/.tether/man/keras.layers.txt
index 91187291b..c6af1f082 100644
--- a/.tether/man/keras.layers.txt
+++ b/.tether/man/keras.layers.txt
@@ -153,6 +153,7 @@ Conv1DTranspose(
kernel_size,
strides=1,
padding='valid',
+ output_padding=None,
data_format=None,
dilation_rate=1,
activation=None,
@@ -190,6 +191,7 @@ Conv2DTranspose(
kernel_size,
strides=(1, 1),
padding='valid',
+ output_padding=None,
data_format=None,
dilation_rate=(1, 1),
activation=None,
@@ -228,6 +230,7 @@ Conv3DTranspose(
strides=(1, 1, 1),
padding='valid',
data_format=None,
+ output_padding=None,
dilation_rate=(1, 1, 1),
activation=None,
use_bias=True,
@@ -354,6 +357,7 @@ Convolution1DTranspose(
kernel_size,
strides=1,
padding='valid',
+ output_padding=None,
data_format=None,
dilation_rate=1,
activation=None,
@@ -391,6 +395,7 @@ Convolution2DTranspose(
kernel_size,
strides=(1, 1),
padding='valid',
+ output_padding=None,
data_format=None,
dilation_rate=(1, 1),
activation=None,
@@ -429,6 +434,7 @@ Convolution3DTranspose(
strides=(1, 1, 1),
padding='valid',
data_format=None,
+ output_padding=None,
dilation_rate=(1, 1, 1),
activation=None,
use_bias=True,
@@ -470,6 +476,7 @@ Dense(
kernel_constraint=None,
bias_constraint=None,
lora_rank=None,
+ lora_alpha=None,
**kwargs
)
DepthwiseConv1D(
@@ -546,6 +553,7 @@ EinsumDense(
kernel_constraint=None,
bias_constraint=None,
lora_rank=None,
+ lora_alpha=None,
**kwargs
)
ELU(alpha=1.0, **kwargs)
@@ -558,6 +566,7 @@ Embedding(
mask_zero=False,
weights=None,
lora_rank=None,
+ lora_alpha=None,
**kwargs
)
Equalization(
@@ -808,7 +817,6 @@ LayerNormalization(
epsilon=0.001,
center=True,
scale=True,
- rms_scaling=False,
beta_initializer='zeros',
gamma_initializer='ones',
beta_regularizer=None,
@@ -1028,6 +1036,17 @@ RandomCrop(
name=None,
**kwargs
)
+RandomElasticTransform(
+ factor=1.0,
+ scale=1.0,
+ interpolation='bilinear',
+ fill_mode='reflect',
+ fill_value=0.0,
+ value_range=(0, 255),
+ seed=None,
+ data_format=None,
+ **kwargs
+)
RandomErasing(
factor=1.0,
scale=(0.02, 0.33),
@@ -1404,6 +1423,7 @@ TimeDistributed(layer, **kwargs)
TorchModuleWrapper(
module,
name=None,
+ output_shape=None,
**kwargs
)
UnitNormalization(axis=-1, **kwargs)
diff --git a/.tether/man/keras.legacy.txt b/.tether/man/keras.legacy.txt
index 13f97b309..43e1bbaf2 100644
--- a/.tether/man/keras.legacy.txt
+++ b/.tether/man/keras.legacy.txt
@@ -1,2 +1,2 @@
-saving: Module(keras.api.legacy.saving)
+saving: Module(keras.legacy.saving)
diff --git a/.tether/man/keras.losses.txt b/.tether/man/keras.losses.txt
index b3c44ae29..33dc78c49 100644
--- a/.tether/man/keras.losses.txt
+++ b/.tether/man/keras.losses.txt
@@ -50,6 +50,11 @@ categorical_focal_crossentropy(
label_smoothing=0.0,
axis=-1
)
+categorical_generalized_cross_entropy(
+ y_true,
+ y_pred,
+ q
+)
categorical_hinge(y_true, y_pred)
CategoricalCrossentropy(
from_logits=False,
@@ -69,6 +74,12 @@ CategoricalFocalCrossentropy(
name='categorical_focal_crossentropy',
dtype=None
)
+CategoricalGeneralizedCrossEntropy(
+ q=0.5,
+ reduction='sum_over_batch_size',
+ name='categorical_generalized_cross_entropy',
+ dtype=None
+)
CategoricalHinge(
reduction='sum_over_batch_size',
name='categorical_hinge',
@@ -197,6 +208,7 @@ SparseCategoricalCrossentropy(
from_logits=False,
ignore_class=None,
reduction='sum_over_batch_size',
+ axis=-1,
name='sparse_categorical_crossentropy',
dtype=None
)
diff --git a/.tether/man/keras.ops.image.txt b/.tether/man/keras.ops.image.txt
index 242330155..4864fdf94 100644
--- a/.tether/man/keras.ops.image.txt
+++ b/.tether/man/keras.ops.image.txt
@@ -16,6 +16,16 @@ crop_images(
target_width=None,
data_format=None
)
+elastic_transform(
+ images,
+ alpha=20.0,
+ sigma=5.0,
+ interpolation='bilinear',
+ fill_mode='reflect',
+ fill_value=0.0,
+ seed=None,
+ data_format=None
+)
extract_patches(
images,
size,
diff --git a/.tether/man/keras.ops.nn.txt b/.tether/man/keras.ops.nn.txt
index a5655066d..141ffedbc 100644
--- a/.tether/man/keras.ops.nn.txt
+++ b/.tether/man/keras.ops.nn.txt
@@ -37,7 +37,7 @@ conv(
conv_transpose(
inputs,
kernel,
- strides,
+ strides=1,
padding='valid',
output_padding=None,
data_format=None,
@@ -75,7 +75,8 @@ dot_product_attention(
mask=None,
scale=None,
is_causal=False,
- flash_attention=None
+ flash_attention=None,
+ attn_logits_soft_cap=None
)
elu(x, alpha=1.0)
gelu(x, approximate=True)
@@ -85,6 +86,14 @@ hard_sigmoid(x)
hard_silu(x)
hard_swish(x)
hard_tanh(x)
+layer_normalization(
+ x,
+ gamma=None,
+ beta=None,
+ axis=-1,
+ epsilon=None,
+ **kwargs
+)
leaky_relu(x, negative_slope=0.2)
log_sigmoid(x)
log_softmax(x, axis=-1)
@@ -132,7 +141,7 @@ relu(x)
relu6(x)
rms_normalization(
x,
- scale=1,
+ scale=None,
axis=-1,
epsilon=None
)
@@ -159,6 +168,7 @@ sparse_categorical_crossentropy(
axis=-1
)
sparse_plus(x)
+sparse_sigmoid(x)
sparsemax(x, axis=-1)
squareplus(x, b=4)
swish(x)
diff --git a/.tether/man/keras.ops.numpy.txt b/.tether/man/keras.ops.numpy.txt
index be45c6fe8..b91190169 100644
--- a/.tether/man/keras.ops.numpy.txt
+++ b/.tether/man/keras.ops.numpy.txt
@@ -16,6 +16,7 @@ amin(
axis=None,
keepdims=False
)
+angle(x)
any(
x,
axis=None,
@@ -61,6 +62,7 @@ average(
axis=None,
weights=None
)
+bartlett(x)
bincount(
x,
weights=None,
@@ -74,7 +76,9 @@ bitwise_not(x)
bitwise_or(x, y)
bitwise_right_shift(x, y)
bitwise_xor(x, y)
+blackman(x)
broadcast_to(x, shape)
+cbrt(x)
ceil(x)
clip(
x,
@@ -85,6 +89,7 @@ concatenate(xs, axis=0)
conj(x)
conjugate(x)
copy(x)
+corrcoef(x)
correlate(
x1,
x2,
@@ -111,6 +116,7 @@ cumsum(
axis=None,
dtype=None
)
+deg2rad(x)
diag(x, k=0)
diagflat(x, k=0)
diagonal(
@@ -128,7 +134,11 @@ digitize(x, bins)
divide(x1, x2)
divide_no_nan(x1, x2)
dot(x1, x2)
-einsum(subscripts, *operands)
+einsum(
+ subscripts,
+ *operands,
+ **kwargs
+)
empty(shape, dtype=None)
equal(x1, x2)
exp(x)
@@ -157,6 +167,9 @@ full_like(
get_item(x, key)
greater(x1, x2)
greater_equal(x1, x2)
+hamming(x)
+hanning(x)
+heaviside(x1, x2)
histogram(
x,
bins=10,
@@ -176,6 +189,7 @@ isclose(
isfinite(x)
isinf(x)
isnan(x)
+kaiser(x, beta)
left_shift(x, y)
less(x1, x2)
less_equal(x1, x2)
@@ -293,6 +307,11 @@ rot90(
axes=(0, 1)
)
round(x, decimals=0)
+searchsorted(
+ sorted_sequence,
+ values,
+ side='left'
+)
select(
condlist,
choicelist,
diff --git a/.tether/man/keras.ops.txt b/.tether/man/keras.ops.txt
index 71440537f..2081aad40 100644
--- a/.tether/man/keras.ops.txt
+++ b/.tether/man/keras.ops.txt
@@ -16,6 +16,7 @@ amin(
axis=None,
keepdims=False
)
+angle(x)
any(
x,
axis=None,
@@ -74,6 +75,7 @@ average_pool(
padding='valid',
data_format=None
)
+bartlett(x)
batch_normalization(
x,
mean,
@@ -101,6 +103,7 @@ bitwise_not(x)
bitwise_or(x, y)
bitwise_right_shift(x, y)
bitwise_xor(x, y)
+blackman(x)
broadcast_to(x, shape)
cast(x, dtype)
categorical_crossentropy(
@@ -109,6 +112,7 @@ categorical_crossentropy(
from_logits=False,
axis=-1
)
+cbrt(x)
ceil(x)
celu(x, alpha=1.0)
cholesky(x)
@@ -136,7 +140,7 @@ conv(
conv_transpose(
inputs,
kernel,
- strides,
+ strides=1,
padding='valid',
output_padding=None,
data_format=None,
@@ -150,6 +154,7 @@ convert_to_tensor(
ragged=None
)
copy(x)
+corrcoef(x)
correlate(
x1,
x2,
@@ -193,6 +198,7 @@ cumsum(
dtype=None
)
custom_gradient(f)
+deg2rad(x)
depthwise_conv(
inputs,
kernel,
@@ -227,12 +233,17 @@ dot_product_attention(
mask=None,
scale=None,
is_causal=False,
- flash_attention=None
+ flash_attention=None,
+ attn_logits_soft_cap=None
)
dtype(x)
eig(x)
eigh(x)
-einsum(subscripts, *operands)
+einsum(
+ subscripts,
+ *operands,
+ **kwargs
+)
elu(x, alpha=1.0)
empty(shape, dtype=None)
equal(x1, x2)
@@ -279,11 +290,14 @@ get_item(x, key)
glu(x, axis=-1)
greater(x1, x2)
greater_equal(x1, x2)
+hamming(x)
+hanning(x)
hard_shrink(x, threshold=0.5)
hard_sigmoid(x)
hard_silu(x)
hard_swish(x)
hard_tanh(x)
+heaviside(x1, x2)
histogram(
x,
bins=10,
@@ -293,7 +307,7 @@ hstack(xs)
identity(n, dtype=None)
ifft2(x)
imag(x)
-image: Module(keras.api.ops.image)
+image: Module(keras.ops.image)
in_top_k(
targets,
predictions,
@@ -322,11 +336,20 @@ istft(
window='hann',
center=True
)
+kaiser(x, beta)
+layer_normalization(
+ x,
+ gamma=None,
+ beta=None,
+ axis=-1,
+ epsilon=None,
+ **kwargs
+)
leaky_relu(x, negative_slope=0.2)
left_shift(x, y)
less(x1, x2)
less_equal(x1, x2)
-linalg: Module(keras.api.ops.linalg)
+linalg: Module(keras.ops.linalg)
linspace(
start,
stop,
@@ -431,7 +454,7 @@ nan_to_num(
)
ndim(x)
negative(x)
-nn: Module(keras.api.ops.nn)
+nn: Module(keras.ops.nn)
nonzero(x)
norm(
x,
@@ -446,7 +469,7 @@ normalize(
epsilon=None
)
not_equal(x1, x2)
-numpy: Module(keras.api.ops.numpy)
+numpy: Module(keras.ops.numpy)
one_hot(
x,
num_classes,
@@ -504,7 +527,7 @@ rfft(x, fft_length=None)
right_shift(x, y)
rms_normalization(
x,
- scale=1,
+ scale=None,
axis=-1,
epsilon=None
)
@@ -608,6 +631,7 @@ sparse_categorical_crossentropy(
axis=-1
)
sparse_plus(x)
+sparse_sigmoid(x)
sparsemax(x, axis=-1)
split(
x,
@@ -720,6 +744,8 @@ vectorize(
signature=None
)
vectorized_map(function, elements)
+view_as_complex(x)
+view_as_real(x)
vstack(xs)
where(
condition,
diff --git a/.tether/man/keras.optimizers.txt b/.tether/man/keras.optimizers.txt
index 047b7c298..557b2e339 100644
--- a/.tether/man/keras.optimizers.txt
+++ b/.tether/man/keras.optimizers.txt
@@ -141,7 +141,7 @@ Lamb(
name='lamb',
**kwargs
)
-legacy: Module(keras.api.optimizers.legacy)
+legacy: Module(keras.optimizers.legacy)
Lion(
learning_rate=0.001,
beta_1=0.9,
@@ -164,6 +164,32 @@ LossScaleOptimizer(
dynamic_growth_steps=2000,
**kwargs
)
+Muon(
+ learning_rate=0.001,
+ adam_beta_1=0.9,
+ adam_beta_2=0.999,
+ epsilon=1e-07,
+ weight_decay=0.1,
+ clipnorm=None,
+ clipvalue=None,
+ global_clipnorm=None,
+ use_ema=False,
+ ema_momentum=0.99,
+ ema_overwrite_frequency=None,
+ loss_scale_factor=None,
+ gradient_accumulation_steps=None,
+ name='muon',
+ exclude_layers=None,
+ exclude_embeddings=True,
+ muon_a=3.4445,
+ muon_b=-4.775,
+ muon_c=2.0315,
+ adam_lr_ratio=0.1,
+ momentum=0.95,
+ ns_steps=6,
+ nesterov=True,
+ **kwargs
+)
Nadam(
learning_rate=0.001,
beta_1=0.9,
@@ -200,7 +226,7 @@ RMSprop(
name='rmsprop',
**kwargs
)
-schedules: Module(keras.api.optimizers.schedules)
+schedules: Module(keras.optimizers.schedules)
serialize(optimizer)
SGD(
learning_rate=0.01,
diff --git a/.tether/man/keras.preprocessing.txt b/.tether/man/keras.preprocessing.txt
index c7da2c534..4ba34405b 100644
--- a/.tether/man/keras.preprocessing.txt
+++ b/.tether/man/keras.preprocessing.txt
@@ -1,4 +1,4 @@
-image: Module(keras.api.preprocessing.image)
+image: Module(keras.preprocessing.image)
image_dataset_from_directory(
directory,
labels='inferred',
@@ -18,7 +18,7 @@ image_dataset_from_directory(
data_format=None,
verbose=True
)
-sequence: Module(keras.api.preprocessing.sequence)
+sequence: Module(keras.preprocessing.sequence)
text_dataset_from_directory(
directory,
labels='inferred',
diff --git a/.tether/man/keras.quantizers.txt b/.tether/man/keras.quantizers.txt
index 0970068c1..c113bde91 100644
--- a/.tether/man/keras.quantizers.txt
+++ b/.tether/man/keras.quantizers.txt
@@ -29,6 +29,7 @@ fake_quant_with_min_max_vars(
axis=None
)
get(identifier, **kwargs)
+pack_int4(arr, axis=0)
quantize_and_dequantize(
inputs,
scale,
@@ -37,4 +38,9 @@ quantize_and_dequantize(
)
Quantizer(output_dtype='int8')
serialize(initializer)
+unpack_int4(
+ packed,
+ orig_len,
+ axis=0
+)
diff --git a/.tether/man/keras.saving.txt b/.tether/man/keras.saving.txt
index 1c17c37ff..24be1e1c6 100644
--- a/.tether/man/keras.saving.txt
+++ b/.tether/man/keras.saving.txt
@@ -38,6 +38,7 @@ save_weights(
model,
filepath,
overwrite=True,
+ max_shard_size=None,
**kwargs
)
serialize_keras_object(obj)
diff --git a/.tether/man/keras.txt b/.tether/man/keras.txt
index 19675df83..9fb6f9837 100644
--- a/.tether/man/keras.txt
+++ b/.tether/man/keras.txt
@@ -1,15 +1,15 @@
-activations: Module(keras.api.activations)
-applications: Module(keras.api.applications)
-backend: Module(keras.api.backend)
-callbacks: Module(keras.api.callbacks)
-config: Module(keras.api.config)
-constraints: Module(keras.api.constraints)
-datasets: Module(keras.api.datasets)
+activations: Module(keras.activations)
+applications: Module(keras.applications)
+backend: Module(keras.backend)
+callbacks: Module(keras.callbacks)
+config: Module(keras.config)
+constraints: Module(keras.constraints)
+datasets: Module(keras.datasets)
device(device_name)
-distribution: Module(keras.api.distribution)
-dtype_policies: Module(keras.api.dtype_policies)
+distribution: Module(keras.distribution)
+dtype_policies: Module(keras.dtype_policies)
DTypePolicy(name=None)
-export: Module(keras.api.export)
+export: Module(keras.export)
FloatDTypePolicy(name=None)
Function(
inputs,
@@ -17,7 +17,7 @@ Function(
name=None
)
Initializer()
-initializers: Module(keras.api.initializers)
+initializers: Module(keras.initializers)
Input(
shape=None,
batch_size=None,
@@ -46,49 +46,51 @@ KerasTensor(
sparse=False,
ragged=False,
record_history=True,
- name=None
+ name=None,
+ **kwargs
)
Layer(*args, **kwargs)
-layers: Module(keras.api.layers)
-legacy: Module(keras.api.legacy)
+layers: Module(keras.layers)
+legacy: Module(keras.legacy)
Loss(
name=None,
reduction='sum_over_batch_size',
dtype=None
)
-losses: Module(keras.api.losses)
+losses: Module(keras.losses)
Metric(dtype=None, name=None)
-metrics: Module(keras.api.metrics)
-mixed_precision: Module(keras.api.mixed_precision)
+metrics: Module(keras.metrics)
+mixed_precision: Module(keras.mixed_precision)
Model(*args, **kwargs)
-models: Module(keras.api.models)
+models: Module(keras.models)
name_scope(name, **kwargs)
Operation(*args, **kwargs)
ops: Module(keras.ops)
Optimizer(*args, **kwargs)
-optimizers: Module(keras.api.optimizers)
-preprocessing: Module(keras.api.preprocessing)
+optimizers: Module(keras.optimizers)
+preprocessing: Module(keras.preprocessing)
Quantizer(output_dtype='int8')
-quantizers: Module(keras.api.quantizers)
-random: Module(keras.api.random)
+quantizers: Module(keras.quantizers)
+random: Module(keras.random)
Regularizer()
-regularizers: Module(keras.api.regularizers)
+regularizers: Module(keras.regularizers)
remat(f)
RematScope(
mode='full',
output_size_threshold=1024,
layer_names=None
)
-saving: Module(keras.api.saving)
+saving: Module(keras.saving)
Sequential(*args, **kwargs)
+src: Module(keras.src)
StatelessScope(
state_mapping=None,
collect_losses=False,
initialize_variables=True
)
SymbolicScope()
-tree: Module(keras.api.tree)
-utils: Module(keras.api.utils)
+tree: Module(keras.tree)
+utils: Module(keras.utils)
Variable(
initializer,
shape=None,
@@ -96,9 +98,11 @@ Variable(
trainable=True,
autocast=True,
aggregation='none',
- name=None
+ synchronization='auto',
+ name=None,
+ **kwargs
)
version()
-visualization: Module(keras.api.visualization)
-wrappers: Module(keras.api.wrappers)
+visualization: Module(keras.visualization)
+wrappers: Module(keras.wrappers)
diff --git a/.tether/man/keras.utils.txt b/.tether/man/keras.utils.txt
index c54a975cb..d164c61d6 100644
--- a/.tether/man/keras.utils.txt
+++ b/.tether/man/keras.utils.txt
@@ -20,7 +20,7 @@ audio_dataset_from_directory(
follow_links=False,
verbose=True
)
-bounding_boxes: Module(keras.api.utils.bounding_boxes)
+bounding_boxes: Module(keras.utils.bounding_boxes)
clear_session(free_memory=True)
Config(**kwargs)
custom_object_scope(custom_objects)
@@ -89,7 +89,7 @@ img_to_array(
)
is_interactive_logging_enabled()
is_keras_tensor(x)
-legacy: Module(keras.api.utils.legacy)
+legacy: Module(keras.utils.legacy)
load_img(
path,
color_mode='rgb',
diff --git a/.tether/man/keras_model.txt b/.tether/man/keras_model.txt
index a288c1b05..6d7a8f60c 100644
--- a/.tether/man/keras_model.txt
+++ b/.tether/man/keras_model.txt
@@ -303,31 +303,47 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr
| skip_mismatch=False,
| **kwargs
| )
- | Load weights from a file saved via `save_weights()`.
+ | Load the weights from a single file or sharded files.
|
- | Weights are loaded based on the network's
- | topology. This means the architecture should be the same as when the
- | weights were saved. Note that layers that don't have weights are not
- | taken into account in the topological ordering, so adding or removing
- | layers is fine as long as they don't have weights.
+ | Weights are loaded based on the network's topology. This means the
+ | architecture should be the same as when the weights were saved. Note
+ | that layers that don't have weights are not taken into account in the
+ | topological ordering, so adding or removing layers is fine as long as
+ | they don't have weights.
|
| **Partial weight loading**
|
| If you have modified your model, for instance by adding a new layer
- | (with weights) or by changing the shape of the weights of a layer,
- | you can choose to ignore errors and continue loading
- | by setting `skip_mismatch=True`. In this case any layer with
- | mismatching weights will be skipped. A warning will be displayed
- | for each skipped layer.
+ | (with weights) or by changing the shape of the weights of a layer, you
+ | can choose to ignore errors and continue loading by setting
+ | `skip_mismatch=True`. In this case any layer with mismatching weights
+ | will be skipped. A warning will be displayed for each skipped layer.
+ |
+ | **Sharding**
+ |
+ | When loading sharded weights, it is important to specify a `filepath`
+ | that ends with `*.weights.json`, which is used as the configuration file.
+ | Additionally, the sharded files `*_xxxxx.weights.h5` must be in the same
+ | directory as the configuration file.
|
| Args:
- | filepath: String, path to the weights file to load.
- | It can either be a `.weights.h5` file
- | or a legacy `.h5` weights file.
+ |     filepath: `str` or `pathlib.Path` object. Path to the weights
+ |         file to load. When sharding, the filepath must end in
+ |         `.weights.json`.
| skip_mismatch: Boolean, whether to skip loading of layers where
| there is a mismatch in the number of weights, or a mismatch in
| the shape of the weights.
|
+ | Example:
+ |
+ | ```python
+ | # Load the weights in a single file.
+ | model.load_weights("model.weights.h5")
+ |
+ | # Load the weights in sharded files.
+ | model.load_weights("model.weights.json")
+ | ```
+ |
| quantize(
| self,
| mode,
@@ -352,6 +368,16 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr
| )
| Saves a model as a `.keras` file.
|
+ | Note that `model.save()` is an alias for `keras.saving.save_model()`.
+ |
+ | The saved `.keras` file contains:
+ |
+ | - The model's configuration (architecture)
+ | - The model's weights
+ | - The model's optimizer's state (if any)
+ |
+ | Thus models can be reinstantiated in the exact same state.
+ |
| Args:
| filepath: `str` or `pathlib.Path` object.
| The path where to save the model. Must end in `.keras`
@@ -380,29 +406,63 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr
| assert np.allclose(model.predict(x), loaded_model.predict(x))
| ```
|
- | Note that `model.save()` is an alias for `keras.saving.save_model()`.
- |
- | The saved `.keras` file contains:
- |
- | - The model's configuration (architecture)
- | - The model's weights
- | - The model's optimizer's state (if any)
- |
- | Thus models can be reinstantiated in the exact same state.
- |
| save_weights(
| self,
| filepath,
- | overwrite=True
+ | overwrite=True,
+ | max_shard_size=None
| )
- | Saves all layer weights to a `.weights.h5` file.
+ | Saves all weights to a single file or sharded files.
+ |
+ | By default, the weights will be saved in a single `.weights.h5` file.
+ | If sharding is enabled (`max_shard_size` is not `None`), the weights
+ | will be saved in multiple files, each with a size at most
+ | `max_shard_size` (in GB). Additionally, a configuration file
+ | `.weights.json` will contain the metadata for the sharded files.
+ |
+ | The saved sharded files contain:
+ |
+ | - `*.weights.json`: The configuration file containing 'metadata' and
+ | 'weight_map'.
+ | - `*_xxxxxx.weights.h5`: The sharded files containing only the
+ | weights.
|
| Args:
- | filepath: `str` or `pathlib.Path` object.
- | Path where to save the model. Must end in `.weights.h5`.
- | overwrite: Whether we should overwrite any existing model
- | at the target location, or instead ask the user
- | via an interactive prompt.
+ | filepath: `str` or `pathlib.Path` object. Path where the weights
+ | will be saved. When sharding, the filepath must end in
+ | `.weights.json`. If `.weights.h5` is provided, it will be
+ | overridden.
+ | overwrite: Whether to overwrite any existing weights at the target
+ | location or instead ask the user via an interactive prompt.
+ | max_shard_size: `int` or `float`. Maximum size in GB for each
+ | sharded file. If `None`, no sharding will be done. Defaults to
+ | `None`.
+ |
+ | Example:
+ |
+ | ```python
+ | # Instantiate an EfficientNetV2L model with about 454MB of weights.
+ | model = keras.applications.EfficientNetV2L(weights=None)
+ |
+ | # Save the weights in a single file.
+ | model.save_weights("model.weights.h5")
+ |
+ | # Save the weights in sharded files. Using `max_shard_size=0.25` means
+ | # each sharded file will be at most ~250MB.
+ | model.save_weights("model.weights.json", max_shard_size=0.25)
+ |
+ | # Load the weights in a new model with the same architecture.
+ | loaded_model = keras.applications.EfficientNetV2L(weights=None)
+ | loaded_model.load_weights("model.weights.h5")
+ | x = keras.random.uniform((1, 480, 480, 3))
+ | assert np.allclose(model.predict(x), loaded_model.predict(x))
+ |
+ | # Load the sharded weights in a new model with the same architecture.
+ | loaded_model = keras.applications.EfficientNetV2L(weights=None)
+ | loaded_model.load_weights("model.weights.json")
+ | x = keras.random.uniform((1, 480, 480, 3))
+ | assert np.allclose(model.predict(x), loaded_model.predict(x))
+ | ```
|
| set_state_tree(self, state_tree)
| Assigns values to variables of the model.
diff --git a/.tether/man/keras_model_sequential.txt b/.tether/man/keras_model_sequential.txt
index c13d33388..2d91bcd38 100644
--- a/.tether/man/keras_model_sequential.txt
+++ b/.tether/man/keras_model_sequential.txt
@@ -87,7 +87,8 @@ class Sequential(keras.src.models.model.Model)
| self,
| inputs,
| training=None,
- | mask=None
+ | mask=None,
+ | **kwargs
| )
|
| compute_output_shape(self, input_shape)
@@ -96,7 +97,8 @@ class Sequential(keras.src.models.model.Model)
| self,
| inputs,
| training=None,
- | mask=None
+ | mask=None,
+ | **kwargs
| )
|
| get_config(self)
diff --git a/.tether/man/keras_variable.txt b/.tether/man/keras_variable.txt
index e5bde6b92..75d0602a7 100644
--- a/.tether/man/keras_variable.txt
+++ b/.tether/man/keras_variable.txt
@@ -8,7 +8,9 @@ class Variable(builtins.object)
| trainable=True,
| autocast=True,
| aggregation='none',
- | name=None
+ | synchronization='auto',
+ | name=None,
+ | **kwargs
| )
|
| Represents a backend-agnostic variable in Keras.
@@ -49,6 +51,7 @@ class Variable(builtins.object)
| value: The current value of the variable (NumPy array or tensor).
| name: The name of the variable (string).
| path: The path of the variable within the Keras model or layer (string).
+ | kwargs: Additional backend-specific keyword arguments.
|
| Examples:
|
@@ -120,7 +123,9 @@ class Variable(builtins.object)
| trainable=True,
| autocast=True,
| aggregation='none',
- | name=None
+ | synchronization='auto',
+ | name=None,
+ | **kwargs
| )
| Initialize self. See help(type(self)) for accurate signature.
|
@@ -215,6 +220,9 @@ class Variable(builtins.object)
| shape
| The shape of the variable.
|
+ | synchronization
+ | The strategy for synchronizing this variable.
+ |
| value
| The current value of the variable (numpy array or backend tensor).
|
diff --git a/.tether/man/layer_average_pooling_2d.txt b/.tether/man/layer_average_pooling_2d.txt
index f1cf610f4..9ba3b156a 100644
--- a/.tether/man/layer_average_pooling_2d.txt
+++ b/.tether/man/layer_average_pooling_2d.txt
@@ -16,7 +16,7 @@ class AveragePooling2D(keras.src.layers.pooling.base_pooling.BasePooling)
| (when `input_shape >= pool_size`)
|
| The resulting output shape when using the `"same"` padding option is:
- | `output_shape = math.floor((input_shape - 1) / strides) + 1`
+ | `output_shape = input_shape`
|
| Args:
| pool_size: int or tuple of 2 integers, factors by which to downscale
diff --git a/.tether/man/layer_conv_1d_transpose.txt b/.tether/man/layer_conv_1d_transpose.txt
index e6f51c257..6b5266904 100644
--- a/.tether/man/layer_conv_1d_transpose.txt
+++ b/.tether/man/layer_conv_1d_transpose.txt
@@ -1,7 +1,7 @@
Help on class Conv1DTranspose in module keras.src.layers.convolutional.conv1d_transpose:
class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose)
- | Conv1DTranspose(filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
+ | Conv1DTranspose(filters, kernel_size, strides=1, padding='valid', output_padding=None, data_format=None, dilation_rate=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
|
| 1D transposed convolution layer.
|
@@ -23,6 +23,10 @@ class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| `"valid"` means no padding. `"same"` results in padding evenly to
| the left/right or up/down of the input such that output has the same
| height/width dimension as the input.
+ | output_padding: An integer tuple/list of 1 integer specifying the
+ | amount of padding along the time dimension of the output tensor.
+ | The amount of output padding must be lower than the stride.
+ | If set to `None` (default), the output shape is inferred.
| data_format: string, either `"channels_last"` or `"channels_first"`.
| The ordering of the dimensions in the inputs. `"channels_last"`
| corresponds to inputs with shape `(batch, steps, features)`
@@ -30,8 +34,11 @@ class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| `(batch, features, steps)`. It defaults to the `image_data_format`
| value found in your Keras config file at `~/.keras/keras.json`.
| If you never set it, then it will be `"channels_last"`.
- | dilation_rate: int or tuple/list of 1 integers, specifying the dilation
- | rate to use for dilated transposed convolution.
+ | dilation_rate: An integer tuple/list of 1 integer, specifying
+ | the dilation rate to use for dilated convolution.
+ | Currently, specifying a `dilation_rate` value != 1 is
+ | incompatible with specifying a stride value != 1.
+ | Also dilation rate larger than 1 is not currently supported.
| activation: Activation function. If `None`, no activation is applied.
| use_bias: bool, if `True`, bias will be added to the output.
| kernel_initializer: Initializer for the convolution kernel. If `None`,
@@ -104,6 +111,7 @@ class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| kernel_size,
| strides=1,
| padding='valid',
+ | output_padding=None,
| data_format=None,
| dilation_rate=1,
| activation=None,
diff --git a/.tether/man/layer_conv_2d_transpose.txt b/.tether/man/layer_conv_2d_transpose.txt
index b540c565c..07e4151f9 100644
--- a/.tether/man/layer_conv_2d_transpose.txt
+++ b/.tether/man/layer_conv_2d_transpose.txt
@@ -1,7 +1,7 @@
Help on class Conv2DTranspose in module keras.src.layers.convolutional.conv2d_transpose:
class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose)
- | Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
+ | Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
|
| 2D transposed convolution layer.
|
@@ -23,6 +23,14 @@ class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| `"valid"` means no padding. `"same"` results in padding evenly to
| the left/right or up/down of the input. When `padding="same"` and
| `strides=1`, the output has the same size as the input.
+ | output_padding: An integer or tuple/list of 2 integers,
+ | specifying the amount of padding along the height and width
+ | of the output tensor.
+ | Can be a single integer to specify the same value for all
+ | spatial dimensions.
+ | The amount of output padding along a given dimension must be
+ | lower than the stride along that same dimension.
+ | If set to `None` (default), the output shape is inferred.
| data_format: string, either `"channels_last"` or `"channels_first"`.
| The ordering of the dimensions in the inputs. `"channels_last"`
| corresponds to inputs with shape
@@ -32,8 +40,13 @@ class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| `image_data_format` value found in your Keras config file at
| `~/.keras/keras.json`. If you never set it, then it will be
| `"channels_last"`.
- | dilation_rate: int or tuple/list of 1 integers, specifying the dilation
- | rate to use for dilated transposed convolution.
+ | dilation_rate: An integer or tuple/list of 2 integers,
+ | specifying the dilation rate for
+ | all spatial dimensions for dilated convolution.
+ | Specifying different dilation rates
+ | for different dimensions is not supported.
+ | Currently, specifying any `dilation_rate` value != 1 is
+ | incompatible with specifying any stride value != 1.
| activation: Activation function. If `None`, no activation is applied.
| use_bias: bool, if `True`, bias will be added to the output.
| kernel_initializer: Initializer for the convolution kernel. If `None`,
@@ -106,6 +119,7 @@ class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| kernel_size,
| strides=(1, 1),
| padding='valid',
+ | output_padding=None,
| data_format=None,
| dilation_rate=(1, 1),
| activation=None,
diff --git a/.tether/man/layer_conv_3d_transpose.txt b/.tether/man/layer_conv_3d_transpose.txt
index 35f84966f..9cdb0d464 100644
--- a/.tether/man/layer_conv_3d_transpose.txt
+++ b/.tether/man/layer_conv_3d_transpose.txt
@@ -1,7 +1,7 @@
Help on class Conv3DTranspose in module keras.src.layers.convolutional.conv3d_transpose:
class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose)
- | Conv3DTranspose(filters, kernel_size, strides=(1, 1, 1), padding='valid', data_format=None, dilation_rate=(1, 1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
+ | Conv3DTranspose(filters, kernel_size, strides=(1, 1, 1), padding='valid', data_format=None, output_padding=None, dilation_rate=(1, 1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
|
| 3D transposed convolution layer.
|
@@ -23,6 +23,14 @@ class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| `"valid"` means no padding. `"same"` results in padding evenly to
| the left/right or up/down of the input. When `padding="same"` and
| `strides=1`, the output has the same size as the input.
+ | output_padding: An integer or tuple/list of 3 integers,
+ | specifying the amount of padding along the depth, height, and
+ | width.
+ | Can be a single integer to specify the same value for all
+ | spatial dimensions.
+ | The amount of output padding along a given dimension must be
+ | lower than the stride along that same dimension.
+ | If set to `None` (default), the output shape is inferred.
| data_format: string, either `"channels_last"` or `"channels_first"`.
| The ordering of the dimensions in the inputs. `"channels_last"`
| corresponds to inputs with shape
@@ -32,8 +40,12 @@ class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| It defaults to the `image_data_format` value found in your Keras
| config file at `~/.keras/keras.json`. If you never set it, then it
| will be `"channels_last"`.
- | dilation_rate: int or tuple/list of 1 integers, specifying the dilation
- | rate to use for dilated transposed convolution.
+ | dilation_rate: an integer or tuple/list of 3 integers, specifying
+ | the dilation rate to use for dilated convolution.
+ | Can be a single integer to specify the same value for
+ | all spatial dimensions.
+ | Currently, specifying any `dilation_rate` value != 1 is
+ | incompatible with specifying any stride value != 1.
| activation: Activation function. If `None`, no activation is applied.
| use_bias: bool, if `True`, bias will be added to the output.
| kernel_initializer: Initializer for the convolution kernel. If `None`,
@@ -112,6 +124,7 @@ class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon
| strides=(1, 1, 1),
| padding='valid',
| data_format=None,
+ | output_padding=None,
| dilation_rate=(1, 1, 1),
| activation=None,
| use_bias=True,
diff --git a/.tether/man/layer_dense.txt b/.tether/man/layer_dense.txt
index b5d9b7b12..812851bc2 100644
--- a/.tether/man/layer_dense.txt
+++ b/.tether/man/layer_dense.txt
@@ -1,7 +1,7 @@
Help on class Dense in module keras.src.layers.core.dense:
class Dense(keras.src.layers.layer.Layer)
- | Dense(units, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, **kwargs)
+ | Dense(units, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, lora_alpha=None, **kwargs)
|
| Just your regular densely-connected NN layer.
|
@@ -46,6 +46,11 @@ class Dense(keras.src.layers.layer.Layer)
| computation cost of fine-tuning large dense layers.
| You can also enable LoRA on an existing
| `Dense` layer by calling `layer.enable_lora(rank)`.
+ | lora_alpha: Optional integer. If set, this parameter scales the
+ | low-rank adaptation delta (computed as the product of two lower-rank
+ | trainable matrices) during the forward pass. The delta is scaled by
+ | `lora_alpha / lora_rank`, allowing you to fine-tune the strength of
+ | the LoRA adjustment independently of `lora_rank`.
|
| Input shape:
| N-D tensor with shape: `(batch_size, ..., input_dim)`.
@@ -83,6 +88,7 @@ class Dense(keras.src.layers.layer.Layer)
| kernel_constraint=None,
| bias_constraint=None,
| lora_rank=None,
+ | lora_alpha=None,
| **kwargs
| )
| Initialize self. See help(type(self)) for accurate signature.
@@ -100,6 +106,7 @@ class Dense(keras.src.layers.layer.Layer)
| enable_lora(
| self,
| rank,
+ | lora_alpha=None,
| a_initializer='he_uniform',
| b_initializer='zeros'
| )
@@ -127,7 +134,7 @@ class Dense(keras.src.layers.layer.Layer)
|
| quantized_build(
| self,
- | input_shape,
+ | kernel_shape,
| mode
| )
|
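A rough illustration of the `lora_alpha` scaling described in the `Dense` docstring above (illustrative only, not the layer's internal code): with LoRA enabled, the low-rank delta is scaled by `lora_alpha / lora_rank` in the forward pass.

```python
import numpy as np
import keras

layer = keras.layers.Dense(32)
layer.build((None, 64))

# Enable LoRA with rank 4 and alpha 8: the delta (lora_a @ lora_b) is scaled
# by lora_alpha / lora_rank = 8 / 4 = 2 during the forward pass.
layer.enable_lora(4, lora_alpha=8)

x = np.random.random((2, 64)).astype("float32")
y = layer(x)
print(y.shape)  # (2, 32)
```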
diff --git a/.tether/man/layer_discretization.txt b/.tether/man/layer_discretization.txt
index 5bc0d285f..b0420b914 100644
--- a/.tether/man/layer_discretization.txt
+++ b/.tether/man/layer_discretization.txt
@@ -132,8 +132,6 @@ class Discretization(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer)
| repeating dataset, you must specify the `steps` argument. This
| argument is not supported with array inputs or list inputs.
|
- | build(self, input_shape=None)
- |
| call(self, inputs)
|
| compute_output_spec(self, inputs)
diff --git a/.tether/man/layer_einsum_dense.txt b/.tether/man/layer_einsum_dense.txt
index 178c0c90b..1347c4be0 100644
--- a/.tether/man/layer_einsum_dense.txt
+++ b/.tether/man/layer_einsum_dense.txt
@@ -1,7 +1,7 @@
Help on class EinsumDense in module keras.src.layers.core.einsum_dense:
class EinsumDense(keras.src.layers.layer.Layer)
- | EinsumDense(equation, output_shape, activation=None, bias_axes=None, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, **kwargs)
+ | EinsumDense(equation, output_shape, activation=None, bias_axes=None, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, lora_alpha=None, **kwargs)
|
| A layer that uses `einsum` as the backing computation.
|
@@ -43,6 +43,11 @@ class EinsumDense(keras.src.layers.layer.Layer)
| computation cost of fine-tuning large dense layers.
| You can also enable LoRA on an existing
| `EinsumDense` layer by calling `layer.enable_lora(rank)`.
+ | lora_alpha: Optional integer. If set, this parameter scales the
+ | low-rank adaptation delta (computed as the product of two lower-rank
+ | trainable matrices) during the forward pass. The delta is scaled by
+ | `lora_alpha / lora_rank`, allowing you to fine-tune the strength of
+ | the LoRA adjustment independently of `lora_rank`.
| **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
|
| Examples:
@@ -122,6 +127,7 @@ class EinsumDense(keras.src.layers.layer.Layer)
| kernel_constraint=None,
| bias_constraint=None,
| lora_rank=None,
+ | lora_alpha=None,
| **kwargs
| )
| Initialize self. See help(type(self)) for accurate signature.
@@ -139,6 +145,7 @@ class EinsumDense(keras.src.layers.layer.Layer)
| enable_lora(
| self,
| rank,
+ | lora_alpha=None,
| a_initializer='he_uniform',
| b_initializer='zeros'
| )
@@ -166,7 +173,7 @@ class EinsumDense(keras.src.layers.layer.Layer)
|
| quantized_build(
| self,
- | input_shape,
+ | kernel_shape,
| mode
| )
|
diff --git a/.tether/man/layer_embedding.txt b/.tether/man/layer_embedding.txt
index 82820af0d..00f8568d9 100644
--- a/.tether/man/layer_embedding.txt
+++ b/.tether/man/layer_embedding.txt
@@ -1,7 +1,7 @@
Help on class Embedding in module keras.src.layers.core.embedding:
class Embedding(keras.src.layers.layer.Layer)
- | Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, embeddings_constraint=None, mask_zero=False, weights=None, lora_rank=None, **kwargs)
+ | Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, embeddings_constraint=None, mask_zero=False, weights=None, lora_rank=None, lora_alpha=None, **kwargs)
|
| Turns nonnegative integers (indexes) into dense vectors of fixed size.
|
@@ -55,6 +55,11 @@ class Embedding(keras.src.layers.layer.Layer)
| computation cost of fine-tuning large embedding layers.
| You can also enable LoRA on an existing
| `Embedding` layer by calling `layer.enable_lora(rank)`.
+ | lora_alpha: Optional integer. If set, this parameter scales the
+ | low-rank adaptation delta (computed as the product of two lower-rank
+ | trainable matrices) during the forward pass. The delta is scaled by
+ | `lora_alpha / lora_rank`, allowing you to fine-tune the strength of
+ | the LoRA adjustment independently of `lora_rank`.
|
| Input shape:
| 2D tensor with shape: `(batch_size, input_length)`.
@@ -85,6 +90,7 @@ class Embedding(keras.src.layers.layer.Layer)
| mask_zero=False,
| weights=None,
| lora_rank=None,
+ | lora_alpha=None,
| **kwargs
| )
| Initialize self. See help(type(self)) for accurate signature.
@@ -101,9 +107,12 @@ class Embedding(keras.src.layers.layer.Layer)
|
| compute_output_shape(self, input_shape)
|
+ | compute_output_spec(self, inputs)
+ |
| enable_lora(
| self,
| rank,
+ | lora_alpha=None,
| a_initializer='he_uniform',
| b_initializer='zeros'
| )
@@ -131,7 +140,7 @@ class Embedding(keras.src.layers.layer.Layer)
|
| quantized_build(
| self,
- | input_shape,
+ | embeddings_shape,
| mode
| )
|
diff --git a/.tether/man/layer_layer_normalization.txt b/.tether/man/layer_layer_normalization.txt
index e8ec5fb04..f941f9d93 100644
--- a/.tether/man/layer_layer_normalization.txt
+++ b/.tether/man/layer_layer_normalization.txt
@@ -1,7 +1,7 @@
Help on class LayerNormalization in module keras.src.layers.normalization.layer_normalization:
class LayerNormalization(keras.src.layers.layer.Layer)
- | LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True, rms_scaling=False, beta_initializer='zeros', gamma_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, **kwargs)
+ | LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, **kwargs)
|
| Layer normalization layer (Ba et al., 2016).
|
@@ -77,12 +77,6 @@ class LayerNormalization(keras.src.layers.layer.Layer)
| When the next layer is linear (also e.g. `nn.relu`), this can be
| disabled since the scaling will be done by the next layer.
| Defaults to `True`.
- | rms_scaling: If True, `center` and `scale` are ignored, and the
- | inputs are scaled by `gamma` and the inverse square root
- | of the square of all inputs. This is an approximate and faster
- | approach that avoids ever computing the mean of the input. Note that
- | this *isn't* equivalent to the computation that the
- | `keras.layers.RMSNormalization` layer performs.
| beta_initializer: Initializer for the beta weight. Defaults to zeros.
| gamma_initializer: Initializer for the gamma weight. Defaults to ones.
| beta_regularizer: Optional regularizer for the beta weight.
@@ -119,7 +113,6 @@ class LayerNormalization(keras.src.layers.layer.Layer)
| epsilon=0.001,
| center=True,
| scale=True,
- | rms_scaling=False,
| beta_initializer='zeros',
| gamma_initializer='ones',
| beta_regularizer=None,
diff --git a/.tether/man/layer_random_elastic_transform.txt b/.tether/man/layer_random_elastic_transform.txt
new file mode 100644
index 000000000..ff1a2df5a
--- /dev/null
+++ b/.tether/man/layer_random_elastic_transform.txt
@@ -0,0 +1,130 @@
+Help on class RandomElasticTransform in module keras.src.layers.preprocessing.image_preprocessing.random_elastic_transform:
+
+class RandomElasticTransform(keras.src.layers.preprocessing.image_preprocessing.base_image_preprocessing_layer.BaseImagePreprocessingLayer)
+ | RandomElasticTransform(factor=1.0, scale=1.0, interpolation='bilinear', fill_mode='reflect', fill_value=0.0, value_range=(0, 255), seed=None, data_format=None, **kwargs)
+ |
+ | A preprocessing layer that applies random elastic transformations.
+ |
+ | This layer distorts input images by applying elastic deformations,
+ | simulating a physically realistic transformation. The magnitude of the
+ | distortion is controlled by the `scale` parameter, while the `factor`
+ | determines the probability of applying the transformation.
+ |
+ | Args:
+ | factor: A single float or a tuple of two floats.
+ | `factor` controls the probability of applying the transformation.
+ |         - `factor=0.0` ensures no transformation is applied.
+ |         - `factor=1.0` means the transformation is always applied.
+ | - If a tuple `(min, max)` is provided, a probability value
+ | is sampled between `min` and `max` for each image.
+ | - If a single float is provided, a probability is sampled
+ | between `0.0` and the given float.
+ | Default is 1.0.
+ | scale: A float or a tuple of two floats defining the magnitude of
+ | the distortion applied.
+ | - If a tuple `(min, max)` is provided, a random scale value is
+ | sampled within this range.
+ | - If a single float is provided, a random scale value is sampled
+ | between `0.0` and the given float.
+ | Default is 1.0.
+ | interpolation: Interpolation mode. Supported values: `"nearest"`,
+ | `"bilinear"`.
+ | fill_mode: Points outside the boundaries of the input are filled
+ | according to the given mode. Available methods are `"constant"`,
+ |         `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`.
+ | - `"reflect"`: `(d c b a | a b c d | d c b a)`
+ | The input is extended by reflecting about the edge of the last
+ | pixel.
+ | - `"constant"`: `(k k k k | a b c d | k k k k)`
+ | The input is extended by filling all values beyond
+ | the edge with the same constant value k specified by
+ | `fill_value`.
+ | - `"wrap"`: `(a b c d | a b c d | a b c d)`
+ | The input is extended by wrapping around to the opposite edge.
+ | - `"nearest"`: `(a a a a | a b c d | d d d d)`
+ | The input is extended by the nearest pixel.
+ | Note that when using torch backend, `"reflect"` is redirected to
+ | `"mirror"` `(c d c b | a b c d | c b a b)` because torch does not
+ | support `"reflect"`.
+ | Note that torch backend does not support `"wrap"`.
+ |     fill_value: A float representing the value used to fill points
+ |         outside the boundaries when `fill_mode="constant"`.
+ | value_range: the range of values the incoming images will have.
+ | Represented as a two-number tuple written `[low, high]`. This is
+ | typically either `[0, 1]` or `[0, 255]` depending on how your
+ | preprocessing pipeline is set up.
+ | seed: Integer. Used to create a random seed.
+ |
+ | Method resolution order:
+ | RandomElasticTransform
+ | keras.src.layers.preprocessing.image_preprocessing.base_image_preprocessing_layer.BaseImagePreprocessingLayer
+ | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | keras.src.saving.keras_saveable.KerasSaveable
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | factor=1.0,
+ | scale=1.0,
+ | interpolation='bilinear',
+ | fill_mode='reflect',
+ | fill_value=0.0,
+ | value_range=(0, 255),
+ | seed=None,
+ | data_format=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+ | get_elastic_transform_params(
+ | self,
+ | height,
+ | width,
+ | factor
+ | )
+ |
+ | get_random_transformation(
+ | self,
+ | data,
+ | training=True,
+ | seed=None
+ | )
+ |
+ | transform_images(
+ | self,
+ | images,
+ | transformation,
+ | training=True
+ | )
+ |
+ | transform_labels(
+ | self,
+ | labels,
+ | transformation,
+ | training=True
+ | )
+ |
+ | transform_segmentation_masks(
+ | self,
+ | segmentation_masks,
+ | transformation,
+ | training=True
+ | )
+ |
+
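The new layer above ships without a usage example; a minimal sketch, assuming the class is exported publicly as `keras.layers.RandomElasticTransform` (input values are illustrative):

```python
import numpy as np
import keras

layer = keras.layers.RandomElasticTransform(
    factor=1.0,            # always apply the transform
    scale=0.5,             # moderate distortion magnitude
    value_range=(0, 255),
    seed=42,
)
images = np.random.randint(0, 256, size=(2, 64, 64, 3)).astype("float32")
augmented = layer(images, training=True)  # output shape: (2, 64, 64, 3)
```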
diff --git a/.tether/man/layer_rescaling.txt b/.tether/man/layer_rescaling.txt
index 5f3809581..140f33d12 100644
--- a/.tether/man/layer_rescaling.txt
+++ b/.tether/man/layer_rescaling.txt
@@ -24,8 +24,16 @@ class Rescaling(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer)
| (independently of which backend you're using).
|
| Args:
- | scale: Float, the scale to apply to the inputs.
- | offset: Float, the offset to apply to the inputs.
+ | scale: Float, int, list, tuple or np.ndarray.
+ | The scale to apply to the inputs.
+ | If scalar, the same scale will be applied to
+ | all features or channels of input. If a list, tuple or
+ | 1D array, the scaling is applied per channel.
+ | offset: Float, int, list/tuple or numpy ndarray.
+ | The offset to apply to the inputs.
+ |         If scalar, the same offset will be applied to
+ |         all features or channels of the input. If a list, tuple or
+ |         1D array, the offset is applied per channel.
| **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
|
| Method resolution order:
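The per-channel behaviour described above amounts to broadcasting `scale` and `offset` over the channel axis. A short sketch, with illustrative normalization constants:

```python
import numpy as np
import keras

# Scalar: the same rescaling applied to every feature/channel.
to_unit_range = keras.layers.Rescaling(scale=1.0 / 255.0)

# Per-channel: one scale and offset per channel, e.g. (x - mean) / std
# expressed as x * (1 / std) + (-mean / std).
mean = np.array([123.7, 116.3, 103.5])
std = np.array([58.4, 57.1, 57.4])
normalize = keras.layers.Rescaling(scale=1.0 / std, offset=-mean / std)

x = np.random.uniform(0, 255, size=(1, 8, 8, 3)).astype("float32")
y = normalize(x)  # each of the 3 channels rescaled independently
```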
diff --git a/.tether/man/layer_rnn.txt b/.tether/man/layer_rnn.txt
index b61ea0711..d86c3c0c9 100644
--- a/.tether/man/layer_rnn.txt
+++ b/.tether/man/layer_rnn.txt
@@ -140,7 +140,6 @@ class RNN(keras.src.layers.layer.Layer)
| shape=(self.units, self.units),
| initializer='uniform',
| name='recurrent_kernel')
- | self.built = True
|
| def call(self, inputs, states):
| prev_output = states[0]
diff --git a/.tether/man/layer_torch_module_wrapper.txt b/.tether/man/layer_torch_module_wrapper.txt
index b63b0377f..9d50c1956 100644
--- a/.tether/man/layer_torch_module_wrapper.txt
+++ b/.tether/man/layer_torch_module_wrapper.txt
@@ -1,7 +1,7 @@
Help on class TorchModuleWrapper in module keras.src.utils.torch_utils:
class TorchModuleWrapper(keras.src.layers.layer.Layer)
- | TorchModuleWrapper(module, name=None, **kwargs)
+ | TorchModuleWrapper(module, name=None, output_shape=None, **kwargs)
|
| Torch module wrapper layer.
|
@@ -17,6 +17,8 @@ class TorchModuleWrapper(keras.src.layers.layer.Layer)
| instance, then its parameters must be initialized before
| passing the instance to `TorchModuleWrapper` (e.g. by calling
| it once).
+ |     output_shape: The shape of the output of this layer. It helps Keras
+ | perform automatic shape inference.
| name: The name of the layer (string).
|
| Example:
@@ -88,6 +90,7 @@ class TorchModuleWrapper(keras.src.layers.layer.Layer)
| self,
| module,
| name=None,
+ | output_shape=None,
| **kwargs
| )
| Initialize self. See help(type(self)) for accurate signature.
@@ -99,6 +102,8 @@ class TorchModuleWrapper(keras.src.layers.layer.Layer)
| **kwargs
| )
|
+ | compute_output_shape(self, input_shape)
+ |
| get_config(self)
| Returns the config of the object.
|
diff --git a/.tether/man/load_model_weights.txt b/.tether/man/load_model_weights.txt
index 9a57d484a..724a79df6 100644
--- a/.tether/man/load_model_weights.txt
+++ b/.tether/man/load_model_weights.txt
@@ -6,27 +6,44 @@ keras.Model.load_weights(
**kwargs
)
__doc__
-Load weights from a file saved via `save_weights()`.
+Load the weights from a single file or sharded files.
-Weights are loaded based on the network's
-topology. This means the architecture should be the same as when the
-weights were saved. Note that layers that don't have weights are not
-taken into account in the topological ordering, so adding or removing
-layers is fine as long as they don't have weights.
+Weights are loaded based on the network's topology. This means the
+architecture should be the same as when the weights were saved. Note
+that layers that don't have weights are not taken into account in the
+topological ordering, so adding or removing layers is fine as long as
+they don't have weights.
**Partial weight loading**
If you have modified your model, for instance by adding a new layer
-(with weights) or by changing the shape of the weights of a layer,
-you can choose to ignore errors and continue loading
-by setting `skip_mismatch=True`. In this case any layer with
-mismatching weights will be skipped. A warning will be displayed
-for each skipped layer.
+(with weights) or by changing the shape of the weights of a layer, you
+can choose to ignore errors and continue loading by setting
+`skip_mismatch=True`. In this case any layer with mismatching weights
+will be skipped. A warning will be displayed for each skipped layer.
+
+**Sharding**
+
+When loading sharded weights, it is important to specify a `filepath` that
+ends with `.weights.json`, which is used as the configuration file.
+Additionally, the sharded files `*_xxxxx.weights.h5` must be in the same
+directory as the configuration file.
Args:
- filepath: String, path to the weights file to load.
- It can either be a `.weights.h5` file
- or a legacy `.h5` weights file.
+    filepath: `str` or `pathlib.Path` object. Path to the weights file
+        to load. When loading sharded weights, the filepath must end in
+        `.weights.json`.
skip_mismatch: Boolean, whether to skip loading of layers where
there is a mismatch in the number of weights, or a mismatch in
the shape of the weights.
+
+Example:
+
+```python
+# Load the weights in a single file.
+model.load_weights("model.weights.h5")
+
+# Load the weights in sharded files.
+model.load_weights("model.weights.json")
+```
+
diff --git a/.tether/man/loss_categorical_generalized_cross_entropy.txt b/.tether/man/loss_categorical_generalized_cross_entropy.txt
new file mode 100644
index 000000000..57d2a8b85
--- /dev/null
+++ b/.tether/man/loss_categorical_generalized_cross_entropy.txt
@@ -0,0 +1,75 @@
+Help on class CategoricalGeneralizedCrossEntropy in module keras.src.losses.losses:
+
+class CategoricalGeneralizedCrossEntropy(LossFunctionWrapper)
+ | CategoricalGeneralizedCrossEntropy(q=0.5, reduction='sum_over_batch_size', name='categorical_generalized_cross_entropy', dtype=None)
+ |
+ | Computes the Generalized Cross Entropy loss between `y_true` & `y_pred`.
+ |
+ | Generalized Cross Entropy (GCE) is a noise-robust loss function
+ | that provides better robustness against noisy labels than
+ | standard cross entropy.
+ | It generalizes both cross entropy and mean absolute error through
+ | the parameter q, where values closer to 1 make the loss more robust
+ | to noisy labels.
+ |
+ | Formula:
+ | ```python
+ | loss = (1 - p**q) / q
+ | ```
+ | where `p` is the predicted probability for the true class and `q`
+ | is the noise parameter.
+ |
+ | Args:
+ | q: Float in range `(0, 1)`. It is the noise parameter.
+ | Controls the behavior of the loss:
+ | - As `q` approaches 0: Behaves more like cross entropy
+ | - As `q` approaches 1: Behaves more like mean absolute error
+ | Defaults to `0.5`
+ | reduction: Type of reduction to apply to the loss. In almost all cases
+ | this should be `"sum_over_batch_size"`. Supported options are
+ | `"sum"`, `"sum_over_batch_size"`, `"mean"`,
+ | `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss,
+ | `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the
+ | sample size, and `"mean_with_sample_weight"` sums the loss and
+ | divides by the sum of the sample weights. `"none"` and `None`
+ | perform no aggregation. Defaults to `"sum_over_batch_size"`.
+ | name: Optional name for the loss instance.
+ | dtype: The dtype of the loss's computations. Defaults to `None`, which
+ | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a
+ | `"float32"` unless set to different value
+ | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is
+ | provided, then the `compute_dtype` will be utilized.
+ |
+ | Example:
+ | ```python
+ | y_true = np.array([0, 1, 0, 1])
+ | y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]])
+ | keras.losses.CategoricalGeneralizedCrossEntropy()(y_true, y_pred)
+ | ```
+ |
+ | References:
+ | - [Zhang, Sabuncu, 2018](https://arxiv.org/abs/1805.07836)
+ | ("Generalized Cross Entropy Loss for Training
+ | Deep Neural Networks with Noisy Labels")
+ |
+ | Method resolution order:
+ | CategoricalGeneralizedCrossEntropy
+ | LossFunctionWrapper
+ | keras.src.losses.loss.Loss
+ | keras.src.saving.keras_saveable.KerasSaveable
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | q=0.5,
+ | reduction='sum_over_batch_size',
+ | name='categorical_generalized_cross_entropy',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
+
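The documented formula is easy to verify by hand for the example above; a NumPy sketch of the per-sample computation (the final averaging assumes the default `"sum_over_batch_size"` reduction):

```python
import numpy as np

q = 0.5
y_true = np.array([0, 1, 0, 1])
y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]])

p = y_pred[np.arange(len(y_true)), y_true]  # prob. of the true class per sample
per_sample = (1.0 - p**q) / q               # ~[0.327, 0.211, 0.451, 0.451]
print(per_sample.mean())                    # ~0.36, the expected reduced loss
```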
diff --git a/.tether/man/loss_dice.txt b/.tether/man/loss_dice.txt
index 9ee7231ca..1aad5efd1 100644
--- a/.tether/man/loss_dice.txt
+++ b/.tether/man/loss_dice.txt
@@ -38,12 +38,12 @@ class Dice(LossFunctionWrapper)
| >>> y_pred = [[[[0.0], [1.0]], [[0.0], [1.0]]],
| ... [[[0.4], [0.0]], [[0.0], [0.9]]]]
| >>> axis = (1, 2, 3)
- | >>> loss = keras.losses.dice(y_true, y_pred, axis=axis)
+ | >>> loss = keras.losses.Dice(axis=axis, reduction=None)(y_true, y_pred)
| >>> assert loss.shape == (2,)
| >>> loss
| array([0.5, 0.75757575], shape=(2,), dtype=float32)
|
- | >>> loss = keras.losses.dice(y_true, y_pred)
+ | >>> loss = keras.losses.Dice()(y_true, y_pred)
| >>> assert loss.shape == ()
| >>> loss
| array(0.6164384, shape=(), dtype=float32)
diff --git a/.tether/man/loss_sparse_categorical_crossentropy.txt b/.tether/man/loss_sparse_categorical_crossentropy.txt
index 4f84895f1..c4b397d8c 100644
--- a/.tether/man/loss_sparse_categorical_crossentropy.txt
+++ b/.tether/man/loss_sparse_categorical_crossentropy.txt
@@ -1,7 +1,7 @@
Help on class SparseCategoricalCrossentropy in module keras.src.losses.losses:
class SparseCategoricalCrossentropy(LossFunctionWrapper)
- | SparseCategoricalCrossentropy(from_logits=False, ignore_class=None, reduction='sum_over_batch_size', name='sparse_categorical_crossentropy', dtype=None)
+ | SparseCategoricalCrossentropy(from_logits=False, ignore_class=None, reduction='sum_over_batch_size', axis=-1, name='sparse_categorical_crossentropy', dtype=None)
|
| Computes the crossentropy loss between the labels and predictions.
|
@@ -28,6 +28,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper)
| sample size, and `"mean_with_sample_weight"` sums the loss and
| divides by the sum of the sample weights. `"none"` and `None`
| perform no aggregation. Defaults to `"sum_over_batch_size"`.
+ | axis: The axis along which to compute crossentropy (the features
+ | axis). Defaults to `-1`.
| name: Optional name for the loss instance.
| dtype: The dtype of the loss's computations. Defaults to `None`, which
| means using `keras.backend.floatx()`. `keras.backend.floatx()` is a
@@ -37,8 +39,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper)
|
| Examples:
|
- | >>> y_true = [1, 2]
- | >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
+ | >>> y_true = np.array([1, 2])
+ | >>> y_pred = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]])
| >>> # Using 'auto'/'sum_over_batch_size' reduction type.
| >>> scce = keras.losses.SparseCategoricalCrossentropy()
| >>> scce(y_true, y_pred)
@@ -81,6 +83,7 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper)
| from_logits=False,
| ignore_class=None,
| reduction='sum_over_batch_size',
+ | axis=-1,
| name='sparse_categorical_crossentropy',
| dtype=None
| )
diff --git a/.tether/man/metric_binary_focal_crossentropy.txt b/.tether/man/metric_binary_focal_crossentropy.txt
index ad3ddb52d..0aa9546df 100644
--- a/.tether/man/metric_binary_focal_crossentropy.txt
+++ b/.tether/man/metric_binary_focal_crossentropy.txt
@@ -53,8 +53,21 @@ Example:
>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
->>> loss = keras.losses.binary_focal_crossentropy(
+>>> # In this instance, the first sample in the second batch is the
+>>> # 'easier' example.
+>>> focal_loss = keras.losses.binary_focal_crossentropy(
... y_true, y_pred, gamma=2)
>>> assert focal_loss.shape == (2,)
->>> loss
+>>> focal_loss
array([0.330, 0.206], dtype=float32)
+>>> # Compare with binary_crossentropy
+>>> bce_loss = keras.losses.binary_focal_crossentropy(
+... y_true, y_pred)
+>>> bce_loss
+array([0.916, 0.714], dtype=float32)
+>>> # Binary focal crossentropy loss attributes more importance to the
+>>> # harder example which results in a higher loss for the first batch
+>>> # when normalized by binary cross entropy loss
+>>> focal_loss/bce_loss
+array([0.360, 0.289], dtype=float32)
+
diff --git a/.tether/man/metric_sensitivity_at_specificity.txt b/.tether/man/metric_sensitivity_at_specificity.txt
index bc9735e85..c3c3dd8fd 100644
--- a/.tether/man/metric_sensitivity_at_specificity.txt
+++ b/.tether/man/metric_sensitivity_at_specificity.txt
@@ -56,7 +56,7 @@ class SensitivityAtSpecificity(SensitivitySpecificityBase)
| model.compile(
| optimizer='sgd',
| loss='binary_crossentropy',
- | metrics=[keras.metrics.SensitivityAtSpecificity()])
+ | metrics=[keras.metrics.SensitivityAtSpecificity(specificity=0.5)])
| ```
|
| Method resolution order:
diff --git a/.tether/man/metric_specificity_at_sensitivity.txt b/.tether/man/metric_specificity_at_sensitivity.txt
index d320e059f..6e21b36c9 100644
--- a/.tether/man/metric_specificity_at_sensitivity.txt
+++ b/.tether/man/metric_specificity_at_sensitivity.txt
@@ -56,7 +56,7 @@ class SpecificityAtSensitivity(SensitivitySpecificityBase)
| model.compile(
| optimizer='sgd',
| loss='binary_crossentropy',
- | metrics=[keras.metrics.SpecificityAtSensitivity()])
+ | metrics=[keras.metrics.SpecificityAtSensitivity(sensitivity=0.3)])
| ```
|
| Method resolution order:
diff --git a/.tether/man/op_angle.txt b/.tether/man/op_angle.txt
new file mode 100644
index 000000000..1a0b0114e
--- /dev/null
+++ b/.tether/man/op_angle.txt
@@ -0,0 +1,18 @@
+__signature__
+keras.ops.angle(x)
+__doc__
+Element-wise angle of a complex tensor.
+
+Arguments:
+ x: Input tensor. Can be real or complex.
+
+Returns:
+    Output tensor of the same shape as `x`, containing the angle of each
+    element (in radians).
+
+Example:
+>>> x = keras.ops.convert_to_tensor([[1 + 3j, 2 - 5j], [4 - 3j, 3 + 2j]])
+>>> keras.ops.angle(x)
+array([[ 1.2490457, -1.19029 ],
+ [-0.6435011, 0.5880026]], dtype=float32)
+
diff --git a/.tether/man/op_bartlett.txt b/.tether/man/op_bartlett.txt
new file mode 100644
index 000000000..6c00ebeb1
--- /dev/null
+++ b/.tether/man/op_bartlett.txt
@@ -0,0 +1,17 @@
+__signature__
+keras.ops.bartlett(x)
+__doc__
+Bartlett window function.
+The Bartlett window is a triangular window that rises then falls linearly.
+
+Args:
+ x: Scalar or 1D Tensor. Window length.
+
+Returns:
+ A 1D tensor containing the Bartlett window values.
+
+Example:
+>>> x = keras.ops.convert_to_tensor(5)
+>>> keras.ops.bartlett(x)
+array([0. , 0.5, 1. , 0.5, 0. ], dtype=float32)
+
diff --git a/.tether/man/op_blackman.txt b/.tether/man/op_blackman.txt
new file mode 100644
index 000000000..50a5f5e4f
--- /dev/null
+++ b/.tether/man/op_blackman.txt
@@ -0,0 +1,18 @@
+__signature__
+keras.ops.blackman(x)
+__doc__
+Blackman window function.
+The Blackman window is a taper formed by using a weighted cosine.
+
+Args:
+ x: Scalar or 1D Tensor. Window length.
+
+Returns:
+ A 1D tensor containing the Blackman window values.
+
+Example:
+>>> x = keras.ops.convert_to_tensor(5)
+>>> keras.ops.blackman(x)
+array([-1.3877788e-17, 3.4000000e-01, 1.0000000e+00, 3.4000000e-01,
+ -1.3877788e-17], dtype=float32)
+
diff --git a/.tether/man/op_cbrt.txt b/.tether/man/op_cbrt.txt
new file mode 100644
index 000000000..d3e3c285a
--- /dev/null
+++ b/.tether/man/op_cbrt.txt
@@ -0,0 +1,14 @@
+__signature__
+keras.ops.cbrt(x)
+__doc__
+Computes the cube root of the input tensor, element-wise.
+
+This operation returns the real-valued cube root of `x`, handling
+negative numbers properly in the real domain.
+
+Args:
+ x: Input tensor.
+
+Returns:
+ A tensor containing the cube root of each element in `x`.
+
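A small sanity check of the real-domain behaviour described above (the exact return dtype depends on the backend and compute dtype):

```python
import keras

x = keras.ops.convert_to_tensor([-8.0, 0.0, 27.0])
keras.ops.cbrt(x)  # expected: [-2., 0., 3.] -- negative inputs keep their sign
```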
diff --git a/.tether/man/op_conv_transpose.txt b/.tether/man/op_conv_transpose.txt
index dab311e05..2e8863baa 100644
--- a/.tether/man/op_conv_transpose.txt
+++ b/.tether/man/op_conv_transpose.txt
@@ -2,7 +2,7 @@ __signature__
keras.ops.conv_transpose(
inputs,
kernel,
- strides,
+ strides=1,
padding='valid',
output_padding=None,
data_format=None,
@@ -51,3 +51,4 @@ Args:
Returns:
A tensor of rank N+2, the result of the conv operation.
+
diff --git a/.tether/man/op_corrcoef.txt b/.tether/man/op_corrcoef.txt
new file mode 100644
index 000000000..bff558a2c
--- /dev/null
+++ b/.tether/man/op_corrcoef.txt
@@ -0,0 +1,12 @@
+__signature__
+keras.ops.corrcoef(x)
+__doc__
+Compute the Pearson correlation coefficient matrix.
+
+Args:
+ x: A 2D tensor of shape `(N, D)`, where N is the number of variables
+ and D is the number of observations.
+
+Returns:
+ A tensor of shape `(N, N)` representing the correlation matrix.
+
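For intuition, the expected result for two perfectly correlated variables, assuming NumPy-compatible semantics (the data values are illustrative):

```python
import numpy as np
import keras

x = np.array([[1.0, 2.0, 3.0, 4.0],
              [2.0, 4.0, 6.0, 8.0]])  # 2 variables, 4 observations each
keras.ops.corrcoef(x)
# expected, matching np.corrcoef(x):
# [[1., 1.],
#  [1., 1.]]
```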
diff --git a/.tether/man/op_deg2rad.txt b/.tether/man/op_deg2rad.txt
new file mode 100644
index 000000000..c8d24cda5
--- /dev/null
+++ b/.tether/man/op_deg2rad.txt
@@ -0,0 +1,21 @@
+__signature__
+keras.ops.deg2rad(x)
+__doc__
+Convert angles from degrees to radians.
+
+The conversion is defined as:
+`rad = deg * (π / 180)`
+
+Args:
+ x: Input tensor of angles in degrees.
+
+Returns:
+ A tensor containing angles converted to radians.
+
+Examples:
+>>> from keras import ops
+>>> ops.deg2rad(180.0)
+3.141592653589793
+>>> ops.deg2rad([0.0, 90.0, 180.0])
+array([0., 1.57079633, 3.14159265])
+
diff --git a/.tether/man/op_dot_product_attention.txt b/.tether/man/op_dot_product_attention.txt
index 52de124bf..14d58473d 100644
--- a/.tether/man/op_dot_product_attention.txt
+++ b/.tether/man/op_dot_product_attention.txt
@@ -7,7 +7,8 @@ keras.ops.dot_product_attention(
mask=None,
scale=None,
is_causal=False,
- flash_attention=None
+ flash_attention=None,
+ attn_logits_soft_cap=None
)
__doc__
Scaled dot product attention function.
@@ -47,6 +48,9 @@ Args:
attempt to use flash attention if the required conditions are met.
Typically, the inputs must be in float16 and bfloat16 dtype and the
input layout requirements may vary depending on the backend.
+    attn_logits_soft_cap: The value limiting the maximum magnitude of the
+        attention logits before the softmax is applied. Only supported by
+        the JAX backend on TPU. Defaults to `None`.
Returns:
An array of the attention output with the same shape of `query`.
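Logit soft-capping of this kind is commonly implemented as a tanh squashing of the pre-softmax scores; the sketch below shows that common formulation as an assumption, since the docstring does not spell out the exact formula:

```python
import numpy as np

def soft_cap(logits, cap):
    # Squash into [-1, 1] with tanh, then rescale, so the logits can never
    # exceed +/- cap before the softmax is applied.
    return cap * np.tanh(logits / cap)

logits = np.array([-80.0, -5.0, 0.0, 5.0, 80.0])
soft_cap(logits, cap=50.0)  # ~[-46.1, -5.0, 0.0, 5.0, 46.1]
```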
diff --git a/.tether/man/op_einsum.txt b/.tether/man/op_einsum.txt
index 9591c8a54..90b9221d5 100644
--- a/.tether/man/op_einsum.txt
+++ b/.tether/man/op_einsum.txt
@@ -1,5 +1,9 @@
__signature__
-keras.ops.einsum(subscripts, *operands)
+keras.ops.einsum(
+ subscripts,
+ *operands,
+ **kwargs
+)
__doc__
Evaluates the Einstein summation convention on the operands.
diff --git a/.tether/man/op_hamming.txt b/.tether/man/op_hamming.txt
new file mode 100644
index 000000000..031efc106
--- /dev/null
+++ b/.tether/man/op_hamming.txt
@@ -0,0 +1,19 @@
+__signature__
+keras.ops.hamming(x)
+__doc__
+Hamming window function.
+
+The Hamming window is defined as:
+`w[n] = 0.54 - 0.46 * cos(2 * pi * n / (N - 1))` for `0 <= n <= N - 1`.
+
+Args:
+ x: Scalar or 1D Tensor. The window length.
+
+Returns:
+ A 1D tensor containing the Hamming window values.
+
+Example:
+>>> x = keras.ops.convert_to_tensor(5)
+>>> keras.ops.hamming(x)
+array([0.08, 0.54, 1. , 0.54, 0.08], dtype=float32)
+
diff --git a/.tether/man/op_hanning.txt b/.tether/man/op_hanning.txt
new file mode 100644
index 000000000..33126574d
--- /dev/null
+++ b/.tether/man/op_hanning.txt
@@ -0,0 +1,19 @@
+__signature__
+keras.ops.hanning(x)
+__doc__
+Hanning window function.
+
+The Hanning window is defined as:
+`w[n] = 0.5 - 0.5 * cos(2 * pi * n / (N - 1))` for `0 <= n <= N - 1`.
+
+Args:
+ x: Scalar or 1D Tensor. The window length.
+
+Returns:
+ A 1D tensor containing the Hanning window values.
+
+Example:
+>>> x = keras.ops.convert_to_tensor(5)
+>>> keras.ops.hanning(x)
+array([0. , 0.5, 1. , 0.5, 0. ], dtype=float32)
+
diff --git a/.tether/man/op_heaviside.txt b/.tether/man/op_heaviside.txt
new file mode 100644
index 000000000..bafbe1c8b
--- /dev/null
+++ b/.tether/man/op_heaviside.txt
@@ -0,0 +1,21 @@
+__signature__
+keras.ops.heaviside(x1, x2)
+__doc__
+Heaviside step function.
+
+The Heaviside step function is defined as:
+`heaviside(x1, x2) = 0 if x1 < 0, 1 if x1 > 0, x2 if x1 == 0`
+
+Args:
+ x1: A tensor input.
+ x2: A scalar or tensor, the value to return when `x1 == 0`.
+
+Returns:
+ A tensor with a shape determined by broadcasting `x1` and `x2`.
+
+Example:
+>>> x1 = keras.ops.convert_to_tensor([-2.0, 0.0, 3.0])
+>>> x2 = 0.5
+>>> keras.ops.heaviside(x1, x2)
+array([0. , 0.5, 1. ], dtype=float32)
+
diff --git a/.tether/man/op_image_elastic_transform.txt b/.tether/man/op_image_elastic_transform.txt
new file mode 100644
index 000000000..6a9cd7675
--- /dev/null
+++ b/.tether/man/op_image_elastic_transform.txt
@@ -0,0 +1,66 @@
+__signature__
+keras.ops.image.elastic_transform(
+ images,
+ alpha=20.0,
+ sigma=5.0,
+ interpolation='bilinear',
+ fill_mode='reflect',
+ fill_value=0.0,
+ seed=None,
+ data_format=None
+)
+__doc__
+Applies elastic deformation to the image(s).
+
+Args:
+ images: Input image or batch of images. Must be 3D or 4D.
+ alpha: Scaling factor that controls the intensity of the deformation.
+ sigma: Standard deviation of the Gaussian filter used for
+ smoothing the displacement fields.
+ interpolation: Interpolation method. Available methods are `"nearest"`,
+ and `"bilinear"`. Defaults to `"bilinear"`.
+ fill_mode: Points outside the boundaries of the input are filled
+ according to the given mode. Available methods are `"constant"`,
+        `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`.
+ - `"reflect"`: `(d c b a | a b c d | d c b a)`
+ The input is extended by reflecting about the edge of the last
+ pixel.
+ - `"constant"`: `(k k k k | a b c d | k k k k)`
+ The input is extended by filling all values beyond
+ the edge with the same constant value k specified by
+ `fill_value`.
+ - `"wrap"`: `(a b c d | a b c d | a b c d)`
+ The input is extended by wrapping around to the opposite edge.
+ - `"nearest"`: `(a a a a | a b c d | d d d d)`
+ The input is extended by the nearest pixel.
+ fill_value: Value used for points outside the boundaries of the input if
+ `fill_mode="constant"`. Defaults to `0`.
+ data_format: A string specifying the data format of the input tensor.
+ It can be either `"channels_last"` or `"channels_first"`.
+ `"channels_last"` corresponds to inputs with shape
+ `(batch, height, width, channels)`, while `"channels_first"`
+ corresponds to inputs with shape `(batch, channels, height, width)`.
+ If not specified, the value will default to
+ `keras.config.image_data_format`.
+
+Returns:
+ Transformed image or batch of images with elastic deformation.
+
+Examples:
+
+>>> x = np.random.random((2, 64, 80, 3)) # batch of 2 RGB images
+>>> y = keras.ops.image.elastic_transform(x)
+>>> y.shape
+(2, 64, 80, 3)
+
+>>> x = np.random.random((64, 80, 3)) # single RGB image
+>>> y = keras.ops.image.elastic_transform(x)
+>>> y.shape
+(64, 80, 3)
+
+>>> x = np.random.random((2, 3, 64, 80)) # batch of 2 RGB images
+>>> y = keras.ops.image.elastic_transform(
+... x, data_format="channels_first")
+>>> y.shape
+(2, 3, 64, 80)
+
diff --git a/.tether/man/op_kaiser.txt b/.tether/man/op_kaiser.txt
new file mode 100644
index 000000000..b49a7d3e4
--- /dev/null
+++ b/.tether/man/op_kaiser.txt
@@ -0,0 +1,22 @@
+__signature__
+keras.ops.kaiser(x, beta)
+__doc__
+Kaiser window function.
+
+The Kaiser window is defined as:
+`w[n] = I0(beta * sqrt(1 - (2n / (N - 1) - 1)^2)) / I0(beta)`
+where I0 is the modified zeroth-order Bessel function of the first kind.
+
+Args:
+ x: Scalar or 1D Tensor. The window length.
+ beta: Float. Shape parameter for the Kaiser window.
+
+Returns:
+ A 1D tensor containing the Kaiser window values.
+
+Example:
+>>> x = keras.ops.convert_to_tensor(5)
+>>> keras.ops.kaiser(x, beta=14.0)
+array([7.7268669e-06, 1.6493219e-01, 1.0000000e+00, 1.6493219e-01,
+ 7.7268669e-06], dtype=float32)
+
diff --git a/.tether/man/op_layer_normalization.txt b/.tether/man/op_layer_normalization.txt
new file mode 100644
index 000000000..33bc93250
--- /dev/null
+++ b/.tether/man/op_layer_normalization.txt
@@ -0,0 +1,35 @@
+__signature__
+keras.ops.layer_normalization(
+ x,
+ gamma=None,
+ beta=None,
+ axis=-1,
+ epsilon=None,
+ **kwargs
+)
+__doc__
+Layer normalization layer (Ba et al., 2016).
+
+Normalizes the activations of each example in a batch independently, rather
+than across the batch as Batch Normalization does. That is, it applies a
+transformation that keeps the mean activation within each example close to 0
+and the activation standard deviation close to 1.
+
+Args:
+ x: Input tensor.
+ gamma: Optional scaling factor for the normalization.
+ beta: Optional add offset for the normalized tensor.
+    axis: The axis or axes along which to perform normalization. Defaults
+        to `-1`.
+ epsilon: A lower bound value for the norm.
+ Defaults to `backend.epsilon()`.
+
+Returns:
+ The normalized array.
+
+Example:
+
+>>> x = keras.ops.arange(5, dtype="float32")
+>>> keras.ops.layer_normalization(x)
+array([-1.4142135, -0.70710677, 0.0, 0.7071067, 1.4142135])
+
diff --git a/.tether/man/op_rms_normalization.txt b/.tether/man/op_rms_normalization.txt
index 321afb838..d300b9e3a 100644
--- a/.tether/man/op_rms_normalization.txt
+++ b/.tether/man/op_rms_normalization.txt
@@ -1,7 +1,7 @@
__signature__
keras.ops.rms_normalization(
x,
- scale=1,
+ scale=None,
axis=-1,
epsilon=None
)
@@ -18,20 +18,19 @@ It is defined as `rms_normalization(x) = x * rsqrt(mean(square(x))) * scale`
Args:
x: Input tensor.
- axis: The axis or axes along which to perform normalization.
- Default to -1.
scale: Optional scaling factor for the normalization.
- epsilon: A lower bound value for the norm.
- Defaults to `backend.epsilon()`.
+ axis: The axis or axes along which to perform normalization. Defaults
+ to `-1`.
+ epsilon: A lower bound value for the norm. Defaults to
+ `backend.epsilon()`.
Returns:
The normalized array.
Example:
->>> x = np.random.rand(1, 10)
->>> x_norm = keras.ops.rms_normalization(x, (10,))
->>> print(x_norm)
+>>> x = keras.random.normal((1, 10))
+>>> keras.ops.rms_normalization(x)
array([[0.69384296, 0.94444374, 0.16551171, 0.05749961, 1.11008865,
- 0.52475186, 1.57686807, 1.69893307, 1.27292764, 0.30819128]])
+ 0.52475186, 1.57686807, 1.69893307, 1.27292764, 0.30819128]])
diff --git a/.tether/man/op_sparse_sigmoid.txt b/.tether/man/op_sparse_sigmoid.txt
new file mode 100644
index 000000000..06861c5a7
--- /dev/null
+++ b/.tether/man/op_sparse_sigmoid.txt
@@ -0,0 +1,23 @@
+__signature__
+keras.ops.sparse_sigmoid(x)
+__doc__
+Sparse sigmoid activation function.
+
+It is defined as
+
+`f(x) = 0` for `x <= -1`,
+`f(x) = 0.5 * (x + 1)` for `-1 < x < 1`,
+`f(x) = 1` for `x >= 1`.
+
+Args:
+ x: Input tensor.
+
+Returns:
+ A tensor with the same shape as `x`.
+
+Example:
+
+>>> x = keras.ops.convert_to_tensor([-6.0, 1.0, 0.0, 1.0, 6.0])
+>>> keras.ops.sparse_sigmoid(x)
+array([0. , 1. , 0.5, 1. , 1. ], dtype=float32)
+
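The piecewise definition above is equivalent to a clipped linear ramp; a NumPy reference implementation (mirroring the formula, not the Keras source):

```python
import numpy as np

def sparse_sigmoid(x):
    x = np.asarray(x, dtype="float32")
    # 0 for x <= -1, the ramp 0.5 * (x + 1) in between, 1 for x >= 1.
    return np.clip(0.5 * (x + 1.0), 0.0, 1.0)

sparse_sigmoid([-6.0, -0.5, 0.0, 0.5, 6.0])  # [0., 0.25, 0.5, 0.75, 1.]
```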
diff --git a/.tether/man/op_vectorized_map.txt b/.tether/man/op_vectorized_map.txt
index 3af8b4df0..da1de681c 100644
--- a/.tether/man/op_vectorized_map.txt
+++ b/.tether/man/op_vectorized_map.txt
@@ -7,18 +7,18 @@ Schematically, `vectorized_map` implements the following,
in the case of a single tensor input `elements`:
```python
-def vectorized_map(function, elements)
+def vectorized_map(function, elements):
outputs = []
for e in elements:
outputs.append(function(e))
- return stack(outputs)
+ return np.stack(outputs)
```
In the case of an iterable of tensors `elements`,
it implements the following:
```python
-def vectorized_map(function, elements)
+def vectorized_map(function, elements):
batch_size = elements[0].shape[0]
outputs = []
for index in range(batch_size):
@@ -28,3 +28,4 @@ def vectorized_map(function, elements)
In this case, `function` is expected to take as input
a single list of tensor arguments.
+
diff --git a/.tether/man/op_view_as_complex.txt b/.tether/man/op_view_as_complex.txt
new file mode 100644
index 000000000..c195cbce8
--- /dev/null
+++ b/.tether/man/op_view_as_complex.txt
@@ -0,0 +1,25 @@
+__signature__
+keras.ops.view_as_complex(x)
+__doc__
+Converts a real tensor with shape `(..., 2)` to a complex tensor,
+where the last dimension represents the real and imaginary components
+of a complex tensor.
+
+Args:
+ x: A real tensor with last dimension of size 2.
+
+Returns:
+ A complex tensor with shape `x.shape[:-1]`.
+
+Example:
+
+```
+>>> import numpy as np
+>>> from keras import ops
+
+>>> real_imag = np.array([[1.0, 2.0], [3.0, 4.0]])
+>>> complex_tensor = ops.view_as_complex(real_imag)
+>>> complex_tensor
+array([1.+2.j, 3.+4.j])
+```
+
diff --git a/.tether/man/op_view_as_real.txt b/.tether/man/op_view_as_real.txt
new file mode 100644
index 000000000..9573606b5
--- /dev/null
+++ b/.tether/man/op_view_as_real.txt
@@ -0,0 +1,25 @@
+__signature__
+keras.ops.view_as_real(x)
+__doc__
+Converts a complex tensor to a real tensor with shape `(..., 2)`,
+where the last dimension represents the real and imaginary components.
+
+Args:
+ x: A complex tensor.
+
+Returns:
+ A real tensor where the last dimension contains the
+ real and imaginary parts.
+
+Example:
+```
+>>> import numpy as np
+>>> from keras import ops
+
+>>> complex_tensor = np.array([1 + 2j, 3 + 4j])
+>>> real = ops.view_as_real(complex_tensor)
+>>> real
+array([[1., 2.],
+ [3., 4.]])
+```
+
diff --git a/.tether/man/optimizer_lion.txt b/.tether/man/optimizer_lion.txt
index 987adadbd..fa0dc9138 100644
--- a/.tether/man/optimizer_lion.txt
+++ b/.tether/man/optimizer_lion.txt
@@ -7,13 +7,13 @@ class Lion(keras.src.optimizers.optimizer.Optimizer)
|
| The Lion optimizer is a stochastic-gradient-descent method that uses the
| sign operator to control the magnitude of the update, unlike other adaptive
- | optimizers such as Adam that rely on second-order moments. This make
+ | optimizers such as Adam that rely on second-order moments. This makes
| Lion more memory-efficient as it only keeps track of the momentum. According
| to the authors (see reference), its performance gain over Adam grows with
| the batch size. Because the update of Lion is produced through the sign
| operation, resulting in a larger norm, a suitable learning rate for Lion is
| typically 3-10x smaller than that for AdamW. The weight decay for Lion
- | should be in turn 3-10x larger than that for AdamW to maintain a
+ | should in turn be 3-10x larger than that for AdamW to maintain a
| similar strength (lr * wd).
|
| Args:
diff --git a/.tether/man/optimizer_muon.txt b/.tether/man/optimizer_muon.txt
new file mode 100644
index 000000000..47ed5a5c3
--- /dev/null
+++ b/.tether/man/optimizer_muon.txt
@@ -0,0 +1,160 @@
+Help on class Muon in module keras.src.optimizers.muon:
+
+class Muon(keras.src.optimizers.optimizer.Optimizer)
+ | Muon(learning_rate=0.001, adam_beta_1=0.9, adam_beta_2=0.999, epsilon=1e-07, weight_decay=0.1, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='muon', exclude_layers=None, exclude_embeddings=True, muon_a=3.4445, muon_b=-4.775, muon_c=2.0315, adam_lr_ratio=0.1, momentum=0.95, ns_steps=6, nesterov=True, **kwargs)
+ |
+ | Optimizer that implements the Muon algorithm.
+ |
+ | Note that this optimizer should not be used in the following layers:
+ |
+ | 1. Embedding layer
+ | 2. Final output fully connected layer
+ | 3. Any {0,1}-D variables
+ |
+ | These should all be optimized using AdamW.
+ |
+ | The Muon optimizer can use either the Muon update step or the
+ | AdamW update step, based on the following:
+ |
+ | - For any variable that isn't 2D, 3D or 4D, the AdamW step
+ | will be used. This is not configurable.
+ | - If the argument `exclude_embeddings` (defaults to `True`) is set
+ | to `True`, the AdamW step will be used.
+ | - For any variable with a name that matches an expression
+ | listed in the argument `exclude_layers` (a list), the
+ | AdamW step will be used.
+ | - Any other variable uses the Muon step.
+ |
+ | Typically, you only need to pass the name of your densely-connected
+ | output layer to `exclude_layers`, e.g.
+ | `exclude_layers=["output_dense"]`.
+ |
+ | References:
+ | - [Original implementation](https://github.com/KellerJordan/Muon)
+ | - [Liu et al, 2025](https://arxiv.org/abs/2502.16982)
+ |
+ | Args:
+ | learning_rate: A float,
+ | `keras.optimizers.schedules.LearningRateSchedule` instance, or
+ | a callable that takes no arguments and returns the actual value to
+ | use. The learning rate. Defaults to `0.001`.
+ | adam_beta_1: A float value or a constant float tensor, or a callable
+ | that takes no arguments and returns the actual value to use.
+ | The exponential decay rate for the 1st moment estimates. Defaults to
+ | `0.9`.
+ |     adam_beta_2: A float value or a constant float tensor, or a callable
+ | that takes no arguments and returns the actual value to use.
+ | The exponential decay rate for the 2nd moment estimates. Defaults to
+ | `0.999`.
+ | epsilon: A small constant for numerical stability. This is
+ | "epsilon hat" in the Kingma and Ba paper
+ | (in the formula just before Section 2.1),
+ | not the epsilon in Algorithm 1 of the paper.
+ |         It is used in the AdamW update step. Defaults to `1e-7`.
+ |     exclude_layers: List of strings, keywords of layer names to exclude.
+ |         All layers with these keywords in their path will use the AdamW step.
+ |     exclude_embeddings: Boolean. If `True`, embedding layers will use the
+ |         AdamW step. Defaults to `True`.
+ |     muon_a: Float, parameter a of the Muon algorithm.
+ |         It is recommended to use the default value.
+ |     muon_b: Float, parameter b of the Muon algorithm.
+ |         It is recommended to use the default value.
+ |     muon_c: Float, parameter c of the Muon algorithm.
+ |         It is recommended to use the default value.
+ |     adam_lr_ratio: Float, the ratio of the learning rate when
+ |         using Adam to the main learning rate.
+ |         It is recommended to set it to `0.1`.
+ | momentum: Float, momentum used by internal SGD.
+ | ns_steps: Integer, number of Newton-Schulz iterations to run.
+ |     nesterov: Boolean, whether to use Nesterov-style momentum.
+ | {{base_optimizer_keyword_args}}
+ |
+ | Method resolution order:
+ | Muon
+ | keras.src.optimizers.optimizer.Optimizer
+ | keras.src.backend.tensorflow.optimizer.TFOptimizer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.optimizers.base_optimizer.BaseOptimizer
+ | keras.src.saving.keras_saveable.KerasSaveable
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | learning_rate=0.001,
+ | adam_beta_1=0.9,
+ | adam_beta_2=0.999,
+ | epsilon=1e-07,
+ | weight_decay=0.1,
+ | clipnorm=None,
+ | clipvalue=None,
+ | global_clipnorm=None,
+ | use_ema=False,
+ | ema_momentum=0.99,
+ | ema_overwrite_frequency=None,
+ | loss_scale_factor=None,
+ | gradient_accumulation_steps=None,
+ | name='muon',
+ | exclude_layers=None,
+ | exclude_embeddings=True,
+ | muon_a=3.4445,
+ | muon_b=-4.775,
+ | muon_c=2.0315,
+ | adam_lr_ratio=0.1,
+ | momentum=0.95,
+ | ns_steps=6,
+ | nesterov=True,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, var_list)
+ | Initialize optimizer variables.
+ |
+ | Adam optimizer has 3 types of variables: momentums, velocities and
+ | velocity_hat (only set when amsgrad is applied),
+ |
+ | Args:
+ | var_list: list of model variables to build Adam variables on.
+ |
+ | get_config(self)
+ | Returns the config of the optimizer.
+ |
+ | An optimizer config is a Python dictionary (serializable)
+ | containing the configuration of an optimizer.
+ | The same optimizer can be reinstantiated later
+ | (without any saved state) from this configuration.
+ |
+ | Subclass optimizer should override this method to include other
+ | hyperparameters.
+ |
+ | Returns:
+ | Python dictionary.
+ |
+ | transpose_last_axis(self, X)
+ |
+ | update_step(
+ | self,
+ | gradient,
+ | variable,
+ | learning_rate
+ | )
+ |
+ | zeropower_via_newtonschulz5(
+ | self,
+ | x,
+ | steps: int
+ | )
+ |     We apply the Newton-Schulz iteration to orthogonalize the matrix G.
+ |
+ | We select a quintic iteration that maximizes the slope at zero. This
+ | approach helps minimize steps, even if the iteration doesn't fully
+ | converge across the interval. The result isn't exactly UV^T (from the
+ | SVD of G), but rather an approximation like US'V^T. Despite this
+ | approximation, model performance remains unaffected compared to using
+ | the exact UV^T from the SVD.
+ |
+
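For intuition about `zeropower_via_newtonschulz5`, a NumPy sketch of the quintic Newton-Schulz iteration: the coefficients are the documented `muon_a`/`muon_b`/`muon_c` defaults, while the initial normalization and transpose handling are assumptions based on the referenced Muon implementation:

```python
import numpy as np

def zeropower_via_newtonschulz5(g, steps=6, eps=1e-7):
    a, b, c = 3.4445, -4.775, 2.0315    # muon_a, muon_b, muon_c defaults
    x = g / (np.linalg.norm(g) + eps)   # start inside the convergence region
    transposed = x.shape[0] > x.shape[1]
    if transposed:
        x = x.T
    for _ in range(steps):              # ns_steps defaults to 6
        xxt = x @ x.T
        x = a * x + (b * xxt + c * xxt @ xxt) @ x
    return x.T if transposed else x

g = np.random.randn(16, 32)
u = zeropower_via_newtonschulz5(g)
# u approximates an orthogonalization of g: singular values are driven
# toward 1, i.e. roughly US'V^T rather than the exact UV^T from the SVD.
print(np.linalg.svd(u, compute_uv=False))
```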
diff --git a/.tether/man/save_model_weights.txt b/.tether/man/save_model_weights.txt
index 2820c7a22..fafd3596e 100644
--- a/.tether/man/save_model_weights.txt
+++ b/.tether/man/save_model_weights.txt
@@ -2,14 +2,59 @@ __signature__
keras.Model.save_weights(
self,
filepath,
- overwrite=True
+ overwrite=True,
+ max_shard_size=None
)
__doc__
-Saves all layer weights to a `.weights.h5` file.
+Saves all weights to a single file or sharded files.
+
+By default, the weights will be saved in a single `.weights.h5` file.
+If sharding is enabled (`max_shard_size` is not `None`), the weights
+will be saved in multiple files, each with a size at most
+`max_shard_size` (in GB). Additionally, a configuration file
+`.weights.json` will contain the metadata for the sharded files.
+
+The saved sharded files contain:
+
+- `*.weights.json`: The configuration file containing 'metadata' and
+ 'weight_map'.
+- `*_xxxxxx.weights.h5`: The sharded files containing only the
+ weights.
Args:
- filepath: `str` or `pathlib.Path` object.
- Path where to save the model. Must end in `.weights.h5`.
- overwrite: Whether we should overwrite any existing model
- at the target location, or instead ask the user
- via an interactive prompt.
+ filepath: `str` or `pathlib.Path` object. Path where the weights
+ will be saved. When sharding, the filepath must end in
+ `.weights.json`. If `.weights.h5` is provided, it will be
+ overridden.
+ overwrite: Whether to overwrite any existing weights at the target
+ location or instead ask the user via an interactive prompt.
+ max_shard_size: `int` or `float`. Maximum size in GB for each
+ sharded file. If `None`, no sharding will be done. Defaults to
+ `None`.
+
+Example:
+
+```python
+# Instantiate an EfficientNetV2L model with about 454MB of weights.
+model = keras.applications.EfficientNetV2L(weights=None)
+
+# Save the weights in a single file.
+model.save_weights("model.weights.h5")
+
+# Save the weights in sharded files. Using `max_shard_size=0.25` means
+# each sharded file will be at most ~250MB.
+model.save_weights("model.weights.json", max_shard_size=0.25)
+
+# Load the weights in a new model with the same architecture.
+loaded_model = keras.applications.EfficientNetV2L(weights=None)
+loaded_model.load_weights("model.weights.h5")
+x = keras.random.uniform((1, 480, 480, 3))
+assert np.allclose(model.predict(x), loaded_model.predict(x))
+
+# Load the sharded weights in a new model with the same architecture.
+loaded_model = keras.applications.EfficientNetV2L(weights=None)
+loaded_model.load_weights("model.weights.json")
+x = keras.random.uniform((1, 480, 480, 3))
+assert np.allclose(model.predict(x), loaded_model.predict(x))
+```
+
diff --git a/.tether/man/to_categorical.txt b/.tether/man/to_categorical.txt
index ca6dbad25..bb45b450c 100644
--- a/.tether/man/to_categorical.txt
+++ b/.tether/man/to_categorical.txt
@@ -27,8 +27,7 @@ Example:
>>> b = np.array([.9, .04, .03, .03,
... .3, .45, .15, .13,
... .04, .01, .94, .05,
-... .12, .21, .5, .17],
-... shape=[4, 4])
+... .12, .21, .5, .17]).reshape(4,4)
>>> loss = keras.ops.categorical_crossentropy(a, b)
>>> print(np.around(loss, 5))
[0.10536 0.82807 0.1011 1.77196]
diff --git a/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd b/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd
index ddaf7aafd..9af53ff42 100644
--- a/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd
+++ b/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd
@@ -218,7 +218,7 @@ All layers you've seen so far in this guide work with all Keras backends.
The `keras.ops` namespace gives you access to:
- The NumPy API, e.g. `ops.matmul`, `ops.sum`, `ops.reshape`, `ops.stack`, etc.
-- Neural networks-specific APIs such as `ops.softmax`, `ops`.conv`, `ops.binary_crossentropy`, `ops.relu`, etc.
+- Neural networks-specific APIs such as `ops.softmax`, `ops.conv`, `ops.binary_crossentropy`, `ops.relu`, etc.
You can also use backend-native APIs in your layers (such as `tf.nn` functions),
but if you do this, then your layer will only be usable with the backend in question.
diff --git a/.tether/vignettes-src/transfer_learning.Rmd b/.tether/vignettes-src/transfer_learning.Rmd
index 9230064c8..ed90015ed 100644
--- a/.tether/vignettes-src/transfer_learning.Rmd
+++ b/.tether/vignettes-src/transfer_learning.Rmd
@@ -53,7 +53,7 @@ ImageNet dataset, and retraining it on the Kaggle "cats vs dogs" classification
dataset.
This is adapted from
-[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python)
+[Deep Learning with Python](https://deeplearningwithpython.io/)
and the 2016 blog post
["building powerful image classification models using very little data"](https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html).
diff --git a/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd b/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd
index bfc75688b..888f85c33 100644
--- a/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd
+++ b/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd
@@ -368,7 +368,7 @@ A GAN training loop looks like this:
as real.
For a much more detailed overview of how GANs works, see
-[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python).
+[Deep Learning with Python](https://deeplearningwithpython.io/).
Let's implement this training loop. First, create the discriminator meant to classify
fake vs real digits:
diff --git a/DESCRIPTION b/DESCRIPTION
index 5fc222534..d08d661f9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -53,5 +53,5 @@ Suggests:
rstudioapi,
R6,
jpeg
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index 9d4f3ec7a..a284e4f0e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,6 +8,8 @@ S3method("==",keras.src.backend.common.keras_tensor.KerasTensor)
S3method("==",keras_shape)
S3method("[",keras_shape)
S3method("[[",python_builtin_super_getter)
+S3method(Arg,keras.src.backend.Tensor)
+S3method(Arg,keras.src.backend.common.keras_tensor.KerasTensor)
S3method(Summary,keras_shape)
S3method(as.array,jax.Array)
S3method(as.array,jaxlib._jax.ArrayImpl)
@@ -102,6 +104,7 @@ export(activation_softmax)
export(activation_softplus)
export(activation_softsign)
export(activation_sparse_plus)
+export(activation_sparse_sigmoid)
export(activation_sparsemax)
export(activation_squareplus)
export(activation_tanh)
@@ -187,12 +190,17 @@ export(config_floatx)
export(config_image_data_format)
export(config_is_flash_attention_enabled)
export(config_is_interactive_logging_enabled)
+export(config_is_nnx_enabled)
export(config_is_traceback_filtering_enabled)
+export(config_max_epochs)
+export(config_max_steps_per_epoch)
export(config_set_backend)
export(config_set_dtype_policy)
export(config_set_epsilon)
export(config_set_floatx)
export(config_set_image_data_format)
+export(config_set_max_epochs)
+export(config_set_max_steps_per_epoch)
export(constraint_maxnorm)
export(constraint_minmaxnorm)
export(constraint_nonneg)
@@ -360,6 +368,7 @@ export(layer_random_color_degeneration)
export(layer_random_color_jitter)
export(layer_random_contrast)
export(layer_random_crop)
+export(layer_random_elastic_transform)
export(layer_random_erasing)
export(layer_random_flip)
export(layer_random_gaussian_blur)
@@ -415,6 +424,7 @@ export(loss_binary_crossentropy)
export(loss_binary_focal_crossentropy)
export(loss_categorical_crossentropy)
export(loss_categorical_focal_crossentropy)
+export(loss_categorical_generalized_cross_entropy)
export(loss_categorical_hinge)
export(loss_circle)
export(loss_cosine_similarity)
@@ -493,6 +503,7 @@ export(np_array)
export(op_abs)
export(op_add)
export(op_all)
+export(op_angle)
export(op_any)
export(op_append)
export(op_arange)
@@ -511,6 +522,7 @@ export(op_array)
export(op_associative_scan)
export(op_average)
export(op_average_pool)
+export(op_bartlett)
export(op_batch_normalization)
export(op_binary_crossentropy)
export(op_bincount)
@@ -521,9 +533,11 @@ export(op_bitwise_not)
export(op_bitwise_or)
export(op_bitwise_right_shift)
export(op_bitwise_xor)
+export(op_blackman)
export(op_broadcast_to)
export(op_cast)
export(op_categorical_crossentropy)
+export(op_cbrt)
export(op_ceil)
export(op_celu)
export(op_cholesky)
@@ -537,6 +551,7 @@ export(op_convert_to_array)
export(op_convert_to_numpy)
export(op_convert_to_tensor)
export(op_copy)
+export(op_corrcoef)
export(op_correlate)
export(op_cos)
export(op_cosh)
@@ -547,6 +562,7 @@ export(op_ctc_loss)
export(op_cumprod)
export(op_cumsum)
export(op_custom_gradient)
+export(op_deg2rad)
export(op_depthwise_conv)
export(op_det)
export(op_diag)
@@ -586,11 +602,14 @@ export(op_get_item)
export(op_glu)
export(op_greater)
export(op_greater_equal)
+export(op_hamming)
+export(op_hanning)
export(op_hard_shrink)
export(op_hard_sigmoid)
export(op_hard_silu)
export(op_hard_swish)
export(op_hard_tanh)
+export(op_heaviside)
export(op_histogram)
export(op_hstack)
export(op_identity)
@@ -598,6 +617,7 @@ export(op_ifft2)
export(op_imag)
export(op_image_affine_transform)
export(op_image_crop)
+export(op_image_elastic_transform)
export(op_image_extract_patches)
export(op_image_gaussian_blur)
export(op_image_hsv_to_rgb)
@@ -617,6 +637,8 @@ export(op_isfinite)
export(op_isinf)
export(op_isnan)
export(op_istft)
+export(op_kaiser)
+export(op_layer_normalization)
export(op_leaky_relu)
export(op_left_shift)
export(op_less)
@@ -718,6 +740,7 @@ export(op_solve_triangular)
export(op_sort)
export(op_sparse_categorical_crossentropy)
export(op_sparse_plus)
+export(op_sparse_sigmoid)
export(op_sparsemax)
export(op_split)
export(op_sqrt)
@@ -756,6 +779,8 @@ export(op_var)
export(op_vdot)
export(op_vectorize)
export(op_vectorized_map)
+export(op_view_as_complex)
+export(op_view_as_real)
export(op_vstack)
export(op_where)
export(op_while_loop)
@@ -771,6 +796,7 @@ export(optimizer_ftrl)
export(optimizer_lamb)
export(optimizer_lion)
export(optimizer_loss_scale)
+export(optimizer_muon)
export(optimizer_nadam)
export(optimizer_rmsprop)
export(optimizer_sgd)
diff --git a/NEWS.md b/NEWS.md
index 5f32df531..ec7e290a7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -9,6 +9,53 @@
- Updated dependencies declared by `use_backend("jax", gpu=TRUE)`
  for compatibility with `keras-hub`.
+- Added training loop configuration helpers:
+ `config_max_epochs()`, `config_set_max_epochs()`, `config_max_steps_per_epoch()`,
+ and `config_set_max_steps_per_epoch()`. The caps can also be set via the
+ `KERAS_MAX_EPOCHS` and `KERAS_MAX_STEPS_PER_EPOCH` environment variables.
+ Added `config_is_nnx_enabled()` to check whether JAX NNX features are enabled.
+
+- LoRA-enabled layers (`layer_dense()`, `layer_embedding()`, `layer_einsum_dense()`)
+ gain a `lora_alpha` argument to scale the adaptation delta independently of the
+ chosen rank.
+
+- `keras_variable()` now accepts a `synchronization` argument for distributed
+ strategies.
+
+- `Layer$add_weight()` gains an `overwrite_with_gradient` option and
+ layers now provide a `symbolic_call()` method.
+
+- Transposed convolution utilities now follow the latest Keras API:
+  `op_conv_transpose()` defaults to `strides = 1` and the `layer_conv_*_transpose()`
+  layers expose `output_padding` for precise shape control.
+
+- `layer_torch_module_wrapper()` gains an `output_shape` argument to help Keras
+ infer shapes when wrapping PyTorch modules.
+
+- `save_model_weights()` adds a `max_shard_size` argument to split large weight
+ files into manageable shards.
+
+- Added elastic deformation utilities for images: `layer_random_elastic_transform()`
+ and the lower-level `op_image_elastic_transform()`.
+
+- Added `loss_categorical_generalized_cross_entropy()` for training with noisy
+ labels.
+
+- Added the Muon optimizer via `optimizer_muon()`.
+
+- Added complex-valued helpers: S3 `Arg()` methods for tensors, `op_angle()`,
+ and conversions `op_view_as_real()` / `op_view_as_complex()`.
+
+- Added signal window operations: `op_bartlett()`, `op_blackman()`,
+ `op_hamming()`, `op_hanning()`, and `op_kaiser()`.
+
+- Expanded numeric operations with `op_layer_normalization()`, `op_cbrt()`,
+ `op_corrcoef()`, `op_deg2rad()`, `op_heaviside()`, the new `op_sparse_sigmoid()`
+ plus matching `activation_sparse_sigmoid()`, and an `attn_logits_soft_cap`
+ argument for `op_dot_product_attention()`.
+
+- `layer_layer_normalization()` removes the `rms_scaling` argument.
+
# keras3 1.4.0
- New `op_subset()` and `x@r[...]` methods enable tensor subsetting
diff --git a/R/Layer.R b/R/Layer.R
index 28cbb9d41..889dc6056 100644
--- a/R/Layer.R
+++ b/R/Layer.R
@@ -234,6 +234,7 @@
#' regularizer = NULL,
#' constraint = NULL,
#' aggregation = 'none',
+#' overwrite_with_gradient = FALSE,
#' name = NULL)
#' ```
#' Add a weight variable to the layer.
@@ -268,10 +269,12 @@
#' or string name of a built-in constraint.
#' Defaults to `NULL`.
#' * `aggregation`: Optional string, one of `NULL`, `"none"`, `"mean"`,
-#' `"sum"` or `"only_first_replica"`. Annotates the variable with
-#' the type of multi-replica aggregation to be used for this
-#' variable when writing custom data parallel training loops.
-#' Defaults to `"none"`.
+#' `"sum"` or `"only_first_replica"`. Annotates the variable with
+#' the type of multi-replica aggregation to be used for this
+#' variable when writing custom data parallel training loops.
+#' Defaults to `"none"`.
+#' * `overwrite_with_gradient`: Boolean, whether to overwrite the variable with
+#' the computed gradient. Useful for float8 training. Defaults to `FALSE`.
#' * `name`: String name of the variable. Useful for debugging purposes.
#'
#' Returns:
@@ -327,6 +330,10 @@
#' ```
#'
#' * ```r
+#' symbolic_call(...)
+#' ```
+#'
+#' * ```r
#' count_params()
#' ```
#' Count the total number of scalars composing the weights.
@@ -692,5 +699,3 @@ function(classname,
# ' @param .composing Bare Keras Layers (`layer_*` functions) conventionally
# have `object` as the first argument, which allows users to instantiate
# (`initialize`) and `call` one motion.
-
-
diff --git a/R/activations.R b/R/activations.R
index e674a2cf7..cc91b6124 100644
--- a/R/activations.R
+++ b/R/activations.R
@@ -25,7 +25,7 @@
#' Input tensor.
#'
#' @param alpha
-#' Numeric. See description for details.
+#' A scalar, slope of positive section. Defaults to `1.0`.
#'
#' @returns A tensor, the result from applying the activation to the input tensor `x`.
#' @export
@@ -793,6 +793,30 @@ function (x)
keras$activations$sparse_plus(x)
}
+#' Sparse sigmoid activation function.
+#'
+#' @description
+#' It is defined as
+#'
+#' `f(x) = 0` for `x <= -1`,
+#' `f(x) = 0.5 * (x + 1)` for `-1 < x < 1`,
+#' `f(x) = 1` for `x >= 1`.
+#'
+#' # Reference
+#' - [M. Blondel, A. F. T. Martins, V. Niculae, 2019](https://arxiv.org/pdf/1901.02324)
+#'
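+#' # Examples
+#' A quick numeric check of the piecewise definition above (not from the
+#' upstream docs; the expected values follow directly from the formula):
+#'
+#' ```r
+#' x <- op_array(c(-2, -0.5, 0, 0.5, 2))
+#' activation_sparse_sigmoid(x)
+#' # 0.00  0.25  0.50  0.75  1.00
+#' ```
+#'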
+#' @param x
+#' Input tensor.
+#'
+#' @family activations
+#' @inherit activation_elu return
+#' @export
+#' @tether keras.activations.sparse_sigmoid
+activation_sparse_sigmoid <-
+function (x) {
+ keras$activations$sparse_sigmoid(x)
+}
+
#' Sparsemax activation function.
#'
#' @description
diff --git a/R/callbacks.R b/R/callbacks.R
index dbd3e69ac..98e722deb 100644
--- a/R/callbacks.R
+++ b/R/callbacks.R
@@ -251,8 +251,8 @@ function (monitor = "val_loss", min_delta = 0L, patience = 0L,
#' `epoch`, `logs`
#' - `on_train_begin` and `on_train_end` expect one positional argument:
#' `logs`
-#' - `on_train_batch_begin` and `on_train_batch_end` expect two positional
-#' arguments: `batch`, `logs`
+#' - `on_train_batch_begin` and `on_train_batch_end` expect a positional
+#' argument `batch` and a named argument `logs`
#' - See `Callback` class definition for the full list of functions and their
#' expected arguments.
#'
@@ -326,7 +326,7 @@ function (monitor = "val_loss", min_delta = 0L, patience = 0L,
#' Any function in [`Callback()`] that you want to override by
#' passing `function_name = function`. For example,
#' `callback_lambda(.., on_train_end = train_end_fn)`. The custom function
-#' needs to have same arguments as the ones defined in [`Callback()`].
+#' needs to have the same arguments as the ones defined in [`Callback()`].
#'
#' @inherit callback_backup_and_restore return
#' @export
@@ -534,9 +534,8 @@ function (schedule, verbose = 0L)
#' decision to overwrite the current save file is made based on either
#' the maximization or the minimization of the monitored quantity.
#' For `val_acc`, this should be `"max"`, for `val_loss` this should be
-#' `"min"`, etc. In `"auto"` mode, the mode is set to `"max"` if the
-#' quantities monitored are `"acc"` or start with `"fmeasure"` and are
-#' set to `"min"` for the rest of the quantities.
+#' `"min"`, etc. In `"auto"` mode, the direction is automatically inferred from
+#' the name of the monitored quantity.
#'
#' @param save_weights_only
#' if TRUE, then only the model's weights will be saved
@@ -995,4 +994,3 @@ normalize_callbacks_with_metrics <- function(view_metrics, initial_epoch, callba
callbacks
}
-
diff --git a/R/config.R b/R/config.R
index 29bc5752e..a12dac02c 100644
--- a/R/config.R
+++ b/R/config.R
@@ -289,6 +289,70 @@ function (data_format)
}
+#' Configure the default training loop limits.
+#'
+#' @description
+#' These helpers control the caps that built-in training loops obey when running
+#' `fit()`, `evaluate()`, or `predict()`. The values can also be provided via the
+#' `KERAS_MAX_EPOCHS` or `KERAS_MAX_STEPS_PER_EPOCH` environment variables to
+#' quickly constrain a run without modifying source code.
+#'
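+#' # Examples
+#' A minimal sketch (not from the upstream docs) of capping a quick smoke-test
+#' run and then removing the caps again:
+#'
+#' ```r
+#' config_set_max_epochs(1)           # fit() will run at most 1 epoch
+#' config_set_max_steps_per_epoch(5)  # and at most 5 steps per epoch
+#' config_max_epochs()
+#' config_set_max_epochs(NULL)        # remove the caps
+#' config_set_max_steps_per_epoch(NULL)
+#' ```
+#'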
+#' @returns
+#' `config_max_epochs()` and `config_max_steps_per_epoch()` return the current
+#' integer limits (or `NULL` if the cap is unset). The setter variants return
+#' `NULL` invisibly and are called for side effects.
+#'
+#' @param max_epochs
+#' Integer upper bound for epochs processed by built-in training loops. Use
+#' `NULL` to remove the cap.
+#'
+#' @param max_steps_per_epoch
+#' Integer upper bound for steps processed per epoch by built-in training
+#' loops. Use `NULL` to remove the cap.
+#'
+#' @name config_max_epochs
+#' @family config
+#' @rdname config_max_epochs
+#' @export
+#' @tether keras.config.max_epochs
+config_max_epochs <-
+function ()
+{
+ args <- capture_args()
+ do.call(keras$config$max_epochs, args)
+}
+
+#' @rdname config_max_epochs
+#' @export
+#' @tether keras.config.set_max_epochs
+config_set_max_epochs <-
+function (max_epochs)
+{
+ args <- capture_args(list(max_epochs = as_integer))
+ do.call(keras$config$set_max_epochs, args)
+}
+
+#' @rdname config_max_epochs
+#' @export
+#' @tether keras.config.max_steps_per_epoch
+config_max_steps_per_epoch <-
+function ()
+{
+ args <- capture_args()
+ do.call(keras$config$max_steps_per_epoch, args)
+}
+
+#' @rdname config_max_epochs
+#' @export
+#' @tether keras.config.set_max_steps_per_epoch
+config_set_max_steps_per_epoch <-
+function (max_steps_per_epoch)
+{
+ args <- capture_args(list(max_steps_per_epoch = as_integer))
+ do.call(keras$config$set_max_steps_per_epoch, args)
+}
+
+
#' Disables safe mode globally, allowing deserialization of lambdas.
#'
#' @returns No return value, called for side effects.
@@ -378,6 +442,21 @@ function ()
}
+#' Check whether NNX-specific features are enabled on the JAX backend.
+#'
+#' @returns
+#' Logical flag; `TRUE` if NNX backend features are enabled, `FALSE` otherwise.
+#'
+#' @export
+#' @family config
+#' @tether keras.config.is_nnx_enabled
+config_is_nnx_enabled <-
+function ()
+{
+ keras$config$is_nnx_enabled()
+}
+
+
#' Turn off traceback filtering.
#'
#' @description
diff --git a/R/freeze.R b/R/freeze.R
index 64d8cde72..44e6b8a1c 100644
--- a/R/freeze.R
+++ b/R/freeze.R
@@ -74,10 +74,11 @@
#' conv_base
#'
#' # Freeze only layers of a certain type, e.g, BatchNorm layers
-#' batch_norm_layer_class_name <- class(layer_batch_normalization())[1]
-#' is_batch_norm_layer <- function(x) inherits(x, batch_norm_layer_class_name)
+#' # batch_norm_layer_class_name <- class(layer_batch_normalization())[1]
+#' # is_batch_norm_layer <- function(x) inherits(x, batch_norm_layer_class_name)
+#' is_batch_norm_layer <- function(x) inherits(x, keras$layers$BatchNormalization)
#'
-#' model <- application_efficientnet_b0()
+#' model <- application_efficientnet_v2b0()
#' freeze_weights(model, which = is_batch_norm_layer)
#' # print(model)
#'
diff --git a/R/layers-backend-wrappers.R b/R/layers-backend-wrappers.R
index 32375b099..76e4e660e 100644
--- a/R/layers-backend-wrappers.R
+++ b/R/layers-backend-wrappers.R
@@ -76,6 +76,9 @@
#' passing the instance to `layer_torch_module_wrapper` (e.g. by calling
#' it once).
#'
+#' @param output_shape
+#' Shape of the output from this layer. Helps Keras infer shapes.
+#'
#' @param name
#' The name of the layer (string).
#'
@@ -91,10 +94,11 @@
#' @family layers
#' @tether keras.layers.TorchModuleWrapper
layer_torch_module_wrapper <-
-function (object, module, name = NULL, ...)
+function (object, module, output_shape = NULL, name = NULL, ...)
{
args <- capture_args(list(input_shape = normalize_shape,
- batch_size = as_integer, batch_input_shape = normalize_shape),
+ batch_size = as_integer, batch_input_shape = normalize_shape,
+ output_shape = normalize_shape),
ignore = "object")
create_layer(keras$layers$TorchModuleWrapper, object, args)
}
diff --git a/R/layers-convolutional.R b/R/layers-convolutional.R
index 64ff5410f..e83eab6a1 100644
--- a/R/layers-convolutional.R
+++ b/R/layers-convolutional.R
@@ -204,6 +204,10 @@ function (object, filters, kernel_size, strides = 1L, padding = "valid",
#' the left/right or up/down of the input such that output has the same
#' height/width dimension as the input.
#'
+#' @param output_padding
+#' Scalar integer. Amount of padding to add to the output length. Must be less
+#' than the stride. When `NULL` (default) the output size is inferred.
+#'
#' @param data_format
#' string, either `"channels_last"` or `"channels_first"`.
#' The ordering of the dimensions in the inputs. `"channels_last"`
@@ -214,8 +218,9 @@ function (object, filters, kernel_size, strides = 1L, padding = "valid",
#' If you never set it, then it will be `"channels_last"`.
#'
#' @param dilation_rate
-#' int or list of 1 integers, specifying the dilation
-#' rate to use for dilated transposed convolution.
+#' Scalar integer specifying the dilation rate. Values other than 1 currently
+#' require `strides = 1`; dilation rates greater than 1 are otherwise not
+#' supported.
#'
#' @param activation
#' Activation function. If `NULL`, no activation is applied.
@@ -267,13 +272,14 @@ function (object, filters, kernel_size, strides = 1L, padding = "valid",
#' @tether keras.layers.Conv1DTranspose
layer_conv_1d_transpose <-
function (object, filters, kernel_size, strides = 1L, padding = "valid",
- data_format = NULL, dilation_rate = 1L, activation = NULL,
+ output_padding = NULL, data_format = NULL, dilation_rate = 1L, activation = NULL,
use_bias = TRUE, kernel_initializer = "glorot_uniform", bias_initializer = "zeros",
kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL,
kernel_constraint = NULL, bias_constraint = NULL, ...)
{
args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple,
- strides = as_integer_tuple, dilation_rate = as_integer_tuple,
+ strides = as_integer_tuple, output_padding = as_integer_tuple,
+ dilation_rate = as_integer_tuple,
input_shape = normalize_shape, batch_size = as_integer,
batch_input_shape = normalize_shape), ignore = "object")
create_layer(keras$layers$Conv1DTranspose, object, args)
@@ -489,6 +495,11 @@ function (object, filters, kernel_size, strides = list(1L, 1L),
#' the left/right or up/down of the input. When `padding="same"` and
#' `strides=1`, the output has the same size as the input.
#'
+#' @param output_padding
+#' Scalar integer or vector of two integers. Amount of padding to add to the
+#' height and width of the output tensor. Each element must be smaller than the
+#' corresponding stride. When `NULL` (default) the output size is inferred.
+#'
#' @param data_format
#' string, either `"channels_last"` or `"channels_first"`.
#' The ordering of the dimensions in the inputs. `"channels_last"`
@@ -501,8 +512,9 @@ function (object, filters, kernel_size, strides = list(1L, 1L),
#' `"channels_last"`.
#'
#' @param dilation_rate
-#' int or list of 1 integers, specifying the dilation
-#' rate to use for dilated transposed convolution.
+#' Scalar integer or vector of 2 integers specifying the dilation rate. Values
+#' other than 1 require `strides = 1`; different rates per dimension are not
+#' supported.
#'
#' @param activation
#' Activation function. If `NULL`, no activation is applied.
@@ -554,14 +566,15 @@ function (object, filters, kernel_size, strides = list(1L, 1L),
#' @tether keras.layers.Conv2DTranspose
layer_conv_2d_transpose <-
function (object, filters, kernel_size, strides = list(1L, 1L),
- padding = "valid", data_format = NULL, dilation_rate = list(
- 1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform",
+ padding = "valid", output_padding = NULL, data_format = NULL,
+ dilation_rate = list(1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform",
bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL,
activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL,
...)
{
args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple,
- strides = as_integer_tuple, dilation_rate = as_integer_tuple,
+ strides = as_integer_tuple, output_padding = as_integer_tuple,
+ dilation_rate = as_integer_tuple,
input_shape = normalize_shape, batch_size = as_integer,
batch_input_shape = normalize_shape), ignore = "object")
create_layer(keras$layers$Conv2DTranspose, object, args)
@@ -639,7 +652,7 @@ function (object, filters, kernel_size, strides = list(1L, 1L),
#' will be `"channels_last"`.
#'
#' @param dilation_rate
-#' int or list of 3 integers, specifying the dilation
+#' int or vector of 3 integers, specifying the dilation
#' rate to use for dilated convolution.
#'
#' @param groups
@@ -778,6 +791,12 @@ function (object, filters, kernel_size, strides = list(1L, 1L,
#' the left/right or up/down of the input. When `padding="same"` and
#' `strides=1`, the output has the same size as the input.
#'
+#' @param output_padding
+#' Scalar integer or vector of three integers. Amount of padding to add to the
+#' depth, height, and width of the output tensor. Each element must be smaller
+#' than the corresponding stride. When `NULL` (default) the output size is
+#' inferred.
+#'
#' @param data_format
#' string, either `"channels_last"` or `"channels_first"`.
#' The ordering of the dimensions in the inputs. `"channels_last"`
@@ -790,8 +809,9 @@ function (object, filters, kernel_size, strides = list(1L, 1L,
#' will be `"channels_last"`.
#'
#' @param dilation_rate
-#' int or list of 1 integers, specifying the dilation
-#' rate to use for dilated transposed convolution.
+#' Scalar integer or vector of 3 integers specifying the dilation rate. Values
+#' other than 1 require `strides = 1`; different rates per dimension are not
+#' supported.
#'
#' @param activation
#' Activation function. If `NULL`, no activation is applied.
@@ -843,14 +863,15 @@ function (object, filters, kernel_size, strides = list(1L, 1L,
#' @tether keras.layers.Conv3DTranspose
layer_conv_3d_transpose <-
function (object, filters, kernel_size, strides = list(1L, 1L,
- 1L), padding = "valid", data_format = NULL, dilation_rate = list(
- 1L, 1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform",
+ 1L), padding = "valid", output_padding = NULL, data_format = NULL,
+ dilation_rate = list(1L, 1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform",
bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL,
activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL,
...)
{
args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple,
- strides = as_integer_tuple, dilation_rate = as_integer_tuple,
+ strides = as_integer_tuple, output_padding = as_integer_tuple,
+ dilation_rate = as_integer_tuple,
input_shape = normalize_shape, batch_size = as_integer,
batch_input_shape = normalize_shape), ignore = "object")
create_layer(keras$layers$Conv3DTranspose, object, args)
diff --git a/R/layers-core.R b/R/layers-core.R
index 231c271e7..1cb2928b6 100644
--- a/R/layers-core.R
+++ b/R/layers-core.R
@@ -35,6 +35,7 @@
#' - ```r
#' enable_lora(
#' rank,
+#' lora_alpha = NULL,
#' a_initializer = 'he_uniform',
#' b_initializer = 'zeros'
#' )
@@ -94,6 +95,11 @@
#' You can also enable LoRA on an existing
#' `Dense` layer by calling `layer$enable_lora(rank)`.
#'
+#' @param lora_alpha
+#' Optional integer. Scales the low-rank adaptation delta during the forward
+#' pass. The delta is scaled by `lora_alpha / lora_rank`, letting you tune the
+#' LoRA adjustment strength independently of `lora_rank`.
+#'
#' @param object
#' Object to compose the layer with. A tensor, array, or sequential model.
#'
@@ -119,9 +125,11 @@ function (object, units, activation = NULL, use_bias = TRUE,
kernel_initializer = "glorot_uniform", bias_initializer = "zeros",
kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL,
kernel_constraint = NULL, bias_constraint = NULL, lora_rank = NULL,
+ lora_alpha = NULL,
...)
{
args <- capture_args(list(units = as_integer, lora_rank = as_integer,
+ lora_alpha = as_integer,
input_shape = normalize_shape, batch_size = as_integer,
batch_input_shape = normalize_shape), ignore = "object")
create_layer(keras$layers$Dense, object, args)
@@ -261,6 +269,11 @@ function (object, units, activation = NULL, use_bias = TRUE,
#' You can also enable LoRA on an existing
#' `EinsumDense` layer by calling `layer$enable_lora(rank)`.
#'
+#' @param lora_alpha
+#' Optional integer. Scales the low-rank adaptation delta during the forward
+#' pass. The delta is scaled by `lora_alpha / lora_rank`, letting you tune the
+#' LoRA adjustment strength independently of `lora_rank`.
+#'
#' @param ...
#' Base layer keyword arguments, such as `name` and `dtype`.
#'
@@ -280,10 +293,10 @@ function (object, equation, output_shape, activation = NULL,
bias_axes = NULL, kernel_initializer = "glorot_uniform",
bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL,
kernel_constraint = NULL, bias_constraint = NULL, lora_rank = NULL,
- ...)
+ lora_alpha = NULL, ...)
{
- args <- capture_args(list(lora_rank = as_integer, input_shape = normalize_shape,
- batch_size = as_integer, batch_input_shape = normalize_shape,
+ args <- capture_args(list(lora_rank = as_integer, lora_alpha = as_integer,
+ input_shape = normalize_shape, batch_size = as_integer, batch_input_shape = normalize_shape,
output_shape = normalize_shape), ignore = "object")
create_layer(keras$layers$EinsumDense, object, args)
}
@@ -324,12 +337,17 @@ function (object, equation, output_shape, activation = NULL,
#' - ```r
#' enable_lora(
#' rank,
+#' lora_alpha = NULL,
#' a_initializer = 'he_uniform',
#' b_initializer = 'zeros'
#' )
#' ```
#'
#' - ```r
+#' compute_output_spec(...)
+#' ```
+#'
+#' - ```r
#' quantize(mode, type_check = TRUE)
#' ```
#'
@@ -391,6 +409,11 @@ function (object, equation, output_shape, activation = NULL,
#' You can also enable LoRA on an existing
#' `Embedding` layer instance by calling `layer$enable_lora(rank)`.
#'
+#' @param lora_alpha
+#' Optional integer. Scales the low-rank adaptation delta during the forward
+#' pass. The delta is scaled by `lora_alpha / lora_rank`, letting you tune the
+#' LoRA adjustment strength independently of `lora_rank`.
+#'
#' @param object
#' Object to compose the layer with. A tensor, array, or sequential model.
#'
@@ -408,11 +431,13 @@ function (object, equation, output_shape, activation = NULL,
layer_embedding <-
function (object, input_dim, output_dim, embeddings_initializer = "uniform",
embeddings_regularizer = NULL, embeddings_constraint = NULL,
- mask_zero = FALSE, weights = NULL, lora_rank = NULL, ...)
+ mask_zero = FALSE, weights = NULL, lora_rank = NULL, lora_alpha = NULL,
+ ...)
{
args <- capture_args(list(input_dim = as_integer, output_dim = as_integer,
- input_shape = normalize_shape, batch_size = as_integer,
- batch_input_shape = normalize_shape, input_length = as_integer),
+ lora_rank = as_integer, lora_alpha = as_integer, input_shape = normalize_shape,
+ batch_size = as_integer, batch_input_shape = normalize_shape,
+ input_length = as_integer),
ignore = "object")
create_layer(keras$layers$Embedding, object, args)
}
diff --git a/R/layers-normalization.R b/R/layers-normalization.R
index 4ccd03d0b..26bdee3e5 100644
--- a/R/layers-normalization.R
+++ b/R/layers-normalization.R
@@ -349,14 +349,6 @@ function (object, groups = 32L, axis = -1L, epsilon = 0.001,
#' disabled since the scaling will be done by the next layer.
#' Defaults to `TRUE`.
#'
-#' @param rms_scaling
-#' If `TRUE`, `center` and `scale` are ignored, and the
-#' inputs are scaled by `gamma` and the inverse square root
-#' of the square of all inputs. This is an approximate and faster
-#' approach that avoids ever computing the mean of the input. Note that
-#' this *isn't* equivalent to the computation that the
-#' `layer_rms_normalization` layer performs.
-#'
#' @param beta_initializer
#' Initializer for the beta weight. Defaults to zeros.
#'
@@ -395,7 +387,7 @@ function (object, groups = 32L, axis = -1L, epsilon = 0.001,
#' @tether keras.layers.LayerNormalization
layer_layer_normalization <-
function (object, axis = -1L, epsilon = 0.001, center = TRUE,
- scale = TRUE, rms_scaling = FALSE, beta_initializer = "zeros",
+ scale = TRUE, beta_initializer = "zeros",
gamma_initializer = "ones", beta_regularizer = NULL, gamma_regularizer = NULL,
beta_constraint = NULL, gamma_constraint = NULL, ...)
{
diff --git a/R/layers-pooling.R b/R/layers-pooling.R
index a4ed58a72..0b36113a7 100644
--- a/R/layers-pooling.R
+++ b/R/layers-pooling.R
@@ -122,7 +122,7 @@ function (object, pool_size, strides = NULL, padding = "valid",
#' (when `input_shape >= pool_size`)
#'
#' The resulting output shape when using the `"same"` padding option is:
-#' `output_shape = math.floor((input_shape - 1) / strides) + 1`
+#' `output_shape = input_shape`
#'
#' # Input Shape
#' - If `data_format="channels_last"`:
diff --git a/R/layers-preprocessing.R b/R/layers-preprocessing.R
index 7ffdefb3f..994b7fe57 100644
--- a/R/layers-preprocessing.R
+++ b/R/layers-preprocessing.R
@@ -1681,10 +1681,14 @@ function (object, height_factor, width_factor = NULL, fill_mode = "reflect",
#' (independently of which backend you're using).
#'
#' @param scale
-#' Float, the scale to apply to the inputs.
+#' Numeric scalar, vector, or array. The scale to apply to the inputs. If
+#' scalar, the same scale is applied to every feature or channel; if a vector
+#' or array, scaling is applied per channel.
#'
#' @param offset
-#' Float, the offset to apply to the inputs.
+#' Numeric scalar, vector, or array. The offset to apply to the inputs. If
+#' scalar, the same offset is applied to every feature or channel; if a vector
+#' or array, the shift is applied per channel.
#'
#' @param ...
#' Base layer keyword arguments, such as `name` and `dtype`.
@@ -2655,6 +2659,95 @@ function (object, factor = 1, scale = list(0.02, 0.33), fill_value = NULL,
create_layer(keras$layers$RandomErasing, object, args)
}
+
+#' A preprocessing layer that applies random elastic transformations.
+#'
+#' @description
+#' This layer distorts input images by applying elastic deformations,
+#' simulating a physically realistic transformation. The magnitude of the
+#' distortion is controlled by the `scale` parameter, while the `factor`
+#' determines the probability of applying the transformation.
+#'
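+#' # Examples
+#' A minimal usage sketch (random data, illustrative argument values only):
+#'
+#' ```r
+#' images <- op_array(array(runif(2 * 32 * 32 * 3), dim = c(2, 32, 32, 3)))
+#' augment <- layer_random_elastic_transform(scale = 3, value_range = c(0, 1))
+#' augmented <- augment(images)
+#' ```
+#'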
+#' @param factor
+#' A single float or a tuple of two floats.
+#' `factor` controls the probability of applying the transformation.
+#' - `factor = 0.0` ensures no transformation is applied.
+#' - `factor = 1.0` means the transformation is always applied.
+#' - If a tuple `(min, max)` is provided, a probability value
+#' is sampled between `min` and `max` for each image.
+#' - If a single float is provided, a probability is sampled
+#' between `0.0` and the given float.
+#' Default is `1.0`.
+#'
+#' @param scale
+#' A float or a tuple of two floats defining the magnitude of
+#' the distortion applied.
+#' - If a tuple `(min, max)` is provided, a random scale value is
+#' sampled within this range.
+#' - If a single float is provided, a random scale value is sampled
+#' between `0.0` and the given float.
+#' Default is `1.0`.
+#'
+#' @param interpolation
+#' Interpolation mode. Supported values: `"nearest"`,
+#' `"bilinear"`.
+#'
+#' @param fill_mode
+#' Points outside the boundaries of the input are filled
+#' according to the given mode. Available methods are `"constant"`,
+#' `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`.
+#' - `"reflect"`: `(d c b a | a b c d | d c b a)`
+#' The input is extended by reflecting about the edge of the last
+#' pixel.
+#' - `"constant"`: `(k k k k | a b c d | k k k k)`
+#' The input is extended by filling all values beyond
+#' the edge with the same constant value `k` specified by
+#' `fill_value`.
+#' - `"wrap"`: `(a b c d | a b c d | a b c d)`
+#' The input is extended by wrapping around to the opposite edge.
+#' - `"nearest"`: `(a a a a | a b c d | d d d d)`
+#' The input is extended by the nearest pixel.
+#' When using the torch backend, `"reflect"` is redirected to
+#' `"mirror"` because torch does not support `"reflect"`.
+#' The torch backend also does not support `"wrap"`.
+#'
+#' @param fill_value
+#' A float representing the value to fill outside the boundaries when
+#' `fill_mode = "constant"`.
+#'
+#' @param value_range
+#' The range of values the incoming images will have.
+#' Represented as a two-number tuple written `[low, high]`. This is
+#' typically either `[0, 1]` or `[0, 255]` depending on how your
+#' preprocessing pipeline is set up.
+#'
+#' @param seed
+#' Integer. Used to create a random seed.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inheritParams layer_center_crop
+#'
+#' @export
+#' @tether keras.layers.RandomElasticTransform
+#' @family image preprocessing layers
+#' @family preprocessing layers
+#' @family layers
+layer_random_elastic_transform <-
+function (object, factor = 1, scale = 1, interpolation = "bilinear",
+ fill_mode = "reflect", fill_value = 0, value_range = list(0L, 255L),
+ seed = NULL, data_format = NULL, ...)
+{
+ args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+ batch_size = as_integer, batch_input_shape = normalize_shape),
+ ignore = "object")
+ create_layer(keras$layers$RandomElasticTransform, object, args)
+}
+
#' Applies random Gaussian blur to images for data augmentation.
#'
#' @description
diff --git a/R/layers-rnn.R b/R/layers-rnn.R
index b1c283e4b..97a0d7790 100644
--- a/R/layers-rnn.R
+++ b/R/layers-rnn.R
@@ -1442,7 +1442,6 @@ function (units, activation = "tanh", recurrent_activation = "sigmoid",
#' initializer = 'uniform',
#' name = 'recurrent_kernel'
#' )
-#' self$built <- TRUE
#' },
#'
#' call = function(inputs, states) {
diff --git a/R/losses.R b/R/losses.R
index 2c6754049..136640812 100644
--- a/R/losses.R
+++ b/R/losses.R
@@ -647,6 +647,85 @@ function (y_true, y_pred, alpha = 0.25, gamma = 2,
}
+
+#' Computes the generalized cross entropy loss.
+#'
+#' @description
+#' The generalized cross entropy (GCE) loss offers robustness to noisy labels by
+#' interpolating between categorical cross entropy (`q -> 0`) and mean absolute
+#' error (`q -> 1`). For a true-class probability `p` and noise parameter `q`,
+#' the loss is `loss = (1 - p^q) / q`.
+#'
+#' # References
+#' - Zhang & Sabuncu (2018), "Generalized Cross Entropy Loss for Training Deep
+#' Neural Networks with Noisy Labels"
+#'
+#' # Examples
+#' ```{r}
+#' y_true <- c(0L, 1L, 0L, 1L)
+#' y_pred <- rbind(
+#' c(0.7, 0.3),
+#' c(0.2, 0.8),
+#' c(0.6, 0.4),
+#' c(0.4, 0.6)
+#' )
+#' gce <- loss_categorical_generalized_cross_entropy(q = 0.7)
+#' gce(y_true, y_pred)
+#' ```
+#'
+#' @returns
+#' Generalized cross entropy loss value(s).
+#'
+#' @param q
+#' Float in `(0, 1)`. Controls the transition between cross entropy and mean
+#' absolute error. Defaults to `0.5`.
+#'
+#' - As `q` approaches `0`: behaves like categorical cross entropy.
+#' - As `q` approaches `1`: behaves like mean absolute error.
+#'
+#' @param reduction
+#' Type of reduction to apply to the loss. In almost all cases
+#' this should be `"sum_over_batch_size"`. Supported options are
+#' `"sum"`, `"sum_over_batch_size"`, `"mean"`,
+#' `"mean_with_sample_weight"` or `NULL`. `"sum"` sums the loss,
+#' `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the
+#' sample size, and `"mean_with_sample_weight"` sums the loss and
+#' divides by the sum of the sample weights. `"none"` and `NULL`
+#' perform no aggregation. Defaults to `"sum_over_batch_size"`.
+#'
+#' @param name
+#' Optional name for the loss instance.
+#'
+#' @param dtype
+#' Dtype used for loss computations. Defaults to `config_floatx()` (the global
+#' float type).
+#'
+#' @param y_true
+#' Integer class indices with shape `(batch_size)` or `(batch_size, 1)`.
+#'
+#' @param y_pred
+#' Predicted class probabilities with shape `(batch_size, num_classes)`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @export
+#' @family losses
+#' @tether keras.losses.CategoricalGeneralizedCrossEntropy
+loss_categorical_generalized_cross_entropy <-
+function (y_true, y_pred, q = 0.5, ..., reduction = "sum_over_batch_size",
+ name = "categorical_generalized_cross_entropy", dtype = NULL)
+{
+ args <- capture_args(list(
+ y_true = as_py_array,
+ y_pred = as_py_array
+ ))
+ callable <- if (missing(y_true) && missing(y_pred))
+ keras$losses$CategoricalGeneralizedCrossEntropy
+ else keras$losses$categorical_generalized_cross_entropy
+ do.call(callable, args)
+}
+
#' Computes the categorical hinge loss between `y_true` & `y_pred`.
#'
#' @description
@@ -820,12 +899,14 @@ function (y_true, y_pred, axis = -1L, ..., reduction = "sum_over_batch_size",
#' 1, 0, 1, 0.9), dim = c(2, 2, 2, 1))
#'
#' axis <- c(2, 3, 4)
-#' loss <- loss_dice(y_true, y_pred, axis = axis)
+#' loss_fn <- loss_dice(axis = axis, reduction = NULL)
+#' loss <- loss_fn(y_true, y_pred)
#' stopifnot(shape(loss) == shape(2))
#' loss
#'
#'
-#' loss = loss_dice(y_true, y_pred)
+#' loss_fn <- loss_dice()
+#' loss <- loss_fn(y_true, y_pred)
#' stopifnot(shape(loss) == shape())
#' loss
#' ```
@@ -1539,37 +1620,34 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size",
#'
#' # Examples
#' ```{r}
-#' y_true <- c(1, 2)
-#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1))
+#' y_true <- op_array(c(1L, 2L))
+#' y_pred <- op_array(rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)))
#' loss <- loss_sparse_categorical_crossentropy(y_true, y_pred)
#' loss
#' ```
#' ```{r}
-#' y_true <- c(1, 2)
-#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1))
+#' y_true <- op_array(c(1L, 2L))
+#' y_pred <- op_array(rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)))
#' # Using 'auto'/'sum_over_batch_size' reduction type.
#' scce <- loss_sparse_categorical_crossentropy()
-#' scce(op_array(y_true), op_array(y_pred))
-#' # 1.177
+#' scce(y_true, y_pred)
#' ```
#'
#' ```{r}
#' # Calling with 'sample_weight'.
-#' scce(op_array(y_true), op_array(y_pred), sample_weight = op_array(c(0.3, 0.7)))
+#' scce(y_true, y_pred, sample_weight = op_array(c(0.3, 0.7)))
#' ```
#'
#' ```{r}
#' # Using 'sum' reduction type.
#' scce <- loss_sparse_categorical_crossentropy(reduction="sum")
-#' scce(op_array(y_true), op_array(y_pred))
-#' # 2.354
+#' scce(y_true, y_pred)
#' ```
#'
#' ```{r}
#' # Using 'none' reduction type.
#' scce <- loss_sparse_categorical_crossentropy(reduction=NULL)
-#' scce(op_array(y_true), op_array(y_pred))
-#' # array([0.0513, 2.303], dtype=float32)
+#' scce(y_true, y_pred)
#' ```
#'
#' Usage with the `compile()` API:
diff --git a/R/metrics.R b/R/metrics.R
index afdd5fb05..42405adbc 100644
--- a/R/metrics.R
+++ b/R/metrics.R
@@ -23,8 +23,16 @@
#' ```{r}
#' y_true <- rbind(c(0, 1), c(0, 0))
#' y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6))
-#' loss <- loss_binary_focal_crossentropy(y_true, y_pred, gamma=2)
-#' loss
+#' focal_loss <- loss_binary_focal_crossentropy(y_true, y_pred, gamma = 2)
+#' focal_loss
+#'
+#' # Compare with binary crossentropy.
+#' # Binary focal crossentropy emphasizes harder examples, yielding a larger
+#' # relative loss where the model struggles.
+#' bce_loss <- loss_binary_crossentropy(y_true, y_pred)
+#' cbind(focal_loss = as.array(focal_loss),
+#' bce_loss = as.array(bce_loss),
+#' ratio = as.array(focal_loss / bce_loss))
#' ```
#'
#' @returns
diff --git a/R/model-persistence.R b/R/model-persistence.R
index 370728f8c..fc4a7a25a 100644
--- a/R/model-persistence.R
+++ b/R/model-persistence.R
@@ -166,18 +166,48 @@ function (model, custom_objects = NULL, compile = TRUE, safe_mode = TRUE)
}
-#' Saves all layer weights to a `.weights.h5` file.
+#' Saves all weights to a single file or sharded files.
#'
-#' @param model A keras Model object
+#' @description
+#' By default, the weights are saved in a single `.weights.h5` file. Enable
+#' sharding by setting `max_shard_size` (a maximum size per file, in GB) to
+#' split the weights across multiple files and produce a `.weights.json`
+#' manifest that tracks shard metadata.
+#'
+#' The saved sharded files contain:
+#'
+#' * `*.weights.json`: configuration file containing `metadata` and
+#' `weight_map` entries.
+#' * `*_xxxxxx.weights.h5`: weight shards limited by `max_shard_size`.
+#'
+#' ```{r}
+#' model <-
+#' keras_model_sequential(input_shape = 2) |>
+#' layer_dense(4)
+#'
+#' path_h5 <- tempfile(fileext = ".weights.h5")
+#' path_json <- tempfile(fileext = ".weights.json")
+#'
+#' model |> save_model_weights(path_h5)
+#' model |> save_model_weights(path_json, max_shard_size = 0.01)
+#'
+#' model |> load_model_weights(path_h5)
+#' model |> load_model_weights(path_json)
+#' ```
+#'
+#' @param model A keras Model object.
#'
#' @param filepath
-#' string.
-#' Path where to save the model. Must end in `.weights.h5`.
+#' Path where the weights will be saved. Accepts a `.weights.h5` path or, when
+#' sharding is enabled, a `.weights.json` manifest path. If a `.weights.h5` path
+#' is supplied while sharding is enabled, the filename is overridden to end in
+#' `.weights.json`.
#'
#' @param overwrite
-#' Whether we should overwrite any existing model
-#' at the target location, or instead ask the user
-#' via an interactive prompt.
+#' Whether to overwrite any existing weights at the target location, or instead
+#' ask the user via an interactive prompt.
+#'
+#' @param max_shard_size
+#' Numeric size in GB for each sharded file. Use `NULL` to disable sharding.
+#'
#'
#' @returns This is called primarily for side effects. `model` is returned,
#' invisibly, to enable usage with the pipe.
@@ -186,17 +216,21 @@ function (model, custom_objects = NULL, compile = TRUE, safe_mode = TRUE)
#' @tether keras.Model.save_weights
#' @seealso
#' +
[garbled fragments of generated reference-page diffs omitted; they mirror changes already shown above: the new `overwrite_with_gradient` argument and `symbolic_call()` method, the `callback_lambda()` argument-wording fixes, and the addition of `layer_random_elastic_transform()` to the layer family index]
diff --git a/docs/dev/reference/callback_learning_rate_scheduler.html b/docs/dev/reference/callback_learning_rate_scheduler.html
index 415d7efda..6ddde712d 100644
--- a/docs/dev/reference/callback_learning_rate_scheduler.html
+++ b/docs/dev/reference/callback_learning_rate_scheduler.html
@@ -14,7 +14,7 @@
keras3
- 1.3.0.9001
+ 1.4.0.9000
[further garbled reference-page fragments omitted; they restate the `"auto"` mode description fix and add `config_is_nnx_enabled()` and `config_max_epochs()` to the config family index]