diff --git a/.tether/man/Layer.txt b/.tether/man/Layer.txt index c0622237c..eee664282 100644 --- a/.tether/man/Layer.txt +++ b/.tether/man/Layer.txt @@ -1,6 +1,6 @@ Help on class Layer in module keras.src.layers.layer: -class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation.Operation, keras.src.saving.keras_saveable.KerasSaveable) +class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation.Operation) | Layer(*args, **kwargs) | | This is the class from which all layers inherit. @@ -243,6 +243,7 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation. | regularizer=None, | constraint=None, | aggregation='none', + | overwrite_with_gradient=False, | name=None | ) | Add a weight variable to the layer. @@ -274,6 +275,9 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation. | the type of multi-replica aggregation to be used for this | variable when writing custom data parallel training loops. | Defaults to `"none"`. + | overwrite_with_gradient: Boolean, whether to overwrite the variable + | with the computed gradient. This is useful for float8 training. + | Defaults to `False`. | name: String name of the variable. Useful for debugging purposes. | | build(self, input_shape) @@ -448,6 +452,12 @@ class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation. | ref_var.assign(value) | ``` | + | symbolic_call( + | self, + | *args, + | **kwargs + | ) + | | ---------------------------------------------------------------------- | Static methods defined here: | diff --git a/.tether/man/activation_elu.txt b/.tether/man/activation_elu.txt index d2be33c3f..13a23b8be 100644 --- a/.tether/man/activation_elu.txt +++ b/.tether/man/activation_elu.txt @@ -19,6 +19,7 @@ and the information that is propagated to the next layer. Args: x: Input tensor. + alpha: A scalar, slope of positive section. Defaults to `1.0`. 
Reference: diff --git a/.tether/man/activation_sparse_sigmoid.txt b/.tether/man/activation_sparse_sigmoid.txt new file mode 100644 index 000000000..6de7b64d3 --- /dev/null +++ b/.tether/man/activation_sparse_sigmoid.txt @@ -0,0 +1,18 @@ +__signature__ +keras.activations.sparse_sigmoid(x) +__doc__ +Sparse sigmoid activation function. + +It is defined as + +`f(x) = 0` for `x <= -1`, +`f(x) = 0.5 * (x + 1)` for `-1 < x < 1`, +`f(x) = 1` for `x >= 1`. + +Args: + x: Input tensor. + +Reference: + +- [M. Blondel, A. F. T. Martins, V. Niculae, 2019](https://arxiv.org/pdf/1901.02324) + diff --git a/.tether/man/callback_early_stopping.txt b/.tether/man/callback_early_stopping.txt index f4e735529..286c062af 100644 --- a/.tether/man/callback_early_stopping.txt +++ b/.tether/man/callback_early_stopping.txt @@ -1,6 +1,6 @@ Help on class EarlyStopping in module keras.src.callbacks.early_stopping: -class EarlyStopping(keras.src.callbacks.callback.Callback) +class EarlyStopping(keras.src.callbacks.monitor_callback.MonitorCallback) | EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto', baseline=None, restore_best_weights=False, start_from_epoch=0) | | Stop training when a monitored metric has stopped improving. 
@@ -60,6 +60,7 @@ class EarlyStopping(keras.src.callbacks.callback.Callback) | | Method resolution order: | EarlyStopping + | keras.src.callbacks.monitor_callback.MonitorCallback | keras.src.callbacks.callback.Callback | builtins.object | diff --git a/.tether/man/callback_lambda.txt b/.tether/man/callback_lambda.txt index 44cdc12f8..030982b12 100644 --- a/.tether/man/callback_lambda.txt +++ b/.tether/man/callback_lambda.txt @@ -13,8 +13,8 @@ class LambdaCallback(keras.src.callbacks.callback.Callback) | `epoch`, `logs` | - `on_train_begin` and `on_train_end` expect one positional argument: | `logs` - | - `on_train_batch_begin` and `on_train_batch_end` expect two positional - | arguments: `batch`, `logs` + | - `on_train_batch_begin` and `on_train_batch_end` expect a positional + | argument `batch` and a keyword argument `logs` | - See `Callback` class definition for the full list of functions and their | expected arguments. | @@ -79,3 +79,4 @@ class LambdaCallback(keras.src.callbacks.callback.Callback) | ) | Initialize self. See help(type(self)) for accurate signature. | + diff --git a/.tether/man/callback_model_checkpoint.txt b/.tether/man/callback_model_checkpoint.txt index c79cb11f0..13bbc3b37 100644 --- a/.tether/man/callback_model_checkpoint.txt +++ b/.tether/man/callback_model_checkpoint.txt @@ -1,6 +1,6 @@ Help on class ModelCheckpoint in module keras.src.callbacks.model_checkpoint: -class ModelCheckpoint(keras.src.callbacks.callback.Callback) +class ModelCheckpoint(keras.src.callbacks.monitor_callback.MonitorCallback) | ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch', initial_value_threshold=None) | | Callback to save the Keras model or model weights at some frequency. 
@@ -95,9 +95,8 @@ class ModelCheckpoint(keras.src.callbacks.callback.Callback) | decision to overwrite the current save file is made based on either | the maximization or the minimization of the monitored quantity. | For `val_acc`, this should be `"max"`, for `val_loss` this should be - | `"min"`, etc. In `"auto"` mode, the mode is set to `"max"` if the - | quantities monitored are `"acc"` or start with `"fmeasure"` and are - | set to `"min"` for the rest of the quantities. + | `"min"`, etc. In `"auto"` mode, the direction is automatically + | inferred from the name of the monitored quantity. | save_weights_only: if `True`, then only the model's weights will be | saved (`model.save_weights(filepath)`), else the full model is | saved (`model.save(filepath)`). @@ -116,6 +115,7 @@ class ModelCheckpoint(keras.src.callbacks.callback.Callback) | | Method resolution order: | ModelCheckpoint + | keras.src.callbacks.monitor_callback.MonitorCallback | keras.src.callbacks.callback.Callback | builtins.object | diff --git a/.tether/man/callback_reduce_lr_on_plateau.txt b/.tether/man/callback_reduce_lr_on_plateau.txt index 33451653d..ab2d51828 100644 --- a/.tether/man/callback_reduce_lr_on_plateau.txt +++ b/.tether/man/callback_reduce_lr_on_plateau.txt @@ -1,6 +1,6 @@ Help on class ReduceLROnPlateau in module keras.src.callbacks.reduce_lr_on_plateau: -class ReduceLROnPlateau(keras.src.callbacks.callback.Callback) +class ReduceLROnPlateau(keras.src.callbacks.monitor_callback.MonitorCallback) | ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.0, **kwargs) | | Reduce learning rate when a metric has stopped improving. 
@@ -39,6 +39,7 @@ class ReduceLROnPlateau(keras.src.callbacks.callback.Callback) | | Method resolution order: | ReduceLROnPlateau + | keras.src.callbacks.monitor_callback.MonitorCallback | keras.src.callbacks.callback.Callback | builtins.object | diff --git a/.tether/man/config_is_nnx_enabled.txt b/.tether/man/config_is_nnx_enabled.txt new file mode 100644 index 000000000..658046298 --- /dev/null +++ b/.tether/man/config_is_nnx_enabled.txt @@ -0,0 +1,9 @@ +__signature__ +keras.config.is_nnx_enabled() +__doc__ +Checks whether NNX specific features are enabled for the JAX backend. + +Returns: + bool: `True` if NNX backend features are enabled, `False` otherwise. + Defaults to `False`. + diff --git a/.tether/man/config_max_epochs.txt b/.tether/man/config_max_epochs.txt new file mode 100644 index 000000000..945af4d2e --- /dev/null +++ b/.tether/man/config_max_epochs.txt @@ -0,0 +1,13 @@ +__signature__ +keras.config.max_epochs() +__doc__ +Get the maximum number of epochs for any call to fit. + +Retrieves the limit on the number of epochs set by +`keras.config.set_max_epochs` or the `KERAS_MAX_EPOCHS` environment +variable. + +Returns: + The integer limit on the number of epochs or `None`, if no limit has + been set. + diff --git a/.tether/man/config_max_steps_per_epoch.txt b/.tether/man/config_max_steps_per_epoch.txt new file mode 100644 index 000000000..de4b03408 --- /dev/null +++ b/.tether/man/config_max_steps_per_epoch.txt @@ -0,0 +1,13 @@ +__signature__ +keras.config.max_steps_per_epoch() +__doc__ +Get the maximum number of steps for any call to fit/evaluate/predict. + +Retrieves the limit on the number of steps set by +`keras.config.set_max_steps_per_epoch` or the `KERAS_MAX_STEPS_PER_EPOCH` +environment variable. + +Returns: + The integer limit on the number of steps or `None`, if no limit has + been set. 
+ diff --git a/.tether/man/config_set_max_epochs.txt b/.tether/man/config_set_max_epochs.txt new file mode 100644 index 000000000..0845c02a9 --- /dev/null +++ b/.tether/man/config_set_max_epochs.txt @@ -0,0 +1,13 @@ +__signature__ +keras.config.set_max_epochs(max_epochs) +__doc__ +Limit the maximum number of epochs for any call to fit. + +This will cap the number of epochs for any training run using `model.fit()`. +This is purely for debugging, and can also be set via the `KERAS_MAX_EPOCHS` +environment variable to quickly run a script without modifying its source. + +Args: + max_epochs: The integer limit on the number of epochs or `None`. If + `None`, no limit is applied. + diff --git a/.tether/man/config_set_max_steps_per_epoch.txt b/.tether/man/config_set_max_steps_per_epoch.txt new file mode 100644 index 000000000..f0b060b19 --- /dev/null +++ b/.tether/man/config_set_max_steps_per_epoch.txt @@ -0,0 +1,14 @@ +__signature__ +keras.config.set_max_steps_per_epoch(max_steps_per_epoch) +__doc__ +Limit the maximum number of steps for any call to fit/evaluate/predict. + +This will cap the number of steps for a single epoch of a call to `fit()`, +`evaluate()`, or `predict()`. This is purely for debugging, and can also be +set via the `KERAS_MAX_STEPS_PER_EPOCH` environment variable to quickly run +a script without modifying its source. + +Args: + max_steps_per_epoch: The integer limit on the number of steps or `None`. If + `None`, no limit is applied. 
+ diff --git a/.tether/man/keras.activations.txt b/.tether/man/keras.activations.txt index 48851e264..064021d14 100644 --- a/.tether/man/keras.activations.txt +++ b/.tether/man/keras.activations.txt @@ -31,6 +31,7 @@ softmax(x, axis=-1) softplus(x) softsign(x) sparse_plus(x) +sparse_sigmoid(x) sparsemax(x, axis=-1) squareplus(x, b=4) swish(x) diff --git a/.tether/man/keras.applications.txt b/.tether/man/keras.applications.txt index 9f9c401ce..bb8eb26c9 100644 --- a/.tether/man/keras.applications.txt +++ b/.tether/man/keras.applications.txt @@ -1,4 +1,4 @@ -convnext: Module(keras.api.applications.convnext) +convnext: Module(keras.applications.convnext) ConvNeXtBase( include_top=True, include_preprocessing=True, @@ -54,7 +54,7 @@ ConvNeXtXLarge( classifier_activation='softmax', name='convnext_xlarge' ) -densenet: Module(keras.api.applications.densenet) +densenet: Module(keras.applications.densenet) DenseNet121( include_top=True, weights='imagenet', @@ -85,8 +85,8 @@ DenseNet201( classifier_activation='softmax', name='densenet201' ) -efficientnet: Module(keras.api.applications.efficientnet) -efficientnet_v2: Module(keras.api.applications.efficientnet_v2) +efficientnet: Module(keras.applications.efficientnet) +efficientnet_v2: Module(keras.applications.efficientnet_v2) EfficientNetB0( include_top=True, weights='imagenet', @@ -244,9 +244,9 @@ EfficientNetV2S( include_preprocessing=True, name='efficientnetv2-s' ) -imagenet_utils: Module(keras.api.applications.imagenet_utils) -inception_resnet_v2: Module(keras.api.applications.inception_resnet_v2) -inception_v3: Module(keras.api.applications.inception_v3) +imagenet_utils: Module(keras.applications.imagenet_utils) +inception_resnet_v2: Module(keras.applications.inception_resnet_v2) +inception_v3: Module(keras.applications.inception_v3) InceptionResNetV2( include_top=True, weights='imagenet', @@ -267,7 +267,7 @@ InceptionV3( classifier_activation='softmax', name='inception_v3' ) -mobilenet: 
Module(keras.api.applications.mobilenet) +mobilenet: Module(keras.applications.mobilenet) MobileNet( input_shape=None, alpha=1.0, @@ -281,8 +281,8 @@ MobileNet( classifier_activation='softmax', name=None ) -mobilenet_v2: Module(keras.api.applications.mobilenet_v2) -mobilenet_v3: Module(keras.api.applications.mobilenet_v3) +mobilenet_v2: Module(keras.applications.mobilenet_v2) +mobilenet_v3: Module(keras.applications.mobilenet_v3) MobileNetV2( input_shape=None, alpha=1.0, @@ -322,7 +322,7 @@ MobileNetV3Small( include_preprocessing=True, name='MobileNetV3Small' ) -nasnet: Module(keras.api.applications.nasnet) +nasnet: Module(keras.applications.nasnet) NASNetLarge( input_shape=None, include_top=True, @@ -343,8 +343,8 @@ NASNetMobile( classifier_activation='softmax', name='nasnet_mobile' ) -resnet: Module(keras.api.applications.resnet) -resnet_v2: Module(keras.api.applications.resnet_v2) +resnet: Module(keras.applications.resnet) +resnet_v2: Module(keras.applications.resnet_v2) ResNet101( include_top=True, weights='imagenet', @@ -385,7 +385,7 @@ ResNet152V2( classifier_activation='softmax', name='resnet152v2' ) -resnet50: Module(keras.api.applications.resnet50) +resnet50: Module(keras.applications.resnet50) ResNet50( include_top=True, weights='imagenet', @@ -406,7 +406,7 @@ ResNet50V2( classifier_activation='softmax', name='resnet50v2' ) -vgg16: Module(keras.api.applications.vgg16) +vgg16: Module(keras.applications.vgg16) VGG16( include_top=True, weights='imagenet', @@ -417,7 +417,7 @@ VGG16( classifier_activation='softmax', name='vgg16' ) -vgg19: Module(keras.api.applications.vgg19) +vgg19: Module(keras.applications.vgg19) VGG19( include_top=True, weights='imagenet', @@ -428,7 +428,7 @@ VGG19( classifier_activation='softmax', name='vgg19' ) -xception: Module(keras.api.applications.xception) +xception: Module(keras.applications.xception) Xception( include_top=True, weights='imagenet', diff --git a/.tether/man/keras.config.txt b/.tether/man/keras.config.txt index 
ffbe8f439..451ef0afc 100644 --- a/.tether/man/keras.config.txt +++ b/.tether/man/keras.config.txt @@ -12,10 +12,15 @@ floatx() image_data_format() is_flash_attention_enabled() is_interactive_logging_enabled() +is_nnx_enabled() is_traceback_filtering_enabled() +max_epochs() +max_steps_per_epoch() set_backend(backend) set_dtype_policy(policy) set_epsilon(value) set_floatx(value) set_image_data_format(data_format) +set_max_epochs(max_epochs) +set_max_steps_per_epoch(max_steps_per_epoch) diff --git a/.tether/man/keras.datasets.txt b/.tether/man/keras.datasets.txt index f771e1b50..79bb80818 100644 --- a/.tether/man/keras.datasets.txt +++ b/.tether/man/keras.datasets.txt @@ -1,9 +1,9 @@ -boston_housing: Module(keras.api.datasets.boston_housing) -california_housing: Module(keras.api.datasets.california_housing) -cifar10: Module(keras.api.datasets.cifar10) -cifar100: Module(keras.api.datasets.cifar100) -fashion_mnist: Module(keras.api.datasets.fashion_mnist) -imdb: Module(keras.api.datasets.imdb) -mnist: Module(keras.api.datasets.mnist) -reuters: Module(keras.api.datasets.reuters) +boston_housing: Module(keras.datasets.boston_housing) +california_housing: Module(keras.datasets.california_housing) +cifar10: Module(keras.datasets.cifar10) +cifar100: Module(keras.datasets.cifar100) +fashion_mnist: Module(keras.datasets.fashion_mnist) +imdb: Module(keras.datasets.imdb) +mnist: Module(keras.datasets.mnist) +reuters: Module(keras.datasets.reuters) diff --git a/.tether/man/keras.distribution.txt b/.tether/man/keras.distribution.txt index ee66b1b01..67af628b9 100644 --- a/.tether/man/keras.distribution.txt +++ b/.tether/man/keras.distribution.txt @@ -21,6 +21,7 @@ ModelParallel( *, layout_map=None, batch_dim_name=None, + auto_shard_dataset=True, **kwargs ) set_distribution(value) diff --git a/.tether/man/keras.layers.txt b/.tether/man/keras.layers.txt index 91187291b..c6af1f082 100644 --- a/.tether/man/keras.layers.txt +++ b/.tether/man/keras.layers.txt @@ -153,6 +153,7 @@ 
Conv1DTranspose( kernel_size, strides=1, padding='valid', + output_padding=None, data_format=None, dilation_rate=1, activation=None, @@ -190,6 +191,7 @@ Conv2DTranspose( kernel_size, strides=(1, 1), padding='valid', + output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, @@ -228,6 +230,7 @@ Conv3DTranspose( strides=(1, 1, 1), padding='valid', data_format=None, + output_padding=None, dilation_rate=(1, 1, 1), activation=None, use_bias=True, @@ -354,6 +357,7 @@ Convolution1DTranspose( kernel_size, strides=1, padding='valid', + output_padding=None, data_format=None, dilation_rate=1, activation=None, @@ -391,6 +395,7 @@ Convolution2DTranspose( kernel_size, strides=(1, 1), padding='valid', + output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, @@ -429,6 +434,7 @@ Convolution3DTranspose( strides=(1, 1, 1), padding='valid', data_format=None, + output_padding=None, dilation_rate=(1, 1, 1), activation=None, use_bias=True, @@ -470,6 +476,7 @@ Dense( kernel_constraint=None, bias_constraint=None, lora_rank=None, + lora_alpha=None, **kwargs ) DepthwiseConv1D( @@ -546,6 +553,7 @@ EinsumDense( kernel_constraint=None, bias_constraint=None, lora_rank=None, + lora_alpha=None, **kwargs ) ELU(alpha=1.0, **kwargs) @@ -558,6 +566,7 @@ Embedding( mask_zero=False, weights=None, lora_rank=None, + lora_alpha=None, **kwargs ) Equalization( @@ -808,7 +817,6 @@ LayerNormalization( epsilon=0.001, center=True, scale=True, - rms_scaling=False, beta_initializer='zeros', gamma_initializer='ones', beta_regularizer=None, @@ -1028,6 +1036,17 @@ RandomCrop( name=None, **kwargs ) +RandomElasticTransform( + factor=1.0, + scale=1.0, + interpolation='bilinear', + fill_mode='reflect', + fill_value=0.0, + value_range=(0, 255), + seed=None, + data_format=None, + **kwargs +) RandomErasing( factor=1.0, scale=(0.02, 0.33), @@ -1404,6 +1423,7 @@ TimeDistributed(layer, **kwargs) TorchModuleWrapper( module, name=None, + output_shape=None, **kwargs ) 
UnitNormalization(axis=-1, **kwargs) diff --git a/.tether/man/keras.legacy.txt b/.tether/man/keras.legacy.txt index 13f97b309..43e1bbaf2 100644 --- a/.tether/man/keras.legacy.txt +++ b/.tether/man/keras.legacy.txt @@ -1,2 +1,2 @@ -saving: Module(keras.api.legacy.saving) +saving: Module(keras.legacy.saving) diff --git a/.tether/man/keras.losses.txt b/.tether/man/keras.losses.txt index b3c44ae29..33dc78c49 100644 --- a/.tether/man/keras.losses.txt +++ b/.tether/man/keras.losses.txt @@ -50,6 +50,11 @@ categorical_focal_crossentropy( label_smoothing=0.0, axis=-1 ) +categorical_generalized_cross_entropy( + y_true, + y_pred, + q +) categorical_hinge(y_true, y_pred) CategoricalCrossentropy( from_logits=False, @@ -69,6 +74,12 @@ CategoricalFocalCrossentropy( name='categorical_focal_crossentropy', dtype=None ) +CategoricalGeneralizedCrossEntropy( + q=0.5, + reduction='sum_over_batch_size', + name='categorical_generalized_cross_entropy', + dtype=None +) CategoricalHinge( reduction='sum_over_batch_size', name='categorical_hinge', @@ -197,6 +208,7 @@ SparseCategoricalCrossentropy( from_logits=False, ignore_class=None, reduction='sum_over_batch_size', + axis=-1, name='sparse_categorical_crossentropy', dtype=None ) diff --git a/.tether/man/keras.ops.image.txt b/.tether/man/keras.ops.image.txt index 242330155..4864fdf94 100644 --- a/.tether/man/keras.ops.image.txt +++ b/.tether/man/keras.ops.image.txt @@ -16,6 +16,16 @@ crop_images( target_width=None, data_format=None ) +elastic_transform( + images, + alpha=20.0, + sigma=5.0, + interpolation='bilinear', + fill_mode='reflect', + fill_value=0.0, + seed=None, + data_format=None +) extract_patches( images, size, diff --git a/.tether/man/keras.ops.nn.txt b/.tether/man/keras.ops.nn.txt index a5655066d..141ffedbc 100644 --- a/.tether/man/keras.ops.nn.txt +++ b/.tether/man/keras.ops.nn.txt @@ -37,7 +37,7 @@ conv( conv_transpose( inputs, kernel, - strides, + strides=1, padding='valid', output_padding=None, data_format=None, @@ -75,7 +75,8 
@@ dot_product_attention( mask=None, scale=None, is_causal=False, - flash_attention=None + flash_attention=None, + attn_logits_soft_cap=None ) elu(x, alpha=1.0) gelu(x, approximate=True) @@ -85,6 +86,14 @@ hard_sigmoid(x) hard_silu(x) hard_swish(x) hard_tanh(x) +layer_normalization( + x, + gamma=None, + beta=None, + axis=-1, + epsilon=None, + **kwargs +) leaky_relu(x, negative_slope=0.2) log_sigmoid(x) log_softmax(x, axis=-1) @@ -132,7 +141,7 @@ relu(x) relu6(x) rms_normalization( x, - scale=1, + scale=None, axis=-1, epsilon=None ) @@ -159,6 +168,7 @@ sparse_categorical_crossentropy( axis=-1 ) sparse_plus(x) +sparse_sigmoid(x) sparsemax(x, axis=-1) squareplus(x, b=4) swish(x) diff --git a/.tether/man/keras.ops.numpy.txt b/.tether/man/keras.ops.numpy.txt index be45c6fe8..b91190169 100644 --- a/.tether/man/keras.ops.numpy.txt +++ b/.tether/man/keras.ops.numpy.txt @@ -16,6 +16,7 @@ amin( axis=None, keepdims=False ) +angle(x) any( x, axis=None, @@ -61,6 +62,7 @@ average( axis=None, weights=None ) +bartlett(x) bincount( x, weights=None, @@ -74,7 +76,9 @@ bitwise_not(x) bitwise_or(x, y) bitwise_right_shift(x, y) bitwise_xor(x, y) +blackman(x) broadcast_to(x, shape) +cbrt(x) ceil(x) clip( x, @@ -85,6 +89,7 @@ concatenate(xs, axis=0) conj(x) conjugate(x) copy(x) +corrcoef(x) correlate( x1, x2, @@ -111,6 +116,7 @@ cumsum( axis=None, dtype=None ) +deg2rad(x) diag(x, k=0) diagflat(x, k=0) diagonal( @@ -128,7 +134,11 @@ digitize(x, bins) divide(x1, x2) divide_no_nan(x1, x2) dot(x1, x2) -einsum(subscripts, *operands) +einsum( + subscripts, + *operands, + **kwargs +) empty(shape, dtype=None) equal(x1, x2) exp(x) @@ -157,6 +167,9 @@ full_like( get_item(x, key) greater(x1, x2) greater_equal(x1, x2) +hamming(x) +hanning(x) +heaviside(x1, x2) histogram( x, bins=10, @@ -176,6 +189,7 @@ isclose( isfinite(x) isinf(x) isnan(x) +kaiser(x, beta) left_shift(x, y) less(x1, x2) less_equal(x1, x2) @@ -293,6 +307,11 @@ rot90( axes=(0, 1) ) round(x, decimals=0) +searchsorted( + sorted_sequence, 
+ values, + side='left' +) select( condlist, choicelist, diff --git a/.tether/man/keras.ops.txt b/.tether/man/keras.ops.txt index 71440537f..2081aad40 100644 --- a/.tether/man/keras.ops.txt +++ b/.tether/man/keras.ops.txt @@ -16,6 +16,7 @@ amin( axis=None, keepdims=False ) +angle(x) any( x, axis=None, @@ -74,6 +75,7 @@ average_pool( padding='valid', data_format=None ) +bartlett(x) batch_normalization( x, mean, @@ -101,6 +103,7 @@ bitwise_not(x) bitwise_or(x, y) bitwise_right_shift(x, y) bitwise_xor(x, y) +blackman(x) broadcast_to(x, shape) cast(x, dtype) categorical_crossentropy( @@ -109,6 +112,7 @@ categorical_crossentropy( from_logits=False, axis=-1 ) +cbrt(x) ceil(x) celu(x, alpha=1.0) cholesky(x) @@ -136,7 +140,7 @@ conv( conv_transpose( inputs, kernel, - strides, + strides=1, padding='valid', output_padding=None, data_format=None, @@ -150,6 +154,7 @@ convert_to_tensor( ragged=None ) copy(x) +corrcoef(x) correlate( x1, x2, @@ -193,6 +198,7 @@ cumsum( dtype=None ) custom_gradient(f) +deg2rad(x) depthwise_conv( inputs, kernel, @@ -227,12 +233,17 @@ dot_product_attention( mask=None, scale=None, is_causal=False, - flash_attention=None + flash_attention=None, + attn_logits_soft_cap=None ) dtype(x) eig(x) eigh(x) -einsum(subscripts, *operands) +einsum( + subscripts, + *operands, + **kwargs +) elu(x, alpha=1.0) empty(shape, dtype=None) equal(x1, x2) @@ -279,11 +290,14 @@ get_item(x, key) glu(x, axis=-1) greater(x1, x2) greater_equal(x1, x2) +hamming(x) +hanning(x) hard_shrink(x, threshold=0.5) hard_sigmoid(x) hard_silu(x) hard_swish(x) hard_tanh(x) +heaviside(x1, x2) histogram( x, bins=10, @@ -293,7 +307,7 @@ hstack(xs) identity(n, dtype=None) ifft2(x) imag(x) -image: Module(keras.api.ops.image) +image: Module(keras.ops.image) in_top_k( targets, predictions, @@ -322,11 +336,20 @@ istft( window='hann', center=True ) +kaiser(x, beta) +layer_normalization( + x, + gamma=None, + beta=None, + axis=-1, + epsilon=None, + **kwargs +) leaky_relu(x, negative_slope=0.2) 
left_shift(x, y) less(x1, x2) less_equal(x1, x2) -linalg: Module(keras.api.ops.linalg) +linalg: Module(keras.ops.linalg) linspace( start, stop, @@ -431,7 +454,7 @@ nan_to_num( ) ndim(x) negative(x) -nn: Module(keras.api.ops.nn) +nn: Module(keras.ops.nn) nonzero(x) norm( x, @@ -446,7 +469,7 @@ normalize( epsilon=None ) not_equal(x1, x2) -numpy: Module(keras.api.ops.numpy) +numpy: Module(keras.ops.numpy) one_hot( x, num_classes, @@ -504,7 +527,7 @@ rfft(x, fft_length=None) right_shift(x, y) rms_normalization( x, - scale=1, + scale=None, axis=-1, epsilon=None ) @@ -608,6 +631,7 @@ sparse_categorical_crossentropy( axis=-1 ) sparse_plus(x) +sparse_sigmoid(x) sparsemax(x, axis=-1) split( x, @@ -720,6 +744,8 @@ vectorize( signature=None ) vectorized_map(function, elements) +view_as_complex(x) +view_as_real(x) vstack(xs) where( condition, diff --git a/.tether/man/keras.optimizers.txt b/.tether/man/keras.optimizers.txt index 047b7c298..557b2e339 100644 --- a/.tether/man/keras.optimizers.txt +++ b/.tether/man/keras.optimizers.txt @@ -141,7 +141,7 @@ Lamb( name='lamb', **kwargs ) -legacy: Module(keras.api.optimizers.legacy) +legacy: Module(keras.optimizers.legacy) Lion( learning_rate=0.001, beta_1=0.9, @@ -164,6 +164,32 @@ LossScaleOptimizer( dynamic_growth_steps=2000, **kwargs ) +Muon( + learning_rate=0.001, + adam_beta_1=0.9, + adam_beta_2=0.999, + epsilon=1e-07, + weight_decay=0.1, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='muon', + exclude_layers=None, + exclude_embeddings=True, + muon_a=3.4445, + muon_b=-4.775, + muon_c=2.0315, + adam_lr_ratio=0.1, + momentum=0.95, + ns_steps=6, + nesterov=True, + **kwargs +) Nadam( learning_rate=0.001, beta_1=0.9, @@ -200,7 +226,7 @@ RMSprop( name='rmsprop', **kwargs ) -schedules: Module(keras.api.optimizers.schedules) +schedules: Module(keras.optimizers.schedules) 
serialize(optimizer) SGD( learning_rate=0.01, diff --git a/.tether/man/keras.preprocessing.txt b/.tether/man/keras.preprocessing.txt index c7da2c534..4ba34405b 100644 --- a/.tether/man/keras.preprocessing.txt +++ b/.tether/man/keras.preprocessing.txt @@ -1,4 +1,4 @@ -image: Module(keras.api.preprocessing.image) +image: Module(keras.preprocessing.image) image_dataset_from_directory( directory, labels='inferred', @@ -18,7 +18,7 @@ image_dataset_from_directory( data_format=None, verbose=True ) -sequence: Module(keras.api.preprocessing.sequence) +sequence: Module(keras.preprocessing.sequence) text_dataset_from_directory( directory, labels='inferred', diff --git a/.tether/man/keras.quantizers.txt b/.tether/man/keras.quantizers.txt index 0970068c1..c113bde91 100644 --- a/.tether/man/keras.quantizers.txt +++ b/.tether/man/keras.quantizers.txt @@ -29,6 +29,7 @@ fake_quant_with_min_max_vars( axis=None ) get(identifier, **kwargs) +pack_int4(arr, axis=0) quantize_and_dequantize( inputs, scale, @@ -37,4 +38,9 @@ quantize_and_dequantize( ) Quantizer(output_dtype='int8') serialize(initializer) +unpack_int4( + packed, + orig_len, + axis=0 +) diff --git a/.tether/man/keras.saving.txt b/.tether/man/keras.saving.txt index 1c17c37ff..24be1e1c6 100644 --- a/.tether/man/keras.saving.txt +++ b/.tether/man/keras.saving.txt @@ -38,6 +38,7 @@ save_weights( model, filepath, overwrite=True, + max_shard_size=None, **kwargs ) serialize_keras_object(obj) diff --git a/.tether/man/keras.txt b/.tether/man/keras.txt index 19675df83..9fb6f9837 100644 --- a/.tether/man/keras.txt +++ b/.tether/man/keras.txt @@ -1,15 +1,15 @@ -activations: Module(keras.api.activations) -applications: Module(keras.api.applications) -backend: Module(keras.api.backend) -callbacks: Module(keras.api.callbacks) -config: Module(keras.api.config) -constraints: Module(keras.api.constraints) -datasets: Module(keras.api.datasets) +activations: Module(keras.activations) +applications: Module(keras.applications) +backend: 
Module(keras.backend) +callbacks: Module(keras.callbacks) +config: Module(keras.config) +constraints: Module(keras.constraints) +datasets: Module(keras.datasets) device(device_name) -distribution: Module(keras.api.distribution) -dtype_policies: Module(keras.api.dtype_policies) +distribution: Module(keras.distribution) +dtype_policies: Module(keras.dtype_policies) DTypePolicy(name=None) -export: Module(keras.api.export) +export: Module(keras.export) FloatDTypePolicy(name=None) Function( inputs, @@ -17,7 +17,7 @@ Function( name=None ) Initializer() -initializers: Module(keras.api.initializers) +initializers: Module(keras.initializers) Input( shape=None, batch_size=None, @@ -46,49 +46,51 @@ KerasTensor( sparse=False, ragged=False, record_history=True, - name=None + name=None, + **kwargs ) Layer(*args, **kwargs) -layers: Module(keras.api.layers) -legacy: Module(keras.api.legacy) +layers: Module(keras.layers) +legacy: Module(keras.legacy) Loss( name=None, reduction='sum_over_batch_size', dtype=None ) -losses: Module(keras.api.losses) +losses: Module(keras.losses) Metric(dtype=None, name=None) -metrics: Module(keras.api.metrics) -mixed_precision: Module(keras.api.mixed_precision) +metrics: Module(keras.metrics) +mixed_precision: Module(keras.mixed_precision) Model(*args, **kwargs) -models: Module(keras.api.models) +models: Module(keras.models) name_scope(name, **kwargs) Operation(*args, **kwargs) ops: Module(keras.ops) Optimizer(*args, **kwargs) -optimizers: Module(keras.api.optimizers) -preprocessing: Module(keras.api.preprocessing) +optimizers: Module(keras.optimizers) +preprocessing: Module(keras.preprocessing) Quantizer(output_dtype='int8') -quantizers: Module(keras.api.quantizers) -random: Module(keras.api.random) +quantizers: Module(keras.quantizers) +random: Module(keras.random) Regularizer() -regularizers: Module(keras.api.regularizers) +regularizers: Module(keras.regularizers) remat(f) RematScope( mode='full', output_size_threshold=1024, layer_names=None ) 
-saving: Module(keras.api.saving) +saving: Module(keras.saving) Sequential(*args, **kwargs) +src: Module(keras.src) StatelessScope( state_mapping=None, collect_losses=False, initialize_variables=True ) SymbolicScope() -tree: Module(keras.api.tree) -utils: Module(keras.api.utils) +tree: Module(keras.tree) +utils: Module(keras.utils) Variable( initializer, shape=None, @@ -96,9 +98,11 @@ Variable( trainable=True, autocast=True, aggregation='none', - name=None + synchronization='auto', + name=None, + **kwargs ) version() -visualization: Module(keras.api.visualization) -wrappers: Module(keras.api.wrappers) +visualization: Module(keras.visualization) +wrappers: Module(keras.wrappers) diff --git a/.tether/man/keras.utils.txt b/.tether/man/keras.utils.txt index c54a975cb..d164c61d6 100644 --- a/.tether/man/keras.utils.txt +++ b/.tether/man/keras.utils.txt @@ -20,7 +20,7 @@ audio_dataset_from_directory( follow_links=False, verbose=True ) -bounding_boxes: Module(keras.api.utils.bounding_boxes) +bounding_boxes: Module(keras.utils.bounding_boxes) clear_session(free_memory=True) Config(**kwargs) custom_object_scope(custom_objects) @@ -89,7 +89,7 @@ img_to_array( ) is_interactive_logging_enabled() is_keras_tensor(x) -legacy: Module(keras.api.utils.legacy) +legacy: Module(keras.utils.legacy) load_img( path, color_mode='rgb', diff --git a/.tether/man/keras_model.txt b/.tether/man/keras_model.txt index a288c1b05..6d7a8f60c 100644 --- a/.tether/man/keras_model.txt +++ b/.tether/man/keras_model.txt @@ -303,31 +303,47 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr | skip_mismatch=False, | **kwargs | ) - | Load weights from a file saved via `save_weights()`. + | Load the weights from a single file or sharded files. | - | Weights are loaded based on the network's - | topology. This means the architecture should be the same as when the - | weights were saved. 
Note that layers that don't have weights are not - | taken into account in the topological ordering, so adding or removing - | layers is fine as long as they don't have weights. + | Weights are loaded based on the network's topology. This means the + | architecture should be the same as when the weights were saved. Note + | that layers that don't have weights are not taken into account in the + | topological ordering, so adding or removing layers is fine as long as + | they don't have weights. | | **Partial weight loading** | | If you have modified your model, for instance by adding a new layer - | (with weights) or by changing the shape of the weights of a layer, - | you can choose to ignore errors and continue loading - | by setting `skip_mismatch=True`. In this case any layer with - | mismatching weights will be skipped. A warning will be displayed - | for each skipped layer. + | (with weights) or by changing the shape of the weights of a layer, you + | can choose to ignore errors and continue loading by setting + | `skip_mismatch=True`. In this case any layer with mismatching weights + | will be skipped. A warning will be displayed for each skipped layer. + | + | **Sharding** + | + | When loading sharded weights, it is important to specify `filepath` that + | ends with `*.weights.json` which is used as the configuration file. + | Additionally, the sharded files `*_xxxxx.weights.h5` must be in the same + | directory as the configuration file. | | Args: - | filepath: String, path to the weights file to load. - | It can either be a `.weights.h5` file - | or a legacy `.h5` weights file. + | filepath: `str` or `pathlib.Path` object. Path where the weights + | will be loaded from. When sharding, the filepath must end in + | `.weights.json`. | skip_mismatch: Boolean, whether to skip loading of layers where | there is a mismatch in the number of weights, or a mismatch in | the shape of the weights. | + | Example: + | + | ```python + | # Load the weights in a single file. 
+ | model.load_weights("model.weights.h5") + | + | # Load the weights in sharded files. + | model.load_weights("model.weights.json") + | ``` + | | quantize( | self, | mode, @@ -352,6 +368,16 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr | ) | Saves a model as a `.keras` file. | + | Note that `model.save()` is an alias for `keras.saving.save_model()`. + | + | The saved `.keras` file contains: + | + | - The model's configuration (architecture) + | - The model's weights + | - The model's optimizer's state (if any) + | + | Thus models can be reinstantiated in the exact same state. + | | Args: | filepath: `str` or `pathlib.Path` object. | The path where to save the model. Must end in `.keras` @@ -380,29 +406,63 @@ class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.tr | assert np.allclose(model.predict(x), loaded_model.predict(x)) | ``` | - | Note that `model.save()` is an alias for `keras.saving.save_model()`. - | - | The saved `.keras` file contains: - | - | - The model's configuration (architecture) - | - The model's weights - | - The model's optimizer's state (if any) - | - | Thus models can be reinstantiated in the exact same state. - | | save_weights( | self, | filepath, - | overwrite=True + | overwrite=True, + | max_shard_size=None | ) - | Saves all layer weights to a `.weights.h5` file. + | Saves all weights to a single file or sharded files. + | + | By default, the weights will be saved in a single `.weights.h5` file. + | If sharding is enabled (`max_shard_size` is not `None`), the weights + | will be saved in multiple files, each with a size at most + | `max_shard_size` (in GB). Additionally, a configuration file + | `.weights.json` will contain the metadata for the sharded files. + | + | The saved sharded files contain: + | + | - `*.weights.json`: The configuration file containing 'metadata' and + | 'weight_map'. + | - `*_xxxxxx.weights.h5`: The sharded files containing only the + | weights. 
| | Args: - | filepath: `str` or `pathlib.Path` object. - | Path where to save the model. Must end in `.weights.h5`. - | overwrite: Whether we should overwrite any existing model - | at the target location, or instead ask the user - | via an interactive prompt. + | filepath: `str` or `pathlib.Path` object. Path where the weights + | will be saved. When sharding, the filepath must end in + | `.weights.json`. If `.weights.h5` is provided, it will be + | overridden. + | overwrite: Whether to overwrite any existing weights at the target + | location or instead ask the user via an interactive prompt. + | max_shard_size: `int` or `float`. Maximum size in GB for each + | sharded file. If `None`, no sharding will be done. Defaults to + | `None`. + | + | Example: + | + | ```python + | # Instantiate an EfficientNetV2L model with about 454MB of weights. + | model = keras.applications.EfficientNetV2L(weights=None) + | + | # Save the weights in a single file. + | model.save_weights("model.weights.h5") + | + | # Save the weights in sharded files. Using `max_shard_size=0.25` means + | # each sharded file will be at most ~250MB. + | model.save_weights("model.weights.json", max_shard_size=0.25) + | + | # Load the weights in a new model with the same architecture. + | loaded_model = keras.applications.EfficientNetV2L(weights=None) + | loaded_model.load_weights("model.weights.h5") + | x = keras.random.uniform((1, 480, 480, 3)) + | assert np.allclose(model.predict(x), loaded_model.predict(x)) + | + | # Load the sharded weights in a new model with the same architecture. + | loaded_model = keras.applications.EfficientNetV2L(weights=None) + | loaded_model.load_weights("model.weights.json") + | x = keras.random.uniform((1, 480, 480, 3)) + | assert np.allclose(model.predict(x), loaded_model.predict(x)) + | ``` | | set_state_tree(self, state_tree) | Assigns values to variables of the model. 
diff --git a/.tether/man/keras_model_sequential.txt b/.tether/man/keras_model_sequential.txt index c13d33388..2d91bcd38 100644 --- a/.tether/man/keras_model_sequential.txt +++ b/.tether/man/keras_model_sequential.txt @@ -87,7 +87,8 @@ class Sequential(keras.src.models.model.Model) | self, | inputs, | training=None, - | mask=None + | mask=None, + | **kwargs | ) | | compute_output_shape(self, input_shape) @@ -96,7 +97,8 @@ class Sequential(keras.src.models.model.Model) | self, | inputs, | training=None, - | mask=None + | mask=None, + | **kwargs | ) | | get_config(self) diff --git a/.tether/man/keras_variable.txt b/.tether/man/keras_variable.txt index e5bde6b92..75d0602a7 100644 --- a/.tether/man/keras_variable.txt +++ b/.tether/man/keras_variable.txt @@ -8,7 +8,9 @@ class Variable(builtins.object) | trainable=True, | autocast=True, | aggregation='none', - | name=None + | synchronization='auto', + | name=None, + | **kwargs | ) | | Represents a backend-agnostic variable in Keras. @@ -49,6 +51,7 @@ class Variable(builtins.object) | value: The current value of the variable (NumPy array or tensor). | name: The name of the variable (string). | path: The path of the variable within the Keras model or layer (string). + | kwargs: Additional backend-specific keyword arguments. | | Examples: | @@ -120,7 +123,9 @@ class Variable(builtins.object) | trainable=True, | autocast=True, | aggregation='none', - | name=None + | synchronization='auto', + | name=None, + | **kwargs | ) | Initialize self. See help(type(self)) for accurate signature. | @@ -215,6 +220,9 @@ class Variable(builtins.object) | shape | The shape of the variable. | + | synchronization + | The strategy for synchronizing this variable. + | | value | The current value of the variable (numpy array or backend tensor). 
| diff --git a/.tether/man/layer_average_pooling_2d.txt b/.tether/man/layer_average_pooling_2d.txt index f1cf610f4..9ba3b156a 100644 --- a/.tether/man/layer_average_pooling_2d.txt +++ b/.tether/man/layer_average_pooling_2d.txt @@ -16,7 +16,7 @@ class AveragePooling2D(keras.src.layers.pooling.base_pooling.BasePooling) | (when `input_shape >= pool_size`) | | The resulting output shape when using the `"same"` padding option is: - | `output_shape = math.floor((input_shape - 1) / strides) + 1` + | `output_shape = input_shape` | | Args: | pool_size: int or tuple of 2 integers, factors by which to downscale diff --git a/.tether/man/layer_conv_1d_transpose.txt b/.tether/man/layer_conv_1d_transpose.txt index e6f51c257..6b5266904 100644 --- a/.tether/man/layer_conv_1d_transpose.txt +++ b/.tether/man/layer_conv_1d_transpose.txt @@ -1,7 +1,7 @@ Help on class Conv1DTranspose in module keras.src.layers.convolutional.conv1d_transpose: class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose) - | Conv1DTranspose(filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | Conv1DTranspose(filters, kernel_size, strides=1, padding='valid', output_padding=None, data_format=None, dilation_rate=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) | | 1D transposed convolution layer. | @@ -23,6 +23,10 @@ class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | `"valid"` means no padding. 
`"same"` results in padding evenly to | the left/right or up/down of the input such that output has the same | height/width dimension as the input. + | output_padding: An integer tuple/list of 1 integer specifying the + | amount of padding along the time dimension of the output tensor. + | The amount of output padding must be lower than the stride. + | If set to `None` (default), the output shape is inferred. | data_format: string, either `"channels_last"` or `"channels_first"`. | The ordering of the dimensions in the inputs. `"channels_last"` | corresponds to inputs with shape `(batch, steps, features)` @@ -30,8 +34,11 @@ class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | `(batch, features, steps)`. It defaults to the `image_data_format` | value found in your Keras config file at `~/.keras/keras.json`. | If you never set it, then it will be `"channels_last"`. - | dilation_rate: int or tuple/list of 1 integers, specifying the dilation - | rate to use for dilated transposed convolution. + | dilation_rate: An integer tuple/list of 1 integer, specifying + | the dilation rate to use for dilated convolution. + | Currently, specifying a `dilation_rate` value != 1 is + | incompatible with specifying a stride value != 1. + | Also dilation rate larger than 1 is not currently supported. | activation: Activation function. If `None`, no activation is applied. | use_bias: bool, if `True`, bias will be added to the output. | kernel_initializer: Initializer for the convolution kernel. 
If `None`, @@ -104,6 +111,7 @@ class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | kernel_size, | strides=1, | padding='valid', + | output_padding=None, | data_format=None, | dilation_rate=1, | activation=None, diff --git a/.tether/man/layer_conv_2d_transpose.txt b/.tether/man/layer_conv_2d_transpose.txt index b540c565c..07e4151f9 100644 --- a/.tether/man/layer_conv_2d_transpose.txt +++ b/.tether/man/layer_conv_2d_transpose.txt @@ -1,7 +1,7 @@ Help on class Conv2DTranspose in module keras.src.layers.convolutional.conv2d_transpose: class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose) - | Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) | | 2D transposed convolution layer. | @@ -23,6 +23,14 @@ class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | `"valid"` means no padding. `"same"` results in padding evenly to | the left/right or up/down of the input. When `padding="same"` and | `strides=1`, the output has the same size as the input. + | output_padding: An integer or tuple/list of 2 integers, + | specifying the amount of padding along the height and width + | of the output tensor. + | Can be a single integer to specify the same value for all + | spatial dimensions. 
+ | The amount of output padding along a given dimension must be + | lower than the stride along that same dimension. + | If set to `None` (default), the output shape is inferred. | data_format: string, either `"channels_last"` or `"channels_first"`. | The ordering of the dimensions in the inputs. `"channels_last"` | corresponds to inputs with shape @@ -32,8 +40,13 @@ class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | `image_data_format` value found in your Keras config file at | `~/.keras/keras.json`. If you never set it, then it will be | `"channels_last"`. - | dilation_rate: int or tuple/list of 1 integers, specifying the dilation - | rate to use for dilated transposed convolution. + | dilation_rate: An integer or tuple/list of 2 integers, + | specifying the dilation rate for + | all spatial dimensions for dilated convolution. + | Specifying different dilation rates + | for different dimensions is not supported. + | Currently, specifying any `dilation_rate` value != 1 is + | incompatible with specifying any stride value != 1. | activation: Activation function. If `None`, no activation is applied. | use_bias: bool, if `True`, bias will be added to the output. | kernel_initializer: Initializer for the convolution kernel. 
If `None`, @@ -106,6 +119,7 @@ class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | kernel_size, | strides=(1, 1), | padding='valid', + | output_padding=None, | data_format=None, | dilation_rate=(1, 1), | activation=None, diff --git a/.tether/man/layer_conv_3d_transpose.txt b/.tether/man/layer_conv_3d_transpose.txt index 35f84966f..9cdb0d464 100644 --- a/.tether/man/layer_conv_3d_transpose.txt +++ b/.tether/man/layer_conv_3d_transpose.txt @@ -1,7 +1,7 @@ Help on class Conv3DTranspose in module keras.src.layers.convolutional.conv3d_transpose: class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose) - | Conv3DTranspose(filters, kernel_size, strides=(1, 1, 1), padding='valid', data_format=None, dilation_rate=(1, 1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | Conv3DTranspose(filters, kernel_size, strides=(1, 1, 1), padding='valid', data_format=None, output_padding=None, dilation_rate=(1, 1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) | | 3D transposed convolution layer. | @@ -23,6 +23,14 @@ class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | `"valid"` means no padding. `"same"` results in padding evenly to | the left/right or up/down of the input. When `padding="same"` and | `strides=1`, the output has the same size as the input. + | output_padding: An integer or tuple/list of 3 integers, + | specifying the amount of padding along the depth, height, and + | width. + | Can be a single integer to specify the same value for all + | spatial dimensions. 
+ | The amount of output padding along a given dimension must be + | lower than the stride along that same dimension. + | If set to `None` (default), the output shape is inferred. | data_format: string, either `"channels_last"` or `"channels_first"`. | The ordering of the dimensions in the inputs. `"channels_last"` | corresponds to inputs with shape @@ -32,8 +40,12 @@ class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | It defaults to the `image_data_format` value found in your Keras | config file at `~/.keras/keras.json`. If you never set it, then it | will be `"channels_last"`. - | dilation_rate: int or tuple/list of 1 integers, specifying the dilation - | rate to use for dilated transposed convolution. + | dilation_rate: an integer or tuple/list of 3 integers, specifying + | the dilation rate to use for dilated convolution. + | Can be a single integer to specify the same value for + | all spatial dimensions. + | Currently, specifying any `dilation_rate` value != 1 is + | incompatible with specifying any stride value != 1. | activation: Activation function. If `None`, no activation is applied. | use_bias: bool, if `True`, bias will be added to the output. | kernel_initializer: Initializer for the convolution kernel. 
If `None`, @@ -112,6 +124,7 @@ class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseCon | strides=(1, 1, 1), | padding='valid', | data_format=None, + | output_padding=None, | dilation_rate=(1, 1, 1), | activation=None, | use_bias=True, diff --git a/.tether/man/layer_dense.txt b/.tether/man/layer_dense.txt index b5d9b7b12..812851bc2 100644 --- a/.tether/man/layer_dense.txt +++ b/.tether/man/layer_dense.txt @@ -1,7 +1,7 @@ Help on class Dense in module keras.src.layers.core.dense: class Dense(keras.src.layers.layer.Layer) - | Dense(units, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, **kwargs) + | Dense(units, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, lora_alpha=None, **kwargs) | | Just your regular densely-connected NN layer. | @@ -46,6 +46,11 @@ class Dense(keras.src.layers.layer.Layer) | computation cost of fine-tuning large dense layers. | You can also enable LoRA on an existing | `Dense` layer by calling `layer.enable_lora(rank)`. + | lora_alpha: Optional integer. If set, this parameter scales the + | low-rank adaptation delta (computed as the product of two lower-rank + | trainable matrices) during the forward pass. The delta is scaled by + | `lora_alpha / lora_rank`, allowing you to fine-tune the strength of + | the LoRA adjustment independently of `lora_rank`. | | Input shape: | N-D tensor with shape: `(batch_size, ..., input_dim)`. @@ -83,6 +88,7 @@ class Dense(keras.src.layers.layer.Layer) | kernel_constraint=None, | bias_constraint=None, | lora_rank=None, + | lora_alpha=None, | **kwargs | ) | Initialize self. See help(type(self)) for accurate signature. 
@@ -100,6 +106,7 @@ class Dense(keras.src.layers.layer.Layer) | enable_lora( | self, | rank, + | lora_alpha=None, | a_initializer='he_uniform', | b_initializer='zeros' | ) @@ -127,7 +134,7 @@ class Dense(keras.src.layers.layer.Layer) | | quantized_build( | self, - | input_shape, + | kernel_shape, | mode | ) | diff --git a/.tether/man/layer_discretization.txt b/.tether/man/layer_discretization.txt index 5bc0d285f..b0420b914 100644 --- a/.tether/man/layer_discretization.txt +++ b/.tether/man/layer_discretization.txt @@ -132,8 +132,6 @@ class Discretization(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) | repeating dataset, you must specify the `steps` argument. This | argument is not supported with array inputs or list inputs. | - | build(self, input_shape=None) - | | call(self, inputs) | | compute_output_spec(self, inputs) diff --git a/.tether/man/layer_einsum_dense.txt b/.tether/man/layer_einsum_dense.txt index 178c0c90b..1347c4be0 100644 --- a/.tether/man/layer_einsum_dense.txt +++ b/.tether/man/layer_einsum_dense.txt @@ -1,7 +1,7 @@ Help on class EinsumDense in module keras.src.layers.core.einsum_dense: class EinsumDense(keras.src.layers.layer.Layer) - | EinsumDense(equation, output_shape, activation=None, bias_axes=None, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, **kwargs) + | EinsumDense(equation, output_shape, activation=None, bias_axes=None, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, lora_alpha=None, **kwargs) | | A layer that uses `einsum` as the backing computation. | @@ -43,6 +43,11 @@ class EinsumDense(keras.src.layers.layer.Layer) | computation cost of fine-tuning large dense layers. | You can also enable LoRA on an existing | `EinsumDense` layer by calling `layer.enable_lora(rank)`. 
+ | lora_alpha: Optional integer. If set, this parameter scales the + | low-rank adaptation delta (computed as the product of two lower-rank + | trainable matrices) during the forward pass. The delta is scaled by + | `lora_alpha / lora_rank`, allowing you to fine-tune the strength of + | the LoRA adjustment independently of `lora_rank`. | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. | | Examples: @@ -122,6 +127,7 @@ class EinsumDense(keras.src.layers.layer.Layer) | kernel_constraint=None, | bias_constraint=None, | lora_rank=None, + | lora_alpha=None, | **kwargs | ) | Initialize self. See help(type(self)) for accurate signature. @@ -139,6 +145,7 @@ class EinsumDense(keras.src.layers.layer.Layer) | enable_lora( | self, | rank, + | lora_alpha=None, | a_initializer='he_uniform', | b_initializer='zeros' | ) @@ -166,7 +173,7 @@ class EinsumDense(keras.src.layers.layer.Layer) | | quantized_build( | self, - | input_shape, + | kernel_shape, | mode | ) | diff --git a/.tether/man/layer_embedding.txt b/.tether/man/layer_embedding.txt index 82820af0d..00f8568d9 100644 --- a/.tether/man/layer_embedding.txt +++ b/.tether/man/layer_embedding.txt @@ -1,7 +1,7 @@ Help on class Embedding in module keras.src.layers.core.embedding: class Embedding(keras.src.layers.layer.Layer) - | Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, embeddings_constraint=None, mask_zero=False, weights=None, lora_rank=None, **kwargs) + | Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, embeddings_constraint=None, mask_zero=False, weights=None, lora_rank=None, lora_alpha=None, **kwargs) | | Turns nonnegative integers (indexes) into dense vectors of fixed size. | @@ -55,6 +55,11 @@ class Embedding(keras.src.layers.layer.Layer) | computation cost of fine-tuning large embedding layers. | You can also enable LoRA on an existing | `Embedding` layer by calling `layer.enable_lora(rank)`. 
+ | lora_alpha: Optional integer. If set, this parameter scales the + | low-rank adaptation delta (computed as the product of two lower-rank + | trainable matrices) during the forward pass. The delta is scaled by + | `lora_alpha / lora_rank`, allowing you to fine-tune the strength of + | the LoRA adjustment independently of `lora_rank`. | | Input shape: | 2D tensor with shape: `(batch_size, input_length)`. @@ -85,6 +90,7 @@ class Embedding(keras.src.layers.layer.Layer) | mask_zero=False, | weights=None, | lora_rank=None, + | lora_alpha=None, | **kwargs | ) | Initialize self. See help(type(self)) for accurate signature. @@ -101,9 +107,12 @@ class Embedding(keras.src.layers.layer.Layer) | | compute_output_shape(self, input_shape) | + | compute_output_spec(self, inputs) + | | enable_lora( | self, | rank, + | lora_alpha=None, | a_initializer='he_uniform', | b_initializer='zeros' | ) @@ -131,7 +140,7 @@ class Embedding(keras.src.layers.layer.Layer) | | quantized_build( | self, - | input_shape, + | embeddings_shape, | mode | ) | diff --git a/.tether/man/layer_layer_normalization.txt b/.tether/man/layer_layer_normalization.txt index e8ec5fb04..f941f9d93 100644 --- a/.tether/man/layer_layer_normalization.txt +++ b/.tether/man/layer_layer_normalization.txt @@ -1,7 +1,7 @@ Help on class LayerNormalization in module keras.src.layers.normalization.layer_normalization: class LayerNormalization(keras.src.layers.layer.Layer) - | LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True, rms_scaling=False, beta_initializer='zeros', gamma_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, **kwargs) + | LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, **kwargs) | | Layer normalization layer (Ba et al., 2016). 
| @@ -77,12 +77,6 @@ class LayerNormalization(keras.src.layers.layer.Layer) | When the next layer is linear (also e.g. `nn.relu`), this can be | disabled since the scaling will be done by the next layer. | Defaults to `True`. - | rms_scaling: If True, `center` and `scale` are ignored, and the - | inputs are scaled by `gamma` and the inverse square root - | of the square of all inputs. This is an approximate and faster - | approach that avoids ever computing the mean of the input. Note that - | this *isn't* equivalent to the computation that the - | `keras.layers.RMSNormalization` layer performs. | beta_initializer: Initializer for the beta weight. Defaults to zeros. | gamma_initializer: Initializer for the gamma weight. Defaults to ones. | beta_regularizer: Optional regularizer for the beta weight. @@ -119,7 +113,6 @@ class LayerNormalization(keras.src.layers.layer.Layer) | epsilon=0.001, | center=True, | scale=True, - | rms_scaling=False, | beta_initializer='zeros', | gamma_initializer='ones', | beta_regularizer=None, diff --git a/.tether/man/layer_random_elastic_transform.txt b/.tether/man/layer_random_elastic_transform.txt new file mode 100644 index 000000000..ff1a2df5a --- /dev/null +++ b/.tether/man/layer_random_elastic_transform.txt @@ -0,0 +1,130 @@ +Help on class RandomElasticTransform in module keras.src.layers.preprocessing.image_preprocessing.random_elastic_transform: + +class RandomElasticTransform(keras.src.layers.preprocessing.image_preprocessing.base_image_preprocessing_layer.BaseImagePreprocessingLayer) + | RandomElasticTransform(factor=1.0, scale=1.0, interpolation='bilinear', fill_mode='reflect', fill_value=0.0, value_range=(0, 255), seed=None, data_format=None, **kwargs) + | + | A preprocessing layer that applies random elastic transformations. + | + | This layer distorts input images by applying elastic deformations, + | simulating a physically realistic transformation. 
The magnitude of the + | distortion is controlled by the `scale` parameter, while the `factor` + | determines the probability of applying the transformation. + | + | Args: + | factor: A single float or a tuple of two floats. + | `factor` controls the probability of applying the transformation. + | - `factor=0.0` ensures the transformation is never applied. + | - `factor=1.0` means the transformation is always applied. + | - If a tuple `(min, max)` is provided, a probability value + | is sampled between `min` and `max` for each image. + | - If a single float is provided, a probability is sampled + | between `0.0` and the given float. + | Default is 1.0. + | scale: A float or a tuple of two floats defining the magnitude of + | the distortion applied. + | - If a tuple `(min, max)` is provided, a random scale value is + | sampled within this range. + | - If a single float is provided, a random scale value is sampled + | between `0.0` and the given float. + | Default is 1.0. + | interpolation: Interpolation mode. Supported values: `"nearest"`, + | `"bilinear"`. + | fill_mode: Points outside the boundaries of the input are filled + | according to the given mode. Available methods are `"constant"`, + | `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`. + | - `"reflect"`: `(d c b a | a b c d | d c b a)` + | The input is extended by reflecting about the edge of the last + | pixel. + | - `"constant"`: `(k k k k | a b c d | k k k k)` + | The input is extended by filling all values beyond + | the edge with the same constant value k specified by + | `fill_value`. + | - `"wrap"`: `(a b c d | a b c d | a b c d)` + | The input is extended by wrapping around to the opposite edge. + | - `"nearest"`: `(a a a a | a b c d | d d d d)` + | The input is extended by the nearest pixel. + | Note that when using torch backend, `"reflect"` is redirected to + | `"mirror"` `(c d c b | a b c d | c b a b)` because torch does not + | support `"reflect"`. + | Note that torch backend does not support `"wrap"`. 
+ | fill_value: a float represents the value to be filled outside the + | boundaries when `fill_mode="constant"`. + | value_range: the range of values the incoming images will have. + | Represented as a two-number tuple written `[low, high]`. This is + | typically either `[0, 1]` or `[0, 255]` depending on how your + | preprocessing pipeline is set up. + | seed: Integer. Used to create a random seed. + | + | Method resolution order: + | RandomElasticTransform + | keras.src.layers.preprocessing.image_preprocessing.base_image_preprocessing_layer.BaseImagePreprocessingLayer + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | keras.src.saving.keras_saveable.KerasSaveable + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | factor=1.0, + | scale=1.0, + | interpolation='bilinear', + | fill_mode='reflect', + | fill_value=0.0, + | value_range=(0, 255), + | seed=None, + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
+ | + | get_elastic_transform_params( + | self, + | height, + | width, + | factor + | ) + | + | get_random_transformation( + | self, + | data, + | training=True, + | seed=None + | ) + | + | transform_images( + | self, + | images, + | transformation, + | training=True + | ) + | + | transform_labels( + | self, + | labels, + | transformation, + | training=True + | ) + | + | transform_segmentation_masks( + | self, + | segmentation_masks, + | transformation, + | training=True + | ) + | + diff --git a/.tether/man/layer_rescaling.txt b/.tether/man/layer_rescaling.txt index 5f3809581..140f33d12 100644 --- a/.tether/man/layer_rescaling.txt +++ b/.tether/man/layer_rescaling.txt @@ -24,8 +24,16 @@ class Rescaling(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) | (independently of which backend you're using). | | Args: - | scale: Float, the scale to apply to the inputs. - | offset: Float, the offset to apply to the inputs. + | scale: Float, int, list, tuple or np.ndarray. + | The scale to apply to the inputs. + | If scalar, the same scale will be applied to + | all features or channels of input. If a list, tuple or + | 1D array, the scaling is applied per channel. + | offset: Float, int, list/tuple or numpy ndarray. + | The offset to apply to the inputs. + | If scalar, the same offset will be applied to + | all features or channels of input. If a list, tuple or + | 1D array, the offset is applied per channel. | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. 
| | Method resolution order: diff --git a/.tether/man/layer_rnn.txt b/.tether/man/layer_rnn.txt index b61ea0711..d86c3c0c9 100644 --- a/.tether/man/layer_rnn.txt +++ b/.tether/man/layer_rnn.txt @@ -140,7 +140,6 @@ class RNN(keras.src.layers.layer.Layer) | shape=(self.units, self.units), | initializer='uniform', | name='recurrent_kernel') - | self.built = True | | def call(self, inputs, states): | prev_output = states[0] diff --git a/.tether/man/layer_torch_module_wrapper.txt b/.tether/man/layer_torch_module_wrapper.txt index b63b0377f..9d50c1956 100644 --- a/.tether/man/layer_torch_module_wrapper.txt +++ b/.tether/man/layer_torch_module_wrapper.txt @@ -1,7 +1,7 @@ Help on class TorchModuleWrapper in module keras.src.utils.torch_utils: class TorchModuleWrapper(keras.src.layers.layer.Layer) - | TorchModuleWrapper(module, name=None, **kwargs) + | TorchModuleWrapper(module, name=None, output_shape=None, **kwargs) | | Torch module wrapper layer. | @@ -17,6 +17,8 @@ class TorchModuleWrapper(keras.src.layers.layer.Layer) | instance, then its parameters must be initialized before | passing the instance to `TorchModuleWrapper` (e.g. by calling | it once). + | output_shape: The shape of the output of this layer. It helps Keras + | perform automatic shape inference. | name: The name of the layer (string). | | Example: @@ -88,6 +90,7 @@ class TorchModuleWrapper(keras.src.layers.layer.Layer) | self, | module, | name=None, + | output_shape=None, | **kwargs | ) | Initialize self. See help(type(self)) for accurate signature. @@ -99,6 +102,8 @@ class TorchModuleWrapper(keras.src.layers.layer.Layer) | **kwargs | ) | + | compute_output_shape(self, input_shape) + | | get_config(self) | Returns the config of the object. 
| diff --git a/.tether/man/load_model_weights.txt b/.tether/man/load_model_weights.txt index 9a57d484a..724a79df6 100644 --- a/.tether/man/load_model_weights.txt +++ b/.tether/man/load_model_weights.txt @@ -6,27 +6,44 @@ keras.Model.load_weights( **kwargs ) __doc__ -Load weights from a file saved via `save_weights()`. +Load the weights from a single file or sharded files. -Weights are loaded based on the network's -topology. This means the architecture should be the same as when the -weights were saved. Note that layers that don't have weights are not -taken into account in the topological ordering, so adding or removing -layers is fine as long as they don't have weights. +Weights are loaded based on the network's topology. This means the +architecture should be the same as when the weights were saved. Note +that layers that don't have weights are not taken into account in the +topological ordering, so adding or removing layers is fine as long as +they don't have weights. **Partial weight loading** If you have modified your model, for instance by adding a new layer -(with weights) or by changing the shape of the weights of a layer, -you can choose to ignore errors and continue loading -by setting `skip_mismatch=True`. In this case any layer with -mismatching weights will be skipped. A warning will be displayed -for each skipped layer. +(with weights) or by changing the shape of the weights of a layer, you +can choose to ignore errors and continue loading by setting +`skip_mismatch=True`. In this case any layer with mismatching weights +will be skipped. A warning will be displayed for each skipped layer. + +**Sharding** + +When loading sharded weights, it is important to specify `filepath` that +ends with `*.weights.json` which is used as the configuration file. +Additionally, the sharded files `*_xxxxx.weights.h5` must be in the same +directory as the configuration file. Args: - filepath: String, path to the weights file to load. 
- It can either be a `.weights.h5` file - or a legacy `.h5` weights file. + filepath: `str` or `pathlib.Path` object. Path from which the weights + will be loaded. When sharding, the filepath must end in + `.weights.json`. skip_mismatch: Boolean, whether to skip loading of layers where there is a mismatch in the number of weights, or a mismatch in the shape of the weights. + +Example: + +```python +# Load the weights in a single file. +model.load_weights("model.weights.h5") + +# Load the weights in sharded files. +model.load_weights("model.weights.json") +``` + diff --git a/.tether/man/loss_categorical_generalized_cross_entropy.txt b/.tether/man/loss_categorical_generalized_cross_entropy.txt new file mode 100644 index 000000000..57d2a8b85 --- /dev/null +++ b/.tether/man/loss_categorical_generalized_cross_entropy.txt @@ -0,0 +1,75 @@ +Help on class CategoricalGeneralizedCrossEntropy in module keras.src.losses.losses: + +class CategoricalGeneralizedCrossEntropy(LossFunctionWrapper) + | CategoricalGeneralizedCrossEntropy(q=0.5, reduction='sum_over_batch_size', name='categorical_generalized_cross_entropy', dtype=None) + | + | Computes the Generalized Cross Entropy loss between `y_true` & `y_pred`. + | + | Generalized Cross Entropy (GCE) is a noise-robust loss function + | that provides better robustness against noisy labels than + | standard cross entropy. + | It generalizes both cross entropy and mean absolute error through + | the parameter q, where values closer to 1 make the loss more robust + | to noisy labels. + | + | Formula: + | ```python + | loss = (1 - p**q) / q + | ``` + | where `p` is the predicted probability for the true class and `q` + | is the noise parameter. + | + | Args: + | q: Float in range `(0, 1)`. It is the noise parameter. 
+ | Controls the behavior of the loss: + | - As `q` approaches 0: Behaves more like cross entropy + | - As `q` approaches 1: Behaves more like mean absolute error + | Defaults to `0.5` + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. Supported options are + | `"sum"`, `"sum_over_batch_size"`, `"mean"`, + | `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss, + | `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the + | sample size, and `"mean_with_sample_weight"` sums the loss and + | divides by the sum of the sample weights. `"none"` and `None` + | perform no aggregation. Defaults to `"sum_over_batch_size"`. + | name: Optional name for the loss instance. + | dtype: The dtype of the loss's computations. Defaults to `None`, which + | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a + | `"float32"` unless set to different value + | (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is + | provided, then the `compute_dtype` will be utilized. + | + | Example: + | ```python + | y_true = np.array([0, 1, 0, 1]) + | y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]]) + | keras.losses.CategoricalGeneralizedCrossEntropy()(y_true, y_pred) + | ``` + | + | References: + | - [Zhang, Sabuncu, 2018](https://arxiv.org/abs/1805.07836) + | ("Generalized Cross Entropy Loss for Training + | Deep Neural Networks with Noisy Labels") + | + | Method resolution order: + | CategoricalGeneralizedCrossEntropy + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | keras.src.saving.keras_saveable.KerasSaveable + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | q=0.5, + | reduction='sum_over_batch_size', + | name='categorical_generalized_cross_entropy', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. 
+ | + | get_config(self) + | + diff --git a/.tether/man/loss_dice.txt b/.tether/man/loss_dice.txt index 9ee7231ca..1aad5efd1 100644 --- a/.tether/man/loss_dice.txt +++ b/.tether/man/loss_dice.txt @@ -38,12 +38,12 @@ class Dice(LossFunctionWrapper) | >>> y_pred = [[[[0.0], [1.0]], [[0.0], [1.0]]], | ... [[[0.4], [0.0]], [[0.0], [0.9]]]] | >>> axis = (1, 2, 3) - | >>> loss = keras.losses.dice(y_true, y_pred, axis=axis) + | >>> loss = keras.losses.Dice(axis=axis, reduction=None)(y_true, y_pred) | >>> assert loss.shape == (2,) | >>> loss | array([0.5, 0.75757575], shape=(2,), dtype=float32) | - | >>> loss = keras.losses.dice(y_true, y_pred) + | >>> loss = keras.losses.Dice()(y_true, y_pred) | >>> assert loss.shape == () | >>> loss | array(0.6164384, shape=(), dtype=float32) diff --git a/.tether/man/loss_sparse_categorical_crossentropy.txt b/.tether/man/loss_sparse_categorical_crossentropy.txt index 4f84895f1..c4b397d8c 100644 --- a/.tether/man/loss_sparse_categorical_crossentropy.txt +++ b/.tether/man/loss_sparse_categorical_crossentropy.txt @@ -1,7 +1,7 @@ Help on class SparseCategoricalCrossentropy in module keras.src.losses.losses: class SparseCategoricalCrossentropy(LossFunctionWrapper) - | SparseCategoricalCrossentropy(from_logits=False, ignore_class=None, reduction='sum_over_batch_size', name='sparse_categorical_crossentropy', dtype=None) + | SparseCategoricalCrossentropy(from_logits=False, ignore_class=None, reduction='sum_over_batch_size', axis=-1, name='sparse_categorical_crossentropy', dtype=None) | | Computes the crossentropy loss between the labels and predictions. | @@ -28,6 +28,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper) | sample size, and `"mean_with_sample_weight"` sums the loss and | divides by the sum of the sample weights. `"none"` and `None` | perform no aggregation. Defaults to `"sum_over_batch_size"`. + | axis: The axis along which to compute crossentropy (the features + | axis). Defaults to `-1`. 
| name: Optional name for the loss instance. | dtype: The dtype of the loss's computations. Defaults to `None`, which | means using `keras.backend.floatx()`. `keras.backend.floatx()` is a @@ -37,8 +39,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper) | | Examples: | - | >>> y_true = [1, 2] - | >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] + | >>> y_true = np.array([1, 2]) + | >>> y_pred = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) | >>> # Using 'auto'/'sum_over_batch_size' reduction type. | >>> scce = keras.losses.SparseCategoricalCrossentropy() | >>> scce(y_true, y_pred) @@ -81,6 +83,7 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper) | from_logits=False, | ignore_class=None, | reduction='sum_over_batch_size', + | axis=-1, | name='sparse_categorical_crossentropy', | dtype=None | ) diff --git a/.tether/man/metric_binary_focal_crossentropy.txt b/.tether/man/metric_binary_focal_crossentropy.txt index ad3ddb52d..0aa9546df 100644 --- a/.tether/man/metric_binary_focal_crossentropy.txt +++ b/.tether/man/metric_binary_focal_crossentropy.txt @@ -53,8 +53,21 @@ Example: >>> y_true = [[0, 1], [0, 0]] >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] ->>> loss = keras.losses.binary_focal_crossentropy( +>>> # In this instance, the first sample in the second batch is the +>>> # 'easier' example. +>>> focal_loss = keras.losses.binary_focal_crossentropy( ... y_true, y_pred, gamma=2) >>> assert loss.shape == (2,) ->>> loss +>>> focal_loss array([0.330, 0.206], dtype=float32) +>>> # Compare with binary_crossentropy +>>> bce_loss = keras.losses.binary_crossentropy( +... 
y_true, y_pred) +>>> bce_loss +array([0.916, 0.714], dtype=float32) +>>> # Binary focal crossentropy loss attributes more importance to the +>>> # harder example which results in a higher loss for the first batch +>>> # when normalized by binary cross entropy loss +>>> focal_loss/bce_loss +array([0.360, 0.289], dtype=float32) + diff --git a/.tether/man/metric_sensitivity_at_specificity.txt b/.tether/man/metric_sensitivity_at_specificity.txt index bc9735e85..c3c3dd8fd 100644 --- a/.tether/man/metric_sensitivity_at_specificity.txt +++ b/.tether/man/metric_sensitivity_at_specificity.txt @@ -56,7 +56,7 @@ class SensitivityAtSpecificity(SensitivitySpecificityBase) | model.compile( | optimizer='sgd', | loss='binary_crossentropy', - | metrics=[keras.metrics.SensitivityAtSpecificity()]) + | metrics=[keras.metrics.SensitivityAtSpecificity(specificity=0.5)]) | ``` | | Method resolution order: diff --git a/.tether/man/metric_specificity_at_sensitivity.txt b/.tether/man/metric_specificity_at_sensitivity.txt index d320e059f..6e21b36c9 100644 --- a/.tether/man/metric_specificity_at_sensitivity.txt +++ b/.tether/man/metric_specificity_at_sensitivity.txt @@ -56,7 +56,7 @@ class SpecificityAtSensitivity(SensitivitySpecificityBase) | model.compile( | optimizer='sgd', | loss='binary_crossentropy', - | metrics=[keras.metrics.SpecificityAtSensitivity()]) + | metrics=[keras.metrics.SpecificityAtSensitivity(sensitivity=0.3)]) | ``` | | Method resolution order: diff --git a/.tether/man/op_angle.txt b/.tether/man/op_angle.txt new file mode 100644 index 000000000..1a0b0114e --- /dev/null +++ b/.tether/man/op_angle.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.angle(x) +__doc__ +Element-wise angle of a complex tensor. + +Arguments: + x: Input tensor. Can be real or complex. + +Returns: + Output tensor of same shape as x, containing the angle of each element + (in radians). 
+ +Example: +>>> x = keras.ops.convert_to_tensor([[1 + 3j, 2 - 5j], [4 - 3j, 3 + 2j]]) +>>> keras.ops.angle(x) +array([[ 1.2490457, -1.19029 ], + [-0.6435011, 0.5880026]], dtype=float32) + diff --git a/.tether/man/op_bartlett.txt b/.tether/man/op_bartlett.txt new file mode 100644 index 000000000..6c00ebeb1 --- /dev/null +++ b/.tether/man/op_bartlett.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.bartlett(x) +__doc__ +Bartlett window function. +The Bartlett window is a triangular window that rises then falls linearly. + +Args: + x: Scalar or 1D Tensor. Window length. + +Returns: + A 1D tensor containing the Bartlett window values. + +Example: +>>> x = keras.ops.convert_to_tensor(5) +>>> keras.ops.bartlett(x) +array([0. , 0.5, 1. , 0.5, 0. ], dtype=float32) + diff --git a/.tether/man/op_blackman.txt b/.tether/man/op_blackman.txt new file mode 100644 index 000000000..50a5f5e4f --- /dev/null +++ b/.tether/man/op_blackman.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.blackman(x) +__doc__ +Blackman window function. +The Blackman window is a taper formed by using a weighted cosine. + +Args: + x: Scalar or 1D Tensor. Window length. + +Returns: + A 1D tensor containing the Blackman window values. + +Example: +>>> x = keras.ops.convert_to_tensor(5) +>>> keras.ops.blackman(x) +array([-1.3877788e-17, 3.4000000e-01, 1.0000000e+00, 3.4000000e-01, + -1.3877788e-17], dtype=float32) + diff --git a/.tether/man/op_cbrt.txt b/.tether/man/op_cbrt.txt new file mode 100644 index 000000000..d3e3c285a --- /dev/null +++ b/.tether/man/op_cbrt.txt @@ -0,0 +1,14 @@ +__signature__ +keras.ops.cbrt(x) +__doc__ +Computes the cube root of the input tensor, element-wise. + +This operation returns the real-valued cube root of `x`, handling +negative numbers properly in the real domain. + +Args: + x: Input tensor. + +Returns: + A tensor containing the cube root of each element in `x`. 
+ diff --git a/.tether/man/op_conv_transpose.txt b/.tether/man/op_conv_transpose.txt index dab311e05..2e8863baa 100644 --- a/.tether/man/op_conv_transpose.txt +++ b/.tether/man/op_conv_transpose.txt @@ -2,7 +2,7 @@ __signature__ keras.ops.conv_transpose( inputs, kernel, - strides, + strides=1, padding='valid', output_padding=None, data_format=None, @@ -51,3 +51,4 @@ Args: Returns: A tensor of rank N+2, the result of the conv operation. + diff --git a/.tether/man/op_corrcoef.txt b/.tether/man/op_corrcoef.txt new file mode 100644 index 000000000..bff558a2c --- /dev/null +++ b/.tether/man/op_corrcoef.txt @@ -0,0 +1,12 @@ +__signature__ +keras.ops.corrcoef(x) +__doc__ +Compute the Pearson correlation coefficient matrix. + +Args: + x: A 2D tensor of shape `(N, D)`, where N is the number of variables + and D is the number of observations. + +Returns: + A tensor of shape `(N, N)` representing the correlation matrix. + diff --git a/.tether/man/op_deg2rad.txt b/.tether/man/op_deg2rad.txt new file mode 100644 index 000000000..c8d24cda5 --- /dev/null +++ b/.tether/man/op_deg2rad.txt @@ -0,0 +1,21 @@ +__signature__ +keras.ops.deg2rad(x) +__doc__ +Convert angles from degrees to radians. + +The conversion is defined as: +`rad = deg * (π / 180)` + +Args: + x: Input tensor of angles in degrees. + +Returns: + A tensor containing angles converted to radians. + +Examples: +>>> from keras import ops +>>> ops.deg2rad(180.0) +3.141592653589793 +>>> ops.deg2rad([0.0, 90.0, 180.0]) +array([0., 1.57079633, 3.14159265]) + diff --git a/.tether/man/op_dot_product_attention.txt b/.tether/man/op_dot_product_attention.txt index 52de124bf..14d58473d 100644 --- a/.tether/man/op_dot_product_attention.txt +++ b/.tether/man/op_dot_product_attention.txt @@ -7,7 +7,8 @@ keras.ops.dot_product_attention( mask=None, scale=None, is_causal=False, - flash_attention=None + flash_attention=None, + attn_logits_soft_cap=None ) __doc__ Scaled dot product attention function. 
@@ -47,6 +48,9 @@ Args: attempt to use flash attention if the required conditions are met. Typically, the inputs must be in float16 and bfloat16 dtype and the input layout requirements may vary depending on the backend. + attn_logits_soft_cap: The value limit for maximum value of the + attention logits before the softmax function is applied. This is + only supported in JAX TPU backend. Defaults to None. Returns: An array of the attention output with the same shape of `query`. diff --git a/.tether/man/op_einsum.txt b/.tether/man/op_einsum.txt index 9591c8a54..90b9221d5 100644 --- a/.tether/man/op_einsum.txt +++ b/.tether/man/op_einsum.txt @@ -1,5 +1,9 @@ __signature__ -keras.ops.einsum(subscripts, *operands) +keras.ops.einsum( + subscripts, + *operands, + **kwargs +) __doc__ Evaluates the Einstein summation convention on the operands. diff --git a/.tether/man/op_hamming.txt b/.tether/man/op_hamming.txt new file mode 100644 index 000000000..031efc106 --- /dev/null +++ b/.tether/man/op_hamming.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.hamming(x) +__doc__ +Hamming window function. + +The Hamming window is defined as: +`w[n] = 0.54 - 0.46 * cos(2 * pi * n / (N - 1))` for `0 <= n <= N - 1`. + +Args: + x: Scalar or 1D Tensor. The window length. + +Returns: + A 1D tensor containing the Hamming window values. + +Example: +>>> x = keras.ops.convert_to_tensor(5) +>>> keras.ops.hamming(x) +array([0.08, 0.54, 1. , 0.54, 0.08], dtype=float32) + diff --git a/.tether/man/op_hanning.txt b/.tether/man/op_hanning.txt new file mode 100644 index 000000000..33126574d --- /dev/null +++ b/.tether/man/op_hanning.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.hanning(x) +__doc__ +Hanning window function. + +The Hanning window is defined as: +`w[n] = 0.5 - 0.5 * cos(2 * pi * n / (N - 1))` for `0 <= n <= N - 1`. + +Args: + x: Scalar or 1D Tensor. The window length. + +Returns: + A 1D tensor containing the Hanning window values. 
+ +Example: +>>> x = keras.ops.convert_to_tensor(5) +>>> keras.ops.hanning(x) +array([0. , 0.5, 1. , 0.5, 0. ], dtype=float32) + diff --git a/.tether/man/op_heaviside.txt b/.tether/man/op_heaviside.txt new file mode 100644 index 000000000..bafbe1c8b --- /dev/null +++ b/.tether/man/op_heaviside.txt @@ -0,0 +1,21 @@ +__signature__ +keras.ops.heaviside(x1, x2) +__doc__ +Heaviside step function. + +The Heaviside step function is defined as: +`heaviside(x1, x2) = 0 if x1 < 0, 1 if x1 > 0, x2 if x1 == 0` + +Args: + x1: A tensor input. + x2: A scalar or tensor, the value to return when `x1 == 0`. + +Returns: + A tensor with a shape determined by broadcasting `x1` and `x2`. + +Example: +>>> x1 = keras.ops.convert_to_tensor([-2.0, 0.0, 3.0]) +>>> x2 = 0.5 +>>> keras.ops.heaviside(x1, x2) +array([0. , 0.5, 1. ], dtype=float32) + diff --git a/.tether/man/op_image_elastic_transform.txt b/.tether/man/op_image_elastic_transform.txt new file mode 100644 index 000000000..6a9cd7675 --- /dev/null +++ b/.tether/man/op_image_elastic_transform.txt @@ -0,0 +1,66 @@ +__signature__ +keras.ops.image.elastic_transform( + images, + alpha=20.0, + sigma=5.0, + interpolation='bilinear', + fill_mode='reflect', + fill_value=0.0, + seed=None, + data_format=None +) +__doc__ +Applies elastic deformation to the image(s). + +Args: + images: Input image or batch of images. Must be 3D or 4D. + alpha: Scaling factor that controls the intensity of the deformation. + sigma: Standard deviation of the Gaussian filter used for + smoothing the displacement fields. + interpolation: Interpolation method. Available methods are `"nearest"` + and `"bilinear"`. Defaults to `"bilinear"`. + fill_mode: Points outside the boundaries of the input are filled + according to the given mode. Available methods are `"constant"`, + `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`. + - `"reflect"`: `(d c b a | a b c d | d c b a)` + The input is extended by reflecting about the edge of the last + pixel. 
+ - `"constant"`: `(k k k k | a b c d | k k k k)` + The input is extended by filling all values beyond + the edge with the same constant value k specified by + `fill_value`. + - `"wrap"`: `(a b c d | a b c d | a b c d)` + The input is extended by wrapping around to the opposite edge. + - `"nearest"`: `(a a a a | a b c d | d d d d)` + The input is extended by the nearest pixel. + fill_value: Value used for points outside the boundaries of the input if + `fill_mode="constant"`. Defaults to `0`. + data_format: A string specifying the data format of the input tensor. + It can be either `"channels_last"` or `"channels_first"`. + `"channels_last"` corresponds to inputs with shape + `(batch, height, width, channels)`, while `"channels_first"` + corresponds to inputs with shape `(batch, channels, height, width)`. + If not specified, the value will default to + `keras.config.image_data_format`. + +Returns: + Transformed image or batch of images with elastic deformation. + +Examples: + +>>> x = np.random.random((2, 64, 80, 3)) # batch of 2 RGB images +>>> y = keras.ops.image.elastic_transform(x) +>>> y.shape +(2, 64, 80, 3) + +>>> x = np.random.random((64, 80, 3)) # single RGB image +>>> y = keras.ops.image.elastic_transform(x) +>>> y.shape +(64, 80, 3) + +>>> x = np.random.random((2, 3, 64, 80)) # batch of 2 RGB images +>>> y = keras.ops.image.elastic_transform( +... x, data_format="channels_first") +>>> y.shape +(2, 3, 64, 80) + diff --git a/.tether/man/op_kaiser.txt b/.tether/man/op_kaiser.txt new file mode 100644 index 000000000..b49a7d3e4 --- /dev/null +++ b/.tether/man/op_kaiser.txt @@ -0,0 +1,22 @@ +__signature__ +keras.ops.kaiser(x, beta) +__doc__ +Kaiser window function. + +The Kaiser window is defined as: +`w[n] = I0(beta * sqrt(1 - (2n / (N - 1) - 1)^2)) / I0(beta)` +where I0 is the modified zeroth-order Bessel function of the first kind. + +Args: + x: Scalar or 1D Tensor. The window length. + beta: Float. Shape parameter for the Kaiser window. 
+ +Returns: + A 1D tensor containing the Kaiser window values. + +Example: +>>> x = keras.ops.convert_to_tensor(5) +>>> keras.ops.kaiser(x, beta=14.0) +array([7.7268669e-06, 1.6493219e-01, 1.0000000e+00, 1.6493219e-01, + 7.7268669e-06], dtype=float32) + diff --git a/.tether/man/op_layer_normalization.txt b/.tether/man/op_layer_normalization.txt new file mode 100644 index 000000000..33bc93250 --- /dev/null +++ b/.tether/man/op_layer_normalization.txt @@ -0,0 +1,35 @@ +__signature__ +keras.ops.layer_normalization( + x, + gamma=None, + beta=None, + axis=-1, + epsilon=None, + **kwargs +) +__doc__ +Layer normalization layer (Ba et al., 2016). + +Normalize the activations of the previous layer for each given example in a +batch independently, rather than across a batch like Batch Normalization. +i.e. applies a transformation that maintains the mean activation within each +example close to 0 and the activation standard deviation close to 1. + +Args: + x: Input tensor. + gamma: Optional scaling factor for the normalization. + beta: Optional add offset for the normalized tensor. + axis: The axis or axes along which to perform normalization. Default to + `-1`. + epsilon: A lower bound value for the norm. + Defaults to `backend.epsilon()`. + +Returns: + The normalized array. + +Example: + +>>> x = keras.ops.arange(5, dtype="float32") +>>> keras.ops.layer_normalization(x) +array([-1.4142135, -0.70710677, 0.0, 0.7071067, 1.4142135]) + diff --git a/.tether/man/op_rms_normalization.txt b/.tether/man/op_rms_normalization.txt index 321afb838..d300b9e3a 100644 --- a/.tether/man/op_rms_normalization.txt +++ b/.tether/man/op_rms_normalization.txt @@ -1,7 +1,7 @@ __signature__ keras.ops.rms_normalization( x, - scale=1, + scale=None, axis=-1, epsilon=None ) @@ -18,20 +18,19 @@ It is defined as `rms_normalization(x) = x * rsqrt(mean(square(x))) * scale` Args: x: Input tensor. - axis: The axis or axes along which to perform normalization. - Default to -1. 
scale: Optional scaling factor for the normalization. - epsilon: A lower bound value for the norm. - Defaults to `backend.epsilon()`. + axis: The axis or axes along which to perform normalization. Defaults + to `-1`. + epsilon: A lower bound value for the norm. Defaults to + `backend.epsilon()`. Returns: The normalized array. Example: ->>> x = np.random.rand(1, 10) ->>> x_norm = keras.ops.rms_normalization(x, (10,)) ->>> print(x_norm) +>>> x = keras.random.normal((1, 10)) +>>> keras.ops.rms_normalization(x) array([[0.69384296, 0.94444374, 0.16551171, 0.05749961, 1.11008865, - 0.52475186, 1.57686807, 1.69893307, 1.27292764, 0.30819128]]) + 0.52475186, 1.57686807, 1.69893307, 1.27292764, 0.30819128]]) diff --git a/.tether/man/op_sparse_sigmoid.txt b/.tether/man/op_sparse_sigmoid.txt new file mode 100644 index 000000000..06861c5a7 --- /dev/null +++ b/.tether/man/op_sparse_sigmoid.txt @@ -0,0 +1,23 @@ +__signature__ +keras.ops.sparse_sigmoid(x) +__doc__ +Sparse sigmoid activation function. + +It is defined as + +`f(x) = 0` for `x <= -1`, +`f(x) = 0.5 * (x + 1)` for `-1 < x < 1`, +`f(x) = 1` for `x >= 1`. + +Args: + x: Input tensor. + +Returns: + A tensor with the same shape as `x`. + +Example: + +>>> x = keras.ops.convert_to_tensor([-6.0, 1.0, 0.0, 1.0, 6.0]) +>>> keras.ops.sparse_sigmoid(x) +array([0. , 1. , 0.5, 1. , 1. 
], dtype=float32) + diff --git a/.tether/man/op_vectorized_map.txt b/.tether/man/op_vectorized_map.txt index 3af8b4df0..da1de681c 100644 --- a/.tether/man/op_vectorized_map.txt +++ b/.tether/man/op_vectorized_map.txt @@ -7,18 +7,18 @@ Schematically, `vectorized_map` implements the following, in the case of a single tensor input `elements`: ```python -def vectorized_map(function, elements) +def vectorized_map(function, elements): outputs = [] for e in elements: outputs.append(function(e)) - return stack(outputs) + return np.stack(outputs) ``` In the case of an iterable of tensors `elements`, it implements the following: ```python -def vectorized_map(function, elements) +def vectorized_map(function, elements): batch_size = elements[0].shape[0] outputs = [] for index in range(batch_size): @@ -28,3 +28,4 @@ def vectorized_map(function, elements) In this case, `function` is expected to take as input a single list of tensor arguments. + diff --git a/.tether/man/op_view_as_complex.txt b/.tether/man/op_view_as_complex.txt new file mode 100644 index 000000000..c195cbce8 --- /dev/null +++ b/.tether/man/op_view_as_complex.txt @@ -0,0 +1,25 @@ +__signature__ +keras.ops.view_as_complex(x) +__doc__ +Converts a real tensor with shape `(..., 2)` to a complex tensor, +where the last dimension represents the real and imaginary components +of a complex tensor. + +Args: + x: A real tensor with last dimension of size 2. + +Returns: + A complex tensor with shape `x.shape[:-1]`. 
+ +Example: + +``` +>>> import numpy as np +>>> from keras import ops + +>>> real_imag = np.array([[1.0, 2.0], [3.0, 4.0]]) +>>> complex_tensor = ops.view_as_complex(real_imag) +>>> complex_tensor +array([1.+2.j, 3.+4.j]) +``` + diff --git a/.tether/man/op_view_as_real.txt b/.tether/man/op_view_as_real.txt new file mode 100644 index 000000000..9573606b5 --- /dev/null +++ b/.tether/man/op_view_as_real.txt @@ -0,0 +1,25 @@ +__signature__ +keras.ops.view_as_real(x) +__doc__ +Converts a complex tensor to a real tensor with shape `(..., 2)`, +where the last dimension represents the real and imaginary components. + +Args: + x: A complex tensor. + +Returns: + A real tensor where the last dimension contains the + real and imaginary parts. + +Example: +``` +>>> import numpy as np +>>> from keras import ops + +>>> complex_tensor = np.array([1 + 2j, 3 + 4j]) +>>> real = ops.view_as_real(complex_tensor) +>>> real +array([[1., 2.], + [3., 4.]]) +``` + diff --git a/.tether/man/optimizer_lion.txt b/.tether/man/optimizer_lion.txt index 987adadbd..fa0dc9138 100644 --- a/.tether/man/optimizer_lion.txt +++ b/.tether/man/optimizer_lion.txt @@ -7,13 +7,13 @@ class Lion(keras.src.optimizers.optimizer.Optimizer) | | The Lion optimizer is a stochastic-gradient-descent method that uses the | sign operator to control the magnitude of the update, unlike other adaptive - | optimizers such as Adam that rely on second-order moments. This make + | optimizers such as Adam that rely on second-order moments. This makes | Lion more memory-efficient as it only keeps track of the momentum. According | to the authors (see reference), its performance gain over Adam grows with | the batch size. Because the update of Lion is produced through the sign | operation, resulting in a larger norm, a suitable learning rate for Lion is | typically 3-10x smaller than that for AdamW. 
The weight decay for Lion - | should be in turn 3-10x larger than that for AdamW to maintain a + | should in turn be 3-10x larger than that for AdamW to maintain a | similar strength (lr * wd). | | Args: diff --git a/.tether/man/optimizer_muon.txt b/.tether/man/optimizer_muon.txt new file mode 100644 index 000000000..47ed5a5c3 --- /dev/null +++ b/.tether/man/optimizer_muon.txt @@ -0,0 +1,160 @@ +Help on class Muon in module keras.src.optimizers.muon: + +class Muon(keras.src.optimizers.optimizer.Optimizer) + | Muon(learning_rate=0.001, adam_beta_1=0.9, adam_beta_2=0.999, epsilon=1e-07, weight_decay=0.1, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='muon', exclude_layers=None, exclude_embeddings=True, muon_a=3.4445, muon_b=-4.775, muon_c=2.0315, adam_lr_ratio=0.1, momentum=0.95, ns_steps=6, nesterov=True, **kwargs) + | + | Optimizer that implements the Muon algorithm. + | + | Note that this optimizer should not be used in the following layers: + | + | 1. Embedding layer + | 2. Final output fully connected layer + | 3. Any {0,1}-D variables + | + | These should all be optimized using AdamW. + | + | The Muon optimizer can use either the Muon update step or the + | AdamW update step based on the following: + | + | - For any variable that isn't 2D, 3D or 4D, the AdamW step + | will be used. This is not configurable. + | - If the argument `exclude_embeddings` (defaults to `True`) is set + | to `True`, the AdamW step will be used. + | - For any variable with a name that matches an expression + | listed in the argument `exclude_layers` (a list), the + | AdamW step will be used. + | - Any other variable uses the Muon step. + | + | Typically, you only need to pass the name of your densely-connected + | output layer to `exclude_layers`, e.g. + | `exclude_layers=["output_dense"]`. 
+ | + | References: + | - [Original implementation](https://github.com/KellerJordan/Muon) + | - [Liu et al, 2025](https://arxiv.org/abs/2502.16982) + | + | Args: + | learning_rate: A float, + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | adam_beta_1: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. + | The exponential decay rate for the 1st moment estimates. Defaults to + | `0.9`. + | adam_beta_2: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. + | The exponential decay rate for the 2nd moment estimates. Defaults to + | `0.999`. + | epsilon: A small constant for numerical stability. This is + | "epsilon hat" in the Kingma and Ba paper + | (in the formula just before Section 2.1), + | not the epsilon in Algorithm 1 of the paper. + | It is used by the AdamW step. Defaults to `1e-7`. + | exclude_layers: List of strings, keywords of layer names to exclude. + | All layers with keywords in their path will use AdamW. + | exclude_embeddings: Boolean value. + | If True, embedding layers will use AdamW. + | muon_a: Float, parameter a of the muon algorithm. + | It is recommended to use the default value. + | muon_b: Float, parameter b of the muon algorithm. + | It is recommended to use the default value. + | muon_c: Float, parameter c of the muon algorithm. + | It is recommended to use the default value. + | adam_lr_ratio: Float, the ratio of the learning rate when + | using Adam to the main learning rate. + | It is recommended to set it to `0.1`. + | momentum: Float, momentum used by internal SGD. + | ns_steps: Integer, number of Newton-Schulz iterations to run. 
+ | nesterov: Boolean, whether to use Nesterov-style momentum + | {{base_optimizer_keyword_args}} + | + | Method resolution order: + | Muon + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | keras.src.saving.keras_saveable.KerasSaveable + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | adam_beta_1=0.9, + | adam_beta_2=0.999, + | epsilon=1e-07, + | weight_decay=0.1, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='muon', + | exclude_layers=None, + | exclude_embeddings=True, + | muon_a=3.4445, + | muon_b=-4.775, + | muon_c=2.0315, + | adam_lr_ratio=0.1, + | momentum=0.95, + | ns_steps=6, + | nesterov=True, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | Initialize optimizer variables. + | + | Adam optimizer has 3 types of variables: momentums, velocities and + | velocity_hat (only set when amsgrad is applied), + | + | Args: + | var_list: list of model variables to build Adam variables on. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. 
+ | + | transpose_last_axis(self, X) + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | + | zeropower_via_newtonschulz5( + | self, + | x, + | steps: int + | ) + | We apply the Newton-Schulz iteration to compute matrix G. + | + | We select a quintic iteration that maximizes the slope at zero. This + | approach helps minimize steps, even if the iteration doesn't fully + | converge across the interval. The result isn't exactly UV^T (from the + | SVD of G), but rather an approximation like US'V^T. Despite this + | approximation, model performance remains unaffected compared to using + | the exact UV^T from the SVD. + | + diff --git a/.tether/man/save_model_weights.txt b/.tether/man/save_model_weights.txt index 2820c7a22..fafd3596e 100644 --- a/.tether/man/save_model_weights.txt +++ b/.tether/man/save_model_weights.txt @@ -2,14 +2,59 @@ __signature__ keras.Model.save_weights( self, filepath, - overwrite=True + overwrite=True, + max_shard_size=None ) __doc__ -Saves all layer weights to a `.weights.h5` file. +Saves all weights to a single file or sharded files. + +By default, the weights will be saved in a single `.weights.h5` file. +If sharding is enabled (`max_shard_size` is not `None`), the weights +will be saved in multiple files, each with a size at most +`max_shard_size` (in GB). Additionally, a configuration file +`.weights.json` will contain the metadata for the sharded files. + +The saved sharded files contain: + +- `*.weights.json`: The configuration file containing 'metadata' and + 'weight_map'. +- `*_xxxxxx.weights.h5`: The sharded files containing only the + weights. Args: - filepath: `str` or `pathlib.Path` object. - Path where to save the model. Must end in `.weights.h5`. - overwrite: Whether we should overwrite any existing model - at the target location, or instead ask the user - via an interactive prompt. + filepath: `str` or `pathlib.Path` object. Path where the weights + will be saved. 
When sharding, the filepath must end in + `.weights.json`. If `.weights.h5` is provided, it will be + overridden. + overwrite: Whether to overwrite any existing weights at the target + location or instead ask the user via an interactive prompt. + max_shard_size: `int` or `float`. Maximum size in GB for each + sharded file. If `None`, no sharding will be done. Defaults to + `None`. + +Example: + +```python +# Instantiate a EfficientNetV2L model with about 454MB of weights. +model = keras.applications.EfficientNetV2L(weights=None) + +# Save the weights in a single file. +model.save_weights("model.weights.h5") + +# Save the weights in sharded files. Use `max_shard_size=0.25` means +# each sharded file will be at most ~250MB. +model.save_weights("model.weights.json", max_shard_size=0.25) + +# Load the weights in a new model with the same architecture. +loaded_model = keras.applications.EfficientNetV2L(weights=None) +loaded_model.load_weights("model.weights.h5") +x = keras.random.uniform((1, 480, 480, 3)) +assert np.allclose(model.predict(x), loaded_model.predict(x)) + +# Load the sharded weights in a new model with the same architecture. +loaded_model = keras.applications.EfficientNetV2L(weights=None) +loaded_model.load_weights("model.weights.json") +x = keras.random.uniform((1, 480, 480, 3)) +assert np.allclose(model.predict(x), loaded_model.predict(x)) +``` + diff --git a/.tether/man/to_categorical.txt b/.tether/man/to_categorical.txt index ca6dbad25..bb45b450c 100644 --- a/.tether/man/to_categorical.txt +++ b/.tether/man/to_categorical.txt @@ -27,8 +27,7 @@ Example: >>> b = np.array([.9, .04, .03, .03, ... .3, .45, .15, .13, ... .04, .01, .94, .05, -... .12, .21, .5, .17], -... shape=[4, 4]) +... 
.12, .21, .5, .17]).reshape(4,4) >>> loss = keras.ops.categorical_crossentropy(a, b) >>> print(np.around(loss, 5)) [0.10536 0.82807 0.1011 1.77196] diff --git a/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd b/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd index ddaf7aafd..9af53ff42 100644 --- a/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd +++ b/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd @@ -218,7 +218,7 @@ All layers you've seen so far in this guide work with all Keras backends. The `keras.ops` namespace gives you access to: - The NumPy API, e.g. `ops.matmul`, `ops.sum`, `ops.reshape`, `ops.stack`, etc. -- Neural networks-specific APIs such as `ops.softmax`, `ops`.conv`, `ops.binary_crossentropy`, `ops.relu`, etc. +- Neural networks-specific APIs such as `ops.softmax`, `ops.conv`, `ops.binary_crossentropy`, `ops.relu`, etc. You can also use backend-native APIs in your layers (such as `tf.nn` functions), but if you do this, then your layer will only be usable with the backend in question. diff --git a/.tether/vignettes-src/transfer_learning.Rmd b/.tether/vignettes-src/transfer_learning.Rmd index 9230064c8..ed90015ed 100644 --- a/.tether/vignettes-src/transfer_learning.Rmd +++ b/.tether/vignettes-src/transfer_learning.Rmd @@ -53,7 +53,7 @@ ImageNet dataset, and retraining it on the Kaggle "cats vs dogs" classification dataset. This is adapted from -[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python) +[Deep Learning with Python](https://deeplearningwithpython.io/) and the 2016 blog post ["building powerful image classification models using very little data"](https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html). 
diff --git a/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd b/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd index bfc75688b..888f85c33 100644 --- a/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd +++ b/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd @@ -368,7 +368,7 @@ A GAN training loop looks like this: as real. For a much more detailed overview of how GANs works, see -[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python). +[Deep Learning with Python](https://deeplearningwithpython.io/). Let's implement this training loop. First, create the discriminator meant to classify fake vs real digits: diff --git a/DESCRIPTION b/DESCRIPTION index 5fc222534..d08d661f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,5 +53,5 @@ Suggests: rstudioapi, R6, jpeg -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 9d4f3ec7a..a284e4f0e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,8 @@ S3method("==",keras.src.backend.common.keras_tensor.KerasTensor) S3method("==",keras_shape) S3method("[",keras_shape) S3method("[[",python_builtin_super_getter) +S3method(Arg,keras.src.backend.Tensor) +S3method(Arg,keras.src.backend.common.keras_tensor.KerasTensor) S3method(Summary,keras_shape) S3method(as.array,jax.Array) S3method(as.array,jaxlib._jax.ArrayImpl) @@ -102,6 +104,7 @@ export(activation_softmax) export(activation_softplus) export(activation_softsign) export(activation_sparse_plus) +export(activation_sparse_sigmoid) export(activation_sparsemax) export(activation_squareplus) export(activation_tanh) @@ -187,12 +190,17 @@ export(config_floatx) export(config_image_data_format) export(config_is_flash_attention_enabled) export(config_is_interactive_logging_enabled) +export(config_is_nnx_enabled) export(config_is_traceback_filtering_enabled) +export(config_max_epochs) +export(config_max_steps_per_epoch) 
export(config_set_backend) export(config_set_dtype_policy) export(config_set_epsilon) export(config_set_floatx) export(config_set_image_data_format) +export(config_set_max_epochs) +export(config_set_max_steps_per_epoch) export(constraint_maxnorm) export(constraint_minmaxnorm) export(constraint_nonneg) @@ -360,6 +368,7 @@ export(layer_random_color_degeneration) export(layer_random_color_jitter) export(layer_random_contrast) export(layer_random_crop) +export(layer_random_elastic_transform) export(layer_random_erasing) export(layer_random_flip) export(layer_random_gaussian_blur) @@ -415,6 +424,7 @@ export(loss_binary_crossentropy) export(loss_binary_focal_crossentropy) export(loss_categorical_crossentropy) export(loss_categorical_focal_crossentropy) +export(loss_categorical_generalized_cross_entropy) export(loss_categorical_hinge) export(loss_circle) export(loss_cosine_similarity) @@ -493,6 +503,7 @@ export(np_array) export(op_abs) export(op_add) export(op_all) +export(op_angle) export(op_any) export(op_append) export(op_arange) @@ -511,6 +522,7 @@ export(op_array) export(op_associative_scan) export(op_average) export(op_average_pool) +export(op_bartlett) export(op_batch_normalization) export(op_binary_crossentropy) export(op_bincount) @@ -521,9 +533,11 @@ export(op_bitwise_not) export(op_bitwise_or) export(op_bitwise_right_shift) export(op_bitwise_xor) +export(op_blackman) export(op_broadcast_to) export(op_cast) export(op_categorical_crossentropy) +export(op_cbrt) export(op_ceil) export(op_celu) export(op_cholesky) @@ -537,6 +551,7 @@ export(op_convert_to_array) export(op_convert_to_numpy) export(op_convert_to_tensor) export(op_copy) +export(op_corrcoef) export(op_correlate) export(op_cos) export(op_cosh) @@ -547,6 +562,7 @@ export(op_ctc_loss) export(op_cumprod) export(op_cumsum) export(op_custom_gradient) +export(op_deg2rad) export(op_depthwise_conv) export(op_det) export(op_diag) @@ -586,11 +602,14 @@ export(op_get_item) export(op_glu) export(op_greater) 
export(op_greater_equal) +export(op_hamming) +export(op_hanning) export(op_hard_shrink) export(op_hard_sigmoid) export(op_hard_silu) export(op_hard_swish) export(op_hard_tanh) +export(op_heaviside) export(op_histogram) export(op_hstack) export(op_identity) @@ -598,6 +617,7 @@ export(op_ifft2) export(op_imag) export(op_image_affine_transform) export(op_image_crop) +export(op_image_elastic_transform) export(op_image_extract_patches) export(op_image_gaussian_blur) export(op_image_hsv_to_rgb) @@ -617,6 +637,8 @@ export(op_isfinite) export(op_isinf) export(op_isnan) export(op_istft) +export(op_kaiser) +export(op_layer_normalization) export(op_leaky_relu) export(op_left_shift) export(op_less) @@ -718,6 +740,7 @@ export(op_solve_triangular) export(op_sort) export(op_sparse_categorical_crossentropy) export(op_sparse_plus) +export(op_sparse_sigmoid) export(op_sparsemax) export(op_split) export(op_sqrt) @@ -756,6 +779,8 @@ export(op_var) export(op_vdot) export(op_vectorize) export(op_vectorized_map) +export(op_view_as_complex) +export(op_view_as_real) export(op_vstack) export(op_where) export(op_while_loop) @@ -771,6 +796,7 @@ export(optimizer_ftrl) export(optimizer_lamb) export(optimizer_lion) export(optimizer_loss_scale) +export(optimizer_muon) export(optimizer_nadam) export(optimizer_rmsprop) export(optimizer_sgd) diff --git a/NEWS.md b/NEWS.md index 5f32df531..ec7e290a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,53 @@ - Updated dependencies declared by `use_backend("jax", gpu=TRUE)` for compatability with `keras-hub`. +- Added training loop configuration helpers: + `config_max_epochs()`, `config_set_max_epochs()`, `config_max_steps_per_epoch()`, + and `config_set_max_steps_per_epoch()`. The caps can also be set via the + `KERAS_MAX_EPOCHS` and `KERAS_MAX_STEPS_PER_EPOCH` environment variables. + Added `config_is_nnx_enabled()` to check whether JAX NNX features are enabled. 
+ +- LoRA-enabled layers (`layer_dense()`, `layer_embedding()`, `layer_einsum_dense()`) + gain a `lora_alpha` argument to scale the adaptation delta independently of the + chosen rank. + +- `keras_variable()` now accepts a `synchronization` argument for distributed + strategies. + +- `Layer$add_weight()` gains an `overwrite_with_gradient` option and + layers now provide a `symbolic_call()` method. + +- Transposed convolution utilities now follow the latest Keras API: + `op_conv_transpose()` defaults `strides = 1` and the `layer_conv_*_transpose()` + layers expose `output_padding` for precise shape control. + +- `layer_torch_module_wrapper()` gains an `output_shape` argument to help Keras + infer shapes when wrapping PyTorch modules. + +- `save_model_weights()` adds a `max_shard_size` argument to split large weight + files into manageable shards. + +- Added elastic deformation utilities for images: `layer_random_elastic_transform()` + and the lower-level `op_image_elastic_transform()`. + +- Added `loss_categorical_generalized_cross_entropy()` for training with noisy + labels. + +- Added the Muon optimizer via `optimizer_muon()`. + +- Added complex-valued helpers: S3 `Arg()` methods for tensors, `op_angle()`, + and conversions `op_view_as_real()` / `op_view_as_complex()`. + +- Added signal window operations: `op_bartlett()`, `op_blackman()`, + `op_hamming()`, `op_hanning()`, and `op_kaiser()`. + +- Expanded numeric operations with `op_layer_normalization()`, `op_cbrt()`, + `op_corrcoef()`, `op_deg2rad()`, `op_heaviside()`, the new `op_sparse_sigmoid()` + plus matching `activation_sparse_sigmoid()`, and an `attn_logits_soft_cap` + argument for `op_dot_product_attention()`. + +- `layer_layer_normalization()` removes the `rms_scaling` argument. 
+ # keras3 1.4.0 - New `op_subset()` and `x@r[...]` methods enable tensor subsetting diff --git a/R/Layer.R b/R/Layer.R index 28cbb9d41..889dc6056 100644 --- a/R/Layer.R +++ b/R/Layer.R @@ -234,6 +234,7 @@ #' regularizer = NULL, #' constraint = NULL, #' aggregation = 'none', +#' overwrite_with_gradient = FALSE, #' name = NULL) #' ``` #' Add a weight variable to the layer. @@ -268,10 +269,12 @@ #' or string name of a built-in constraint. #' Defaults to `NULL`. #' * `aggregation`: Optional string, one of `NULL`, `"none"`, `"mean"`, -#' `"sum"` or `"only_first_replica"`. Annotates the variable with -#' the type of multi-replica aggregation to be used for this -#' variable when writing custom data parallel training loops. -#' Defaults to `"none"`. +#' `"sum"` or `"only_first_replica"`. Annotates the variable with +#' the type of multi-replica aggregation to be used for this +#' variable when writing custom data parallel training loops. +#' Defaults to `"none"`. +#' * `overwrite_with_gradient`: Boolean, whether to overwrite the variable with +#' the computed gradient. Useful for float8 training. Defaults to `FALSE`. #' * `name`: String name of the variable. Useful for debugging purposes. #' #' Returns: @@ -327,6 +330,10 @@ #' ``` #' #' * ```r +#' symbolic_call(...) +#' ``` +#' +#' * ```r #' count_params() #' ``` #' Count the total number of scalars composing the weights. @@ -692,5 +699,3 @@ function(classname, # ' @param .composing Bare Keras Layers (`layer_*` functions) conventionally # have `object` as the first argument, which allows users to instantiate # (`initialize`) and `call` one motion. - - diff --git a/R/activations.R b/R/activations.R index e674a2cf7..cc91b6124 100644 --- a/R/activations.R +++ b/R/activations.R @@ -25,7 +25,7 @@ #' Input tensor. #' #' @param alpha -#' Numeric. See description for details. +#' A scalar, slope of positive section. Defaults to `1.0`. #' #' @returns A tensor, the result from applying the activation to the input tensor `x`. 
#' @export @@ -793,6 +793,30 @@ function (x) keras$activations$sparse_plus(x) } +#' Sparse sigmoid activation function. +#' +#' @description +#' It is defined as +#' +#' `f(x) = 0` for `x <= -1`, +#' `f(x) = 0.5 * (x + 1)` for `-1 < x < 1`, +#' `f(x) = 1` for `x >= 1`. +#' +#' # Reference +#' - [M. Blondel, A. F. T. Martins, V. Niculae, 2019](https://arxiv.org/pdf/1901.02324) +#' +#' @param x +#' Input tensor. +#' +#' @family activations +#' @inherit activation_elu return +#' @export +#' @tether keras.activations.sparse_sigmoid +activation_sparse_sigmoid <- +function (x) { + keras$activations$sparse_sigmoid(x) +} + #' Sparsemax activation function. #' #' @description diff --git a/R/callbacks.R b/R/callbacks.R index dbd3e69ac..98e722deb 100644 --- a/R/callbacks.R +++ b/R/callbacks.R @@ -251,8 +251,8 @@ function (monitor = "val_loss", min_delta = 0L, patience = 0L, #' `epoch`, `logs` #' - `on_train_begin` and `on_train_end` expect one positional argument: #' `logs` -#' - `on_train_batch_begin` and `on_train_batch_end` expect two positional -#' arguments: `batch`, `logs` +#' - `on_train_batch_begin` and `on_train_batch_end` expect a positional +#' argument `batch` and a named argument `logs` #' - See `Callback` class definition for the full list of functions and their #' expected arguments. #' @@ -326,7 +326,7 @@ function (monitor = "val_loss", min_delta = 0L, patience = 0L, #' Any function in [`Callback()`] that you want to override by #' passing `function_name = function`. For example, #' `callback_lambda(.., on_train_end = train_end_fn)`. The custom function -#' needs to have same arguments as the ones defined in [`Callback()`]. +#' needs to have the same arguments as the ones defined in [`Callback()`]. #' #' @inherit callback_backup_and_restore return #' @export @@ -534,9 +534,8 @@ function (schedule, verbose = 0L) #' decision to overwrite the current save file is made based on either #' the maximization or the minimization of the monitored quantity. 
#' For `val_acc`, this should be `"max"`, for `val_loss` this should be -#' `"min"`, etc. In `"auto"` mode, the mode is set to `"max"` if the -#' quantities monitored are `"acc"` or start with `"fmeasure"` and are -#' set to `"min"` for the rest of the quantities. +#' `"min"`, etc. In `"auto"` mode, the direction is automatically inferred from +#' the name of the monitored quantity. #' #' @param save_weights_only #' if TRUE, then only the model's weights will be saved @@ -995,4 +994,3 @@ normalize_callbacks_with_metrics <- function(view_metrics, initial_epoch, callba callbacks } - diff --git a/R/config.R b/R/config.R index 29bc5752e..a12dac02c 100644 --- a/R/config.R +++ b/R/config.R @@ -289,6 +289,70 @@ function (data_format) } +#' Configure the default training loop limits. +#' +#' @description +#' These helpers control the caps that built-in training loops obey when running +#' `fit()`, `evaluate()`, or `predict()`. The values can also be provided via the +#' `KERAS_MAX_EPOCHS` or `KERAS_MAX_STEPS_PER_EPOCH` environment variables to +#' quickly constrain a run without modifying source code. +#' +#' @returns +#' `config_max_epochs()` and `config_max_steps_per_epoch()` return the current +#' integer limits (or `NULL` if the cap is unset). The setter variants return +#' `NULL` invisibly and are called for side effects. +#' +#' @param max_epochs +#' Integer upper bound for epochs processed by built-in training loops. Use +#' `NULL` to remove the cap. +#' +#' @param max_steps_per_epoch +#' Integer upper bound for steps processed per epoch by built-in training +#' loops. Use `NULL` to remove the cap. 
+#' +#' @name config_max_epochs +#' @family config +#' @rdname config_max_epochs +#' @export +#' @tether keras.config.max_epochs +config_max_epochs <- +function () +{ + args <- capture_args() + do.call(keras$config$max_epochs, args) +} + +#' @rdname config_max_epochs +#' @export +#' @tether keras.config.set_max_epochs +config_set_max_epochs <- +function (max_epochs) +{ + args <- capture_args(list(max_epochs = as_integer)) + do.call(keras$config$set_max_epochs, args) +} + +#' @rdname config_max_epochs +#' @export +#' @tether keras.config.max_steps_per_epoch +config_max_steps_per_epoch <- +function () +{ + args <- capture_args() + do.call(keras$config$max_steps_per_epoch, args) +} + +#' @rdname config_max_epochs +#' @export +#' @tether keras.config.set_max_steps_per_epoch +config_set_max_steps_per_epoch <- +function (max_steps_per_epoch) +{ + args <- capture_args(list(max_steps_per_epoch = as_integer)) + do.call(keras$config$set_max_steps_per_epoch, args) +} + + #' Disables safe mode globally, allowing deserialization of lambdas. #' #' @returns No return value, called for side effects. @@ -378,6 +442,21 @@ function () } +#' Check whether NNX-specific features are enabled on the JAX backend. +#' +#' @returns +#' Logical flag; `TRUE` if NNX backend features are enabled, `FALSE` otherwise. +#' +#' @export +#' @family config +#' @tether keras.config.is_nnx_enabled +config_is_nnx_enabled <- +function () +{ + keras$config$is_nnx_enabled() +} + + #' Turn off traceback filtering. 
#' #' @description diff --git a/R/freeze.R b/R/freeze.R index 64d8cde72..44e6b8a1c 100644 --- a/R/freeze.R +++ b/R/freeze.R @@ -74,10 +74,11 @@ #' conv_base #' #' # Freeze only layers of a certain type, e.g, BatchNorm layers -#' batch_norm_layer_class_name <- class(layer_batch_normalization())[1] -#' is_batch_norm_layer <- function(x) inherits(x, batch_norm_layer_class_name) +#' # batch_norm_layer_class_name <- class(layer_batch_normalization())[1] +#' # is_batch_norm_layer <- function(x) inherits(x, batch_norm_layer_class_name) +#' is_batch_norm_layer <- function(x) inherits(x, keras$layers$BatchNormalization) #' -#' model <- application_efficientnet_b0() +#' model <- application_efficientnet_v2b0() #' freeze_weights(model, which = is_batch_norm_layer) #' # print(model) #' diff --git a/R/layers-backend-wrappers.R b/R/layers-backend-wrappers.R index 32375b099..76e4e660e 100644 --- a/R/layers-backend-wrappers.R +++ b/R/layers-backend-wrappers.R @@ -76,6 +76,9 @@ #' passing the instance to `layer_torch_module_wrapper` (e.g. by calling #' it once). #' +#' @param output_shape +#' Shape of the output from this layer. Helps Keras infer shapes. +#' #' @param name #' The name of the layer (string). #' @@ -91,10 +94,11 @@ #' @family layers #' @tether keras.layers.TorchModuleWrapper layer_torch_module_wrapper <- -function (object, module, name = NULL, ...) +function (object, module, output_shape = NULL, name = NULL, ...) 
{ args <- capture_args(list(input_shape = normalize_shape, - batch_size = as_integer, batch_input_shape = normalize_shape), + batch_size = as_integer, batch_input_shape = normalize_shape, + output_shape = normalize_shape), ignore = "object") create_layer(keras$layers$TorchModuleWrapper, object, args) } diff --git a/R/layers-convolutional.R b/R/layers-convolutional.R index 64ff5410f..e83eab6a1 100644 --- a/R/layers-convolutional.R +++ b/R/layers-convolutional.R @@ -204,6 +204,10 @@ function (object, filters, kernel_size, strides = 1L, padding = "valid", #' the left/right or up/down of the input such that output has the same #' height/width dimension as the input. #' +#' @param output_padding +#' Scalar integer. Amount of padding to add to the output length. Must be less +#' than the stride. When `NULL` (default) the output size is inferred. +#' #' @param data_format #' string, either `"channels_last"` or `"channels_first"`. #' The ordering of the dimensions in the inputs. `"channels_last"` @@ -214,8 +218,9 @@ function (object, filters, kernel_size, strides = 1L, padding = "valid", #' If you never set it, then it will be `"channels_last"`. #' #' @param dilation_rate -#' int or list of 1 integers, specifying the dilation -#' rate to use for dilated transposed convolution. +#' Scalar integer. Specifies the dilation rate. A rate other +#' than 1 requires `strides = 1`. Rates greater than 1 are not +#' currently supported. #' #' @param activation #' Activation function. If `NULL`, no activation is applied. 
@@ -267,13 +272,14 @@ function (object, filters, kernel_size, strides = 1L, padding = "valid", #' @tether keras.layers.Conv1DTranspose layer_conv_1d_transpose <- function (object, filters, kernel_size, strides = 1L, padding = "valid", - data_format = NULL, dilation_rate = 1L, activation = NULL, + output_padding = NULL, data_format = NULL, dilation_rate = 1L, activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform", bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL, ...) { args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple, - strides = as_integer_tuple, dilation_rate = as_integer_tuple, + strides = as_integer_tuple, output_padding = as_integer_tuple, + dilation_rate = as_integer_tuple, input_shape = normalize_shape, batch_size = as_integer, batch_input_shape = normalize_shape), ignore = "object") create_layer(keras$layers$Conv1DTranspose, object, args) @@ -489,6 +495,11 @@ function (object, filters, kernel_size, strides = list(1L, 1L), #' the left/right or up/down of the input. When `padding="same"` and #' `strides=1`, the output has the same size as the input. #' +#' @param output_padding +#' Scalar integer or vector of two integers. Amount of padding to add to the +#' height and width of the output tensor. Each element must be smaller than the +#' corresponding stride. When `NULL` (default) the output size is inferred. +#' #' @param data_format #' string, either `"channels_last"` or `"channels_first"`. #' The ordering of the dimensions in the inputs. `"channels_last"` @@ -501,8 +512,9 @@ function (object, filters, kernel_size, strides = list(1L, 1L), #' `"channels_last"`. #' #' @param dilation_rate -#' int or list of 1 integers, specifying the dilation -#' rate to use for dilated transposed convolution. +#' Scalar integer or vector of 2 integers specifying the dilation rate. 
Values +#' other than 1 require `strides = 1`; different rates per dimension are not +#' supported. #' #' @param activation #' Activation function. If `NULL`, no activation is applied. @@ -554,14 +566,15 @@ function (object, filters, kernel_size, strides = list(1L, 1L), #' @tether keras.layers.Conv2DTranspose layer_conv_2d_transpose <- function (object, filters, kernel_size, strides = list(1L, 1L), - padding = "valid", data_format = NULL, dilation_rate = list( - 1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform", + padding = "valid", output_padding = NULL, data_format = NULL, + dilation_rate = list(1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform", bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL, ...) { args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple, - strides = as_integer_tuple, dilation_rate = as_integer_tuple, + strides = as_integer_tuple, output_padding = as_integer_tuple, + dilation_rate = as_integer_tuple, input_shape = normalize_shape, batch_size = as_integer, batch_input_shape = normalize_shape), ignore = "object") create_layer(keras$layers$Conv2DTranspose, object, args) @@ -639,7 +652,7 @@ function (object, filters, kernel_size, strides = list(1L, 1L), #' will be `"channels_last"`. #' #' @param dilation_rate -#' int or list of 3 integers, specifying the dilation +#' int or vector of 3 ints, specifying the dilation #' rate to use for dilated convolution. #' #' @param groups @@ -778,6 +791,12 @@ function (object, filters, kernel_size, strides = list(1L, 1L, #' the left/right or up/down of the input. When `padding="same"` and #' `strides=1`, the output has the same size as the input. #' +#' @param output_padding +#' Scalar integer or vector of three integers. Amount of padding to add to the +#' depth, height, and width of the output tensor. 
Each element must be smaller +#' than the corresponding stride. When `NULL` (default) the output size is +#' inferred. +#' #' @param data_format #' string, either `"channels_last"` or `"channels_first"`. #' The ordering of the dimensions in the inputs. `"channels_last"` @@ -790,8 +809,9 @@ function (object, filters, kernel_size, strides = list(1L, 1L, #' will be `"channels_last"`. #' #' @param dilation_rate -#' int or list of 1 integers, specifying the dilation -#' rate to use for dilated transposed convolution. +#' Scalar integer or vector of 3 integers specifying the dilation rate. Values +#' other than 1 require `strides = 1`; different rates per dimension are not +#' supported. #' #' @param activation #' Activation function. If `NULL`, no activation is applied. @@ -843,14 +863,15 @@ function (object, filters, kernel_size, strides = list(1L, 1L, #' @tether keras.layers.Conv3DTranspose layer_conv_3d_transpose <- function (object, filters, kernel_size, strides = list(1L, 1L, - 1L), padding = "valid", data_format = NULL, dilation_rate = list( - 1L, 1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform", + 1L), padding = "valid", output_padding = NULL, data_format = NULL, + dilation_rate = list(1L, 1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform", bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL, ...) 
{ args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple, - strides = as_integer_tuple, dilation_rate = as_integer_tuple, + strides = as_integer_tuple, output_padding = as_integer_tuple, + dilation_rate = as_integer_tuple, input_shape = normalize_shape, batch_size = as_integer, batch_input_shape = normalize_shape), ignore = "object") create_layer(keras$layers$Conv3DTranspose, object, args) diff --git a/R/layers-core.R b/R/layers-core.R index 231c271e7..1cb2928b6 100644 --- a/R/layers-core.R +++ b/R/layers-core.R @@ -35,6 +35,7 @@ #' - ```r #' enable_lora( #' rank, +#' lora_alpha = NULL, #' a_initializer = 'he_uniform', #' b_initializer = 'zeros' #' ) @@ -94,6 +95,11 @@ #' You can also enable LoRA on an existing #' `Dense` layer by calling `layer$enable_lora(rank)`. #' +#' @param lora_alpha +#' Optional integer. Scales the low-rank adaptation delta during the forward +#' pass. The delta is scaled by `lora_alpha / lora_rank`, letting you tune the +#' LoRA adjustment strength independently of `lora_rank`. +#' #' @param object #' Object to compose the layer with. A tensor, array, or sequential model. #' @@ -119,9 +125,11 @@ function (object, units, activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform", bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL, lora_rank = NULL, + lora_alpha = NULL, ...) { args <- capture_args(list(units = as_integer, lora_rank = as_integer, + lora_alpha = as_integer, input_shape = normalize_shape, batch_size = as_integer, batch_input_shape = normalize_shape), ignore = "object") create_layer(keras$layers$Dense, object, args) @@ -261,6 +269,11 @@ function (object, units, activation = NULL, use_bias = TRUE, #' You can also enable LoRA on an existing #' `EinsumDense` layer by calling `layer$enable_lora(rank)`. #' +#' @param lora_alpha +#' Optional integer. 
Scales the low-rank adaptation delta during the forward +#' pass. The delta is scaled by `lora_alpha / lora_rank`, letting you tune the +#' LoRA adjustment strength independently of `lora_rank`. +#' #' @param ... #' Base layer keyword arguments, such as `name` and `dtype`. #' @@ -280,10 +293,10 @@ function (object, equation, output_shape, activation = NULL, bias_axes = NULL, kernel_initializer = "glorot_uniform", bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL, lora_rank = NULL, - ...) + lora_alpha = NULL, ...) { - args <- capture_args(list(lora_rank = as_integer, input_shape = normalize_shape, - batch_size = as_integer, batch_input_shape = normalize_shape, + args <- capture_args(list(lora_rank = as_integer, lora_alpha = as_integer, + input_shape = normalize_shape, batch_size = as_integer, batch_input_shape = normalize_shape, output_shape = normalize_shape), ignore = "object") create_layer(keras$layers$EinsumDense, object, args) } @@ -324,12 +337,17 @@ function (object, equation, output_shape, activation = NULL, #' - ```r #' enable_lora( #' rank, +#' lora_alpha = NULL, #' a_initializer = 'he_uniform', #' b_initializer = 'zeros' #' ) #' ``` #' #' - ```r +#' compute_output_spec(...) +#' ``` +#' +#' - ```r #' quantize(mode, type_check = TRUE) #' ``` #' @@ -391,6 +409,11 @@ function (object, equation, output_shape, activation = NULL, #' You can also enable LoRA on an existing #' `Embedding` layer instance by calling `layer$enable_lora(rank)`. #' +#' @param lora_alpha +#' Optional integer. Scales the low-rank adaptation delta during the forward +#' pass. The delta is scaled by `lora_alpha / lora_rank`, letting you tune the +#' LoRA adjustment strength independently of `lora_rank`. +#' #' @param object #' Object to compose the layer with. A tensor, array, or sequential model. 
#' @@ -408,11 +431,13 @@ function (object, equation, output_shape, activation = NULL, layer_embedding <- function (object, input_dim, output_dim, embeddings_initializer = "uniform", embeddings_regularizer = NULL, embeddings_constraint = NULL, - mask_zero = FALSE, weights = NULL, lora_rank = NULL, ...) + mask_zero = FALSE, weights = NULL, lora_rank = NULL, lora_alpha = NULL, + ...) { args <- capture_args(list(input_dim = as_integer, output_dim = as_integer, - input_shape = normalize_shape, batch_size = as_integer, - batch_input_shape = normalize_shape, input_length = as_integer), + lora_rank = as_integer, lora_alpha = as_integer, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape, + input_length = as_integer), ignore = "object") create_layer(keras$layers$Embedding, object, args) } diff --git a/R/layers-normalization.R b/R/layers-normalization.R index 4ccd03d0b..26bdee3e5 100644 --- a/R/layers-normalization.R +++ b/R/layers-normalization.R @@ -349,14 +349,6 @@ function (object, groups = 32L, axis = -1L, epsilon = 0.001, #' disabled since the scaling will be done by the next layer. #' Defaults to `TRUE`. #' -#' @param rms_scaling -#' If `TRUE`, `center` and `scale` are ignored, and the -#' inputs are scaled by `gamma` and the inverse square root -#' of the square of all inputs. This is an approximate and faster -#' approach that avoids ever computing the mean of the input. Note that -#' this *isn't* equivalent to the computation that the -#' `layer_rms_normalization` layer performs. -#' #' @param beta_initializer #' Initializer for the beta weight. Defaults to zeros. 
#' @@ -395,7 +387,7 @@ function (object, groups = 32L, axis = -1L, epsilon = 0.001, #' @tether keras.layers.LayerNormalization layer_layer_normalization <- function (object, axis = -1L, epsilon = 0.001, center = TRUE, - scale = TRUE, rms_scaling = FALSE, beta_initializer = "zeros", + scale = TRUE, beta_initializer = "zeros", gamma_initializer = "ones", beta_regularizer = NULL, gamma_regularizer = NULL, beta_constraint = NULL, gamma_constraint = NULL, ...) { diff --git a/R/layers-pooling.R b/R/layers-pooling.R index a4ed58a72..0b36113a7 100644 --- a/R/layers-pooling.R +++ b/R/layers-pooling.R @@ -122,7 +122,7 @@ function (object, pool_size, strides = NULL, padding = "valid", #' (when `input_shape >= pool_size`) #' #' The resulting output shape when using the `"same"` padding option is: -#' `output_shape = math.floor((input_shape - 1) / strides) + 1` +#' `output_shape = input_shape` #' #' # Input Shape #' - If `data_format="channels_last"`: diff --git a/R/layers-preprocessing.R b/R/layers-preprocessing.R index 7ffdefb3f..994b7fe57 100644 --- a/R/layers-preprocessing.R +++ b/R/layers-preprocessing.R @@ -1681,10 +1681,14 @@ function (object, height_factor, width_factor = NULL, fill_mode = "reflect", #' (independently of which backend you're using). #' #' @param scale -#' Float, the scale to apply to the inputs. +#' Numeric scalar, vector, or array. The scale to apply to the inputs. If +#' scalar, the same scale is applied to every feature or channel; if a vector +#' or array, scaling is applied per channel. #' #' @param offset -#' Float, the offset to apply to the inputs. +#' Numeric scalar, vector, or array. The offset to apply to the inputs. If +#' scalar, the same offset is applied to every feature or channel; if a vector +#' or array, the shift is applied per channel. #' #' @param ... #' Base layer keyword arguments, such as `name` and `dtype`. 
@@ -2655,6 +2659,95 @@ function (object, factor = 1, scale = list(0.02, 0.33), fill_value = NULL, create_layer(keras$layers$RandomErasing, object, args) } + +#' A preprocessing layer that applies random elastic transformations. +#' +#' @description +#' This layer distorts input images by applying elastic deformations, +#' simulating a physically realistic transformation. The magnitude of the +#' distortion is controlled by the `scale` parameter, while the `factor` +#' determines the probability of applying the transformation. +#' +#' @param factor +#' A single float or a tuple of two floats. +#' `factor` controls the probability of applying the transformation. +#' - `factor = 0.0` ensures no transformation is applied. +#' - `factor = 1.0` means the transformation is always applied. +#' - If a tuple `(min, max)` is provided, a probability value +#' is sampled between `min` and `max` for each image. +#' - If a single float is provided, a probability is sampled +#' between `0.0` and the given float. +#' Default is `1.0`. +#' +#' @param scale +#' A float or a tuple of two floats defining the magnitude of +#' the distortion applied. +#' - If a tuple `(min, max)` is provided, a random scale value is +#' sampled within this range. +#' - If a single float is provided, a random scale value is sampled +#' between `0.0` and the given float. +#' Default is `1.0`. +#' +#' @param interpolation +#' Interpolation mode. Supported values: `"nearest"`, +#' `"bilinear"`. +#' +#' @param fill_mode +#' Points outside the boundaries of the input are filled +#' according to the given mode. Available methods are `"constant"`, +#' `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`. +#' - `"reflect"`: `(d c b a | a b c d | d c b a)` +#' The input is extended by reflecting about the edge of the last +#' pixel. 
+#' - `"constant"`: `(k k k k | a b c d | k k k k)` +#' The input is extended by filling all values beyond +#' the edge with the same constant value `k` specified by +#' `fill_value`. +#' - `"wrap"`: `(a b c d | a b c d | a b c d)` +#' The input is extended by wrapping around to the opposite edge. +#' - `"nearest"`: `(a a a a | a b c d | d d d d)` +#' The input is extended by the nearest pixel. +#' When using the torch backend, `"reflect"` is redirected to +#' `"mirror"` because torch does not support `"reflect"`. +#' The torch backend also does not support `"wrap"`. +#' +#' @param fill_value +#' A float representing the value to fill outside the boundaries when +#' `fill_mode = "constant"`. +#' +#' @param value_range +#' The range of values the incoming images will have. +#' Represented as a two-number tuple written `[low, high]`. This is +#' typically either `[0, 1]` or `[0, 255]` depending on how your +#' preprocessing pipeline is set up. +#' +#' @param seed +#' Integer. Used to create a random seed. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams layer_center_crop +#' +#' @export +#' @tether keras.layers.RandomElasticTransform +#' @family image preprocessing layers +#' @family preprocessing layers +#' @family layers +layer_random_elastic_transform <- +function (object, factor = 1, scale = 1, interpolation = "bilinear", + fill_mode = "reflect", fill_value = 0, value_range = list(0L, 255L), + seed = NULL, data_format = NULL, ...) +{ + args <- capture_args(list(seed = as_integer, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$RandomElasticTransform, object, args) +} + +#' Applies random Gaussian blur to images for data augmentation. 
#' #' @description diff --git a/R/layers-rnn.R b/R/layers-rnn.R index b1c283e4b..97a0d7790 100644 --- a/R/layers-rnn.R +++ b/R/layers-rnn.R @@ -1442,7 +1442,6 @@ function (units, activation = "tanh", recurrent_activation = "sigmoid", #' initializer = 'uniform', #' name = 'recurrent_kernel' #' ) -#' self$built <- TRUE #' }, #' #' call = function(inputs, states) { diff --git a/R/losses.R b/R/losses.R index 2c6754049..136640812 100644 --- a/R/losses.R +++ b/R/losses.R @@ -647,6 +647,85 @@ function (y_true, y_pred, alpha = 0.25, gamma = 2, } + +#' Computes the generalized cross entropy loss. +#' +#' @description +#' The generalized cross entropy (GCE) loss offers robustness to noisy labels by +#' interpolating between categorical cross entropy (`q -> 0`) and mean absolute +#' error (`q -> 1`). For a true-class probability `p` and noise parameter `q`, +#' the loss is `loss = (1 - p^q) / q`. +#' +#' # References +#' - Zhang & Sabuncu (2018), "Generalized Cross Entropy Loss for Training Deep +#' Neural Networks with Noisy Labels" +#' +#' # Examples +#' ```{r} +#' y_true <- c(0L, 1L, 0L, 1L) +#' y_pred <- rbind( +#' c(0.7, 0.3), +#' c(0.2, 0.8), +#' c(0.6, 0.4), +#' c(0.4, 0.6) +#' ) +#' gce <- loss_categorical_generalized_cross_entropy(q = 0.7) +#' gce(y_true, y_pred) +#' ``` +#' +#' @returns +#' Generalized cross entropy loss value(s). +#' +#' @param q +#' Float in `(0, 1)`. Controls the transition between cross entropy and mean +#' absolute error. Defaults to `0.5`. +#' +#' - As `q` approaches `0`: behaves like categorical cross entropy. +#' - As `q` approaches `1`: behaves like mean absolute error. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. Supported options are +#' `"sum"`, `"sum_over_batch_size"`, `"mean"`, +#' `"mean_with_sample_weight"` or `NULL`. 
`"sum"` sums the loss, +#' `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the +#' sample size, and `"mean_with_sample_weight"` sums the loss and +#' divides by the sum of the sample weights. `"none"` and `NULL` +#' perform no aggregation. Defaults to `"sum_over_batch_size"`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param dtype +#' Dtype used for loss computations. Defaults to `config_floatx()` (the global +#' float type). +#' +#' @param y_true +#' Integer class indices with shape `(batch_size)` or `(batch_size, 1)`. +#' +#' @param y_pred +#' Predicted class probabilities with shape `(batch_size, num_classes)`. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @export +#' @family losses +#' @tether keras.losses.CategoricalGeneralizedCrossEntropy +loss_categorical_generalized_cross_entropy <- +function (y_true, y_pred, q = 0.5, ..., reduction = "sum_over_batch_size", + name = "categorical_generalized_cross_entropy", dtype = NULL) +{ + args <- capture_args(list( + y_true = as_py_array, + y_pred = as_py_array + )) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$CategoricalGeneralizedCrossEntropy + else keras$losses$categorical_generalized_cross_entropy + do.call(callable, args) +} + #' Computes the categorical hinge loss between `y_true` & `y_pred`. 
#' #' @description @@ -820,12 +899,14 @@ function (y_true, y_pred, axis = -1L, ..., reduction = "sum_over_batch_size", #' 1, 0, 1, 0.9), dim = c(2, 2, 2, 1)) #' #' axis <- c(2, 3, 4) -#' loss <- loss_dice(y_true, y_pred, axis = axis) +#' loss_fn <- loss_dice(axis = axis, reduction = NULL) +#' loss <- loss_fn(y_true, y_pred) #' stopifnot(shape(loss) == shape(2)) #' loss #' #' -#' loss = loss_dice(y_true, y_pred) +#' loss_fn <- loss_dice() +#' loss <- loss_fn(y_true, y_pred) #' stopifnot(shape(loss) == shape()) #' loss #' ``` @@ -1539,37 +1620,34 @@ function (y_true, y_pred, ..., reduction = "sum_over_batch_size", #' #' # Examples #' ```{r} -#' y_true <- c(1, 2) -#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)) +#' y_true <- op_array(c(1L, 2L)) +#' y_pred <- op_array(rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1))) #' loss <- loss_sparse_categorical_crossentropy(y_true, y_pred) #' loss #' ``` #' ```{r} -#' y_true <- c(1, 2) -#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)) +#' y_true <- op_array(c(1L, 2L)) +#' y_pred <- op_array(rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1))) #' # Using 'auto'/'sum_over_batch_size' reduction type. #' scce <- loss_sparse_categorical_crossentropy() -#' scce(op_array(y_true), op_array(y_pred)) -#' # 1.177 +#' scce(y_true, y_pred) #' ``` #' #' ```{r} #' # Calling with 'sample_weight'. -#' scce(op_array(y_true), op_array(y_pred), sample_weight = op_array(c(0.3, 0.7))) +#' scce(y_true, y_pred, sample_weight = op_array(c(0.3, 0.7))) #' ``` #' #' ```{r} #' # Using 'sum' reduction type. #' scce <- loss_sparse_categorical_crossentropy(reduction="sum") -#' scce(op_array(y_true), op_array(y_pred)) -#' # 2.354 +#' scce(y_true, y_pred) #' ``` #' #' ```{r} #' # Using 'none' reduction type. 
 #' scce <- loss_sparse_categorical_crossentropy(reduction=NULL) -#' scce(op_array(y_true), op_array(y_pred)) -#' # array([0.0513, 2.303], dtype=float32) +#' scce(y_true, y_pred) #' ``` #' #' Usage with the `compile()` API: diff --git a/R/metrics.R b/R/metrics.R index afdd5fb05..42405adbc 100644 --- a/R/metrics.R +++ b/R/metrics.R @@ -23,8 +23,16 @@ #' ```{r} #' y_true <- rbind(c(0, 1), c(0, 0)) #' y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6)) -#' loss <- loss_binary_focal_crossentropy(y_true, y_pred, gamma=2) -#' loss +#' focal_loss <- loss_binary_focal_crossentropy(y_true, y_pred, gamma = 2) +#' focal_loss +#' +#' # Compare with binary crossentropy. +#' # Binary focal crossentropy emphasizes harder examples, yielding a larger +#' # relative loss where the model struggles. +#' bce_loss <- loss_binary_crossentropy(y_true, y_pred) +#' cbind(focal_loss = as.array(focal_loss), +#' bce_loss = as.array(bce_loss), +#' ratio = as.array(focal_loss / bce_loss)) #' ``` #' #' @returns diff --git a/R/model-persistence.R b/R/model-persistence.R index 370728f8c..fc4a7a25a 100644 --- a/R/model-persistence.R +++ b/R/model-persistence.R @@ -166,18 +166,48 @@ function (model, custom_objects = NULL, compile = TRUE, safe_mode = TRUE) } -#' Saves all layer weights to a `.weights.h5` file. +#' Saves all weights to a single file or sharded files. #' -#' @param model A keras Model object +#' @description +#' By default, the weights are saved in a single `.weights.h5` file. Enable +#' sharding via `max_shard_size` (in GB) to split weights across multiple files +#' and produce a `.weights.json` manifest that tracks shard metadata. +#' +#' The saved sharded files contain: +#' +#' * `*.weights.json`: configuration file containing `metadata` and +#' `weight_map` entries. +#' * `*_xxxxxx.weights.h5`: weight shards limited by `max_shard_size`. 
+#' +#' ```{r} +#' model <- +#' keras_model_sequential(input_shape = 2) |> +#' layer_dense(4) +#' +#' path_h5 <- tempfile(fileext = ".weights.h5") +#' path_json <- tempfile(fileext = ".weights.json") +#' +#' model |> save_model_weights(path_h5) +#' model |> save_model_weights(path_json, max_shard_size = 0.01) +#' +#' model |> load_model_weights(path_h5) +#' model |> load_model_weights(path_json) +#' ``` +#' +#' @param model A keras Model object. #' #' @param filepath -#' string. -#' Path where to save the model. Must end in `.weights.h5`. +#' Path where the weights will be saved. Accepts `.weights.h5`, or when sharding +#' is enabled, a `.weights.json` manifest path. If `.weights.h5` is provided +#' while sharding, the filename will be overridden to end in `.weights.json`. #' #' @param overwrite -#' Whether we should overwrite any existing model -#' at the target location, or instead ask the user -#' via an interactive prompt. +#' Whether to overwrite any existing weights at the target location, or instead +#' ask the user via an interactive prompt. +#' +#' @param max_shard_size +#' Numeric size in GB for each sharded file. Use `NULL` to disable sharding. +#' #' #' @returns This is called primarily for side effects. `model` is returned, #' invisibly, to enable usage with the pipe. @@ -186,17 +216,21 @@ function (model, custom_objects = NULL, compile = TRUE, safe_mode = TRUE) #' @tether keras.Model.save_weights #' @seealso #' + -# + save_model_weights <- -function (model, filepath, overwrite = FALSE) +function (model, filepath, overwrite = FALSE, max_shard_size = NULL) { overwrite <- confirm_overwrite(filepath, overwrite) - keras$Model$save_weights(model, filepath, overwrite = overwrite) + keras$Model$save_weights( + model, + filepath, + overwrite = overwrite, + max_shard_size = max_shard_size + ) invisible(model) } -#' Load weights from a file saved via `save_model_weights()`. +#' Load the weights from a single file or sharded files. 
#' #' @description #' Weights are loaded based on the network's @@ -205,6 +239,12 @@ function (model, filepath, overwrite = FALSE) #' taken into account in the topological ordering, so adding or removing #' layers is fine as long as they don't have weights. #' +#' **Sharding** +#' +#' When loading sharded weights, specify a `filepath` ending in +#' `".weights.json"` (the configuration file), with the corresponding shard files +#' (`*_xxxxx.weights.h5`) located alongside it. +#' #' **Partial weight loading** #' #' If you have modified your model, for instance by adding a new layer @@ -214,10 +254,16 @@ function (model, filepath, overwrite = FALSE) #' mismatching weights will be skipped. A warning will be displayed #' for each skipped layer. #' +#' # Examples +#' ```r +#' model |> load_model_weights("model.weights.h5") +#' model |> load_model_weights("model.weights.json") +#' ``` +#' #' @param filepath -#' String, path to the weights file to load. -#' It can either be a `.weights.h5` file -#' or a legacy `.h5` weights file. +#' Path or path-like object to the weights. Accepts `.weights.h5`, legacy `.h5`, +#' or sharded weights through a `.weights.json` manifest sitting alongside the +#' shard files (`*_xxxxx.weights.h5`). #' #' @param skip_mismatch #' Boolean, whether to skip loading of layers where diff --git a/R/ops-image.R b/R/ops-image.R index 00363bce8..3a52d4f50 100644 --- a/R/ops-image.R +++ b/R/ops-image.R @@ -475,6 +475,94 @@ function (images, top_cropping = NULL, left_cropping = NULL, do.call(ops$image$crop_images, args) } +#' Applies elastic deformation to the image(s). +#' +#' @description +#' Apply random elastic deformation to 3D or 4D image tensors. 
+#' +#' # Examples +#' ```{r} +#' x <- random_uniform(c(2, 64, 80, 3)) # batch of 2 RGB images +#' y <- op_image_elastic_transform(x) +#' op_shape(y) +#' ``` +#' +#' ```{r} +#' x <- random_uniform(c(64, 80, 3)) # single RGB image +#' y <- op_image_elastic_transform(x) +#' op_shape(y) +#' ``` +#' +#' ```{r} +#' x <- random_uniform(c(2, 3, 64, 80)) # batch of 2 RGB images +#' y <- op_image_elastic_transform( +#' x, +#' data_format = "channels_first", +#' seed = 123 +#' ) +#' op_shape(y) +#' ``` +#' +#' @returns +#' Transformed image or batch of images with elastic deformation. +#' +#' @param images +#' Input image or batch of images. Must be 3D or 4D. +#' +#' @param alpha +#' Scaling factor that controls the intensity of the deformation. +#' +#' @param sigma +#' Standard deviation of the Gaussian filter used for +#' smoothing the displacement fields. +#' +#' @param interpolation +#' Interpolation method. Available methods are `"nearest"`, +#' and `"bilinear"`. Defaults to `"bilinear"`. +#' +#' @param fill_mode +#' Points outside the boundaries of the input are filled +#' according to the given mode. Available methods are `"constant"`, +#' `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`. +#' - `"reflect"`: `(d c b a | a b c d | d c b a)` +#' The input is extended by reflecting about the edge of the last +#' pixel. +#' - `"constant"`: `(k k k k | a b c d | k k k k)` +#' The input is extended by filling all values beyond +#' the edge with the same constant value `k` specified by +#' `fill_value`. +#' - `"wrap"`: `(a b c d | a b c d | a b c d)` +#' The input is extended by wrapping around to the opposite edge. +#' - `"nearest"`: `(a a a a | a b c d | d d d d)` +#' The input is extended by the nearest pixel. +#' +#' @param fill_value +#' Value used for points outside the boundaries of the input if +#' `fill_mode="constant"`. Defaults to `0`. +#' +#' @param seed +#' Optional integer seed for the random number generator. 
+#' +#' @param data_format +#' A string specifying the data format of the input tensor. +#' It can be either `"channels_last"` or `"channels_first"`. +#' `"channels_last"` corresponds to inputs with shape +#' `(batch, height, width, channels)`, while `"channels_first"` +#' corresponds to inputs with shape `(batch, channels, height, width)`. +#' If not specified, the value will default to +#' `keras.config.image_data_format`. +#' +#' @export +#' @tether keras.ops.image.elastic_transform +op_image_elastic_transform <- +function (images, alpha = 20, sigma = 5, interpolation = "bilinear", + fill_mode = "reflect", fill_value = 0, seed = NULL, data_format = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$ops$image$elastic_transform, args) +} + + #' Convert RGB images to grayscale. #' #' @description diff --git a/R/ops-nn.R b/R/ops-nn.R index bb7db4f21..936b0ef5f 100644 --- a/R/ops-nn.R +++ b/R/ops-nn.R @@ -111,6 +111,55 @@ function (x, axis = -1L, order = 2L, epsilon = NULL) } +#' Layer normalization (Ba et al., 2016). +#' +#' @description +#' Normalizes activations in `x` for each example independently by centering to +#' mean 0 and scaling to unit variance along the specified `axis`. +#' +#' # Examples +#' ```{r} +#' x <- op_arange(5, dtype = "float32") +#' op_layer_normalization(x) +#' ``` +#' +#' @returns +#' Tensor with the same shape as `x` containing the normalized values. +#' +#' @param x +#' Input tensor. +#' +#' @param gamma +#' Optional scaling factor applied to the normalized output. +#' +#' @param beta +#' Optional offset added to the normalized output. +#' +#' @param axis +#' Axis or axes along which to compute statistics. Defaults to `-1`. +#' +#' @param epsilon +#' Small constant added to the variance for numerical stability. +#' +#' @param ... +#' For forward/backward compatibility. 
+#' +#' @export +#' @family nn ops +#' @family ops +#' @tether keras.ops.layer_normalization +op_layer_normalization <- +function (x, gamma = NULL, beta = NULL, axis = -1L, epsilon = NULL, ...) +{ + args <- capture_args(list( + axis = as_axis, + gamma = as_array, + beta = as_array + )) + do.call(ops$layer_normalization, args) +} + + #' Peak Signal-to-Noise Ratio (PSNR) function. #' #' @description @@ -214,13 +263,17 @@ ops$psnr(x1, x2, max_val) #' Typically, the inputs must be in float16 and bfloat16 dtype and the #' input layout requirements may vary depending on the backend. #' +#' @param attn_logits_soft_cap +#' Optional numeric cap on the attention logits before softmax. Only supported +#' on the JAX TPU backend. +#' #' @export #' @tether keras.ops.dot_product_attention #' @family nn ops #' @family ops op_dot_product_attention <- function (query, key, value, bias = NULL, mask = NULL, scale = NULL, - is_causal = FALSE, flash_attention = NULL) + is_causal = FALSE, flash_attention = NULL, attn_logits_soft_cap = NULL) { args <- capture_args() do.call(ops$dot_product_attention, args) @@ -481,6 +534,35 @@ op_sparse_plus <- function (x) ops$sparse_plus(x) +#' Sparse sigmoid activation function. +#' +#' @description +#' It is defined as +#' +#' `f(x) = 0` for `x <= -1`, +#' `f(x) = 0.5 * (x + 1)` for `-1 < x < 1`, +#' `f(x) = 1` for `x >= 1`. +#' +#' # Examples +#' ```{r} +#' x <- op_array(c(-1.0, 0.0, 1.0)) +#' op_sparse_sigmoid(x) +#' ``` +#' +#' @returns +#' A tensor with the same shape as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @tether keras.ops.sparse_sigmoid +#' @family nn ops +#' @family ops +op_sparse_sigmoid <- +function (x) +ops$sparse_sigmoid(x) + #' Sparsemax activation function. 
#' #' @description @@ -635,10 +717,10 @@ op_polar <- #' #' # Examples #' -#' ```python -#' x <- random_uniform(c(1, 10)) -#' x_norm <- op_rms_normalization(x, scale = 10) -#' x_norm +#' ```{r, eval = FALSE} +#' x <- random_normal(c(1, 10)) +#' op_rms_normalization(x) +#' op_rms_normalization(x, scale = 10) #' ``` #' #' @returns @@ -647,13 +729,12 @@ op_polar <- #' @param x #' Input tensor. #' -#' @param axis -#' The axis or axes along which to perform normalization. -#' Default to -1. -#' #' @param scale #' Optional scaling factor for the normalization. #' +#' @param axis +#' The axis or axes along which to perform normalization. Defaults to `-1`. +#' #' @param epsilon #' A lower bound value for the norm. #' Defaults to `config_epsilon()`. @@ -663,7 +744,7 @@ op_polar <- #' @family nn ops #' @family ops op_rms_normalization <- -function (x, scale = 1L, axis = -1L, epsilon = NULL) +function (x, scale = NULL, axis = -1L, epsilon = NULL) { args <- capture_args(list(axis = as_axis)) do.call(keras$ops$rms_normalization, args) diff --git a/R/ops-numpy.R b/R/ops-numpy.R index e8c47c92e..c326e866a 100644 --- a/R/ops-numpy.R +++ b/R/ops-numpy.R @@ -71,6 +71,243 @@ function (x, kth, axis = -1L, zero_indexed = FALSE) } +#' Compute the Pearson correlation coefficient matrix. +#' +#' +#' # Examples +#' ```{r} +#' x <- op_array(matrix(c(1, 2, 3, +#' 2, 3, 4), nrow = 2, byrow = TRUE)) +#' op_corrcoef(x) +#' ``` +#' +#' @param x +#' A 2D tensor of shape `(N, D)`, where `N` is the number of variables +#' and `D` is the number of observations. +#' +#' @returns +#' A tensor of shape `(N, N)` representing the correlation matrix. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.corrcoef +op_corrcoef <- +function (x) +ops$corrcoef(x) + + +#' Computes the cube root of the input tensor, element-wise. +#' +#' @description +#' Returns the real-valued cube root of `x`, handling negative inputs in the +#' real domain. 
+#' +#' # Examples +#' ```{r} +#' op_cbrt(c(-8, 0, 8)) +#' ``` +#' +#' @param x +#' Input tensor. +#' +#' @returns +#' A tensor containing the cube root of each element in `x`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.cbrt +op_cbrt <- +function (x) +ops$cbrt(x) + + +#' Convert angles from degrees to radians. +#' +#' @description +#' The conversion is defined as: +#' `rad = deg * (pi / 180)`. +#' +#' # Examples +#' ```{r} +#' op_deg2rad(c(0, 90, 180)) +#' ``` +#' +#' @returns +#' A tensor containing angles converted to radians. +#' +#' @param x +#' Input tensor of angles in degrees. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.deg2rad +op_deg2rad <- +function (x) +ops$deg2rad(x) + + +#' Bartlett window function. +#' +#' @description +#' The Bartlett window is a triangular window that rises then falls linearly. +#' +#' # Examples +#' ```{r} +#' op_bartlett(5) +#' ``` +#' +#' @returns +#' A 1D tensor containing the window values. +#' +#' @param x +#' Length of the window. Must be a positive integer. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.bartlett +op_bartlett <- +function (x) +ops$bartlett(as_integer(x)) + + +#' Blackman window function. +#' +#' @description +#' The Blackman window is a taper formed by using a weighted cosine. +#' +#' @param x +#' Length of the window. Must be a positive integer. +#' +#' @returns +#' A 1D tensor containing the window values. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.blackman +op_blackman <- +function (x) +ops$blackman(as_integer(x)) + + +#' Hamming window function. +#' +#' @description +#' The Hamming window is defined as: +#' `w[n] = 0.54 - 0.46 * cos(2 * pi * n / (N - 1))` for `0 <= n <= N - 1`. +#' +#' # Examples +#' ```{r} +#' op_hamming(5) +#' ``` +#' +#' @returns +#' A 1D tensor containing the window values. +#' +#' @param x +#' Length of the window. Must be a positive integer. 
+#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.hamming +op_hamming <- +function (x) +ops$hamming(as_integer(x)) + + +#' Hanning window function. +#' +#' @description +#' The Hanning window is defined as: +#' `w[n] = 0.5 - 0.5 * cos(2 * pi * n / (N - 1))` for `0 <= n <= N - 1`. +#' +#' # Examples +#' ```{r} +#' op_hanning(5) +#' ``` +#' +#' @returns +#' A 1D tensor containing the window values. +#' +#' @param x +#' Length of the window. Must be a positive integer. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.hanning +op_hanning <- +function (x) +ops$hanning(as_integer(x)) + + +#' Heaviside step function. +#' +#' @description +#' The Heaviside step function is defined as: +#' `heaviside(x1, x2) = 0` if `x1 < 0`, +#' `heaviside(x1, x2) = 1` if `x1 > 0`, and +#' `heaviside(x1, x2) = x2` if `x1 == 0`. +#' +#' # Examples +#' ```{r} +#' x1 <- op_array(c(-2, 0, 3)) +#' op_heaviside(x1, 0.5) +#' ``` +#' +#' @returns +#' A tensor broadcast from `x1` and `x2` containing `0`, `1`, or `x2`. +#' +#' @param x1 +#' Tensor input. +#' +#' @param x2 +#' Value to use when `x1 == 0`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.heaviside +op_heaviside <- +function (x1, x2) +ops$heaviside(x1, x2) + + +#' Kaiser window function. +#' +#' @description +#' The Kaiser window is defined as: +#' `w[n] = I0(beta * sqrt(1 - (2 * n / (N - 1) - 1)^2)) / I0(beta)` where +#' `I0` is the modified zeroth-order Bessel function of the first kind. +#' +#' # Examples +#' ```{r} +#' op_kaiser(5, beta = 14) +#' ``` +#' +#' @returns +#' A 1D tensor containing the window values. +#' +#' @param x +#' Length of the window. Must be a positive integer. +#' +#' @param beta +#' Shape parameter for the window. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.kaiser +op_kaiser <- +function (x, beta) +ops$kaiser(as_integer(x), beta) + + #' Compute the bit-wise AND of two arrays element-wise. 
#' #' @description diff --git a/R/ops.R b/R/ops.R index bec8d9854..3365c6f7a 100644 --- a/R/ops.R +++ b/R/ops.R @@ -361,30 +361,36 @@ function (inputs, indices, updates) do.call(ops$scatter_update, args) } -#' Perform a binary search +#' Perform a binary search. #' #' @description -#' Perform a binary search, returning indices for insertion of `values` -#' into `sorted_sequence` that maintain the sorting order. +#' Return insertion indices that keep `values` in sorted order when placed +#' into `sorted_sequence`. +#' +#' # Examples +#' ```{r} +#' sorted <- op_array(c(-1, 0, 2, 4)) +#' queries <- c(-2, 0, 3, 7) +#' op_searchsorted(sorted, queries) +#' op_searchsorted(sorted, queries, side = "right", zero_indexed = TRUE) +#' ``` #' #' @returns -#' Tensor of insertion indices of same shape as `values`. +#' Tensor of insertion indices with the same shape as `values`. #' #' @param sorted_sequence -#' 1-D input tensor, sorted along the innermost -#' dimension. +#' 1-D input tensor that is sorted along its innermost dimension. #' #' @param values #' N-D tensor of query insertion values. #' #' @param side -#' `'left'` or `'right'`, specifying the direction in which to insert -#' for the equality case (tie-breaker). +#' Either `"left"` or `"right"`, choosing which side to insert on ties. #' #' @param zero_indexed -#' If `TRUE`, the returned indices are zero-based (`0` encodes to first +#' If `TRUE`, the returned indices are zero-based (`0` encodes the first #' position); if `FALSE` (default), the returned indices are one-based (`1` -#' encodes to first position). +#' encodes the first position). 
#' #' @export #' @family core ops @@ -392,8 +398,7 @@ function (inputs, indices, updates) #' @tether keras.ops.searchsorted op_searchsorted <- function (sorted_sequence, values, side = "left", zero_indexed = FALSE) { - - result <- ops$searchsorted(as_array(sorted_sequence), as_array(values), side) + result <- ops$searchsorted(as_py_array(sorted_sequence), as_py_array(values), side) if (zero_indexed) result else result + 1L } @@ -1784,7 +1789,7 @@ function (target, output, from_logits = FALSE, axis = -1L) #' int or int tuple/list of `len(inputs_spatial_shape)`, #' specifying the strides of the convolution along each spatial #' dimension. If `strides` is int, then every spatial dimension shares -#' the same `strides`. +#' the same `strides`. Defaults to `1`. #' #' @param padding #' string, either `"valid"` or `"same"`. `"valid"` means no @@ -1886,7 +1891,7 @@ function (inputs, kernel, strides = 1L, padding = "valid", data_format = NULL, # + #' @tether keras.ops.conv_transpose op_conv_transpose <- -function (inputs, kernel, strides, padding = "valid", output_padding = NULL, +function (inputs, kernel, strides = 1L, padding = "valid", output_padding = NULL, data_format = NULL, dilation_rate = 1L) { args <- capture_args(list(strides = as_integer, output_padding = as_integer, @@ -4026,6 +4031,88 @@ function (x) ops$conjugate(x) +#' Element-wise angle of a complex tensor. +#' +#' @description +#' Returns the phase angle (in radians) of each element in `x`. +#' +#' # Examples +#' ```{r} +#' x <- op_convert_to_tensor(matrix(c(1 + 3i, 2 - 5i, 4 - 3i, 3 + 2i), nrow = 2)) +#' op_angle(x) +#' ``` +#' +#' @returns +#' Tensor with the same shape as `x`, containing element-wise angles. +#' +#' @param x +#' Input tensor. Can be real or complex. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.angle +op_angle <- +function (x) +ops$angle(x) + + +#' Convert a real tensor with two channels into a complex tensor. 
+#' +#' @description +#' Expects a real-valued tensor whose last dimension has size `2`, holding the +#' real and imaginary parts. Returns the corresponding complex tensor with the +#' last dimension removed. +#' +#' # Examples +#' ```{r} +#' x <- op_array(matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)) +#' op_view_as_complex(x) +#' ``` +#' +#' @returns +#' A complex tensor with shape `op_shape(x)[-length(op_shape(x))]`. +#' +#' @param x +#' Real-valued tensor whose trailing dimension encodes the complex components. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.view_as_complex +op_view_as_complex <- +function (x) +ops$view_as_complex(x) + + +#' Convert a complex tensor into a stacked real representation. +#' +#' @description +#' Produces a real-valued tensor where the last dimension gathers the real and +#' imaginary parts of the complex input. +#' +#' # Examples +#' ```{r} +#' x <- op_array(matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)) +#' z <- op_view_as_complex(x) +#' op_view_as_real(z) +#' ``` +#' +#' @returns +#' A real tensor with shape `c(op_shape(x), 2)` containing real and imaginary parts. +#' +#' @param x +#' Complex-valued tensor to be converted. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.view_as_real +op_view_as_real <- +function (x) +ops$view_as_real(x) + + #' Returns a copy of `x`. #' #' @returns @@ -4668,14 +4755,14 @@ ops$dot(x1, x2) #' output form. #' #' @param ... -#' The operands to compute the Einstein sum of. +#' The operands to compute the Einstein sum of as unnamed arguments. +#' Additional named arguments are forwarded to the underlying backend. #' #' @export #' @family numpy ops #' @family ops #' @seealso #' + -# + #' @tether keras.ops.einsum op_einsum <- function (subscripts, ...) 
diff --git a/R/optimizers.R b/R/optimizers.R index f10ed7250..d3e48ea0f 100644 --- a/R/optimizers.R +++ b/R/optimizers.R @@ -1044,13 +1044,13 @@ function (learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999, #' @description #' The Lion optimizer is a stochastic-gradient-descent method that uses the #' sign operator to control the magnitude of the update, unlike other adaptive -#' optimizers such as Adam that rely on second-order moments. This make +#' optimizers such as Adam that rely on second-order moments. This makes #' Lion more memory-efficient as it only keeps track of the momentum. According #' to the authors (see reference), its performance gain over Adam grows with #' the batch size. Because the update of Lion is produced through the sign #' operation, resulting in a larger norm, a suitable learning rate for Lion is #' typically 3-10x smaller than that for AdamW. The weight decay for Lion -#' should be in turn 3-10x larger than that for AdamW to maintain a +#' should in turn be 3-10x larger than that for AdamW to maintain a #' similar strength (lr * wd). #' #' # References @@ -1270,6 +1270,178 @@ function (inner_optimizer, initial_scale = 32768, dynamic_growth_steps = 2000L, do.call(keras$optimizers$LossScaleOptimizer, args) } +#' Optimizer that implements the Muon algorithm. +#' +#' @description +#' Note that this optimizer should not be used in the following layers: +#' +#' 1. Embedding layer +#' 2. Final output fully connected layer +#' 3. Any 0- or 1-D variables +#' +#' These should all be optimized using AdamW. +#' +#' The Muon optimizer can use either the Muon update step or the +#' AdamW update step based on the following: +#' +#' - For any variable that isn't 2D, 3D or 4D, the AdamW step +#' will be used. This is not configurable. +#' - If the argument `exclude_embeddings` (defaults to `TRUE`) is set +#' to `TRUE`, the AdamW step will be used. 
+#' - For any variable with a name that matches an expression +#' listed in the argument `exclude_layers` (a list), the +#' AdamW step will be used. +#' - Any other variable uses the Muon step. +#' +#' Typically, you only need to pass the name of your densely-connected +#' output layer to `exclude_layers`, e.g. +#' `exclude_layers = "output_dense"`. +#' +#' # References +#' - [Original implementation](https://github.com/KellerJordan/Muon) +#' - [Liu et al, 2025](https://arxiv.org/abs/2502.16982) +#' +#' @param learning_rate +#' A float, +#' `LearningRateSchedule()` instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. +#' +#' @param adam_beta_1 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. +#' The exponential decay rate for the 1st moment estimates. Defaults to +#' `0.9`. +#' +#' @param adam_beta_2 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. +#' The exponential decay rate for the 2nd moment estimates. Defaults to +#' `0.999`. +#' +#' @param epsilon +#' A small constant for numerical stability. This is +#' "epsilon hat" in the Kingma and Ba paper +#' (in the formula just before Section 2.1), +#' not the epsilon in Algorithm 1 of the paper. +#' It is used as in AdamW. Defaults to `1e-7`. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. 
+#' If `TRUE`, exponential moving average (EMA) is applied. EMA consists of +#' computing an exponential moving average of the weights of the model (as +#' the weight values change after each training batch), and periodically +#' overwriting the weights with their moving average. +#' +#' @param ema_momentum +#' Float, defaults to `0.99`. Only used if `use_ema = TRUE`. +#' This is the momentum to use when computing the EMA of the model's +#' weights: `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if `use_ema = TRUE`. Every +#' `ema_overwrite_frequency` steps of iterations, we overwrite the model +#' variable by its moving average. If `NULL`, the optimizer does not overwrite +#' model variables in the middle of training, and you need to explicitly +#' overwrite the variables at the end of training by calling +#' `optimizer$finalize_variable_values()` (which updates the model variables +#' in-place). When using the built-in `fit()` training loop, this happens +#' automatically after the last epoch, and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will be multiplied by the loss +#' before computing gradients, and the inverse of the scale factor will be +#' multiplied by the gradients before updating variables. Useful for +#' preventing underflow during mixed precision training. Alternately, +#' [`optimizer_loss_scale()`] will automatically set a loss scale factor. +#' +#' @param gradient_accumulation_steps +#' Int or `NULL`. If an int, model and optimizer variables will not be updated +#' at every step; instead they will be updated every `gradient_accumulation_steps` +#' steps, using the average value of the gradients since the last update. This +#' is known as "gradient accumulation". 
This can be useful when your batch size is +#' very small, in order to reduce gradient noise at each update step. EMA +#' frequency will look at "accumulated" iterations value (optimizer steps // +#' gradient_accumulation_steps). Learning rate schedules will look at "real" +#' iterations value (optimizer steps). +#' +#' @param exclude_layers +#' List of strings, keywords of layer names to exclude. +#' All layers with keywords in their path will use AdamW. +#' +#' @param exclude_embeddings +#' Boolean value. +#' If `TRUE`, embedding layers will use AdamW. +#' +#' @param muon_a +#' Float, parameter a of the muon algorithm. +#' It is recommended to use the default value. +#' +#' @param muon_b +#' Float, parameter b of the muon algorithm. +#' It is recommended to use the default value. +#' +#' @param muon_c +#' Float, parameter c of the muon algorithm. +#' It is recommended to use the default value. +#' +#' @param adam_lr_ratio +#' Float, the ratio of the learning rate when +#' using Adam to the main learning rate. +#' It is recommended to set it to `0.1`. +#' +#' @param momentum +#' Float, momentum used by internal SGD. +#' +#' @param ns_steps +#' Integer, number of Newton-Schulz iterations to run. +#' +#' @param nesterov +#' Boolean, whether to use Nesterov-style momentum. +#' +#' @param name +#' String, name for the object +#' +#' @param ... +#' For forward/backward compatibility. 
+#' +#' @export +#' @family optimizers +#' @returns an `Optimizer` instance +#' @tether keras.optimizers.Muon +optimizer_muon <- +function (learning_rate = 0.001, adam_beta_1 = 0.9, adam_beta_2 = 0.999, + epsilon = 1e-07, weight_decay = 0.1, clipnorm = NULL, clipvalue = NULL, + global_clipnorm = NULL, use_ema = FALSE, ema_momentum = 0.99, + ema_overwrite_frequency = NULL, loss_scale_factor = NULL, + gradient_accumulation_steps = NULL, name = "muon", exclude_layers = NULL, + exclude_embeddings = TRUE, muon_a = 3.4445, muon_b = -4.775, + muon_c = 2.0315, adam_lr_ratio = 0.1, momentum = 0.95, ns_steps = 6L, + nesterov = TRUE, ...) +{ + args <- capture_args(list( + ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer, + ns_steps = as_integer)) + do.call(keras$optimizers$Muon, args) +} + #' Optimizer that implements the Nadam algorithm. #' diff --git a/R/py-classes.R b/R/py-classes.R index d55a4dd93..27680f036 100644 --- a/R/py-classes.R +++ b/R/py-classes.R @@ -493,7 +493,7 @@ custom_fn <- function(name, fn) { #' @export #' @aliases py_class #' @keywords internal -#' @seealso [`%<-active%()`] +#' @seealso \code{\link{\%<-active\%}} #' #' @examples #' \dontrun{ @@ -754,10 +754,10 @@ function(x) { #' @returns `value`, invisibly #' @export #' -#' @details Active bindings defined in a [`%py_class%`] are converted to +#' @details Active bindings defined in a \code{\link{\%py_class\%}} are converted to #' `@property` decorated methods. 
#' -#' @seealso [`makeActiveBinding()`] +#' @seealso [`makeActiveBinding()`], \code{\link{\%py_class\%}} #' @keywords internal #' #' @examples diff --git a/R/s3-methods.R b/R/s3-methods.R index 64b8c5fd2..af8cfebee 100644 --- a/R/s3-methods.R +++ b/R/s3-methods.R @@ -11,6 +11,14 @@ } +#' @export +Arg.keras.src.backend.common.keras_tensor.KerasTensor <- function(z) { + op_angle(z) +} +#' @export +Arg.keras.src.backend.Tensor <- Arg.keras.src.backend.common.keras_tensor.KerasTensor + + #' @export as.array.keras.src.backend.common.variables.KerasVariable <- function(x, ...) { @@ -102,4 +110,3 @@ py_to_r__keras.src.utils.tracking.TrackedSet <- function(x) import("builtins")$l # } # rm(list = c("generic", "cls")) - diff --git a/R/variable.R b/R/variable.R index 4afca3e00..a914ba79b 100644 --- a/R/variable.R +++ b/R/variable.R @@ -14,6 +14,7 @@ #' - `trainable`: Whether the variable is trainable (boolean). #' - `autocast`: Whether the variable supports autocasting (boolean). #' - `aggregation`: How a distributed variable will be aggregated (string). +#' - `synchronization`: Strategy for synchronizing the variable across devices (string). #' - `value`: The current value of the variable (NumPy array or tensor). #' - `name`: The name of the variable (string). #' - `path`: The path of the variable within the Keras model or layer (string). @@ -88,15 +89,23 @@ #' to be taken into account by downstream backends or users. Defaults #' to `"none"`. #' +#' @param synchronization +#' Optional string specifying how distributed values should be synchronized. +#' Defaults to `"auto"`. +#' #' @param name #' Optional. A unique name for the variable. Automatically generated #' if not set. #' +#' @param ... +#' Additional backend-specific keyword arguments forwarded to `keras$Variable()`. 
+#' #' @export #' @tether keras.src.backend.common.variables.Variable keras_variable <- function (initializer, shape = NULL, dtype = NULL, trainable = TRUE, - autocast = TRUE, aggregation = "none", name = NULL) + autocast = TRUE, aggregation = "none", synchronization = "auto", + name = NULL, ...) { args <- capture_args(list(shape = normalize_shape)) do.call(keras$Variable, args) diff --git a/docs/dev/LICENSE-text.html b/docs/dev/LICENSE-text.html index 6df9ffd5d..173dbc624 100644 --- a/docs/dev/LICENSE-text.html +++ b/docs/dev/LICENSE-text.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

It is defined as

+

f(x) = 0 for x <= -1, +f(x) = 0.5 * (x + 1) for -1 < x < 1, +f(x) = 1 for x >= 1.

+
+ +
+

Usage

+
activation_sparse_sigmoid(x)
+
+ +
+

Arguments

+ + +
x
+

Input tensor.

+ +
+
+

Value

+

A tensor, the result from applying the activation to the input tensor x.

+
+ + + +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/activation_sparsemax.html b/docs/dev/reference/activation_sparsemax.html index 4b5f1d80e..3caa13d32 100644 --- a/docs/dev/reference/activation_sparsemax.html +++ b/docs/dev/reference/activation_sparsemax.html @@ -12,7 +12,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

Check whether NNX-specific features are enabled on the JAX backend.

+
+ +
+

Usage

+
config_is_nnx_enabled()
+
+ +
+

Value

+

Logical flag; TRUE if NNX backend features are enabled, FALSE otherwise.

+
+ + +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/config_is_traceback_filtering_enabled.html b/docs/dev/reference/config_is_traceback_filtering_enabled.html index f965d2bcf..46a696412 100644 --- a/docs/dev/reference/config_is_traceback_filtering_enabled.html +++ b/docs/dev/reference/config_is_traceback_filtering_enabled.html @@ -28,7 +28,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

These helpers control the caps that built-in training loops obey when running +fit(), evaluate(), or predict(). The values can also be provided via the +KERAS_MAX_EPOCHS or KERAS_MAX_STEPS_PER_EPOCH environment variables to +quickly constrain a run without modifying source code.

+
+ +
+

Usage

+
config_max_epochs()
+
+config_set_max_epochs(max_epochs)
+
+config_max_steps_per_epoch()
+
+config_set_max_steps_per_epoch(max_steps_per_epoch)
+
+ +
+

Arguments

+ + +
max_epochs
+

Integer upper bound for epochs processed by built-in training loops. Use +NULL to remove the cap.

+ + +
max_steps_per_epoch
+

Integer upper bound for steps processed per epoch by built-in training +loops. Use NULL to remove the cap.

+ +
+
+

Value

+

config_max_epochs() and config_max_steps_per_epoch() return the current +integer limits (or NULL if the cap is unset). The setter variants return +NULL invisibly and are called for side effects.

+
+ + +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/config_max_steps_per_epoch.html b/docs/dev/reference/config_max_steps_per_epoch.html new file mode 100644 index 000000000..a1c838315 --- /dev/null +++ b/docs/dev/reference/config_max_steps_per_epoch.html @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/docs/dev/reference/config_set_backend.html b/docs/dev/reference/config_set_backend.html index e83993394..1f8336727 100644 --- a/docs/dev/reference/config_set_backend.html +++ b/docs/dev/reference/config_set_backend.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

This layer distorts input images by applying elastic deformations, +simulating a physically realistic transformation. The magnitude of the +distortion is controlled by the scale parameter, while the factor +determines the probability of applying the transformation.

+
+ +
+

Usage

+
layer_random_elastic_transform(
+  object,
+  factor = 1,
+  scale = 1,
+  interpolation = "bilinear",
+  fill_mode = "reflect",
+  fill_value = 0,
+  value_range = list(0L, 255L),
+  seed = NULL,
+  data_format = NULL,
+  ...
+)
+
+ +
+

Arguments

+ + +
object
+

Object to compose the layer with. A tensor, array, or sequential model.

+ + +
factor
+

A single float or a tuple of two floats. +factor controls the probability of applying the transformation.

  • factor = 0.0 ensures no transformation is applied.

  • +
  • factor = 1.0 means the transformation is always applied.

  • +
  • If a tuple (min, max) is provided, a probability value +is sampled between min and max for each image.

  • +
  • If a single float is provided, a probability is sampled +between 0.0 and the given float. +Default is 1.0.

  • +
+ + +
scale
+

A float or a tuple of two floats defining the magnitude of +the distortion applied.

  • If a tuple (min, max) is provided, a random scale value is +sampled within this range.

  • +
  • If a single float is provided, a random scale value is sampled +between 0.0 and the given float. +Default is 1.0.

  • +
+ + +
interpolation
+

Interpolation mode. Supported values: "nearest", +"bilinear".

+ + +
fill_mode
+

Points outside the boundaries of the input are filled +according to the given mode. Available methods are "constant", +"nearest", "wrap" and "reflect". Defaults to "reflect".

  • "reflect": (d c b a | a b c d | d c b a) +The input is extended by reflecting about the edge of the last +pixel.

  • +
  • "constant": (k k k k | a b c d | k k k k) +The input is extended by filling all values beyond +the edge with the same constant value k specified by +fill_value.

  • +
  • "wrap": (a b c d | a b c d | a b c d) +The input is extended by wrapping around to the opposite edge.

  • +
  • "nearest": (a a a a | a b c d | d d d d) +The input is extended by the nearest pixel. +When using the torch backend, "reflect" is redirected to +"mirror" because torch does not support "reflect". +The torch backend also does not support "wrap".

  • +
+ + +
fill_value
+

A float representing the value to fill outside the boundaries when +fill_mode = "constant".

+ + +
value_range
+

The range of values the incoming images will have. +Represented as a two-number tuple written [low, high]. This is +typically either [0, 1] or [0, 255] depending on how your +preprocessing pipeline is set up.

+ + +
seed
+

Integer. Used to create a random seed.

+ + +
data_format
+

string, either "channels_last" or "channels_first". +The ordering of the dimensions in the inputs. "channels_last" +corresponds to inputs with shape (batch, height, width, channels) +while "channels_first" corresponds to inputs with shape +(batch, channels, height, width). It defaults to the +image_data_format value found in your Keras config file at +~/.keras/keras.json. If you never set it, then it will be +"channels_last".

+ + +
...
+

For forward/backward compatibility.

+ +
+
+

See also

+

Other image preprocessing layers:
layer_aug_mix()
layer_auto_contrast()
layer_center_crop()
layer_cut_mix()
layer_equalization()
layer_max_num_bounding_boxes()
layer_mix_up()
layer_rand_augment()
layer_random_color_degeneration()
layer_random_color_jitter()
layer_random_erasing()
layer_random_gaussian_blur()
layer_random_grayscale()
layer_random_hue()
layer_random_invert()
layer_random_perspective()
layer_random_posterization()
layer_random_saturation()
layer_random_sharpness()
layer_random_shear()
layer_rescaling()
layer_resizing()
layer_solarization()

+

Other preprocessing layers:
layer_aug_mix()
layer_auto_contrast()
layer_category_encoding()
layer_center_crop()
layer_cut_mix()
layer_discretization()
layer_equalization()
layer_feature_space()
layer_hashed_crossing()
layer_hashing()
layer_integer_lookup()
layer_max_num_bounding_boxes()
layer_mel_spectrogram()
layer_mix_up()
layer_normalization()
layer_rand_augment()
layer_random_brightness()
layer_random_color_degeneration()
layer_random_color_jitter()
layer_random_contrast()
layer_random_crop()
layer_random_erasing()
layer_random_flip()
layer_random_gaussian_blur()
layer_random_grayscale()
layer_random_hue()
layer_random_invert()
layer_random_perspective()
layer_random_posterization()
layer_random_rotation()
layer_random_saturation()
layer_random_sharpness()
layer_random_shear()
layer_random_translation()
layer_random_zoom()
layer_rescaling()
layer_resizing()
layer_solarization()
layer_stft_spectrogram()
layer_string_lookup()
layer_text_vectorization()

+

Other layers:
Layer()
layer_activation()
layer_activation_elu()
layer_activation_leaky_relu()
layer_activation_parametric_relu()
layer_activation_relu()
layer_activation_softmax()
layer_activity_regularization()
layer_add()
layer_additive_attention()
layer_alpha_dropout()
layer_attention()
layer_aug_mix()
layer_auto_contrast()
layer_average()
layer_average_pooling_1d()
layer_average_pooling_2d()
layer_average_pooling_3d()
layer_batch_normalization()
layer_bidirectional()
layer_category_encoding()
layer_center_crop()
layer_concatenate()
layer_conv_1d()
layer_conv_1d_transpose()
layer_conv_2d()
layer_conv_2d_transpose()
layer_conv_3d()
layer_conv_3d_transpose()
layer_conv_lstm_1d()
layer_conv_lstm_2d()
layer_conv_lstm_3d()
layer_cropping_1d()
layer_cropping_2d()
layer_cropping_3d()
layer_cut_mix()
layer_dense()
layer_depthwise_conv_1d()
layer_depthwise_conv_2d()
layer_discretization()
layer_dot()
layer_dropout()
layer_einsum_dense()
layer_embedding()
layer_equalization()
layer_feature_space()
layer_flatten()
layer_flax_module_wrapper()
layer_gaussian_dropout()
layer_gaussian_noise()
layer_global_average_pooling_1d()
layer_global_average_pooling_2d()
layer_global_average_pooling_3d()
layer_global_max_pooling_1d()
layer_global_max_pooling_2d()
layer_global_max_pooling_3d()
layer_group_normalization()
layer_group_query_attention()
layer_gru()
layer_hashed_crossing()
layer_hashing()
layer_identity()
layer_integer_lookup()
layer_jax_model_wrapper()
layer_lambda()
layer_layer_normalization()
layer_lstm()
layer_masking()
layer_max_num_bounding_boxes()
layer_max_pooling_1d()
layer_max_pooling_2d()
layer_max_pooling_3d()
layer_maximum()
layer_mel_spectrogram()
layer_minimum()
layer_mix_up()
layer_multi_head_attention()
layer_multiply()
layer_normalization()
layer_permute()
layer_rand_augment()
layer_random_brightness()
layer_random_color_degeneration()
layer_random_color_jitter()
layer_random_contrast()
layer_random_crop()
layer_random_erasing()
layer_random_flip()
layer_random_gaussian_blur()
layer_random_grayscale()
layer_random_hue()
layer_random_invert()
layer_random_perspective()
layer_random_posterization()
layer_random_rotation()
layer_random_saturation()
layer_random_sharpness()
layer_random_shear()
layer_random_translation()
layer_random_zoom()
layer_repeat_vector()
layer_rescaling()
layer_reshape()
layer_resizing()
layer_rms_normalization()
layer_rnn()
layer_separable_conv_1d()
layer_separable_conv_2d()
layer_simple_rnn()
layer_solarization()
layer_spatial_dropout_1d()
layer_spatial_dropout_2d()
layer_spatial_dropout_3d()
layer_spectral_normalization()
layer_stft_spectrogram()
layer_string_lookup()
layer_subtract()
layer_text_vectorization()
layer_tfsm()
layer_time_distributed()
layer_torch_module_wrapper()
layer_unit_normalization()
layer_upsampling_1d()
layer_upsampling_2d()
layer_upsampling_3d()
layer_zero_padding_1d()
layer_zero_padding_2d()
layer_zero_padding_3d()
rnn_cell_gru()
rnn_cell_lstm()
rnn_cell_simple()
rnn_cells_stack()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/layer_random_erasing.html b/docs/dev/reference/layer_random_erasing.html index 2feee009a..0b7b39b38 100644 --- a/docs/dev/reference/layer_random_erasing.html +++ b/docs/dev/reference/layer_random_erasing.html @@ -12,7 +12,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

The generalized cross entropy (GCE) loss offers robustness to noisy labels by +interpolating between categorical cross entropy (q -> 0) and mean absolute +error (q -> 1). For a true-class probability p and noise parameter q, +the loss is loss = (1 - p^q) / q.

+
+ +
+

Usage

+
loss_categorical_generalized_cross_entropy(
+  y_true,
+  y_pred,
+  q = 0.5,
+  ...,
+  reduction = "sum_over_batch_size",
+  name = "categorical_generalized_cross_entropy",
+  dtype = NULL
+)
+
+ +
+

Arguments

+ + +
y_true
+

Integer class indices with shape (batch_size) or (batch_size, 1).

+ + +
y_pred
+

Predicted class probabilities with shape (batch_size, num_classes).

+ + +
q
+

Float in (0, 1). Controls the transition between cross entropy and mean +absolute error. Defaults to 0.5.

  • As q approaches 0: behaves like categorical cross entropy.

  • +
  • As q approaches 1: behaves like mean absolute error.

  • +
+ + +
...
+

For forward/backward compatibility.

+ + +
reduction
+

Type of reduction to apply to the loss. In almost all cases +this should be "sum_over_batch_size". Supported options are +"sum", "sum_over_batch_size", "mean", +"mean_with_sample_weight" or NULL. "sum" sums the loss, +"sum_over_batch_size" and "mean" sum the loss and divide by the +sample size, and "mean_with_sample_weight" sums the loss and +divides by the sum of the sample weights. "none" and NULL +perform no aggregation. Defaults to "sum_over_batch_size".

+ + +
name
+

Optional name for the loss instance.

+ + +
dtype
+

Dtype used for loss computations. Defaults to config_floatx() (the global +float type).

+ +
+
+

Value

+

Generalized cross entropy loss value(s).

+
+
+

References

+ +
  • Zhang & Sabuncu (2018), "Generalized Cross Entropy Loss for Training Deep +Neural Networks with Noisy Labels"

  • +
+
+

Examples

+

y_true <- c(0L, 1L, 0L, 1L)
+y_pred <- rbind(
+  c(0.7, 0.3),
+  c(0.2, 0.8),
+  c(0.6, 0.4),
+  c(0.4, 0.6)
+)
+gce <- loss_categorical_generalized_cross_entropy(q = 0.7)
+gce(y_true, y_pred)

+

## tf.Tensor(0.34529287, shape=(), dtype=float32)
+

+
+ + +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/loss_categorical_hinge.html b/docs/dev/reference/loss_categorical_hinge.html index b892434bf..0775edc3e 100644 --- a/docs/dev/reference/loss_categorical_hinge.html +++ b/docs/dev/reference/loss_categorical_hinge.html @@ -14,7 +14,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000
+ + + + + +
+
+
+ +
+

Returns the phase angle (in radians) of each element in x.

+
+ +
+

Usage

+
op_angle(x)
+
+ +
+

Arguments

+ + +
x
+

Input tensor. Can be real or complex.

+ +
+
+

Value

+

Tensor with the same shape as x, containing element-wise angles.

+
+
+

Examples

+

x <- op_convert_to_tensor(matrix(c(1 + 3i, 2 - 5i, 4 - 3i, 3 + 2i), nrow = 2))
+op_angle(x)

+

## tf.Tensor(
+## [[ 1.24904577 -0.64350111]
+##  [-1.19028995  0.5880026 ]], shape=(2, 2), dtype=float64)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_any.html b/docs/dev/reference/op_any.html index e9d51765e..06594ca55 100644 --- a/docs/dev/reference/op_any.html +++ b/docs/dev/reference/op_any.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

The Bartlett window is a triangular window that rises then falls linearly.

+
+ +
+

Usage

+
op_bartlett(x)
+
+ +
+

Arguments

+ + +
x
+

Length of the window. Must be a positive integer.

+ +
+
+

Value

+

A 1D tensor containing the window values.

+
+
+

Examples

+

+

## tf.Tensor([0.  0.5 1.  0.5 0. ], shape=(5), dtype=float32)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_batch_normalization.html b/docs/dev/reference/op_batch_normalization.html index 36b6e6251..5808f9145 100644 --- a/docs/dev/reference/op_batch_normalization.html +++ b/docs/dev/reference/op_batch_normalization.html @@ -10,7 +10,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

The Blackman window is a taper formed by using a weighted cosine.

+
+ +
+

Usage

+
op_blackman(x)
+
+ +
+

Arguments

+ + +
x
+

Length of the window. Must be a positive integer.

+ +
+
+

Value

+

A 1D tensor containing the window values.

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_broadcast_to.html b/docs/dev/reference/op_broadcast_to.html index b54ce522b..513ff6087 100644 --- a/docs/dev/reference/op_broadcast_to.html +++ b/docs/dev/reference/op_broadcast_to.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

Returns the real-valued cube root of x, handling negative inputs in the +real domain.

+
+ +
+

Usage

+
op_cbrt(x)
+
+ +
+

Arguments

+ + +
x
+

Input tensor.

+ +
+
+

Value

+

A tensor containing the cube root of each element in x.

+
+
+

Examples

+

op_cbrt(c(-8, 0, 8))

+

## tf.Tensor([-2.  0.  2.], shape=(3), dtype=float32)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_ceil.html b/docs/dev/reference/op_ceil.html index b3f70333d..89c0ecf46 100644 --- a/docs/dev/reference/op_ceil.html +++ b/docs/dev/reference/op_ceil.html @@ -10,7 +10,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

Compute the Pearson correlation coefficient matrix.

+
+ +
+

Usage

+
op_corrcoef(x)
+
+ +
+

Arguments

+ + +
x
+

A 2D tensor of shape (N, D), where N is the number of variables +and D is the number of observations.

+ +
+
+

Value

+

A tensor of shape (N, N) representing the correlation matrix.

+
+
+

Examples

+

x <- op_array(matrix(c(1, 2, 3,
+                       2, 3, 4), nrow = 2, byrow = TRUE))
+op_corrcoef(x)

+

## tf.Tensor(
+## [[1. 1.]
+##  [1. 1.]], shape=(2, 2), dtype=float64)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_correlate.html b/docs/dev/reference/op_correlate.html index 4849df660..9ff007908 100644 --- a/docs/dev/reference/op_correlate.html +++ b/docs/dev/reference/op_correlate.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

The conversion is defined as: +rad = deg * (pi / 180).

+
+ +
+

Usage

+
op_deg2rad(x)
+
+ +
+

Arguments

+ + +
x
+

Input tensor of angles in degrees.

+ +
+
+

Value

+

A tensor containing angles converted to radians.

+
+
+

Examples

+

op_deg2rad(c(0, 90, 180))

+

## tf.Tensor([0.        1.5707964 3.1415927], shape=(3), dtype=float32)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_depthwise_conv.html b/docs/dev/reference/op_depthwise_conv.html index c3280e6f2..f96f7e642 100644 --- a/docs/dev/reference/op_depthwise_conv.html +++ b/docs/dev/reference/op_depthwise_conv.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000
+ + + + + +
+
+
+ +
+

The Hamming window is defined as: +w[n] = 0.54 - 0.46 * cos(2 * pi * n / (N - 1)) for 0 <= n <= N - 1.

+
+ +
+

Usage

+
op_hamming(x)
+
+ +
+

Arguments

+ + +
x
+

Length of the window. Must be a positive integer.

+ +
+
+

Value

+

A 1D tensor containing the window values.

+
+
+

Examples

+

op_hamming(5)

+

## tf.Tensor([0.08000001 0.54       1.         0.54       0.08000001], shape=(5), dtype=float32)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_hanning.html b/docs/dev/reference/op_hanning.html new file mode 100644 index 000000000..aaa6606aa --- /dev/null +++ b/docs/dev/reference/op_hanning.html @@ -0,0 +1,111 @@ + +Hanning window function. — op_hanning • keras3 + Skip to contents + + + +
+
+
+ +
+

The Hanning window is defined as: +w[n] = 0.5 - 0.5 * cos(2 * pi * n / (N - 1)) for 0 <= n <= N - 1, where N is the window length given by x.

+
+ +
+

Usage

+
op_hanning(x)
+
+ +
+

Arguments

+ + +
x
+

Length of the window. Must be a positive integer.

+ +
+
+

Value

+

A 1D tensor containing the window values.

+
+
+

Examples

+

+

## tf.Tensor([0.  0.5 1.  0.5 0. ], shape=(5), dtype=float32)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_hard_shrink.html b/docs/dev/reference/op_hard_shrink.html index 577958c65..bce1beba5 100644 --- a/docs/dev/reference/op_hard_shrink.html +++ b/docs/dev/reference/op_hard_shrink.html @@ -12,7 +12,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

The Heaviside step function is defined as: +heaviside(x1, x2) = 0 if x1 < 0, +heaviside(x1, x2) = 1 if x1 > 0, and +heaviside(x1, x2) = x2 if x1 == 0.

+
+ +
+

Usage

+
op_heaviside(x1, x2)
+
+ +
+

Arguments

+ + +
x1
+

Tensor input.

+ + +
x2
+

Value to use when x1 == 0.

+ +
+
+

Value

+

A tensor broadcast from x1 and x2 containing 0, 1, or x2.

+
+
+

Examples

+

x1 <- op_array(c(-2, 0, 3))
+op_heaviside(x1, 0.5)

+

## tf.Tensor([0.  0.5 1. ], shape=(3), dtype=float32)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_histogram.html b/docs/dev/reference/op_histogram.html index b1ec9a662..d86fb40b3 100644 --- a/docs/dev/reference/op_histogram.html +++ b/docs/dev/reference/op_histogram.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

Apply random elastic deformation to 3D or 4D image tensors.

+
+ +
+

Usage

+
op_image_elastic_transform(
+  images,
+  alpha = 20,
+  sigma = 5,
+  interpolation = "bilinear",
+  fill_mode = "reflect",
+  fill_value = 0,
+  seed = NULL,
+  data_format = NULL
+)
+
+ +
+

Arguments

+ + +
images
+

Input image or batch of images. Must be 3D or 4D.

+ + +
alpha
+

Scaling factor that controls the intensity of the deformation.

+ + +
sigma
+

Standard deviation of the Gaussian filter used for +smoothing the displacement fields.

+ + +
interpolation
+

Interpolation method. Available methods are "nearest" +and "bilinear". Defaults to "bilinear".

+ + +
fill_mode
+

Points outside the boundaries of the input are filled +according to the given mode. Available methods are "constant", +"nearest", "wrap" and "reflect". Defaults to "reflect".

  • "reflect": (d c b a | a b c d | d c b a) +The input is extended by reflecting about the edge of the last +pixel.

  • +
  • "constant": (k k k k | a b c d | k k k k) +The input is extended by filling all values beyond +the edge with the same constant value k specified by +fill_value.

  • +
  • "wrap": (a b c d | a b c d | a b c d) +The input is extended by wrapping around to the opposite edge.

  • +
  • "nearest": (a a a a | a b c d | d d d d) +The input is extended by the nearest pixel.

  • +
+ + +
fill_value
+

Value used for points outside the boundaries of the input if +fill_mode="constant". Defaults to 0.

+ + +
seed
+

Optional integer seed for the random number generator.

+ + +
data_format
+

A string specifying the data format of the input tensor. +It can be either "channels_last" or "channels_first". +"channels_last" corresponds to inputs with shape +(batch, height, width, channels), while "channels_first" +corresponds to inputs with shape (batch, channels, height, width). +If not specified, the value will default to +keras.config.image_data_format.

+ +
+
+

Value

+

Transformed image or batch of images with elastic deformation.

+
+
+

Examples

+

x <- random_uniform(c(2, 64, 80, 3))  # batch of 2 RGB images
+y <- op_image_elastic_transform(x)
+op_shape(y)

+

## shape(2, 64, 80, 3)
+

+

x <- random_uniform(c(64, 80, 3))  # single RGB image
+y <- op_image_elastic_transform(x)
+op_shape(y)

+

## shape(64, 80, 3)
+

+

x <- random_uniform(c(2, 3, 64, 80))  # batch of 2 RGB images
+y <- op_image_elastic_transform(
+  x,
+  data_format = "channels_first",
+  seed = 123
+)
+op_shape(y)

+

## shape(2, 3, 64, 80)
+

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_image_extract_patches.html b/docs/dev/reference/op_image_extract_patches.html index 65b8c0d36..0def8a219 100644 --- a/docs/dev/reference/op_image_extract_patches.html +++ b/docs/dev/reference/op_image_extract_patches.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

The Kaiser window is defined as: +w[n] = I0(beta * sqrt(1 - (2 * n / (N - 1) - 1)^2)) / I0(beta) where +I0 is the modified zeroth-order Bessel function of the first kind and N is the window length given by x.

+
+ +
+

Usage

+
op_kaiser(x, beta)
+
+ +
+

Arguments

+ + +
x
+

Length of the window. Must be a positive integer.

+ + +
beta
+

Shape parameter for the window.

+ +
+
+

Value

+

A 1D tensor containing the window values.

+
+
+

Examples

+

op_kaiser(5, beta = 14)

+

## tf.Tensor([7.7268669e-06 1.6493215e-01 1.0000000e+00 1.6493215e-01 7.7268669e-06], shape=(5), dtype=float32)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_layer_normalization.html b/docs/dev/reference/op_layer_normalization.html new file mode 100644 index 000000000..270e19ea4 --- /dev/null +++ b/docs/dev/reference/op_layer_normalization.html @@ -0,0 +1,139 @@ + +Layer normalization (Ba et al., 2016). — op_layer_normalization • keras3 + Skip to contents + + + +
+
+
+ +
+

Normalizes activations in x for each example independently by centering to +mean 0 and scaling to unit variance along the specified axis.

+
+ +
+

Usage

+
op_layer_normalization(
+  x,
+  gamma = NULL,
+  beta = NULL,
+  axis = -1L,
+  epsilon = NULL,
+  ...
+)
+
+ +
+

Arguments

+ + +
x
+

Input tensor.

+ + +
gamma
+

Optional scaling factor applied to the normalized output.

+ + +
beta
+

Optional offset added to the normalized output.

+ + +
axis
+

Axis or axes along which to compute statistics. Defaults to -1.

+ + +
epsilon
+

Small constant added to the variance for numerical stability.

+ + +
...
+

For forward/backward compatibility.

+ +
+
+

Value

+

Tensor with the same shape as x containing the normalized values.

+
+
+

Examples

+

x <- op_arange(5, dtype = "float32")
+op_layer_normalization(x)

+

## tf.Tensor([-1.4142134 -0.7071067  0.         0.7071068  1.4142137], shape=(5), dtype=float32)
+

+
+
+

See also

+

Other nn ops:
op_average_pool()
op_batch_normalization()
op_binary_crossentropy()
op_categorical_crossentropy()
op_celu()
op_conv()
op_conv_transpose()
op_ctc_loss()
op_depthwise_conv()
op_dot_product_attention()
op_elu()
op_gelu()
op_glu()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_leaky_relu()
op_log_sigmoid()
op_log_softmax()
op_max_pool()
op_moments()
op_multi_hot()
op_normalize()
op_one_hot()
op_polar()
op_psnr()
op_relu()
op_relu6()
op_rms_normalization()
op_selu()
op_separable_conv()
op_sigmoid()
op_silu()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_squareplus()
op_tanh_shrink()
op_threshold()
op_unravel_index()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_leaky_relu.html b/docs/dev/reference/op_leaky_relu.html index eeadead53..1ac12e36f 100644 --- a/docs/dev/reference/op_leaky_relu.html +++ b/docs/dev/reference/op_leaky_relu.html @@ -10,7 +10,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

It is defined as

+

f(x) = 0 for x <= -1, +f(x) = 0.5 * (x + 1) for -1 < x < 1, +f(x) = 1 for x >= 1.

+
+ +
+

Usage

+
op_sparse_sigmoid(x)
+
+ +
+

Arguments

+ + +
x
+

Input tensor.

+ +
+
+

Value

+

A tensor with the same shape as x.

+
+
+

Examples

+

x <- op_array(c(-1.0, 0.0, 1.0))
+op_sparse_sigmoid(x)

+

## tf.Tensor([0.  0.5 1. ], shape=(3), dtype=float32)
+

+
+
+

See also

+

Other nn ops:
op_average_pool()
op_batch_normalization()
op_binary_crossentropy()
op_categorical_crossentropy()
op_celu()
op_conv()
op_conv_transpose()
op_ctc_loss()
op_depthwise_conv()
op_dot_product_attention()
op_elu()
op_gelu()
op_glu()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_layer_normalization()
op_leaky_relu()
op_log_sigmoid()
op_log_softmax()
op_max_pool()
op_moments()
op_multi_hot()
op_normalize()
op_one_hot()
op_polar()
op_psnr()
op_relu()
op_relu6()
op_rms_normalization()
op_selu()
op_separable_conv()
op_sigmoid()
op_silu()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparsemax()
op_squareplus()
op_tanh_shrink()
op_threshold()
op_unravel_index()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_sparsemax.html b/docs/dev/reference/op_sparsemax.html index 4c2b28b9d..326c91827 100644 --- a/docs/dev/reference/op_sparsemax.html +++ b/docs/dev/reference/op_sparsemax.html @@ -12,7 +12,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

Expects a real-valued tensor whose last dimension has size 2, holding the +real and imaginary parts. Returns the corresponding complex tensor with the +last dimension removed.

+
+ +
+

Usage

+
op_view_as_complex(x)
+
+ +
+

Arguments

+ + +
x
+

Real-valued tensor whose trailing dimension encodes the complex components.

+ +
+
+

Value

+

A complex tensor with shape op_shape(x)[-length(op_shape(x))].

+
+
+

Examples

+

x <- op_array(matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE))
+op_view_as_complex(x)

+

## tf.Tensor([1.+2.j 3.+4.j], shape=(2), dtype=complex64)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_real()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_real()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_view_as_real.html b/docs/dev/reference/op_view_as_real.html new file mode 100644 index 000000000..0768cd900 --- /dev/null +++ b/docs/dev/reference/op_view_as_real.html @@ -0,0 +1,115 @@ + +Convert a complex tensor into a stacked real representation. — op_view_as_real • keras3 + Skip to contents + + + +
+
+
+ +
+

Produces a real-valued tensor where the last dimension gathers the real and +imaginary parts of the complex input.

+
+ +
+

Usage

+
op_view_as_real(x)
+
+ +
+

Arguments

+ + +
x
+

Complex-valued tensor to be converted.

+ +
+
+

Value

+

A real tensor with shape c(op_shape(x), 2) containing real and imaginary parts.

+
+
+

Examples

+

x <- op_array(matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE))
+z <- op_view_as_complex(x)
+op_view_as_real(z)

+

## tf.Tensor(
+## [[1. 2.]
+##  [3. 4.]], shape=(2, 2), dtype=float32)
+

+
+
+

See also

+

Other numpy ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_average()
op_bartlett()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cbrt()
op_ceil()
op_clip()
op_concatenate()
op_conj()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_cumprod()
op_cumsum()
op_deg2rad()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_einsum()
op_empty()
op_equal()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_eye()
op_flip()
op_floor()
op_floor_divide()
op_full()
op_full_like()
op_get_item()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_imag()
op_inner()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_kaiser()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_lstsq()
op_matmul()
op_max()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moveaxis()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_not_equal()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_power()
op_prod()
op_quantile()
op_ravel()
op_real()
op_reciprocal()
op_repeat()
op_reshape()
op_right_shift()
op_roll()
op_rot90()
op_round()
op_saturate_cast()
op_select()
op_sign()
op_signbit()
op_sin()
op_sinh()
op_size()
op_sort()
op_split()
op_sqrt()
op_square()
op_squeeze()
op_stack()
op_std()
op_subtract()
op_sum()
op_swapaxes()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tensordot()
op_tile()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_var()
op_vdot()
op_vectorize()
op_view_as_complex()
op_vstack()
op_where()
op_zeros()
op_zeros_like()

+

Other ops:
op_abs()
op_add()
op_all()
op_angle()
op_any()
op_append()
op_arange()
op_arccos()
op_arccosh()
op_arcsin()
op_arcsinh()
op_arctan()
op_arctan2()
op_arctanh()
op_argmax()
op_argmin()
op_argpartition()
op_argsort()
op_array()
op_associative_scan()
op_average()
op_average_pool()
op_bartlett()
op_batch_normalization()
op_binary_crossentropy()
op_bincount()
op_bitwise_and()
op_bitwise_invert()
op_bitwise_left_shift()
op_bitwise_not()
op_bitwise_or()
op_bitwise_right_shift()
op_bitwise_xor()
op_blackman()
op_broadcast_to()
op_cast()
op_categorical_crossentropy()
op_cbrt()
op_ceil()
op_celu()
op_cholesky()
op_clip()
op_concatenate()
op_cond()
op_conj()
op_conv()
op_conv_transpose()
op_convert_to_numpy()
op_convert_to_tensor()
op_copy()
op_corrcoef()
op_correlate()
op_cos()
op_cosh()
op_count_nonzero()
op_cross()
op_ctc_decode()
op_ctc_loss()
op_cumprod()
op_cumsum()
op_custom_gradient()
op_deg2rad()
op_depthwise_conv()
op_det()
op_diag()
op_diagflat()
op_diagonal()
op_diff()
op_digitize()
op_divide()
op_divide_no_nan()
op_dot()
op_dot_product_attention()
op_dtype()
op_eig()
op_eigh()
op_einsum()
op_elu()
op_empty()
op_equal()
op_erf()
op_erfinv()
op_exp()
op_exp2()
op_expand_dims()
op_expm1()
op_extract_sequences()
op_eye()
op_fft()
op_fft2()
op_flip()
op_floor()
op_floor_divide()
op_fori_loop()
op_full()
op_full_like()
op_gelu()
op_get_item()
op_glu()
op_greater()
op_greater_equal()
op_hamming()
op_hanning()
op_hard_shrink()
op_hard_sigmoid()
op_hard_silu()
op_hard_tanh()
op_heaviside()
op_histogram()
op_hstack()
op_identity()
op_ifft2()
op_imag()
op_image_affine_transform()
op_image_crop()
op_image_extract_patches()
op_image_gaussian_blur()
op_image_hsv_to_rgb()
op_image_map_coordinates()
op_image_pad()
op_image_perspective_transform()
op_image_resize()
op_image_rgb_to_grayscale()
op_image_rgb_to_hsv()
op_in_top_k()
op_inner()
op_inv()
op_irfft()
op_is_tensor()
op_isclose()
op_isfinite()
op_isinf()
op_isnan()
op_istft()
op_kaiser()
op_layer_normalization()
op_leaky_relu()
op_left_shift()
op_less()
op_less_equal()
op_linspace()
op_log()
op_log10()
op_log1p()
op_log2()
op_log_sigmoid()
op_log_softmax()
op_logaddexp()
op_logdet()
op_logical_and()
op_logical_not()
op_logical_or()
op_logical_xor()
op_logspace()
op_logsumexp()
op_lstsq()
op_lu_factor()
op_map()
op_matmul()
op_max()
op_max_pool()
op_maximum()
op_mean()
op_median()
op_meshgrid()
op_min()
op_minimum()
op_mod()
op_moments()
op_moveaxis()
op_multi_hot()
op_multiply()
op_nan_to_num()
op_ndim()
op_negative()
op_nonzero()
op_norm()
op_normalize()
op_not_equal()
op_one_hot()
op_ones()
op_ones_like()
op_outer()
op_pad()
op_polar()
op_power()
op_prod()
op_psnr()
op_qr()
op_quantile()
op_ravel()
op_real()
op_rearrange()
op_reciprocal()
op_relu()
op_relu6()
op_repeat()
op_reshape()
op_rfft()
op_right_shift()
op_rms_normalization()
op_roll()
op_rot90()
op_round()
op_rsqrt()
op_saturate_cast()
op_scan()
op_scatter()
op_scatter_update()
op_searchsorted()
op_segment_max()
op_segment_sum()
op_select()
op_selu()
op_separable_conv()
op_shape()
op_sigmoid()
op_sign()
op_signbit()
op_silu()
op_sin()
op_sinh()
op_size()
op_slice()
op_slice_update()
op_slogdet()
op_soft_shrink()
op_softmax()
op_softplus()
op_softsign()
op_solve()
op_solve_triangular()
op_sort()
op_sparse_categorical_crossentropy()
op_sparse_plus()
op_sparse_sigmoid()
op_sparsemax()
op_split()
op_sqrt()
op_square()
op_squareplus()
op_squeeze()
op_stack()
op_std()
op_stft()
op_stop_gradient()
op_subset()
op_subtract()
op_sum()
op_svd()
op_swapaxes()
op_switch()
op_take()
op_take_along_axis()
op_tan()
op_tanh()
op_tanh_shrink()
op_tensordot()
op_threshold()
op_tile()
op_top_k()
op_trace()
op_transpose()
op_tri()
op_tril()
op_triu()
op_trunc()
op_unravel_index()
op_unstack()
op_var()
op_vdot()
op_vectorize()
op_vectorized_map()
op_view_as_complex()
op_vstack()
op_where()
op_while_loop()
op_zeros()
op_zeros_like()

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/op_vstack.html b/docs/dev/reference/op_vstack.html index 67e3887c1..168d21828 100644 --- a/docs/dev/reference/op_vstack.html +++ b/docs/dev/reference/op_vstack.html @@ -8,7 +8,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000 + + + + + +
+
+
+ +
+

Note that this optimizer should not be used in the following layers:

  1. Embedding layer

  2. +
  3. Final output fully connected layer

  4. +
  5. Any 0- or 1-D variables

  6. +

These should all be optimized using AdamW.

+

The Muon optimizer can use either the Muon update step or the +AdamW update step based on the following:

  • For any variable that isn't 2D, 3D or 4D, the AdamW step +will be used. This is not configurable.

  • +
  • If the argument exclude_embeddings (defaults to TRUE) is set +to TRUE, the AdamW step will be used for embedding layers.

  • +
  • For any variable with a name that matches an expression +listed in the argument exclude_layers (a list), the +AdamW step will be used.

  • +
  • Any other variable uses the Muon step.

  • +

Typically, you only need to pass the name of your densely-connected +output layer to exclude_layers, e.g. +exclude_layers = "output_dense".

+
+ +
+

Usage

+
optimizer_muon(
+  learning_rate = 0.001,
+  adam_beta_1 = 0.9,
+  adam_beta_2 = 0.999,
+  epsilon = 1e-07,
+  weight_decay = 0.1,
+  clipnorm = NULL,
+  clipvalue = NULL,
+  global_clipnorm = NULL,
+  use_ema = FALSE,
+  ema_momentum = 0.99,
+  ema_overwrite_frequency = NULL,
+  loss_scale_factor = NULL,
+  gradient_accumulation_steps = NULL,
+  name = "muon",
+  exclude_layers = NULL,
+  exclude_embeddings = TRUE,
+  muon_a = 3.4445,
+  muon_b = -4.775,
+  muon_c = 2.0315,
+  adam_lr_ratio = 0.1,
+  momentum = 0.95,
+  ns_steps = 6L,
+  nesterov = TRUE,
+  ...
+)
+
+ +
+

Arguments

+ + +
learning_rate
+

A float, +LearningRateSchedule() instance, or +a callable that takes no arguments and returns the actual value to +use. The learning rate. Defaults to 0.001.

+ + +
adam_beta_1
+

A float value or a constant float tensor, or a callable +that takes no arguments and returns the actual value to use. +The exponential decay rate for the 1st moment estimates. Defaults to +0.9.

+ + +
adam_beta_2
+

A float value or a constant float tensor, or a callable +that takes no arguments and returns the actual value to use. +The exponential decay rate for the 2nd moment estimates. Defaults to +0.999.

+ + +
epsilon
+

A small constant for numerical stability. This is +"epsilon hat" in the Kingma and Ba paper +(in the formula just before Section 2.1), +not the epsilon in Algorithm 1 of the paper. +It is used as in AdamW. Defaults to 1e-7.

+ + +
weight_decay
+

Float. If set, weight decay is applied.

+ + +
clipnorm
+

Float. If set, the gradient of each weight is individually +clipped so that its norm is no higher than this value.

+ + +
clipvalue
+

Float. If set, the gradient of each weight is clipped to be +no higher than this value.

+ + +
global_clipnorm
+

Float. If set, the gradient of all weights is clipped +so that their global norm is no higher than this value.

+ + +
use_ema
+

Boolean, defaults to FALSE. +If TRUE, exponential moving average (EMA) is applied. EMA consists of +computing an exponential moving average of the weights of the model (as +the weight values change after each training batch), and periodically +overwriting the weights with their moving average.

+ + +
ema_momentum
+

Float, defaults to 0.99. Only used if use_ema = TRUE. +This is the momentum to use when computing the EMA of the model's +weights: new_average = ema_momentum * old_average + (1 - ema_momentum) * current_variable_value.

+ + +
ema_overwrite_frequency
+

Int or NULL, defaults to NULL. Only used if use_ema = TRUE. Every +ema_overwrite_frequency steps of iterations, we overwrite the model +variable by its moving average. If NULL, the optimizer does not overwrite +model variables in the middle of training, and you need to explicitly +overwrite the variables at the end of training by calling +optimizer$finalize_variable_values() (which updates the model variables +in-place). When using the built-in fit() training loop, this happens +automatically after the last epoch, and you don't need to do anything.

+ + +
loss_scale_factor
+

Float or NULL. If a float, the loss will be multiplied by the scale factor +before computing gradients, and the inverse of the scale factor will be +multiplied by the gradients before updating variables. Useful for +preventing underflow during mixed precision training. Alternately, +optimizer_loss_scale() will automatically set a loss scale factor.

+ + +
gradient_accumulation_steps
+

Int or NULL. If an int, model and optimizer variables will not be updated +at every step; instead they will be updated every gradient_accumulation_steps +steps, using the average value of the gradients since the last update. This +is known as "gradient accumulation". This can be useful when your batch size is +very small, in order to reduce gradient noise at each update step. EMA +frequency will look at "accumulated" iterations value (optimizer steps // +gradient_accumulation_steps). Learning rate schedules will look at "real" +iterations value (optimizer steps).

+ + +
name
+

String, name for the object

+ + +
exclude_layers
+

List of strings, keywords of layer names to exclude. +All layers with keywords in their path will use AdamW.

+ + +
exclude_embeddings
+

Boolean value. +If TRUE, embedding layers will use AdamW.

+ + +
muon_a
+

Float, parameter a of the muon algorithm. +It is recommended to use the default value.

+ + +
muon_b
+

Float, parameter b of the muon algorithm. +It is recommended to use the default value.

+ + +
muon_c
+

Float, parameter c of the muon algorithm. +It is recommended to use the default value.

+ + +
adam_lr_ratio
+

Float, the ratio of the learning rate when +using Adam to the main learning rate. +It is recommended to set it to 0.1.

+ + +
momentum
+

Float, momentum used by internal SGD.

+ + +
ns_steps
+

Integer, number of Newton-Schulz iterations to run.

+ + +
nesterov
+

Boolean, whether to use Nesterov-style momentum.

+ + +
...
+

For forward/backward compatibility.

+ +
+
+

Value

+

an Optimizer instance

+
+ + + +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/dev/reference/optimizer_nadam.html b/docs/dev/reference/optimizer_nadam.html index 2fdcd1eb4..28072e4ff 100644 --- a/docs/dev/reference/optimizer_nadam.html +++ b/docs/dev/reference/optimizer_nadam.html @@ -10,7 +10,7 @@ keras3 - 1.3.0.9001 + 1.4.0.9000