9 changes: 4 additions & 5 deletions CONTRIBUTING.md
@@ -216,12 +216,11 @@ so you aren't waiting around forever!

## Formatting Code

We use `flake8`, `isort` and `black` for code formatting. You can run
the following commands manually every time you want to format your code:
KerasHub uses [Ruff](https://docs.astral.sh/ruff/) to format the code. You can
run the following commands manually every time you want to format your code:

- Run `shell/format.sh` to format your code
- Run `shell/lint.sh` to check the result.

If after running these the CI flow is still failing, try updating `flake8`,
`isort` and `black`. This can be done by running `pip install --upgrade black`,
`pip install --upgrade flake8`, and `pip install --upgrade isort`.
If after running these the CI flow is still failing, try updating `ruff`
with `pip install --upgrade ruff`.
12 changes: 6 additions & 6 deletions keras_hub/api/models/__init__.py
@@ -200,18 +200,18 @@
)
from keras_hub.src.models.image_to_image import ImageToImage
from keras_hub.src.models.inpaint import Inpaint
from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
from keras_hub.src.models.llama3.llama3_causal_lm import Llama3CausalLM
from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import (
Llama3CausalLMPreprocessor,
)
from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
from keras_hub.src.models.llama.llama_backbone import LlamaBackbone
from keras_hub.src.models.llama.llama_causal_lm import LlamaCausalLM
from keras_hub.src.models.llama.llama_causal_lm_preprocessor import (
LlamaCausalLMPreprocessor,
)
from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer
from keras_hub.src.models.llama3.llama3_backbone import Llama3Backbone
from keras_hub.src.models.llama3.llama3_causal_lm import Llama3CausalLM
from keras_hub.src.models.llama3.llama3_causal_lm_preprocessor import (
Llama3CausalLMPreprocessor,
)
from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
from keras_hub.src.models.masked_lm import MaskedLM
from keras_hub.src.models.masked_lm_preprocessor import MaskedLMPreprocessor
from keras_hub.src.models.mistral.mistral_backbone import MistralBackbone
2 changes: 1 addition & 1 deletion keras_hub/api/tokenizers/__init__.py
@@ -21,8 +21,8 @@
from keras_hub.src.models.gemma.gemma_tokenizer import GemmaTokenizer
from keras_hub.src.models.gpt2.gpt2_tokenizer import GPT2Tokenizer
from keras_hub.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer
from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
from keras_hub.src.models.llama.llama_tokenizer import LlamaTokenizer
from keras_hub.src.models.llama3.llama3_tokenizer import Llama3Tokenizer
from keras_hub.src.models.mistral.mistral_tokenizer import MistralTokenizer
from keras_hub.src.models.opt.opt_tokenizer import OPTTokenizer
from keras_hub.src.models.pali_gemma.pali_gemma_tokenizer import (
4 changes: 2 additions & 2 deletions keras_hub/src/bounding_box/converters.py
@@ -73,8 +73,8 @@ def encode_box_to_deltas(

if encoding_format not in ["center_xywh", "center_yxhw"]:
raise ValueError(
"`encoding_format` should be one of 'center_xywh' or 'center_yxhw', "
f"got {encoding_format}"
"`encoding_format` should be one of 'center_xywh' or "
f"'center_yxhw', got {encoding_format}"
)

encoded_anchors = convert_format(
10 changes: 6 additions & 4 deletions keras_hub/src/bounding_box/utils_test.py
@@ -58,10 +58,12 @@ def test_clip_to_image_filters_fully_out_bounding_boxes(self):
bounding_boxes, bounding_box_format="xyxy", images=image
)

self.assertAllEqual(
bounding_boxes["boxes"],
np.array([[-1, -1, -1, -1], [100, 100, 256, 256]]),
),
(
self.assertAllEqual(
bounding_boxes["boxes"],
np.array([[-1, -1, -1, -1], [100, 100, 256, 256]]),
),
)
self.assertAllEqual(
bounding_boxes["classes"],
np.array([-1, 0]),
2 changes: 1 addition & 1 deletion keras_hub/src/layers/modeling/f_net_encoder.py
@@ -66,7 +66,7 @@ def __init__(
layer_norm_epsilon=1e-5,
kernel_initializer="glorot_uniform",
bias_initializer="zeros",
**kwargs
**kwargs,
):
super().__init__(**kwargs)
self.intermediate_dim = intermediate_dim
3 changes: 2 additions & 1 deletion keras_hub/src/layers/modeling/masked_lm_head.py
@@ -34,7 +34,8 @@ class MaskedLMHead(keras.layers.Layer):
token_embedding: Optional. A `keras_hub.layers.ReversibleEmbedding`
instance. If passed, the layer will be used to project from the
`hidden_dim` of the model to the output `vocabulary_size`.
intermediate_activation: The activation function of intermediate dense layer.
intermediate_activation: The activation function of intermediate dense
layer.
activation: The activation function for the outputs of the layer.
Usually either `None` (return logits), or `"softmax"`
(return probabilities).
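As an aside on the `MaskedLMHead` docstring rewrapped above, here is a minimal usage sketch. The vocabulary size, shapes, and intermediate activation are hypothetical, and the call pattern is my reading of the layer's documented interface, not a quote from it.

```python
import numpy as np
import keras_hub

# Hypothetical sizes, for illustration only.
batch_size, seq_len, hidden_dim, vocab_size = 2, 10, 16, 100

# Project encoder outputs at the masked positions back onto the vocabulary.
lm_head = keras_hub.layers.MaskedLMHead(
    vocabulary_size=vocab_size,
    intermediate_activation="relu",
    activation="softmax",
)
encoded_tokens = np.random.uniform(size=(batch_size, seq_len, hidden_dim))
encoded_tokens = encoded_tokens.astype("float32")
mask_positions = np.array([[1, 3], [2, 5]])  # two masked tokens per example
probs = lm_head(encoded_tokens, mask_positions=mask_positions)
print(probs.shape)  # expected (batch_size, num_mask_positions, vocab_size)
```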
14 changes: 8 additions & 6 deletions keras_hub/src/layers/modeling/rms_normalization.py
@@ -6,10 +6,11 @@

@keras_hub_export("keras_hub.layers.RMSNormalization")
class RMSNormalization(keras.layers.Layer):
"""
Root Mean Square (RMS) Normalization layer.
"""Root Mean Square (RMS) Normalization layer.

This layer normalizes the input tensor based on its RMS value and applies
a learned scaling factor.

Args:
input_dim: int. The dimensionality of the input tensor.
"""
@@ -21,12 +22,13 @@ def __init__(self, input_dim):
)

def call(self, x):
"""
Applies RMS normalization to the input tensor.
"""Applies RMS normalization to the input tensor.

Args:
x: KerasTensor. Input tensor of shape (batch_size, input_dim).
x: Input tensor of shape (batch_size, input_dim).

Returns:
KerasTensor: The RMS-normalized tensor of the same shape (batch_size, input_dim),
Member commented: Did Ruff automatically remove KerasTensor from here, or did you remove it manually? 🤔

Member Author replied: I removed it manually while fixing line length issues. We can return either a KerasTensor (symbolic tensor) or a torch, JAX, or TF tensor, so it is better, and shorter for line length, to just say tensor.

The RMS-normalized tensor of the same shape (batch_size, input_dim),
scaled by the learned `scale` parameter.
"""
x = ops.cast(x, float)
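For reference alongside the `RMSNormalization` docstring above, a minimal NumPy sketch of what RMS normalization computes: divide by the root mean square of the features, then apply a learned per-feature scale. The layer's exact epsilon and dtype handling may differ.

```python
import numpy as np

def rms_normalize(x, scale, epsilon=1e-6):
    # x: (batch_size, input_dim); scale: learned (input_dim,) parameter.
    rms = np.sqrt(np.mean(np.square(x), axis=-1, keepdims=True) + epsilon)
    return (x / rms) * scale

x = np.random.uniform(size=(2, 8)).astype("float32")
out = rms_normalize(x, scale=np.ones(8, dtype="float32"))
print(out.shape)  # (2, 8) -- the RMS-normalized tensor keeps the input shape
```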
5 changes: 3 additions & 2 deletions keras_hub/src/layers/modeling/rotary_embedding.py
@@ -11,7 +11,8 @@ class RotaryEmbedding(keras.layers.Layer):
This layer encodes absolute positional information with a rotation
matrix. It calculates the rotary encoding with a mix of sine and
cosine functions with geometrically increasing wavelengths.
Defined and formulated in [RoFormer: Enhanced Transformer with Rotary Position Embedding](https://arxiv.org/abs/2104.09864v4).
Defined and formulated in
[RoFormer: Enhanced Transformer with Rotary Position Embedding](https://arxiv.org/abs/2104.09864v4).
The input must be a tensor with shape a sequence dimension and a feature
dimension. Typically, this will either an input with shape
`(batch_size, sequence_length, feature_length)` or
@@ -65,7 +66,7 @@ def __init__(
scaling_factor=1.0,
sequence_axis=1,
feature_axis=-1,
**kwargs
**kwargs,
):
super().__init__(**kwargs)
self.max_wavelength = max_wavelength
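As a companion to the RoFormer paper linked in the `RotaryEmbedding` docstring above, a minimal NumPy sketch of the rotary idea: each position rotates feature pairs by geometrically spaced angles. This uses the half-split pairing convention and a wavelength of 10000 as assumptions; the layer's actual axis, scaling, and pairing choices follow its own arguments.

```python
import numpy as np

def rotary_embed(x, max_wavelength=10000):
    # x: (sequence_length, feature_dim), feature_dim assumed even.
    seq_len, dim = x.shape
    positions = np.arange(seq_len)[:, None]                          # (seq, 1)
    inv_freq = 1.0 / max_wavelength ** (np.arange(0, dim, 2) / dim)  # (dim/2,)
    angles = positions * inv_freq                                    # (seq, dim/2)
    cos, sin = np.cos(angles), np.sin(angles)
    x1, x2 = x[:, : dim // 2], x[:, dim // 2 :]
    # Rotate each (x1, x2) feature pair by its position-dependent angle.
    return np.concatenate([x1 * cos - x2 * sin, x2 * cos + x1 * sin], axis=-1)

print(rotary_embed(np.random.uniform(size=(4, 8))).shape)  # (4, 8)
```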
@@ -58,7 +58,7 @@ def __init__(
tie_weights=True,
embeddings_initializer="uniform",
mask_zero=False,
**kwargs
**kwargs,
):
super().__init__(**kwargs)
if vocabulary_size is None:
14 changes: 8 additions & 6 deletions keras_hub/src/layers/modeling/transformer_decoder.py
@@ -5,12 +5,13 @@
from keras_hub.src.layers.modeling.cached_multi_head_attention import (
CachedMultiHeadAttention,
)
from keras_hub.src.utils.keras_utils import clone_initializer

from keras_hub.src.layers.modeling.transformer_layer_utils import ( # isort:skip
from keras_hub.src.layers.modeling.transformer_layer_utils import (
compute_causal_mask,
)
from keras_hub.src.layers.modeling.transformer_layer_utils import (
merge_padding_and_attention_mask,
)
from keras_hub.src.utils.keras_utils import clone_initializer


@keras_hub_export("keras_hub.layers.TransformerDecoder")
@@ -265,13 +266,13 @@ def call(
`[batch_size, decoder_sequence_length]`.
decoder_attention_mask: a boolean Tensor. Customized decoder
sequence mask, must be of shape
`[batch_size, decoder_sequence_length, decoder_sequence_length]`.
`[batch_size, decoder_sequence_length, decoder_sequence_length]`
encoder_padding_mask: a boolean Tensor, the padding mask of encoder
sequence, must be of shape
`[batch_size, encoder_sequence_length]`.
encoder_attention_mask: a boolean Tensor. Customized encoder
sequence mask, must be of shape
`[batch_size, encoder_sequence_length, encoder_sequence_length]`.
`[batch_size, encoder_sequence_length, encoder_sequence_length]`
self_attention_cache: a dense float Tensor. The cache of key/values
pairs in the self-attention layer. Has shape
`[batch_size, 2, max_seq_len, num_heads, key_dims]`.
@@ -435,7 +436,8 @@ def _compute_self_attention_mask(
input_length = output_length = ops.shape(decoder_sequence)[1]
# We need to handle a rectangular causal mask when doing cached
# decoding. For generative inference, `decoder_sequence` will
# generally be length 1, and `cache` will be the full generation length.
# generally be length 1, and `cache` will be the full generation
# length.
if self_attention_cache is not None:
input_length = ops.shape(self_attention_cache)[2]

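To illustrate the rectangular-mask comment rewrapped above, a small NumPy sketch of a causal mask when the new query block is shorter than the cached key sequence. This is only a sketch of the idea, not the signature of the library's `compute_causal_mask` helper.

```python
import numpy as np

def rectangular_causal_mask(output_length, input_length, cache_index=0):
    # Rows are the new (query) positions, offset by how many tokens are
    # already in the cache; columns are the cached key positions.
    query_positions = np.arange(output_length)[:, None] + cache_index
    key_positions = np.arange(input_length)[None, :]
    # A query may attend to any key at or before its own absolute position.
    return (key_positions <= query_positions).astype("int32")

# Generating the token at position 5 against a cache of length 8: a 1 x 8 mask.
print(rectangular_causal_mask(output_length=1, input_length=8, cache_index=5))
# [[1 1 1 1 1 1 0 0]]
```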
4 changes: 3 additions & 1 deletion keras_hub/src/layers/modeling/transformer_encoder.py
@@ -190,7 +190,9 @@ def call(
[batch_size, sequence_length, sequence_length].
training: a boolean indicating whether the layer should behave in
training mode or in inference mode.
return_attention_scores: a boolean indicating whether the output should be `(attention_output, attention_scores)` if `True` or `attention_output` if `False`. Defaults to `False`.
return_attention_scores: a boolean indicating whether the output
should be `(attention_output, attention_scores)` if `True` or
`attention_output` if `False`. Defaults to `False`.

Returns:
A Tensor of the same shape as the `inputs`.
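A short sketch of the `return_attention_scores` behavior documented above, with hypothetical sizes chosen to match the attention-scores test in the next file; the constructor arguments shown are illustrative, not exhaustive.

```python
import numpy as np
import keras_hub

# Hypothetical sizes: batch 1, sequence length 4, model dim 6, 2 heads.
encoder = keras_hub.layers.TransformerEncoder(intermediate_dim=4, num_heads=2)
inputs = np.random.uniform(size=(1, 4, 6)).astype("float32")

outputs, scores = encoder(inputs, return_attention_scores=True)
print(outputs.shape)  # (1, 4, 6) -- same shape as the inputs
print(scores.shape)   # (1, 2, 4, 4): (batch, heads, seq_length, seq_length)
```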
3 changes: 2 additions & 1 deletion keras_hub/src/layers/modeling/transformer_encoder_test.py
@@ -104,5 +104,6 @@ def test_attention_scores(self):
)
self.assertAllEqual(outputs.shape, inputs.shape)

# attention scores shape (batch_size, num_of_attn_heads, seq_length, seq_length)
# attention scores shape
# (batch_size, num_of_attn_heads, seq_length, seq_length)
self.assertAllEqual(attention_scores.shape, [1, 2, 4, 4])
2 changes: 1 addition & 1 deletion keras_hub/src/metrics/bleu.py
@@ -164,7 +164,7 @@ def _tokenizer(self, inputs):
return inputs

def _get_ngrams(self, segment, max_order):
"""Extracts all n-grams up to a given maximum order from an input segment.
"""Extracts all n-grams up to a given maximum order from an input.

Uses Python ops. Inspired from
https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py.
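For context on the shortened `_get_ngrams` docstring above, this is the kind of pure-Python n-gram counting the cited tensorflow/nmt BLEU script performs; a sketch, not the exact body of the method.

```python
import collections

def get_ngrams(segment, max_order):
    # Count every n-gram of order 1..max_order in a list of tokens.
    ngram_counts = collections.Counter()
    for order in range(1, max_order + 1):
        for i in range(len(segment) - order + 1):
            ngram_counts[tuple(segment[i : i + order])] += 1
    return ngram_counts

print(get_ngrams(["the", "quick", "brown", "fox"], max_order=2))
# Counts 4 unigrams and 3 bigrams.
```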
1 change: 0 additions & 1 deletion keras_hub/src/models/albert/albert_backbone_test.py
@@ -13,7 +13,6 @@ def setUp(self):
"num_heads": 2,
"num_groups": 1,
"num_inner_repetitions": 1,
"num_inner_repetitions": 1,
"embedding_dim": 16,
"hidden_dim": 2,
"intermediate_dim": 4,
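A quick note on why the duplicated `"num_inner_repetitions"` entry removed above was safe to delete: Python dict literals keep only the last occurrence of a repeated key, so the duplicate line had no effect on the test config.

```python
config = {"num_inner_repetitions": 1, "num_inner_repetitions": 1}
print(config)       # {'num_inner_repetitions': 1}
print(len(config))  # 1 -- duplicate keys silently collapse to the last value
```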
14 changes: 7 additions & 7 deletions keras_hub/src/models/albert/albert_text_classifier.py
@@ -20,10 +20,10 @@
class AlbertTextClassifier(TextClassifier):
"""An end-to-end ALBERT model for classification tasks

This model attaches a classification head to a `keras_hub.model.AlbertBackbone`
backbone, mapping from the backbone outputs to logit output suitable for
a classification task. For usage of this model with pre-trained weights, see
the `from_preset()` method.
This model attaches a classification head to a
`keras_hub.model.AlbertBackbone` backbone, mapping from the backbone outputs
to logit output suitable for a classification task. For usage of this model
with pre-trained weights, see the `from_preset()` method.

This model can optionally be configured with a `preprocessor` layer, in
which case it will automatically apply preprocessing to raw inputs during
@@ -36,9 +36,9 @@ class AlbertTextClassifier(TextClassifier):
Args:
backbone: A `keras_hub.models.AlertBackbone` instance.
num_classes: int. Number of classes to predict.
preprocessor: A `keras_hub.models.AlbertTextClassifierPreprocessor` or `None`. If
`None`, this model will not apply preprocessing, and inputs should
be preprocessed before calling the model.
preprocessor: A `keras_hub.models.AlbertTextClassifierPreprocessor` or
`None`. If `None`, this model will not apply preprocessing, and
inputs should be preprocessed before calling the model.
activation: Optional `str` or callable. The
activation function to use on the model outputs. Set
`activation="softmax"` to return output probabilities.
8 changes: 4 additions & 4 deletions keras_hub/src/models/bart/bart_backbone.py
@@ -22,9 +22,9 @@ class BartBackbone(Backbone):
described in
["BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension"](https://arxiv.org/abs/1910.13461).

The default constructor gives a fully customizable, randomly initialized BART
model with any number of layers, heads, and embedding dimensions. To load
preset architectures and weights, use the `from_preset` constructor.
The default constructor gives a fully customizable, randomly initialized
BART model with any number of layers, heads, and embedding dimensions. To
load preset architectures and weights, use the `from_preset` constructor.

Disclaimer: Pre-trained models are provided on an "as is" basis, without
warranties or conditions of any kind. The underlying model is provided by a
@@ -78,7 +78,7 @@ class BartBackbone(Backbone):
)
output = model(input_data)
```
"""
""" # noqa: E501

def __init__(
self,
17 changes: 9 additions & 8 deletions keras_hub/src/models/bart/bart_seq_2_seq_lm.py
@@ -60,7 +60,8 @@ class BartSeq2SeqLM(Seq2SeqLM):
bart_lm.generate("The quick brown fox", max_length=30)
```

Use `generate()` with encoder inputs and an incomplete decoder input (prompt).
Use `generate()` with encoder inputs and an incomplete decoder input
(prompt).
```python
bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset("bart_base_en")
bart_lm.generate(
@@ -79,10 +80,10 @@ class BartSeq2SeqLM(Seq2SeqLM):
prompt = {
"encoder_token_ids": np.array([[0, 133, 2119, 6219, 23602, 2, 1, 1]]),
"encoder_padding_mask": np.array(
[[True, True, True, True, True, True, False, False]]
[[1, 1, 1, 1, 1, 1, 0, 0]]
),
"decoder_token_ids": np.array([[2, 0, 133, 1769, 2, 1, 1]]),
"decoder_padding_mask": np.array([[True, True, True, True, False, False]])
"decoder_padding_mask": np.array([[1, 1, 1, 1, 0, 0]])
}

bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset(
@@ -95,7 +96,7 @@ class BartSeq2SeqLM(Seq2SeqLM):
Call `fit()` on a single batch.
```python
features = {
"encoder_text": ["The quick brown fox jumped.", "I forgot my homework."],
"encoder_text": ["The quick fox jumped.", "I forgot my homework."],
"decoder_text": ["The fast hazel fox leapt.", "I forgot my assignment."]
}
bart_lm = keras_hub.models.BartSeq2SeqLM.from_preset("bart_base_en")
@@ -195,7 +196,7 @@ def call_decoder_with_cache(
cross_attention_cache=None,
cross_attention_cache_update_index=None,
):
"""Forward pass with a key/value caches for generative decoding..
"""Forward pass with a key/value caches for generative decoding.

`call_decoder_with_cache` adds an additional inference-time forward pass
for the model for seq2seq text generation. Unlike calling the model
@@ -241,7 +242,7 @@ def call_decoder_with_cache(
key/value cache in the decoder's self-attention layer and
`cross_attention_cache` is the key/value cache in the decoder's
cross-attention layer.
"""
""" # noqa: E501
# Embedding layers.
tokens = self.backbone.token_embedding(decoder_token_ids)
positions = self.backbone.decoder_position_embedding(
@@ -331,7 +332,7 @@ def _initialize_cache(self, encoder_token_ids, decoder_token_ids):
def _build_cache(
self, encoder_token_ids, encoder_padding_mask, decoder_token_ids
):
"""Builds the self-attention cache and the cross-attention cache (key/value pairs)."""
"""Builds the self-attention cache and the cross-attention cache."""
encoder_hidden_states = self.call_encoder(
token_ids=encoder_token_ids, padding_mask=encoder_padding_mask
)
@@ -417,7 +418,7 @@ def next(prompt, cache, index):
prompt = ops.slice(prompt, [0, cache_index], [num_samples, 1])

def repeat_tensor(x):
"""Repeats tensors along batch axis to match dim for beam search."""
"""Repeats along batch axis to match dim for beam search."""
if ops.shape(x)[0] == num_samples:
return x
return ops.repeat(x, repeats=num_samples // batch_size, axis=0)
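On the `repeat_tensor` docstring shortened above: beam search runs `num_samples = batch_size * num_beams` rows at once, so encoder-side tensors computed per batch row are tiled along the batch axis to match. A minimal sketch with hypothetical sizes:

```python
import numpy as np

batch_size, num_samples = 2, 6          # hypothetical: 3 beams per example
x = np.array([[10], [20]])              # one encoder-side value per batch row
tiled = np.repeat(x, repeats=num_samples // batch_size, axis=0)
print(tiled.ravel())                    # [10 10 10 20 20 20]
```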
6 changes: 4 additions & 2 deletions keras_hub/src/models/bert/bert_presets.py
@@ -69,7 +69,8 @@
"bert_base_multi": {
"metadata": {
"description": (
"12-layer BERT model where case is maintained. Trained on trained on Wikipedias of 104 languages"
"12-layer BERT model where case is maintained. Trained on "
"trained on Wikipedias of 104 languages"
),
"params": 177853440,
"path": "bert",
@@ -101,7 +102,8 @@
"bert_tiny_en_uncased_sst2": {
"metadata": {
"description": (
"The bert_tiny_en_uncased backbone model fine-tuned on the SST-2 sentiment analysis dataset."
"The bert_tiny_en_uncased backbone model fine-tuned on the "
"SST-2 sentiment analysis dataset."
),
"params": 4385920,
"path": "bert",
6 changes: 3 additions & 3 deletions keras_hub/src/models/bert/bert_text_classifier.py
@@ -34,9 +34,9 @@ class BertTextClassifier(TextClassifier):
Args:
backbone: A `keras_hub.models.BertBackbone` instance.
num_classes: int. Number of classes to predict.
preprocessor: A `keras_hub.models.BertTextClassifierPreprocessor` or `None`. If
`None`, this model will not apply preprocessing, and inputs should
be preprocessed before calling the model.
preprocessor: A `keras_hub.models.BertTextClassifierPreprocessor` or
`None`. If `None`, this model will not apply preprocessing, and
inputs should be preprocessed before calling the model.
activation: Optional `str` or callable. The
activation function to use on the model outputs. Set
`activation="softmax"` to return output probabilities.