You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hi,
I'm trying to use either the gpt2 or t5-3b models with txtai (as it is mentioned in one of the notebooks that any model listed on the Hugging Face Hub would work), but I receive several errors:
ERROR:transformers.tokenization_utils_base:Using pad_token, but it is not set yet.
Traceback (most recent call last):
File "./text-ai.py", line 24, in
similarities = embeddings.similarity(s, queries)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/embeddings.py", line 227, in similarity
query = self.transform((None, query, None)).reshape(1, -1)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/embeddings.py", line 178, in transform
embedding = self.model.transform(document)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/vectors.py", line 257, in transform
return self.model.encode([" ".join(document[1])], show_progress_bar=False)[0]
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/SentenceTransformer.py", line 176, in encode
sentence_features = self.get_sentence_features(text, longest_seq)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/SentenceTransformer.py", line 219, in get_sentence_features
return self._first_module().get_sentence_features(*features)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/models/Transformer.py", line 61, in get_sentence_features
return self.tokenizer.prepare_for_model(tokens, max_length=pad_seq_length, pad_to_max_length=True, return_tensors='pt', truncation=True)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/transformers/tokenization_utils_base.py", line 2021, in prepare_for_model
padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/transformers/tokenization_utils_base.py", line 1529, in _get_padding_truncation_strategies
raise ValueError(
ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as pad_token(tokenizer.pad_token = tokenizer.eos_token e.g.) or add a new pad token via tokenizer.add_special_tokens({'pad_token': '[PAD]'}).
or for T5:
File "./text-ai.py", line 24, in
similarities = embeddings.similarity(s, queries)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/embeddings.py", line 227, in similarity
query = self.transform((None, query, None)).reshape(1, -1)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/embeddings.py", line 178, in transform
embedding = self.model.transform(document)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/vectors.py", line 257, in transform
return self.model.encode([" ".join(document[1])], show_progress_bar=False)[0]
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/SentenceTransformer.py", line 187, in encode
out_features = self.forward(features)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/torch/nn/modules/container.py", line 100, in forward
input = module(input)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in call
result = self.forward(*input, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/models/Transformer.py", line 25, in forward
output_states = self.auto_model(**features)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in call
result = self.forward(*input, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/transformers/modeling_t5.py", line 965, in forward
decoder_outputs = self.decoder(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in call
result = self.forward(*input, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/transformers/modeling_t5.py", line 684, in forward
raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
What am I missing?
Thanks!
The text was updated successfully, but these errors were encountered:
Have you tried these with the latest version of txtai? There was a compatibility issue between sentence-transformers and transformers 3.1 which has been resolved.
Hi,
I'm trying to use either the gpt2 or t5-3b models with txtai (as it is mentioned in one of the notebooks that any model listed on the Hugging Face Hub would work), but I receive several errors:
ERROR:transformers.tokenization_utils_base:Using pad_token, but it is not set yet.
Traceback (most recent call last):
File "./text-ai.py", line 24, in
similarities = embeddings.similarity(s, queries)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/embeddings.py", line 227, in similarity
query = self.transform((None, query, None)).reshape(1, -1)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/embeddings.py", line 178, in transform
embedding = self.model.transform(document)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/vectors.py", line 257, in transform
return self.model.encode([" ".join(document[1])], show_progress_bar=False)[0]
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/SentenceTransformer.py", line 176, in encode
sentence_features = self.get_sentence_features(text, longest_seq)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/SentenceTransformer.py", line 219, in get_sentence_features
return self._first_module().get_sentence_features(*features)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/models/Transformer.py", line 61, in get_sentence_features
return self.tokenizer.prepare_for_model(tokens, max_length=pad_seq_length, pad_to_max_length=True, return_tensors='pt', truncation=True)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/transformers/tokenization_utils_base.py", line 2021, in prepare_for_model
padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/transformers/tokenization_utils_base.py", line 1529, in _get_padding_truncation_strategies
raise ValueError(
ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as pad_token (tokenizer.pad_token = tokenizer.eos_token e.g.) or add a new pad token via tokenizer.add_special_tokens({'pad_token': '[PAD]'}).
or for T5:
File "./text-ai.py", line 24, in
similarities = embeddings.similarity(s, queries)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/embeddings.py", line 227, in similarity
query = self.transform((None, query, None)).reshape(1, -1)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/embeddings.py", line 178, in transform
embedding = self.model.transform(document)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/txtai/vectors.py", line 257, in transform
return self.model.encode([" ".join(document[1])], show_progress_bar=False)[0]
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/SentenceTransformer.py", line 187, in encode
out_features = self.forward(features)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/torch/nn/modules/container.py", line 100, in forward
input = module(input)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in call
result = self.forward(*input, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/sentence_transformers/models/Transformer.py", line 25, in forward
output_states = self.auto_model(**features)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in call
result = self.forward(*input, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/transformers/modeling_t5.py", line 965, in forward
decoder_outputs = self.decoder(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/torch/nn/modules/module.py", line 550, in call
result = self.forward(*input, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/transformers/modeling_t5.py", line 684, in forward
raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
What am I missing?
Thanks!
The text was updated successfully, but these errors were encountered: