Commit: switch to using the Fast tokenizer
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,7 +30,7 @@ def __init__( | |
self.total_len = len(self.train_data) | ||
|
||
def create_one_example(self, text_encoding: List[int], is_query=False): | ||
item = self.tok.encode_plus( | ||
item = self.tok.prepare_for_model( | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
MXueguang
Author
Contributor
|
||
text_encoding, | ||
truncation='only_first', | ||
max_length=self.data_args.q_max_len if is_query else self.data_args.p_max_len, | ||
|
@@ -95,7 +95,7 @@ def __len__(self): | |
|
||
def __getitem__(self, item) -> Tuple[str, BatchEncoding]: | ||
text_id, text = (self.encode_data[item][f] for f in self.input_keys) | ||
encoded_text = self.tok.encode_plus( | ||
encoded_text = self.tok.prepare_for_model( | ||
text, | ||
max_length=self.max_len, | ||
truncation='only_first', | ||
|
I recently tried to train a DPR model with the latest version, and it fails here.
Since the fast tokenizer does not support `prepare_for_model`, is this call supposed to be
`.encode_plus` instead?