Skip to content

Commit

Permalink
allow for ByteTokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Jun 14, 2024
1 parent c98b1aa commit 752a08d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
7 changes: 5 additions & 2 deletions py/llguidance/rust/py.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,15 @@ impl LLTokenizer {
));
}

- let tok_eos = tokenizer.getattr("eos_token_id")?.extract::<u32>()?;
+ let tokens = tokenizer.getattr("tokens")?.extract::<Vec<Vec<u8>>>()?;
+ let tok_eos = tokenizer
+ .getattr("eos_token_id")?
+ .extract::<Option<u32>>()?
+ .unwrap_or(tokens.len() as u32);
  let tok_bos = tokenizer
  .getattr("bos_token_id")?
  .extract::<u32>()
  .map_or(None, |v| Some(v));
- let tokens = tokenizer.getattr("tokens")?.extract::<Vec<Vec<u8>>>()?;
let info = TokRxInfo {
vocab_size: tokens.len() as u32,
tok_eos,
Expand Down

0 comments on commit 752a08d

Please sign in to comment.