rustformers · philpax · Apr 22, 2023 · Apr 6, 2023 · Apr 6, 2023 · Apr 6, 2023
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/ggml-loader/src/lib.rs b/ggml-loader/src/lib.rs
@@ -96,12 +96,6 @@ pub trait LoadHandler<T, R: BufRead + Seek> {
 
     fn load_hyper_parameters(&mut self, reader: &mut R) -> ControlFlow<T, PartialHyperparameters>;
 
-    /// multi-file loading is not supported
-    /// To handle that yourself, return [`ControlFlow::Break(_)`] here
-    fn load_multipart(&mut self, reader: &mut R) -> ControlFlow<T> {
-        ControlFlow::Continue(())
-    }
-
     /// callback to get tensor buffer to populate
     ///
     /// # Returns
@@ -128,7 +122,7 @@ pub fn load_model_from_reader<T, R: BufRead + Seek>(
         ggml::FILE_MAGIC_UNVERSIONED => ContainerType::GGML,
         magic => return Err(LoadError::InvalidMagic(magic)),
     };
-    retchk(handler.got_container_type(container_type))?;
+    controlflow_to_result(handler.got_container_type(container_type))?;
 
     // Load format version
     match container_type {
@@ -142,7 +136,7 @@ pub fn load_model_from_reader<T, R: BufRead + Seek>(
     }
 
     // Load hyper params
-    let hparams = retchk(handler.load_hyper_parameters(reader))?;
+    let hparams = controlflow_to_result(handler.load_hyper_parameters(reader))?;
     let n_vocab = hparams.n_vocab;
 
     // Load vocabulary
@@ -156,15 +150,12 @@ pub fn load_model_from_reader<T, R: BufRead + Seek>(
                 0.
             }
         };
-        retchk(handler.got_vocab_token(i, token, token_score))?;
+        controlflow_to_result(handler.got_vocab_token(i, token, token_score))?;
     }
 
     // Load tensor data
     match container_type {
-        ContainerType::GGMF | ContainerType::GGML => {
-            retchk(handler.load_multipart(reader))?;
-            load_weights(reader, handler, false)
-        }
+        ContainerType::GGMF | ContainerType::GGML => load_weights(reader, handler, false),
         ContainerType::GGJT => load_weights(reader, handler, true),
     }
 }
@@ -227,7 +218,7 @@ pub fn load_weights<T, R: BufRead + Seek>(
             start_offset: offset_aligned,
         };
 
-        match retchk(handler.tensor_buffer(tensor_info))? {
+        match controlflow_to_result(handler.tensor_buffer(tensor_info))? {
             TensorDataTreatment::CopyInto(buf) => {
                 if align {
                     reader.seek(SeekFrom::Start(offset_aligned))?;

diff --git a/ggml-loader/src/util.rs b/ggml-loader/src/util.rs
@@ -62,14 +62,14 @@ pub fn decode_element_type_res<T>(ftype: i32) -> Result<ElementType, LoadError<T
     }
 }
 
-pub fn retchk<A, B>(x: ControlFlow<A, B>) -> Result<B, LoadError<A>> {
+pub fn controlflow_to_result<A, B>(x: ControlFlow<A, B>) -> Result<B, LoadError<A>> {
     match x {
         ControlFlow::Continue(x) => Ok(x),
         ControlFlow::Break(y) => Err(LoadError::UserInterrupted(y)),
     }
 }
 
-pub fn brkchk<A, B, C: Into<A>>(x: Result<B, C>) -> ControlFlow<A, B> {
+pub fn result_to_controlflow<A, B, C: Into<A>>(x: Result<B, C>) -> ControlFlow<A, B> {
     match x {
         Ok(x) => ControlFlow::Continue(x),
         Err(y) => ControlFlow::Break(y.into()),

diff --git a/llama-cli/src/cli_args.rs b/llama-cli/src/cli_args.rs
@@ -288,7 +288,7 @@ impl ModelLoad {
                     } => {
                         let current_part = current_part + 1;
                         log::info!(
-                            "Loading model part {}/{} from '{}' (mmap: {})\n",
+                            "Loading model part {}/{} from '{}' (mmap preferred: {})\n",
                             current_part,
                             total_parts,
                             file.to_string_lossy(),

diff --git a/llama-rs/Cargo.toml b/llama-rs/Cargo.toml
@@ -23,7 +23,6 @@ memmap2 = "0.5.10"
 serde_json = { version = "1.0", optional = true }
 protobuf = { version = "= 2.14.0", optional = true }
 rust_tokenizers = { version = "3.1.2", optional = true }
-log = "*"
 
 [features]
 convert = ["dep:serde_json", "dep:protobuf", "dep:rust_tokenizers"]
diff --git a/llama-rs/src/lib.rs b/llama-rs/src/lib.rs
@@ -22,7 +22,7 @@ pub use inference_session::{
 pub use loader_common::{LoadError, LoadProgress};
 pub use model::{Hyperparameters, Model};
 pub use util::TokenUtf8Buffer;
-pub use vocabulary::{AddTokenError, TokenBias, TokenId, Vocabulary};
+pub use vocabulary::{TokenBias, TokenId, Vocabulary};
 
 /// The end of text token.
 pub const EOT_TOKEN_ID: TokenId = 2; // Hardcoded (for now?)

diff --git a/llama-rs/src/loader.rs b/llama-rs/src/loader.rs
@@ -99,7 +99,7 @@ pub(crate) fn load(
                 }
             };
 
-            vocab.push_token(id, token, score)?;
+            vocab.push_token(id, token, score);
         }
 
         vocab

diff --git a/llama-rs/src/loader2.rs b/llama-rs/src/loader2.rs
@@ -9,7 +9,10 @@ use std::{
     path::{Path, PathBuf},
 };
 
-use crate::{util::mulf, Hyperparameters, LoadError, LoadProgress, Model, TokenId, Vocabulary};
+use crate::{
+    util::{self, mulf},
+    Hyperparameters, LoadError, LoadProgress, Model, TokenId, Vocabulary,
+};
 
 impl LoadError {
     fn from_ggml_loader_error(value: ggml_loader::LoadError<LoadError>, path: PathBuf) -> Self {
@@ -39,6 +42,11 @@ pub(crate) fn load(
 ) -> Result<Model, LoadError> {
     let main_path = path.as_ref();
 
+    let paths = util::find_all_model_files(main_path)?;
+    if paths.len() != 1 {
+        return Err(LoadError::MultipartNotSupported { paths });
+    }
+
     let file = File::open(main_path).map_err(|e| LoadError::OpenFileFailed {
         source: e,
         path: main_path.to_owned(),
@@ -110,24 +118,16 @@ impl<F: FnMut(LoadProgress)> ggml_loader::LoadHandler<LoadError, BufReader<&File
             Ok(id) => id,
             Err(err) => return ControlFlow::Break(LoadError::InvalidIntegerConversion(err)),
         };
-        if let Err(err) = self.vocab.push_token(id, token, score) {
-            return ControlFlow::Break(LoadError::from(err));
-        }
-
-        ControlFlow::Continue(())
-    }
+        self.vocab.push_token(id, token, score);
 
-    fn load_multipart(&mut self, _reader: &mut BufReader<&File>) -> ControlFlow<LoadError> {
-        // todo
-        log::warn!("multipart model is not supported");
         ControlFlow::Continue(())
     }
 
     fn tensor_buffer(&mut self, info: TensorInfo) -> ControlFlow<LoadError, TensorDataTreatment> {
         let model = match &mut self.model {
             Some(model) => model,
             None => {
-                let model = brkchk(self.create_model(self.vocab.clone()))?;
+                let model = result_to_controlflow(self.create_model(self.vocab.clone()))?;
                 self.model.insert(model)
             }
         };

diff --git a/llama-rs/src/loader_common.rs b/llama-rs/src/loader_common.rs
@@ -2,7 +2,7 @@ use std::path::{Path, PathBuf};
 
 use thiserror::Error;
 
-use crate::{util::FindAllModelFilesError, vocabulary::AddTokenError, Hyperparameters};
+use crate::{util::FindAllModelFilesError, Hyperparameters};
 
 /// Each variant represents a step within the process of loading the model.
 /// These can be used to report progress to the user.
@@ -78,9 +78,6 @@ pub enum LoadError {
     #[error("invalid integer conversion")]
     /// One of the integers encountered could not be converted to a more appropriate type.
     InvalidIntegerConversion(#[from] std::num::TryFromIntError),
-    /// While loading, a token could not be added to the vocabulary.
-    #[error("failed to add token to vocabulary: {0}")]
-    VocabularyAddTokenFailed(#[from] AddTokenError),
     #[error("unsupported f16_: {0}")]
     /// One of the integers encountered could not be converted to a more appropriate type.
     UnsupportedElementType(i32),
@@ -149,6 +146,14 @@ pub enum LoadError {
         /// The path that failed.
         path: PathBuf,
     },
+    /// Multiple parts of the model were found.
+    ///
+    /// The default loader does not support multipart models. Please convert the model to a single part.
+    #[error("multipart models are not supported")]
+    MultipartNotSupported {
+        /// The paths that were found.
+        paths: Vec<PathBuf>,
+    },
 }
 impl From<FindAllModelFilesError> for LoadError {
     fn from(value: FindAllModelFilesError) -> Self {

diff --git a/llama-rs/src/model.rs b/llama-rs/src/model.rs
@@ -124,6 +124,9 @@ impl Model {
         n_context_tokens: usize,
         load_progress_callback: impl FnMut(LoadProgress),
     ) -> Result<Model, LoadError> {
+        // Loader2 is the default. It supports the GGML, GGMF and GGJT formats, but not multipart models.
+        //
+        // Loader1 is the old loader. It supports multipart models, but will be deprecated.
         let use_loader_2: bool = match std::env::var("GGML_LOADER").as_deref() {
             Ok("2") => true,
             Ok("1") => false,

diff --git a/llama-rs/src/vocabulary.rs b/llama-rs/src/vocabulary.rs
@@ -1,7 +1,5 @@
 use std::{collections::HashMap, str::FromStr};
 
-use thiserror::Error;
-
 use crate::InferenceError;
 
 /// The identifier of a token in a vocabulary.
@@ -26,41 +24,23 @@ pub struct Vocabulary {
     pub(crate) max_token_length: usize,
 }
 
-#[derive(Debug, Clone, Error)]
-/// Errors encountered when adding a token to a vocabulary.
-pub enum AddTokenError {
-    #[error("the id of token added should be {expected_id}; is {actual_id}")]
-    /// The token that was added does not have the expected ID.
-    WrongId {
-        /// The expected ID.
-        expected_id: TokenId,
-        /// The actual ID.
-        actual_id: TokenId,
-    },
-}
-
 impl Vocabulary {
     /// Add a token to the vocabulary.
     ///
     /// The token added must have `id` directly after the last token in the vocabulary.
-    pub fn push_token(
-        &mut self,
-        id: TokenId,
-        content: Token,
-        score: TokenScore,
-    ) -> Result<(), AddTokenError> {
+    pub fn push_token(&mut self, id: TokenId, content: Token, score: TokenScore) {
+        // These are loader invariants. If either is broken, then the loader is broken and this is a bug,
+        // not an issue with the model itself, so we panic instead of returning an error.
         assert_eq!(self.id_to_token.len(), self.id_to_token_score.len());
         if self.id_to_token.len() != id as usize || self.id_to_token_score.len() != id as usize {
-            return Err(AddTokenError::WrongId {
-                expected_id: self.id_to_token.len() as TokenId,
-                actual_id: id,
-            });
+            let expected_id = self.id_to_token.len() as TokenId;
+            panic!("the id of token added should be {expected_id}; is {id}");
         }
+
         self.max_token_length = self.max_token_length.max(content.len());
         self.id_to_token.push(content.clone());
         self.id_to_token_score.push(score);
         self.token_to_id.insert(content, id);
-        Ok(())
     }
 
     pub(crate) fn token(&self, idx: usize) -> &[u8] {