In [1]:
from litert_tools.pipeline import pipeline
from litert_tools.pipeline import task_file_processor as task_file_processor_lib
from litert_tools.pipeline import tokenizer as tokenizer_lib
from litert_tools.pipeline.pipeline import LiteRTLlmPipeline
from ai_edge_litert import interpreter as interpreter_lib
from typing import Optional
import sentencepiece as sp

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
def load(
    filename: str,
    tokenizer_location: Optional[str] = None,
    num_threads: int = 2,
) -> LiteRTLlmPipeline:
    """Build a ``LiteRTLlmPipeline`` from a ``.task`` model bundle.

    The bundle is handed to ``TaskFileProcessor`` (with ``cache_dir='cache'``)
    to obtain the TFLite model path, the SentencePiece tokenizer file and the
    prompt template. The tokenizer is wrapped together with the prompt
    template, and the TFLite model is opened in an interpreter that has the
    GenAI custom ops registered.

    Args:
        filename: Path to the model bundle. Only string paths ending in
            ``.task`` are accepted.
        tokenizer_location: Accepted for interface compatibility but not used
            by this function; the tokenizer always comes from the bundle.
        num_threads: Thread count passed to the TFLite interpreter.
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        A ``LiteRTLlmPipeline`` built from the interpreter and the wrapped
        tokenizer.

    Raises:
        ValueError: If ``filename`` is not a ``.task`` path, if the bundle or
            its tokenizer cannot be read, or if the interpreter cannot be
            created. The underlying error is chained as ``__cause__``.
    """
    # Validate before entering the try block so the "unsupported" error is
    # reported as-is instead of being re-wrapped as a load failure.
    if not isinstance(filename, str) or not filename.endswith(".task"):
        raise ValueError(f"Unsupported file type: {filename}")

    try:
        # Extract tflite, tokenizer and metadata from the .task bundle.
        file_processor = task_file_processor_lib.TaskFileProcessor(
            filename, cache_dir='cache'
        )
        model_path = file_processor.get_tflite_file_path()

        raw_tokenizer = sp.SentencePieceProcessor()
        raw_tokenizer.Load(file_processor.get_tokenizer_file_path())

        prompt_template = file_processor.get_prompt_template()
    except Exception as e:
        raise ValueError(f"Failed to load model from {filename}: {e}") from e

    # Wrap the raw tokenizer so prompts are formatted with the bundle template.
    tokenizer = tokenizer_lib.Tokenizer(raw_tokenizer, prompt_template)

    print(f"Loading TFLite model from: {model_path}")
    try:
        interpreter = interpreter_lib.InterpreterWithCustomOps(
            custom_op_registerers=["pywrap_genai_ops.GenAIOpsRegisterer"],
            model_path=model_path,
            num_threads=num_threads,
            experimental_default_delegate_latest_features=True,
        )
    except Exception as e:
        raise ValueError(
            f"Failed to load TFLite interpreter from {model_path}: {e}"
        ) from e

    # Named llm_pipeline so the imported `pipeline` module is not shadowed.
    llm_pipeline = LiteRTLlmPipeline(interpreter, tokenizer)
    print("LiteRTLlmPipeline loaded successfully.")
    return llm_pipeline

In [3]:
# Bundle to load; the name has no directory component.
MODEL_TASK_FILE = 'gemma3-1b-it-int4.task'
runner = load(MODEL_TASK_FILE)

Loading TFLite model from: %s cache/TF_LITE_PREFILL_DECODE
LiteRTLlmPipeline loaded successfully.


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [4]:
# Thai greeting ("Hello"), used here as a short non-English prompt.
prompt = "สวัสดีครับ"
# max_decode_steps=None passes no explicit decode limit; presumably generate()
# then applies its own default -- TODO confirm against the pipeline API.
output = runner.generate(prompt, max_decode_steps=None)
# NOTE(review): the recorded output shows the reply twice, which suggests
# generate() already prints text while decoding -- confirm; if so, this
# print is redundant.
print(output)

สวัสดีครับ! ยินดีค่ะ/ครับ! ถ้าคุณต้องการอะไรจากผม/ฉันบ้าง บอกได้เลยค่ะ/ครับ

สวัสดีครับ! ยินดีค่ะ/ครับ! ถ้าคุณต้องการอะไรจากผม/ฉันบ้าง บอกได้เลยค่ะ/ครับ

