-
Notifications
You must be signed in to change notification settings - Fork 47
Open
Description
Hello folks, I am trying to run a local LLM but am constantly getting hit by the error below. I have attached a reproducible example as well. I tried running it with both Flutter and plain Dart. Am I missing anything? Assistance would be much appreciated, thanks!
[Running] dart "/Users/skywar56/Documents/Flutter/quiz_wrapper/lib/main.dart"
Starting LLM CLI Chat App with Isolates...
Using lib: /Users/skywar56/Documents/Flutter/quiz_wrapper/assets/libllama.dylib
Using model: /Users/skywar56/Documents/Flutter/quiz_wrapper/assets/gemma-3-4b-it-Q4_K_M.gguf
Loading model (this may take a bit)...
❌ Failed to initialize model: TimeoutException: Operation "model loading" timed out
#0 LlamaParent._sendCommand.<anonymous closure> (package:llama_cpp_dart/src/isolate_parent.dart:183:9)
#1 _RootZone.run (dart:async/zone.dart:1843:54)
#2 Future.timeout.<anonymous closure> (dart:async/future_impl.dart:1057:34)
<asynchronous suspension>
#3 LlamaParent._sendCommand (package:llama_cpp_dart/src/isolate_parent.dart:180:12)
<asynchronous suspension>
#4 LlamaParent.init (package:llama_cpp_dart/src/isolate_parent.dart:167:5)
<asynchronous suspension>
#5 main (package:quiz_wrapper/main.dart:279:5)
<asynchronous suspension>
import 'dart:io';
import 'dart:async';
import 'package:llama_cpp_dart/llama_cpp_dart.dart';
/// CLI chat loop against a local llama.cpp model running in an isolate.
///
/// Flow: load the model via [LlamaParent], poll until ready, then loop:
/// read a line from stdin, append it to the chat history, stream the
/// model's reply to stdout, and fold the reply back into the history.
void main() async {
  print("Starting LLM CLI Chat App with Isolates...");

  // Library path setup — must point at the libllama dynamic library
  // built from llama.cpp (presumably a Metal-enabled arm64 build; verify).
  Llama.libraryPath = "/Users/skywar56/Documents/Flutter/quiz_wrapper/llama.cpp/build/bin/libllama.dylib";

  // Context parameters: generation length, context window, batch size.
  ContextParams contextParams = ContextParams();
  contextParams.nPredict = 8192;
  contextParams.nCtx = 8192;
  contextParams.nBatch = 512;

  // Sampling parameters (temperature / top-k / top-p / repeat penalty).
  final samplerParams = SamplerParams();
  samplerParams.temp = 0.7;
  samplerParams.topK = 64;
  samplerParams.topP = 0.95;
  samplerParams.penaltyRepeat = 1.1;

  // Load command handed to the isolate: model file plus all parameters.
  final loadCommand = LlamaLoad(
    path: "/Users/skywar56/Documents/Flutter/quiz_wrapper/assets/gemma-3-4b-it-Q4_K_M.gguf",
    modelParams: ModelParams(),
    contextParams: contextParams,
    samplingParams: samplerParams,
  );

  print("Loading model, please wait...");

  // Spawns the worker isolate that owns the native llama.cpp handle.
  final llamaParent = LlamaParent(loadCommand);

  try {
    await llamaParent.init();

    // init() can return before the isolate reports ready, so poll the
    // status with a bounded busy-wait (60 * 500ms = 30s cap).
    int attempts = 0;
    const maxAttempts = 60;
    print("Waiting for model to be ready...");
    while (llamaParent.status != LlamaStatus.ready && attempts < maxAttempts) {
      await Future.delayed(const Duration(milliseconds: 500));
      attempts++;
      if (attempts % 10 == 0) {
        print("Still waiting... Status: ${llamaParent.status}");
      }
      // Bail out immediately if the isolate reported a load failure.
      if (llamaParent.status == LlamaStatus.error) {
        print("Error loading model. Exiting.");
        exit(1);
      }
    }
    if (attempts >= maxAttempts && llamaParent.status != LlamaStatus.ready) {
      // Deliberate best-effort: some versions never flip the flag even
      // though the model is usable, so warn and continue.
      print("Timeout waiting for model to be ready. Current status: ${llamaParent.status}");
      print("Continuing anyway as the model might be ready despite status not being updated...");
    }
    print("Model loaded successfully in isolate! Status: ${llamaParent.status}");
  } catch (e) {
    print("Error initializing model: $e");
    exit(1);
  }

  // Seed the conversation with a system prompt.
  ChatHistory chatHistory = ChatHistory();
  chatHistory.addMessage(
    role: Role.system,
    content: "You are a helpful, concise assistant. Keep your answers informative but brief.",
  );
  print("Chat history initialized with system prompt");
  print("\n=== Chat started (type 'exit' to quit) ===\n");

  // completionDone is re-created per turn; the completions listener below
  // captures the *variable*, so it always completes the current turn's
  // completer. currentResponse accumulates the streamed tokens of a turn.
  Completer<void> completionDone = Completer<void>();
  StringBuffer currentResponse = StringBuffer();

  // Token stream: echo each token to the terminal as it arrives and
  // buffer it so the finished reply can be stored in the history.
  llamaParent.stream.listen(
    (token) {
      stdout
        ..write(token)
        ..flush();
      currentResponse.write(token);
    },
    onError: (e) {
      print("\nSTREAM ERROR: $e");
    },
  );

  // Completion events: on success, replace the placeholder assistant
  // message with the full buffered reply and release the chat loop.
  llamaParent.completions.listen((event) {
    if (event.success) {
      if (chatHistory.messages.isNotEmpty && chatHistory.messages.last.role == Role.assistant) {
        chatHistory.messages.last = Message(role: Role.assistant, content: currentResponse.toString());
      }
      currentResponse.clear();
      if (!completionDone.isCompleted) {
        completionDone.complete();
      }
    } else {
      print("Completion failed for prompt: ${event.promptId}");
    }
  });

  // Interactive chat loop.
  bool chatActive = true;
  while (chatActive) {
    stdout.write("\nYou: ");
    String? userInput = stdin.readLineSync();

    // EOF or 'exit' terminates the session.
    if (userInput == null || userInput.toLowerCase() == 'exit') {
      chatActive = false;
      print("\nExiting chat. bye!");
      print(chatHistory.exportFormat(ChatFormat.gemini));
      break;
    }

    // Record the user turn and an empty assistant placeholder that the
    // completions listener fills in when the reply finishes.
    chatHistory.addMessage(role: Role.user, content: userInput);
    chatHistory.addMessage(role: Role.assistant, content: "");

    // Fresh completer for this turn.
    completionDone = Completer<void>();

    // Export the history with the last assistant turn left open so the
    // model continues it.
    String prompt = chatHistory.exportFormat(ChatFormat.gemini, leaveLastAssistantOpen: true);

    // BUGFIX: print the label BEFORE sending the prompt. The stream
    // listener writes tokens straight to stdout, so sending first let
    // early tokens race ahead of (or interleave with) "Assistant: ".
    stdout.write("\nAssistant: ");
    await llamaParent.sendPrompt(prompt);

    // Block this turn until the completion event fires, with a 60s cap
    // so a stalled isolate cannot hang the loop forever.
    try {
      await completionDone.future.timeout(
        const Duration(seconds: 60),
        onTimeout: () {
          print("\nTimeout waiting for response. Continuing anyway...");
        },
      );
    } catch (e) {
      print("\nError waiting for completion: $e");
    }
    print(""); // Add a newline after the response
  }

  // Tear down the worker isolate and native resources.
  llamaParent.dispose();
}

[✓] Flutter (Channel stable, 3.32.0, on macOS 15.6.1 24G90 darwin-arm64, locale en-SG)
[✓] Android toolchain - develop for Android devices (Android SDK version 34.0.0)
[✓] Xcode - develop for iOS and macOS (Xcode 16.2)
[✓] Chrome - develop for the web
[✓] Android Studio (version 2024.2)
[✓] VS Code (version 1.103.2)
[✓] Connected device (3 available)
! Error: Browsing on the local area network for Varun’s Apple Watch. Ensure the device is unlocked and discoverable via Bluetooth. (code -27)
! Error: Browsing on the local area network for Varun’s iPhone. Ensure the device is unlocked and attached with a cable or associated with the same local area network as this Mac.
The device must be opted into Developer Mode to connect wirelessly. (code -27)
[✓] Network resources
• No issues found!
Xrok
Metadata
Metadata
Assignees
Labels
No labels