[Example] ggml: add CI for phi-3, yi-1.5-9b (#143)
* [Example] ggml: rename phi-3-mini to phi-3, test more phi-3 models

Signed-off-by: dm4 <dm4@secondstate.io>

* [CI] llama: use b2963

Signed-off-by: dm4 <dm4@secondstate.io>

* [Example] ggml: update chatml example for better CI testing

Signed-off-by: dm4 <dm4@secondstate.io>

* [CI] llama: use latest plugin with both wasmedge 0.13 and 0.14

Signed-off-by: dm4 <dm4@secondstate.io>

---------

Signed-off-by: dm4 <dm4@secondstate.io>
dm4 committed May 28, 2024
1 parent 8b3991c commit 654ffc5
Showing 8 changed files with 130 additions and 42 deletions.
73 changes: 62 additions & 11 deletions .github/workflows/llama.yml
@@ -24,7 +24,8 @@ jobs:
   build:
     strategy:
       matrix:
-        runner: [ubuntu-20.04, macos-13, macos-m1]
+        runner: [ubuntu-20.04, macos-m1]
+        wasmedge: ["0.13.5", "0.14.0"]
         plugin: [wasi_nn-ggml]
         job:
           - name: "Tiny Llama"
@@ -224,6 +225,34 @@ jobs:
                 default \
                 '<start_of_turn>user Where is the capital of Japan? <end_of_turn><start_of_turn>model'
+          - name: Yi 1.5 9B 16K
+            run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
+              cd wasmedge-ggml/chatml
+              curl -LO https://huggingface.co/second-state/Yi-1.5-9B-Chat-16K-GGUF/resolve/main/Yi-1.5-9B-Chat-16K-Q5_K_M.gguf
+              cargo build --target wasm32-wasi --release
+              time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
+                --env reverse_prompt='<|im_end|>' \
+                --nn-preload default:GGML:AUTO:Yi-1.5-9B-Chat-16K-Q5_K_M.gguf \
+                target/wasm32-wasi/release/wasmedge-ggml-chatml.wasm \
+                default \
+                $'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
+          - name: Yi 1.5 9B
+            run: |
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
+              cd wasmedge-ggml/chatml
+              curl -LO https://huggingface.co/second-state/Yi-1.5-9B-Chat-GGUF/resolve/main/Yi-1.5-9B-Chat-Q5_K_M.gguf
+              cargo build --target wasm32-wasi --release
+              time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
+                --env reverse_prompt='<|im_end|>' \
+                --nn-preload default:GGML:AUTO:Yi-1.5-9B-Chat-Q5_K_M.gguf \
+                target/wasm32-wasi/release/wasmedge-ggml-chatml.wasm \
+                default \
+                $'<|im_start|>system\nYou are an AI assistant<|im_end|>\n<|im_start|>user\nWhere is the capital of Japan?<|im_end|>\n<|im_start|>assistant'
           - name: Grammar Example
             run: |
               test -f ~/.wasmedge/env && source ~/.wasmedge/env
@@ -259,26 +288,48 @@ jobs:
                 default \
                 $'[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, please do not share false information.\n<</SYS>>\nWhat is the capital of Japan?[/INST]'
-          - name: Phi 3 Mini
+          - name: Phi 3 Mini 4k
             run: |
               test -f ~/.wasmedge/env && source ~/.wasmedge/env
-              cd wasmedge-ggml/test/phi-3-mini
+              cd wasmedge-ggml/test/phi-3
               curl -LO https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
               cargo build --target wasm32-wasi --release
               time wasmedge --dir .:. \
                 --env n_gpu_layers="$NGL" \
                 --nn-preload default:GGML:AUTO:Phi-3-mini-4k-instruct-q4.gguf \
-                target/wasm32-wasi/release/wasmedge-ggml-phi-3-mini.wasm \
+                target/wasm32-wasi/release/wasmedge-ggml-phi-3.wasm \
                 default \
                 $'<|user|>\nWhat is the capital of Japan?<|end|>\n<|assistant|>'
-          - name: Build llama-stream
+          - name: Phi 3 Mini 128k
             run: |
-              cd wasmedge-ggml/llama-stream
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
+              cd wasmedge-ggml/test/phi-3
+              curl -LO https://huggingface.co/second-state/Phi-3-mini-128k-instruct-GGUF/resolve/main/Phi-3-mini-128k-instruct-Q5_K_M.gguf
               cargo build --target wasm32-wasi --release
+              time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
+                --nn-preload default:GGML:AUTO:Phi-3-mini-128k-instruct-Q5_K_M.gguf \
+                target/wasm32-wasi/release/wasmedge-ggml-phi-3.wasm \
+                default \
+                $'<|user|>\nWhat is the capital of Japan?<|end|>\n<|assistant|>'
-          - name: Build chatml
+          - name: Phi 3 Medium 4k
             run: |
-              cd wasmedge-ggml/chatml
+              test -f ~/.wasmedge/env && source ~/.wasmedge/env
+              cd wasmedge-ggml/test/phi-3
+              curl -LO https://huggingface.co/second-state/Phi-3-medium-4k-instruct-GGUF/resolve/main/Phi-3-medium-4k-instruct-Q5_K_M.gguf
               cargo build --target wasm32-wasi --release
+              time wasmedge --dir .:. \
+                --env n_gpu_layers="$NGL" \
+                --nn-preload default:GGML:AUTO:Phi-3-medium-4k-instruct-Q5_K_M.gguf \
+                target/wasm32-wasi/release/wasmedge-ggml-phi-3.wasm \
+                default \
+                $'<|user|>\nWhat is the capital of Japan?<|end|>\n<|assistant|>'
+          - name: Build llama-stream
+            run: |
+              cd wasmedge-ggml/llama-stream
               cargo build --target wasm32-wasi --release
           - name: Build llava-base64-stream
@@ -290,6 +341,7 @@ jobs:
         - runner: macos-m1
           ngl: 100
         - runner: ubuntu-20.04
+          wasmedge: "0.14.0"
          plugin: wasi_nn-ggml
           job:
             name: C4AI Command-R v01
@@ -305,7 +357,7 @@ jobs:
                 default \
                 '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>What is the capital of the United States?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'
-    name: ${{ matrix.runner }} - ${{ matrix.job.name }} - ${{ matrix.plugin }}
+    name: ${{ matrix.runner }} - ${{ matrix.job.name }} - ${{ matrix.wasmedge }} - ${{ matrix.plugin }}
     runs-on: ${{ matrix.runner }}
     steps:
       - uses: actions/checkout@v4
@@ -316,8 +368,7 @@ jobs:
       - name: Install WasmEdge + WASI-NN + GGML
         run: |
-          VERSION=0.13.5
-          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- -v $VERSION --plugins ${{ matrix.plugin }}
+          curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- -v ${{ matrix.wasmedge }} --plugins ${{ matrix.plugin }}
       - name: Set environment variable
         run: echo "NGL=${{ matrix.ngl || 0 }}" >> $GITHUB_ENV
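The Yi 1.5 and Phi 3 steps above drive the examples non-interactively by passing a fully formed prompt as the third CLI argument. As a reference for the ChatML layout used in the Yi 1.5 steps, here is a minimal Rust sketch; the chatml_prompt helper is hypothetical and not part of this commit:

// Hypothetical helper (not in this repo) illustrating the ChatML prompt
// layout the Yi 1.5 CI steps pass to the chatml example.
fn chatml_prompt(system: &str, user: &str) -> String {
    format!(
        "<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant"
    )
}

fn main() {
    // Reproduces the prompt string used by the "Yi 1.5 9B" step above.
    println!(
        "{}",
        chatml_prompt("You are an AI assistant", "Where is the capital of Japan?")
    );
}

In the workflow itself, bash ANSI-C quoting ($'...') is what turns the literal \n sequences into real newlines before the prompt reaches the wasm module.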
97 changes: 67 additions & 30 deletions wasmedge-ggml/chatml/src/main.rs
@@ -1,5 +1,4 @@
-use serde_json::Value;
-use std::collections::HashMap;
+use serde_json::{json, Value};
 use std::env;
 use std::io;
 use wasmedge_wasi_nn::{
@@ -19,6 +18,27 @@ fn read_input() -> String {
     }
 }
 
+fn get_options_from_env() -> Value {
+    let mut options = json!({});
+    if let Ok(val) = env::var("enable_log") {
+        options["enable-log"] = serde_json::from_str(val.as_str())
+            .expect("invalid value for enable-log option (true/false)")
+    }
+    if let Ok(val) = env::var("n_gpu_layers") {
+        options["n-gpu-layers"] =
+            serde_json::from_str(val.as_str()).expect("invalid ngl value (unsigned integer)")
+    }
+    if let Ok(val) = env::var("ctx_size") {
+        options["ctx-size"] =
+            serde_json::from_str(val.as_str()).expect("invalid ctx-size value (unsigned integer)")
+    }
+    if let Ok(val) = env::var("reverse_prompt") {
+        options["reverse-prompt"] = json!(val.as_str())
+    }
+
+    options
+}
+
 fn set_data_to_context(context: &mut GraphExecutionContext, data: Vec<u8>) -> Result<(), Error> {
     context.set_input(0, TensorType::U8, &[1], &data)
 }
@@ -56,11 +76,9 @@ fn main() {
     let args: Vec<String> = env::args().collect();
     let model_name: &str = &args[1];
 
-    // Set options for the graph. Check our README for more details.
-    let mut options = HashMap::new();
-    options.insert("enable-log", Value::from(false));
-    options.insert("n-gpu-layers", Value::from(0));
-    options.insert("ctx-size", Value::from(512));
+    // Set options for the graph. Check our README for more details:
+    // https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml#parameters
+    let options = get_options_from_env();
 
     // Create graph and initialize context.
     let graph = GraphBuilder::new(GraphEncoding::Ggml, ExecutionTarget::AUTO)
@@ -82,6 +100,48 @@ fn main() {
     //     )
     //     .expect("Failed to set metadata");
 
+    // If there is a third argument, use it as the prompt and enter non-interactive mode.
+    // This is mainly for the CI workflow.
+    if args.len() >= 3 {
+        let prompt = &args[2];
+        // Set the prompt.
+        println!("Prompt:\n{}", prompt);
+        let tensor_data = prompt.as_bytes().to_vec();
+        context
+            .set_input(0, TensorType::U8, &[1], &tensor_data)
+            .expect("Failed to set input");
+        println!("Response:");
+
+        // Get the number of input tokens and llama.cpp versions.
+        let input_metadata = get_metadata_from_context(&context);
+        println!("[INFO] llama_commit: {}", input_metadata["llama_commit"]);
+        println!(
+            "[INFO] llama_build_number: {}",
+            input_metadata["llama_build_number"]
+        );
+        println!(
+            "[INFO] Number of input tokens: {}",
+            input_metadata["input_tokens"]
+        );
+
+        // Get the output.
+        context.compute().expect("Failed to compute");
+        let output = get_output_from_context(&context);
+        println!("{}", output.trim());
+
+        // Retrieve the output metadata.
+        let metadata = get_metadata_from_context(&context);
+        println!(
+            "[INFO] Number of input tokens: {}",
+            metadata["input_tokens"]
+        );
+        println!(
+            "[INFO] Number of output tokens: {}",
+            metadata["output_tokens"]
+        );
+        std::process::exit(0);
+    }
+
     let mut saved_prompt = String::new();
     let system_prompt = String::from("You are a helpful, respectful and honest assistant. Always answer as short as possible, while being safe." );
 
@@ -101,18 +161,6 @@ fn main() {
         set_data_to_context(&mut context, saved_prompt.as_bytes().to_vec())
             .expect("Failed to set input");
 
-        // Get the number of input tokens and llama.cpp versions.
-        // let input_metadata = get_metadata_from_context(&context);
-        // println!("[INFO] llama_commit: {}", input_metadata["llama_commit"]);
-        // println!(
-        //     "[INFO] llama_build_number: {}",
-        //     input_metadata["llama_build_number"]
-        // );
-        // println!(
-        //     "[INFO] Number of input tokens: {}",
-        //     input_metadata["input_tokens"]
-        // );
-
         // Execute the inference.
         let mut reset_prompt = false;
         match context.compute() {
@@ -141,16 +189,5 @@ fn main() {
             output = output.trim().to_string();
             saved_prompt = format!("{}{}<|im_end|>\n", saved_prompt, output);
         }
-
-        // Retrieve the output metadata.
-        // let metadata = get_metadata_from_context(&context);
-        // println!(
-        //     "[INFO] Number of input tokens: {}",
-        //     metadata["input_tokens"]
-        // );
-        // println!(
-        //     "[INFO] Number of output tokens: {}",
-        //     metadata["output_tokens"]
-        // );
     }
 }
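The new get_options_from_env() replaces the hard-coded HashMap, so the workflow can tune each run through --env flags alone. Below is a condensed, self-contained Rust sketch of that mapping — only two of the four variables, with a hypothetical main() added for demonstration; it assumes the serde_json crate, as in the diff above:

// Condensed sketch of the env-to-options mapping introduced by this commit.
use serde_json::{json, Value};
use std::env;

fn get_options_from_env() -> Value {
    let mut options = json!({});
    if let Ok(val) = env::var("n_gpu_layers") {
        // "100" (a string in the environment) is parsed into the JSON number 100.
        options["n-gpu-layers"] =
            serde_json::from_str(val.as_str()).expect("invalid ngl value (unsigned integer)");
    }
    if let Ok(val) = env::var("reverse_prompt") {
        // Reverse prompts stay plain JSON strings.
        options["reverse-prompt"] = json!(val.as_str());
    }
    options
}

fn main() {
    // Mirrors `--env n_gpu_layers="$NGL" --env reverse_prompt='<|im_end|>'`
    // from the workflow steps above.
    env::set_var("n_gpu_layers", "100");
    env::set_var("reverse_prompt", "<|im_end|>");
    let options = get_options_from_env();
    assert_eq!(options["n-gpu-layers"], json!(100));
    assert_eq!(options["reverse-prompt"], json!("<|im_end|>"));
    println!("{}", options); // {"n-gpu-layers":100,"reverse-prompt":"<|im_end|>"}
}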
Binary file modified wasmedge-ggml/chatml/wasmedge-ggml-chatml.wasm
@@ -1,5 +1,5 @@
 [package]
-name = "wasmedge-ggml-phi-3-mini"
+name = "wasmedge-ggml-phi-3"
 version = "0.1.0"
 edition = "2021"
 
File renamed without changes.
File renamed without changes.
Binary file added wasmedge-ggml/test/phi-3/wasmedge-ggml-phi-3.wasm
