From 21361a0cf735362b3254927b421622edb4e03e3f Mon Sep 17 00:00:00 2001
From: Phodal Huang
Date: Tue, 27 Jun 2023 12:48:51 +0800
Subject: [PATCH] docs: add local best practices

---
 src/SUMMARY.md                        |   3 +-
 src/develop-llm-app-best-practise.md  | 216 ++++++++++++++++++++++++++
 src/develop-llm-app-example-comate.md | 102 ------------
 3 files changed, 217 insertions(+), 104 deletions(-)
 create mode 100644 src/develop-llm-app-best-practise.md

diff --git a/src/SUMMARY.md b/src/SUMMARY.md
index eccf681..840fb01 100644
--- a/src/SUMMARY.md
+++ b/src/SUMMARY.md
@@ -25,8 +25,7 @@
 - [LLM Application Example: AI + DevOps](./develop-llm-app-example-ai-devops.md)
 - [Tool Example: AutoDev](./develop-llm-app-example-autodev.md)
 - [Challenges](./aidevops-challenges.md)
-- [LLM Application Example: Co-mate Deep Dive](./develop-llm-app-example-comate.md)
-
+- [LLM Application Example: Best Practices](./develop-llm-app-best-practise.md)
 
 # LLM Application Handbook: Fine-tuning + LLMOps

diff --git a/src/develop-llm-app-best-practise.md b/src/develop-llm-app-best-practise.md
new file mode 100644
index 0000000..f64c226
--- /dev/null
+++ b/src/develop-llm-app-best-practise.md
@@ -0,0 +1,216 @@
# LLM Application Example: Best Practices

## LLM Application Development Pattern: Local Small Models

In Co-mate, we run a SentenceTransformer model locally to handle user input: the input is first analyzed and matched on the local side. When a match is found, the corresponding local function is called directly; only when nothing matches is the input forwarded to a remote handler.

HuggingFace: [https://huggingface.co/sentence-transformers](https://huggingface.co/sentence-transformers)

The approach follows the implementations of GitHub Copilot and Bloop: a small local model processes the user input first, and only then is the large remote model brought in.

### Rust Implementation Example

A related Rust example: [https://github.com/unit-mesh/unit-agent](https://github.com/unit-mesh/unit-agent)

```rust
pub fn embed(&self, sequence: &str) -> anyhow::Result<Vec<f32>> {
    let tokenizer_output = self.tokenizer.encode(sequence, true).unwrap();

    let input_ids = tokenizer_output.get_ids();
    let attention_mask = tokenizer_output.get_attention_mask();
    let token_type_ids = tokenizer_output.get_type_ids();
    let length = input_ids.len();
    trace!("embedding {} tokens {:?}", length, sequence);

    // Build the three standard BERT inputs as (1, length) i64 tensors
    let inputs_ids_array = ndarray::Array::from_shape_vec(
        (1, length),
        input_ids.iter().map(|&x| x as i64).collect(),
    )?;

    let attention_mask_array = ndarray::Array::from_shape_vec(
        (1, length),
        attention_mask.iter().map(|&x| x as i64).collect(),
    )?;

    let token_type_ids_array = ndarray::Array::from_shape_vec(
        (1, length),
        token_type_ids.iter().map(|&x| x as i64).collect(),
    )?;

    let outputs = self.session.run([
        InputTensor::from_array(inputs_ids_array.into_dyn()),
        InputTensor::from_array(attention_mask_array.into_dyn()),
        InputTensor::from_array(token_type_ids_array.into_dyn()),
    ])?;

    // Mean-pool the token embeddings into a single sentence vector
    let output_tensor: OrtOwnedTensor<f32, _> = outputs[0].try_extract().unwrap();
    let sequence_embedding = &*output_tensor.view();
    let pooled = sequence_embedding.mean_axis(Axis(1)).unwrap();

    Ok(pooled.to_owned().as_slice().unwrap().to_vec())
}
```

### Kotlin Implementation Example

```kotlin
import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer
import ai.onnxruntime.OnnxTensor
import ai.onnxruntime.OrtEnvironment
import ai.onnxruntime.OrtSession
import ai.onnxruntime.OrtUtil
import java.net.URI
import java.nio.file.FileSystems
import java.nio.file.Files
import java.nio.file.Paths

class Semantic(val tokenizer: HuggingFaceTokenizer, val session: OrtSession, val env: OrtEnvironment) {
    fun embed(
        sequence: String,
    ): FloatArray {
        val tokenized = tokenizer.encode(sequence, true)

        val inputIds = tokenized.ids
        val attentionMask = tokenized.attentionMask
        val typeIds = tokenized.typeIds

        // Reshape each input into a (1, sequenceLength) tensor
        val tensorInput = OrtUtil.reshape(inputIds, longArrayOf(1, inputIds.size.toLong()))
        val tensorAttentionMask = OrtUtil.reshape(attentionMask, longArrayOf(1, attentionMask.size.toLong()))
        val tensorTypeIds = OrtUtil.reshape(typeIds, longArrayOf(1, typeIds.size.toLong()))

        val result = session.run(
            mapOf(
                "input_ids" to OnnxTensor.createTensor(env, tensorInput),
                "attention_mask" to OnnxTensor.createTensor(env, tensorAttentionMask),
                "token_type_ids" to OnnxTensor.createTensor(env, tensorTypeIds),
            ),
        )

        val outputTensor = result.get(0) as OnnxTensor
        val output = outputTensor.floatBuffer.array()

        return output
    }

    companion object {
        fun create(): Semantic {
            val classLoader = Thread.currentThread().contextClassLoader

            val tokenizerPath = classLoader.getResource("model/tokenizer.json")!!.toURI()
            val onnxPath = classLoader.getResource("model/model.onnx")!!.toURI()

            // When the resources live inside a JAR, mount the JAR as a file
            // system so that Paths.get() can resolve them
            try {
                val env: Map<String, String> = HashMap()
                val array: List<String> = tokenizerPath.toString().split("!")
                FileSystems.newFileSystem(URI.create(array[0]), env)
            } catch (e: Exception) {
                // the file system may already be open; safe to ignore
            }

            val tokenizer = HuggingFaceTokenizer.newInstance(Paths.get(tokenizerPath))
            val ortEnv = OrtEnvironment.getEnvironment()
            val sessionOptions = OrtSession.SessionOptions()

            // load the ONNX model as a byte array
            val onnxPathAsByteArray = Files.readAllBytes(Paths.get(onnxPath))
            val session = ortEnv.createSession(onnxPathAsByteArray, sessionOptions)

            return Semantic(tokenizer, session, ortEnv)
        }
    }
}
```
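With embeddings available locally, matching user input against the supported local commands reduces to cosine similarity over the vectors that `embed()` returns. Below is a minimal dispatch sketch built on the `Semantic` class above — the `LocalFirstDispatcher` name, the `Intent` type, the sample intents, and the `0.75` threshold are illustrative assumptions rather than Co-mate's actual API, and it assumes `embed()` yields a fixed-size sentence vector:

```kotlin
import kotlin.math.sqrt

// Hypothetical local-first dispatcher: handle the input locally when a known
// intent matches well enough, otherwise hand off to the remote model.
class LocalFirstDispatcher(private val semantic: Semantic) {
    data class Intent(val name: String, val description: String, val handler: () -> String)

    // Illustrative local commands; real intents would mirror the product's features
    private val intents = listOf(
        Intent("introduce-system", "introduce the system architecture") { "local: introduction" },
        Intent("analyse-api", "analyse the API design") { "local: api analysis" },
    )

    // Embed every intent description once, up front
    private val intentVectors: List<Pair<Intent, FloatArray>> =
        intents.map { it to semantic.embed(it.description) }

    fun dispatch(input: String, threshold: Float = 0.75f): String {
        val inputVector = semantic.embed(input)
        val (best, score) = intentVectors
            .map { (intent, vector) -> intent to cosine(inputVector, vector) }
            .maxByOrNull { it.second }!!

        return if (score >= threshold) {
            best.handler()        // confident local match: call the local function
        } else {
            callRemoteLlm(input)  // no local match: fall back to the remote model
        }
    }

    // Plain cosine similarity; assumes both vectors have the model's embedding size
    private fun cosine(a: FloatArray, b: FloatArray): Float {
        var dot = 0f
        var normA = 0f
        var normB = 0f
        for (i in a.indices) {
            dot += a[i] * b[i]
            normA += a[i] * a[i]
            normB += b[i] * b[i]
        }
        return dot / (sqrt(normA) * sqrt(normB))
    }

    private fun callRemoteLlm(input: String): String {
        // placeholder: a real system would call the remote LLM API here
        return "remote: $input"
    }
}
```

The threshold is the main tuning knob: set it too low and unrelated requests are captured by the wrong local handler; set it too high and everything falls through to the slower, costlier remote call.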
"attention_mask" to OnnxTensor.createTensor(env, tensorAttentionMask), + "token_type_ids" to OnnxTensor.createTensor(env, tensorTypeIds), + ), + ) + + val outputTensor = result.get(0) as OnnxTensor + val output = outputTensor.floatBuffer.array() + + return output + } + + + companion object { + fun create(): Semantic { + val classLoader = Thread.currentThread().getContextClassLoader() + + val tokenizerPath = classLoader.getResource("model/tokenizer.json")!!.toURI() + val onnxPath = classLoader.getResource("model/model.onnx")!!.toURI() + + try { + val env: Map = HashMap() + val array: List = tokenizerPath.toString().split("!") + FileSystems.newFileSystem(URI.create(array[0]), env) + } catch (e: Exception) { +// e.printStackTrace() + } + + val tokenizer = HuggingFaceTokenizer.newInstance(Paths.get(tokenizerPath)) + val ortEnv = OrtEnvironment.getEnvironment() + val sessionOptions = OrtSession.SessionOptions() + + // load onnxPath as byte[] + val onnxPathAsByteArray = Files.readAllBytes(Paths.get(onnxPath)) + + val session = ortEnv.createSession(onnxPathAsByteArray, sessionOptions) + + return Semantic(tokenizer, session, ortEnv) + } + } +} +``` + +## LLM 应用开发模式:Stream 封装 + +### 服务端 API 调用:Kotlin 实现 + +机制:结合 callbackFlow 来实现 + +```kotlin +fun stream(text: String): Flow { + val systemMessage = ChatMessage(ChatMessageRole.USER.value(), text) + + messages.add(systemMessage) + + val completionRequest = ChatCompletionRequest.builder() + .model(openAiVersion) + .temperature(0.0) + .messages(messages) + .build() + + return callbackFlow { + withContext(Dispatchers.IO) { + service.streamChatCompletion(completionRequest) + .doOnError(Throwable::printStackTrace) + .blockingForEach { response -> + val completion = response.choices[0].message + if (completion != null && completion.content != null) { + trySend(completion.content) + } + } + + close() + } + } +} +``` + +## 客户端 API 调用:TypeScript 实现 + +机制:依赖于 Vercel 的 AI 库,提供对于 Stream 的封装 + +```typescript +import { Message, OpenAIStream, StreamingTextResponse } from 'ai' +import { Configuration, OpenAIApi } from 'openai-edge' + +export async function stream(apiKey: string, messages: Message[], isStream: boolean = true) { + let basePath = process.env.OPENAI_PROXY_URL + if (basePath == null) { + basePath = 'https://api.openai.com' + } + + const configuration = new Configuration({ + apiKey: apiKey || process.env.OPENAI_API_KEY, + basePath + }) + + const openai = new OpenAIApi(configuration) + + const res = await openai.createChatCompletion({ + model: 'gpt-3.5-turbo', + messages, + temperature: 0.7, + stream: isStream + }) + + if (!isStream) { + return res + } + + const stream = OpenAIStream(res, {}) + + return new StreamingTextResponse(stream) +} +``` + +### 客户端 UI 实现:Fetch + +```typescript +const decoder = new TextDecoder() +export function decodeAIStreamChunk(chunk: Uint8Array): string { + return decoder.decode(chunk) +} + + await fetch("/api/action/tooling", { + method: "POST", + body: JSON.stringify(tooling), + }).then(async response => { + onResult(await response.json()) + let result = "" + const reader = response.body.getReader() + while (true) { + const { done, value } = await reader.read() + if (done) { + break + } + + result += decodeAIStreamChunk(value) + onResult(result) + } + + isPending = false + }); +``` diff --git a/src/develop-llm-app-example-comate.md b/src/develop-llm-app-example-comate.md index 897fe29..6a6cf36 100644 --- a/src/develop-llm-app-example-comate.md +++ b/src/develop-llm-app-example-comate.md @@ -1,103 +1 @@ # LLM 应用示例:Co-mate 难点解析 - 
### Client-Side API Call: TypeScript Implementation

Mechanism: relies on Vercel's AI library, which provides the wrapping around the stream.

```typescript
import { Message, OpenAIStream, StreamingTextResponse } from 'ai'
import { Configuration, OpenAIApi } from 'openai-edge'

export async function stream(apiKey: string, messages: Message[], isStream: boolean = true) {
  // allow routing through a proxy when OPENAI_PROXY_URL is set
  let basePath = process.env.OPENAI_PROXY_URL
  if (basePath == null) {
    basePath = 'https://api.openai.com'
  }

  const configuration = new Configuration({
    apiKey: apiKey || process.env.OPENAI_API_KEY,
    basePath
  })

  const openai = new OpenAIApi(configuration)

  const res = await openai.createChatCompletion({
    model: 'gpt-3.5-turbo',
    messages,
    temperature: 0.7,
    stream: isStream
  })

  // non-streaming calls return the plain response as-is
  if (!isStream) {
    return res
  }

  const stream = OpenAIStream(res, {})

  return new StreamingTextResponse(stream)
}
```

### Client-Side UI: Fetch

```typescript
const decoder = new TextDecoder()

// decode with `stream: true` so multi-byte characters split across chunks
// are assembled correctly
export function decodeAIStreamChunk(chunk: Uint8Array): string {
  return decoder.decode(chunk, { stream: true })
}

await fetch("/api/action/tooling", {
  method: "POST",
  body: JSON.stringify(tooling),
}).then(async response => {
  let result = ""
  // a response body can only be consumed once, so read it as a stream
  // instead of calling response.json() first
  const reader = response.body!.getReader()
  while (true) {
    const { done, value } = await reader.read()
    if (done) {
      break
    }

    result += decodeAIStreamChunk(value)
    onResult(result)
  }

  isPending = false
});
```

diff --git a/src/develop-llm-app-example-comate.md b/src/develop-llm-app-example-comate.md
index 897fe29..6a6cf36 100644
--- a/src/develop-llm-app-example-comate.md
+++ b/src/develop-llm-app-example-comate.md
@@ -1,103 +1 @@
 # LLM Application Example: Co-mate Deep Dive

## LLM Application Development Pattern: Stream Wrapping

### Server-Side API Call: Kotlin Implementation

Mechanism: implemented with `callbackFlow`.

```kotlin
fun stream(text: String): Flow<String> {
    val systemMessage = ChatMessage(ChatMessageRole.USER.value(), text)

    messages.add(systemMessage)

    val completionRequest = ChatCompletionRequest.builder()
        .model(openAiVersion)
        .temperature(0.0)
        .messages(messages)
        .build()

    return callbackFlow {
        withContext(Dispatchers.IO) {
            service.streamChatCompletion(completionRequest)
                .doOnError(Throwable::printStackTrace)
                .blockingForEach { response ->
                    val completion = response.choices[0].message
                    if (completion != null && completion.content != null) {
                        trySend(completion.content)
                    }
                }

            close()
        }
    }
}
```

### Client-Side API Call: TypeScript Implementation

Mechanism: relies on Vercel's AI library, which provides the wrapping around the stream.

```typescript
import { Message, OpenAIStream, StreamingTextResponse } from 'ai'
import { Configuration, OpenAIApi } from 'openai-edge'

export async function stream(apiKey: string, messages: Message[], isStream: boolean = true) {
  let basePath = process.env.OPENAI_PROXY_URL
  if (basePath == null) {
    basePath = 'https://api.openai.com'
  }

  const configuration = new Configuration({
    apiKey: apiKey || process.env.OPENAI_API_KEY,
    basePath
  })

  const openai = new OpenAIApi(configuration)

  const res = await openai.createChatCompletion({
    model: 'gpt-3.5-turbo',
    messages,
    temperature: 0.7,
    stream: isStream
  })

  if (!isStream) {
    return res
  }

  const stream = OpenAIStream(res, {})

  return new StreamingTextResponse(stream)
}
```

### Client-Side UI: Fetch

```typescript
const decoder = new TextDecoder()

export function decodeAIStreamChunk(chunk: Uint8Array): string {
  return decoder.decode(chunk, { stream: true })
}

await fetch("/api/action/tooling", {
  method: "POST",
  body: JSON.stringify(tooling),
}).then(async response => {
  let result = ""
  const reader = response.body!.getReader()
  while (true) {
    const { done, value } = await reader.read()
    if (done) {
      break
    }

    result += decodeAIStreamChunk(value)
    onResult(result)
  }

  isPending = false
});
```