
Commit

docs: add best practices for local models
phodal committed Jun 27, 2023
1 parent b3541d6 commit 21361a0
Showing 3 changed files with 217 additions and 104 deletions.
3 changes: 1 addition & 2 deletions src/SUMMARY.md
@@ -25,8 +25,7 @@
- [LLM Application Example: AI + DevOps](./develop-llm-app-example-ai-devops.md)
- [Tool Example: AutoDev](./develop-llm-app-example-autodev.md)
- [Challenges](./aidevops-challenges.md)
- [LLM Application Example: Co-mate Deep Dive](./develop-llm-app-example-comate.md)

- [LLM Application Example: Best Practices](./develop-llm-app-best-practise.md)

# LLM Application Handbook - Fine-tuning + LLMOps

216 changes: 216 additions & 0 deletions src/develop-llm-app-best-practise.md
@@ -0,0 +1,216 @@
# LLM Application Example: Best Practices

## LLM Application Development Pattern: Local Small Models

In Co-mate, we bring in SentenceTransformer locally to handle user input: the input is first analyzed and matched on the local side. When a match is found, the corresponding local function is called directly; when nothing matches, the request is handed off to a remote handler.

HuggingFace: [https://huggingface.co/sentence-transformers](https://huggingface.co/sentence-transformers)

The approach mainly follows the implementations of GitHub Copilot and Bloop: a small local model pre-processes the user input before the remote large model handles it (a routing sketch follows the two implementation examples below).

### Rust Implementation Example

Rust example: [https://github.com/unit-mesh/unit-agent](https://github.com/unit-mesh/unit-agent)

```rust
pub fn embed(&self, sequence: &str) -> anyhow::Result<Embedding> {
    // Tokenize the input sequence.
    let tokenizer_output = self.tokenizer.encode(sequence, true).unwrap();

    let input_ids = tokenizer_output.get_ids();
    let attention_mask = tokenizer_output.get_attention_mask();
    let token_type_ids = tokenizer_output.get_type_ids();
    let length = input_ids.len();
    trace!("embedding {} tokens {:?}", length, sequence);

    // Build the three model inputs as 1 x length i64 tensors.
    let inputs_ids_array = ndarray::Array::from_shape_vec(
        (1, length),
        input_ids.iter().map(|&x| x as i64).collect(),
    )?;

    let attention_mask_array = ndarray::Array::from_shape_vec(
        (1, length),
        attention_mask.iter().map(|&x| x as i64).collect(),
    )?;

    let token_type_ids_array = ndarray::Array::from_shape_vec(
        (1, length),
        token_type_ids.iter().map(|&x| x as i64).collect(),
    )?;

    // Run the ONNX session on input_ids, attention_mask and token_type_ids.
    let outputs = self.session.run([
        InputTensor::from_array(inputs_ids_array.into_dyn()),
        InputTensor::from_array(attention_mask_array.into_dyn()),
        InputTensor::from_array(token_type_ids_array.into_dyn()),
    ])?;

    // Mean-pool the token embeddings over the sequence dimension to get a
    // fixed-size sentence embedding.
    let output_tensor: OrtOwnedTensor<f32, _> = outputs[0].try_extract().unwrap();
    let sequence_embedding = &*output_tensor.view();
    let pooled = sequence_embedding.mean_axis(Axis(1)).unwrap();

    Ok(pooled.to_owned().as_slice().unwrap().to_vec())
}
```

### Kotlin Implementation Example

```kotlin
class Semantic(val tokenizer: HuggingFaceTokenizer, val session: OrtSession, val env: OrtEnvironment) {
fun embed(
sequence: String,
): FloatArray {
val tokenized = tokenizer.encode(sequence, true)

val inputIds = tokenized.ids
val attentionMask = tokenized.attentionMask
val typeIds = tokenized.typeIds

val tensorInput = OrtUtil.reshape(inputIds, longArrayOf(1, inputIds.size.toLong()))
val tensorAttentionMask = OrtUtil.reshape(attentionMask, longArrayOf(1, attentionMask.size.toLong()))
val tensorTypeIds = OrtUtil.reshape(typeIds, longArrayOf(1, typeIds.size.toLong()))

val result = session.run(
mapOf(
"input_ids" to OnnxTensor.createTensor(env, tensorInput),
"attention_mask" to OnnxTensor.createTensor(env, tensorAttentionMask),
"token_type_ids" to OnnxTensor.createTensor(env, tensorTypeIds),
),
)

val outputTensor = result.get(0) as OnnxTensor
val output = outputTensor.floatBuffer.array()

return output
}


companion object {
fun create(): Semantic {
val classLoader = Thread.currentThread().getContextClassLoader()

val tokenizerPath = classLoader.getResource("model/tokenizer.json")!!.toURI()
val onnxPath = classLoader.getResource("model/model.onnx")!!.toURI()

            try {
                // When the resources live inside a jar, register a filesystem for the
                // jar URI so they can be read through Paths below.
                val env: Map<String, String> = HashMap()
                val array: List<String> = tokenizerPath.toString().split("!")
                FileSystems.newFileSystem(URI.create(array[0]), env)
            } catch (e: Exception) {
                // The filesystem may already be registered; ignore and continue.
            }

val tokenizer = HuggingFaceTokenizer.newInstance(Paths.get(tokenizerPath))
val ortEnv = OrtEnvironment.getEnvironment()
val sessionOptions = OrtSession.SessionOptions()

// load onnxPath as byte[]
val onnxPathAsByteArray = Files.readAllBytes(Paths.get(onnxPath))

val session = ortEnv.createSession(onnxPathAsByteArray, sessionOptions)

return Semantic(tokenizer, session, ortEnv)
}
}
}
```
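
With either implementation in place, the matching itself can stay small. Below is a minimal routing sketch in Kotlin — not taken from the Co-mate codebase — that assumes the `Semantic` class above; `localHandlers`, `remoteCompletion`, and the 0.75 similarity threshold are illustrative names and values, not part of the original code.

```kotlin
import kotlin.math.sqrt

// Cosine similarity between two embeddings.
fun cosine(a: FloatArray, b: FloatArray): Double {
    var dot = 0.0
    var normA = 0.0
    var normB = 0.0
    for (i in a.indices) {
        dot += a[i] * b[i]
        normA += a[i] * a[i]
        normB += b[i] * b[i]
    }
    return dot / (sqrt(normA) * sqrt(normB))
}

// Route user input: call a local function when its description is close enough
// to the input, otherwise fall back to the remote model.
// `localHandlers` maps a natural-language description to a local function;
// `remoteCompletion` calls the remote LLM. Both are hypothetical.
fun route(
    semantic: Semantic,
    input: String,
    localHandlers: Map<String, (String) -> String>,
    remoteCompletion: (String) -> String,
    threshold: Double = 0.75,
): String {
    val inputEmbedding = semantic.embed(input)
    val best = localHandlers.keys
        .map { description -> description to cosine(inputEmbedding, semantic.embed(description)) }
        .maxByOrNull { it.second }

    return if (best != null && best.second >= threshold) {
        localHandlers.getValue(best.first)(input)   // matched: handle locally
    } else {
        remoteCompletion(input)                     // no match: defer to the remote model
    }
}
```

In practice the description embeddings would be computed once and cached rather than re-embedded on every call.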

## LLM Application Development Pattern: Stream Wrapping

### Server-side API Call: Kotlin Implementation

Mechanism: wrap the streaming chat-completion response in a Kotlin `Flow` via `callbackFlow`.

```kotlin
fun stream(text: String): Flow<String> {
val systemMessage = ChatMessage(ChatMessageRole.USER.value(), text)

messages.add(systemMessage)

val completionRequest = ChatCompletionRequest.builder()
.model(openAiVersion)
.temperature(0.0)
.messages(messages)
.build()

return callbackFlow {
withContext(Dispatchers.IO) {
service.streamChatCompletion(completionRequest)
.doOnError(Throwable::printStackTrace)
.blockingForEach { response ->
val completion = response.choices[0].message
if (completion != null && completion.content != null) {
trySend(completion.content)
}
}

close()
}
}
}
```
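
A minimal way to consume this `Flow` — a sketch, not from the Co-mate code — is to collect it and forward each chunk as it arrives, for example to a server-sent-events response or, as here, to standard output. It assumes `stream(...)` from the snippet above is reachable from the caller; the prompt string is illustrative.

```kotlin
import kotlinx.coroutines.runBlocking

fun main() = runBlocking {
    // Each emission is a partial completion fragment; print it as it arrives.
    stream("Summarize the latest build failure").collect { chunk ->
        print(chunk)
    }
}
```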

### Client-side API Call: TypeScript Implementation

Mechanism: relies on Vercel's AI library, which provides the wrapper around the stream.

```typescript
import { Message, OpenAIStream, StreamingTextResponse } from 'ai'
import { Configuration, OpenAIApi } from 'openai-edge'

export async function stream(apiKey: string, messages: Message[], isStream: boolean = true) {
let basePath = process.env.OPENAI_PROXY_URL
if (basePath == null) {
basePath = 'https://api.openai.com'
}

const configuration = new Configuration({
apiKey: apiKey || process.env.OPENAI_API_KEY,
basePath
})

const openai = new OpenAIApi(configuration)

const res = await openai.createChatCompletion({
model: 'gpt-3.5-turbo',
messages,
temperature: 0.7,
stream: isStream
})

if (!isStream) {
return res
}

const stream = OpenAIStream(res, {})

return new StreamingTextResponse(stream)
}
```

### Client-side UI Implementation: Fetch

```typescript
const decoder = new TextDecoder()
export function decodeAIStreamChunk(chunk: Uint8Array): string {
return decoder.decode(chunk)
}

await fetch("/api/action/tooling", {
method: "POST",
body: JSON.stringify(tooling),
}).then(async response => {
  // Read the body as a stream; calling response.json() here would consume
  // the body and break the streaming reader below.
  let result = ""
  const reader = response.body.getReader()
while (true) {
const { done, value } = await reader.read()
if (done) {
break
}

result += decodeAIStreamChunk(value)
onResult(result)
}

isPending = false
});
```
102 changes: 0 additions & 102 deletions src/develop-llm-app-example-comate.md
@@ -1,103 +1 @@
# LLM Application Example: Co-mate Deep Dive

## LLM Application Development Pattern: Stream Wrapping

### Server-side API Call: Kotlin Implementation

Mechanism: implemented with `callbackFlow`.

```kotlin
fun stream(text: String): Flow<String> {
val systemMessage = ChatMessage(ChatMessageRole.USER.value(), text)

messages.add(systemMessage)

val completionRequest = ChatCompletionRequest.builder()
.model(openAiVersion)
.temperature(0.0)
.messages(messages)
.build()

return callbackFlow {
withContext(Dispatchers.IO) {
service.streamChatCompletion(completionRequest)
.doOnError(Throwable::printStackTrace)
.blockingForEach { response ->
val completion = response.choices[0].message
if (completion != null && completion.content != null) {
trySend(completion.content)
}
}

close()
}
}
}
```

## Client-side API Call: TypeScript Implementation

Mechanism: relies on Vercel's AI library, which provides the wrapper around the stream.

```typescript
import { Message, OpenAIStream, StreamingTextResponse } from 'ai'
import { Configuration, OpenAIApi } from 'openai-edge'

export async function stream(apiKey: string, messages: Message[], isStream: boolean = true) {
let basePath = process.env.OPENAI_PROXY_URL
if (basePath == null) {
basePath = 'https://api.openai.com'
}

const configuration = new Configuration({
apiKey: apiKey || process.env.OPENAI_API_KEY,
basePath
})

const openai = new OpenAIApi(configuration)

const res = await openai.createChatCompletion({
model: 'gpt-3.5-turbo',
messages,
temperature: 0.7,
stream: isStream
})

if (!isStream) {
return res
}

const stream = OpenAIStream(res, {})

return new StreamingTextResponse(stream)
}
```

### Client-side UI Implementation: Fetch

```typescript
const decoder = new TextDecoder()
export function decodeAIStreamChunk(chunk: Uint8Array): string {
return decoder.decode(chunk)
}

await fetch("/api/action/tooling", {
method: "POST",
body: JSON.stringify(tooling),
}).then(async response => {
onResult(await response.json())
let result = ""
const reader = response.body.getReader()
while (true) {
const { done, value } = await reader.read()
if (done) {
break
}

result += decodeAIStreamChunk(value)
onResult(result)
}

isPending = false
});
```
