Skip to content

Commit d839ab0

Browse files
committed
feat(gpt-runner-core): optimize token count and md json parse
1 parent cf91fee commit d839ab0

File tree

13 files changed

+542
-396
lines changed

13 files changed

+542
-396
lines changed

package.json

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,33 +35,33 @@
3535
"@types/prettier": "^2.7.3",
3636
"@types/react": "^18.2.14",
3737
"@vitejs/plugin-legacy": "^4.1.0",
38-
"@vitest/ui": "^0.32.4",
38+
"@vitest/ui": "^0.33.0",
3939
"bumpp": "^9.1.1",
4040
"eslint": "8.44.0",
41-
"esno": "^0.16.3",
41+
"esno": "^0.17.0",
4242
"execa": "^7.1.1",
4343
"fast-glob": "^3.3.0",
4444
"fs-extra": "^11.1.1",
4545
"jiti": "^1.19.1",
4646
"jsdom": "^22.1.0",
4747
"lint-staged": "^13.2.3",
4848
"msw": "1.2.2",
49-
"pnpm": "8.6.6",
50-
"prettier": "^2.8.8",
49+
"pnpm": "8.6.7",
50+
"prettier": "^3.0.0",
5151
"react": "^18.2.0",
5252
"rollup": "^3.26.2",
5353
"semver": "^7.5.4",
5454
"simple-git-hooks": "^2.8.1",
5555
"taze": "^0.11.2",
56-
"terser": "^5.18.2",
56+
"terser": "^5.19.0",
5757
"tsup": "^7.1.0",
5858
"typescript": "^5.1.6",
5959
"unbuild": "^0.8.11",
6060
"unplugin-auto-import": "^0.16.6",
61-
"vite": "^4.4.2",
61+
"vite": "^4.4.3",
6262
"vite-plugin-inspect": "^0.7.32",
6363
"vite-plugin-pages": "^0.31.0",
64-
"vitest": "^0.32.4"
64+
"vitest": "^0.33.0"
6565
},
6666
"pnpm": {
6767
"overrides": {
@@ -77,4 +77,4 @@
7777
"eslint --cache --fix"
7878
]
7979
}
80-
}
80+
}

packages/gpt-runner-core/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"dependencies": {
4949
"@nicepkg/gpt-runner-shared": "workspace:*",
5050
"ignore": "^5.2.4",
51-
"langchain": "^0.0.102",
51+
"langchain": "^0.0.107",
5252
"unconfig": "^0.3.9"
5353
}
54-
}
54+
}

packages/gpt-runner-core/src/core/count-tokens.ts

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,20 @@
11
import fs from 'node:fs'
2-
3-
// import { Tiktoken } from 'tiktoken/lite'
4-
// import cl100kBase from 'tiktoken/encoders/cl100k_base.json'
52
import { PathUtils } from '@nicepkg/gpt-runner-shared/node'
3+
import { isChineseCharacter } from '@nicepkg/gpt-runner-shared'
4+
5+
export function countTokenQuick(text: string): number {
6+
let chineseCount = 0
7+
let otherCount = 0
8+
9+
for (const char of text) {
10+
if (isChineseCharacter(char))
11+
chineseCount += 1
612

7-
// slow but accurate
8-
// export function countTokens(text: string) {
9-
// const encoding = new Tiktoken(
10-
// cl100kBase.bpe_ranks,
11-
// cl100kBase.special_tokens,
12-
// cl100kBase.pat_str,
13-
// )
14-
// const tokens = encoding.encode(text)
15-
// encoding.free()
16-
// return tokens.length
17-
// }
13+
else
14+
otherCount += 1
15+
}
1816

19-
// fast but inaccurate
20-
export function countTokenQuick(text: string) {
21-
// int
22-
return Math.floor(text.length / 3.5)
17+
return chineseCount * 2 + (otherCount / 3.5)
2318
}
2419

2520
export function countFileTokens(filePath: string, quick = true) {

packages/gpt-runner-core/src/core/parser/md.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ export async function gptMdFileParser(params: GptMdFileParserParams): Promise<Si
1515
// match ```json
1616
const configJsonString = content.match(/^\s*?```json([\s\S]*?)```/i)?.[1]?.trim()
1717

18-
const singleFileConfig = singleFileConfigWithDefault(configJsonString ? tryParseJson(configJsonString) : {})
18+
const singleFileConfig = singleFileConfigWithDefault(configJsonString ? tryParseJson(configJsonString, true) : {})
1919

2020
type ResolveConfigKey = 'userPrompt' | 'systemPrompt'
2121
const resolveTitleConfig: {

packages/gpt-runner-core/src/langchain/fixes/chat-prompt-template.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import { ChatPromptTemplate } from 'langchain/prompts'
2-
import type { BaseChatMessage, InputValues } from 'langchain/schema'
2+
import type { BaseMessage, InputValues } from 'langchain/schema'
33

4-
ChatPromptTemplate.prototype.formatMessages = async function (values: InputValues): Promise<BaseChatMessage[]> {
4+
ChatPromptTemplate.prototype.formatMessages = async function (values: InputValues): Promise<BaseMessage[]> {
55
const allValues = await this.mergePartialAndUserVariables(values)
6-
let resultMessages: BaseChatMessage[] = []
6+
let resultMessages: BaseMessage[] = []
77
for (const promptMessage of this.promptMessages) {
88
const inputValues = promptMessage.inputVariables.reduce((acc, inputVariable) => {
99
if (!(inputVariable in allValues)) {

packages/gpt-runner-core/src/langchain/helper.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,20 @@ import type { SingleChatMessage } from '@nicepkg/gpt-runner-shared/common'
22
import { ChatRole } from '@nicepkg/gpt-runner-shared/common'
33
import { AIMessagePromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
44
import type { BaseMessageStringPromptTemplate } from 'langchain/dist/prompts/chat'
5-
import type { BaseChatMessage } from 'langchain/schema'
6-
import { AIChatMessage, HumanChatMessage, SystemChatMessage } from 'langchain/schema'
5+
import type { BaseMessage } from 'langchain/schema'
6+
import { AIMessage, HumanMessage, SystemMessage } from 'langchain/schema'
77

88
export function mapStoredMessagesToChatMessages(
99
messages: SingleChatMessage[],
10-
): BaseChatMessage[] {
10+
): BaseMessage[] {
1111
return messages.map((message) => {
1212
switch (message.name) {
1313
case ChatRole.User:
14-
return new HumanChatMessage(message.text)
14+
return new HumanMessage(message.text)
1515
case ChatRole.Assistant:
16-
return new AIChatMessage(message.text)
16+
return new AIMessage(message.text)
1717
case ChatRole.System:
18-
return new SystemChatMessage(message.text)
18+
return new SystemMessage(message.text)
1919
default:
2020
throw new Error('Role must be defined for generic messages')
2121
}

packages/gpt-runner-shared/package.json

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
"http-proxy-agent": "*",
8585
"https-proxy-agent": "*",
8686
"ip": "*",
87+
"jsonc-parser": "*",
8788
"launch-editor": "*",
8889
"minimatch": "*",
8990
"socket.io": "*",
@@ -99,19 +100,20 @@
99100
"debug": "^4.3.4",
100101
"find-free-ports": "^3.1.1",
101102
"http-proxy-agent": "^7.0.0",
102-
"https-proxy-agent": "^7.0.0",
103+
"https-proxy-agent": "^7.0.1",
103104
"ip": "^1.1.8",
105+
"jsonc-parser": "^3.2.0",
104106
"launch-editor": "^2.6.0",
105107
"minimatch": "^9.0.3",
106108
"open": "^8.4.2",
107109
"socket.io": "^4.7.1",
108110
"socket.io-client": "^4.7.1",
109111
"zod": "^3.21.4",
110-
"zod-to-json-schema": "^3.21.3"
112+
"zod-to-json-schema": "^3.21.4"
111113
},
112114
"devDependencies": {
113115
"@types/express": "^4.17.17",
114116
"@types/ip": "^1.1.0",
115117
"express": "^4.18.2"
116118
}
117-
}
119+
}

packages/gpt-runner-shared/src/common/helpers/common.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { AxiosError } from 'axios'
2+
import * as jsonc from 'jsonc-parser'
23
import type { TreeItem } from '../types'
34

45
export function sleep(ms: number) {
@@ -82,9 +83,10 @@ export function travelTreeDeepFirst<T extends TreeItem<Record<string, any>>, R e
8283
return travel(tree) as R[]
8384
}
8485

85-
export function tryParseJson(str: string) {
86+
export function tryParseJson(str: string, supportJsonc = false) {
8687
try {
87-
return JSON.parse(str?.trim() ?? '')
88+
const target = str?.trim() ?? ''
89+
return supportJsonc ? jsonc.parse(target) : JSON.parse(target)
8890
}
8991
catch (e) {
9092
console.error('tryParseJson error: ', str, e)
@@ -246,3 +248,15 @@ export function waitForCondition(conditionFn: (...args: any[]) => boolean, timeo
246248
}, 100)
247249
})
248250
}
251+
252+
/**
 * Tell whether the first character of `char` is a CJK (Han) ideograph.
 *
 * Only the first Unicode code point of the string is inspected; an empty
 * string yields `false`.
 *
 * @param char - A string whose first code point is tested. Passing the items
 *   produced by `for (const c of text)` works for supplementary-plane
 *   characters as well, because that loop yields whole code points.
 * @returns `true` when the code point falls in one of the CJK ideograph
 *   ranges listed below, otherwise `false`.
 */
export function isChineseCharacter(char: string): boolean {
  // Use codePointAt rather than charCodeAt: charCodeAt returns a single
  // UTF-16 code unit (<= 0xFFFF), so the ranges at or above 0x20000 could
  // never match and supplementary-plane ideographs were misclassified.
  const codePoint = char.codePointAt(0)

  // Empty input has no first code point.
  if (codePoint === undefined)
    return false

  return (codePoint >= 0x4E00 && codePoint <= 0x9FFF) // CJK Unified Ideographs
    || (codePoint >= 0x3400 && codePoint <= 0x4DBF) // Extension A
    || (codePoint >= 0x20000 && codePoint <= 0x2A6DF) // Extension B
    || (codePoint >= 0x2A700 && codePoint <= 0x2B73F) // Extension C
    || (codePoint >= 0x2B740 && codePoint <= 0x2B81F) // Extension D
    || (codePoint >= 0x2B820 && codePoint <= 0x2CEAF) // Extension E
    || (codePoint >= 0xF900 && codePoint <= 0xFAFF) // CJK Compatibility Ideographs
    || (codePoint >= 0x2F800 && codePoint <= 0x2FA1F) // Compatibility Ideographs Supplement
}

packages/gpt-runner-vscode/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,12 @@
134134
"@nicepkg/gpt-runner-web": "workspace:*",
135135
"@types/vscode": "^1.71.0",
136136
"@vscode/vsce": "^2.19.0",
137-
"esno": "^0.16.3",
137+
"esno": "^0.17.0",
138138
"eventemitter3": "^5.0.1",
139139
"execa": "^7.1.1",
140140
"fs-extra": "^11.1.1",
141141
"jiti": "^1.19.1",
142142
"uuid": "^9.0.0",
143143
"wait-port": "^1.0.4"
144144
}
145-
}
145+
}

packages/gpt-runner-web/client/src/helpers/utils.ts

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import type { ParsedUrlQuery } from 'node:querystring'
2-
import { formatSourceValue } from '@nicepkg/gpt-runner-shared/common'
2+
import { formatSourceValue, isChineseCharacter } from '@nicepkg/gpt-runner-shared/common'
33
import type { MutableRefObject, Ref } from 'react'
44

55
export function createEl<T extends keyof HTMLElementTagNameMap>(tag: T,
@@ -44,9 +44,19 @@ export function formatNumWithK(num: number) {
4444
return `${(num / 1000).toFixed(1)}k`
4545
}
4646

47-
export function countTokenQuick(text: string) {
48-
// int
49-
return Math.floor(text.length / 3.5)
47+
/**
 * Fast, approximate token estimate for a piece of text.
 *
 * The text is walked code point by code point. Every character that
 * `isChineseCharacter` accepts is weighted as 2 tokens, and every other
 * character as 1 / 3.5 of a token.
 *
 * @param text - The text to estimate.
 * @returns The estimated token count; the value is not rounded and may be
 *   fractional.
 */
export function countTokenQuick(text: string): number {
  // Array.from splits the string by code point, the same way for...of does.
  const characters = Array.from(text)
  const chineseTotal = characters.filter(ch => isChineseCharacter(ch)).length
  const otherTotal = characters.length - chineseTotal

  return chineseTotal * 2 + (otherTotal / 3.5)
}
5161

5262
export function isDomHidden(el: HTMLElement) {

0 commit comments

Comments
 (0)