Skip to content

Commit

Permalink
feat: prompts enhancementation
Browse files Browse the repository at this point in the history
  • Loading branch information
yetone committed May 2, 2023
1 parent 81beda3 commit 50e0697
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 89 deletions.
27 changes: 27 additions & 0 deletions src/common/lang.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ export const supportLanguages: [string, string][] = [
['zh-Hant', '繁體中文'],
['yue', '粤语'],
['wyw', '古文'],
['jdbhw', '近代白话文'],
['xdbhw', '现代白话文'],
['ja', '日本語'],
['ko', '한국어'],
['fr', 'Français'],
Expand Down Expand Up @@ -82,15 +84,40 @@ export async function detectLang(text: string): Promise<string | null> {
return lang
}

/** Display names for the language codes that are answered directly, before any other lookup. */
const specialLangNames = new Map<string, string>([
    ['zh-Hans', 'Simplified Chinese'],
    ['zh-Hant', 'Traditional Chinese'],
    ['yue', 'Cantonese'],
    ['hmn', 'Hmong'],
])

/**
 * Resolve a language code to a display name.
 *
 * Lookup order:
 *   1. the fixed table of special codes (`zh-Hans`, `zh-Hant`, `yue`, `hmn`);
 *   2. `ISO6391.getName`, when it returns a non-empty name;
 *   3. `langMap`, when it holds a non-empty entry for the code;
 *   4. the code itself, unchanged.
 *
 * @param langCode - language code to resolve
 * @returns the first non-empty name found in the order above
 */
export function getLangName(langCode: string): string {
    const special = specialLangNames.get(langCode)
    if (special !== undefined) {
        return special
    }
    // `||` (not `??`) on purpose: an empty-string name counts as "not found" at every step.
    return ISO6391.getName(langCode) || langMap.get(langCode) || langCode
}

export async function _detectLang(text: string): Promise<string | null> {
const detectedText = text.trim()
return new Promise((resolve) => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const langName = (window as any).detectLanguage(detectedText)
console.debug('detected text:', detectedText)
console.debug('detected lang:', langName)
if (langName === 'Chineset') {
resolve('zh-Hant')
return
}
if (langName === 'Hmong') {
resolve('hmn')
return
}
const langCode = ISO6391.getCode(langName)
resolve(langCode)
})
Expand Down
150 changes: 61 additions & 89 deletions src/common/translate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ export interface TranslateResult {
error?: string
}

/**
 * Report whether `text` consists of exactly one word-granularity segment.
 *
 * The first argument is the language code handed to `Intl.Segmenter` as its locale.
 * `text` is segmented with `granularity: 'word'`, and the result is true only when the
 * first segment produced is equal to the whole of `text`.
 *
 * @returns `false` when `Intl.Segmenter` is not available in the current runtime;
 *          otherwise whether the first segment equals `text`.
 */
export const isAWord = (lang: string, text: string) => {
export const isAWord = (langCode: string, text: string) => {
// `Intl.Segmenter` is reached through an `any` cast, hence the lint suppression below.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const Segmenter = (Intl as any).Segmenter
if (!Segmenter) {
// No segmenter in this runtime: report "not a word" rather than throwing.
return false
}
const segmenter = new Segmenter(lang, { granularity: 'word' })
const segmenter = new Segmenter(langCode, { granularity: 'word' })
// Only the first segment is inspected; `?.` covers an iterator that yields no segment.
const iterator = segmenter.segment(text)[Symbol.iterator]()
return iterator.next().value?.segment === text
}
Expand Down Expand Up @@ -174,119 +174,84 @@ export class QuoteProcessor {
}
}

// Language codes that `translate` treats as Chinese when deciding whether the
// source/target language is Chinese (checked with `indexOf(...) >= 0`).
const chineseLangs = ['zh-Hans', 'zh-Hant', 'wyw', 'yue']
const chineseLangCodes = ['zh-Hans', 'zh-Hant', 'wyw', 'yue', 'jdbhw', 'xdbhw']

export async function translate(query: TranslateQuery) {
const sourceLangCode = query.detectFrom
const targetLangCode = query.detectTo
const sourceLang = lang.getLangName(sourceLangCode)
const targetLang = lang.getLangName(targetLangCode)
console.debug('sourceLang', sourceLang)
console.debug('targetLang', targetLang)
let quoteProcessor: QuoteProcessor | undefined
const settings = await utils.getSettings()
const fromChinese = chineseLangs.indexOf(query.detectFrom) >= 0
const toChinese = chineseLangs.indexOf(query.detectTo) >= 0
let systemPrompt =
'You are a translation expert, translate the text in a colloquial, professional and elegant manner without sounding like a machine translation. Remember to only translate the text and not further interpret it.'
let assistantPrompt = `Translate from ${lang.langMap.get(query.detectFrom) || query.detectFrom} to ${
lang.langMap.get(query.detectTo) || query.detectTo
}. Only the translated text can be returned.`
let userPrompt = query.text
const toChinese = chineseLangCodes.indexOf(targetLangCode) >= 0
let rolePrompt =
'You are a professional translation engine, please translate the text into a colloquial, professional, elegant and fluent content, without the style of machine translation. You must only translate the text content, never interpret it.'
const assistantPrompts: string[] = []
let commandPrompt = `Translate from ${sourceLang} to ${targetLang}. Only the translated text can be returned.`
let contentPrompt = query.text

// a word could be collected
let isWordMode = false
switch (query.mode) {
case 'translate':
quoteProcessor = new QuoteProcessor()
assistantPrompt += ` Only translate the text between ${quoteProcessor.quoteStart} and ${quoteProcessor.quoteEnd}.`
userPrompt = `${quoteProcessor.quoteStart}${query.text}${quoteProcessor.quoteEnd} =>`
if (query.detectTo === 'wyw' || query.detectTo === 'yue') {
assistantPrompt = `请翻译成${lang.langMap.get(query.detectTo) || query.detectTo}`
commandPrompt += ` Only translate the text between ${quoteProcessor.quoteStart} and ${quoteProcessor.quoteEnd}.`
contentPrompt = `${quoteProcessor.quoteStart}${query.text}${quoteProcessor.quoteEnd} =>`
if (targetLangCode === 'xdbhw') {
rolePrompt = '您是一位在中文系研究中文的资深学者'
commandPrompt = `夹在${quoteProcessor.quoteStart}${quoteProcessor.quoteEnd}之间的内容是原文,请您将原文内容翻译成《呐喊》风格的现代白话文`
} else if (targetLangCode === 'jdbhw') {
rolePrompt = '您是一位在中文系研究中文的资深学者'
commandPrompt = `夹在${quoteProcessor.quoteStart}${quoteProcessor.quoteEnd}之间的内容是原文,请您将原文内容翻译成《红楼梦》风格的近代白话文`
} else if (query.text.length < 5 && toChinese) {
// 当用户的默认语言为中文时,查询中文词组(不超过5个字),展示多种翻译结果,并阐述适用语境。
rolePrompt = `你是一个翻译引擎,请将给到的文本翻译成${targetLang}。请列出3种(如果有)最常用翻译结果:单词或短语,并列出对应的适用语境(用中文阐述)、音标、词性、双语示例。按照下面格式用中文阐述:
<序号><单词或短语> · /<音标>
[<词性缩写>] <适用语境(用中文阐述)>
例句:<例句>(例句翻译)`
commandPrompt = ''
}
if (fromChinese) {
if (query.detectTo === 'zh-Hant') {
assistantPrompt = '翻譯成台灣常用用法之繁體中文白話文'
} else if (query.detectTo === 'zh-Hans') {
assistantPrompt = '翻译成简体白话文'
} else if (query.text.length < 5 && toChinese) {
// 当用户的默认语言为中文时,查询中文词组(不超过5个字),展示多种翻译结果,并阐述适用语境。
systemPrompt = `你是一个翻译引擎,请将给到的文本翻译成${
lang.langMap.get(query.detectTo) || query.detectTo
}。请列出3种(如果有)最常用翻译结果:单词或短语,并列出对应的适用语境(用中文阐述)、音标、词性、双语示例。按照下面格式用中文阐述:
<序号><单词或短语> · /<音标>
[<词性缩写>] <适用语境(用中文阐述)>
例句:<例句>(例句翻译)`
assistantPrompt = ''
}
}
if (toChinese && isAWord(query.detectFrom, query.text.trim())) {
if (toChinese && isAWord(sourceLangCode, query.text.trim())) {
isWordMode = true
// 翻译为中文时,增加单词模式,可以更详细的翻译结果,包括:音标、词性、含义、双语示例。
systemPrompt = `你是一个翻译引擎,请将翻译给到的文本,只需要翻译不需要解释。当且仅当文本只有一个单词时,请给出单词原始形态(如果有)、单词的语种、对应的音标(如果有)、所有含义(含词性)、双语示例,至少三条例句,请严格按照下面格式给到翻译结果:
rolePrompt = `你是一个翻译引擎,请将翻译给到的文本,只需要翻译不需要解释。当且仅当文本只有一个单词时,请给出单词原始形态(如果有)、单词的语种、对应的音标(如果有)、所有含义(含词性)、双语示例,至少三条例句,请严格按照下面格式给到翻译结果:
<单词>
[<语种>] · / <单词音标>
[<词性缩写>] <中文含义>]
例句:
<序号><例句>(例句翻译)`
userPrompt = `单词是:${query.text}`
commandPrompt = '好的,我明白了,请给我这个单词。'
contentPrompt = `单词是:${query.text}`
}
if (query.selectedWord) {
// 在选择的句子中,选择特定的单词。触发语境学习功能。
systemPrompt = `你是一位${
lang.langMap.get(query.detectFrom) || query.detectFrom
}词义语法专家,你在教我${lang.langMap.get(query.detectFrom) || query.detectFrom},我给你一句${
lang.langMap.get(query.detectFrom) || query.detectFrom
}句子,和这个句子中的一个单词,请用${
lang.langMap.get(query.detectTo) || query.detectTo
}帮我解释一下,这个单词在句子中的意思和句子本身的意思,如果单词在这个句子中是习话的一部分,请解释这句句子中的习话,并举几个相同意思的${
lang.langMap.get(query.detectFrom) || query.detectFrom
}例句,并用${
lang.langMap.get(query.detectTo) || query.detectTo
}解释例句。如果你明白了请说同意,然后我们开始。`
assistantPrompt = '好的,我明白了,请给我这个句子和单词。'
userPrompt = `句子是:${query.text}\n单词是:${query.selectedWord}`
rolePrompt = `You are an expert in the semantic syntax of the ${sourceLang} language and you are teaching me the ${sourceLang} language. I give you a sentence in ${sourceLang} and a word in that sentence. Please help me explain in ${targetLang} language what the word means in the sentence and what the sentence itself means, and if the word is part of an idiom in the sentence, explain the idiom in the sentence and give a few examples in ${sourceLang} with the same meaning and explain the examples in ${targetLang} language, and must in ${targetLang} language. If you understand, say yes, and then we will begin.`
commandPrompt = 'yes, I understand, please give me the sentence and the word.'
contentPrompt = `the sentence is: ${query.text}\n\nthe word is: ${query.selectedWord}`
}
break
case 'polishing':
systemPrompt = 'Revise the following sentences to make them more clear, concise, and coherent.'
if (fromChinese) {
assistantPrompt = `使用 ${lang.langMap.get(query.detectFrom) || query.detectFrom} 语言润色此段文本`
} else {
assistantPrompt = `polish this text in ${lang.langMap.get(query.detectFrom) || query.detectFrom}`
}
rolePrompt =
'You are an expert translator, please revise the following sentences to make them more clear, concise, and coherent.'
commandPrompt = `polish this text in ${sourceLang}`
break
case 'summarize':
systemPrompt = "You are a text summarizer, you can only summarize the text, don't interpret it."
if (toChinese) {
assistantPrompt = '用最简洁的语言使用中文总结此段文本'
} else {
assistantPrompt = `summarize this text in the most concise language and must use ${
lang.langMap.get(query.detectTo) || query.detectTo
} language!`
}
rolePrompt = "You are a professional text summarizer, you can only summarize the text, don't interpret it."
commandPrompt = `summarize this text in the most concise language and must use ${targetLang} language!`
break
case 'analyze':
systemPrompt = 'You are a translation engine and grammar analyzer.'
if (toChinese) {
assistantPrompt = `请用中文翻译此段文本并解析原文中的语法`
} else {
assistantPrompt = `translate this text to ${
lang.langMap.get(query.detectTo) || query.detectTo
} and explain the grammar in the original text using ${
lang.langMap.get(query.detectTo) || query.detectTo
}`
}
rolePrompt = 'You are a professional translation engine and grammar analyzer.'
commandPrompt = `translate this text to ${targetLang} and explain the grammar in the original text using ${targetLang}`
break
case 'explain-code':
systemPrompt =
rolePrompt =
'You are a code explanation engine, you can only explain the code, do not interpret or translate it. Also, please report any bugs you find in the code to the author of the code.'
if (toChinese) {
assistantPrompt =
'用最简洁的语言使用中文解释此段代码、正则表达式或脚本。如果内容不是代码,请返回错误提示。如果代码有明显的错误,请指出。'
} else {
assistantPrompt = `explain the provided code, regex or script in the most concise language and must use ${
lang.langMap.get(query.detectTo) || query.detectTo
} language! If the content is not code, return an error message. If the code has obvious errors, point them out.`
}
commandPrompt = `explain the provided code, regex or script in the most concise language and must use ${targetLang} language! If the content is not code, return an error message. If the code has obvious errors, point them out.`
break
case 'big-bang':
systemPrompt = `You are a professional writer and you will write ${query.articlePrompt} based on the given words`
assistantPrompt = `Write ${query.articlePrompt} of no more than 160 words. The article must contain the words in the following text. The more words you use, the better`
rolePrompt = `You are a professional writer and you will write ${query.articlePrompt} based on the given words`
commandPrompt = `Write ${query.articlePrompt} of no more than 160 words. The article must contain the words in the following text. The more words you use, the better`
break
}

Expand Down Expand Up @@ -317,7 +282,7 @@ export async function translate(query: TranslateQuery) {
isChatAPI = false
body[
'prompt'
] = `<|im_start|>system\n${systemPrompt}\n<|im_end|>\n<|im_start|>user\n${assistantPrompt}\n${userPrompt}\n<|im_end|>\n<|im_start|>assistant\n`
] = `<|im_start|>system\n${rolePrompt}\n<|im_end|>\n<|im_start|>user\n${commandPrompt}\n${contentPrompt}\n<|im_end|>\n<|im_start|>assistant\n`
body['stop'] = ['<|im_end|>']
} else if (settings.provider === 'ChatGPT') {
let resp: Response | null = null
Expand All @@ -332,28 +297,35 @@ export async function translate(query: TranslateQuery) {
role: 'user',
content: {
content_type: 'text',
parts: [systemPrompt + '\n\n' + assistantPrompt + ':\n' + `${userPrompt}`],
parts: [rolePrompt + '\n\n' + commandPrompt + ':\n' + `${contentPrompt}`],
},
},
],
model: settings.apiModel, // 'text-davinci-002-render-sha'
parent_message_id: utils.generateUUID(),
}
} else {
body['messages'] = [
const messages = [
{
role: 'system',
content: systemPrompt,
content: rolePrompt,
},
...assistantPrompts.map((prompt) => {
return {
role: 'user',
content: prompt,
}
}),
{
role: 'user',
content: assistantPrompt,
content: commandPrompt,
},
{
role: 'user',
content: userPrompt,
content: contentPrompt,
},
]
body['messages'] = messages
}

switch (settings.provider) {
Expand Down

0 comments on commit 50e0697

Please sign in to comment.