Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/fastgpt-image-personal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Builds the FastGPT app image and pushes it to the repository owner's GHCR namespace.
name: Build FastGPT images in Personal warehouse
# Runs on manual dispatch, or on pushes to main that touch projects/app or packages.
on:
workflow_dispatch:
push:
paths:
- 'projects/app/**'
- 'packages/**'
branches:
- 'main'
jobs:
build-fastgpt-images:
runs-on: ubuntu-20.04
# Skipped in the upstream labring/FastGPT repository, so it only runs elsewhere (e.g. forks).
if: github.repository != 'labring/FastGPT'
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
driver-opts: network=host
# Local buildx cache directory, keyed by commit SHA with a per-OS prefix as fallback.
- name: Cache Docker layers
uses: actions/cache@v3
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-
- name: Login to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GH_PAT }}
# The tag written here is always ":latest", regardless of the triggering ref.
- name: Set DOCKER_REPO_TAGGED based on branch or tag
run: |
echo "DOCKER_REPO_TAGGED=ghcr.io/${{ github.repository_owner }}/fastgpt:latest" >> $GITHUB_ENV
- name: Build and publish image for main branch or tag push event
env:
DOCKER_REPO_TAGGED: ${{ env.DOCKER_REPO_TAGGED }}
run: |
docker buildx build \
--build-arg name=app \
--label "org.opencontainers.image.source=https://github.com/${{ github.repository_owner }}/FastGPT" \
--label "org.opencontainers.image.description=fastgpt image" \
--push \
--cache-from=type=local,src=/tmp/.buildx-cache \
--cache-to=type=local,dest=/tmp/.buildx-cache \
-t ${DOCKER_REPO_TAGGED} \
-f Dockerfile \
.
5 changes: 1 addition & 4 deletions .github/workflows/fastgpt-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ on:
paths:
- 'projects/app/**'
- 'packages/**'
branches:
- 'main'
tags:
- 'v*.*.*'
jobs:
Expand Down Expand Up @@ -53,9 +51,8 @@ jobs:
docker buildx build \
--build-arg name=app \
--platform linux/amd64,linux/arm64 \
--label "org.opencontainers.image.source= https://github.com/ ${{ github.repository_owner }}/FastGPT" \
--label "org.opencontainers.image.source=https://github.com/${{ github.repository_owner }}/FastGPT" \
--label "org.opencontainers.image.description=fastgpt image" \
--label "org.opencontainers.image.licenses=Apache" \
--push \
--cache-from=type=local,src=/tmp/.buildx-cache \
--cache-to=type=local,dest=/tmp/.buildx-cache \
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/preview-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
with:
driver-opts: network=host
- name: Cache Docker layers
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
Expand All @@ -48,6 +48,7 @@ jobs:
--label "org.opencontainers.image.source= https://github.com/ ${{ github.repository_owner }}/FastGPT" \
--label "org.opencontainers.image.description=fastgpt-pr image" \
--label "org.opencontainers.image.licenses=Apache" \
--push \
--cache-from=type=local,src=/tmp/.buildx-cache \
--cache-to=type=local,dest=/tmp/.buildx-cache \
-t ${DOCKER_REPO_TAGGED} \
Expand Down
33 changes: 21 additions & 12 deletions docSite/content/docs/installation/upgrading/46.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@ toc: true
weight: 836
---

未正式发布
# V4.6 版本加入了简单的团队功能,可以邀请其他用户进来管理资源。该版本升级后无法执行旧的升级脚本,且无法回退

V4.6 版本加入了简单的团队功能,可以邀请其他用户进来管理资源。该版本升级后无法执行旧的升级脚本,且无法回退。
# 1. 更新镜像并变更配置文件

## 1. 更新镜像并变更配置文件

更新镜像至 latest 或者 v4.6 版本。商业版镜像更新至 V0.2.
更新镜像至 latest 或者 v4.6 版本。商业版镜像更新至 V0.2.1

最新配置可参考: [V46版本最新 config.json](/docs/development/configuration),商业镜像配置文件也更新,参考最新的飞书文档。


## 2. 执行初始化 API
# 2. 执行初始化 API

发起 2 个 HTTP 请求({{rootkey}} 替换成环境变量里的`rootkey`,{{host}}替换成自己域名)

发起 1 个 HTTP 请求({{rootkey}} 替换成环境变量里的`rootkey`,{{host}}替换成自己域名)
**该初始化接口可能速度很慢,返回超时不用管,注意看日志即可,需要注意的是,需确保initv46成功后,再执行initv46-2**

1. https://xxxxx/api/admin/initv46

Expand All @@ -30,16 +30,25 @@ curl --location --request POST 'https://{{host}}/api/admin/initv46' \
--header 'Content-Type: application/json'
```

2. https://xxxxx/api/admin/initv46-2

```bash
curl --location --request POST 'https://{{host}}/api/admin/initv46-2' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```

初始化内容:
1. 创建默认团队
2. 初始化 Mongo 所有资源的团队字段
3. 初始化 Pg 的字段
4. 初始化 Mongo Data

**该初始化接口可能速度很慢,返回超时不用管,注意看日志即可**


## 功能介绍

### Fast GPT V4.6
# V4.6功能介绍

1. 新增 - 团队空间
2. 新增 - 多路向量(多个向量映射一组数据)
3. 新增 - tts语音
4. 线上环境新增 - ReRank向量召回,提高召回精度
5. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈
131 changes: 131 additions & 0 deletions packages/global/common/string/textSplitter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import { getErrText } from '../error/utils';
import { countPromptTokens } from './tiktoken';

/**
 * Split text into chunks.
 *
 * Separators are tried from coarse to fine: markdown headings (`#` .. `####`),
 * blank lines, single newlines, sentence ends, `!`/`?`, `;`, `,`. A piece that
 * no separator can break down is cut into fixed-size character slices.
 *
 * @param props.text - text to split
 * @param props.maxLen - size of one chunk (max: 3500). It is compared against both
 *   the character length and the prompt-token count of a piece.
 * @param props.overlapLen - size of the text shared by neighbouring chunks; expected
 *   to be smaller than maxLen. Defaults to 20% of maxLen.
 * @returns the chunks plus the sum of their token counts (counted with the 'system' role)
 * @throws Error whose message is getErrText(err) when splitting or counting fails
 */
export const splitText2Chunks = (props: { text: string; maxLen: number; overlapLen?: number }) => {
  const { text = '', maxLen, overlapLen = Math.floor(maxLen * 0.2) } = props;
  const tempMarker = 'SPLIT_HERE_SPLIT_HERE';

  // Every pattern wraps the WHOLE match in capture group 1, because the match is
  // re-inserted through `$1` next to the split marker. Anything outside the group
  // would be deleted, and a pattern without a group would emit a literal "$1".
  const stepReg: Record<number, RegExp> = {
    0: /^(#\s[^\n]+\n)/gm,
    1: /^(##\s[^\n]+\n)/gm,
    2: /^(###\s[^\n]+\n)/gm,
    3: /^(####\s[^\n]+\n)/gm,

    4: /(\n\n)/g,
    5: /([\n])/g,
    6: /([。]|\.\s)/g,
    7: /([!?]|!\s|\?\s)/g,
    8: /([;]|;\s)/g,
    9: /([,]|,\s)/g
  };

  /* True when `pending` adds nothing beyond the tail (overlap) of the chunk emitted last. */
  const isOverlapOnly = (chunks: string[], pending: string) =>
    chunks.length > 0 && chunks[chunks.length - 1].endsWith(pending);

  const splitTextRecursively = ({
    text = '',
    step,
    lastChunk,
    overlayChunk
  }: {
    text: string;
    step: number;
    lastChunk: string;
    overlayChunk: string;
  }): string[] => {
    // Small enough already. Callers flush any pending text before handing over
    // a piece this short, so nothing is lost by returning it alone.
    if (text.length <= maxLen) {
      return [text];
    }
    const reg = stepReg[step];
    const isMarkdownSplit = step < 4;

    if (!reg) {
      // No separator left: slice by characters. The pending chunk is prepended so it
      // is kept, and the stride is clamped so the loop always advances.
      const source = lastChunk + text;
      const stride = Math.max(1, maxLen - overlapLen);
      const slices: string[] = [];
      for (let i = 0; i < source.length; i += stride) {
        slices.push(source.slice(i, i + maxLen));
      }
      return slices;
    }

    // Headings start a piece (marker before), other separators end one (marker after).
    const splitTexts = text
      .replace(reg, isMarkdownSplit ? `${tempMarker}$1` : `$1${tempMarker}`)
      .split(tempMarker)
      .filter((part) => part);

    let chunks: string[] = [];
    for (const part of splitTexts) {
      let chunkToken = countPromptTokens(lastChunk, '');
      const partToken = countPromptTokens(part, '');

      // The piece is too large on its own, or would overfill the pending chunk.
      if (partToken >= maxLen || chunkToken + partToken > maxLen * 1.4) {
        const pendingHasNewText = lastChunk !== '' && !isOverlapOnly(chunks, lastChunk);
        // Emit the pending chunk when it is big enough to stand alone, or when the
        // inner call would return `part` untouched and could not absorb it.
        if (chunkToken > maxLen * 0.7 || (part.length <= maxLen && pendingHasNewText)) {
          chunks.push(lastChunk);
          lastChunk = '';
          overlayChunk = '';
        }
        // Otherwise the small pending chunk seeds the finer-grained split.
        chunks = chunks.concat(
          splitTextRecursively({
            text: part,
            step: step + 1,
            lastChunk,
            overlayChunk
          })
        );
        lastChunk = '';
        overlayChunk = '';
        continue;
      }

      // The piece fits: append it to the pending chunk.
      lastChunk += part;
      chunkToken += partToken; // Definitely less than 1.4 * maxLen

      // Near the end of a chunk, remember short pieces as overlap for the next chunk.
      if (
        overlapLen !== 0 &&
        !isMarkdownSplit &&
        chunkToken >= maxLen - overlapLen &&
        partToken < overlapLen
      ) {
        overlayChunk += part;
      }
      if (chunkToken >= maxLen) {
        chunks.push(lastChunk);
        lastChunk = overlayChunk;
        overlayChunk = '';
      }
    }

    /* Emit the remainder unless it is only the overlap of the chunk already emitted. */
    if (lastChunk && !isOverlapOnly(chunks, lastChunk)) {
      chunks.push(lastChunk);
    }

    return chunks;
  };

  try {
    const chunks = splitTextRecursively({ text, step: 0, lastChunk: '', overlayChunk: '' });

    const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);

    return {
      chunks,
      tokens
    };
  } catch (err) {
    throw new Error(getErrText(err));
  }
};
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/* Only the token of gpt-3.5-turbo is used */
import type { ChatItemType } from '@fastgpt/global/core/chat/type';
import type { ChatItemType } from '../../../core/chat/type';
import { Tiktoken } from 'js-tiktoken/lite';
import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constant';
import { adaptChat2GptMessages } from '../../../core/chat/adapt';
import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constant';
import encodingJson from './cl100k_base.json';

/* init tikToken obj */
Expand Down Expand Up @@ -55,17 +55,6 @@ export function countMessagesTokens({ messages }: { messages: ChatItemType[] })
return totalTokens;
}

/* Keep the first `length` tokens of `text`; if encoding/decoding throws, keep the first `length` characters instead */
export function sliceTextByTokens({ text, length }: { text: string; length: number }) {
  const tokenizer = getTikTokenEnc();

  try {
    const allTokens = tokenizer.encode(text);
    const keptTokens = allTokens.slice(0, length);
    return tokenizer.decode(keptTokens);
  } catch (error) {
    // best-effort fallback: cut by characters
    return text.slice(0, length);
  }
}

/* slice messages from top to bottom by maxTokens */
export function sliceMessagesTB({
messages,
Expand Down
5 changes: 5 additions & 0 deletions packages/global/common/string/tiktoken/type.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import type { Tiktoken } from 'js-tiktoken';

/* Adds a `TikToken` variable, typed as js-tiktoken's `Tiktoken`, to the global scope. */
/* NOTE(review): presumably holds a shared encoder instance — confirm where it is assigned. */
declare global {
var TikToken: Tiktoken;
}
19 changes: 17 additions & 2 deletions packages/global/common/string/tools.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import crypto from 'crypto';

/* Tell whether a string looks like a web link: "scheme://…", "www.…" or a leading "/" */
export function strIsLink(str?: string) {
  const linkPattern = /^((http|https)?:\/\/|www\.|\/)[^\s/$.?#].[^\s]*$/i;
  // empty / missing input is never a link
  return !!str && linkPattern.test(str);
}

export const hashStr = (psw: string) => {
return crypto.createHash('sha256').update(psw).digest('hex');
/* hash string */
export const hashStr = (str: string) => {
return crypto.createHash('sha256').update(str).digest('hex');
};

/* simple text, remove chinese space and extra \n */
Expand All @@ -20,3 +22,16 @@ export const simpleText = (text: string) => {

return text;
};

/*
  replace {{variable}} to value

  Every occurrence of `{{key}}` in `text` is replaced by the matching value of `obj`.
  Keys are matched literally and values are inserted verbatim: regex metacharacters in
  a key and `$`-sequences in a value have no special meaning.
  Values that are neither string nor number are skipped.
  Returns the replaced text ('' when the result is empty).
*/
export function replaceVariable(text: string, obj: Record<string, string | number>) {
  for (const key in obj) {
    const val = obj[key];
    if (!['string', 'number'].includes(typeof val)) continue;

    // Escape the key so characters such as "." or "(" are not read as regex syntax.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const replacement = String(val);

    // A replacer function keeps "$&", "$1", "$$" in the value from being expanded.
    text = text.replace(new RegExp(`{{(${escapedKey})}}`, 'g'), () => replacement);
  }
  return text || '';
}
5 changes: 5 additions & 0 deletions packages/global/core/ai/api.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* Re-rank request: one query string plus the candidate items, each an id with its text. */
/* NOTE(review): request/response roles are inferred from the type names — confirm against the API handler. */
export type PostReRankProps = {
query: string;
inputs: { id: string; text: string }[];
};
/* Re-rank result: one numeric score per id. */
export type PostReRankResponse = { id: string; score: number }[];
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constant';
import type { ChatMessageItemType } from '@fastgpt/global/core/ai/type.d';
import type { ChatItemType } from '../../core/chat/type.d';
import { ChatRoleEnum } from '../../core/chat/constants';
import { ChatCompletionRequestMessageRoleEnum } from '../../core/ai/constant';
import type { ChatMessageItemType } from '../../core/ai/type.d';

const chat2Message = {
[ChatRoleEnum.AI]: ChatCompletionRequestMessageRoleEnum.Assistant,
Expand Down
20 changes: 20 additions & 0 deletions packages/global/core/dataset/api.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { DatasetDataIndexItemType } from './type';

/* ================= dataset ===================== */

/* ================= collection ===================== */

/* ================= data ===================== */
/* One raw search record: string identifiers (snake_case keys) and a numeric score. */
/* NOTE(review): presumably mirrors the Pg row returned by the vector search; `tmb_id` looks like a team-member id — confirm. */
export type PgSearchRawType = {
id: string;
team_id: string;
tmb_id: string;
collection_id: string;
data_id: string;
score: number;
};
/* One data chunk to push: required `q`, optional `a`, and optional index items given without their `dataId`. */
export type PushDatasetDataChunkProps = {
q: string; // embedding content
a?: string; // bonus content
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};
Loading