-
Notifications
You must be signed in to change notification settings - Fork 54
/
prompt.ts
96 lines (82 loc) · 3.41 KB
/
prompt.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Copyright (c) 2022 Kazuki Nakayashiki.
// Modified work: Copyright (c) 2023 Qixiang Zhu.
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
/**
 * Builds the summarisation prompt sent to the model.
 *
 * The transcript is first shortened with `limitTranscriptByteLength`; then, in
 * both the title and the shortened transcript, every run of newlines is
 * replaced by a single space and surrounding whitespace is trimmed.
 *
 * @param title - Title to embed in the prompt.
 * @param transcript - Subtitle text to embed in the prompt.
 * @param byteLimit - Byte budget handed to `limitTranscriptByteLength`.
 * @returns Three newline-separated lines: the quoted title ("标题"), the quoted
 *   subtitles ("字幕"), and a trailing "中文总结:" (Chinese summary) cue.
 */
export function getSummaryPrompt(title: string, transcript: string, byteLimit: number) {
  const shortened = limitTranscriptByteLength(transcript, byteLimit);
  // Collapse each run of newlines into one space, then strip the edges.
  const toSingleLine = (value: string) => value.replace(/\n+/g, " ").trim();
  const promptLines = [
    `标题: "${toSingleLine(title)}"`,
    `字幕: "${toSingleLine(shortened)}"`,
    "中文总结:",
  ];
  return promptLines.join("\n");
}
/**
 * Returns the longest prefix of `str` whose UTF-8 encoding fits in `byteLimit`
 * bytes.
 *
 * The string is walked code point by code point, so the cut never lands inside
 * a surrogate pair and the result never exceeds the budget, even when the text
 * mixes characters of different encoded widths (e.g. ASCII and CJK).
 *
 * @param str - Text to shorten.
 * @param byteLimit - Maximum allowed size of the result in UTF-8 bytes.
 * @returns `str` unchanged when it already fits, otherwise the longest prefix
 *   that fits (possibly the empty string).
 */
export function limitTranscriptByteLength(str: string, byteLimit: number): string {
  let usedBytes = 0;
  let endIndex = 0; // position in UTF-16 code units, as expected by substring()
  for (const char of str) {
    const codePoint = char.codePointAt(0) ?? 0;
    // UTF-8 width of one code point; a lone surrogate counts as 3 bytes,
    // which is the size of the U+FFFD replacement an encoder would emit.
    const charBytes = codePoint < 0x80 ? 1 : codePoint < 0x800 ? 2 : codePoint < 0x10000 ? 3 : 4;
    if (usedBytes + charBytes > byteLimit) {
      return str.substring(0, endIndex);
    }
    usedBytes += charBytes;
    endIndex += char.length;
  }
  return str;
}
/**
 * Returns a new array with `Math.floor(arr.length / 2)` randomly chosen
 * positions removed. The surviving elements keep their original relative
 * order and the input array is not modified.
 *
 * @param arr - Source elements.
 * @returns The elements whose positions were not picked for removal.
 */
function filterHalfRandomly<T>(arr: T[]): T[] {
  const dropCount = Math.floor(arr.length / 2);
  const droppedPositions = new Set<number>();

  // Draw random positions until enough distinct ones have been collected;
  // a repeated draw leaves the set unchanged, so the loop simply draws again.
  while (droppedPositions.size < dropCount) {
    droppedPositions.add(Math.floor(Math.random() * arr.length));
  }

  // Copy over every element whose position was not selected for removal.
  const survivors: T[] = [];
  for (const [position, element] of arr.entries()) {
    if (droppedPositions.has(position)) {
      continue;
    }
    survivors.push(element);
  }
  return survivors;
}
/**
 * Returns the number of bytes `text` occupies when encoded as UTF-8.
 *
 * Counted per code point rather than via `unescape(encodeURIComponent(...))`,
 * so no intermediate string is built and malformed input (a lone surrogate)
 * is counted as 3 bytes instead of raising `URIError`.
 */
function getByteLength(text: string): number {
  let total = 0;
  for (const char of text) {
    const codePoint = char.codePointAt(0) ?? 0;
    total += codePoint < 0x80 ? 1 : codePoint < 0x800 ? 2 : codePoint < 0x10000 ? 3 : 4;
  }
  return total;
}

/** Returns the longest prefix of `text` that fits in `byteLimit` UTF-8 bytes without splitting a code point. */
function truncateToByteLength(text: string, byteLimit: number): string {
  let usedBytes = 0;
  let endIndex = 0; // position in UTF-16 code units
  for (const char of text) {
    const charBytes = getByteLength(char);
    if (usedBytes + charBytes > byteLimit) {
      return text.substring(0, endIndex);
    }
    usedBytes += charBytes;
    endIndex += char.length;
  }
  return text;
}

/** Reports whether any item in `textData` has exactly the given `text`. */
function itemInIt(textData: SubtitleItem[], text: string): boolean {
  return textData.some(t => t.text === text);
}

/** One subtitle line: its text and the numeric index used to order lines. */
type SubtitleItem = {
  text: string;
  index: number;
}

/**
 * Assembles a transcript, ordered by `index` and joined with single spaces,
 * whose UTF-8 size does not exceed `byteLimit`.
 *
 * - If the text of `newTextData` alone is too large, half of its items are
 *   dropped at random (via `filterHalfRandomly`) and the function retries.
 *   When there is at most one item left to drop, the text is truncated
 *   instead, which guarantees termination.
 * - Otherwise the remaining budget is filled with items from `oldTextData`
 *   whose text does not already occur in `newTextData`. The first item that
 *   does not fit is cut to the remaining budget and filling stops.
 *
 * Neither input array is modified.
 *
 * @param newTextData - Items that should be kept preferentially.
 * @param oldTextData - Items used to fill any leftover budget.
 * @param byteLimit - Maximum size of the returned text in UTF-8 bytes.
 * @returns The space-joined transcript.
 */
export function getSmallSizeTranscripts(newTextData: SubtitleItem[], oldTextData: SubtitleItem[], byteLimit: number): string {
  const byIndex = (a: SubtitleItem, b: SubtitleItem) => a.index - b.index;

  // Sort a copy: Array.prototype.sort works in place and would reorder the caller's array.
  const sortedNew = [...newTextData].sort(byIndex);
  const text = sortedNew.map(t => t.text).join(" ");
  const byteLength = getByteLength(text);

  if (byteLength > byteLimit) {
    if (sortedNew.length <= 1) {
      // Halving zero or one item removes nothing, so recursing would never end.
      return truncateToByteLength(text, byteLimit);
    }
    return getSmallSizeTranscripts(filterHalfRandomly(sortedNew), oldTextData, byteLimit);
  }

  const resultData = sortedNew.slice();
  let usedBytes = byteLength;
  for (const item of oldTextData) {
    if (itemInIt(sortedNew, item.text)) {
      continue;
    }
    // Every item after the first costs one extra byte for the joining space.
    const separatorBytes = resultData.length > 0 ? 1 : 0;
    const remainingBytes = byteLimit - usedBytes - separatorBytes;
    if (remainingBytes <= 0) {
      break;
    }
    const itemBytes = getByteLength(item.text);
    if (itemBytes > remainingBytes) {
      // Keep only the part that still fits, then stop: the budget is used up.
      const partialText = truncateToByteLength(item.text, remainingBytes);
      if (partialText.length > 0) {
        resultData.push({ text: partialText, index: item.index });
      }
      break;
    }
    resultData.push(item);
    usedBytes += itemBytes + separatorBytes;
  }

  return resultData.sort(byIndex).map(t => t.text).join(" ");
}