-
Notifications
You must be signed in to change notification settings - Fork 23
/
meeting_summarization.go
292 lines (243 loc) · 9.71 KB
/
meeting_summarization.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
package main
import (
"fmt"
"io"
"os/exec"
"strings"
sq "github.com/Masterminds/squirrel"
"github.com/mattermost/mattermost-plugin-ai/server/ai"
"github.com/mattermost/mattermost-plugin-ai/server/ai/subtitles"
"github.com/mattermost/mattermost/server/public/model"
"github.com/pkg/errors"
)
// Post prop keys used by the meeting summarization flow.
const (
	// ReferencedRecordingFileID is the prop key under which a transcript post
	// records the ID of the recording file it was generated from.
	ReferencedRecordingFileID = "referenced_recording_file_id"

	// ReferencedTranscriptPostID is the prop key under which a summary post
	// records the ID of the transcription post it summarizes.
	ReferencedTranscriptPostID = "referenced_transcript_post_id"

	// NoRegen is the prop key set to "true" on the bot's acknowledgement posts.
	// NOTE(review): presumably this disables regeneration for the post; confirm
	// against the code that reads the prop.
	NoRegen = "no_regen"
)
// getCaptionsFileIDFromProps extracts the file ID of the first entry in the
// post's "captions" prop.
//
// The prop is expected to be a non-empty list whose first element is an object
// with a string "file_id" field. It returns an error when the post is nil, when
// the prop is missing or empty, or when the first entry does not have that shape.
func getCaptionsFileIDFromProps(post *model.Post) (fileID string, err error) {
	if post == nil {
		return "", errors.New("post is nil")
	}

	captions, ok := post.GetProp("captions").([]interface{})
	if !ok || len(captions) == 0 {
		return "", errors.New("no captions on post")
	}

	// Calls will only ever have one for now.
	// Checked assertions are used instead of panicking and recovering, so a
	// malformed prop is reported as an ordinary error and unrelated panics are
	// never swallowed.
	caption, ok := captions[0].(map[string]interface{})
	if !ok {
		return "", errors.New("unable to parse captions on post")
	}

	fileID, ok = caption["file_id"].(string)
	if !ok {
		return "", errors.New("unable to parse captions on post")
	}

	return fileID, nil
}
// createTranscription converts the recording identified by recordingFileID to
// MP3 by piping it through ffmpeg and hands the resulting audio stream to the
// configured transcriber.
//
// When the recording is larger than WhisperAPILimit, ffmpeg is additionally
// asked to keep only the first audio stream, downmix it to mono and re-encode
// at 32k / 16 kHz to shrink the output.
//
// It returns an error if ffmpeg is not configured, the file cannot be loaded,
// ffmpeg cannot be started or exits unsuccessfully, or transcription fails.
func (p *Plugin) createTranscription(recordingFileID string) (*subtitles.Subtitles, error) {
	if p.ffmpegPath == "" {
		return nil, errors.New("ffmpeg not installed")
	}

	recordingFileInfo, err := p.pluginAPI.File.GetInfo(recordingFileID)
	if err != nil {
		return nil, errors.Wrap(err, "unable to get calls file info")
	}

	fileReader, err := p.pluginAPI.File.Get(recordingFileID)
	if err != nil {
		return nil, errors.Wrap(err, "unable to read calls file")
	}

	var cmd *exec.Cmd
	if recordingFileInfo.Size > WhisperAPILimit {
		cmd = exec.Command(p.ffmpegPath, "-i", "pipe:0", "-ac", "1", "-map", "0:a:0", "-b:a", "32k", "-ar", "16000", "-f", "mp3", "pipe:1") //nolint:gosec
	} else {
		cmd = exec.Command(p.ffmpegPath, "-i", "pipe:0", "-f", "mp3", "pipe:1") //nolint:gosec
	}

	cmd.Stdin = fileReader

	// Collect stderr concurrently (os/exec copies into the writer from its own
	// goroutine). Reading it only after transcription finished could deadlock:
	// ffmpeg blocks once the stderr pipe fills up while we wait on stdout.
	var ffmpegStderr strings.Builder
	cmd.Stderr = &ffmpegStderr

	audioReader, err := cmd.StdoutPipe()
	if err != nil {
		return nil, errors.Wrap(err, "couldn't create stdout pipe")
	}

	if err = cmd.Start(); err != nil {
		return nil, errors.Wrap(err, "couldn't run ffmpeg")
	}

	// abort stops ffmpeg and reaps it so a failed run leaves no stray process
	// behind. Errors are ignored on purpose: we are already on a failure path
	// and the original error is the one worth reporting.
	abort := func() {
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
	}

	transcriber := p.getTranscribe()
	// Limit reader should probably error out instead of just silently failing
	transcription, err := transcriber.Transcribe(io.LimitReader(audioReader, WhisperAPILimit))
	if err != nil {
		abort()
		return nil, errors.Wrap(err, "unable to transcribe")
	}

	// Discard whatever the transcriber did not consume (e.g. audio beyond the
	// limit). Otherwise ffmpeg stays blocked writing to a full stdout pipe and
	// Wait never returns.
	if _, err := io.Copy(io.Discard, audioReader); err != nil {
		abort()
		return nil, errors.Wrap(err, "unable to drain ffmpeg output")
	}

	if err := cmd.Wait(); err != nil {
		p.pluginAPI.Log.Debug("ffmpeg stderr: " + ffmpegStderr.String())
		return nil, errors.Wrap(err, "error while waiting for ffmpeg")
	}

	return transcription, nil
}
// newCallRecordingThread sends the requesting user an acknowledgement post that
// links to recordingPost and then kicks off summarization of the recording file
// fileID underneath that post.
//
// The acknowledgement post is sent via p.botDM and flagged with the NoRegen
// prop. It is returned on success; any error from sending the post or from
// starting the summarization is returned unchanged.
func (p *Plugin) newCallRecordingThread(requestingUser *model.User, recordingPost *model.Post, channel *model.Channel, fileID string) (*model.Post, error) {
	baseURL := p.API.GetConfig().ServiceSettings.SiteURL
	ackMessage := fmt.Sprintf("Sure, I will summarize this recording: %s/_redirect/pl/%s\n", *baseURL, recordingPost.Id)

	ackPost := &model.Post{Message: ackMessage}
	ackPost.AddProp(NoRegen, "true")

	if sendErr := p.botDM(requestingUser.Id, ackPost); sendErr != nil {
		return nil, sendErr
	}

	// The acknowledgement post becomes the root of the summarization thread.
	if startErr := p.summarizeCallRecording(ackPost.Id, requestingUser, fileID, channel); startErr != nil {
		return nil, startErr
	}

	return ackPost, nil
}
// newCallTranscriptionSummaryThread summarizes the VTT transcription referenced
// by transcriptionPost and streams the summary as a reply to an acknowledgement
// post sent to the requesting user.
//
// Steps, in order:
//  1. require exactly one attached file on transcriptionPost;
//  2. send the acknowledgement post (flagged NoRegen) via p.botDM;
//  3. resolve the captions file from the post props and verify that the post
//     owning that file lives in channel;
//  4. parse the file as VTT, summarize it, and stream the result into a new
//     post rooted at the acknowledgement post.
//
// The acknowledgement post is returned on success.
func (p *Plugin) newCallTranscriptionSummaryThread(requestingUser *model.User, transcriptionPost *model.Post, channel *model.Channel) (*model.Post, error) {
	if len(transcriptionPost.FileIds) != 1 {
		return nil, errors.New("Unexpected number of files in calls post")
	}

	baseURL := p.API.GetConfig().ServiceSettings.SiteURL
	ackMessage := fmt.Sprintf("Sure, I will summarize this transcription: %s/_redirect/pl/%s\n", *baseURL, transcriptionPost.Id)

	ackPost := &model.Post{Message: ackMessage}
	ackPost.AddProp(NoRegen, "true")
	if sendErr := p.botDM(requestingUser.Id, ackPost); sendErr != nil {
		return nil, sendErr
	}

	captionsFileID, err := getCaptionsFileIDFromProps(transcriptionPost)
	if err != nil {
		return nil, errors.Wrap(err, "unable to get transcription file id")
	}

	captionsInfo, err := p.pluginAPI.File.GetInfo(captionsFileID)
	if err != nil {
		return nil, errors.Wrap(err, "unable to get transcription file info")
	}

	owningPost, err := p.pluginAPI.Post.GetPost(captionsInfo.PostId)
	if err != nil {
		return nil, errors.Wrap(err, "unable to get transcription file post")
	}

	// Refuse files whose owning post is not in the channel we were asked about.
	if owningPost.ChannelId != channel.Id {
		return nil, errors.New("strange configuration of calls transcription file")
	}

	captionsReader, err := p.pluginAPI.File.Get(captionsFileID)
	if err != nil {
		return nil, errors.Wrap(err, "unable to read calls file")
	}

	parsed, err := subtitles.NewSubtitlesFromVTT(captionsReader)
	if err != nil {
		return nil, errors.Wrap(err, "unable to parse transcription file")
	}

	convCtx := p.MakeConversationContext(requestingUser, channel, nil)
	stream, err := p.summarizeTranscription(parsed, convCtx)
	if err != nil {
		return nil, errors.Wrap(err, "unable to summarize transcription")
	}

	// The summary is posted as a reply to the acknowledgement and points back at
	// the transcription post it was generated from.
	replyPost := &model.Post{
		RootId:    ackPost.Id,
		ChannelId: ackPost.ChannelId,
		Message:   "",
	}
	replyPost.AddProp(ReferencedTranscriptPostID, transcriptionPost.Id)

	if streamErr := p.streamResultToNewPost(requestingUser.Id, stream, replyPost); streamErr != nil {
		return nil, errors.Wrap(streamErr, "unable to stream result to post")
	}

	return ackPost, nil
}
// summarizeCallRecording posts a "processing" placeholder under rootID and then
// transcribes and summarizes the recording in a background goroutine.
//
// Only the error from sending the placeholder post is returned to the caller.
// Failures in the background work are logged and surfaced to the user by
// replacing the placeholder's message with a generic apology.
func (p *Plugin) summarizeCallRecording(rootID string, requestingUser *model.User, recordingFileID string, channel *model.Channel) error {
	statusPost := &model.Post{
		RootId:  rootID,
		Message: "Processing audio into transcription. This will take some time...",
	}
	statusPost.AddProp(ReferencedRecordingFileID, recordingFileID)

	if sendErr := p.botDM(requestingUser.Id, statusPost); sendErr != nil {
		return sendErr
	}

	// process performs the long-running work: transcribe, upload the transcript,
	// summarize, attach the transcript to the placeholder, then stream the
	// summary into that same post.
	process := func() error {
		transcription, err := p.createTranscription(recordingFileID)
		if err != nil {
			return errors.Wrap(err, "failed to create transcription")
		}

		vtt := strings.NewReader(transcription.FormatVTT())
		uploaded, err := p.pluginAPI.File.Upload(vtt, "transcript.txt", channel.Id)
		if err != nil {
			return errors.Wrap(err, "unable to upload transcript")
		}

		convCtx := p.MakeConversationContext(requestingUser, channel, nil)
		stream, err := p.summarizeTranscription(transcription, convCtx)
		if err != nil {
			return errors.Wrap(err, "unable to summarize transcription")
		}

		if attachErr := p.updatePostWithFile(statusPost, uploaded); attachErr != nil {
			return errors.Wrap(attachErr, "unable to update transcript post")
		}

		if streamErr := p.streamResultToPost(stream, statusPost); streamErr != nil {
			return errors.Wrap(streamErr, "unable to stream result to post")
		}

		return nil
	}

	go func() {
		failure := process()
		if failure == nil {
			return
		}

		// Update to an error if the background work returned one.
		statusPost.Message = "Sorry! Somthing went wrong. Check the server logs for details."
		if updateErr := p.pluginAPI.Post.UpdatePost(statusPost); updateErr != nil {
			p.API.LogError("Failed to update post in error handling handleCallRecordingPost", "error", updateErr)
		}
		p.API.LogError("Error in call recording post", "error", failure)
	}()

	return nil
}
// summarizeTranscription asks the configured LLM for a meeting summary of the
// given transcription and returns the streaming result.
//
// The transcription is first rendered with FormatForLLM. If its token count
// exceeds the working budget (75% of the model's token limit minus
// ContextTokenMargin), the text is split on sentence boundaries, each chunk is
// summarized with a non-streaming completion, and the joined chunk summaries
// are summarized instead; the prompt is told so through the "IsChunked"
// parameter.
//
// context is received by value; its PromptParameters field is overwritten here.
func (p *Plugin) summarizeTranscription(transcription *subtitles.Subtitles, context ai.ConversationContext) (*ai.TextStreamResult, error) {
	llmFormattedTranscription := transcription.FormatForLLM()
	tokens := p.getLLM().CountTokens(llmFormattedTranscription)

	tokenLimitWithMargin := int(float64(p.getLLM().TokenLimit())*0.75) - ContextTokenMargin
	// A budget of zero is as unusable as a negative one: it would be used both
	// as the chunking threshold and as the chunk size, so fall back for any
	// non-positive value.
	if tokenLimitWithMargin <= 0 {
		tokenLimitWithMargin = ContextTokenMargin / 2
	}

	isChunked := false
	if tokens > tokenLimitWithMargin {
		p.pluginAPI.Log.Debug("Transcription too long, summarizing in chunks.", "tokens", tokens, "limit", tokenLimitWithMargin)

		// NOTE(review): the *4 presumably converts a token budget into an
		// approximate character budget; confirm against splitPlaintextOnSentences.
		chunks := splitPlaintextOnSentences(llmFormattedTranscription, tokenLimitWithMargin*4)
		summarizedChunks := make([]string, 0, len(chunks))
		p.pluginAPI.Log.Debug("Split into chunks", "chunks", len(chunks))

		for _, chunk := range chunks {
			context.PromptParameters = map[string]string{"TranscriptionChunk": chunk}
			summarizeChunkPrompt, err := p.prompts.ChatCompletion(ai.PromptSummarizeChunk, context)
			if err != nil {
				return nil, errors.Wrap(err, "unable to get summarize chunk prompt")
			}

			summarizedChunk, err := p.getLLM().ChatCompletionNoStream(summarizeChunkPrompt)
			if err != nil {
				return nil, errors.Wrap(err, "unable to get summarized chunk")
			}

			summarizedChunks = append(summarizedChunks, summarizedChunk)
		}

		// From here on the "transcription" is the concatenation of the chunk summaries.
		llmFormattedTranscription = strings.Join(summarizedChunks, "\n\n")
		isChunked = true
		p.pluginAPI.Log.Debug("Completed chunk summarization", "chunks", len(summarizedChunks), "tokens", p.getLLM().CountTokens(llmFormattedTranscription))
	}

	context.PromptParameters = map[string]string{"Transcription": llmFormattedTranscription, "IsChunked": fmt.Sprintf("%t", isChunked)}
	summaryPrompt, err := p.prompts.ChatCompletion(ai.PromptMeetingSummary, context)
	if err != nil {
		return nil, errors.Wrap(err, "unable to get meeting summary prompt")
	}

	summaryStream, err := p.getLLM().ChatCompletion(summaryPrompt)
	if err != nil {
		return nil, errors.Wrap(err, "unable to get meeting summary")
	}

	return summaryStream, nil
}
// updatePostWithFile attaches an already uploaded file to post.
//
// It first issues an UPDATE on the FileInfo table that sets PostId and
// ChannelId for the row matching fileinfo.Id, restricted to rows whose PostId
// is still empty. It then replaces the post's file list with that single file,
// clears the post message, and saves the post.
func (p *Plugin) updatePostWithFile(post *model.Post, fileinfo *model.FileInfo) error {
	// Only match the file while it is not yet bound to any post.
	unattached := sq.And{
		sq.Eq{"Id": fileinfo.Id},
		sq.Eq{"PostId": ""},
	}

	attachQuery := p.builder.
		Update("FileInfo").
		Set("PostId", post.Id).
		Set("ChannelId", post.ChannelId).
		Where(unattached)

	if _, execErr := p.execBuilder(attachQuery); execErr != nil {
		return errors.Wrap(execErr, "unable to update file info")
	}

	post.FileIds = []string{fileinfo.Id}
	post.Message = ""

	if saveErr := p.pluginAPI.Post.UpdatePost(post); saveErr != nil {
		return errors.Wrap(saveErr, "unable to update post")
	}

	return nil
}