Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 131 additions & 24 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"runtime/debug"
"sort"
"sync"
"sync/atomic"
"time"

"zee/audio"
Expand All @@ -24,6 +25,7 @@
"zee/log"
"zee/shutdown"
"zee/transcriber"
"zee/update"

Check failure on line 28 in main.go

View workflow job for this annotation

GitHub Actions / test

package zee/update is not in std (/opt/hostedtoolcache/go/1.24.12/x64/src/zee/update)
)

var version = "dev"
Expand All @@ -32,6 +34,7 @@

var activeTranscriber transcriber.Transcriber
var autoPaste bool
var transcriptionsMu sync.Mutex
var transcriptions []TranscriptionRecord
var percentileStats PercentileStats
var streamEnabled bool
Expand Down Expand Up @@ -68,9 +71,41 @@
return "mic: " + name + " (ctrl+g)"
}

const recordTail = 250 * time.Millisecond
const recordTail = 500 * time.Millisecond

func run() {
if len(os.Args) > 1 && os.Args[1] == "update" {
if version == "dev" {
fmt.Println("Dev build — cannot check for updates.")
os.Exit(0)
}
fmt.Printf("zee %s — checking for updates...\n", version)
rel, err := update.CheckLatest(version)
if err != nil {
fmt.Printf("Error: %v\n", err)
os.Exit(1)
}
if rel == nil {
fmt.Println("Already up to date.")
os.Exit(0)
}
fmt.Printf("Update available: %s -> %s\n", version, rel.Version)
fmt.Print("Continue? [y/N] ")
var answer string
fmt.Scanln(&answer)
if answer != "y" && answer != "Y" {
fmt.Println("Aborted.")
os.Exit(0)
}
fmt.Printf("Downloading %s...\n", rel.Version)
if err := update.Apply(rel); err != nil {
fmt.Printf("Error: %v\n", err)
os.Exit(1)
}
fmt.Printf("Updated to %s\n", rel.Version)
os.Exit(0)
}

benchmarkFile := flag.String("benchmark", "", "Run benchmark with WAV file instead of live recording")
benchmarkRuns := flag.Int("runs", 3, "Number of benchmark iterations")
autoPasteFlag := flag.Bool("autopaste", true, "Auto-paste to focused window after transcription")
Expand Down Expand Up @@ -236,6 +271,10 @@

<-tuiReady

update.StartBackgroundCheck(version, log.Dir(), func(rel update.Release) {
tuiSend(UpdateAvailableMsg{Version: rel.Version})
})

sigChan := make(chan os.Signal, 1)
shutdown.Notify(sigChan)
go func() {
Expand Down Expand Up @@ -280,7 +319,7 @@
tuiSend(RecordingStartMsg{})
go beep.PlayStart()

err := handleRecording(captureDevice, hy.StopChan())
_, err := handleRecording(captureDevice, hy.StopChan())
if err != nil {
logToTUI("Error recording: %v", err)
log.Errorf("recording error: %v", err)
Expand All @@ -298,8 +337,7 @@
tuiSend(RecordingStartMsg{})
go beep.PlayStart()

err := handleRecording(captureDevice, hk.Keyup())
log.Info("hotkey_up")
_, err := handleRecording(captureDevice, hk.Keyup())
if err != nil {
logToTUI("Error recording: %v", err)
log.Errorf("recording error: %v", err)
Expand Down Expand Up @@ -334,17 +372,16 @@
}
}

func handleRecording(capture audio.CaptureDevice, keyup <-chan struct{}) error {
func handleRecording(capture audio.CaptureDevice, keyup <-chan struct{}) (<-chan struct{}, error) {
sess, err := activeTranscriber.NewSession(context.Background(), transcriber.SessionConfig{
Stream: streamEnabled,
Format: activeFormat,
Language: activeTranscriber.GetLanguage(),
})
if err != nil {
return err
return nil, err
}

// Read clipboard in background — we only need it after recording ends
clipCh := make(chan string, 1)
if autoPaste {
go func() {
Expand All @@ -353,9 +390,13 @@
}()
}

var lastTranscript atomic.Int64
updatesDone := make(chan struct{})
go func() {
defer close(updatesDone)
var prev string
for text := range sess.Updates() {
lastTranscript.Store(time.Now().UnixNano())
tuiSend(LiveTranscriptionMsg{Text: text})
if autoPaste {
delta := text[len(prev):]
Expand All @@ -368,35 +409,56 @@
}
}()

totalFrames, err := record(capture, keyup, sess)
tuiSend(RecordingStopMsg{})
totalFrames, err := record(capture, keyup, sess, &lastTranscript)

if err != nil {
sess.Close()
return nil, err
}
if totalFrames < uint64(encoder.SampleRate/10) {
sess.Close()
return nil, nil
}

done := make(chan struct{})
go func() {
finishTranscription(sess, clipCh, updatesDone)
close(done)
}()
return done, nil
}

func finishTranscription(sess transcriber.Session, clipCh chan string, updatesDone <-chan struct{}) {
result, closeErr := sess.Close()
<-updatesDone // wait for updates goroutine to drain

var clipPrev string
if autoPaste {
clipPrev = <-clipCh
}
tuiSend(LiveTranscriptionMsg{Text: ""})

if err != nil {
return err
}
if totalFrames < uint64(encoder.SampleRate/10) {
return nil
if closeErr != nil {
log.Errorf("transcription error: %v", closeErr)
logToTUI("Error: %v", closeErr)
}

if !streamEnabled && result.HasText && autoPaste {
if closeErr == nil && !streamEnabled && result.HasText && autoPaste {
clipboard.Copy(result.Text)
clipboard.Paste()
}

if autoPaste && clipPrev != "" {
go func() {
time.Sleep(800 * time.Millisecond)
time.Sleep(600 * time.Millisecond)
clipboard.Copy(clipPrev)
}()
}

if closeErr != nil {
return
}

displayText := result.Text
if result.NoSpeech {
displayText = "(no speech detected)"
Expand All @@ -422,8 +484,10 @@
MemoryAllocMB: result.MemoryAllocMB,
MemoryPeakMB: result.MemoryPeakMB,
}
transcriptionsMu.Lock()
transcriptions = append(transcriptions, record)
updatePercentileStats()
transcriptionsMu.Unlock()
log.TranscriptionMetrics(log.Metrics(record), activeFormat, activeFormat, activeTranscriber.Name(), bs.ConnReused, bs.TLSProtocol)
log.Confidence(bs.Confidence)
}
Expand All @@ -446,19 +510,16 @@
if !result.NoSpeech {
log.TranscriptionText(result.Text)
}

if closeErr != nil {
log.Errorf("session close error: %v", closeErr)
}
return nil
}

func record(capture audio.CaptureDevice, keyup <-chan struct{}, sess transcriber.Session) (uint64, error) {
func record(capture audio.CaptureDevice, keyup <-chan struct{}, sess transcriber.Session, lastTranscript *atomic.Int64) (uint64, error) {
var bufMu sync.Mutex
var totalFrames uint64
var peakLevel float64
var noVoiceBeeped bool
var stopped bool
var voiceDetected bool
var lastVoiceTime time.Time
done := make(chan struct{})

capture.SetCallback(func(data []byte, frameCount uint32) {
Expand Down Expand Up @@ -489,6 +550,12 @@
if rms > peakLevel {
peakLevel = rms
}
if rms >= voiceThreshold {
if !voiceDetected {
voiceDetected = true
}
lastVoiceTime = time.Now()
}
bufMu.Unlock()
}
})
Expand All @@ -510,12 +577,24 @@
elapsed := time.Since(recordStart).Seconds()
tuiSend(RecordingTickMsg{Duration: elapsed})
checkNoVoice(&bufMu, elapsed, &peakLevel, &noVoiceBeeped)
bufMu.Lock()
vd := voiceDetected
bufMu.Unlock()
if vd {
checkSilenceDuring(&bufMu, &lastVoiceTime)
}
if streamEnabled {
checkTranscriptSilence(lastTranscript)
}
}
}
}()

go func() {
<-keyup
log.Info("hotkey_up")
tuiSend(RecordingStopMsg{})
go beep.PlayEnd()
if streamEnabled {
time.Sleep(recordTail)
}
Expand All @@ -524,8 +603,6 @@
<-done

capture.Stop()
log.Info("beep_end")
beep.PlayEnd()
capture.ClearCallback()

bufMu.Lock()
Expand All @@ -544,11 +621,41 @@
}
mu.Unlock()
if shouldWarn {
log.Info("no_voice_warning")
tuiSend(NoVoiceWarningMsg{})
beep.PlayError()
}
}

// silenceTimeout is the elapsed-time threshold that checkSilenceDuring and
// checkTranscriptSilence compare against before emitting a warning.
const silenceTimeout = 8 * time.Second

// checkSilenceDuring warns when more than silenceTimeout has passed since
// *lastVoiceTime.
//
// The timestamp is read and, if the threshold was exceeded, reset to the
// current time while holding mu. The warning itself (log entry, TUI message
// and error beep) is emitted only after mu has been released.
func checkSilenceDuring(mu *sync.Mutex, lastVoiceTime *time.Time) {
	mu.Lock()
	if time.Since(*lastVoiceTime) <= silenceTimeout {
		// Threshold not exceeded: nothing to report.
		mu.Unlock()
		return
	}
	// Restart the window so the next warning needs another full timeout.
	*lastVoiceTime = time.Now()
	mu.Unlock()

	log.Info("silence_during_warning")
	tuiSend(NoVoiceWarningMsg{})
	beep.PlayError()
}

// checkTranscriptSilence warns when the timestamp stored in lastTranscript
// (Unix nanoseconds) is older than silenceTimeout.
//
// A stored value of zero means no transcript has been received yet, and no
// warning is issued. When a warning fires, the stored timestamp is first
// replaced with the current time, then a log entry, a TUI message and an
// error beep are emitted.
func checkTranscriptSilence(lastTranscript *atomic.Int64) {
	stamp := lastTranscript.Load()
	switch {
	case stamp == 0:
		// No transcript received yet.
		return
	case time.Since(time.Unix(0, stamp)) <= silenceTimeout:
		return
	}

	// Restart the window so the next warning needs another full timeout.
	lastTranscript.Store(time.Now().UnixNano())
	log.Info("transcript_silence_warning")
	tuiSend(TranscriptSilenceMsg{})
	beep.PlayError()
}

func updatePercentileStats() {
n := len(transcriptions)
if n == 0 {
Expand Down
Loading
Loading