Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ func run() {
openaiKey := os.Getenv("OPENAI_API_KEY")
dgKey := os.Getenv("DEEPGRAM_API_KEY")
mistralKey := os.Getenv("MISTRAL_API_KEY")
elevenLabsKey := os.Getenv("ELEVENLABS_API_KEY")

type providerDef struct {
name, label, key string
Expand All @@ -372,6 +373,7 @@ func run() {
{"openai", "OpenAI", openaiKey, transcriber.OpenAIModels, func() transcriber.Transcriber { return transcriber.NewOpenAI(openaiKey) }},
{"deepgram", "Deepgram", dgKey, transcriber.DeepgramModels, func() transcriber.Transcriber { return transcriber.NewDeepgram(dgKey) }},
{"mistral", "Mistral", mistralKey, transcriber.MistralModels, func() transcriber.Transcriber { return transcriber.NewMistral(mistralKey) }},
{"elevenlabs", "ElevenLabs", elevenLabsKey, transcriber.ElevenLabsModels, func() transcriber.Transcriber { return transcriber.NewElevenLabs(elevenLabsKey) }},
}

var trayModels []tray.Model
Expand Down
138 changes: 138 additions & 0 deletions transcriber/elevenlabs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package transcriber

import (
"bytes"
"context"
"encoding/json"
"fmt"
"mime/multipart"
"net/http"
)

// ModelScribeV2 is the ElevenLabs model identifier. It is the default model set
// by NewElevenLabs and the ID of the single entry in ElevenLabsModels.
const ModelScribeV2 = "scribe_v2"

// scribeV2Langs is the language list advertised for the Scribe V2 model, built
// by langsFromCodes from the two-letter codes below.
//
// NOTE(review): the codes look ordered by English language name rather than by
// code (e.g. "hy" follows "ar") — confirm before re-sorting, since the order may
// be what is shown to the user.
var scribeV2Langs = langsFromCodes([]string{
"af", "am", "ar", "hy", "as", "az", "be", "bn", "bs", "bg",
"my", "ca", "ny", "hr", "cs", "da", "nl", "en", "et", "fi",
"fr", "gl", "ka", "de", "el", "gu", "ha", "he", "hi", "hu",
"is", "ig", "id", "ga", "it", "ja", "jv", "kn", "kk", "km",
"ko", "ku", "ky", "lo", "lv", "ln", "lt", "lb", "mk", "ms",
"ml", "mt", "zh", "mi", "mr", "mn", "ne", "no", "oc", "or",
"ps", "fa", "pl", "pt", "pa", "ro", "ru", "sr", "sn", "sd",
"sk", "sl", "so", "es", "sw", "sv", "ta", "tg", "te", "th",
"tr", "uk", "ur", "uz", "vi", "cy", "wo", "xh", "zu",
})

// ElevenLabsModels lists the models exposed by the ElevenLabs provider.
// There is currently a single, non-streaming entry: Scribe V2.
var ElevenLabsModels = []ModelInfo{
	{
		ID:        ModelScribeV2,
		Label:     "Scribe V2",
		Stream:    false,
		Languages: scribeV2Langs,
	},
}

// ElevenLabs is the transcription provider backed by the ElevenLabs
// speech-to-text HTTP API. It embeds baseTranscriber (HTTP client, endpoint URL
// and selected model) and adds the API key used to authenticate requests.
type ElevenLabs struct {
baseTranscriber
// apiKey is sent as the "xi-api-key" request header by transcribe.
apiKey string
}

// NewElevenLabs returns an ElevenLabs transcriber authenticated with apiKey.
// The transcriber targets the v1 speech-to-text endpoint, uses a traced client
// created for that URL, and starts with ModelScribeV2 selected.
func NewElevenLabs(apiKey string) *ElevenLabs {
	endpoint := "https://api.elevenlabs.io/v1/speech-to-text"

	// The fields below are promoted from the embedded baseTranscriber.
	e := &ElevenLabs{apiKey: apiKey}
	e.client = NewTracedClient(endpoint)
	e.apiURL = endpoint
	e.model = ModelScribeV2
	return e
}

// SupportedLanguages returns the languages listed in ElevenLabsModels for the
// currently selected model.
func (e *ElevenLabs) SupportedLanguages() []Language {
	current := e.GetModel()
	return modelLanguages(ElevenLabsModels, current)
}
// Name returns the provider identifier, "elevenlabs".
func (e *ElevenLabs) Name() string {
	return "elevenlabs"
}
// Models returns the package-level ElevenLabsModels slice (not a copy).
func (e *ElevenLabs) Models() []ModelInfo {
	return ElevenLabsModels
}

// NewSession creates a batch transcription session that hands recorded audio to
// transcribe. Streaming is not supported: if cfg.Stream is set an error is
// returned instead of a session. The context argument is ignored.
func (e *ElevenLabs) NewSession(_ context.Context, cfg SessionConfig) (Session, error) {
	// Warm runs in its own goroutine so it never delays session creation.
	// It is started before the stream check, i.e. also when the request is
	// rejected below.
	go e.client.Warm()

	if !cfg.Stream {
		return newBatchSession(cfg, e.transcribe)
	}
	return nil, fmt.Errorf("elevenlabs does not support streaming transcription")
}

// elevenLabsResponse holds the fields of the speech-to-text JSON reply that are
// decoded by transcribe.
type elevenLabsResponse struct {
// Text is copied into Result.Text.
Text string `json:"text"`
// LanguageCode is decoded but not read by transcribe.
LanguageCode string `json:"language_code"`
// LanguageProbability is reported as Result.Confidence.
LanguageProbability float64 `json:"language_probability"`
// Words are the per-token entries of the reply. transcribe only looks at
// entries whose Type is "word": their LogProb values are averaged and the
// largest End becomes Result.Duration.
Words []struct {
Text string `json:"text"`
Type string `json:"type"`
// Start and End are presumably offsets in seconds — TODO confirm against
// the API reference.
Start float64 `json:"start"`
End float64 `json:"end"`
LogProb float64 `json:"logprob"`
} `json:"words"`
}

// transcribe uploads audioData to the ElevenLabs speech-to-text endpoint as a
// multipart form and converts the JSON reply into a Result.
//
// format is used only as the extension of the uploaded file name
// ("audio."+format). lang, when non-empty, is sent as the "language_code" form
// field; when empty the field is omitted. Audio-event tagging is always
// disabled via "tag_audio_events=false".
//
// An error is returned when the form cannot be built, the request cannot be
// created or sent, the API answers with a status other than 200, or the
// response body cannot be decoded as JSON.
func (e *ElevenLabs) transcribe(audioData []byte, format, lang string) (*Result, error) {
	var body bytes.Buffer
	writer := multipart.NewWriter(&body)

	part, err := writer.CreateFormFile("file", "audio."+format)
	if err != nil {
		return nil, err
	}
	if _, err := part.Write(audioData); err != nil {
		return nil, err
	}

	// Collect the text fields in the order they are sent, then write them in
	// one place so every WriteField error is checked.
	fields := [][2]string{{"model_id", e.GetModel()}}
	if lang != "" {
		fields = append(fields, [2]string{"language_code", lang})
	}
	fields = append(fields, [2]string{"tag_audio_events", "false"})
	for _, f := range fields {
		if err := writer.WriteField(f[0], f[1]); err != nil {
			return nil, fmt.Errorf("elevenlabs form field %q: %w", f[0], err)
		}
	}
	// Close writes the terminating boundary; without it the body is not a
	// valid multipart message, so a failure here must not be ignored.
	if err := writer.Close(); err != nil {
		return nil, fmt.Errorf("elevenlabs finalize form: %w", err)
	}

	req, err := http.NewRequest(http.MethodPost, e.apiURL, &body)
	if err != nil {
		return nil, err
	}
	req.Header.Set("xi-api-key", e.apiKey)
	req.Header.Set("Content-Type", writer.FormDataContentType())

	resp, err := e.client.Do(req)
	if err != nil {
		return nil, err
	}

	// resp.Body is consumed below as an already-read byte slice (it is passed
	// to string() and json.Unmarshal), so there is no stream to close here.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("elevenlabs API error %d: %s", resp.StatusCode, string(resp.Body))
	}

	var elResp elevenLabsResponse
	if err := json.Unmarshal(resp.Body, &elResp); err != nil {
		return nil, fmt.Errorf("elevenlabs response parse error: %w", err)
	}

	// Only entries typed "word" count towards the statistics; any other entry
	// type in the reply is skipped.
	var (
		logProbSum float64
		wordCount  int
		duration   float64
	)
	for _, w := range elResp.Words {
		if w.Type != "word" {
			continue
		}
		logProbSum += w.LogProb
		wordCount++
		// The latest word end is used as the audio duration.
		if w.End > duration {
			duration = w.End
		}
	}
	avgLogProb := logProbSum
	if wordCount > 0 {
		avgLogProb /= float64(wordCount)
	}

	return &Result{
		Text:    elResp.Text,
		Metrics: resp.Metrics,
		// NOTE(review): this is the language-detection probability, not a
		// transcription confidence — confirm that is what Result.Confidence
		// is meant to carry.
		Confidence: elResp.LanguageProbability,
		AvgLogProb: avgLogProb,
		Duration:   duration,
	}, nil
}
42 changes: 27 additions & 15 deletions transcriber/transcriber.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,21 +84,29 @@ type Transcriber interface {

// langLabels maps ISO-639-1 codes to display names.
var langLabels = map[string]string{
"af": "Afrikaans", "ar": "Arabic", "hy": "Armenian", "az": "Azerbaijani",
"be": "Belarusian", "bs": "Bosnian", "bg": "Bulgarian", "ca": "Catalan",
"zh": "Chinese", "hr": "Croatian", "cs": "Czech", "da": "Danish",
"nl": "Dutch", "en": "English", "et": "Estonian", "fi": "Finnish",
"fr": "French", "gl": "Galician", "de": "German", "el": "Greek",
"af": "Afrikaans", "am": "Amharic", "ar": "Arabic", "hy": "Armenian",
"as": "Assamese", "az": "Azerbaijani", "be": "Belarusian", "bn": "Bengali",
"bs": "Bosnian", "bg": "Bulgarian", "my": "Burmese", "ca": "Catalan",
"ny": "Chichewa", "zh": "Chinese", "hr": "Croatian", "cs": "Czech",
"da": "Danish", "nl": "Dutch", "en": "English", "et": "Estonian",
"fi": "Finnish", "fr": "French", "gl": "Galician", "ka": "Georgian",
"de": "German", "el": "Greek", "gu": "Gujarati", "ha": "Hausa",
"he": "Hebrew", "hi": "Hindi", "hu": "Hungarian", "is": "Icelandic",
"id": "Indonesian", "it": "Italian", "ja": "Japanese", "kn": "Kannada",
"kk": "Kazakh", "ko": "Korean", "lv": "Latvian", "lt": "Lithuanian",
"mk": "Macedonian", "ms": "Malay", "mr": "Marathi", "mi": "Maori",
"ne": "Nepali", "no": "Norwegian", "fa": "Persian", "pl": "Polish",
"pt": "Portuguese", "ro": "Romanian", "ru": "Russian", "sr": "Serbian",
"sk": "Slovak", "sl": "Slovenian", "es": "Spanish", "sw": "Swahili",
"sv": "Swedish", "tl": "Tagalog", "ta": "Tamil", "th": "Thai",
"tr": "Turkish", "uk": "Ukrainian", "ur": "Urdu", "vi": "Vietnamese",
"cy": "Welsh",
"ig": "Igbo", "id": "Indonesian", "ga": "Irish", "it": "Italian",
"ja": "Japanese", "jv": "Javanese", "kn": "Kannada", "kk": "Kazakh",
"km": "Khmer", "ko": "Korean", "ku": "Kurdish", "ky": "Kyrgyz",
"lo": "Lao", "lv": "Latvian", "ln": "Lingala", "lt": "Lithuanian",
"lb": "Luxembourgish", "mk": "Macedonian", "ms": "Malay", "ml": "Malayalam",
"mt": "Maltese", "mi": "Maori", "mr": "Marathi", "mn": "Mongolian",
"ne": "Nepali", "no": "Norwegian", "oc": "Occitan", "or": "Odia",
"ps": "Pashto", "fa": "Persian", "pl": "Polish", "pt": "Portuguese",
"pa": "Punjabi", "ro": "Romanian", "ru": "Russian", "sr": "Serbian",
"sn": "Shona", "sd": "Sindhi", "sk": "Slovak", "sl": "Slovenian",
"so": "Somali", "es": "Spanish", "sw": "Swahili", "sv": "Swedish",
"ta": "Tamil", "tg": "Tajik", "te": "Telugu", "th": "Thai",
"tl": "Tagalog", "tr": "Turkish", "uk": "Ukrainian", "ur": "Urdu",
"uz": "Uzbek", "vi": "Vietnamese", "cy": "Welsh", "wo": "Wolof",
"xh": "Xhosa", "zu": "Zulu",
}

func langsFromCodes(codes []string) []Language {
Expand Down Expand Up @@ -170,6 +178,7 @@ func New() (Transcriber, error) {
openaiKey := os.Getenv("OPENAI_API_KEY")
groqKey := os.Getenv("GROQ_API_KEY")
mistralKey := os.Getenv("MISTRAL_API_KEY")
elevenLabsKey := os.Getenv("ELEVENLABS_API_KEY")

if dgKey != "" {
return NewDeepgram(dgKey), nil
Expand All @@ -183,6 +192,9 @@ func New() (Transcriber, error) {
if mistralKey != "" {
return NewMistral(mistralKey), nil
}
if elevenLabsKey != "" {
return NewElevenLabs(elevenLabsKey), nil
}

return nil, fmt.Errorf("set DEEPGRAM_API_KEY, OPENAI_API_KEY, GROQ_API_KEY, or MISTRAL_API_KEY environment variable")
return nil, fmt.Errorf("set DEEPGRAM_API_KEY, OPENAI_API_KEY, GROQ_API_KEY, MISTRAL_API_KEY, or ELEVENLABS_API_KEY environment variable")
}
4 changes: 2 additions & 2 deletions tray/tray.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ func statusText() string {
lang = langCode
}
if provider == "" {
return "n/a"
return "𝘻𝘦𝘦"
}
return provider + " · " + model + " · " + lang
return "𝘻𝘦𝘦 — " + provider + " · " + model + " · " + lang
}

func updateStatus() {
Expand Down
58 changes: 29 additions & 29 deletions tray/tray_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func updateRecordingIcon(rec bool) {
if rec {
systray.SetIcon(iconRecHi)
if mRecord != nil {
mRecord.SetTitle("🔴 Stop Recording (Shift+Control+Space)")
mRecord.SetTitle(" Stop Recording (Shift+Control+Space)")
}
} else {
systray.SetTemplateIcon(iconIdleHi, iconIdle)
Expand Down Expand Up @@ -176,8 +176,6 @@ func onReady() {
}
})

systray.AddSeparator()

mCopy = systray.AddMenuItem("Copy Last Recorded Text", "Copy last transcription to clipboard")
mCopy.Disable()
mCopy.Click(func() {
Expand All @@ -186,34 +184,8 @@ func onReady() {
}
})

systray.AddSeparator()

mSettings = systray.AddMenuItem("Settings", "Settings")

mDevices = mSettings.AddSubMenuItem("Devices", "Select input device")

deviceMu.Lock()
mDefaultDevice = mDevices.AddSubMenuItemCheckbox("System Default", "Use system default device", deviceSel == "")
mDefaultDevice.Click(func() {
deviceMu.Lock()
cb := deviceCb
deviceMu.Unlock()
if cb != nil {
cb("")
}
deviceMu.Lock()
for _, it := range deviceItems {
it.Uncheck()
}
mDefaultDevice.Check()
deviceMu.Unlock()
})
deviceItems = make([]*systray.MenuItem, 0, len(deviceNames))
for i, name := range deviceNames {
item := addDeviceItem(mDevices, i, name, name == deviceSel)
deviceItems = append(deviceItems, item)
}
deviceMu.Unlock()
mAutoPaste = mSettings.AddSubMenuItemCheckbox("Auto-paste", "Auto-paste transcribed text", autoPasteOn)
mAutoPaste.Click(func() {
if mAutoPaste.Checked() {
Expand Down Expand Up @@ -241,6 +213,34 @@ func onReady() {
}
})

sep := mSettings.AddSubMenuItem("─────────", "")
sep.Disable()

mDevices = mSettings.AddSubMenuItem("Microphone", "Select input device")

deviceMu.Lock()
mDefaultDevice = mDevices.AddSubMenuItemCheckbox("System Default", "Use system default device", deviceSel == "")
mDefaultDevice.Click(func() {
deviceMu.Lock()
cb := deviceCb
deviceMu.Unlock()
if cb != nil {
cb("")
}
deviceMu.Lock()
for _, it := range deviceItems {
it.Uncheck()
}
mDefaultDevice.Check()
deviceMu.Unlock()
})
deviceItems = make([]*systray.MenuItem, 0, len(deviceNames))
for i, name := range deviceNames {
item := addDeviceItem(mDevices, i, name, name == deviceSel)
deviceItems = append(deviceItems, item)
}
deviceMu.Unlock()

modelMu.Lock()
if len(models) > 0 {
mBackend = mSettings.AddSubMenuItem("Model", "Select transcription model")
Expand Down