Skip to content

Commit e103593

Browse files
xyhp915 and tiensonqin authored
feat(mobile): audio record && transcribe (#12105)
* feat: audio transcribe * enhance(mobile): auto start recording on initialization * fix(mobile): can't delete journal from selection bar * fix: duplicated audio record buttons in quick add * fix(mobile): inactive bottom tab color * enhance(mobile): display no results when there's no matched items * enhance(mobile): add audio transcription feature and enhance audio component * fix: store assets directly instead in today page instead of node ref * save transcribed text to audio's child block * enhance: transcribe supports punctuations and being offline only * fix(mobile): save assets to current editing page --------- Co-authored-by: Tienson Qin <tiensonqin@gmail.com>
1 parent ecc1828 commit e103593

31 files changed

+640
-463
lines changed

android/app/capacitor.build.gradle

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ dependencies {
2121
implementation project(':capacitor-share')
2222
implementation project(':capacitor-splash-screen')
2323
implementation project(':capacitor-status-bar')
24-
implementation project(':capacitor-voice-recorder')
2524
implementation project(':send-intent')
2625
implementation project(':jcesarmobile-ssl-skip')
2726

android/app/src/main/AndroidManifest.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
1010
<uses-permission android:name="android.permission.MANAGE_EXTERNAL_STORAGE" />
1111
<uses-permission android:name="android.permission.RECORD_AUDIO" />
12+
<uses-permission android:name="android.permission.MODIFY_AUDIO_SETTINGS" />
1213

1314
<application
1415
android:allowBackup="true"

android/app/src/main/assets/capacitor.plugins.json

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,6 @@
4747
"pkg": "@capacitor/status-bar",
4848
"classpath": "com.capacitorjs.plugins.statusbar.StatusBarPlugin"
4949
},
50-
{
51-
"pkg": "capacitor-voice-recorder",
52-
"classpath": "com.tchvu3.capacitorvoicerecorder.VoiceRecorder"
53-
},
5450
{
5551
"pkg": "send-intent",
5652
"classpath": "de.mindlib.sendIntent.SendIntent"

android/capacitor.settings.gradle

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,6 @@ project(':capacitor-splash-screen').projectDir = new File('../node_modules/@capa
3838
include ':capacitor-status-bar'
3939
project(':capacitor-status-bar').projectDir = new File('../node_modules/@capacitor/status-bar/android')
4040

41-
include ':capacitor-voice-recorder'
42-
project(':capacitor-voice-recorder').projectDir = new File('../node_modules/capacitor-voice-recorder/android')
43-
4441
include ':send-intent'
4542
project(':send-intent').projectDir = new File('../node_modules/send-intent/android')
4643

gulpfile.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ const common = {
130130
'node_modules/prop-types/prop-types.min.js',
131131
'node_modules/interactjs/dist/interact.min.js',
132132
'node_modules/photoswipe/dist/umd/*.js',
133+
'node_modules/wavesurfer.js/dist/wavesurfer.min.js',
134+
'node_modules/wavesurfer.js/dist/plugins/record.min.js',
133135
'packages/amplify/dist/amplify.js',
134136
'packages/ui/dist/ui/ui.js',
135137
'node_modules/@sqlite.org/sqlite-wasm/sqlite-wasm/jswasm/sqlite3.wasm',

ios/App/App/Info.plist

Lines changed: 117 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -3,120 +3,122 @@
33
<plist version="1.0">
44
<dict>
55
<key>NSAppTransportSecurity</key>
6-
<dict>
7-
<key>NSAllowsArbitraryLoads</key>
8-
<true/>
9-
</dict>
10-
<key>APFiles</key>
11-
<dict>
12-
<key>APFileDescriptionKey</key>
13-
<string></string>
14-
<key>APFileDestinationPath</key>
15-
<string></string>
16-
<key>APFileName</key>
17-
<string></string>
18-
<key>APFileSourcePath</key>
19-
<string></string>
20-
</dict>
21-
<key>CFBundleDevelopmentRegion</key>
22-
<string>en</string>
23-
<key>CFBundleDisplayName</key>
24-
<string>Logseq</string>
25-
<key>CFBundleExecutable</key>
26-
<string>$(EXECUTABLE_NAME)</string>
27-
<key>CFBundleIdentifier</key>
28-
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
29-
<key>CFBundleInfoDictionaryVersion</key>
30-
<string>6.0</string>
31-
<key>CFBundleName</key>
32-
<string>$(PRODUCT_NAME)</string>
33-
<key>CFBundlePackageType</key>
34-
<string>APPL</string>
35-
<key>CFBundleShortVersionString</key>
36-
<string>$(MARKETING_VERSION)</string>
37-
<key>CFBundleURLTypes</key>
38-
<array>
39-
<dict>
40-
<key>CFBundleTypeRole</key>
41-
<string>Viewer</string>
42-
<key>CFBundleURLName</key>
43-
<string>com.logseq.logseq</string>
44-
<key>CFBundleURLSchemes</key>
45-
<array>
46-
<string>logseq</string>
47-
</array>
48-
</dict>
49-
</array>
50-
<key>CFBundleVersion</key>
51-
<string>$(CURRENT_PROJECT_VERSION)</string>
52-
<key>LSApplicationCategoryType</key>
53-
<string></string>
54-
<key>LSRequiresIPhoneOS</key>
55-
<true/>
56-
<key>LSSupportsOpeningDocumentsInPlace</key>
57-
<true/>
58-
<key>UIFileSharingEnabled</key>
59-
<true/>
60-
<key>NSCameraUsageDescription</key>
61-
<string>We will access your camera when you take a photo, and embed it in your note.</string>
62-
<key>NSDocumentsFolderUsageDescription</key>
63-
<string></string>
64-
<key>NSDownloadsFolderUsageDescription</key>
65-
<string></string>
66-
<key>NSFileProviderDomainUsageDescription</key>
67-
<string></string>
68-
<key>NSFileProviderPresenceUsageDescription</key>
69-
<string></string>
70-
<key>NSMicrophoneUsageDescription</key>
71-
<string>We will access your microphone to record audio notes</string>
72-
<key>NSPhotoLibraryAddUsageDescription</key>
73-
<string>We will access your album when you save a photo.</string>
74-
<key>NSPhotoLibraryUsageDescription</key>
75-
<string>We will access your album when you choose a photo, and embed it in your note.</string>
76-
<key>NSUbiquitousContainers</key>
77-
<dict>
78-
<key>iCloud.com.logseq.logseq</key>
79-
<dict>
80-
<key>NSUbiquitousContainerIsDocumentScopePublic</key>
81-
<true/>
82-
<key>NSUbiquitousContainerName</key>
83-
<string>Logseq</string>
84-
<key>NSUbiquitousContainerSupportedFolderLevels</key>
85-
<string>ANY</string>
86-
</dict>
87-
</dict>
88-
<key>UIBackgroundModes</key>
89-
<array>
90-
<string>audio</string>
91-
</array>
92-
<key>UILaunchStoryboardName</key>
93-
<string>LaunchScreen</string>
94-
<key>UIMainStoryboardFile</key>
95-
<string>Main</string>
96-
<key>UIRequiredDeviceCapabilities</key>
97-
<array>
98-
<string>armv7</string>
99-
</array>
100-
<key>UISupportedInterfaceOrientations</key>
101-
<array>
102-
<string>UIInterfaceOrientationPortrait</string>
103-
<string>UIInterfaceOrientationLandscapeLeft</string>
104-
<string>UIInterfaceOrientationLandscapeRight</string>
105-
</array>
106-
<key>UISupportedInterfaceOrientations~ipad</key>
107-
<array>
108-
<string>UIInterfaceOrientationPortrait</string>
109-
<string>UIInterfaceOrientationPortraitUpsideDown</string>
110-
<string>UIInterfaceOrientationLandscapeLeft</string>
111-
<string>UIInterfaceOrientationLandscapeRight</string>
112-
</array>
113-
<key>UISupportsDocumentBrowser</key>
114-
<true/>
115-
<key>UIViewControllerBasedStatusBarAppearance</key>
116-
<true/>
117-
<key>CFBundleGetInfoString</key>
118-
<string></string>
119-
<key>ITSAppUsesNonExemptEncryption</key>
120-
<false/>
6+
<dict>
7+
<key>NSAllowsArbitraryLoads</key>
8+
<true/>
9+
</dict>
10+
<key>APFiles</key>
11+
<dict>
12+
<key>APFileDescriptionKey</key>
13+
<string></string>
14+
<key>APFileDestinationPath</key>
15+
<string></string>
16+
<key>APFileName</key>
17+
<string></string>
18+
<key>APFileSourcePath</key>
19+
<string></string>
20+
</dict>
21+
<key>CFBundleDevelopmentRegion</key>
22+
<string>en</string>
23+
<key>CFBundleDisplayName</key>
24+
<string>Logseq</string>
25+
<key>CFBundleExecutable</key>
26+
<string>$(EXECUTABLE_NAME)</string>
27+
<key>CFBundleIdentifier</key>
28+
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
29+
<key>CFBundleInfoDictionaryVersion</key>
30+
<string>6.0</string>
31+
<key>CFBundleName</key>
32+
<string>$(PRODUCT_NAME)</string>
33+
<key>CFBundlePackageType</key>
34+
<string>APPL</string>
35+
<key>CFBundleShortVersionString</key>
36+
<string>$(MARKETING_VERSION)</string>
37+
<key>CFBundleURLTypes</key>
38+
<array>
39+
<dict>
40+
<key>CFBundleTypeRole</key>
41+
<string>Viewer</string>
42+
<key>CFBundleURLName</key>
43+
<string>com.logseq.logseq</string>
44+
<key>CFBundleURLSchemes</key>
45+
<array>
46+
<string>logseq</string>
47+
</array>
48+
</dict>
49+
</array>
50+
<key>CFBundleVersion</key>
51+
<string>$(CURRENT_PROJECT_VERSION)</string>
52+
<key>LSApplicationCategoryType</key>
53+
<string></string>
54+
<key>LSRequiresIPhoneOS</key>
55+
<true/>
56+
<key>LSSupportsOpeningDocumentsInPlace</key>
57+
<true/>
58+
<key>UIFileSharingEnabled</key>
59+
<true/>
60+
<key>NSCameraUsageDescription</key>
61+
<string>We will access your camera when you take a photo, and embed it in your note.</string>
62+
<key>NSDocumentsFolderUsageDescription</key>
63+
<string></string>
64+
<key>NSDownloadsFolderUsageDescription</key>
65+
<string></string>
66+
<key>NSFileProviderDomainUsageDescription</key>
67+
<string></string>
68+
<key>NSFileProviderPresenceUsageDescription</key>
69+
<string></string>
70+
<key>NSSpeechRecognitionUsageDescription</key>
71+
<string>We need access to speech recognition to convert your voice to text.</string>
72+
<key>NSMicrophoneUsageDescription</key>
73+
<string>We will access your microphone to record audio notes</string>
74+
<key>NSPhotoLibraryAddUsageDescription</key>
75+
<string>We will access your album when you save a photo.</string>
76+
<key>NSPhotoLibraryUsageDescription</key>
77+
<string>We will access your album when you choose a photo, and embed it in your note.</string>
78+
<key>NSUbiquitousContainers</key>
79+
<dict>
80+
<key>iCloud.com.logseq.logseq</key>
81+
<dict>
82+
<key>NSUbiquitousContainerIsDocumentScopePublic</key>
83+
<true/>
84+
<key>NSUbiquitousContainerName</key>
85+
<string>Logseq</string>
86+
<key>NSUbiquitousContainerSupportedFolderLevels</key>
87+
<string>ANY</string>
88+
</dict>
89+
</dict>
90+
<key>UIBackgroundModes</key>
91+
<array>
92+
<string>audio</string>
93+
</array>
94+
<key>UILaunchStoryboardName</key>
95+
<string>LaunchScreen</string>
96+
<key>UIMainStoryboardFile</key>
97+
<string>Main</string>
98+
<key>UIRequiredDeviceCapabilities</key>
99+
<array>
100+
<string>armv7</string>
101+
</array>
102+
<key>UISupportedInterfaceOrientations</key>
103+
<array>
104+
<string>UIInterfaceOrientationPortrait</string>
105+
<string>UIInterfaceOrientationLandscapeLeft</string>
106+
<string>UIInterfaceOrientationLandscapeRight</string>
107+
</array>
108+
<key>UISupportedInterfaceOrientations~ipad</key>
109+
<array>
110+
<string>UIInterfaceOrientationPortrait</string>
111+
<string>UIInterfaceOrientationPortraitUpsideDown</string>
112+
<string>UIInterfaceOrientationLandscapeLeft</string>
113+
<string>UIInterfaceOrientationLandscapeRight</string>
114+
</array>
115+
<key>UISupportsDocumentBrowser</key>
116+
<true/>
117+
<key>UIViewControllerBasedStatusBarAppearance</key>
118+
<true/>
119+
<key>CFBundleGetInfoString</key>
120+
<string></string>
121+
<key>ITSAppUsesNonExemptEncryption</key>
122+
<false/>
121123
</dict>
122124
</plist>

ios/App/App/UILocalPlugin.swift

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import Capacitor
99
import Foundation
10+
import Speech
1011

1112
func isDarkMode() -> Bool {
1213
if #available(iOS 12.0, *) {
@@ -204,9 +205,82 @@ public class UILocalPlugin: CAPPlugin, CAPBridgedPlugin {
204205
private var datepickerDialogView: UIView?
205206

206207
public let pluginMethods: [CAPPluginMethod] = [
207-
CAPPluginMethod(name: "showDatePicker", returnType: CAPPluginReturnPromise)
208+
CAPPluginMethod(name: "showDatePicker", returnType: CAPPluginReturnPromise),
209+
CAPPluginMethod(name: "transcribeAudio2Text", returnType: CAPPluginReturnPromise)
208210
]
209211

212+
// TODO: switch to use https://developer.apple.com/documentation/speech/speechanalyzer for iOS 26+
/// Transcribes the audio file at `url` to text using `SFSpeechRecognizer`.
///
/// Requests speech-recognition authorization first, then runs a single
/// on-device (offline) recognition request with the "en-US" locale.
///
/// - Parameters:
///   - url: Location of the audio file to transcribe.
///   - completion: Called at most once, with either the transcription
///     (`String`, `nil`) or the failure (`nil`, `Error`).
private func recognizeSpeech(from url: URL, completion: @escaping (String?, Error?) -> Void) {
    SFSpeechRecognizer.requestAuthorization { authStatus in
        guard authStatus == .authorized else {
            completion(nil, NSError(domain: "", code: -1, userInfo: [NSLocalizedDescriptionKey: "语音识别权限未授权"]))
            return
        }

        // `SFSpeechRecognizer(locale:)` is failable. Without this guard a nil
        // recognizer turned every call below into a silent no-op and
        // `completion` was never invoked, leaving the caller waiting forever.
        guard let recognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US")) else {
            completion(nil, NSError(domain: "", code: -2, userInfo: [NSLocalizedDescriptionKey: "Speech recognizer is not available for locale en-US"]))
            return
        }
        let request = SFSpeechURLRecognitionRequest(url: url)

        // Setting up offline speech recognition
        // NOTE(review): this overrides the reported capability instead of checking it;
        // kept as in the original to preserve the offline-only behavior — confirm intent.
        recognizer.supportsOnDeviceRecognition = true
        request.shouldReportPartialResults = false
        request.requiresOnDeviceRecognition = true
        request.taskHint = .dictation
        if #available(iOS 16, *) {
            request.addsPunctuation = true
        }

        // The result handler may be invoked more than once; make sure the
        // caller's completion fires only for the first outcome.
        var hasCompleted = false
        recognizer.recognitionTask(with: request) { result, error in
            guard !hasCompleted else { return }

            if let result = result {
                hasCompleted = true
                completion(result.bestTranscription.formattedString, nil)
            } else if let error = error {
                hasCompleted = true
                completion(nil, error)
            }
        }
    }
}
243+
244+
/// Capacitor plugin method: transcribes recorded audio to text.
///
/// Reads the `audioData` option (an array of byte values), writes it to a
/// temporary `.m4a` file and runs speech recognition on that file.
/// Resolves the call with `["transcription": String]`, or rejects it when the
/// payload is invalid, the file cannot be written, or recognition fails.
///
/// - Parameter call: The plugin call carrying the `audioData` option.
@objc func transcribeAudio2Text(_ call: CAPPluginCall) {
    // NOTE(review): `self.call` is also assigned by `showDatePicker`; this
    // method only uses the local `call` — confirm the stored reference is needed.
    self.call = call

    // Receive the audio bytes (sent from JS as an array of numbers).
    guard let audioArray = call.getArray("audioData", NSNumber.self) as? [UInt8],
          !audioArray.isEmpty else {
        call.reject("无效的音频数据")
        return
    }

    // Convert the byte array to Data.
    let audioData = Data(audioArray)

    // Use a unique file name per call so overlapping transcriptions cannot
    // overwrite each other's audio.
    let fileURL = FileManager.default.temporaryDirectory
        .appendingPathComponent("recordedAudio-\(UUID().uuidString)")
        .appendingPathExtension("m4a")

    do {
        // `write(to:)` throws on failure, so no separate existence check is needed.
        try audioData.write(to: fileURL)
    } catch {
        call.reject("保存文件失败: \(error.localizedDescription)")
        return
    }

    // Run speech recognition on the saved file.
    recognizeSpeech(from: fileURL) { result, error in
        // Best-effort cleanup: the recording is no longer needed once
        // recognition has finished, and a failed delete must not fail the call.
        try? FileManager.default.removeItem(at: fileURL)

        if let result = result {
            call.resolve(["transcription": result])
        } else if let error = error {
            call.reject("语音识别失败: \(error.localizedDescription)")
        } else {
            // Neither a result nor an error: still settle the call so the
            // JS promise does not stay pending.
            call.reject("语音识别失败")
        }
    }
}
283+
210284
@objc func showDatePicker(_ call: CAPPluginCall) {
211285
self.call = call
212286

ios/App/Podfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ def capacitor_pods
2323
pod 'CapacitorShare', :path => '../../node_modules/@capacitor/share'
2424
pod 'CapacitorSplashScreen', :path => '../../node_modules/@capacitor/splash-screen'
2525
pod 'CapacitorStatusBar', :path => '../../node_modules/@capacitor/status-bar'
26-
pod 'CapacitorVoiceRecorder', :path => '../../node_modules/capacitor-voice-recorder'
2726
pod 'SendIntent', :path => '../../node_modules/send-intent'
2827
pod 'JcesarmobileSslSkip', :path => '../../node_modules/@jcesarmobile/ssl-skip'
2928
end

0 commit comments

Comments
 (0)