@@ -12,6 +12,7 @@ package attachments
1212#include <CoreFoundation/CoreFoundation.h>
1313#include <Foundation/Foundation.h>
1414#include <ImageIO/ImageIO.h>
15+ #include <math.h>
1516#include <UniformTypeIdentifiers/UniformTypeIdentifiers.h>
1617#if TARGET_OS_IPHONE
1718#include <MobileCoreServices/MobileCoreServices.h>
@@ -31,6 +32,11 @@ typedef struct {
3132 int imageLength;
3233} ImageConversionResult;
3334
// AudioAmpsResult is the value handed back by GetAudioAmplitudes.
// An empty result is represented as {NULL, 0}.
typedef struct AudioAmpsResult {
  // Heap-allocated amplitude values; the receiver is responsible for free()ing
  // this pointer. NULL when no amplitudes were produced.
  float* data;
  // Number of float entries stored in data.
  int length;
} AudioAmpsResult;
39+
3440VideoPreviewResult MakeVideoThumbnail(const char* inFilename) {
3541 VideoPreviewResult result = {NULL, 0, 0};
3642 NSString* filename = [NSString stringWithUTF8String:inFilename];
@@ -71,6 +77,118 @@ VideoPreviewResult MakeVideoThumbnail(const char* inFilename) {
7177 return result;
7278}
7379
// GetAudioAmplitudes decodes the first audio track of the file at inFilename
// to mono 32-bit float PCM via AVAssetReader and summarizes it as numSamples
// RMS amplitude values, each clamped to [0,1].
//
// Decoded samples are assigned to buckets by position, using a total sample
// count estimated from the asset duration and the track's sample rate
// (44100 Hz when the track does not report one). Buckets that receive no
// samples are left at 0.
//
// On success result.data is a heap array of result.length == numSamples floats
// that the caller must free(). The result is {NULL, 0} when inFilename is NULL
// or not valid UTF-8, numSamples <= 0, the asset has no audio track or no
// positive duration, the reader cannot be set up, or an allocation fails.
AudioAmpsResult GetAudioAmplitudes(const char* inFilename, int numSamples) {
  AudioAmpsResult result = {NULL, 0};
  // stringWithUTF8String: must not be handed NULL, so reject it up front.
  if (inFilename == NULL || numSamples <= 0) return result;

  // This function is invoked from Go through cgo, so there is no guarantee the
  // calling thread already has an autorelease pool; provide one so the
  // autoreleased Foundation/AVFoundation objects created here are drained.
  @autoreleasepool {
    NSString* filename = [NSString stringWithUTF8String:inFilename];
    // nil here means the bytes were not valid UTF-8; fileURLWithPath: would
    // raise on a nil path.
    if (filename == nil) return result;

    NSURL* url = [NSURL fileURLWithPath:filename];
    AVURLAsset* asset = [AVURLAsset URLAssetWithURL:url options:nil];

    NSArray<AVAssetTrack*>* audioTracks = [asset tracksWithMediaType:AVMediaTypeAudio];
    AVAssetTrack* audioTrack = audioTracks.firstObject;
    if (audioTrack == nil) return result;

    Float64 durationSeconds = CMTimeGetSeconds(asset.duration);
    // Written as !(x > 0) so that a NaN duration is rejected as well.
    if (!(durationSeconds > 0)) return result;

    // Use the first positive sample rate the track advertises.
    Float64 sampleRate = 0;
    for (id formatDescription in audioTrack.formatDescriptions) {
      CMAudioFormatDescriptionRef desc = (__bridge CMAudioFormatDescriptionRef)formatDescription;
      const AudioStreamBasicDescription* asbd =
          CMAudioFormatDescriptionGetStreamBasicDescription(desc);
      if (asbd && asbd->mSampleRate > 0) {
        sampleRate = asbd->mSampleRate;
        break;
      }
    }
    if (sampleRate <= 0) {
      sampleRate = 44100;
    }

    // Estimated number of decoded samples; never smaller than the bucket count
    // so the bucket computation below cannot divide by zero.
    long long totalSamples = llround(durationSeconds * sampleRate);
    if (totalSamples < numSamples) {
      totalSamples = numSamples;
    }

    NSError* error = nil;
    AVAssetReader* reader = [AVAssetReader assetReaderWithAsset:asset error:&error];
    // Judge failure by the return value, not by the error out-parameter.
    if (reader == nil) return result;

    NSDictionary* outputSettings = @{
      AVFormatIDKey: @(kAudioFormatLinearPCM),
      AVLinearPCMBitDepthKey: @32,
      AVLinearPCMIsFloatKey: @YES,
      AVLinearPCMIsNonInterleaved: @NO,
      AVNumberOfChannelsKey: @1,
    };

    AVAssetReaderTrackOutput* output = [AVAssetReaderTrackOutput
        assetReaderTrackOutputWithTrack:audioTrack
                         outputSettings:outputSettings];
    output.alwaysCopiesSampleData = NO;

    if (![reader canAddOutput:output]) return result;
    [reader addOutput:output];
    if (![reader startReading]) return result;

    // Accumulate in double: a float sum of squares stops absorbing small
    // samples once the running sum gets large, which skews long recordings.
    double* sumSq = (double*)calloc((size_t)numSamples, sizeof(double));
    unsigned long long* counts =
        (unsigned long long*)calloc((size_t)numSamples, sizeof(unsigned long long));
    float* amps = (float*)calloc((size_t)numSamples, sizeof(float));
    if (!sumSq || !counts || !amps) {
      free(sumSq);
      free(counts);
      free(amps);
      return result;
    }

    long long sampleIndex = 0;
    while (reader.status == AVAssetReaderStatusReading) {
      // Per-iteration pool so temporaries from each read do not pile up until
      // the whole file has been processed.
      @autoreleasepool {
        CMSampleBufferRef sampleBuffer = [output copyNextSampleBuffer];
        if (!sampleBuffer) break;

        CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
        size_t byteLength = blockBuffer ? CMBlockBufferGetDataLength(blockBuffer) : 0;
        size_t floatCount = byteLength / sizeof(float);
        if (floatCount > 0) {
          size_t copyLength = floatCount * sizeof(float);
          float* chunk = (float*)malloc(copyLength);
          if (chunk &&
              CMBlockBufferCopyDataBytes(blockBuffer, 0, copyLength, chunk) == kCMBlockBufferNoErr) {
            for (size_t i = 0; i < floatCount; i++) {
              // Compare in 64-bit before narrowing; samples beyond the
              // estimated total all land in the last bucket.
              long long bucket = ((sampleIndex + (long long)i) * numSamples) / totalSamples;
              if (bucket >= numSamples) {
                bucket = numSamples - 1;
              }
              double s = (double)chunk[i];
              sumSq[bucket] += s * s;
              counts[bucket]++;
            }
            sampleIndex += (long long)floatCount;
          }
          free(chunk);
        }
        // copyNextSampleBuffer returns an owned reference.
        CFRelease(sampleBuffer);
      }
    }

    for (int i = 0; i < numSamples; i++) {
      if (counts[i] == 0) continue;
      double rms = sqrt(sumSq[i] / (double)counts[i]);
      // Float PCM may overshoot full scale; keep the documented [0,1] range.
      if (rms > 1.0) {
        rms = 1.0;
      }
      amps[i] = (float)rms;
    }
    free(sumSq);
    free(counts);

    result.data = amps;
    result.length = numSamples;
  }
  return result;
}
191+
74192#if TARGET_OS_IPHONE
75193ImageConversionResult HEICToJPEG(const char* inFilename) {
76194 ImageConversionResult result = {NULL, 0};
@@ -121,6 +239,22 @@ import (
121239 "github.com/keybase/client/go/chat/utils"
122240)
123241
242+ func getAudioAmps (basename string ) []float64 {
243+ cbasename := C .CString (basename )
244+ defer C .free (unsafe .Pointer (cbasename ))
245+ result := C .GetAudioAmplitudes (cbasename , C .int (audioAmpsCount ))
246+ if result .length == 0 || result .data == nil {
247+ return nil
248+ }
249+ defer C .free (unsafe .Pointer (result .data ))
250+ amps := make ([]float64 , int (result .length ))
251+ cData := (* [1 << 20 ]C.float )(unsafe .Pointer (result .data ))[:int (result .length ):int (result .length )]
252+ for i , v := range cData {
253+ amps [i ] = float64 (v )
254+ }
255+ return amps
256+ }
257+
124258func previewVideo (ctx context.Context , log utils.DebugLabeler , src io.Reader ,
125259 basename string , nvh types.NativeVideoHelper ,
126260) (res * PreviewRes , err error ) {
@@ -137,6 +271,12 @@ func previewVideo(ctx context.Context, log utils.DebugLabeler, src io.Reader,
137271 }
138272 log .Debug (ctx , "previewVideo: length: %d duration: %ds" , result .imageLength , duration )
139273 if result .imageLength == 0 {
274+ // Audio-only files (e.g. M4A) have no video track so no thumbnail, but AVFoundation
275+ // can still read their duration. Extract amplitude data for the waveform visualization.
276+ if duration > 1 && isAudioExtension (basename ) {
277+ amps := getAudioAmps (basename )
278+ return previewAudio (duration , amps )
279+ }
140280 return res , errors .New ("no data returned from native" )
141281 }
142282 localDat := make ([]byte , result .imageLength )
0 commit comments