Merge code from bilibili#354 to improve av sync issues
XuYu committed Dec 21, 2019
1 parent 8c1bed1 commit b16b9b2
Showing 1 changed file with 97 additions and 72 deletions.
169 changes: 97 additions & 72 deletions src/remux/mp4-remuxer.js
@@ -346,90 +346,108 @@ class MP4Remuxer {
             let sample = samples[i];
             let unit = sample.unit;
             let originalDts = sample.dts - this._dtsBase;
-            let dts = originalDts - dtsCorrection;
+            let dts = originalDts;
+            let needFillSilentFrames = false;
+            let silentFrames = null;
+            let sampleDuration = 0;

-            if (firstDts === -1) {
-                firstDts = dts;
+            if (originalDts < -0.001) {
+                continue;  // skip the first sample if its dts is invalid
             }

-            let sampleDuration = 0;
-
-            if (i !== samples.length - 1) {
-                let nextDts = samples[i + 1].dts - this._dtsBase - dtsCorrection;
-                sampleDuration = nextDts - dts;
-            } else {  // the last sample
-                if (lastSample != null) {  // use stashed sample's dts to calculate sample duration
-                    let nextDts = lastSample.dts - this._dtsBase - dtsCorrection;
-                    sampleDuration = nextDts - dts;
-                } else if (mp4Samples.length >= 1) {  // use second last sample duration
-                    sampleDuration = mp4Samples[mp4Samples.length - 1].duration;
-                } else {  // the only one sample, use reference sample duration
-                    sampleDuration = Math.floor(refSampleDuration);
-                }
-            }
-
-            let needFillSilentFrames = false;
-            let silentFrames = null;
-
-            // Silent frame generation, if large timestamp gap detected && config.fixAudioTimestampGap
-            if (sampleDuration > refSampleDuration * 1.5 && this._audioMeta.codec !== 'mp3' && this._fillAudioTimestampGap && !Browser.safari) {
-                // We need to insert silent frames to fill the timestamp gap
-                needFillSilentFrames = true;
-                let delta = Math.abs(sampleDuration - refSampleDuration);
-                let frameCount = Math.ceil(delta / refSampleDuration);
-                let currentDts = dts + refSampleDuration;  // Notice: in float
-
-                Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' +
-                                'Silent frames will be generated to avoid unsync.\n' +
-                                `dts: ${dts + sampleDuration} ms, expected: ${dts + Math.round(refSampleDuration)} ms, ` +
-                                `delta: ${Math.round(delta)} ms, generate: ${frameCount} frames`);
-
-                let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
-                if (silentUnit == null) {
-                    Log.w(this.TAG, 'Unable to generate silent frame for ' +
-                                    `${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`);
-                    // Repeat last frame
-                    silentUnit = unit;
-                }
-                silentFrames = [];
-
-                for (let j = 0; j < frameCount; j++) {
-                    let intDts = Math.round(currentDts);  // round to integer
-                    if (silentFrames.length > 0) {
-                        // Set previous frame's sample duration
-                        let previousFrame = silentFrames[silentFrames.length - 1];
-                        previousFrame.duration = intDts - previousFrame.dts;
-                    }
-                    let frame = {
-                        dts: intDts,
-                        pts: intDts,
-                        cts: 0,
-                        unit: silentUnit,
-                        size: silentUnit.byteLength,
-                        duration: 0,  // wait for next sample
-                        originalDts: originalDts,
-                        flags: {
-                            isLeading: 0,
-                            dependsOn: 1,
-                            isDependedOn: 0,
-                            hasRedundancy: 0
-                        }
-                    };
-                    silentFrames.push(frame);
-                    mdatBytes += frame.size;
-                    currentDts += refSampleDuration;
-                }
-
-                // last frame: align end time to next frame's dts
-                let lastFrame = silentFrames[silentFrames.length - 1];
-                lastFrame.duration = dts + sampleDuration - lastFrame.dts;
-
-                // silentFrames.forEach((frame) => {
-                //     Log.w(this.TAG, `SilentAudio: dts: ${frame.dts}, duration: ${frame.duration}`);
-                // });
-
-                // Set correct sample duration for current frame
-                sampleDuration = Math.round(refSampleDuration);
-            }
+            if (this._audioMeta.codec !== 'mp3') {
+                // for AAC, keep dts increasing on the refSampleDuration grid
+                let curRefDts = originalDts;
+                const maxAudioFramesDrift = 3;
+                if (this._audioNextDts) {
+                    curRefDts = this._audioNextDts;
+                }
+
+                dtsCorrection = originalDts - curRefDts;
+                if (dtsCorrection <= -maxAudioFramesDrift * refSampleDuration) {
+                    // Overlapping by more than maxAudioFramesDrift frames: drop this sample
+                    Log.w(this.TAG, `Dropping 1 audio frame (originalDts: ${originalDts} ms, curRefDts: ${curRefDts} ms) due to dtsCorrection: ${dtsCorrection} ms overlap.`);
+                    continue;
+                } else if (dtsCorrection >= maxAudioFramesDrift * refSampleDuration && this._fillAudioTimestampGap && !Browser.safari) {
+                    // Silent frame generation, if large timestamp gap detected && config.fixAudioTimestampGap
+                    needFillSilentFrames = true;
+                    // We need to insert silent frames to fill the timestamp gap
+                    let frameCount = Math.floor(dtsCorrection / refSampleDuration);
+                    Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' +
+                                    'Silent frames will be generated to avoid unsync.\n' +
+                                    `originalDts: ${originalDts} ms, curRefDts: ${curRefDts} ms, ` +
+                                    `dtsCorrection: ${Math.round(dtsCorrection)} ms, generate: ${frameCount} frames`);
+
+                    dts = Math.floor(curRefDts);
+                    sampleDuration = Math.floor(curRefDts + refSampleDuration) - dts;
+
+                    let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
+                    if (silentUnit == null) {
+                        Log.w(this.TAG, 'Unable to generate silent frame for ' +
+                                        `${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`);
+                        // Repeat last frame
+                        silentUnit = unit;
+                    }
+                    silentFrames = [];
+
+                    for (let j = 0; j < frameCount; j++) {
+                        curRefDts = curRefDts + refSampleDuration;
+                        let intDts = Math.floor(curRefDts);  // truncate to integer
+                        let intDuration = Math.floor(curRefDts + refSampleDuration) - intDts;
+                        let frame = {
+                            dts: intDts,
+                            pts: intDts,
+                            cts: 0,
+                            unit: silentUnit,
+                            size: silentUnit.byteLength,
+                            duration: intDuration,
+                            originalDts: originalDts,
+                            flags: {
+                                isLeading: 0,
+                                dependsOn: 1,
+                                isDependedOn: 0,
+                                hasRedundancy: 0
+                            }
+                        };
+                        silentFrames.push(frame);
+                        mdatBytes += unit.byteLength;
+                    }
+
+                    this._audioNextDts = curRefDts + refSampleDuration;
+                } else {
+                    dts = Math.floor(curRefDts);
+                    sampleDuration = Math.floor(curRefDts + refSampleDuration) - dts;
+                    this._audioNextDts = curRefDts + refSampleDuration;
+                }
+            } else {
+                // keep the original dts calculation algorithm for mp3
+                dts = originalDts - dtsCorrection;
+
+                if (i !== samples.length - 1) {
+                    let nextDts = samples[i + 1].dts - this._dtsBase - dtsCorrection;
+                    sampleDuration = nextDts - dts;
+                } else {  // the last sample
+                    if (lastSample != null) {  // use stashed sample's dts to calculate sample duration
+                        let nextDts = lastSample.dts - this._dtsBase - dtsCorrection;
+                        sampleDuration = nextDts - dts;
+                    } else if (mp4Samples.length >= 1) {  // use second last sample duration
+                        sampleDuration = mp4Samples[mp4Samples.length - 1].duration;
+                    } else {  // the only one sample, use reference sample duration
+                        sampleDuration = Math.floor(refSampleDuration);
+                    }
+                }
+                this._audioNextDts = dts + sampleDuration;
+            }
+
+            if (firstDts === -1) {
+                firstDts = dts;
+            }

             mp4Samples.push({
@@ -454,6 +472,13 @@ class MP4Remuxer {
             }
         }

+        if (mp4Samples.length === 0) {
+            // no samples need to remux
+            track.samples = [];
+            track.length = 0;
+            return;
+        }
+
         // allocate mdatbox
         if (mpegRawTrack) {
             // allocate for raw mpeg buffer
@@ -479,7 +504,7 @@

         let latest = mp4Samples[mp4Samples.length - 1];
         lastDts = latest.dts + latest.duration;
-        this._audioNextDts = lastDts;
+        //this._audioNextDts = lastDts;

         // fill media segment info & add to info list
         let info = new MediaSegmentInfo();
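A note on the new logic, for readers skimming the diff: for AAC, the remuxer now keeps a running expected dts (`_audioNextDts`) on a fixed `refSampleDuration` grid and measures each incoming frame's drift (`dtsCorrection`) against it. A frame that overlaps the grid by three or more frame durations is dropped; a gap of three or more frame durations is filled with generated silent frames; anything smaller is snapped onto the grid. The sketch below restates that decision rule in isolation. It is illustrative only: `classifyFrame` and its parameters are invented names rather than flv.js API, the 48 kHz sample rate is an assumed example, and the `fixAudioTimestampGap` config gate is omitted.

    // Simplified restatement of the drift rule above (illustrative, not flv.js API).
    const sampleRate = 48000;                             // assumed example rate
    const refSampleDuration = 1024 / sampleRate * 1000;   // ~21.33 ms per AAC frame
    const maxAudioFramesDrift = 3;

    function classifyFrame(originalDts, expectedDts /* the running _audioNextDts */) {
        const dtsCorrection = originalDts - expectedDts;
        if (dtsCorrection <= -maxAudioFramesDrift * refSampleDuration) {
            return { action: 'drop' };                    // overlaps the timeline by 3+ frames
        } else if (dtsCorrection >= maxAudioFramesDrift * refSampleDuration) {
            // gap of 3+ frames: fill with silent frames, then continue on the grid
            return { action: 'fill-silent', frameCount: Math.floor(dtsCorrection / refSampleDuration) };
        }
        return { action: 'snap', dts: Math.floor(expectedDts) };  // small drift: snap to the grid
    }

    // Worked example: the next frame was expected at 1000 ms but arrives stamped 1100 ms.
    // 100 ms of drift is ~4.7 frames, beyond the 3-frame limit, so 4 silent frames are generated.
    console.log(classifyFrame(1100, 1000));  // { action: 'fill-silent', frameCount: 4 }
    console.log(classifyFrame(1005, 1000));  // { action: 'snap', dts: 1000 }
    console.log(classifyFrame(900, 1000));   // { action: 'drop' }

This also explains the last hunk: the loop now advances `_audioNextDts` itself as a float on the reference grid, so the trailing `this._audioNextDts = lastDts;` is commented out; overwriting it with the integer `lastDts` would discard the fractional remainder that the drift check depends on.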
