forked from nareix/joy4
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.go
311 lines (282 loc) · 10.5 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
package aacparser
import (
"bytes"
"fmt"
"io"
"time"
"github.com/strengine/Core/av"
"github.com/strengine/Core/utils/bits"
)
// Audio object type (AOT) identifiers, copied from libavcodec/mpeg4audio.h.
// The Y/N flag in each trailing comment is carried over from that header.
//
// The numbering has two gaps (10-11 and 18 have no name here), so the iota
// offset is bumped at AOT_TTSI and AOT_ER_AAC_LTP. Writing "12 + iota" there
// would add the running iota (9) to 12 and shift every later constant; in
// particular AOT_ESCAPE must be 31, the largest value a 5-bit object type
// field can hold, for the escape handling in readObjectType and
// writeObjectType to work.
const (
	AOT_AAC_MAIN     = iota + 1 // 1: Y Main
	AOT_AAC_LC                  // 2: Y Low Complexity
	AOT_AAC_SSR                 // 3: N (code in SoC repo) Scalable Sample Rate
	AOT_AAC_LTP                 // 4: Y Long Term Prediction
	AOT_SBR                     // 5: Y Spectral Band Replication
	AOT_AAC_SCALABLE            // 6: N Scalable
	AOT_TWINVQ                  // 7: N Twin Vector Quantizer
	AOT_CELP                    // 8: N Code Excited Linear Prediction
	AOT_HVXC                    // 9: N Harmonic Vector eXcitation Coding

	AOT_TTSI      = iota + 3 // 12: N Text-To-Speech Interface
	AOT_MAINSYNTH            // 13: N Main Synthesis
	AOT_WAVESYNTH            // 14: N Wavetable Synthesis
	AOT_MIDI                 // 15: N General MIDI
	AOT_SAFX                 // 16: N Algorithmic Synthesis and Audio Effects
	AOT_ER_AAC_LC            // 17: N Error Resilient Low Complexity

	AOT_ER_AAC_LTP      = iota + 4 // 19: N Error Resilient Long Term Prediction
	AOT_ER_AAC_SCALABLE            // 20: N Error Resilient Scalable
	AOT_ER_TWINVQ                  // 21: N Error Resilient Twin Vector Quantizer
	AOT_ER_BSAC                    // 22: N Error Resilient Bit-Sliced Arithmetic Coding
	AOT_ER_AAC_LD                  // 23: N Error Resilient Low Delay
	AOT_ER_CELP                    // 24: N Error Resilient Code Excited Linear Prediction
	AOT_ER_HVXC                    // 25: N Error Resilient Harmonic Vector eXcitation Coding
	AOT_ER_HILN                    // 26: N Error Resilient Harmonic and Individual Lines plus Noise
	AOT_ER_PARAM                   // 27: N Error Resilient Parametric
	AOT_SSC                        // 28: N SinuSoidal Coding
	AOT_PS                         // 29: N Parametric Stereo
	AOT_SURROUND                   // 30: N MPEG Surround
	AOT_ESCAPE                     // 31: Y Escape Value
	AOT_L1                         // 32: Y Layer 1
	AOT_L2                         // 33: Y Layer 2
	AOT_L3                         // 34: Y Layer 3
	AOT_DST                        // 35: N Direct Stream Transfer
	AOT_ALS                        // 36: Y Audio LosslesS
	AOT_SLS                        // 37: N Scalable LosslesS
	AOT_SLS_NON_CORE               // 38: N Scalable LosslesS (non Core)
	AOT_ER_AAC_ELD                 // 39: N Error Resilient Enhanced Low Delay
	AOT_SMR_SIMPLE                 // 40: N Symbolic Music Representation Simple
	AOT_SMR_MAIN                   // 41: N Symbolic Music Representation Main
	AOT_USAC_NOSBR                 // 42: N Unified Speech and Audio Coding (no SBR)
	AOT_SAOC                       // 43: N Spatial Audio Object Coding
	AOT_LD_SURROUND                // 44: N Low Delay MPEG Surround
	AOT_USAC                       // 45: N Unified Speech and Audio Coding
)
// MPEG4AudioConfig holds the audio parameters this package reads from, and
// writes to, an ADTS header or a serialized MPEG-4 audio config.
//
// ObjectType, SampleRateIndex and ChannelConfig are the raw coded values;
// SampleRate and ChannelLayout are derived from them by Complete.
type MPEG4AudioConfig struct {
// SampleRate is set by Complete from sampleRateTable[SampleRateIndex].
// WriteMPEG4AudioConfig uses it to look up an index when SampleRateIndex is 0.
SampleRate int
// ChannelLayout is set by Complete from chanConfigTable[ChannelConfig].
// WriteMPEG4AudioConfig uses it to look up a value when ChannelConfig is 0.
ChannelLayout av.ChannelLayout
// ObjectType is the audio object type; zero means "not valid" (see IsValid).
ObjectType uint
// SampleRateIndex is the coded sampling frequency index. When the escape
// value 0xf was read, readSampleRateIndex stores the following 24-bit field
// here instead.
SampleRateIndex uint
// ChannelConfig is the coded channel configuration value.
ChannelConfig uint
}
// sampleRateTable maps a sampling frequency index (the slice index) to the
// sample rate it stands for. Indexes past the end of the table have no entry.
var sampleRateTable = []int{
	0:  96000,
	1:  88200,
	2:  64000,
	3:  48000,
	4:  44100,
	5:  32000,
	6:  24000,
	7:  22050,
	8:  16000,
	9:  12000,
	10: 11025,
	11: 8000,
	12: 7350,
}
/*
These are the channel configurations:
0: Defined in AOT Specific Config
1: 1 channel: front-center
2: 2 channels: front-left, front-right
3: 3 channels: front-center, front-left, front-right
4: 4 channels: front-center, front-left, front-right, back-center
5: 5 channels: front-center, front-left, front-right, back-left, back-right
6: 6 channels: front-center, front-left, front-right, back-left, back-right, LFE-channel
7: 8 channels: front-center, front-left, front-right, side-left, side-right, back-left, back-right, LFE-channel
8-15: Reserved
*/
// chanConfigTable maps a channel configuration value (the slice index) to the
// av.ChannelLayout that Complete assigns for it. Entry 0 is the empty layout;
// values past the end of the table have no entry.
var chanConfigTable = []av.ChannelLayout{
	0: 0,
	1: av.CH_FRONT_CENTER,
	2: av.CH_FRONT_LEFT | av.CH_FRONT_RIGHT,
	3: av.CH_FRONT_CENTER | av.CH_FRONT_LEFT | av.CH_FRONT_RIGHT,
	4: av.CH_FRONT_CENTER | av.CH_FRONT_LEFT | av.CH_FRONT_RIGHT | av.CH_BACK_CENTER,
	5: av.CH_FRONT_CENTER | av.CH_FRONT_LEFT | av.CH_FRONT_RIGHT | av.CH_BACK_LEFT | av.CH_BACK_RIGHT,
	6: av.CH_FRONT_CENTER | av.CH_FRONT_LEFT | av.CH_FRONT_RIGHT | av.CH_BACK_LEFT | av.CH_BACK_RIGHT | av.CH_LOW_FREQ,
	7: av.CH_FRONT_CENTER | av.CH_FRONT_LEFT | av.CH_FRONT_RIGHT | av.CH_SIDE_LEFT | av.CH_SIDE_RIGHT | av.CH_BACK_LEFT | av.CH_BACK_RIGHT | av.CH_LOW_FREQ,
}
// ParseADTSHeader decodes the ADTS header at the start of frame.
//
// Results:
//   - config: object type, sample rate index and channel configuration taken
//     from the header, with SampleRate and ChannelLayout filled in by Complete.
//   - hdrlen: 7, or 9 when bit 0 of byte 1 is clear.
//   - framelen: the 13-bit frame length field; it is required to be at least
//     hdrlen.
//   - samples: 1024 times (the 2-bit field in byte 6, plus one).
//
// An error is returned when frame holds fewer than 7 bytes, when byte 0 is not
// 0xff or byte 1 masked with 0xf6 is not 0xf0, when the channel configuration
// is 0, or when framelen is smaller than hdrlen. On error the other results
// may be partially filled in and must not be used.
func ParseADTSHeader(frame []byte) (config MPEG4AudioConfig, hdrlen int, framelen int, samples int, err error) {
	// Bytes 0 through 6 are read below; reject short input rather than
	// panicking with an index out of range.
	if len(frame) < 7 {
		err = fmt.Errorf("aacparser: adts header too short")
		return
	}
	if frame[0] != 0xff || frame[1]&0xf6 != 0xf0 {
		err = fmt.Errorf("aacparser: not adts header")
		return
	}

	// The header stores the object type minus one in two bits.
	config.ObjectType = uint(frame[2]>>6) + 1
	config.SampleRateIndex = uint(frame[2] >> 2 & 0xf)
	// The channel configuration straddles bytes 2 and 3: one bit, then two.
	config.ChannelConfig = uint(frame[2]<<2&0x4 | frame[3]>>6&0x3)
	if config.ChannelConfig == uint(0) {
		err = fmt.Errorf("aacparser: adts channel count invalid")
		return
	}
	config.Complete()

	// 13-bit frame length: 2 bits of byte 3, all of byte 4, top 3 bits of byte 5.
	framelen = int(frame[3]&0x3)<<11 | int(frame[4])<<3 | int(frame[5]>>5)
	samples = (int(frame[6]&0x3) + 1) * 1024

	hdrlen = 7
	if frame[1]&0x1 == 0 {
		hdrlen = 9
	}
	if framelen < hdrlen {
		err = fmt.Errorf("aacparser: adts framelen < hdrlen")
		return
	}
	return
}
// ADTSHeaderLength is the number of bytes FillADTSHeader writes.
const ADTSHeaderLength = 7

// FillADTSHeader writes a 7-byte ADTS header into header[0:7].
//
// config supplies ObjectType, SampleRateIndex and ChannelConfig; its other
// fields are not read. payloadLength is the size of the data that follows the
// header; the stored frame length is payloadLength plus ADTSHeaderLength and
// only its low 13 bits fit in the header. samples is stored as
// samples/1024-1 in a 2-bit field, so values from 1024 through 4096 are
// representable.
//
// The function panics if header is shorter than ADTSHeaderLength.
func FillADTSHeader(header []byte, config MPEG4AudioConfig, samples int, payloadLength int) {
	// Fail before writing anything if header is too short.
	_ = header[ADTSHeaderLength-1]

	frameLength := payloadLength + ADTSHeaderLength

	// Bit layout, one letter per bit:
	//AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
	header[0] = 0xff
	// Bit 0 set: ParseADTSHeader reads this as the 7-byte header form.
	header[1] = 0xf1
	// Inverse of ParseADTSHeader: object type minus one (2 bits), sample rate
	// index (4 bits), then the top bit of the channel configuration.
	header[2] = (byte(config.ObjectType-1)&0x3)<<6 | (byte(config.SampleRateIndex)&0xf)<<2 | byte(config.ChannelConfig>>2)&0x1
	// Low two channel bits, four zero bits, top two frame length bits.
	header[3] = byte(config.ChannelConfig&0x3)<<6 | byte(frameLength>>11)&0x3
	header[4] = byte(frameLength >> 3)
	// Low three frame length bits; the remaining five bits are all ones.
	header[5] = (byte(frameLength)&0x7)<<5 | 0x1f
	// The top six bits are the fixed pattern 110011. The count is masked to
	// its two-bit field so an out-of-range samples value cannot overwrite them.
	header[6] = 0xcc | byte(samples/1024-1)&0x3
}
// readObjectType reads a 5-bit object type from r. When that value equals
// AOT_ESCAPE, six more bits are read and 32 plus their value is returned
// instead.
func readObjectType(r *bits.Reader) (objectType uint, err error) {
	objectType, err = r.ReadBits(5)
	if err != nil || objectType != AOT_ESCAPE {
		return objectType, err
	}

	var ext uint
	if ext, err = r.ReadBits(6); err != nil {
		return objectType, err
	}
	return 32 + ext, nil
}
// writeObjectType writes objectType to w. Values below 32 are written as a
// single 5-bit field; larger values are written as AOT_ESCAPE in 5 bits
// followed by objectType-32 in 6 bits.
func writeObjectType(w *bits.Writer, objectType uint) (err error) {
	if objectType < 32 {
		return w.WriteBits(objectType, 5)
	}
	if err = w.WriteBits(AOT_ESCAPE, 5); err != nil {
		return err
	}
	return w.WriteBits(objectType-32, 6)
}
// readSampleRateIndex reads a 4-bit sample rate index from r. When that field
// holds 0xf, the next 24 bits are read and returned in its place.
func readSampleRateIndex(r *bits.Reader) (index uint, err error) {
	index, err = r.ReadBits(4)
	if err != nil || index != 0xf {
		return index, err
	}
	// 0xf is the escape value: the real value follows in a 24-bit field.
	index, err = r.ReadBits(24)
	return index, err
}
// writeSampleRateIndex writes index to w. Values below 0xf are written as a
// single 4-bit field; anything else is written as the 4-bit value 0xf followed
// by index in 24 bits.
func writeSampleRateIndex(w *bits.Writer, index uint) (err error) {
	if index < 0xf {
		return w.WriteBits(index, 4)
	}
	if err = w.WriteBits(0xf, 4); err != nil {
		return err
	}
	return w.WriteBits(index, 24)
}
// IsValid reports whether the config has a non-zero ObjectType.
func (c MPEG4AudioConfig) IsValid() bool {
	return c.ObjectType != 0
}
// Complete fills in the derived fields: SampleRate from sampleRateTable using
// SampleRateIndex, and ChannelLayout from chanConfigTable using ChannelConfig.
// A field whose index has no table entry is left unchanged.
func (c *MPEG4AudioConfig) Complete() {
	if rateIdx := c.SampleRateIndex; int(rateIdx) < len(sampleRateTable) {
		c.SampleRate = sampleRateTable[rateIdx]
	}
	if chanIdx := c.ChannelConfig; int(chanIdx) < len(chanConfigTable) {
		c.ChannelLayout = chanConfigTable[chanIdx]
	}
}
// ParseMPEG4AudioConfigBytes decodes a serialized MPEG-4 audio config from
// data: the object type, then the sample rate index, then a 4-bit channel
// configuration. The derived fields are filled in by Complete before
// returning.
//
// If a read fails, that error is returned together with whatever fields were
// decoded up to that point.
func ParseMPEG4AudioConfigBytes(data []byte) (config MPEG4AudioConfig, err error) {
	// copied from libavcodec/mpeg4audio.c avpriv_mpeg4audio_get_config()
	br := &bits.Reader{R: bytes.NewReader(data)}

	config.ObjectType, err = readObjectType(br)
	if err != nil {
		return config, err
	}

	config.SampleRateIndex, err = readSampleRateIndex(br)
	if err != nil {
		return config, err
	}

	config.ChannelConfig, err = br.ReadBits(4)
	if err != nil {
		return config, err
	}

	config.Complete()
	return config, nil
}
// WriteMPEG4AudioConfig serializes config to w: the object type, then the
// sample rate index, then the channel configuration in 4 bits, and finally
// flushes the bit writer.
//
// When SampleRateIndex is 0 it is looked up from SampleRate in
// sampleRateTable, and when ChannelConfig is 0 it is looked up from
// ChannelLayout in chanConfigTable; a value with no match stays 0. config is
// received by value, so the caller's struct is never modified.
func WriteMPEG4AudioConfig(w io.Writer, config MPEG4AudioConfig) (err error) {
	// Resolve both coded values up front; the lookups have no side effects.
	if config.SampleRateIndex == 0 {
		for idx := range sampleRateTable {
			if sampleRateTable[idx] == config.SampleRate {
				config.SampleRateIndex = uint(idx)
			}
		}
	}
	if config.ChannelConfig == 0 {
		for idx := range chanConfigTable {
			if chanConfigTable[idx] == config.ChannelLayout {
				config.ChannelConfig = uint(idx)
			}
		}
	}

	bw := &bits.Writer{W: w}
	if err = writeObjectType(bw, config.ObjectType); err != nil {
		return err
	}
	if err = writeSampleRateIndex(bw, config.SampleRateIndex); err != nil {
		return err
	}
	if err = bw.WriteBits(config.ChannelConfig, 4); err != nil {
		return err
	}
	return bw.FlushBits()
}
// CodecData pairs a serialized MPEG-4 audio config with its parsed form.
type CodecData struct {
// ConfigBytes is the serialized config exactly as handed to
// NewCodecDataFromMPEG4AudioConfigBytes.
ConfigBytes []byte
// Config is the result of parsing ConfigBytes.
Config MPEG4AudioConfig
}
// Type always returns av.AAC.
func (CodecData) Type() av.CodecType {
	return av.AAC
}
// MPEG4AudioConfigBytes returns the stored ConfigBytes slice (not a copy).
func (cd CodecData) MPEG4AudioConfigBytes() []byte {
	return cd.ConfigBytes
}
// ChannelLayout returns the channel layout of the parsed config.
func (cd CodecData) ChannelLayout() av.ChannelLayout {
	return cd.Config.ChannelLayout
}
// SampleRate returns the sample rate of the parsed config.
func (cd CodecData) SampleRate() int {
	return cd.Config.SampleRate
}
// SampleFormat always returns av.FLTP.
func (CodecData) SampleFormat() av.SampleFormat {
	return av.FLTP
}
// PacketDuration returns the duration of one packet, computed as 1024 samples
// at the configured sample rate. The data argument is not inspected.
//
// An error is returned when the configured sample rate is not positive. That
// happens when Complete found no table entry for the sample rate index, and
// dividing by it would otherwise panic.
func (cd CodecData) PacketDuration(data []byte) (dur time.Duration, err error) {
	rate := cd.Config.SampleRate
	if rate <= 0 {
		err = fmt.Errorf("aacparser: invalid sample rate %d", rate)
		return
	}
	dur = time.Duration(1024) * time.Second / time.Duration(rate)
	return
}
// NewCodecDataFromMPEG4AudioConfig serializes config with
// WriteMPEG4AudioConfig and builds a CodecData from the resulting bytes via
// NewCodecDataFromMPEG4AudioConfigBytes.
//
// A serialization failure is returned to the caller instead of being ignored,
// so a truncated config is never parsed back.
func NewCodecDataFromMPEG4AudioConfig(config MPEG4AudioConfig) (self CodecData, err error) {
	b := &bytes.Buffer{}
	if err = WriteMPEG4AudioConfig(b, config); err != nil {
		err = fmt.Errorf("aacparser: write MPEG4AudioConfig failed(%s)", err)
		return
	}
	return NewCodecDataFromMPEG4AudioConfigBytes(b.Bytes())
}
// NewCodecDataFromMPEG4AudioConfigBytes builds a CodecData from a serialized
// MPEG-4 audio config. The slice is stored as ConfigBytes without copying and
// parsed with ParseMPEG4AudioConfigBytes to fill in Config.
//
// A parse failure is returned wrapped in an "aacparser:" message.
func NewCodecDataFromMPEG4AudioConfigBytes(config []byte) (self CodecData, err error) {
	self.ConfigBytes = config

	parsed, parseErr := ParseMPEG4AudioConfigBytes(config)
	self.Config = parsed
	if parseErr != nil {
		return self, fmt.Errorf("aacparser: parse MPEG4AudioConfig failed(%s)", parseErr)
	}
	return self, nil
}