Skip to content

Commit

Permalink
Fix UTF16LE parsing and writing
Browse files Browse the repository at this point in the history
See #51
  • Loading branch information
n10v committed Mar 3, 2020
1 parent 1f38ceb commit 88f2646
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 23 deletions.
5 changes: 5 additions & 0 deletions buf_reader.go
Expand Up @@ -168,6 +168,11 @@ func (br *bufReader) ReadText(encoding Encoding) []byte {
var text []byte
delims := encoding.TerminationBytes
text, br.err = br.readTillDelims(delims)

if encoding.Equals(EncodingUTF16) {
text = append(text, br.ReadByte())
}

br.Discard(len(delims))

return text
Expand Down
13 changes: 7 additions & 6 deletions buf_writer.go
Expand Up @@ -9,7 +9,7 @@ import (
type bufWriter struct {
err error
w *bufio.Writer
written int64
written int
}

func newBufWriter(w io.Writer) *bufWriter {
Expand All @@ -20,6 +20,7 @@ func (bw *bufWriter) EncodeAndWriteText(src string, to Encoding) {
if bw.err != nil {
return
}

bw.err = encodeWriteText(bw, src, to)
}

Expand Down Expand Up @@ -59,25 +60,25 @@ func (bw *bufWriter) WriteString(s string) {
}
var n int
n, bw.err = bw.w.WriteString(s)
bw.written += int64(n)
bw.written += n
}

func (bw *bufWriter) Write(p []byte) (n int, err error) {
if bw.err != nil {
return 0, bw.err
}
n, err = bw.w.Write(p)
bw.written += int64(n)
bw.written += n
bw.err = err
return n, err
}

func (bw *bufWriter) Written() int64 {
func (bw *bufWriter) Written() int {
return bw.written
}

func useBufWriter(w io.Writer, f func(*bufWriter)) (int64, error) {
var writtenBefore int64
var writtenBefore int
bw, ok := w.(*bufWriter)
if ok {
writtenBefore = bw.Written()
Expand All @@ -88,5 +89,5 @@ func useBufWriter(w io.Writer, f func(*bufWriter)) (int64, error) {

f(bw)

return bw.Written() - writtenBefore, bw.Flush()
return int64(bw.Written() - writtenBefore), bw.Flush()
}
22 changes: 19 additions & 3 deletions encoding.go
Expand Up @@ -2,6 +2,7 @@ package id3v2

import (
"bytes"
"io/ioutil"

xencoding "golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
Expand Down Expand Up @@ -99,9 +100,11 @@ func encodedSize(src string, enc Encoding) int {
return len(src)
}

toXEncoding := resolveXEncoding(nil, enc)
encoded, _ := toXEncoding.Encoder().String(src)
return len(encoded)
bw := getBufWriter(ioutil.Discard)
encodeWriteText(bw, src, enc)

return bw.Written()

}

// decodeText decodes src from "from" encoding to UTF-8.
Expand All @@ -117,6 +120,13 @@ func decodeText(src []byte, from Encoding) string {
if err != nil {
return string(src)
}

// HACK: Delete REPLACEMENT CHARACTER (�) if decoding went wrong.
// See https://apps.timwhitlock.info/unicode/inspect?s=%EF%BF%BD
if from.Equals(EncodingUTF16) {
result = bytes.ReplaceAll(result, []byte{0xEF, 0xBF, 0xBD}, []byte{})
}

return string(result)
}

Expand All @@ -132,7 +142,13 @@ func encodeWriteText(bw *bufWriter, src string, to Encoding) error {
if err != nil {
return err
}

bw.WriteString(encoded)

if to.Equals(EncodingUTF16) && !bytes.HasSuffix([]byte(encoded), []byte{0}) {
bw.WriteByte(0)
}

return nil
}

Expand Down
48 changes: 41 additions & 7 deletions encoding_test.go
Expand Up @@ -29,11 +29,10 @@ func TestEncodeWriteText(t *testing.T) {
src string
to Encoding
expected []byte
size int
}{
{"Héllö", EncodingISO, []byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, 5},
{"Héllö", EncodingUTF16, []byte{0xFE, 0xFF, 0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, 12},
{"Héllö", EncodingUTF16BE, []byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, 10},
{"Héllö", EncodingISO, []byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}},
{"Héllö", EncodingUTF16, []byte{0xFE, 0xFF, 0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}},
{"Héllö", EncodingUTF16BE, []byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}},
}

buf := new(bytes.Buffer)
Expand All @@ -49,10 +48,45 @@ func TestEncodeWriteText(t *testing.T) {
}
got := buf.Bytes()
if !bytes.Equal(got, tc.expected) {
t.Errorf("Expected %q from %q encoding, got %q", tc.expected, tc.to, got)
t.Errorf("Expected %q to %q encoding, got %q", tc.expected, tc.to, got)
}
if bw.Written() != int64(tc.size) {
t.Errorf("Expected %v size, got %v", tc.size, bw.Written())
if bw.Written() != len(tc.expected) {
t.Errorf("Expected %v size, got %v", len(tc.expected), bw.Written())
}
}
}

// TestUnsynchronisedLyricsFrameWithUTF16 checks that an unsynchronised
// lyrics frame written with EncodingUTF16 is parsed back with the same
// content descriptor and lyrics.
//
// See https://github.com/bogem/id3v2/issues/51.
func TestUnsynchronisedLyricsFrameWithUTF16(t *testing.T) {
	contentDescriptor := "Content descriptor"
	lyrics := "Lyrics"

	frame := UnsynchronisedLyricsFrame{
		Encoding:          EncodingUTF16,
		Language:          "eng",
		ContentDescriptor: contentDescriptor,
		Lyrics:            lyrics,
	}

	buf := new(bytes.Buffer)

	if _, err := frame.WriteTo(buf); err != nil {
		t.Fatal(err)
	}

	parsed, err := parseUnsynchronisedLyricsFrame(newBufReader(buf))
	if err != nil {
		t.Fatal(err)
	}

	// Use the checked form of the type assertion so that an unexpected
	// frame type fails the test with a message instead of panicking.
	uslf, ok := parsed.(UnsynchronisedLyricsFrame)
	if !ok {
		t.Fatalf("Expected UnsynchronisedLyricsFrame, got: %T", parsed)
	}

	if uslf.ContentDescriptor != contentDescriptor {
		t.Errorf("Expected content descriptor: %q, got: %q", contentDescriptor, uslf.ContentDescriptor)
	}

	if uslf.Lyrics != lyrics {
		t.Errorf("Expected lyrics: %q, got: %q", lyrics, uslf.Lyrics)
	}
}
4 changes: 2 additions & 2 deletions tag.go
Expand Up @@ -410,10 +410,10 @@ func (tag *Tag) WriteTo(w io.Writer) (n int64, err error) {
})
if err != nil {
bw.Flush()
return bw.Written(), err
return int64(bw.Written()), err
}

return bw.Written(), bw.Flush()
return int64(bw.Written()), bw.Flush()
}

func writeTagHeader(bw *bufWriter, framesSize uint, version byte) {
Expand Down
10 changes: 5 additions & 5 deletions tag_test.go
Expand Up @@ -28,28 +28,28 @@ const (

var (
frontCover = PictureFrame{
Encoding: EncodingUTF8,
Encoding: EncodingUTF16,
MimeType: "image/jpeg",
PictureType: PTFrontCover,
Description: "Front cover",
Picture: mustReadFile(frontCoverPath),
}
backCover = PictureFrame{
Encoding: EncodingUTF8,
Encoding: EncodingUTF16,
MimeType: "image/jpeg",
PictureType: PTBackCover,
Description: "Back cover",
Picture: mustReadFile(backCoverPath),
}

engUSLF = UnsynchronisedLyricsFrame{
Encoding: EncodingUTF8,
Encoding: EncodingUTF16,
Language: "eng",
ContentDescriptor: "Content descriptor",
Lyrics: "bogem/id3v2",
}
gerUSLF = UnsynchronisedLyricsFrame{
Encoding: EncodingUTF8,
Encoding: EncodingUTF16,
Language: "ger",
ContentDescriptor: "Inhaltsdeskriptor",
Lyrics: "Einigkeit und Recht und Freiheit",
Expand Down Expand Up @@ -473,7 +473,7 @@ func TestConcurrent(t *testing.T) {
}

// TestEncodedText checks
// if text of frames encoded with different encodings is correctly written.
// if texts of frames encoded with different encodings are correctly written.
func TestEncodedText(t *testing.T) {
t.Parallel()

Expand Down

0 comments on commit 88f2646

Please sign in to comment.