diff --git a/buf_reader.go b/buf_reader.go index 2452392..057a782 100644 --- a/buf_reader.go +++ b/buf_reader.go @@ -168,6 +168,11 @@ func (br *bufReader) ReadText(encoding Encoding) []byte { var text []byte delims := encoding.TerminationBytes text, br.err = br.readTillDelims(delims) + + if encoding.Equals(EncodingUTF16) { + text = append(text, br.ReadByte()) + } + br.Discard(len(delims)) return text diff --git a/buf_writer.go b/buf_writer.go index ba7968b..39d7b8c 100644 --- a/buf_writer.go +++ b/buf_writer.go @@ -9,7 +9,7 @@ import ( type bufWriter struct { err error w *bufio.Writer - written int64 + written int } func newBufWriter(w io.Writer) *bufWriter { @@ -20,6 +20,7 @@ func (bw *bufWriter) EncodeAndWriteText(src string, to Encoding) { if bw.err != nil { return } + bw.err = encodeWriteText(bw, src, to) } @@ -59,7 +60,7 @@ func (bw *bufWriter) WriteString(s string) { } var n int n, bw.err = bw.w.WriteString(s) - bw.written += int64(n) + bw.written += n } func (bw *bufWriter) Write(p []byte) (n int, err error) { @@ -67,17 +68,17 @@ func (bw *bufWriter) Write(p []byte) (n int, err error) { return 0, bw.err } n, err = bw.w.Write(p) - bw.written += int64(n) + bw.written += n bw.err = err return n, err } -func (bw *bufWriter) Written() int64 { +func (bw *bufWriter) Written() int { return bw.written } func useBufWriter(w io.Writer, f func(*bufWriter)) (int64, error) { - var writtenBefore int64 + var writtenBefore int bw, ok := w.(*bufWriter) if ok { writtenBefore = bw.Written() @@ -88,5 +89,5 @@ func useBufWriter(w io.Writer, f func(*bufWriter)) (int64, error) { f(bw) - return bw.Written() - writtenBefore, bw.Flush() + return int64(bw.Written() - writtenBefore), bw.Flush() } diff --git a/encoding.go b/encoding.go index 03b78c1..5dd2c0f 100644 --- a/encoding.go +++ b/encoding.go @@ -2,6 +2,7 @@ package id3v2 import ( "bytes" + "io/ioutil" xencoding "golang.org/x/text/encoding" "golang.org/x/text/encoding/charmap" @@ -99,9 +100,11 @@ func encodedSize(src string, enc Encoding) int { return len(src) } - toXEncoding := resolveXEncoding(nil, enc) - encoded, _ := toXEncoding.Encoder().String(src) - return len(encoded) + bw := getBufWriter(ioutil.Discard) + encodeWriteText(bw, src, enc) + + return bw.Written() + } // decodeText decodes src from "from" encoding to UTF-8. @@ -117,6 +120,13 @@ func decodeText(src []byte, from Encoding) string { if err != nil { return string(src) } + + // HACK: Delete REPLACEMENT CHARACTER (�) if encoding went wrong. + // See https://apps.timwhitlock.info/unicode/inspect?s=%EF%BF%BD + if from.Equals(EncodingUTF16) { + result = bytes.ReplaceAll(result, []byte{0xEF, 0xBF, 0xBD}, []byte{}) + } + return string(result) } @@ -132,7 +142,13 @@ func encodeWriteText(bw *bufWriter, src string, to Encoding) error { if err != nil { return err } + bw.WriteString(encoded) + + if to.Equals(EncodingUTF16) && !bytes.HasSuffix([]byte(encoded), []byte{0}) { + bw.WriteByte(0) + } + return nil } diff --git a/encoding_test.go b/encoding_test.go index 77a8817..bc2e846 100644 --- a/encoding_test.go +++ b/encoding_test.go @@ -29,11 +29,10 @@ func TestEncodeWriteText(t *testing.T) { src string to Encoding expected []byte - size int }{ - {"Héllö", EncodingISO, []byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, 5}, - {"Héllö", EncodingUTF16, []byte{0xFE, 0xFF, 0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, 12}, - {"Héllö", EncodingUTF16BE, []byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, 10}, + {"Héllö", EncodingISO, []byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}}, + {"Héllö", EncodingUTF16, []byte{0xFE, 0xFF, 0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}}, + {"Héllö", EncodingUTF16BE, []byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}}, } buf := new(bytes.Buffer) @@ -49,10 +48,45 @@ func TestEncodeWriteText(t *testing.T) { } got := buf.Bytes() if !bytes.Equal(got, tc.expected) { - t.Errorf("Expected %q from %q encoding, got %q", tc.expected, tc.to, got) + t.Errorf("Expected %q to %q encoding, got %q", tc.expected, tc.to, got) } - if bw.Written() != int64(tc.size) { - t.Errorf("Expected %v size, got %v", tc.size, bw.Written()) + if bw.Written() != len(tc.expected) { + t.Errorf("Expected %v size, got %v", len(tc.expected), bw.Written()) } } } + +// See https://github.com/bogem/id3v2/issues/51. +func TestUnsynchronisedLyricsFrameWithUTF16(t *testing.T) { + contentDescriptor := "Content descriptor" + lyrics := "Lyrics" + + frame := UnsynchronisedLyricsFrame{ + Encoding: EncodingUTF16, + Language: "eng", + ContentDescriptor: contentDescriptor, + Lyrics: lyrics, + } + + buf := new(bytes.Buffer) + + if _, err := frame.WriteTo(buf); err != nil { + t.Fatal(err) + } + + parsed, err := parseUnsynchronisedLyricsFrame(newBufReader(buf)) + if err != nil { + t.Fatal(err) + } + + uslf := parsed.(UnsynchronisedLyricsFrame) + + if uslf.ContentDescriptor != contentDescriptor { + t.Errorf("Expected content descriptor: %q, got: %q", contentDescriptor, uslf.ContentDescriptor) + } + + if uslf.Lyrics != lyrics { + t.Errorf("Expected lyrics: %q, got: %q", lyrics, uslf.Lyrics) + } + +} diff --git a/tag.go b/tag.go index de0418c..5407dc6 100644 --- a/tag.go +++ b/tag.go @@ -410,10 +410,10 @@ func (tag *Tag) WriteTo(w io.Writer) (n int64, err error) { }) if err != nil { bw.Flush() - return bw.Written(), err + return int64(bw.Written()), err } - return bw.Written(), bw.Flush() + return int64(bw.Written()), bw.Flush() } func writeTagHeader(bw *bufWriter, framesSize uint, version byte) { diff --git a/tag_test.go b/tag_test.go index 5ea11ed..08caa24 100644 --- a/tag_test.go +++ b/tag_test.go @@ -28,14 +28,14 @@ const ( var ( frontCover = PictureFrame{ - Encoding: EncodingUTF8, + Encoding: EncodingUTF16, MimeType: "image/jpeg", PictureType: PTFrontCover, Description: "Front cover", Picture: mustReadFile(frontCoverPath), } backCover = PictureFrame{ - Encoding: EncodingUTF8, + Encoding: EncodingUTF16, MimeType: "image/jpeg", PictureType: PTBackCover, Description: "Back cover", @@ -43,13 +43,13 @@ var ( } engUSLF = UnsynchronisedLyricsFrame{ - Encoding: EncodingUTF8, + Encoding: EncodingUTF16, Language: "eng", ContentDescriptor: "Content descriptor", Lyrics: "bogem/id3v2", } gerUSLF = UnsynchronisedLyricsFrame{ - Encoding: EncodingUTF8, + Encoding: EncodingUTF16, Language: "ger", ContentDescriptor: "Inhaltsdeskriptor", Lyrics: "Einigkeit und Recht und Freiheit", @@ -473,7 +473,7 @@ func TestConcurrent(t *testing.T) { } // TestEncodedText checks -// if text of frames encoded with different encodings is correctly written. +// if texts of frames encoded with different encodings are correctly written. func TestEncodedText(t *testing.T) { t.Parallel()