Skip to content

Commit

Permalink
Sort repeated lyrics that may be out of order (#2989)
Browse files Browse the repository at this point in the history
For synchronized lyrics where a single line carries several timestamps (repeated text), there is no guarantee that the repeats are in chronological order (e.g. `[00:00.00][00:10.00] a\n[00:05.00]b`).
This change post-processes lyrics that contain a line with more than one timestamp, sorting the resulting lines by start time so that they are always in order.
  • Loading branch information
kgarner7 committed May 2, 2024
1 parent 8f11b99 commit a4c2232
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 39 deletions.
101 changes: 62 additions & 39 deletions model/lyrics.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package model

import (
"cmp"
"regexp"
"slices"
"strconv"
"strings"

Expand Down Expand Up @@ -46,6 +48,7 @@ func ToLyrics(language, text string) (*Lyrics, error) {
synced := syncRegex.MatchString(text)
priorLine := ""
validLine := false
repeated := false
var timestamps []int64

for _, line := range lines {
Expand Down Expand Up @@ -82,6 +85,10 @@ func ToLyrics(language, text string) (*Lyrics, error) {
}

times := timeRegex.FindAllStringSubmatchIndex(line, -1)
if len(times) > 1 {
repeated = true
}

// The second condition is for when there is a timestamp in the middle of
// a line (after any text)
if times == nil || times[0][0] != 0 {
Expand All @@ -105,9 +112,6 @@ func ToLyrics(language, text string) (*Lyrics, error) {

// [fullStart, fullEnd, hourStart, hourEnd, minStart, minEnd, secStart, secEnd, msStart, msEnd]
for _, match := range times {
var hours, millis int64
var err error

// for multiple matches, we need to check that later matches are not
// in the middle of the string
if end != 0 {
Expand All @@ -118,46 +122,11 @@ func ToLyrics(language, text string) (*Lyrics, error) {
}

end = match[1]

hourStart := match[2]
if hourStart != -1 {
// subtract 1 because group has : at the end
hourEnd := match[3] - 1
hours, err = strconv.ParseInt(line[hourStart:hourEnd], 10, 64)
if err != nil {
return nil, err
}
}

minutes, err := strconv.ParseInt(line[match[4]:match[5]], 10, 64)
timeInMillis, err := parseTime(line, match)
if err != nil {
return nil, err
}

sec, err := strconv.ParseInt(line[match[6]:match[7]], 10, 64)
if err != nil {
return nil, err
}

msStart := match[8]
if msStart != -1 {
msEnd := match[9]
// +1 offset since this capture group contains .
millis, err = strconv.ParseInt(line[msStart+1:msEnd], 10, 64)
if err != nil {
return nil, err
}

length := msEnd - msStart

if length == 3 {
millis *= 10
} else if length == 2 {
millis *= 100
}
}

timeInMillis := (((((hours * 60) + minutes) * 60) + sec) * 1000) + millis
timestamps = append(timestamps, timeInMillis)
}

Expand Down Expand Up @@ -186,6 +155,14 @@ func ToLyrics(language, text string) (*Lyrics, error) {
}
}

// If there are repeated values, there is no guarantee that they are in order.
// In this case, sort the lyrics by start time
if repeated {
slices.SortFunc(structuredLines, func(a, b Line) int {
return cmp.Compare(*a.Start, *b.Start)
})
}

lyrics := Lyrics{
DisplayArtist: artist,
DisplayTitle: title,
Expand All @@ -198,4 +175,50 @@ func ToLyrics(language, text string) (*Lyrics, error) {
return &lyrics, nil
}

// parseTime converts a single timestamp found in line into milliseconds.
//
// match holds the submatch index pairs for that timestamp, laid out as
// [fullStart, fullEnd, hourStart, hourEnd, minStart, minEnd, secStart, secEnd,
// msStart, msEnd]. The hour and fraction groups are optional; a start index of
// -1 means the group is absent and its component is treated as zero.
//
// It returns the first strconv.ParseInt error encountered (checked in the
// order hours, minutes, seconds, fraction), with a zero result in that case.
func parseTime(line string, match []int) (int64, error) {
	var hours int64
	if from := match[2]; from != -1 {
		// The hour group includes its trailing ':', so stop one byte early.
		h, err := strconv.ParseInt(line[from:match[3]-1], 10, 64)
		if err != nil {
			return 0, err
		}
		hours = h
	}

	minutes, err := strconv.ParseInt(line[match[4]:match[5]], 10, 64)
	if err != nil {
		return 0, err
	}

	seconds, err := strconv.ParseInt(line[match[6]:match[7]], 10, 64)
	if err != nil {
		return 0, err
	}

	var millis int64
	if from := match[8]; from != -1 {
		to := match[9]
		// The fraction group includes its leading '.', so skip one byte.
		frac, err := strconv.ParseInt(line[from+1:to], 10, 64)
		if err != nil {
			return 0, err
		}

		// The group width counts the '.', so a width of 2 is one digit
		// (tenths) and a width of 3 is two digits (hundredths). Any other
		// width is taken as milliseconds already.
		switch to - from {
		case 2:
			frac *= 100
		case 3:
			frac *= 10
		}
		millis = frac
	}

	totalMinutes := hours*60 + minutes
	totalSeconds := totalMinutes*60 + seconds
	return totalSeconds*1000 + millis, nil
}

type LyricList []Lyrics
14 changes: 14 additions & 0 deletions model/lyrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,18 @@ var _ = Describe("ToLyrics", func() {
{Start: &c, Value: "c"},
}))
})

It("Properly sorts repeated lyrics out of order", func() {
a, b, c, d, e := int64(0), int64(10000), int64(40000), int64(13*60*1000), int64(1000*60*60*51)
lyrics, err := ToLyrics("xxx", "[00:00.00] [13:00]Repeated\n[00:10.00][51:00:00.00]Test\n[00:40.00]Not repeated")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.Line).To(Equal([]Line{
{Start: &a, Value: "Repeated"},
{Start: &b, Value: "Test"},
{Start: &c, Value: "Not repeated"},
{Start: &d, Value: "Repeated"},
{Start: &e, Value: "Test"},
}))
})
})

0 comments on commit a4c2232

Please sign in to comment.