Skip to content

Commit

Permalink
new html parser now successfully parses album/track pages, album from #4
Browse files Browse the repository at this point in the history
 parses without problems, not finished
  • Loading branch information
olde-ducke committed Jan 23, 2022
1 parent 7805f75 commit 8e19cd8
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 6 deletions.
2 changes: 1 addition & 1 deletion json.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ func parseTagSearchJSON(dataBlobJSON string, highlights bool) (*Result, error) {
}

if index > len(dataBlob.Hubs.Tabs)-1 {
return nil, errors.New("tag page JSON parser: ./json.go:265: tab index out of range")
return nil, errors.New("tag page JSON parser: ./json.go:272: tab index out of range")
}

key := dataBlob.Hubs.Tabs[index].DigDeeper.InitialSettings
Expand Down
72 changes: 67 additions & 5 deletions net.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ func processmediapage(ctx context.Context, link string, dbg, msg func(string)) (
}
msg(response.Status)

// check canonical name in response header
// must be artist.bandcamp.com
canonical := response.Header.Get("link")
if !strings.Contains(canonical, "bandcamp.com") {
return originError
}

contentType := response.Header.Get("Content-Type")
dbg(contentType)

Expand All @@ -114,13 +121,49 @@ func processmediapage(ctx context.Context, link string, dbg, msg func(string)) (
return err
}

inf, _ := getAttrVal(doc, "meta", "name")
dbg(inf)
inf, _ = getValByAttr(doc, &html.Attribute{
mediadata, ok := getAttrVal(doc, "script", "data-tralbum")
if !ok {
dbg("failed to parse mediadata")
return unexpectedError
}

metadata, ok := getTextByAttr(doc, &html.Attribute{
Key: "type",
Val: "application/ld+json",
}, "script")
if !ok {
return unexpectedError
}

itemType, ok := getValByAttr(doc, &html.Attribute{
Key: "property",
Val: "og:type",
}, "meta", "content")
dbg(inf)
dbg(itemType)
if !ok {
return unexpectedError
}

var isAlbum bool
switch itemType {
case "album":
isAlbum = true
msg("found album data")
case "song":
isAlbum = false
msg("found track data")
default:
// TODO: parse albums/tracks from discography page
return unexpectedError
}

result, err := parseTrAlbumJSON(metadata, mediadata, isAlbum)
if err != nil {
return err
}
dbg(fmt.Sprint(result))
dbg(mediadata)

return nil
})

Expand All @@ -142,7 +185,6 @@ func getAttrVal(node *html.Node, tag, attr string) (string, bool) {
if val, ok := getAttr(node, attr); ok {
return val, ok
}

}

for child := node.FirstChild; child != nil; child = child.NextSibling {
Expand Down Expand Up @@ -180,6 +222,26 @@ func hasAttr(node *html.Node, attr *html.Attribute) bool {
return false
}

// getTextByAttr searches the tree rooted at node, depth-first and in document
// order, for an element named tag for which hasAttr(node, attr) reports true
// and whose first child is a text node. It returns that text node's data and
// true on the first match, or "" and false when nothing in the tree matches.
//
// An element that matches tag and attr but does not start with a text node is
// not treated as a hit; its descendants are still searched.
func getTextByAttr(node *html.Node, attr *html.Attribute, tag string) (string, bool) {
	// Only consult hasAttr for elements with the requested tag name.
	isTarget := node.Type == html.ElementNode && node.Data == tag
	if isTarget && hasAttr(node, attr) {
		first := node.FirstChild
		if first != nil && first.Type == html.TextNode {
			return first.Data, true
		}
	}

	// No usable text on this node: recurse into each child in order and
	// stop at the first subtree that yields a match.
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		text, found := getTextByAttr(c, attr, tag)
		if found {
			return text, true
		}
	}

	return "", false
}

func downloadmedia(ctx context.Context, link string, dbg, msg func(string)) (string, error) {
dbg(link)
msg("fetching")
Expand Down

0 comments on commit 8e19cd8

Please sign in to comment.