You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
If a website body contains a JSON string, I get garbage URLs...
Specific case:
string := `{"props":{"pageProps":{"theme":{"key":"leaf","mode":"light","colors":{"body":"palette.slate13","linkText":"#fff","linkBackground":"#39e09b","linkShadow":"#000"},"components":{"ProfileBackground":{"backgroundColor":"#fff","backgroundStyle":"flat"},"LinkContainer":{"borderType":"squared","styleType":"fill"},"SocialLink":{"fill":"linkBackground"},"Banner":{"default":{"backgroundColor":"linkBackground","color":"linkText"}}}},"username":"adrianphoto_bcn","pageTitle":"@adrianphoto_bcn","metaTitle":"@adrianphoto_bcn","metaDescription":"Linktree. Make your link do more.","profilePictureUrl":"https://d15mvavv27jnvy.cloudfront.net/zdKaK/660bb5ffef7d46960c5c1be349944840.jpg","description":null,"links":[{"id":"11987649","url":"https://onlyfans.com/adrianphotobcn","animation":null,"amazonAffiliate":null,"thumbnail":null,"title":"Onlyfans","type":"CLASSIC","context":{}},{"id":"7730208","url":"http://Photoproducer.manyvids.com","animation":null,"amazonAffiliate":null,"thumbnail":null,"title":"ManyVids","type":"CLASSIC","context":{}},{"id":"11994192","url":"https://www.suicidegirls.com/members/adrianphoto_bcn/","animation":null,"amazonAffiliate":null,"thumbnail":null,"title":"Suicidegirls","type":"CLASSIC","context":{}},{"id":"7730413","url":"https://mobile.twitter.com/adrianphoto_bcn","animation":null,"amazonAffiliate":null,"thumbnail":null,"title":"Twitter","type":"CLASSIC","context":{}},{"id":"7730346","url":"https://www.instagram.com/adrianphotobcn","animation":null,"amazonAffiliate":null,"thumbnail":null,"title":"Instagram","type":"CLASSIC","context":{}},{"id":"16064948","url":"https://www.instagram.com/afoto.bcn","animation":null,"amazonAffiliate":null,"thumbnail":null,"title":"Instagram 
sec","type":"CLASSIC","context":{}}],"socialLinks":[],"integrations":[],"leapLink":null,"isOwner":false,"isLogoVisible":true,"isProfileVerified":true,"hasConsentedToView":true,"account":{"id":1848934,"username":"adrianphoto_bcn","isActive":true,"profilePictureUrl":"https://d15mvavv27jnvy.cloudfront.net/zdKaK/660bb5ffef7d46960c5c1be349944840.jpg","pageTitle":"@adrianphoto_bcn","googleAnalyticsId":null,"facebookPixelId":null,"donationsActive":false,"contentWarning":null,"description":null,"isLogoVisible":true,"owner":{"id":2054277,"isEmailVerified":true},"pageMeta":null,"integrations":[],"links":[{"id":11987649,"type":"CLASSIC","title":"Onlyfans","url":"https://onlyfans.com/adrianphotobcn","formattedUrl":"https://onlyfans.com/adrianphotobcn","thumbnailUrl":null,"animation":null,"isLeapLink":false,"isLeapLinkActive":false,"amazonAffiliate":null,"context":null},{"id":7730208,"type":"CLASSIC","title":"ManyVids","url":"Photoproducer.manyvids.com","formattedUrl":"http://Photoproducer.manyvids.com","thumbnailUrl":null,"animation":null,"isLeapLink":false,"isLeapLinkActive":false,"amazonAffiliate":null,"context":null},{"id":11994192,"type":"CLASSIC","title":"Suicidegirls","url":"https://www.suicidegirls.com/members/adrianphoto_bcn/","formattedUrl":"https://www.suicidegirls.com/members/adrianphoto_bcn/","thumbnailUrl":null,"animation":null,"isLeapLink":false,"isLeapLinkActive":false,"amazonAffiliate":null,"context":null},{"id":7730413,"type":"CLASSIC","title":"Twitter","url":"https://mobile.twitter.com/adrianphoto_bcn","formattedUrl":"https://mobile.twitter.com/adrianphoto_bcn","thumbnailUrl":null,"animation":null,"isLeapLink":false,"isLeapLinkActive":false,"amazonAffiliate":null,"context":null},{"id":7730346,"type":"CLASSIC","title":"Instagram","url":"https://www.instagram.com/adrianphotobcn","formattedUrl":"https://www.instagram.com/adrianphotobcn","thumbnailUrl":null,"animation":null,"isLeapLink":false,"isLeapLinkActive":false,"amazonAffiliate":null,"context":null},{"id":16
064948,"type":"CLASSIC","title":"Instagram sec","url":"https://www.instagram.com/afoto.bcn","formattedUrl":"https://www.instagram.com/afoto.bcn","thumbnailUrl":null,"animation":null,"isLeapLink":false,"isLeapLinkActive":false,"amazonAffiliate":null,"context":null}],"socialLinks":[],"theme":{"key":"leaf"}}},"__N_SSP":true},"page":"/[profile]","query":{"profile":"adrianphoto_bcn"}`
rxStrict := xurls.Strict()
urls := rxStrict.FindAllString(string, -1)
for _, url := range urls {
fmt.Printf("%s\n",url)
}
thanks
The text was updated successfully, but these errors were encountered:
Thanks for raising this. I think you should be parsing the JSON first, to avoid weird edge cases like this one, and because you would want to unescape JSON strings before extracting URLs anyway.
Having said that, it probably doesn't make sense for us to support double quotes in the middle of a URL. I'll see what I can do about that.
Hi,
If a website body contains a JSON string, I get garbage URLs...
Specific case...
Thanks
The text was updated successfully, but these errors were encountered: