Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

[musicbrainz] Update the Scanner & Scrapers to support ID resolution …

…for MusicBrainzIDs
  • Loading branch information...
commit ad4ec1f2e9a182f2f8a92158684a4d64829d14c8 1 parent ddc1499
@night199uk authored
View
64 xbmc/addons/Scraper.cpp
@@ -247,7 +247,7 @@ vector<CStdString> CScraper::Run(const CStdString& function,
CStdString strXML = InternalRun(function,scrURL,http,extras);
if (strXML.IsEmpty())
{
- if (function != "NfoUrl")
+ if (function != "NfoUrl" && function != "ResolveIDToUrl")
CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__);
throw CScraperError();
}
@@ -475,6 +475,68 @@ CScraperUrl CScraper::NfoUrl(const CStdString &sNfoContent)
return scurlRet;
}
+CScraperUrl CScraper::ResolveIDToUrl(const CStdString& externalID) // Resolves externalID to a URL by running the scraper's "ResolveIDToUrl" function.
+{
+ CScraperUrl scurlRet; // result; returned untouched when the scraper produces no usable output
+
+ // The scraper function takes the external ID as its single input and returns XML (format described below).
+ vector<CStdString> vcsIn;
+ vcsIn.push_back(externalID);
+ CScraperUrl scurl;
+ CCurlFile fcurl;
+ vector<CStdString> vcsOut = Run("ResolveIDToUrl", scurl, fcurl, &vcsIn); // Run() throws CScraperError when the scraper yields no XML (not logged as an error for this function)
+ if (vcsOut.empty() || vcsOut[0].empty())
+ return scurlRet; // nothing came back: hand the caller the untouched URL
+ if (vcsOut.size() > 1) // NOTE(review): the message says "using first", but the loop below walks every result and stops at the first one that yields a URL
+ CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
+
+ // Parse the returned XML: either an <error> element on error, blank on failure,
+ // or <url>...</url> or <url>...</url><id>...</id> on success.

Why allow the latter, invalid XML?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ for (unsigned int i=0; i < vcsOut.size(); ++i)
+ {
+ CXBMCTinyXML doc;
+ doc.Parse(vcsOut[i], 0, TIXML_ENCODING_UTF8); // each result is parsed as UTF-8 XML
+ CheckScraperError(doc.RootElement()); // presumably throws when the root is an <error> element - confirm in CheckScraperError
+
+ if (doc.RootElement())
+ {
+ /*
+ NOTE: Scrapers might return invalid XML with some loose
+ elements (e.g. '<url>http://some.url</url><id>123</id>').
+ Since XMLUtils::GetString() assumes well-formed XML
+ with start and end tags, we are not able to use it.
+ Check for the desired elements instead: below a <details>
+ root if there is one, otherwise at the top level of the document.
+ */
+ TiXmlElement* pxeUrl=NULL;
+ TiXmlElement* pId=NULL;
+ if (!strcmp(doc.RootElement()->Value(),"details"))
+ {
+ pxeUrl = doc.RootElement()->FirstChildElement("url");
+ pId = doc.RootElement()->FirstChildElement("id");
+ }
+ else
+ {
+ pId = doc.FirstChildElement("id");
+ pxeUrl = doc.FirstChildElement("url");
+ }
+ if (pId && pId->FirstChild()) // the ID is recorded before the URL is examined, so it is kept even if this result is skipped below
+ scurlRet.strId = pId->FirstChild()->Value();
+
+ if (pxeUrl && pxeUrl->Attribute("function")) // a <url> carrying a "function" attribute is not accepted here; move on to the next result
+ continue;
+
+ if (pxeUrl)
+ scurlRet.ParseElement(pxeUrl);
+ else if (!strcmp(doc.RootElement()->Value(), "url"))
+ scurlRet.ParseElement(doc.RootElement());
+ else
+ continue; // no <url> in this result: try the next one
+ break; // a URL was parsed: stop at the first usable result
+ }
+ }
+ return scurlRet;
+}
+
static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right)
{
return left.relevance > right.relevance;
View
11 xbmc/addons/Scraper.h
@@ -120,6 +120,17 @@ class CScraper : public CAddon
// scraper media functions
CScraperUrl NfoUrl(const CStdString &sNfoContent);
+ /*! \brief Resolve an external ID (e.g. MusicBrainz IDs) to a URL using scrapers
+ If we have an ID in hand, e.g. MusicBrainz IDs or TheTVDB Season IDs
+ we can get directly to a URL instead of searching by name and choosing from
+ the search results. The correct scraper type should be used to get the right
+ URL for a given ID, so we can differentiate albums, artists, TV Seasons, etc.
+ \param externalID the external ID - e.g. MusicBrainzArtist/AlbumID
+ \return a populated URL pointing to the details page for the given ID or
+ an empty URL if we couldn't resolve the ID.
+ */
+ CScraperUrl ResolveIDToUrl(const CStdString &externalID);
+
std::vector<CScraperUrl> FindMovie(XFILE::CCurlFile &fcurl,
const CStdString &sMovie, bool fFirst);
std::vector<MUSIC_GRABBER::CMusicAlbumInfo> FindAlbum(XFILE::CCurlFile &fcurl,
View
119 xbmc/music/infoscanner/MusicInfoScanner.cpp
@@ -49,6 +49,8 @@
#include "TextureCache.h"
#include "ThumbLoader.h"
#include "interfaces/AnnouncementManager.h"
+#include "addons/AddonManager.h"
+#include "addons/Scraper.h"
#include <algorithm>
@@ -56,6 +58,7 @@ using namespace std;
using namespace MUSIC_INFO;
using namespace XFILE;
using namespace MUSIC_GRABBER;
+using namespace ADDON;
CMusicInfoScanner::CMusicInfoScanner() : CThread("CMusicInfoScanner")
{
@@ -831,6 +834,19 @@ INFO_RET CMusicInfoScanner::DownloadAlbumInfo(const CAlbum& album, CMusicAlbumIn
info->ClearCache();
CMusicInfoScraper scraper(info);
+
+ // Set only when the album's MusicBrainz ID was resolved to a URL. It has to
+ // start out false because it is read further down in this function even when
+ // the branch below is not taken.
+ bool bMusicBrainz = false;
+ if (g_advancedSettings.m_bMusicLibraryUseMusicBrainz && !album.strMusicBrainzAlbumID.empty())
+ {
+   CScraperUrl musicBrainzURL;
+   if (ResolveMusicBrainz(album.strMusicBrainzAlbumID, info, scraper, musicBrainzURL))
+   {
+     // This is the album path, so the resolved URL must become the scraper's
+     // only album candidate (not an artist candidate).
+     CMusicAlbumInfo albumNfo("nfo", musicBrainzURL);
+     scraper.GetAlbums().clear();
+     scraper.GetAlbums().push_back(albumNfo);
+     bMusicBrainz = true;
+   }
+ }
// handle nfo files
CStdString strAlbumPath;
@@ -862,6 +878,7 @@ INFO_RET CMusicInfoScanner::DownloadAlbumInfo(const CAlbum& album, CMusicAlbumIn
CLog::Log(LOGDEBUG,"-- nfo-scraper: %s",info->Name().c_str());
CLog::Log(LOGDEBUG,"-- nfo url: %s", scrUrl.m_url[0].m_url.c_str());
scraper.SetScraperInfo(info);
+ scraper.GetAlbums().clear();
scraper.GetAlbums().push_back(albumNfo);
}
else
@@ -891,7 +908,7 @@ INFO_RET CMusicInfoScanner::DownloadAlbumInfo(const CAlbum& album, CMusicAlbumIn
CGUIDialogSelect *pDlg = NULL;
int iSelectedAlbum=0;
- if (result == CNfoFile::NO_NFO)
+ if (result == CNfoFile::NO_NFO && !bMusicBrainz)
{
iSelectedAlbum = -1; // set negative so that we can detect a failure
if (scraper.Succeeded() && scraper.GetAlbumCount() >= 1)
@@ -1068,6 +1085,19 @@ INFO_RET CMusicInfoScanner::DownloadArtistInfo(const CArtist& artist, CMusicArti
CMusicInfoScraper scraper(info);
+ // Set only when the artist's MusicBrainz ID was resolved to a URL. It has to
+ // start out false because it is read further down in this function even when
+ // the branch below is not taken.
+ bool bMusicBrainz = false;
+ if (g_advancedSettings.m_bMusicLibraryUseMusicBrainz && !artist.strMusicBrainzArtistID.empty())
+ {
+   CScraperUrl musicBrainzURL;
+   if (ResolveMusicBrainz(artist.strMusicBrainzArtistID, info, scraper, musicBrainzURL))
+   {
+     // Make the resolved URL the scraper's only artist candidate.
+     CMusicArtistInfo artistNfo("nfo", musicBrainzURL);
+     scraper.GetArtists().clear();
+     scraper.GetArtists().push_back(artistNfo);
+     bMusicBrainz = true;
+   }
+ }
+
// handle nfo files
CStdString strArtistPath;
m_musicDatabase.GetArtistPath(artist.idArtist, strArtistPath);
@@ -1086,6 +1116,10 @@ INFO_RET CMusicInfoScanner::DownloadArtistInfo(const CArtist& artist, CMusicArti
CLog::Log(LOGDEBUG, "%s Got details from nfo", __FUNCTION__);
nfoReader.GetDetails(artistInfo.GetArtist());
m_musicDatabase.SetArtistInfo(artist.idArtist, artistInfo.GetArtist());
+ // Update the name from the .nfo if we're using MusicBrainz to ensure any MBIDs get wiped out
+ // even for .nfo artists
+ if (g_advancedSettings.m_bMusicLibraryUseMusicBrainz && !artistInfo.GetArtist().strArtist.empty())
+ m_musicDatabase.UpdateArtist(artist.idArtist, artistInfo.GetArtist().strArtist);
map<string, string> artwork = GetArtistArtwork(artist.idArtist, &artist);
m_musicDatabase.SetArtForItem(artist.idArtist, "artist", artwork);
m_musicDatabase.Close();
@@ -1121,9 +1155,9 @@ INFO_RET CMusicInfoScanner::DownloadArtistInfo(const CArtist& artist, CMusicArti
}
int iSelectedArtist = 0;
- if (result == CNfoFile::NO_NFO)
+ if (result == CNfoFile::NO_NFO && !bMusicBrainz)
{
- if (scraper.Succeeded() && scraper.GetArtistCount() >= 1)
+ if (scraper.GetArtistCount() >= 1)
{
// now load the first match
if (pDialog && scraper.GetArtistCount() > 1)
@@ -1214,6 +1248,11 @@ INFO_RET CMusicInfoScanner::DownloadArtistInfo(const CArtist& artist, CMusicArti
nfoReader.GetDetails(artistInfo.GetArtist(), NULL, true);
m_musicDatabase.SetArtistInfo(artist.idArtist, artistInfo.GetArtist());
+ // The next line (unintuitively) will also update your artist names from .nfo files
+ // if they existed, so you can use a .nfo to override the MBID based default names
+ if (g_advancedSettings.m_bMusicLibraryUseMusicBrainz && !artistInfo.GetArtist().strArtist.empty())
+ m_musicDatabase.UpdateArtist(artist.idArtist, artistInfo.GetArtist().strArtist);
+
// check thumb stuff
map<string, string> artwork = GetArtistArtwork(artist.idArtist, &artistInfo.GetArtist());
m_musicDatabase.SetArtForItem(artist.idArtist, "artist", artwork);
@@ -1221,6 +1260,80 @@ INFO_RET CMusicInfoScanner::DownloadArtistInfo(const CArtist& artist, CMusicArti
return INFO_ADDED;
}
+/*! \brief Resolve a MusicBrainz ID to a URL using the available scrapers.
+ Scrapers are tried in priority order: the preferred scraper first, then every
+ other installed scraper of the same type, and the default scraper last. The
+ first scraper that resolves the ID is set on musicInfoScraper.
+ \param strMusicBrainzID the MusicBrainz album/artist ID to resolve.
+ \param preferredScraper the scraper to try first; its content decides the scraper type.
+ \param musicInfoScraper receives the scraper that resolved the ID (via SetScraperInfo).
+ \param musicBrainzURL [out] the resolved URL; only meaningful when true is returned.
+ \return true if a scraper resolved the ID to a non-empty URL, false otherwise.
+ */
+bool CMusicInfoScanner::ResolveMusicBrainz(const CStdString strMusicBrainzID, ScraperPtr &preferredScraper, CMusicInfoScraper &musicInfoScraper, CScraperUrl &musicBrainzURL)
+{
+  // The scraper type is derived from the preferred scraper's content, so
+  // without a preferred scraper there is nothing to resolve against.
+  if (!preferredScraper)
+    return false;
+
+  ADDON::TYPE type = ScraperTypeFromContent(preferredScraper->Content());
+
+  ADDON::AddonPtr addon;
+  ADDON::ScraperPtr defaultScraper;
+  if (ADDON::CAddonMgr::Get().GetDefault(type, addon))
+    defaultScraper = boost::dynamic_pointer_cast<CScraper>(addon);
+
+  vector<ScraperPtr> vecScrapers;
+
+  // add selected scraper - first priority
+  vecScrapers.push_back(preferredScraper);
+
+  // add all scrapers except selected and default
+  VECADDONS addons;
+  CAddonMgr::Get().GetAddons(type, addons);
+
+  for (unsigned int i = 0; i < addons.size(); ++i)
+  {
+    ScraperPtr scraper = boost::dynamic_pointer_cast<CScraper>(addons[i]);
+
+    // the cast yields an empty pointer if the addon is not a scraper
+    if (!scraper)
+      continue;
+
+    // skip if scraper requires settings and there's nothing set yet
+    if (scraper->RequiresSettings() && !scraper->HasUserSettings())
+      continue;
+
+    if (preferredScraper->ID() != scraper->ID() &&
+        (!defaultScraper || defaultScraper->ID() != scraper->ID()))
+      vecScrapers.push_back(scraper);
+  }
+
+  // add default scraper - not user selectable so it's last priority
+  if (defaultScraper &&
+      preferredScraper->ID() != defaultScraper->ID() &&
+      (!defaultScraper->RequiresSettings() || defaultScraper->HasUserSettings()))
+    vecScrapers.push_back(defaultScraper);
+
+  bool bMusicBrainz = false;
+  for (unsigned int i = 0; i < vecScrapers.size(); ++i)
+  {
+    if (vecScrapers[i]->Type() != type)
+      continue;
+
+    vecScrapers[i]->ClearCache();
+    try
+    {
+      musicBrainzURL = vecScrapers[i]->ResolveIDToUrl(strMusicBrainzID);
+    }
+    catch (const ADDON::CScraperError &sce)
+    {
+      // An aborted scrape stops the whole lookup; any other scraper error
+      // just moves on to the next candidate. Either way this scraper did not
+      // assign musicBrainzURL, so it must not be inspected below.
+      if (sce.FAborted())
+        break;
+      continue;
+    }
+    if (!musicBrainzURL.m_url.empty())
+    {
+      // MusicBrainz rate-limits queries to 1 per second. Once we hit the
+      // rate-limiter they start serving up the 'you hit the rate-limiter'
+      // page fast - meaning we will never get below the rate-limit threshold
+      // again in a specific run. Sleeping here helps us to avoid the
+      // rate-limiter as far as possible.
+      Sleep(2000);
+      CLog::Log(LOGDEBUG,"-- nfo-scraper: %s",vecScrapers[i]->Name().c_str());
+      CLog::Log(LOGDEBUG,"-- nfo url: %s", musicBrainzURL.m_url[0].m_url.c_str());
+      musicInfoScraper.SetScraperInfo(vecScrapers[i]);
+      bMusicBrainz = true;
+      break;
+    }
+  }
+
+  return bMusicBrainz;
+}
+
map<string, string> CMusicInfoScanner::GetArtistArtwork(long id, const CArtist *artist)
{
CStdString artistPath;
View
10 xbmc/music/infoscanner/MusicInfoScanner.h
@@ -21,6 +21,7 @@
#include "threads/Thread.h"
#include "music/MusicDatabase.h"
#include "MusicAlbumInfo.h"
+#include "MusicInfoScraper.h"
class CAlbum;
class CArtist;
@@ -115,6 +116,15 @@ class CMusicInfoScanner : CThread, public IRunnable
int CountFiles(const CFileItemList& items, bool recursive);
int CountFilesRecursively(const CStdString& strPath);
+ /*! \brief Resolve a MusicBrainzID to a URL
+ If we have a MusicBrainz ID for an artist or album,
+ resolve it to an MB URL and set up the scrapers accordingly.
+
+ \param strMusicBrainzID [in] The MusicBrainz album or artist ID to resolve.
+ \param preferredScraper [in] A ScraperPtr to the preferred album/artist scraper.
+ \param musicInfoScraper [in,out] Switched (via SetScraperInfo) to the scraper that resolved the ID.
+ \param musicBrainzURL [out] will be populated with the MB URL for the artist/album.
+ \return true if a scraper resolved the ID to a non-empty URL, false otherwise.
+ */
+ bool ResolveMusicBrainz(const CStdString strMusicBrainzID, ADDON::ScraperPtr &preferredScraper, MUSIC_GRABBER::CMusicInfoScraper &musicInfoScraper, CScraperUrl &musicBrainzURL);
+
protected:
IMusicInfoScannerObserver* m_pObserver;
int m_currentItem;
@jmarshallnz

Why allow the latter, invalid XML?

Please sign in to comment.
Something went wrong with that request. Please try again.