From 80d4d020a8dd34500e839c80a8ae564ae29c0e4d Mon Sep 17 00:00:00 2001 From: Michael Wiencek Date: Thu, 20 Dec 2018 12:01:24 -0600 Subject: [PATCH 1/2] MBH-502: SOLR access via search.mb.org/ws/2 We want people with MBS mirrors to continue to be able to use our production search servers, as they've always been able to do. But allowing public access to our SOLR cluster is problematic from a security standpoint. We could set up a proxy [1] in front of SOLR to block bad requests, but a better option is to just use the /ws/2 compatibility layer already in place in our openresty config. This layer rewrites "old" API requests understood by our Lucene-based search server into a format expected by SOLR, and performs an internal redirect. There's nothing outdated or impractical about this API, so there is no reason it needs to be considered "old." To allow this, the code for communicating with the old Lucene-based search server is now also used when SEARCH_ENGINE is 'SOLR' and SEARCH_SERVER is set to the default public search server (search.musicbrainz.org). Setting SEARCH_ENGINE to 'LUCENE' is now only necessary when an actual, old Lucene-based server is configured as SEARCH_SERVER. 
[1] https://github.com/dergachev/solr-security-proxy --- lib/DBDefs.pm.sample | 2 +- lib/DBDefs/Default.pm | 2 +- lib/MusicBrainz/Server/Data/Search.pm | 2 +- lib/MusicBrainz/Server/Data/WebService.pm | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/DBDefs.pm.sample b/lib/DBDefs.pm.sample index 8aeaf5ddf5d..b4a3e2167d3 100644 --- a/lib/DBDefs.pm.sample +++ b/lib/DBDefs.pm.sample @@ -170,7 +170,7 @@ sub WEB_SERVER { "www.musicbrainz.example.com" } # Relevant only if SSL redirects are enabled # sub WEB_SERVER_SSL { "localhost" } # sub SEARCH_SERVER { "search.musicbrainz.org" } -# sub SEARCH_ENGINE { "LUCENE" } +# sub SEARCH_ENGINE { "SOLR" } # Used, for example, to have emails sent from the beta server list the # main server # sub WEB_SERVER_USED_IN_EMAIL { my $self = shift; $self->WEB_SERVER } diff --git a/lib/DBDefs/Default.pm b/lib/DBDefs/Default.pm index 7d2c749b062..3ae69127594 100644 --- a/lib/DBDefs/Default.pm +++ b/lib/DBDefs/Default.pm @@ -103,7 +103,7 @@ sub WEB_SERVER { "localhost:5000" } # Relevant only if SSL redirects are enabled sub WEB_SERVER_SSL { "localhost" } sub SEARCH_SERVER { "search.musicbrainz.org" } -sub SEARCH_ENGINE { "LUCENE" } +sub SEARCH_ENGINE { "SOLR" } # Whether to use x-accel-redirect for webservice searches, # using /internal/search as the internal redirect sub SEARCH_X_ACCEL_REDIRECT { 0 } diff --git a/lib/MusicBrainz/Server/Data/Search.pm b/lib/MusicBrainz/Server/Data/Search.pm index 4c2d1c3f1c3..f0982a109c2 100644 --- a/lib/MusicBrainz/Server/Data/Search.pm +++ b/lib/MusicBrainz/Server/Data/Search.pm @@ -794,7 +794,7 @@ sub external_search $type =~ s/release_group/release-group/; my $search_url_string; - if (DBDefs->SEARCH_ENGINE eq 'LUCENE') { + if (DBDefs->SEARCH_ENGINE eq 'LUCENE' || DBDefs->SEARCH_SERVER eq DBDefs::Default->SEARCH_SERVER) { my $dismax = $adv ? 
'false' : 'true'; $search_url_string = "http://%s/ws/2/%s/?query=%s&offset=%s&max=%s&fmt=jsonnew&dismax=$dismax&web=1"; } else { diff --git a/lib/MusicBrainz/Server/Data/WebService.pm b/lib/MusicBrainz/Server/Data/WebService.pm index d11ceaae86f..67e976ebe8c 100644 --- a/lib/MusicBrainz/Server/Data/WebService.pm +++ b/lib/MusicBrainz/Server/Data/WebService.pm @@ -211,7 +211,7 @@ sub xml_search } my $url_ext; - if (DBDefs->SEARCH_ENGINE eq 'LUCENE') { + if (DBDefs->SEARCH_ENGINE eq 'LUCENE' || DBDefs->SEARCH_SERVER eq DBDefs::Default->SEARCH_SERVER) { my $format = ($args->{fmt} // "") eq "json" ? "jsonnew" : "xml"; $url_ext = "/ws/2/$resource/?" . "max=$limit&type=$resource&fmt=$format&offset=$offset" . From aa0caa5ba2ff78148e43cb3c4a33e149f2b35bc9 Mon Sep 17 00:00:00 2001 From: yvanzo Date: Wed, 27 May 2020 14:13:14 +0200 Subject: [PATCH 2/2] Reference MB Solr and SIR in install doc --- INSTALL.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/INSTALL.md b/INSTALL.md index 8257a4fb2e9..a5798adde39 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -345,7 +345,8 @@ The server by itself doesn't rate limit any request it handles. If you're receiving 503s, then you're likely performing [search queries](https://musicbrainz.org/doc/Search_Server) without having set up a local instance of the -[search server](https://github.com/metabrainz/search-server). By default, +[search server](https://github.com/metabrainz/mb-solr) along with the +[search index rebuilder](https://github.com/metabrainz/sir). By default, search queries are sent to search.musicbrainz.org and are rate limited. Once you set up your own instance, change `SEARCH_SERVER` in lib/DBDefs.pm to