Permalink
Browse files

Working Solr update

  • Loading branch information...
1 parent abfc548 commit a821d28a809fe666df898add0500687c3c640a9c @lalinsky committed Nov 11, 2012
Showing with 51 additions and 11 deletions.
  1. +13 −3 README.md
  2. +28 −0 mbslave-solr-update.py
  3. +8 −6 mbslave/search.py
  4. +2 −2 solr/conf/schema.xml
View
@@ -198,13 +198,23 @@ echo "UPDATE replication_control SET current_schema_sequence = 16;" | ./mbslave-
If you would also like to build a Solr index for searching, mbslave includes a script to
export the MusicBrainz data into an XML file that you can feed to Solr:
- ./mbslave-solr-export.py >/tmp/solr-data.xml
+ ./mbslave-solr-export.py >/tmp/mbslave-solr-data.xml
Once you have generated this file, you can, for example, start a local instance of Solr:
java -Dsolr.solr.home=/path/to/mbslave/solr/ -jar start.jar
-And tell it to import the XML file:
+Import the XML file:
- curl http://localhost:8983/solr/musicbrainz/update -F stream.file=/tmp/solr-data.xml -F commit=true
+ curl http://localhost:8983/solr/musicbrainz/update -F stream.file=/tmp/mbslave-solr-data.xml -F commit=true
+
+Install triggers to queue database updates:
+
+ echo 'CREATE SCHEMA mbslave;' | ./mbslave-psql.py -S
+ ./mbslave-remap-schema.py <sql-extra/solr-queue.sql | ./mbslave-psql.py -s mbslave
+ ./mbslave-solr-generate-triggers.py | ./mbslave-remap-schema.py | ./mbslave-psql.py -s mbslave
+
+Update the index:
+
+ ./mbslave-solr-update.py
View
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+import os
+import urllib2
+from cStringIO import StringIO
+from lxml import etree as ET
+from mbslave import Config, connect_db
+from mbslave.search import fetch_all_updated
+
+cfg = Config(os.path.join(os.path.dirname(__file__), 'mbslave.conf'))
+db = connect_db(cfg, True)
+
+xml = StringIO()
+xml.write('<update>\n')
+for doc in fetch_all_updated(cfg, db):
+ xml.write(ET.tostring(doc))
+ xml.write('\n')
+xml.write('</update>\n')
+
+req = urllib2.Request(cfg.solr.url + '/update', xml.getvalue(),
+ {'Content-Type': 'application/xml; encoding=UTF-8'})
+resp = urllib2.urlopen(req)
+the_page = resp.read()
+
+print the_page
+
+db.commit()
+
View
@@ -66,7 +66,7 @@ def __init__(self, table, name, foreign=None, null=False, backref=None):
Field('name', Column('name', ForeignColumn('artist_name', 'name'))),
Field('sort_name', Column('sort_name', ForeignColumn('artist_name', 'name'))),
Field('country', Column('country', ForeignColumn('country', 'name', null=True))),
- Field('country_code', Column('country', ForeignColumn('country', 'iso_code', null=True))),
+ Field('country', Column('country', ForeignColumn('country', 'iso_code', null=True))),
Field('gender', Column('gender', ForeignColumn('gender', 'name', null=True))),
Field('type', Column('type', ForeignColumn('artist_type', 'name', null=True))),
MultiField('mbid', ForeignColumn('artist_gid_redirect', 'gid', backref='new_id')),
@@ -80,7 +80,7 @@ def __init__(self, table, name, foreign=None, null=False, backref=None):
Field('name', Column('name', ForeignColumn('label_name', 'name'))),
Field('sort_name', Column('sort_name', ForeignColumn('label_name', 'name'))),
Field('country', Column('country', ForeignColumn('country', 'name', null=True))),
- Field('country_code', Column('country', ForeignColumn('country', 'iso_code', null=True))),
+ Field('country', Column('country', ForeignColumn('country', 'iso_code', null=True))),
Field('type', Column('type', ForeignColumn('label_type', 'name', null=True))),
MultiField('mbid', ForeignColumn('label_gid_redirect', 'gid', backref='new_id')),
MultiField('ipi', ForeignColumn('label_ipi', 'ipi')),
@@ -115,6 +115,8 @@ def __init__(self, table, name, foreign=None, null=False, backref=None):
Field('status', Column('status', ForeignColumn('release_status', 'name', null=True))),
Field('type', Column('release_group', ForeignColumn('release_group', 'type', ForeignColumn('release_group_primary_type', 'name', null=True)))),
Field('artist', Column('artist_credit', ForeignColumn('artist_credit', 'name', ForeignColumn('artist_name', 'name')))),
+ Field('country', Column('country', ForeignColumn('country', 'name', null=True))),
+ Field('country', Column('country', ForeignColumn('country', 'iso_code', null=True))),
MultiField('mbid', ForeignColumn('release_gid_redirect', 'gid', backref='new_id')),
MultiField('catno', ForeignColumn('release_label', 'catalog_number')),
MultiField('label', ForeignColumn('release_label', 'label', ForeignColumn('label', 'name', ForeignColumn('label_name', 'name')))),
@@ -401,12 +403,12 @@ def fetch_works(db, ids=()):
def fetch_all(cfg, db):
return itertools.chain(
- fetch_works(db) if cfg.solr.index_works else [],
+ fetch_artists(db) if cfg.solr.index_artists else [],
+ fetch_labels(db) if cfg.solr.index_labels else [],
fetch_recordings(db) if cfg.solr.index_recordings else [],
- fetch_releases(db) if cfg.solr.index_releases else [],
fetch_release_groups(db) if cfg.solr.index_release_groups else [],
- fetch_artists(db) if cfg.solr.index_artists else [],
- fetch_labels(db) if cfg.solr.index_labels else [])
+ fetch_releases(db) if cfg.solr.index_releases else [],
+ fetch_works(db) if cfg.solr.index_works else [])
def fetch_all_updated(cfg, db):
View
@@ -27,7 +27,7 @@
- artist
- label
- release
- - releasegroup
+ - release_group
- recording
- work
-->
@@ -40,7 +40,7 @@
<field name="name" type="text" indexed="true" />
<!-- Artist sort name, label sort name. -->
- <field name="sortname" type="text" indexed="true" />
+ <field name="sort_name" type="text" indexed="true" />
<!-- Disambiguation comment. -->
<field name="disambiguation" type="text" indexed="true" />

0 comments on commit a821d28

Please sign in to comment.