Permalink
Browse files

Initial commit

  • Loading branch information...
0 parents commit 90290ebe7b0dec1159306a00866eb90dd0764b7f @lalinsky committed Oct 29, 2010
Showing with 12,868 additions and 0 deletions.
  1. +1 −0 .bzrignore
  2. +34 −0 README.txt
  3. +47 −0 mbslave-import.py
  4. +21 −0 mbslave-psql.py
  5. +129 −0 mbslave-sync.py
  6. +11 −0 mbslave.conf.default
  7. +209 −0 sql/CalculateRelatedTags.sql
  8. +16 −0 sql/CreateAll.sql
  9. +697 −0 sql/CreateFKConstraints.sql
  10. +659 −0 sql/CreateFunctions.sql
  11. +236 −0 sql/CreateIndexes.sql
  12. +45 −0 sql/CreateLinkFKs.pl
  13. +100 −0 sql/CreatePrimaryKeys.sql
  14. +325 −0 sql/CreateReplicationTriggers.sql
  15. +1,052 −0 sql/CreateTables.sql
  16. +101 −0 sql/CreateTriggers.sql
  17. +24 −0 sql/CreateViews.sql
  18. +13 −0 sql/DropAll.sql
  19. +143 −0 sql/DropFKConstraints.sql
  20. +34 −0 sql/DropFunctions.sql
  21. +174 −0 sql/DropIndexes.sql
  22. +100 −0 sql/DropPrimaryKeys.sql
  23. +90 −0 sql/DropReplicationTriggers.sql
  24. +169 −0 sql/DropTables.sql
  25. +41 −0 sql/DropTriggers.sql
  26. +9 −0 sql/DropViews.sql
  27. +44 −0 sql/ImportArtistRelations.sql
  28. +31 −0 sql/InsertDefaultRows.sql
  29. +83 −0 sql/PopulateCountries.pl
  30. +17 −0 sql/PopulateMetaTables.sql
  31. +25 −0 sql/ReplicationSetup.sql
  32. +144 −0 sql/contrib/musicip/track_frd/CreateTrackDateFunctions.sql
  33. +22 −0 sql/contrib/musicip/track_frd/CreateTrackDateTriggers.sql
  34. +19 −0 sql/contrib/musicip/track_frd/DropTrackDateFunctions.sql
  35. +14 −0 sql/contrib/musicip/track_frd/DropTrackDateTriggers.sql
  36. +1 −0 sql/contrib/musicip/track_frd/PopulateTrackDates.sql
  37. +68 −0 sql/contrib/musicip/track_frd/README
  38. +30 −0 sql/drop_all_links.pl
  39. +45 −0 sql/updates/20030201-1.sql
  40. +57 −0 sql/updates/20030201-2.sql
  41. +31 −0 sql/updates/20030321-1.sql
  42. +42 −0 sql/updates/20030401-1.sql
  43. +15 −0 sql/updates/20030809-1.sql
  44. +65 −0 sql/updates/20030910-1.sql
  45. +26 −0 sql/updates/20031004-0.sql
  46. +75 −0 sql/updates/20031004-1.pl
  47. +11 −0 sql/updates/20031004-2.sql
  48. +42 −0 sql/updates/20031004-3.sql
  49. +75 −0 sql/updates/20031019-1.sql
  50. +17 −0 sql/updates/20031025-1.sql
  51. +77 −0 sql/updates/20031025-2.pl
  52. +75 −0 sql/updates/20031025-3.pl
  53. +445 −0 sql/updates/20031126-1.sql
  54. +23 −0 sql/updates/20031216-1.sql
  55. +90 −0 sql/updates/20031231-1.sql
  56. +154 −0 sql/updates/20031231-2.pl
  57. +184 −0 sql/updates/20031231-3.sql
  58. +204 −0 sql/updates/20040220-1.sql
  59. +276 −0 sql/updates/20040326-1.pl
  60. +41 −0 sql/updates/20040409-1.sql
  61. +42 −0 sql/updates/20040428-1.sql
  62. +143 −0 sql/updates/20040516-1.pl
  63. +12 −0 sql/updates/20040516-2.sql
  64. +75 −0 sql/updates/20040522-1.sql
  65. +25 −0 sql/updates/20040608-1.sql
  66. +97 −0 sql/updates/20040730-1.pl
  67. +229 −0 sql/updates/20040730-2.sql
  68. +145 −0 sql/updates/20040916-1.sql
  69. +543 −0 sql/updates/20050114-1.sql
  70. +102 −0 sql/updates/20050114-2.sql
  71. +858 −0 sql/updates/20050420-1.sql
  72. +21 −0 sql/updates/20050420-2.sql
  73. +17 −0 sql/updates/20050424-1.sql
  74. +146 −0 sql/updates/20050527-1.pl
  75. +25 −0 sql/updates/20050928.sql
  76. +96 −0 sql/updates/20060305-1.pl
  77. +96 −0 sql/updates/20060305-2.pl
  78. +60 −0 sql/updates/20060310-1.sh
  79. +88 −0 sql/updates/20060310-1.sql
  80. +23 −0 sql/updates/20060531-1.sql
  81. +255 −0 sql/updates/20061104-1.sql
  82. +136 −0 sql/updates/20061104-2.sql
  83. +66 −0 sql/updates/20061104-3.sql
  84. +27 −0 sql/updates/20061104-4.sql
  85. +80 −0 sql/updates/20070401-1.sql
  86. +94 −0 sql/updates/20070622-1.sql
  87. +72 −0 sql/updates/20070622-2.sql
  88. +61 −0 sql/updates/20070622-3.sql
  89. +20 −0 sql/updates/20070719-1.sql
  90. +19 −0 sql/updates/20070719-2.sql
  91. +153 −0 sql/updates/20070813-1.sql
  92. +27 −0 sql/updates/20070921-1.sql
  93. +60 −0 sql/updates/20071212-1.sql
  94. +36 −0 sql/updates/20080201-1.sql
  95. +37 −0 sql/updates/20080529.sql
  96. +49 −0 sql/updates/20080610-1.sql
  97. +41 −0 sql/updates/20080610-2.sql
  98. +21 −0 sql/updates/20080707-1.sql
  99. +58 −0 sql/updates/20080707-2.sql
  100. +76 −0 sql/updates/20080711-1.sql
  101. +28 −0 sql/updates/20080729.sql
  102. +13 −0 sql/updates/20081017-1.sql
  103. +15 −0 sql/updates/20081017-2.sql
  104. +40 −0 sql/updates/20081027.sql
  105. +303 −0 sql/updates/20081115-1.sql
  106. +83 −0 sql/updates/20081123.sql
  107. +19 −0 sql/updates/20090220.sql
  108. +319 −0 sql/updates/20090402-1.pl
  109. +119 −0 sql/updates/20090402-2.sql
  110. +102 −0 sql/updates/20090402-3.sql
  111. +19 −0 sql/updates/20090402-4.sql
  112. +22 −0 sql/updates/20090416-1.sql
  113. +13 −0 sql/updates/20090416-2.sql
  114. +48 −0 sql/updates/20090524-1.pl
  115. +45 −0 sql/updates/PopulateAlbumDateAdded.pl
  116. +36 −0 sql/updates/puid_load.sql
  117. +21 −0 sql/vertical/rawdata/CreateFKConstraints.sql
  118. +61 −0 sql/vertical/rawdata/CreateIndexes.sql
  119. +27 −0 sql/vertical/rawdata/CreatePrimaryKeys.sql
  120. +144 −0 sql/vertical/rawdata/CreateTables.sql
  121. +8 −0 sql/vertical/rawdata/DropFKConstraints.sql
  122. +53 −0 sql/vertical/rawdata/DropIndexes.sql
  123. +27 −0 sql/vertical/rawdata/DropPrimaryKeys.sql
  124. +24 −0 sql/vertical/rawdata/DropTables.sql
@@ -0,0 +1 @@
+mbslave.conf
@@ -0,0 +1,34 @@
+Installation
+============
+
+1. Set up a database and create mbslave.conf by copying and editing
+ mbslave.conf.default
+
+2. Prepare empty schema for the MusicBrainz database and create the
+ table structure:
+
+ $ echo 'CREATE SCHEMA musicbrainz;' | ./mbslave-psql.py
+ $ ./mbslave-psql.py <sql/CreateTables.sql
+
+3. Download the MusicBrainz database dump files from
+ http://musicbrainz.org/doc/Database_Download
+
+4. Import the data dumps, for example:
+
+ $ ./mbslave-import.py mbdump.tar.bz2 mbdump-derived.tar.bz2
+
+5. Set up primary keys, indexes, views and functions:
+
+ $ ./mbslave-psql.py <sql/CreatePrimaryKeys.sql
+ $ ./mbslave-psql.py <sql/CreateIndexes.sql
+ $ ./mbslave-psql.py <sql/CreateViews.sql
+ $ ./mbslave-psql.py <sql/CreateFunctions.sql
+
+6. Vacuum the newly created database (optional)
+
+ $ echo 'VACUUM ANALYZE;' | ./mbslave-psql.py
+
+7. Run the initial replication:
+
+ $ ./mbslave-sync.py
+
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+import ConfigParser
+import psycopg2
+import tarfile
+import sys
+import os
+
+
+def load_tar(filename, db, schema, ignored_tables):
+ print "Importing data from", filename
+ tar = tarfile.open(filename, 'r:bz2')
+ cursor = db.cursor()
+ for member in tar:
+ if not member.name.startswith('mbdump/'):
+ continue
+ table = member.name.split('/')[1].replace('_sanitised', '')
+ fulltable = schema + "." + table
+ if table in ignored_tables:
+ print " - Ignoring", fulltable
+ continue
+ cursor.execute("SELECT 1 FROM %s LIMIT 1" % fulltable)
+ if cursor.fetchone():
+ print " - Skipping", fulltable, "(already contains data)"
+ continue
+ print " - Loading", fulltable
+ cursor.copy_from(tar.extractfile(member), fulltable)
+ db.commit()
+
+
# Read connection settings from mbslave.conf located next to this script.
config = ConfigParser.RawConfigParser()
config.read(os.path.dirname(__file__) + '/mbslave.conf')

# Mandatory connection parameters.
opts = {
    'database': config.get('DATABASE', 'name'),
    'user': config.get('DATABASE', 'user'),
}
# Optional parameters fall back to libpq defaults when not configured.
for key in ('host', 'port'):
    if config.has_option('DATABASE', key):
        opts[key] = config.get('DATABASE', key)
db = psycopg2.connect(**opts)

schema = config.get('DATABASE', 'schema')
ignored_tables = set(config.get('TABLES', 'ignore').split(','))
# Import every dump archive named on the command line, in order.
for filename in sys.argv[1:]:
    load_tar(filename, db, schema, ignored_tables)
+
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+import ConfigParser
+import os
+
# Open an interactive psql shell connected to the database configured
# in mbslave.conf (same directory as this script).
config = ConfigParser.RawConfigParser()
config.read(os.path.dirname(__file__) + '/mbslave.conf')

# Build the psql argument vector.  By exec convention the first element
# is the program name itself; without it psql would receive '-U' as its
# argv[0] and misparse every following option.
args = ['psql']
args.append('-U')
args.append(config.get('DATABASE', 'user'))
if config.has_option('DATABASE', 'host'):
    args.append('-h')
    args.append(config.get('DATABASE', 'host'))
if config.has_option('DATABASE', 'port'):
    args.append('-p')
    args.append(config.get('DATABASE', 'port'))
args.append(config.get('DATABASE', 'name'))

# Point psql at the configured schema so unqualified table names resolve.
os.environ['PGOPTIONS'] = '-c search_path=%s' % config.get('DATABASE', 'schema')
# execvp replaces the current process; nothing after this line runs.
os.execvp("psql", args)
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+
+import ConfigParser
+import psycopg2
+import tarfile
+import sys
+import os
+import re
+
+
def parse_data_fields(s):
    """Parse a dbmirror PendingData field list into a dict.

    The input looks like: "col1"='value' "col2"='it''s' "col3"= 
    Values are single-quoted, with '' escaping a quote and \\ escaping
    a backslash.  A missing quoted part (nothing between '=' and the
    trailing space) denotes SQL NULL and is returned as None.
    """
    fields = {}
    # The quoted value is optional: the original pattern required the
    # quotes, which made the 'not value -> None' branch unreachable and
    # silently dropped NULL columns.
    for name, value in re.findall(r'''"([^"]+)"=('(?:''|[^'])*')? ''', s):
        if not value:
            value = None
        else:
            # Strip the surrounding quotes and undo the '' and \\ escapes.
            value = value[1:-1].replace("''", "'").replace("\\\\", "\\")
        fields[name] = value
    return fields
+
+
def parse_bool(s):
    """Map PostgreSQL's textual boolean to Python: 't' -> True, else False."""
    is_true = (s == 't')
    return is_true
+
+
# PostgreSQL COPY text-format escape sequences and their replacements.
ESCAPES = (('\\b', '\b'), ('\\f', '\f'), ('\\n', '\n'), ('\\r', '\r'),
           ('\\t', '\t'), ('\\v', '\v'), ('\\\\', '\\'))

# Single-pass translation table keyed on the character after the backslash.
_UNESCAPE_MAP = dict((seq[1], char) for seq, char in ESCAPES)
_UNESCAPE_RE = re.compile(r'\\(.)')

def unescape(s):
    """Undo PostgreSQL COPY text-format escaping.

    Returns None for the NULL marker '\\N'.  Escapes are resolved in a
    single left-to-right pass; the original chain of str.replace calls
    corrupted sequences such as a literal backslash followed by 'n'
    (serialized as '\\\\n') by first rewriting the trailing '\\n' into
    a newline.
    """
    if s == '\\N':
        return None
    # Unknown escapes (e.g. '\\q') are left untouched, matching the
    # original behaviour.
    return _UNESCAPE_RE.sub(
        lambda m: _UNESCAPE_MAP.get(m.group(1), m.group(0)), s)
+
+
def read_psql_dump(fp, types):
    """Yield one row per line of a tab-separated psql dump.

    Each column is unescaped; NULLs stay None, every other cell is
    converted with the positionally matching callable from *types*.
    """
    for raw_line in fp:
        row = [unescape(cell) for cell in raw_line.rstrip('\r\n').split('\t')]
        for idx, cell in enumerate(row):
            if cell is not None:
                row[idx] = types[idx](cell)
        yield row
+
+
class PacketImporter(object):
    """Replays one replication packet (a dbmirror Pending/PendingData
    dump pair) against the slave database.

    Usage: call load_pending_data() and load_pending() to read the two
    dump files, then process() to apply the recorded transactions.
    """

    def __init__(self, db, schema, ignored_tables):
        # db: open psycopg2 connection; committed once per packet.
        self._db = db
        # (sequence id, iskey) -> {column: value}, filled from PendingData.
        self._data = {}
        # transaction id (xid) -> list of (sequence id, table, op type).
        self._transactions = {}
        self._schema = schema
        self._ignored_tables = ignored_tables

    def load_pending_data(self, fp):
        """Read the PendingData dump: column values for each change.

        Rows are keyed by (id, iskey): iskey=True rows carry the key
        values used in WHERE clauses, iskey=False rows carry the new
        column values for INSERT/UPDATE.
        """
        dump = read_psql_dump(fp, [int, parse_bool, parse_data_fields])
        for id, key, values in dump:
            self._data[(id, key)] = values

    def load_pending(self, fp):
        """Read the Pending dump: which operation touched which table,
        grouped by the originating transaction id (xid)."""
        dump = read_psql_dump(fp, [int, str, str, int])
        for id, table, type, xid in dump:
            # Table names arrive schema-qualified and quoted, e.g.
            # "public"."artist"; keep only the bare table name.
            table = table.split(".")[1].strip('"')
            transaction = self._transactions.setdefault(xid, [])
            transaction.append((id, table, type))

    def process(self):
        """Apply every recorded transaction in xid order, then commit.

        Operation types: 'i' = INSERT, 'u' = UPDATE, 'd' = DELETE.
        """
        cursor = self._db.cursor()
        for xid in sorted(self._transactions.keys()):
            transaction = self._transactions[xid]
            #print ' - Running transaction', xid
            #print 'BEGIN; --', xid
            # Sorting by sequence id preserves statement order within
            # the transaction.
            for id, table, type in sorted(transaction):
                if table in self._ignored_tables:
                    continue
                fulltable = self._schema + '.' + table
                if type == 'd':
                    sql = 'DELETE FROM %s' % (fulltable,)
                    params = []
                elif type == 'u':
                    values = self._data[(id, False)]
                    # Relies on Py2 dicts returning keys()/values() in a
                    # consistent order between the two calls.
                    sql_values = ', '.join('%s=%%s' % i for i in values)
                    sql = 'UPDATE %s SET %s' % (fulltable, sql_values)
                    params = values.values()
                elif type == 'i':
                    values = self._data[(id, False)]
                    sql_columns = ', '.join(values.keys())
                    sql_values = ', '.join(['%s'] * len(values))
                    sql = 'INSERT INTO %s (%s) VALUES (%s)' % (fulltable, sql_columns, sql_values)
                    params = values.values()
                # NOTE(review): an op type other than 'i'/'u'/'d' would
                # leave sql unbound — assumes dbmirror emits only these.
                if type == 'd' or type == 'u':
                    # The iskey=True row supplies the key-match values.
                    values = self._data[(id, True)]
                    sql += ' WHERE ' + ' AND '.join('%s=%%s' % i for i in values.keys())
                    params.extend(values.values())
                cursor.execute(sql, params)
                #print sql, params
            #print 'COMMIT; --', xid
        # One commit covers the whole packet.
        self._db.commit()
+
+
+def process_tar(filename, db, schema, ignored_tables):
+ print "Processing", filename
+ tar = tarfile.open(filename, 'r:bz2')
+ importer = PacketImporter(db, schema, ignored_tables)
+ for member in tar:
+ if member.name == 'mbdump/Pending':
+ importer.load_pending(tar.extractfile(member))
+ elif member.name == 'mbdump/PendingData':
+ importer.load_pending_data(tar.extractfile(member))
+ importer.process()
+
+
# Load settings from mbslave.conf stored alongside this script.
config = ConfigParser.RawConfigParser()
config.read(os.path.dirname(__file__) + '/mbslave.conf')

# Required connection settings; host/port are optional and fall back to
# libpq defaults when omitted.
opts = {
    'database': config.get('DATABASE', 'name'),
    'user': config.get('DATABASE', 'user'),
}
for optional in ('host', 'port'):
    if config.has_option('DATABASE', optional):
        opts[optional] = config.get('DATABASE', optional)
db = psycopg2.connect(**opts)

schema = config.get('DATABASE', 'schema')
ignored_tables = set(config.get('TABLES', 'ignore').split(','))
# Apply each replication packet named on the command line, in order.
for filename in sys.argv[1:]:
    process_tar(filename, db, schema, ignored_tables)
+
@@ -0,0 +1,11 @@
+[DATABASE]
+host=localhost
+port=5432
+name=musicbrainz_db
+user=musicbrainz_user
+schema=public
+
+[TABLES]
+ignore=
+#ignore=albumwords,trackwords,artistwords,labelwords,wordlist,puid,puidjoin,trm,trmjoin
+
Oops, something went wrong.

0 comments on commit 90290eb

Please sign in to comment.