# Pre-Classical Clean Up 4: Hyperion

From the live database

In [1]:
import pandas
import sqlalchemy

SITE_URL = 'https://musicbrainz.org'

# import postgresql environ variables
# defined in postgres-dockerfile/postgres.env
PGHOST = '127.0.0.1'
PGDATABASE = 'musicbrainz'
PGUSER = 'musicbrainz'
PGPASSWORD = 'musicbrainz'
PGPORT = 5433

def sql(query, **kwargs):
    """helper function for SQL queries using the %(...) syntax
    Parameters for the query must be passed as keyword arguments
    e.g. sql('SELECT * FROM artist WHERE name=%(singer)s', singer='Bob Dylan')
    """
    engine = sqlalchemy.create_engine(
        'postgresql+psycopg2://'
        '{PGUSER}:{PGPASSWORD}@{PGHOST}:{PGPORT}/{PGDATABASE}'.format(**globals()),
        isolation_level='READ UNCOMMITTED')
    return pandas.read_sql(query, engine, params=kwargs)

# helper function to build canonical URLs
def _mb_link(entity_type, mbid):
    return '<a href="{url}/{entity_type}/{mbid}">{mbid}</a>'.format(
        url=SITE_URL, **locals())

mb_release_link = lambda mbid: _mb_link('release', mbid) # noqa

In [2]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)

## Main label

In [3]:
sql("""SELECT id, gid, name, comment FROM label WHERE name='hyperion';""")

Unnamed: 0,id,gid,name,comment
0,388,08e6c3c8-81ab-405f-9cff-10f6b8db064c,hyperion,UK classical


## MB labels associated with links to hyperion-records

In [4]:
mb_labels = sql("""
SELECT 
    DISTINCT label.id,
    label.name AS label,
    label.gid AS mbid,
    COUNT(*) AS releases_count
  FROM release       AS r
  JOIN release_label AS rl ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN l_release_url AS lu ON r.id = lu.entity0
  JOIN url           AS u  ON u.id = lu.entity1
 WHERE u.url ILIKE '%%hyperion-records%%'
 GROUP BY label.id, label.name, label.gid
 ORDER BY releases_count DESC
;""")
mb_labels

Unnamed: 0,id,label,mbid,releases_count
0,388,hyperion,08e6c3c8-81ab-405f-9cff-10f6b8db064c,889
1,22708,helios,0a94e96a-9219-4dd7-a529-18d34e77f50f,134
2,16197,Signum Classics,79c26ea5-2313-4d53-84d9-b04219620c5f,9
3,2900,Gimell,3d971128-6d37-4870-b188-8e5b84bb1258,2
4,15918,Hyperion Records,0a3ba296-f572-49df-b570-ca00062773d3,1
5,53283,Mariinsky,6b060b9a-da6f-4a11-923e-8b88944c4503,1
6,59800,Appian Publications and Recordings,f51e7981-6399-467c-af06-b4e9a29dbd8d,1
7,95583,The Choir of King's College Cambridge,d5827354-3b3f-421e-a1fe-6e25e2f7b8ea,1


In [5]:
with open('docs/hyperion_releases.html', 'w') as f:
    f.write('<h1>Pre-Classical Clean Up #4</h1>\n')
    f.write('<h2>Labels corresponding to Hyperion in MusicBrainz</h2>\n')
    f.write(mb_labels.to_html().replace('&lt;', '<').replace('&gt;', '>'))

"Hyperion Records" is a completely different label that shouldn't be here, I fixed the releases already

## MB releases with link to Hyperion and no label

In [6]:
sql("""
SELECT 
    COUNT(*) AS releases_count
  FROM release       AS r
  LEFT OUTER JOIN release_label AS rl ON rl.release = r.id
  JOIN l_release_url AS lu ON r.id = lu.entity0
  JOIN url           AS u  ON u.id = lu.entity1
 WHERE u.url ILIKE '%%hyperion-records%%'
 AND rl.label IS NULL
;""")

Unnamed: 0,releases_count
0,0


## Releases from hyperion labels in MB

In [7]:
mb_releases = sql("""
SELECT 
    rl.catalog_number,
    r.barcode,
    r.name  AS title,
    ac.name AS ac,
    label.name AS label,
    r.quality,
    r.gid   AS mb_url
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN artist_credit AS ac ON r.artist_credit = ac.id
 WHERE label.id IN (388, 22708, 16197, 2900, 53283, 59800, 95583)
ORDER BY rl.catalog_number, r.barcode;
""")
mb_releases['mb_url'] = mb_releases.mb_url.apply(mb_release_link)
mb_releases.head()

Unnamed: 0,catalog_number,barcode,title,ac,label,quality,mb_url
0,203,,The Tallis Scholars Sing Thomas Tallis,Thomas Tallis; The Tallis Scholars,Gimell,-1,"<a href=""https://musicbrainz.org/release/ef9be3c2-3cd0-4d61-bdd9-05dda38b488c"">ef9be3c2-3cd0-4d61-bdd9-05dda38b488c</a>"
1,205,755138120525.0,Requiem,"The Tallis Scholars, Peter Phillips",Gimell,-1,"<a href=""https://musicbrainz.org/release/8406624d-f46f-45d9-b0db-4790a892869a"">8406624d-f46f-45d9-b0db-4790a892869a</a>"
2,22,755138102224.0,Tenebrae Responsories,"Victoria; Tallis Scholars, Peter Phillips",Gimell,-1,"<a href=""https://musicbrainz.org/release/0e88f294-269d-444d-b790-7f81ccd6ccab"">0e88f294-269d-444d-b790-7f81ccd6ccab</a>"
3,454 890-2,28945489025.0,The Palestrina 400 Collection,The Tallis Scholars,Gimell,-1,"<a href=""https://musicbrainz.org/release/f70844a3-642c-4a02-a37e-c5caa3c42aa2"">f70844a3-642c-4a02-a37e-c5caa3c42aa2</a>"
4,454 901-2,,Palestrina Masses: Missa Benedicta es,"Giovanni Pierluigi da Palestrina; The Tallis Scholars, Peter Phillips",Gimell,-1,"<a href=""https://musicbrainz.org/release/1e173bae-9453-4f8a-8092-962e41cff59c"">1e173bae-9453-4f8a-8092-962e41cff59c</a>"


In [8]:
mb_releases.shape

(1804, 7)

In [9]:
with open('docs/hyperion_releases.html', 'a') as f:
    f.write('<h2>Hyperion releases in MusicBrainz (with duplicates)</h2>\n')
    f.write(mb_releases.to_html().replace('&lt;', '<').replace('&gt;', '>'))

1804 releases already in MB, some might be duplicates

In [10]:
mb_releases = sql("""
SELECT 
    rl.catalog_number,
    r.barcode,
    r.name  AS title,
    ac.name AS ac,
    label.name AS label,
    r.quality,
    u.url   AS hyperion_url,
    r.gid   AS mb_url
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN artist_credit AS ac ON r.artist_credit = ac.id
  LEFT OUTER JOIN l_release_url AS lu ON r.id = lu.entity0
  LEFT OUTER JOIN url           AS u  ON u.id = lu.entity1
 WHERE label.id IN (388, 22708, 16197, 2900, 53283, 59800, 95583)
   AND (u.url IS NULL OR u.url ILIKE '%%hyperion-records%%')
ORDER BY rl.catalog_number, r.barcode;
""")
mb_releases['mb_url'] = mb_releases.mb_url.apply(mb_release_link)
mb_releases['hyperion_url'] = mb_releases.hyperion_url.apply(lambda url: '<a href="{0}">{0}</a>'.format(url) if url else None)
mb_releases.head()

Unnamed: 0,catalog_number,barcode,title,ac,label,quality,hyperion_url,mb_url
0,454 901-2,,Palestrina Masses: Missa Benedicta es,"Giovanni Pierluigi da Palestrina; The Tallis Scholars, Peter Phillips",Gimell,-1,,"<a href=""https://musicbrainz.org/release/1e173bae-9453-4f8a-8092-962e41cff59c"">1e173bae-9453-4f8a-8092-962e41cff59c</a>"
1,454 909-2,28945490922.0,Missa Pange lingua / Missa La sol fa re mi / Plainchant: Pange lingua,"Josquin; The Tallis Scholars, Peter Phillips",Gimell,-1,,"<a href=""https://musicbrainz.org/release/d9a9a334-aa0f-4007-8a9d-674eda037ca0"">d9a9a334-aa0f-4007-8a9d-674eda037ca0</a>"
2,A66210,,Two Romantic Violin Concertos,"Federigo Fiorillo, Giovanni Battista Viotti; European Community Chamber Orchestra, Jörg Faerber, Adelina Oprean",hyperion,-1,,"<a href=""https://musicbrainz.org/release/28dbbf85-4ec1-4242-8469-6dbc800a8f5c"">28dbbf85-4ec1-4242-8469-6dbc800a8f5c</a>"
3,APR 5519,5024709155194.0,Tchaikovsky: Piano Concerto no. 1 / Rachmaninov: Piano Concerto no. 3,"Pyotr Tchaikovsky, Sergei Rachmaninov; Vladimir Horowitz, New York Philharmonic Symphony Orchestra, John Barbirolli",Appian Publications and Recordings,-1,"<a href=""http://www.hyperion-records.co.uk/dc.asp?dc=D_APR5519"">http://www.hyperion-records.co.uk/dc.asp?dc=D_APR5519</a>","<a href=""https://musicbrainz.org/release/31192ec6-6ea1-4f6e-b665-9b2a78e5f3fa"">31192ec6-6ea1-4f6e-b665-9b2a78e5f3fa</a>"
4,APR 6003,5024709160037.0,"The Complete Solo Recordings, 1929-1936 / Lalo: Symphonie espagnole",Jacques Thibaud,Appian Publications and Recordings,-1,,"<a href=""https://musicbrainz.org/release/04bb6a06-6839-4e11-8b46-b7e5f2709180"">04bb6a06-6839-4e11-8b46-b7e5f2709180</a>"


In [11]:
mb_releases.shape

(1154, 8)

In [12]:
with open('docs/hyperion_releases.html', 'a') as f:
    f.write('<h2>Hyperion releases in MusicBrainz (with links)</h2>\n')
    f.write(mb_releases.to_html().replace('&lt;', '<').replace('&gt;', '>'))

## Hyperion releases with no catalogue number in MB

In [13]:
mb_releases_wo_catno = sql("""
SELECT release.barcode,
       release.name AS title,
       label.name AS label,
       release.gid AS mb_url
  FROM release_label AS rl
  JOIN release       ON rl.release = release.id
  JOIN label         ON rl.label = label.id
 WHERE label.id IN (388, 22708, 16197, 2900, 53283, 59800, 95583)
   AND rl.catalog_number IS NULL
ORDER BY rl.catalog_number;
""")
mb_releases_wo_catno['mb_url'] = mb_releases_wo_catno.mb_url.apply(mb_release_link)
mb_releases_wo_catno.head()

Unnamed: 0,barcode,title,label,mb_url
0,,Missa Ecce ego Joannes,hyperion,"<a href=""https://musicbrainz.org/release/2cbbdf24-d9d1-44cb-a07e-83ccdf36be6c"">2cbbdf24-d9d1-44cb-a07e-83ccdf36be6c</a>"
1,34571163291.0,The Evening Watch and Other Choral Music,hyperion,"<a href=""https://musicbrainz.org/release/d358abfe-4a98-4115-9ba5-ed072befcf43"">d358abfe-4a98-4115-9ba5-ed072befcf43</a>"
2,,"Variations on a Theme of Chopin, Op. 22 / Variations on a Theme of Corelli, Op. 42 (piano: Howard Shelley)",hyperion,"<a href=""https://musicbrainz.org/release/0ae980b3-2c97-3a6f-9e61-1699a099b878"">0ae980b3-2c97-3a6f-9e61-1699a099b878</a>"
3,,"The Complete Anthems and Services, Vol. I (King's Consort & The New Oxford College Choir feat. conductor: Robert King)",hyperion,"<a href=""https://musicbrainz.org/release/692eb556-50f5-4479-ab4e-634972f8c987"">692eb556-50f5-4479-ab4e-634972f8c987</a>"
4,,Music for St. James the Greater (Binchois Consort),hyperion,"<a href=""https://musicbrainz.org/release/fbc0f0bd-c854-4c82-9ce6-0b4c8862fc70"">fbc0f0bd-c854-4c82-9ce6-0b4c8862fc70</a>"


In [14]:
mb_releases_wo_catno.shape

(32, 4)

In [15]:
with open('docs/hyperion_releases.html', 'a') as f:
    f.write('<h2>Hyperion releases with no catalog number in MB</h2>\n')
    f.write(mb_releases_wo_catno.to_html().replace('&lt;', '<').replace('&gt;', '>'))

## MB Releases with hyperion cat. no. but neither label nor link

# Official Hyperion catalogue

In [16]:
hyperion_releases = pd.read_html('https://www.hyperion-records.co.uk/n.asp?n=1')[0][:-1]

hyperion_releases.rename(columns={0: 'catalog_number', 1: 'name'}, inplace=True)
hyperion_releases['barcode'] = hyperion_releases.name.str[-12:]
hyperion_releases['name'] = hyperion_releases.name.str[:-12]
hyperion_releases['url'] = hyperion_releases.catalog_number.apply(
    lambda no: '<a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_{0}">{0}</a>'.format(no)) 

In [17]:
hyperion_releases.head()

Unnamed: 0,catalog_number,name,barcode,url
0,1EMBNB,Jack: Black and Blue,34571100708,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMBNB"">1EMBNB</a>"
1,1EMHDUS,Harmonies d'un Soir,34571100753,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMHDUS"">1EMHDUS</a>"
2,1EMIPM08,Pitts J: Airs & Fantasias,34571100678,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMIPM08"">1EMIPM08</a>"
3,1EMJ2O,Pitts: Jerusalem-Yerushalayim Superseded by 1EMJOY,34571100647,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMJ2O"">1EMJ2O</a>"
4,1EMJOY,Pitts: Jerusalem-Yerushalayim,34571101460,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMJOY"">1EMJOY</a>"


In [18]:
with open('docs/hyperion_releases.html', 'a') as f:
    f.write('<h2>Hyperion releases in Hyperion catalogue</h2>\n')
    f.write(hyperion_releases.to_html().replace('&lt;', '<').replace('&gt;', '>'))

## Barcodes in MusicBrainz not found in Hyperion

In [19]:
barcodes_not_in_hyperion = set(mb_releases.barcode) - set(hyperion_releases.barcode) - {''} - {None}
releases_not_in_hyperion = mb_releases[mb_releases.barcode.apply(lambda b: b in barcodes_not_in_hyperion)]
releases_not_in_hyperion.head()

Unnamed: 0,catalog_number,barcode,title,ac,label,quality,hyperion_url,mb_url
1,454 909-2,28945490922,Missa Pange lingua / Missa La sol fa re mi / Plainchant: Pange lingua,"Josquin; The Tallis Scholars, Peter Phillips",Gimell,-1,,"<a href=""https://musicbrainz.org/release/d9a9a334-aa0f-4007-8a9d-674eda037ca0"">d9a9a334-aa0f-4007-8a9d-674eda037ca0</a>"
3,APR 5519,5024709155194,Tchaikovsky: Piano Concerto no. 1 / Rachmaninov: Piano Concerto no. 3,"Pyotr Tchaikovsky, Sergei Rachmaninov; Vladimir Horowitz, New York Philharmonic Symphony Orchestra, John Barbirolli",Appian Publications and Recordings,-1,"<a href=""http://www.hyperion-records.co.uk/dc.asp?dc=D_APR5519"">http://www.hyperion-records.co.uk/dc.asp?dc=D_APR5519</a>","<a href=""https://musicbrainz.org/release/31192ec6-6ea1-4f6e-b665-9b2a78e5f3fa"">31192ec6-6ea1-4f6e-b665-9b2a78e5f3fa</a>"
4,APR 6003,5024709160037,"The Complete Solo Recordings, 1929-1936 / Lalo: Symphonie espagnole",Jacques Thibaud,Appian Publications and Recordings,-1,,"<a href=""https://musicbrainz.org/release/04bb6a06-6839-4e11-8b46-b7e5f2709180"">04bb6a06-6839-4e11-8b46-b7e5f2709180</a>"
5,APR 7304,5024709173044,The Complete Solo Studio Recordings,Harriet Cohen,Appian Publications and Recordings,-1,,"<a href=""https://musicbrainz.org/release/e95bfeb1-6769-4134-bcda-728a32df16d9"">e95bfeb1-6769-4134-bcda-728a32df16d9</a>"
6,CDA20199,34571101996,"Clarinet Concerto in A Major, K622 / Clarinet Quintet in A Major, K581","Wolfgang Amadeus Mozart; Thea King, English Chamber Orchestra, Jeffrey Tate, Gabrieli String Quartet",hyperion,-1,,"<a href=""https://musicbrainz.org/release/0549aa69-e9bb-48ac-900d-7ee4844a616d"">0549aa69-e9bb-48ac-900d-7ee4844a616d</a>"


In [20]:
with open('docs/hyperion_releases.html', 'a') as f:
    f.write('<h2>Releases in MusicBrainz not found in Hyperion catalogue by barcode</h2>\n')
    f.write(releases_not_in_hyperion.to_html().replace('&lt;', '<').replace('&gt;', '>'))

## Releases not in MusicBrainz

In [21]:
barcodes_not_in_musicbrainz = set(hyperion_releases.barcode) - set(mb_releases.barcode)
releases_not_in_mb = hyperion_releases[hyperion_releases.barcode.apply(lambda b: b in barcodes_not_in_musicbrainz)]

In [22]:
with open('docs/hyperion_releases.html', 'a') as f:
    f.write('<h2>Releases in Hyperion catalogue not found in MusicBrainz by barcode</h2>\n')
    f.write(releases_not_in_mb.to_html().replace('&lt;', '<').replace('&gt;', '>'))