# Pre-Classical Clean Up 4: Hyperion

From the live database

In [1]:
import pandas
import sqlalchemy

SITE_URL = 'https://musicbrainz.org'

# import postgresql environ variables
# defined in postgres-dockerfile/postgres.env
PGHOST = '127.0.0.1'
PGDATABASE = 'musicbrainz'
PGUSER = 'musicbrainz'
PGPASSWORD = 'musicbrainz'
PGPORT = 5433

def sql(query, **kwargs):
    """helper function for SQL queries using the %(...) syntax
    Parameters for the query must be passed as keyword arguments
    e.g. sql('SELECT * FROM artist WHERE name=%(singer)s', singer='Bob Dylan')
    """
    engine = sqlalchemy.create_engine(
        'postgresql+psycopg2://'
        '{PGUSER}:{PGPASSWORD}@{PGHOST}:{PGPORT}/{PGDATABASE}'.format(**globals()),
        isolation_level='READ UNCOMMITTED')
    return pandas.read_sql(query, engine, params=kwargs)

# helper function to build canonical URLs
def _mb_link(entity_type, mbid):
    return '<a href="{url}/{entity_type}/{mbid}">{mbid}</a>'.format(
        url=SITE_URL, **locals())

mb_release_link = lambda mbid: _mb_link('release', mbid) # noqa

In [2]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)

## Main label

In [3]:
sql("""SELECT id, gid, name, comment FROM label WHERE name='hyperion';""")

Unnamed: 0,id,gid,name,comment
0,388,08e6c3c8-81ab-405f-9cff-10f6b8db064c,hyperion,UK classical


## MB labels associated with links to hyperion-records

In [4]:
mb_labels = sql("""
SELECT 
    DISTINCT label.id,
    label.name AS label,
    label.gid AS mbid,
    COUNT(*) AS releases_count
  FROM release       AS r
  JOIN release_label AS rl ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN l_release_url AS lu ON r.id = lu.entity0
  JOIN url           AS u  ON u.id = lu.entity1
 WHERE u.url ILIKE '%%hyperion-records%%'
 GROUP BY label.id, label.name, label.gid
 ORDER BY releases_count DESC
;""")
mb_labels

Unnamed: 0,id,label,mbid,releases_count
0,388,hyperion,08e6c3c8-81ab-405f-9cff-10f6b8db064c,901
1,22708,helios,0a94e96a-9219-4dd7-a529-18d34e77f50f,139
2,16197,Signum Classics,79c26ea5-2313-4d53-84d9-b04219620c5f,9
3,2900,Gimell,3d971128-6d37-4870-b188-8e5b84bb1258,2
4,53283,Mariinsky,6b060b9a-da6f-4a11-923e-8b88944c4503,1
5,59800,Appian Publications and Recordings,f51e7981-6399-467c-af06-b4e9a29dbd8d,1
6,95583,The Choir of King's College Cambridge,d5827354-3b3f-421e-a1fe-6e25e2f7b8ea,1


We keep only the first three for the moment, the other ones seem to only be distributed by hyperion.

## MB releases with link to Hyperion and no label

In [5]:
sql("""
SELECT 
    COUNT(*) AS releases_count
  FROM release       AS r
  LEFT OUTER JOIN release_label AS rl ON rl.release = r.id
  JOIN l_release_url AS lu ON r.id = lu.entity0
  JOIN url           AS u  ON u.id = lu.entity1
 WHERE u.url ILIKE '%%hyperion-records%%'
 AND rl.label IS NULL
;""")

Unnamed: 0,releases_count
0,0


## Releases from hyperion labels in MB

In [6]:
sql("""
SELECT 
    COUNT(r.name)
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
 WHERE label.id IN (388, 22708, 16197);
""")

Unnamed: 0,count
0,1692


In [7]:
mb_releases = sql("""
SELECT 
    rl.catalog_number,
    r.barcode,
    r.name  AS title,
    ac.name AS ac,
    label.name AS label,
    CASE WHEN r.quality=-1 THEN 'unset'
         WHEN r.quality=0  THEN 'low'
         WHEN r.quality=1  THEN 'normal'
         WHEN r.quality=2  THEN 'high'
    END     AS quality,
    STRING_AGG(at.name, ', ') AS covers,
    STRING_AGG(
        CASE WHEN u.url ILIKE '%%hyperion-records%%' THEN u.url
             ELSE NULL
        END, ', ') AS hyperion_url,
    r.gid   AS mb_url
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN artist_credit AS ac ON r.artist_credit = ac.id
  LEFT OUTER JOIN cover_art_archive.cover_art      AS cova  ON cova.release = r.id
  LEFT OUTER JOIN cover_art_archive.cover_art_type AS covat ON covat.id = cova.id
  LEFT OUTER JOIN cover_art_archive.art_type       AS at    ON covat.type_id = at.id
  LEFT OUTER JOIN l_release_url AS lu ON r.id = lu.entity0
  LEFT OUTER JOIN url           AS u  ON u.id = lu.entity1
 WHERE label.id IN (388, 22708, 16197)
GROUP BY rl.catalog_number, r.barcode, r.name, ac.name, label.name, r.quality, r.gid
ORDER BY rl.catalog_number, r.barcode;
""")
mb_releases['mb_url'] = mb_releases.mb_url.apply(mb_release_link)
mb_releases['hyperion_url'] = mb_releases.hyperion_url.apply(lambda url: '<a href="{0}">{0}</a>'.format(url) if url else None)
mb_releases.head()

Unnamed: 0,catalog_number,barcode,title,ac,label,quality,covers,hyperion_url,mb_url
0,67250,,"Thalaba the Destroyer, etc. (Royal Philharmonic Orchestra feat. conductor: Vernon Handley)",Sir Granville Bantock,hyperion,unset,,,"<a href=""https://musicbrainz.org/release/d128f056-12e4-4817-9fb1-e4107f48b72a"">d128f056-12e4-4817-9fb1-e4107f48b72a</a>"
1,67428,34571574288.0,"The Sacred Music, Vol. 1","Monteverdi; The King's Consort, Robert King",hyperion,unset,,,"<a href=""https://musicbrainz.org/release/07436f6d-e19d-4948-98db-92fb18326592"">07436f6d-e19d-4948-98db-92fb18326592</a>"
2,67568,,"The Byrd Edition, Volume 10: Laudibus in sanctis (The Cardinall's Musick feat. conductor: Andrew Carwood)",William Byrd,hyperion,unset,,,"<a href=""https://musicbrainz.org/release/dc2a9e87-4035-4d14-a312-6eabf60cd058"">dc2a9e87-4035-4d14-a312-6eabf60cd058</a>"
3,67653,,"The Byrd Edition, Volume 11: Hodie Simon Petrus (The Cardinall's Musick feat. conductor: Andrew Carwood)",William Byrd,hyperion,unset,,,"<a href=""https://musicbrainz.org/release/25360292-fcc6-45ae-9117-6d6f76106b2b"">25360292-fcc6-45ae-9117-6d6f76106b2b</a>"
4,67675,,"The Byrd Edition, Volume 12: Assumpta est Maria (The Cardinall's Musick feat. conductor: Andrew Carwood)",William Byrd,hyperion,unset,,,"<a href=""https://musicbrainz.org/release/ac0f0a14-d39b-4a10-bca0-cb6a81d2e9db"">ac0f0a14-d39b-4a10-bca0-cb6a81d2e9db</a>"


In [8]:
mb_releases.shape

(1692, 9)

1677 releases already in MB, some might be duplicates

## Hyperion releases with PDF booklet in MB

In [9]:
mb_releases_with_booklet = sql("""
SELECT 
    rl.catalog_number,
    r.name AS title,
    STRING_AGG(at.name, ', ') AS covers,
    r.gid AS mb_url
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  JOIN cover_art_archive.cover_art      AS cova  ON cova.release = r.id
  JOIN cover_art_archive.cover_art_type AS covat ON covat.id = cova.id
  JOIN cover_art_archive.art_type       AS at    ON covat.type_id = at.id
 WHERE label.id IN (388, 22708, 16197) 
   AND at.name = 'Booklet'
   AND cova.mime_type = 'application/pdf'
 GROUP BY rl.catalog_number, r.name, r.gid
ORDER BY rl.catalog_number;
""")
mb_releases_with_booklet['mb_url'] = mb_releases_with_booklet.mb_url.apply(mb_release_link)
mb_releases_with_booklet.head()

Unnamed: 0,catalog_number,title,covers,mb_url
0,CDA66062,Bruckner Motets,Booklet,"<a href=""https://musicbrainz.org/release/0873033b-6b94-448f-bcc5-c267194df1c4"">0873033b-6b94-448f-bcc5-c267194df1c4</a>"
1,CDA66245,Requiem,Booklet,"<a href=""https://musicbrainz.org/release/c32fac32-2325-4f63-ac38-31d7daab4afc"">c32fac32-2325-4f63-ac38-31d7daab4afc</a>"
2,CDA66389,Motets,Booklet,"<a href=""https://musicbrainz.org/release/412e5b4e-6414-4e81-9f73-d27192307fab"">412e5b4e-6414-4e81-9f73-d27192307fab</a>"
3,CDA66426,Lamentations,Booklet,"<a href=""https://musicbrainz.org/release/b0357701-8d59-3fc8-aa72-3d5d655fdd7e"">b0357701-8d59-3fc8-aa72-3d5d655fdd7e</a>"
4,CDA67103,"Cantiones Sacrae, Volume 1",Booklet,"<a href=""https://musicbrainz.org/release/3976b461-dc61-4a25-be47-c2939661d083"">3976b461-dc61-4a25-be47-c2939661d083</a>"


In [10]:
mb_releases_with_booklet.shape

(51, 4)

## Hyperion releases with no catalogue number in MB

In [11]:
mb_releases_wo_catno = sql("""
SELECT release.barcode,
       release.name AS title,
       label.name AS label,
       release.gid AS mb_url
  FROM release_label AS rl
  JOIN release       ON rl.release = release.id
  JOIN label         ON rl.label = label.id
 WHERE label.id IN (388, 22708, 16197)
   AND rl.catalog_number IS NULL
ORDER BY rl.catalog_number;
""")
mb_releases_wo_catno['mb_url'] = mb_releases_wo_catno.mb_url.apply(mb_release_link)
mb_releases_wo_catno.head()

Unnamed: 0,barcode,title,label,mb_url
0,,Piano Quintet & String Quartet no. 2,hyperion,"<a href=""https://musicbrainz.org/release/4f66cd30-fdf0-46d7-afa7-a803c385759b"">4f66cd30-fdf0-46d7-afa7-a803c385759b</a>"
1,,Missa Ecce ego Joannes,hyperion,"<a href=""https://musicbrainz.org/release/2cbbdf24-d9d1-44cb-a07e-83ccdf36be6c"">2cbbdf24-d9d1-44cb-a07e-83ccdf36be6c</a>"
2,34571163291.0,The Evening Watch and Other Choral Music,hyperion,"<a href=""https://musicbrainz.org/release/d358abfe-4a98-4115-9ba5-ed072befcf43"">d358abfe-4a98-4115-9ba5-ed072befcf43</a>"
3,,"Variations on a Theme of Chopin, Op. 22 / Variations on a Theme of Corelli, Op. 42 (piano: Howard Shelley)",hyperion,"<a href=""https://musicbrainz.org/release/0ae980b3-2c97-3a6f-9e61-1699a099b878"">0ae980b3-2c97-3a6f-9e61-1699a099b878</a>"
4,,"The Complete Anthems and Services, Vol. I (King's Consort & The New Oxford College Choir feat. conductor: Robert King)",hyperion,"<a href=""https://musicbrainz.org/release/692eb556-50f5-4479-ab4e-634972f8c987"">692eb556-50f5-4479-ab4e-634972f8c987</a>"


In [12]:
mb_releases_wo_catno.shape

(22, 4)

## Hyperion Releases with no link

In [13]:
mb_releases_wo_link = sql("""
SELECT 
    rl.catalog_number,
    r.barcode,
    r.name  AS title,
    label.name AS label,
    r.gid   AS mb_url
  FROM release_label AS rl
  JOIN release       AS r  ON rl.release = r.id
  JOIN label               ON rl.label = label.id
  LEFT OUTER JOIN l_release_url AS lu ON r.id = lu.entity0
  LEFT OUTER JOIN url           AS u  ON u.id = lu.entity1
 WHERE label.id IN (388, 22708, 16197)
   AND u.url IS NULL
ORDER BY rl.catalog_number, r.barcode;
""")
mb_releases_wo_link['mb_url'] = mb_releases_wo_link.mb_url.apply(mb_release_link)
mb_releases_wo_link.head()

Unnamed: 0,catalog_number,barcode,title,label,mb_url
0,A66210,,Two Romantic Violin Concertos,hyperion,"<a href=""https://musicbrainz.org/release/28dbbf85-4ec1-4242-8469-6dbc800a8f5c"">28dbbf85-4ec1-4242-8469-6dbc800a8f5c</a>"
1,CDA20199,34571101996.0,"Clarinet Concerto in A Major, K622 / Clarinet Quintet in A Major, K581",hyperion,"<a href=""https://musicbrainz.org/release/0549aa69-e9bb-48ac-900d-7ee4844a616d"">0549aa69-e9bb-48ac-900d-7ee4844a616d</a>"
2,CDA20790,34571107905.0,"The Romantic Piano Concerto, Volume 11: Scharwenka: Piano Concerto no. 4 in F minor / Sauer: Piano Concerto no. 1 in E minor",hyperion,"<a href=""https://musicbrainz.org/release/b83965a9-caa1-39e7-a9c3-c83d8fffea28"">b83965a9-caa1-39e7-a9c3-c83d8fffea28</a>"
3,CDA66008,34571160085.0,How the World Wags,hyperion,"<a href=""https://musicbrainz.org/release/2aabc622-546a-42e9-8e17-ad7f8af38416"">2aabc622-546a-42e9-8e17-ad7f8af38416</a>"
4,CDA66056,34571160566.0,Songs and Dialogues,hyperion,"<a href=""https://musicbrainz.org/release/25d85cc4-27bb-3e73-b7f9-a166e9d5dbcb"">25d85cc4-27bb-3e73-b7f9-a166e9d5dbcb</a>"


In [14]:
mb_releases_wo_link.shape

(90, 5)

## MB Releases with hyperion cat. no. but neither label nor link

# Official Hyperion catalogue

In [15]:
hyperion_releases = pd.read_html('https://www.hyperion-records.co.uk/n.asp?n=1')[0][:-1]

hyperion_releases.rename(columns={0: 'catalog_number', 1: 'name'}, inplace=True)
hyperion_releases['barcode'] = hyperion_releases.name.str[-12:]
hyperion_releases['name'] = hyperion_releases.name.str[:-12]
hyperion_releases['url'] = hyperion_releases.catalog_number.apply(
    lambda no: '<a href="https://www.hyperion-records.co.uk/dc.asp?dc=D_{0}">{0}</a>'.format(no)) 

In [16]:
hyperion_releases.head()

Unnamed: 0,catalog_number,name,barcode,url
0,1EMBNB,Jack: Black and Blue,34571100708,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMBNB"">1EMBNB</a>"
1,1EMHDUS,Harmonies d'un Soir,34571100753,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMHDUS"">1EMHDUS</a>"
2,1EMIPM08,Pitts J: Airs & Fantasias,34571100678,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMIPM08"">1EMIPM08</a>"
3,1EMJ2O,Pitts: Jerusalem-Yerushalayim Superseded by 1EMJOY,34571100647,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMJ2O"">1EMJ2O</a>"
4,1EMJOY,Pitts: Jerusalem-Yerushalayim,34571101460,"<a href=""https://www.hyperion-records.co.uk/dc.asp?dc=D_1EMJOY"">1EMJOY</a>"


## Barcodes in MusicBrainz not found in Hyperion

In [17]:
barcodes_not_in_hyperion = set(mb_releases.barcode) - set(hyperion_releases.barcode) - {''} - {None}
releases_not_in_hyperion = mb_releases[mb_releases.barcode.apply(lambda b: b in barcodes_not_in_hyperion)]
releases_not_in_hyperion.head()

Unnamed: 0,catalog_number,barcode,title,ac,label,quality,covers,hyperion_url,mb_url
16,CDA20021,34571101217,Sacred Vocal Music (The Parley of Instruments),Claudio Monteverdi,hyperion,unset,,,"<a href=""https://musicbrainz.org/release/19d5fb58-d0cb-44ce-b73e-6a02a0768404"">19d5fb58-d0cb-44ce-b73e-6a02a0768404</a>"
17,CDA20199,34571101996,"Clarinet Concerto in A Major, K622 / Clarinet Quintet in A Major, K581","Wolfgang Amadeus Mozart; Thea King, English Chamber Orchestra, Jeffrey Tate, Gabrieli String Quartet",hyperion,unset,,,"<a href=""https://musicbrainz.org/release/0549aa69-e9bb-48ac-900d-7ee4844a616d"">0549aa69-e9bb-48ac-900d-7ee4844a616d</a>"
18,CDA20276,34571102764,String Sextets,Johannes Brahms; The Raphael Ensemble,hyperion,unset,,,"<a href=""https://musicbrainz.org/release/48b482a5-c7cc-4b6a-814d-b290f01d4579"">48b482a5-c7cc-4b6a-814d-b290f01d4579</a>"
19,CDA20294,34571102948,Stabat Mater / Salve Regina / In Caelestibus Regnis,"Giovanni Battista Pergolesi; The King's Consort, Robert King, Gillian Fisher, Michael Chance",hyperion,unset,,"<a href=""http://www.hyperion-records.co.uk/dc.asp?dc=D_CDA66294"">http://www.hyperion-records.co.uk/dc.asp?dc=D_CDA66294</a>","<a href=""https://musicbrainz.org/release/fa3e18b5-adf1-48a9-ac3f-0a834b3136a3"">fa3e18b5-adf1-48a9-ac3f-0a834b3136a3</a>"
20,CDA20450,34571104508,Celtic Symphony / Hebridean Symphony / The Witch of Atlas / The Sea Reivers,"Bantock; Royal Philharmonic Orchestra, Vernon Handley",hyperion,unset,,,"<a href=""https://musicbrainz.org/release/002a59f0-b93d-3343-9985-5108f6f45f99"">002a59f0-b93d-3343-9985-5108f6f45f99</a>"


## Releases not in MusicBrainz

In [18]:
barcodes_not_in_musicbrainz = set(hyperion_releases.barcode) - set(mb_releases.barcode)
releases_not_in_mb = hyperion_releases[hyperion_releases.barcode.apply(lambda b: b in barcodes_not_in_musicbrainz)]

## Template

In [19]:
from jinja2 import Template
template = Template("""
<!doctype html>

<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Community Cleanup #4: Hyperion</title>
    <link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
  </head>

  <body style="margin: 20px;">
    <h1>Pre-Classical Clean Up #4</h1>
    <a href="https://community.metabrainz.org/t/community-cleanup-4-hyperion/356213">
      <img src="https://i.imgur.com/qtAyjmj.png" width="668" height="187">
    </a>
    
    <ol>
      <li><a href="#labels">Labels corresponding to Hyperion in MusicBrainz</a></li>
      <li><a href="#booklets">Hyperion releases with PDF booklet in MB</a></li>
      <li><a href="#wo_catno">Hyperion releases with no catalog number in MB</a></li>
      <li><a href="#not_in_hyp">Releases in MusicBrainz not found in Hyperion catalogue</a></li>
      <li><a href="#all_mb">All Hyperion releases in MusicBrainz</a></li>
      <li><a href="#not_in_mb">Releases in Hyperion catalogue missing in MusicBrainz</a></li>
      <li><a href="#official">Official Hyperion catalogue</a></li>
    </ol>
    
    <h2 id="labels">Labels corresponding to Hyperion in MusicBrainz</h2>
    {{ mb_labels.to_html(index=False) }}
    <p>We keep only the first three for the moment, the other ones seem to only be distributed by hyperion.</p>
    <p>Feb 1st: 893, 135, 9</p>
    
    <h2 id="booklets">Hyperion releases with PDF booklet in MB</h2>
    <p>{{ mb_releases_with_booklet.shape[0] }} releases (Feb. 2nd: 51)</p>
    <p>These should be checked and the booklets removed if coming from hyperion</p>
    {{ mb_releases_with_booklet.to_html(index=False) }}
    
    <h2 id="wo_catno">Hyperion releases with no catalog number in MB</h2>
    <p>{{ mb_releases_wo_catno.shape[0] }} releases (Feb. 1st: 23)</p>
    <p>These should be checked and catalog number added if possible</p>
    {{ mb_releases_wo_catno.to_html(index=False) }}
    
    <h2 id="not_in_hyp">Releases in MusicBrainz not found in Hyperion catalogue</h2>
    <p>{{ releases_not_in_hyperion.shape[0] }} releases found by naive barcode comparison (Feb. 1st: 45)</p>
    <p>These should be checked and barcode fixed if that is the explanation</p>
    {{ releases_not_in_hyperion.to_html(index=False) }}

    <h2 id="all_mb">All Hyperion releases in MusicBrainz</h2>
    <p>{{ mb_releases.shape[0] }} releases (Feb. 1st: 1677)</p>
    {{ mb_releases.to_html(index=True) }}
    
    <h2 id="not_in_mb">Releases in Hyperion catalogue missing in MusicBrainz</h2>
    <p>{{ releases_not_in_mb.shape[0] }} releases found by naive barcode comparison (Feb. 1st: 2179)</p>
    {{ releases_not_in_mb.to_html(index=True) }}
    
    <h2 id="official">Official Hyperion catalogue</h2>
    <p>{{ hyperion_releases.shape[0] }} releases</p>
    {{ hyperion_releases.to_html(index=True) }}
  </body>
</html>
""")

with open('docs/hyperion_releases.html', 'w') as f:
    f.write(template.render(**globals())
            .replace('&lt;', '<').replace('&gt;', '>')
            .replace('class="dataframe"', 'class="table table-striped table-hover table-sm"')
            .replace('thead', 'thead class="thead-light"'))