In [None]:
%run -i ../startup.py
ENTITY_TYPE = 'work'
endpoint='http://data.bnf.fr/sparql'

## Works from BNF

In [None]:
# Count of BNF entities of type ENTITY_TYPE that are linked to MB works.
# NOTE(review): bnf_entity_count is not defined in this notebook; presumably it
# is provided by ../startup.py — confirm exactly what it counts.
bnf_entity_count(ENTITY_TYPE)

In [None]:
# Ask BNF for every resource that has a skos:exactMatch whose value contains
# 'musicbrainz.org/work/', together with its preferred label.
# The query is a plain (non-f) string: it has no placeholders, so the SPARQL
# braces do not need to be doubled.
# NOTE(review): regex() is applied to ?mbid directly; if ?mbid is an IRI, strict
# SPARQL engines expect regex(str(?mbid), ...) — confirm before switching
# endpoints.
links_from_bnf = sparql("""
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT (?work AS ?bnf) ?mbid ?name
WHERE {
  ?work skos:exactMatch ?mbid .
  ?work skos:prefLabel ?name .
  FILTER (regex (?mbid, 'musicbrainz.org/work/'))
}
""", endpoint=endpoint)  # reuse the endpoint configured in the setup cell

print('Count:', len(links_from_bnf))
display_df(links_from_bnf.head())

## Works from MB with BNF links

In [None]:
# Every MusicBrainz work that has a URL relationship whose URL contains
# 'bnf.fr', ordered by work name.
links_from_mb = sql("""
SELECT
    url.url AS bnf,
    work.gid AS mbid,
    work.name
FROM work
JOIN l_url_work AS luw ON luw.entity1 = work.id
JOIN url               ON luw.entity0 = url.id
WHERE
    url.url LIKE '%%bnf.fr%%'
ORDER BY work.name
;
""")

# Normalise the two columns later used as merge keys: keep only the last path
# segment of the URL, and turn the gid into a plain string.
links_from_mb['bnf'] = links_from_mb['bnf'].map(
    lambda url: url.rsplit('/', 1)[-1])
links_from_mb['mbid'] = links_from_mb['mbid'].map(str)

print('Count:', len(links_from_mb))
display_df(links_from_mb.head())

## Data alignment

In [None]:
# Full outer join of both link sets on the (bnf, mbid) pair. The `_merge`
# indicator column records whether a pair came from the BNF side only
# ('left_only'), the MB side only ('right_only') or from both.
merge = links_from_bnf.merge(
    links_from_mb,
    how='outer',
    on=['bnf', 'mbid'],
    suffixes=('_bnf', '_mb'),
    indicator=True,
)
display_df(merge.head())

In [None]:
# Pairs found only on the MusicBrainz side of the merge, i.e. links that exist
# in MB but are missing in BNF.
is_mb_only = merge['_merge'] == 'right_only'
links_to_add_to_bnf = merge.loc[is_mb_only, ['name_mb', 'mbid', 'bnf']]

print('Count:', len(links_to_add_to_bnf))
display_df(links_to_add_to_bnf.head())

In [None]:
# Pairs found only on the BNF side of the merge, i.e. links that exist in BNF
# but are missing in MB.
# .copy() makes this an independent frame, so adding a column below neither
# triggers SettingWithCopyWarning nor risks writing into `merge`.
links_to_add_to_mb = merge.loc[
    merge['_merge'] == 'left_only', ['name_bnf', 'bnf', 'mbid']].copy()

# One edit link per row. A row-wise list is used instead of
# DataFrame.apply(axis=1) because apply() on an empty frame returns a
# DataFrame, which cannot be assigned to a single column; the list form also
# works when there is nothing left to add.
links_to_add_to_mb['edit_link'] = [
    mb_work_edit_bnf_link(row) for _, row in links_to_add_to_mb.iterrows()]

print('Count:', len(links_to_add_to_mb))
display_df(links_to_add_to_mb.head())

## Report

In [None]:
# Jinja2 is only needed to build the HTML report below.
import jinja2

# Static HTML page (styled with Bootstrap 4.0.0 loaded from a CDN) listing the
# two sets of missing links. The template references the following names,
# which must be present in the context passed to render():
#   MB_DATABASE_VERSION, date, df_to_html,
#   links_to_add_to_mb, links_to_add_to_bnf
template = jinja2.Template("""
<!doctype html>

<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Alignment of MusicBrainz and BNF Works</title>
    <link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
  </head>

  <body style="margin: 20px;">
    <h1>Alignment of MusicBrainz and BNF Works</h1>

    <p>Latest MB database update: {{ MB_DATABASE_VERSION }}</p>
    <p>Latest update: {{ date.today().isoformat() }}</p>

    <ol>
      <li>
        <a href="#bnf2mb">Add missing BNF links to MusicBrainz</a>
        ({{ links_to_add_to_mb.shape[0] }} rows)
      </li>
      <li>
        <a href="#mb2bnf">Add missing MusicBrainz links to BNF</a>
        ({{ links_to_add_to_bnf.shape[0] }} rows)
      </li>

    </ol>
    
    <h2 id="bnf2mb">Add missing BNF links to MusicBrainz</h2>
    {{ df_to_html(links_to_add_to_mb) }}

    <h2 id="mb2bnf">Add missing MusicBrainz links to BNF</h2>
    {{ df_to_html(links_to_add_to_bnf) }}
    
  </body>
</html>
""")

# Render first, so that a failure while rendering does not leave an empty,
# already-truncated report file behind.
# The notebook globals are the render context: the template looks up the
# frames, helpers and constants it needs by name.
report_html = (
    template.render(**globals())
    # Un-escape angle brackets so HTML stored in table cells is rendered as
    # markup instead of text.
    # NOTE(review): this also re-enables any markup present in the data itself
    # (e.g. work names) — confirm that this is acceptable.
    .replace('&lt;', '<').replace('&gt;', '>')
    # Swap the default table class for Bootstrap table classes.
    .replace('class="dataframe"', 'class="table table-striped table-hover table-sm"')
    # Style opening <thead> tags only; replacing the bare word 'thead' also
    # rewrote the closing tag into the invalid '</thead class="thead-light">'.
    .replace('<thead', '<thead class="thead-light"')
)

# The page declares charset="utf-8", so write it as UTF-8 regardless of the
# platform's default encoding.
with open('../docs/bnf-works-report.html', 'w', encoding='utf-8') as f:
    f.write(report_html)