This notebook queries the public ODL linked data endpoint with SPARQL to obtain aggregations from the MOZ (Muziekopnamen Zendgemachtigden) collection. The results are printed so that they can be copied into Flourish.

This notebook was used to generate the data for the MOZ collection description on the LABS website.

In [None]:
import json
import os
import requests

from collections import OrderedDict

# SPARQL endpoint that the collection queries in this notebook are sent to.
sparql_endpoint = "https://cat.apis.beeldengeluid.nl/sparql"
# Muziekweb SPARQL endpoint, used for the album counts per artist.
muziekweb_sparql_endpoint = "https://api.data.muziekweb.nl/datasets/MuziekwebOrganization/Muziekweb/services/Muziekweb/sparql"
# URI of the MOZ series, already wrapped in <> so it can be interpolated directly into a query.
moz_series_id = "<http://data.beeldengeluid.nl/id/series/2101608030025711131>"

# Namespace declarations prepended to every query sent to sparql_endpoint.
# Note that schema: (http) and sdo: (https) are two different namespaces.
prefixes = """PREFIX schema: <http://schema.org/>
              PREFIX sdo: <https://schema.org/>
              PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
              PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"""

# Namespace declarations prepended to every query sent to muziekweb_sparql_endpoint.
muziekweb_prefixes = """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
                        PREFIX vocab: <https://data.muziekweb.nl/vocab/>
                        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"""
 

In [None]:
def get_count_from_sparql(sparql_endpoint, query, timeout=300):
    """Carries out the query on the endpoint and returns the value of the count
    :params - sparql_endpoint - the endpoint containing the RDF data to be queried
    :params - query - the query to be carried out. This must select a count variable called ?count
    :params - timeout - number of seconds to wait for the endpoint before giving up
    :returns - the count of the results, as the string value delivered by the endpoint (callers
    convert it with int() where needed). Note, if the query fails (network or HTTP error, a response
    that is not valid JSON, or a result without a ?count binding), a warning is issued and the
    result is the string 0"""
    import warnings  # local import so this cell does not depend on a changed import cell

    try:
        response = requests.get(
            sparql_endpoint,
            params={"query": query},
            headers={"Accept": "application/sparql-results+json"},
            timeout=timeout,
        )
        # Turn HTTP error pages into an exception instead of trying to parse them as JSON.
        response.raise_for_status()
        return response.json()["results"]["bindings"][0]["count"]["value"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as error:
        # Honour the documented contract (0 on failure) but do not hide the problem.
        warnings.warn(f"SPARQL count query failed, returning 0: {error!r}")
        return "0"

In [None]:
def get_category_count_from_sparql(sparql_endpoint, query, timeout=300):
    """Carries out the query on the endpoint and returns the value of the counts per category
    :params - sparql_endpoint - the endpoint containing the RDF data to be queried
    :params - query - the query to be carried out. This must select a count variable called ?count and
    a variable ?category per value of which the count has been calculated
    :params - timeout - number of seconds to wait for the endpoint before giving up
    :returns - an ordered dict of the categories with their counts (as ints), in the order in which
    the endpoint returned them
    :raises - requests.RequestException if the request fails, times out or returns an HTTP error status"""
    response = requests.get(
        sparql_endpoint,
        params={"query": query},
        headers={"Accept": "application/sparql-results+json"},
        timeout=timeout,
    )
    # Surface HTTP errors as such, rather than as a confusing JSON or KeyError further down.
    response.raise_for_status()

    return OrderedDict(
        (binding["category"]["value"], int(binding["count"]["value"]))
        for binding in response.json()["results"]["bindings"]
    )

In [None]:
def get_category_count_and_category_uri_from_sparql(sparql_endpoint, query, timeout=300):
    """Carries out the query on the endpoint and returns the value of the counts per category, and also the category uri
    :params - sparql_endpoint - the endpoint containing the RDF data to be queried
    :params - query - the query to be carried out. This must select a count variable called ?count and
    a variable ?category per value of which the count has been calculated, and a variable ?category_uri for the URI
    of the category
    :params - timeout - number of seconds to wait for the endpoint before giving up
    :returns - an ordered dict of the categories with a field "count" for the count (as int) and a field "uri"
    for the uri, in the order in which the endpoint returned them
    :raises - requests.RequestException if the request fails, times out or returns an HTTP error status"""
    response = requests.get(
        sparql_endpoint,
        params={"query": query},
        headers={"Accept": "application/sparql-results+json"},
        timeout=timeout,
    )
    # Surface HTTP errors as such, rather than as a confusing JSON or KeyError further down.
    response.raise_for_status()

    return OrderedDict(
        (
            binding["category"]["value"],
            {
                "count": int(binding["count"]["value"]),
                "uri": binding["category_uri"]["value"],
            },
        )
        for binding in response.json()["results"]["bindings"]
    )

In [None]:
def fill_gaps_in_timeline(timeline):
    """For a timeline in years, fills in gaps where there are no values with the value 0.
    :params timeline - a dict with the years as keys and the counts as values. The keys do not
    need to be sorted: the range is taken from the smallest and largest year present
    :returns an ordered dict (year as string -> count as int) with complete data between the first
    and final year, in ascending order. An empty timeline gives an empty ordered dict
    :raises ValueError if a key or a count cannot be converted to an int"""
    complete_timeline = OrderedDict()
    if not timeline:
        # Nothing to fill; avoids failing on min()/max() of an empty sequence.
        return complete_timeline

    counts_by_year = {int(year): int(count) for year, count in timeline.items()}

    # Use min/max rather than the first/last key, so the result does not depend on
    # the order in which the endpoint happened to return the years.
    for year in range(min(counts_by_year), max(counts_by_year) + 1):
        complete_timeline[str(year)] = counts_by_year.get(year, 0)
    return complete_timeline

In [None]:
def get_person_connections(person_id, limit=10):
    """Gets the persons connected to this person via programmes or scenes, with the properties creator or byArtist
    :params person_id - the GTAA URI of the person, wrapped in angle brackets
    (e.g. "<http://data.beeldengeluid.nl/gtaa/255521>"), as it is inserted verbatim into the query
    :params limit - the maximum number of connected persons to return (default 10)
    :returns a dictionary with the name of the person as key, then the fields 'count' and 'uri' for the count
    of how often the person co-occurs with the input person, and the GTAA uri of the person. The patterns
    do not exclude the input person, so that person can appear in the result as well"""
    # The series is taken from moz_series_id, in line with the other queries in this notebook.
    query=prefixes + f"""
    SELECT ?count ?category ?category_uri
    WHERE
    {{
     {{
      SELECT (COUNT(DISTINCT ?program) as ?count) ?category ?category_uri
      WHERE
      {{

        ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id} .
        {{
             ?program (sdo:creator/sdo:creator)|(sdo:byArtist/sdo:byArtist) {person_id} ;
                      (sdo:creator/sdo:creator)|(sdo:byArtist/sdo:byArtist) ?category_uri .
        }}
          UNION
        {{
            ?program sdo:hasPart ?scene .
            ?scene (sdo:creator/sdo:creator)|(sdo:byArtist/sdo:byArtist) {person_id};
                   (sdo:creator/sdo:creator)|(sdo:byArtist/sdo:byArtist) ?category_uri .
        }}
            ?category_uri skos:prefLabel ?category
      }} GROUP BY ?category_uri ?category
     }}
    }} ORDER BY DESC(?count) LIMIT {int(limit)}
    """

    return get_category_count_and_category_uri_from_sparql(sparql_endpoint, query)

In [None]:
def get_top_x_entities_by_role_property(role_property, number_of_entities):
    """Gets the entities that occur most often in the MOZ series in the given role.
    An entity is looked up on the programme itself, on its season, on the series and on the
    scenes of the programme; the number of distinct programmes is counted per entity label.
    :params role_property - the prefixed role property, e.g. "sdo:creator". It is used twice in a
    row as a property path (programme -> role node -> entity)
    :params number_of_entities - the maximum number of entities to return
    :returns an ordered dict with the label of the entity as key and the number of programmes
    as value, sorted by descending count"""
    # The season branch uses sdo:partOfSeason, the same property as the series pattern;
    # sdo:isPartOfSeason is not a schema.org property, so that branch could never match.
    query=prefixes + f"""

    SELECT ?count ?category
    WHERE
    {{
      {{
        SELECT (COUNT(DISTINCT ?program) as ?count) ?category
        WHERE 
        {{
            ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id} .
              {{
                ?program {role_property}/{role_property} ?entity  .
                ?entity skos:prefLabel ?category
              }}
              UNION
              {{
                ?program sdo:partOfSeason ?season .
                ?season  {role_property}/{role_property} ?entity  .
                ?entity skos:prefLabel ?category
              }}
              UNION
              {{
                {moz_series_id} {role_property}/{role_property} ?entity  .
                ?entity skos:prefLabel ?category
              }}
              UNION
              {{
                ?program sdo:hasPart ?scene.
                ?scene  {role_property}/{role_property} ?entity  .
                ?entity skos:prefLabel ?category 
              }}
        }} GROUP BY ?category 
      }}
    }} ORDER BY DESC(?count) LIMIT {number_of_entities}
    """

    return get_category_count_from_sparql(sparql_endpoint, query)

In [None]:
def uninvert_name(name):
    """Turns an inverted person name ("surname, first name") into "first name surname".
    :params name - the (possibly inverted) person name
    :returns the name with the part after the first comma placed before the part in front of it;
    a name without a comma is returned unchanged. Anything after a second comma is ignored"""
    if "," not in name:
        return name

    pieces = [piece.strip() for piece in name.split(",")]
    surname, given_name = pieces[0], pieces[1]
    return " ".join((given_name, surname))

## Number of concerts

In [None]:
# Count the distinct programmes that belong (via their season) to the MOZ series.
query = prefixes + f"""
            SELECT (COUNT(DISTINCT(?program)) as ?count)
            WHERE 
            {{
                ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id}
            }}"""

# The count comes back as a string; `total` is reused further down to compute percentages.
total = int(get_count_from_sparql(sparql_endpoint, query))
print(total)

## Number of concerts over time

In [None]:
# Count the programmes per publication year (first four characters of the date).
# The result is ordered by year because fill_gaps_in_timeline takes the first and
# last key of the timeline as the start and end of the range.
query = prefixes + f"""
            SELECT (COUNT(?category) as ?count) ?category
            WHERE 
            {{
                ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id}.  
                ?program <https://schema.org/datePublished> ?date .
                BIND(substr(?date, 1, 4) as ?category)
            }} GROUP BY ?category ORDER BY ?category"""

timeline = get_category_count_from_sparql(sparql_endpoint, query)
complete_timeline = fill_gaps_in_timeline(timeline)  # fill in the gaps where there are no values

# Tab-separated output (year, count) for pasting into Flourish
for year in complete_timeline:
    print(f"{year}\t{complete_timeline[year]}")

## Distribution over music genres

In [None]:
# Number of programmes per genre label, most frequent genre first.
query = prefixes + f"""
            SELECT ?count ?category
            WHERE
            {{
                {{
                SELECT (COUNT(?category) as ?count) ?category
                    WHERE 
                    {{
                        ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id}.  
                        ?program sdo:genre/skos:prefLabel ?category .
                    }} GROUP BY ?category
                }}
            }} ORDER BY DESC(?count) """

distribution = get_category_count_from_sparql(sparql_endpoint, query)

# Tab-separated output (genre, count) for pasting into Flourish
for genre, count in distribution.items():
    print(f"{genre}\t{count}")

## Distribution over locations

In [None]:
# Number of occurrences per recording location, taken from the programme itself
# or from one of its scenes; most frequent location first.
query = prefixes + f"""
            SELECT ?count ?category
            WHERE
            {{
                {{

                    SELECT (COUNT(?category) as ?count) ?category
                    WHERE 
                    {{
                        ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id}.
                        {{  
                            ?program <https://schema.org/locationCreated> ?category .
                        }}
                        UNION                
                        {{  
                            ?program sdo:hasPart ?scene.
                            ?scene <https://schema.org/locationCreated> ?category .
                        }}
                    }} GROUP BY ?category
                }}
            }} ORDER BY DESC(?count) """

distribution = get_category_count_from_sparql(sparql_endpoint, query)

# Tab-separated output (location, count) for pasting into Flourish
for location, count in distribution.items():
    print(f"{location}\t{count}")

## Distribution over events

In [None]:
# Number of occurrences per event (label of sdo:recordedAt), taken from the programme
# itself or from one of its scenes; most frequent event first.
query = prefixes + f"""
            SELECT ?count ?category
            WHERE
            {{
                {{

                    SELECT (COUNT(?category) as ?count) ?category
                    WHERE 
                    {{
                        ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id}.
                        {{  
                            ?program <https://schema.org/recordedAt>/skos:prefLabel ?category .
                        }}
                        UNION                
                        {{  
                            ?program sdo:hasPart ?scene.
                            ?scene <https://schema.org/recordedAt>/skos:prefLabel ?category .
                        }}
                    }} GROUP BY ?category
                }}
            }} ORDER BY DESC(?count) """

distribution = get_category_count_from_sparql(sparql_endpoint, query)

# Tab-separated output (event, count) for pasting into Flourish.
# Print the event itself, not the `location` variable left over from the locations cell.
for event in distribution:
    print(f"{event}\t{distribution[event]}")

## Percentage of concerts with artists

In [None]:
# Count the programmes that have at least one artist-like relation, either on the programme
# itself, on its season, on the series or on one of its scenes.
# The season branch uses sdo:partOfSeason (the property also used in the series pattern);
# sdo:isPartOfSeason is not a schema.org property, so that branch could never match.
query = prefixes + f"""
SELECT (COUNT(DISTINCT ?program) as ?count)
WHERE 
{{
        ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id}
          {{
            ?program 
              (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor) ?x
          }}
          UNION
          {{
            ?program sdo:partOfSeason ?season .

            ?season (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor) ?x
          }}
          UNION
          {{
            {moz_series_id} (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor) ?x
          }}
          UNION
          {{
            ?program sdo:hasPart ?scene.

            ?scene (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor) ?x
          }}
      }}
"""
with_person_count = int(get_count_from_sparql(sparql_endpoint, query))

# `total` is the number of concerts computed in the "Number of concerts" cell.
with_person_percentage = (with_person_count / total) * 100
print(f"Met artiesten\t{with_person_count}\t{with_person_percentage:.2f}")
print(f"Zonder artiesten\t{total-with_person_count}\t{100-with_person_percentage:.2f}")

## Number of artists
NB: this includes orchestras etc. as well as individual persons

In [None]:
# Count the distinct labels of the entities linked to a programme, its season, the series or
# one of its scenes through a role node (role property applied twice in the property path).
# The season branch uses sdo:partOfSeason (the property also used in the series pattern);
# sdo:isPartOfSeason is not a schema.org property, so that branch could never match.
query = prefixes + f"""
SELECT (COUNT(DISTINCT ?entityName) as ?count)
WHERE 
{{
        ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id}
          {{
            ?program 
              (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor)/
              (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor) ?entity  .
            ?entity skos:prefLabel ?entityName
          }}
          UNION
          {{
            ?program sdo:partOfSeason ?season .

            ?season  (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor)/
            (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor) ?entity  .
            ?entity skos:prefLabel ?entityName
          }}
          UNION
          {{
            {moz_series_id}  (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor)/
            (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor) ?entity  .
            ?entity skos:prefLabel ?entityName
          }}
          UNION
          {{
            ?program sdo:hasPart ?scene.

            ?scene  (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor)/
            (sdo:byArtist|sdo:creator|sdo:mentions|sdo:contributor) ?entity  .
            ?entity skos:prefLabel ?entityName 
          }}
      }}
"""
print(get_count_from_sparql(sparql_endpoint, query))

## Distribution over roles

In [None]:
# Number of distinct entity labels per role. The inner query groups by the role property
# (?role is used both from the programme/season/series/scene to the role node and from the
# role node to the entity); the outer query replaces the property by its rdfs:label.
query=prefixes + f"""

SELECT ?count ?category
WHERE
{{
  {{
    SELECT (COUNT(DISTINCT ?entityName) as ?count) ?role
    WHERE 
    {{
        {{
            ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id} .
            ?program ?role ?roleEntity . 
            ?roleEntity ?role ?entity  .
            ?entity skos:prefLabel ?entityName
        }}
        UNION
        {{
            ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id} .
            ?program sdo:partOfSeason ?season.
            ?season ?role ?roleEntity . 
            ?roleEntity ?role ?entity  .
            ?entity skos:prefLabel ?entityName
        }}
        UNION
        {{
            ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id} .
            {moz_series_id} ?role ?roleEntity . 
            ?roleEntity ?role ?entity  .
            ?entity skos:prefLabel ?entityName
        }}
        UNION
        {{
            ?program sdo:partOfSeason/sdo:partOfSeries {moz_series_id} .
            ?program sdo:hasPart ?scene.
            ?scene ?role ?roleEntity . 
            ?roleEntity ?role ?entity  .
            ?entity skos:prefLabel ?entityName
        }}
        
    }} GROUP BY ?role
  }}
    ?role rdfs:label ?category
}} ORDER BY DESC(?count)"""

distribution = get_category_count_from_sparql(sparql_endpoint, query)

# Tab-separated output (role label, count) for pasting into Flourish
for role in distribution:
    print(f"{role}\t{distribution[role]}")

## Top x creators

In [None]:
# The creators that occur in the most programmes; `distribution` is reused by the next cell.
number = 10
distribution = get_top_x_entities_by_role_property("sdo:creator", number)

# Tab-separated output (name, number of programmes) for pasting into Flourish
for person, count in distribution.items():
    print(f"{uninvert_name(person)}\t{count}")

In [None]:
# number of albums in Muziekweb per creator
for person in distribution:
    person_name = uninvert_name(person)
    query = muziekweb_prefixes + f"""
            SELECT (COUNT(DISTINCT(?album)) AS ?count) 
            WHERE {{
              ?album vocab:performer/skos:prefLabel \"{person_name}\" ; a vocab:Album
            }}"""

    print(f"{uninvert_name(person)}\t{get_count_from_sparql(muziekweb_sparql_endpoint, query)}")

## Top x performers

In [None]:
# The performers that occur in the most programmes; `distribution` is reused by the next cell.
number = 10
distribution = get_top_x_entities_by_role_property("sdo:byArtist", number)

# Tab-separated output (name, number of programmes) for pasting into Flourish
for artist, count in distribution.items():
    print(f"{uninvert_name(artist)}\t{count}")

In [None]:
# number of albums in Muziekweb per performer
for artist in distribution:
    artist_name = uninvert_name(artist)
    query = muziekweb_prefixes + f"""
            SELECT (COUNT(DISTINCT(?album)) AS ?count) 
            WHERE {{
              ?album vocab:performer/skos:prefLabel \"{artist_name}\" ; a vocab:Album
            }}"""

    print(f"{uninvert_name(artist)}\t{get_count_from_sparql(muziekweb_sparql_endpoint, query)}")

## Network of a person

In [None]:
# Starting point of the network: Jean-Pierre Gabriel, the most frequently occurring creator.
# Any other creator can be used instead.

person_id = "<http://data.beeldengeluid.nl/gtaa/255521>"  # Gabriel
person_label = "Gabriël, Jean-Pierre"

# the top 10 connections of the starting person
connections = get_person_connections(person_id)

person_group = {}

# For each of those connections, fetch their own top 10 connections and print one link per pair.
# Group numbers start at 2; a person keeps the group of the first connection they were found under.
for i, person in enumerate(connections, start=2):
    person_uri = "<" + connections[person]['uri'] + ">"
    person_connections = get_person_connections(person_uri)
    for new_person, details in person_connections.items():
        if person == new_person:
            continue  # no link from a person to themselves
        print(f"{uninvert_name(person)}\t{uninvert_name(new_person)}\t{details['count']}")
        person_group.setdefault(new_person, i)

# Paste output in the Links tab of the Flourish visualisation

In [None]:
# One line per person in the network: name and group number, tab-separated
for person, group in person_group.items():
    print(f"{uninvert_name(person)}\t{group}")

# Paste output in the Points tab of the Flourish visualisation

## Top 10 production companies

In [None]:
# The production companies that occur in the most programmes
number = 10
distribution = get_top_x_entities_by_role_property("sdo:productionCompany", number)

# Tab-separated output (company, number of programmes) for pasting into Flourish
for company, count in distribution.items():
    print(f"{company}\t{count}")

## Top x broadcasters
Broadcasters are not yet in the data; they will be added shortly.

In [None]:
# The broadcasters (sdo:provider) that occur in the most programmes
number = 10
distribution = get_top_x_entities_by_role_property("sdo:provider", number)

# Tab-separated output (broadcaster, number of programmes) for pasting into Flourish
for broadcaster, count in distribution.items():
    print(f"{broadcaster}\t{count}")