# musoW to schema.org

## Imports


In [None]:
from datetime import datetime
import re

# google auth
from google.colab import auth
import gspread
from oauth2client.client import GoogleCredentials

# data
import pandas as pd
import numpy as np
import hashlib

!pip install rdflib
from rdflib import Graph, Literal, RDF, URIRef, ConjunctiveGraph, Namespace
from rdflib.namespace import FOAF , XSD, OWL, DC , RDF , RDFS

# download 
from google.colab import files

Collecting rdflib
  Downloading rdflib-6.0.2-py3-none-any.whl (407 kB)
[?25l[K     |▉                               | 10 kB 18.8 MB/s eta 0:00:01[K     |█▋                              | 20 kB 18.0 MB/s eta 0:00:01[K     |██▍                             | 30 kB 11.0 MB/s eta 0:00:01[K     |███▏                            | 40 kB 9.4 MB/s eta 0:00:01[K     |████                            | 51 kB 5.5 MB/s eta 0:00:01[K     |████▉                           | 61 kB 5.3 MB/s eta 0:00:01[K     |█████▋                          | 71 kB 5.3 MB/s eta 0:00:01[K     |██████▍                         | 81 kB 6.0 MB/s eta 0:00:01[K     |███████▎                        | 92 kB 6.0 MB/s eta 0:00:01[K     |████████                        | 102 kB 5.1 MB/s eta 0:00:01[K     |████████▉                       | 112 kB 5.1 MB/s eta 0:00:01[K     |█████████▋                      | 122 kB 5.1 MB/s eta 0:00:01[K     |██████████▌                     | 133 kB 5.1 MB/s eta 0:00:01[K  

## Parse google spreadsheet

In [None]:
# old musoW backup (include errors)
# https://raw.githubusercontent.com/polifonia-project/registry-data/master/dataset.nt

# musow new schema (summary tab new properties)
# https://docs.google.com/spreadsheets/d/1SXwn1PbqkW4DhgzKel2ofUgr681yOpb_wIRy4rc5zpM/edit?usp=sharing

# authenticate the Colab user against Google
auth.authenticate_user()

# open the registry spreadsheet with the application-default credentials
credentials = GoogleCredentials.get_application_default()
gc = gspread.authorize(credentials)
spreadsheet = gc.open('Polifonia Registry (musoW Survey)')

# worksheet at index 3 is the "resources" tab
topics = spreadsheet.get_worksheet(3)

# Transform tables to dataframes
def make_header(df):
  """Promote the first row of `df` to column labels.

  Returns the remaining rows (the original row index is kept, so it no
  longer starts at the first label) with the values of the first row as
  column names. The frame passed in is left untouched.
  """
  body = df[1:]
  body.columns = df.iloc[0]
  return body

# dataframe: every worksheet row becomes a record, the first one the header
rows = topics.get_all_values()
df = make_header(pd.DataFrame.from_records(rows))
df.head()

Unnamed: 0,Person,ID: Resource name,Description,ID: URL,Project,Search Criterion,Research Questions,Item:Resource example,Reused resource,Resource type,Category,Type: Collection,Type: Specification,Affordance: Is playable?,Purpose: Learning,Purpose: Research,Scope: Temporal,Scope: Geographical,Scope: Genre,Scope: Artist,Scope: Formats,Scope: MO type,Scope: Object type,Access: Public,Access: license,Access: Free/Charged,Format: Interoperable?,Interface: Human consumption?,Interface: SPARQL endpoint?,SPARQL endpoint URI,Interface: Data Dump?,Interface: API?,Interface: Is it queryable?,Interface: Browsable?,Collection: Size,Data size,Symbolic: Machine readable?,Feature: Melody,Feature: Harmony,Feature: Lyrics,Feature: Rhythm,Feature: Timbre,Feature: Contour/Shape,Feature: Structure,Feature: Descriptive Metadata,Situation/Task,Target audience
1,MD,ArchiveGrid,Aggregator of archival descriptions,https://beta.worldcat.org/archivegrid/,OCLC WorldCat,MuSO (Europeana),,,,Catalogue,Metadata,Y,N,N,N,Y,N,N,N,N,"MARC, EAD, HTML, PDF",MusicalWork,Musical resource,Y,Not specified,Free,Y,Y,,,,,Y,Y,>1M,UNKNOWN,N,N,N,N,N,N,N,N,Y,metadata aggregation,researchers
2,MD,MuSO (Europeana),The MuSO (Music Scholarship Online) project co...,http://oaktrust.library.tamu.edu/handle/1969.1...,MuSO (Europeana),MuSO (Europeana),1. To whom is this content interesting? 2. How...,,,Catalogue,Metadata,Y,N,N,N,Y,N,N,N,N,,MusicalWork,Musical resource,Y,Open Access,Free,Y,Y,,,,,N,Y,<100,UNKNOWN,N,N,N,N,N,N,N,N,N,metadata aggregation,researchers
3,MD,Digital Resources for Musicology,Catalogue of digital projects for musicologists,http://drm.ccarh.org/,Center for Computer Assisted Research in the H...,Digital Resources for Musicology,,,,Catalogue,Metadata,Y,N,N,N,Y,Y,Y,Y,Y,,MusicalWork,Musical resource,Y,Open Access,Free,Y,Y,,,,,Y,Y,<1000,,N,N,N,N,N,N,N,N,Y,metadata aggregation,researchers
4,MD,Online Resources for Music Scholars,Catalogue of digital projects for musicologists,http://hcl.harvard.edu/research/guides/biblio/...,University of Harvard,Digital Resources for Musicology,,,,Catalogue,Metadata,Y,N,N,N,Y,N,N,N,N,JPG,MusicalWork,Musical resource,Y,Open Access,Free,Y,Y,,,,,Y,Y,<100000,,N,N,N,N,N,N,N,N,Y,metadata aggregation,researchers
5,MD,Electronic and Virtual Editions,Catalogue of digital projects for musicologists,http://eve.ccarh.org/,Center for Computer Assisted Research in the H...,Digital Resources for Musicology,,,,Catalogue,Metadata,Y,N,N,N,Y,N,N,N,N,"audio, video",MusicalWork,Musical resource,Y,Open Access,Free,N,Y,,,,,Y,Y,<1000,,N,N,N,N,N,N,N,N,Y,metadata aggregation,researchers


## Utils

In [None]:
def ts():
  """Return the current POSIX timestamp as a string, with '.' replaced by '-'."""
  seconds = datetime.now().timestamp()
  return str(seconds).replace('.','-')

def urid(s):
  """Return the hexadecimal MD5 digest of the string `s` (encoded with the default codec)."""
  digest = hashlib.md5(s.encode())
  return digest.hexdigest()

def camel_case_split(identifier):
    """Split a CamelCase identifier into space-separated words.

    A word boundary is placed between a lower-case and an upper-case letter,
    and before the last capital of an upper-case run that is followed by a
    lower-case letter (so acronyms stay together).
    """
    # the pattern has no capturing groups, so findall yields the full matches
    words = re.findall(r'.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)', identifier)
    return " ".join(words)

def column_to_json(field_id, field_disambiguate, field_value_type, field_prepend,
                   field_label, field_type, field_property, field_placeholder,
                   field_browse, field_searchWikidata, field_disabled,
                   field_class, field_cache, field_values=None):
  """Assemble the dictionary describing one field of the web form.

  Each argument is stored unchanged under its key. The "values" key is
  only present when `field_values` is truthy (a non-empty mapping).
  """
  description = {
      "id": field_id,
      "disambiguate": field_disambiguate,
      "value": field_value_type,
      "prepend": field_prepend,
      "label": field_label,
      "type": field_type,
      "property": field_property,
      "placeholder": field_placeholder,
      "browse": field_browse,
      "searchWikidata": field_searchWikidata,
      "disabled": field_disabled,
      "class": field_class,
      "cache_autocomplete": field_cache,
  }
  if not field_values:
    return description
  description["values"] = field_values
  return description

## Setup graphs

In [None]:
# base IRIs, kept as plain strings because later cells concatenate them directly
base = 'https://w3id.org/musow/'
vocab = 'https://w3id.org/musow/vocab/'
schema = "https://schema.org/"

# rdflib namespaces
BASE = Namespace(base)
VOCAB = Namespace(vocab)
SCHEMA = Namespace(schema)
MO = Namespace("http://purl.org/ontology/mo/")
PROV = Namespace("http://www.w3.org/ns/prov#")

#dft = df[10:13] # CHANGE

# single store for all named graphs, with the prefixes used on serialization
g = ConjunctiveGraph()
for prefix, namespace in (("schema", SCHEMA), ("musow", BASE), ("prov", PROV)):
  g.bind(prefix, namespace)

# named graph for the controlled vocabularies
vocab_graph = URIRef(BASE+'vocabularies/')

## Controlled vocabularies

In [None]:
def _slugged(labels):
  """Map every non-empty label to vocab + its lower-case, dash-separated form."""
  return {label: vocab+label.lower().replace(" ","-") for label in labels if label}


def _column_terms(column, sep):
  """Collect the distinct `sep`-separated terms found in a dataframe column.

  Cells are joined and re-split on `sep`; pieces of one character or less
  are ignored, the rest are stripped and lower-cased, and each resulting
  term is mapped to its vocabulary URI.
  """
  joined = sep.join(df[column].unique().tolist())
  terms = {piece.strip().lower() for piece in joined.split(sep) if len(piece) > 1}
  return {term: vocab+term.strip().lower().replace(" ","-") for term in terms if term}


# label -> URI mappings; the web form later inverts them into URI -> label

res_types = _slugged([
    "Dataset",
    "Repository",
    "Service",
    "Software",
    "Schema",
    "Catalogue",
    "Digital Library",
    "Ontology",
    "Digital edition",
    "Format",
    "Project",
])

res_educational = _slugged(["Learning", "Research"])

res_audiences = _column_terms("Target audience", ';')

res_scope = _slugged(["Temporal", "Geographical", "Genre", "Artist"])

# Music Ontology classes, labelled with their CamelCase name split into words
res_mo_types = {
    camel_case_split(mo_class): "http://purl.org/ontology/mo/"+mo_class
    for mo_class in ["AnalogSignal","MusicalWork","Signal","Record","Recording","Track","Libretto","Score","Performance","MusicArtist","Instrument","MusicalManifestation"]
    if mo_class
}

res_subject_types = _slugged(["Tabs","Audio","Musical resource","Listening experience","Publisher"])

# feature labels are sentences, so their URIs are spelled out by hand
res_app_features = {
    "Shares data in open data formats": vocab+'interoperable',
    "Provides user-friendly interfaces": vocab+'human-consumption',
    "Offers playable music records": vocab+'playable',
    "Serves machine-readable music data": vocab+'machine-readable',
    "Offers a Web API":vocab+'api',
    "Has a SPARQL endpoint": vocab+'sparql-endpoint',
    "Provides user-friendly search interfaces":vocab+'search-interface',
    "Provides interfaces for browsing contents": vocab+'browse-interface',
    "Offers data export or dump": vocab+'data-dump'
}

res_categories = _slugged(df["Category"].unique().tolist())

res_data_features = {
    "Melody": vocab+'melody',
    "Harmony": vocab+'harmony',
    "Lyrics": vocab+'lyrics',
    "Rhythm": vocab+'rhythm',
    "Timbre": vocab+'timbre',
    "Contour/Shape": vocab+'contour-shape',
    "Music structure":vocab+'structure',
    "Descriptive metadata": vocab+'descriptive-metadata'
}

res_formats = _column_terms("Scope: Formats", ',')

res_licenses = _slugged(df["Access: license"].unique().tolist())

# sizes: '<' / '>' are spelled out; placeholder values are left out
res_collection_size = {
    size: vocab+size.lower().replace("<",'lt-').replace(">",'gt-').replace(" ","-")
    for size in df["Collection: Size"].unique().tolist()
    if size and size != 'N/A' and size != 'UNKNOWN'
}

res_datasize = {
    size: vocab+size.lower().replace("~",'')
    for size in df["Data size"].unique().tolist()
    if size and size != 'N/A' and size != 'UNKNOWN'
}

res_tasks = _column_terms("Situation/Task", ';')



## Create graphs

In [None]:
# Build one named graph per spreadsheet row and serialize everything as TriG.
#
# Every controlled-vocabulary term attached to a record is labelled twice:
# in the record's own graph and in the shared vocabulary graph.

# curator initials (column "Person") -> e-mail used to mint the creator URI
users = {"AM" : "albert.meronyo@gmail.com", 
         "ED": "enricodaga@gmail.com", 
         "MD": "marilena.daquino2@unibo.it"}

# (column, URI suffix, label) of the Y/N scope flags
# NOTE(review): vocab/artist is labelled 'Composer' here but "Artist" in
# res_scope; kept as is, confirm which label is intended.
SCOPE_FLAGS = (
    ("Scope: Temporal", 'temporal', 'Temporal'),
    ("Scope: Geographical", 'geographical', 'Geographical'),
    ("Scope: Genre", 'genre', 'Genre'),
    ("Scope: Artist", 'artist', 'Composer'),
)

# (column, label, URI suffix) of the Y/N technical features
APP_FEATURE_FLAGS = (
    ("Format: Interoperable?", "Shares data in open data formats", 'interoperable'),
    ("Interface: Human consumption?", "Provides user-friendly interfaces", 'human-consumption'),
    ("Affordance: Is playable?", "Offers playable music records", 'playable'),
    ("Symbolic: Machine readable?", "Serves machine-readable music data", 'machine-readable'),
    ("Interface: API?", "Offers a Web API", 'api'),
    ("Interface: SPARQL endpoint?", "Has a SPARQL endpoint", 'sparql-endpoint'),
    ("Interface: Is it queryable?", "Provides user-friendly search interfaces", 'search-interface'),
    ("Interface: Browsable?", "Provides interfaces for browsing contents", 'browse-interface'),
    ("Interface: Data Dump?", "Offers data export or dump", 'data-dump'),
)

# (column, label, URI suffix) of the Y/N music features
# NOTE(review): "Descriptive Metadata" is capitalised differently from the
# res_data_features key ("Descriptive metadata"); kept as is.
MUSIC_FEATURE_FLAGS = (
    ("Feature: Melody", "Melody", 'melody'),
    ("Feature: Harmony", "Harmony", 'harmony'),
    ("Feature: Lyrics", "Lyrics", 'lyrics'),
    ("Feature: Rhythm", "Rhythm", 'rhythm'),
    ("Feature: Timbre", "Timbre", 'timbre'),
    ("Feature: Contour/Shape", "Contour/Shape", 'contour-shape'),
    ("Feature: Structure", "Music structure", 'structure'),
    ("Feature: Descriptive Metadata", "Descriptive Metadata", 'descriptive-metadata'),
)


def _slug(label):
  """Return `label` lower-cased with spaces replaced by dashes (URI suffix)."""
  return label.lower().replace(" ","-")


def _add_term(subject, predicate, term_uri, label, graph_name):
  """Link `subject` to a vocabulary term and label the term.

  The link goes into `graph_name`; the label is written both to
  `graph_name` and to the shared `vocab_graph`.
  """
  g.add(( subject , predicate , term_uri , graph_name ))
  g.add(( term_uri , RDFS.label , Literal(label) , graph_name ))
  g.add(( term_uri , RDFS.label , Literal(label) , vocab_graph ))


def _resource_uri(name):
  """Return the URI of the resource called `name`.

  When exactly one spreadsheet row has that name its generated id is
  reused (interlinking); otherwise the URI is built from the MD5 of the name.
  """
  idx = df.index[df["ID: Resource name"] == name].tolist()
  return URIRef(BASE+ids[idx[0]]) if len(idx) == 1 else URIRef(BASE+urid(name))


# create ids for every resource (for interlinking)
# Two ts() calls can return the same value on a coarse clock; retry until the
# id is new, otherwise two rows would silently share one graph.
ids = {}
used_ids = set()
for index in df.index:
  new_id = ts()
  while new_id in used_ids:
    new_id = ts()
  used_ids.add(new_id)
  ids[index] = new_id

# create a graph for each row
for index, row in df.iterrows():
  res = URIRef(base+ids[index])
  g_name = res+'/'

  # STR: title
  res_title = row["ID: Resource name"]
  res_id = urid(res_title)
  g.add(( res , RDF.type , SCHEMA.CreativeWork , g_name ))
  g.add(( res , OWL.sameAs , URIRef("http://data.open.ac.uk/musow/"+res_id) , g_name ))
  g.add(( res , RDFS.label , Literal(res_title.strip()) , g_name ))
  g.add(( res , SCHEMA.name , Literal(res_title.strip()) , g_name ))

  # URI: homepage
  res_home = row["ID: URL"].strip()
  if res_home:
    g.add(( res , SCHEMA.mainEntityOfPage , URIRef(res_home) , g_name ))

  # STR: description
  res_desc = row["Description"]
  if res_desc:
    g.add(( res , SCHEMA.description , Literal(res_desc) , g_name ))

  # URI: project or creator (comma-separated; empty entries are skipped)
  for f in (p.strip() for p in row["Project"].split(',')):
    if not f:
      continue
    res_project_uri = _resource_uri(f)
    g.add(( res , SCHEMA.creator , res_project_uri , g_name ))
    g.add(( res_project_uri, RDFS.label , Literal(f) , g_name ))

  # VOCAB: additionalType (an empty cell must not yield the bare vocab namespace)
  res_type = row["Resource type"].strip()
  if res_type:
    _add_term(res, SCHEMA.additionalType, URIRef(VOCAB+_slug(res_type)), res_type, g_name)

  # VOCAB: educationalUse
  # each flag is evaluated on its own, so a 'Y' is kept even when the other
  # column is blank rather than 'N'
  for r, column in (("Research", "Purpose: Research"), ("Learning", "Purpose: Learning")):
    if row[column] == "Y":
      _add_term(res, SCHEMA.educationalUse, URIRef(VOCAB+_slug(r)), r, g_name)

  # VOCAB: audience ("".split(';') is [''], hence the emptiness check)
  for r in (a.strip() for a in row["Target audience"].split(';')):
    if r:
      _add_term(res, SCHEMA.audience, URIRef(VOCAB+_slug(r)), r, g_name)

  # VOCAB: scope
  for column, suffix, label in SCOPE_FLAGS:
    if row[column] == 'Y':
      _add_term(res, SCHEMA.about, URIRef(VOCAB+suffix), label, g_name)

  # VOCAB: subject 
  res_mo_type = row["Scope: MO type"].strip()
  if res_mo_type:
    _add_term(res, SCHEMA.mainEntity, URIRef(MO+res_mo_type), camel_case_split(res_mo_type), g_name)

  # only object types listed in res_subject_types are recorded
  res_subject_type = row["Scope: Object type"]
  if res_subject_type in res_subject_types:
    _add_term(res, SCHEMA.mainEntity, URIRef(res_subject_types[res_subject_type]), res_subject_type, g_name)

  # VOCAB: features http://schema.org/featureList
  for column, label, suffix in APP_FEATURE_FLAGS:
    if row[column] == 'Y':
      _add_term(res, SCHEMA.featureList, URIRef(VOCAB+suffix), label, g_name)

  # URI: example resource
  res_example = row["Item:Resource example"].strip()
  if res_example:
    g.add(( res , SCHEMA.workExample , URIRef(res_example) , g_name ))

  # URI: based on (semicolon-separated; empty entries are skipped)
  for f in (r.strip() for r in row["Reused resource"].split(';')):
    if not f:
      continue
    res_reused_uri = _resource_uri(f)
    g.add(( res , SCHEMA.isBasedOn , res_reused_uri , g_name ))
    g.add(( res_reused_uri, RDFS.label , Literal(f) , g_name ))

  # VOCAB: category (same slug as res_categories, so spaces become dashes)
  res_category = row["Category"]
  if res_category:
    _add_term(res, SCHEMA.category, URIRef(VOCAB+_slug(res_category)), res_category, g_name)

  # VOCAB: music feature
  for column, label, suffix in MUSIC_FEATURE_FLAGS:
    if row[column] == 'Y':
      _add_term(res, SCHEMA.keywords, URIRef(VOCAB+suffix), label, g_name)

  # VOCAB: data format (same slug as res_formats; empty tokens are skipped)
  for f in (t.strip().lower() for t in row["Scope: Formats"].split(',')):
    if f:
      _add_term(res, SCHEMA.encodingFormat, URIRef(VOCAB+f.replace(" ","-")), f, g_name)

  # VOCAB: license 
  res_license = row["Access: license"]
  if res_license.strip():
    _add_term(res, SCHEMA.license, URIRef(VOCAB+_slug(res_license.strip())), res_license, g_name)

  # URI: sparql endpoint 
  res_endpoint = row["SPARQL endpoint URI"].strip()
  if res_endpoint:
    g.add(( res , URIRef("http://rdfs.org/ns/void#sparqlEndpoint") ,URIRef(res_endpoint) , g_name ))

  # VOCAB: collection size (same slug as res_collection_size)
  res_collection = row["Collection: Size"]
  if res_collection not in ('', 'N/A', 'UNKNOWN'):
    res_collection_uri = URIRef(VOCAB+_slug(res_collection).replace("<",'lt-').replace(">",'gt-'))
    _add_term(res, SCHEMA.collectionSize, res_collection_uri, res_collection, g_name)

  # VOCAB: content size
  res_content = row["Data size"]
  if res_content not in ('', 'N/A', 'UNKNOWN'):
    res_content_uri = URIRef(VOCAB+res_content.lower().replace("~",''))
    _add_term(res, SCHEMA.contentSize, res_content_uri, res_content, g_name)

  # VOCAB: task https://schema.org/specialty 
  for r in (t.strip() for t in row["Situation/Task"].split(';')):
    if r:
      _add_term(res, SCHEMA.specialty, URIRef(VOCAB+_slug(r)), r, g_name)

  # PROVENANCE 

  # graph name
  g.add(( g_name , RDFS.label , Literal(res_title.strip()) , g_name ))

  # URI: creator (raises KeyError for initials missing from `users`)
  res_creator = users[row["Person"]]
  res_creator_uri = URIRef(BASE+res_creator.replace('.','-dot-').replace('@', '-at-'))  
  g.add(( g_name, PROV.wasAttributedTo , res_creator_uri, g_name ))
  g.add(( res_creator_uri, RDFS.label , Literal(res_creator), g_name ))
  
  # STR: date
  date = Literal(datetime.now(),datatype=XSD.dateTime)
  g.add(( g_name, PROV.generatedAtTime , date, g_name ))
  
  # STR: publication stage
  g.add(( g_name, URIRef(BASE+"publicationStage") , Literal("published"), g_name ))  
  
# serialize graphs
g.serialize(format='trig',destination='records.trig',  encoding='utf-8' )

# download 
files.download('records.trig')
print(g.serialize(format='trig'))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

@prefix musow: <https://w3id.org/musow/> .
@prefix ns1: <http://www.w3.org/2002/07/owl#> .
@prefix ns2: <http://rdfs.org/ns/void#> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<https://w3id.org/musow/1635268063-598386/> {
    <https://w3id.org/musow/1635268063-598386/> rdfs:label "MIREX" ;
        prov:generatedAtTime "2021-10-26T17:07:44.627187"^^xsd:dateTime ;
        prov:wasAttributedTo musow:marilena-dot-daquino2-at-unibo-dot-it ;
        musow:publicationStage "published" .

    <http://purl.org/ontology/mo/Signal> rdfs:label "Signal" .

    musow:1635268063-598386 a schema:CreativeWork ;
        rdfs:label "MIREX" ;
        ns1:sameAs <http://data.open.ac.uk/musow/0179bf3894134d07caca0d2209bf5374> ;
        schema:additionalType <https://w3id.org/musow/vocab/project> ;
        schema:audience <https://w3id.org/musow/vocab/researche

## JSON schema for the web form

In [None]:
# prepare JSON form
def _form_field(field_id, disambiguate, value_type, prepend, label, widget,
                rdf_property, placeholder, browse, search_wikidata, vocabulary=None):
  """Describe one form field through column_to_json.

  All fields share disabled="False", class "col-md-11" and cache "off".
  `vocabulary` is a label -> URI mapping; it is inverted to URI -> label
  before being passed on as the selectable values.
  """
  values = None if vocabulary is None else {uri: name for name, uri in vocabulary.items()}
  return column_to_json(field_id, disambiguate, value_type, prepend,
                        label, widget, rdf_property, placeholder,
                        browse, search_wikidata, "False",
                        "col-md-11", "off", field_values=values)


field_title = _form_field(
    "res_title", "True", "Literal", "The title of the resource",
    "title", "Textbox", schema+"name", "e.g. Listening Experience Database",
    "True", "False")

field_homepage = _form_field(
    "res_homepage", "False", "Literal", "The URL of the homepage of the resource",
    "homepage", "Textbox", schema+"mainEntityOfPage", "e.g. https://led.kmi.open.ac.uk/",
    "False", "False")

field_description = _form_field(
    "res_description", "False", "Literal", "A short description of the resource",
    "description", "Textbox", schema+"description", "e.g. An open and freely searchable database ...",
    "False", "False")

field_creator = _form_field(
    "res_creator", "False", "URI", "The persons, organisations, or project responsible for the resource",
    "creator or project", "Textbox", schema+"creator", "e.g. The Open University",
    "True", "True")

field_type = _form_field(
    "res_type", "False", "URI", "The type of resource",
    "type", "Dropdown", schema+"additionalType", "",
    "True", "False", res_types)

field_purpose = _form_field(
    "res_purpose", "False", "URI", "The purpose of the resource, e.g. a research project, a learning resource",
    "purpose", "Dropdown", schema+"educationalUse", "",
    "False", "False", res_educational)

field_audience = _form_field(
    "res_audience", "False", "URI", "The target audience of the resource",
    "audience", "Checkbox", schema+"audience", "",
    "True", "False", res_audiences)

field_task = _form_field(
    "res_task", "False", "URI", "What task or research discipline is the resource relevant to?",
    "task or research field", "Checkbox", schema+"specialty", "",
    "True", "False", res_tasks)

field_features = _form_field(
    "res_features", "False", "URI", "The technical features that the online resource offers",
    "technical features", "Checkbox", schema+"featureList", "",
    "False", "False", res_app_features)

field_based_on = _form_field(
    "res_reused", "False", "URI", "A tool, a dataset, a vocabulary, or any other component that is reused by the resource",
    "reused resources", "Textbox", schema+"isBasedOn", "Gutenberg project",
    "False", "True")

field_example = _form_field(
    "res_example", "False", "Literal", "If the resource collects music-related resources, the URL of an exemplar resource",
    "example resource", "Textbox", schema+"workExample", "e.g. https://led.kmi.open.ac.uk/entity/lexp/1628588049214",
    "False", "False")

# Music Ontology classes and musoW object types share one checkbox list
res_subjects = {**res_mo_types , **res_subject_types}
field_subject = _form_field(
    "res_subject", "False", "URI", "If the resource collects music-related resources, what are these?",
    "type of collected resources", "Checkbox", schema+"mainEntity", "",
    "True", "False", res_subjects)

field_scope = _form_field(
    "res_scope", "False", "URI", "If the resource collects music-related resources, what is the scope of the collection?",
    "scope of the collection", "Checkbox", schema+"about", "",
    "False", "False", res_scope)

field_temporal = _form_field(
    "res_temporal", "False", "URI", "Which historical period does the resource address?",
    "temporal scope of data", "Textbox", schema+"temporalCoverage", "e.g. XX century",
    "False", "True")

field_geo = _form_field(
    "res_geo", "False", "URI", "Which geographical places does the resource address?",
    "geographical scope of data", "Textbox", schema+"spatialCoverage", "e.g. Italy",
    "False", "True")

field_genre = _form_field(
    "res_genre", "False", "URI", "Which music genres does the resource address?",
    "music genre in scope", "Textbox", schema+"genre", "e.g. Jazz",
    "False", "True")

field_artist = _form_field(
    "res_artist", "False", "URI", "Which composers, performers, or interesting people does the resource address?",
    "people in scope", "Textbox", schema+"character", "e.g. Louis Armstrong",
    "False", "True")

field_category = _form_field(
    "res_category", "False", "URI", "If the resource collects music-related resources, what kind of data are provided for the resources?",
    "category of data collected", "Checkbox", schema+"category", "",
    "True", "False", res_categories)

field_music_feature = _form_field(
    "res_music_feature", "False", "URI", "If the resource collects symbolic data of music resources, what type of music feature is provided as structured data?",
    "music data features", "Checkbox", schema+"keywords", "",
    "True", "False", res_data_features)

field_collection = _form_field(
    "res_collectionsize", "False", "URI", "If the resource collects music-related resources, can you estimate the size of the collection?",
    "collection size", "Dropdown", schema+"collectionSize", "",
    "True", "False", res_collection_size)

field_format = _form_field(
    "res_format", "False", "URI", "If the resource collects music-related resources, in what data format are these served?",
    "data formats", "Checkbox", schema+"encodingFormat", "",
    "False", "False", res_formats)

field_license = _form_field(
    "res_license", "False", "URI", "If the resource collects music-related resources, which licence is specified for their reuse?",
    "data license", "Dropdown", schema+"license", "",
    "True", "False", res_licenses)

field_content = _form_field(
    "res_datasize", "False", "URI", "If the resource serves data, can you estimate of the size of data?",
    "data size", "Dropdown", schema+"contentSize", "",
    "True", "False", res_datasize)

field_distribution = _form_field(
    "res_distribution", "False", "Literal", "If the resource serves data, the URL of the latest data distribution",
    "data download URL", "Textbox", schema+"downloadUrl", "e.g. https://led.kmi.open.ac.uk/rdf/export/led-SNAPSHOT.nt.bz2",
    "False", "False")

field_sparql = _form_field(
    "res_sparql", "False", "Literal", "If the resource serves data, the URL of the SPARQL endpoint of the resource, if applicable",
    "SPARQL endpoint", "Textbox", "http://rdfs.org/ns/void#sparqlEndpoint", "e.g. https://data.open.ac.uk/query",
    "False", "False")

field_api = _form_field(
    "res_api", "False", "Literal", "If the resource serves data, the URL of the API",
    "web API", "Textbox", schema+"serviceUrl", "e.g. http://example.org/api/",
    "False", "False")


# fields in the order in which they are listed in the form description
musow_form = [
    field_title,
    field_homepage,
    field_description,
    field_creator,
    field_type,
    field_purpose,
    field_audience,
    field_task,
    field_features,
    field_based_on,
    field_example,
    field_subject,
    field_scope,
    field_temporal,
    field_geo,
    field_genre,
    field_artist,
    field_category,
    field_music_feature,
    field_collection,
    field_format,
    field_license,
    field_content,
    field_distribution,
    field_sparql,
    field_api,
]

# TODO: sort the fields
musow_form

[{'browse': 'True',
  'cache_autocomplete': 'off',
  'class': 'col-md-11',
  'disabled': 'False',
  'disambiguate': 'True',
  'id': 'res_title',
  'label': 'title',
  'placeholder': 'e.g. Listening Experience Database',
  'prepend': 'The title of the resource',
  'property': 'https://schema.org/name',
  'searchWikidata': 'False',
  'type': 'Textbox',
  'value': 'Literal'},
 {'browse': 'False',
  'cache_autocomplete': 'off',
  'class': 'col-md-11',
  'disabled': 'False',
  'disambiguate': 'False',
  'id': 'res_homepage',
  'label': 'homepage',
  'placeholder': 'e.g. https://led.kmi.open.ac.uk/',
  'prepend': 'The URL of the homepage of the resource',
  'property': 'https://schema.org/mainEntityOfPage',
  'searchWikidata': 'False',
  'type': 'Textbox',
  'value': 'Literal'},
 {'browse': 'False',
  'cache_autocomplete': 'off',
  'class': 'col-md-11',
  'disabled': 'False',
  'disambiguate': 'False',
  'id': 'res_description',
  'label': 'description',
  'placeholder': 'e.g. An open and fr