In [1]:
import json
import pandas as pd

# Names of the projects to evaluate. Each name is also the directory name and
# the stem of the JSON file read below; commented-out entries are skipped.
projects = [
    #'DigiBatMat',
    'DIGITRUBBER',
    'DiProMag',
    #'DiStAl',
    'GlasDigital',
    #'iBain',
    'KNOW-NOW',
    'KupferDigital',
    'LeBeDigital',
    'ODE_AM',
    'PMDao_MO',
    'PMDao_TTO',
    'SensoTwin',
    'SmaDi',
    #'StahlDigital'
]


def _read_project_json(name):
    """Return the parsed content of ``<name>/<name>.json`` (read as UTF-8)."""
    with open(f'{name}/{name}.json', 'r', encoding='utf-8') as handle:
        return json.load(handle)


# Maps project name -> parsed JSON content, in the order given in `projects`.
data = {name: _read_project_json(name) for name in projects}

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## Used Top-Level-Ontologies
For each of the provided ontologies, the use of top-level ontologies (TLOs) was analyzed. This was done by counting `rdfs:subClassOf` and `rdfs:subPropertyOf` chains whose subject belongs to the project's namespace and whose object belongs to the TLO's namespace. For example, the SPARQL query for the usage of the PMD Core Ontology (v2.0.x) in the SensoTwin project reads:
```sparql
SELECT (COUNT(*) as ?subcount)
WHERE {
    ?ao rdfs:subClassOf+|rdfs:subPropertyOf+ ?tlo .
    FILTER( STRSTARTS( STR(?tlo), "https://w3id.org/pmd/co" ) ) .
    FILTER( STRSTARTS( STR(?ao), "http://w3id.org/sensotwin/applicationontology" ) ) .
}
```

In [2]:
# One entry per project: the values stored under ['tlos']['original']
# (presumably the TLO usage counts before reasoning -- confirm against the JSON export).
tlos = {ont: data[ont]['tlos']['original'] for ont in data}
# Transpose so that projects become rows.
pd.DataFrame(tlos).transpose()

Unnamed: 0,pmdco-2.0.7,pmdco-v0.1-beta,emmo,cco,bfo,ro,iao,prov-o,qudt,chebi
DIGITRUBBER,0,0,0,0,2030,0,328,0,0,232
DiProMag,123,0,0,0,0,0,0,0,83,0
GlasDigital,0,282,0,0,0,0,0,0,0,0
KNOW-NOW,121,0,0,0,0,0,0,81,0,0
KupferDigital,577,0,0,0,0,0,0,293,0,0
LeBeDigital,112,0,0,0,0,0,0,2,0,0
ODE_AM,0,0,0,181,0,0,0,0,0,0
PMDao_MO,145,0,0,0,0,0,0,0,0,0
PMDao_TTO,49,0,0,0,0,0,0,0,0,0
SensoTwin,242,0,0,0,0,0,0,181,0,0


In [4]:
# One entry per project: the values stored under ['tlos']['reasoned']
# (presumably the TLO usage counts after running a reasoner -- confirm against the JSON export).
tlos = {ont: data[ont]['tlos']['reasoned'] for ont in data}
# Transpose so that projects become rows.
pd.DataFrame(tlos).transpose()

Unnamed: 0,pmdco-2.0.7,pmdco-v0.1-beta,emmo,cco,bfo,ro,iao,prov-o,qudt,chebi
DIGITRUBBER,0,0,0,0,2030,0,328,0,0,232
DiProMag,164,0,0,0,0,0,0,96,83,0
GlasDigital,0,284,0,0,0,0,0,0,0,0
KNOW-NOW,121,0,0,0,0,0,0,81,0,0
KupferDigital,773,0,0,0,0,0,0,293,0,0
LeBeDigital,268,0,0,0,0,0,0,115,0,0
ODE_AM,0,0,0,316,97,0,0,0,0,0
PMDao_MO,337,0,0,0,0,0,0,145,0,0
PMDao_TTO,126,0,0,0,0,0,0,47,0,0
SensoTwin,499,0,0,0,0,0,0,332,0,0


## Overall defined concepts
The overall number of introduced concepts was analysed. To do so, the project's ontology and the applicable PMD Core Ontology (pmdco) were loaded into Protégé and a reasoner was run. The following query was then executed on the resulting graph (shown here for `owl:Class` definitions in SensoTwin):

```sparql
SELECT (COUNT(*) as ?classcount)
WHERE {
    ?class a owl:Class .
    FILTER STRSTARTS( STR(?class), "http://w3id.org/sensotwin/applicationontology" ) .
}
```

The table below shows the respective numbers of found definitions.

In [5]:
def _definition_row(item):
    """Build one table row with the definition counts of a single project record.

    Reads ``item['definitioncounts']`` for the three OWL entity kinds, adds
    their sum as 'Total' and labels the row with '<reasoner>-<version>' taken
    from ``item['reasoner']``.
    """
    counts = item['definitioncounts']
    n_classes = counts['owl:Class']
    n_object_props = counts['owl:ObjectProperty']
    n_datatype_props = counts['owl:DatatypeProperty']
    reasoner = item['reasoner']
    return {
        'owl:Class': n_classes,
        'owl:ObjectProperty': n_object_props,
        'owl:DatatypeProperty': n_datatype_props,
        'Total': n_classes + n_object_props + n_datatype_props,
        'Reasoner': f"{reasoner['reasoner']}-{reasoner['version']}",
    }


concepts = {ont: _definition_row(item) for ont, item in data.items()}
# Transpose so that projects become rows.
pd.DataFrame(concepts).transpose()

Unnamed: 0,owl:Class,owl:ObjectProperty,owl:DatatypeProperty,Total,Reasoner
DIGITRUBBER,636,0,0,636,elk-0.5.0
DiProMag,217,3,2,222,elk-0.5.0
GlasDigital,213,10,33,256,pellet-2.2.0
KNOW-NOW,81,3,0,84,pellet-2.2.0
KupferDigital,293,0,0,293,pellet-2.2.0
LeBeDigital,114,0,0,114,pellet-2.2.0
ODE_AM,256,12,3,271,pellet-2.2.0
PMDao_MO,145,0,0,145,pellet-2.2.0
PMDao_TTO,47,1,0,48,pellet-2.2.0
SensoTwin,193,18,12,223,pellet-2.2.0


## Number of ProcessingNodes, ValueObjects (pmdco-2.0.x) and ProcessNodes (pmdco-v0.1-beta)
To get an overview of the usage of the PMD Core Ontology, the number of subclasses of ProcessingNode and ValueObject was determined. To do so, the project's ontology and the applicable PMD Core Ontology (pmdco) were loaded into Protégé and a reasoner was run. The following query was then executed on the resulting graph (shown here for subclasses of ProcessingNode in SensoTwin):

```sparql
SELECT ?classname
WHERE {
    ?x rdfs:subClassOf+ <https://w3id.org/pmd/co/ProcessingNode> .
    BIND(STR(?x) AS ?classname) .
    FILTER STRSTARTS( ?classname, "http://w3id.org/sensotwin/applicationontology" ) .
}
```

The table below shows the respective numbers of found definitions.

In [7]:
def _pmdco_usage_row(item):
    """Build one table row with the PMD Core Ontology usage counts of a project record.

    Reads the 'count' fields below ``item['processingnodes']`` and
    ``item['valueobjects']`` for the keys 'pmdco-2.0.7' / 'pmdco-v0.1-beta',
    adds their sum as 'Total' and labels the row with '<reasoner>-<version>'.
    """
    processing_nodes_20 = item['processingnodes']['pmdco-2.0.7']['count']
    value_objects_20 = item['valueobjects']['pmdco-2.0.7']['count']
    process_nodes_01 = item['processingnodes']['pmdco-v0.1-beta']['count']
    reasoner = item['reasoner']
    return {
        'ProcessingNode (2.0.x)': processing_nodes_20,
        'ValueObject (2.0.x)': value_objects_20,
        'ProcessNode (v0.1-beta)': process_nodes_01,
        'Total': processing_nodes_20 + value_objects_20 + process_nodes_01,
        'Reasoner': f"{reasoner['reasoner']}-{reasoner['version']}",
    }


pmdusage = {ont: _pmdco_usage_row(item) for ont, item in data.items()}
# Transpose so that projects become rows.
pd.DataFrame(pmdusage).transpose()

Unnamed: 0,ProcessingNode (2.0.x),ValueObject (2.0.x),ProcessNode (v0.1-beta),Total,Reasoner
DIGITRUBBER,0,0,0,0,elk-0.5.0
DiProMag,21,55,0,76,elk-0.5.0
GlasDigital,0,0,3,3,pellet-2.2.0
KNOW-NOW,6,51,0,57,pellet-2.2.0
KupferDigital,28,196,0,224,pellet-2.2.0
LeBeDigital,9,42,0,51,pellet-2.2.0
ODE_AM,0,0,0,0,pellet-2.2.0
PMDao_MO,37,38,0,75,pellet-2.2.0
PMDao_TTO,2,44,0,46,pellet-2.2.0
SensoTwin,140,82,0,222,pellet-2.2.0


## Used Licenses
The following table summarizes the licenses referenced by each ontology. The SPARQL query used to retrieve this information reads:
```sparql
SELECT ?lic
WHERE {
    ?x <http://purl.org/dc/terms/license>|<http://purl.org/dc/elements/1.1/license> ?lic .
}
```

In [8]:
def license_cleanup(license):
    """Map a license reference to a short label.

    All '<' and '>' characters are removed first. If the remaining text starts
    with one of the known Creative Commons prefixes, the associated label is
    returned ('CC-BY-4.0', or an empty string for the 'unspecified' URL);
    otherwise the text without angle brackets is returned.
    """
    label_by_prefix = {
        'https://creativecommons.org/licenses/by/4.0': 'CC-BY-4.0',
        'http://creativecommons.org/licenses/by/4.0': 'CC-BY-4.0',
        'https://creativecommons.org/licenses/unspecified': '',
    }
    bare = license.translate(str.maketrans('', '', '<>'))
    matching_labels = (
        label for prefix, label in label_by_prefix.items() if bare.startswith(prefix)
    )
    # First matching prefix wins; fall back to the bracket-free input.
    return next(matching_labels, bare)

# Normalise every license entry first and de-duplicate afterwards, so that
# different spellings of the same license (e.g. http/https variants) are
# listed once. dict.fromkeys keeps the order of first appearance, which makes
# the joined string reproducible between kernel runs (set order is not).
# Entries that clean up to an empty label are dropped to avoid stray ', '.
licenses = {
    ont: {
        'used_licenses': ', '.join(
            label
            for label in dict.fromkeys(map(license_cleanup, item['license']['items']))
            if label
        )
    }
    for ont, item in data.items()
}
# Transpose so that projects become rows.
pd.DataFrame(licenses).T

Unnamed: 0,used_licenses
DIGITRUBBER,
DiProMag,CC-BY-4.0
GlasDigital,CC-BY-4.0
KNOW-NOW,
KupferDigital,
LeBeDigital,CC-BY-4.0
ODE_AM,CC-BY-4.0
PMDao_MO,CC-BY-4.0
PMDao_TTO,CC-BY-4.0
SensoTwin,CC-BY-4.0


## Contributors

In [9]:
import re
import rdflib
from IPython.display import display, HTML

def pp(df):
    """Display *df* as an HTML table, turning escaped newlines into line breaks.

    Every literal backslash-n sequence in the markup produced by
    ``df.to_html()`` is replaced by ``<br>`` before the result is handed to
    IPython's ``display``. Returns whatever ``display`` returns.
    """
    markup = df.to_html()
    markup_with_breaks = markup.replace('\\n', '<br>')
    return display(HTML(markup_with_breaks))

def orcid_resolve(string):
    """Resolve an ORCID iD reference to the name published for it.

    Parameters
    ----------
    string : str
        A creator/contributor entry. It is treated as an ORCID reference when
        it starts with ``https://orcid.org/<iD>``, optionally wrapped in angle
        brackets.

    Returns
    -------
    str
        ``'<orcid url> (<uri|literal>) -> <given name> <family name>'`` for an
        ORCID reference, where the kind is 'uri' only if the input is exactly
        the URL in angle brackets. Any other input is returned unchanged.

    Raises
    ------
    Exception
        Whatever ``rdflib.Graph.parse`` raises if the ORCID URL cannot be
        loaded or parsed; such errors are not handled here.
    """
    # The dots are escaped so that only the real host name matches, and the
    # last character of an ORCID iD is a checksum that may be 'X'.
    m = re.match(r"<?(https://orcid\.org/\d{4}-\d{4}-\d{4}-\d{3}[\dX])>?", string)
    if not m:
        return string

    orcid = m.group(1)
    stype = 'uri' if f'<{orcid}>' == string else 'literal'

    g = rdflib.Graph()
    g.parse(orcid)  # loads the RDF description from the ORCID URL

    # Collect all given names first, then all family names.
    names = []
    for predicate in ('givenName', 'familyName'):
        rows = g.query(
            f"""
                SELECT ?name WHERE {{
                    <{orcid}> <http://xmlns.com/foaf/0.1/{predicate}> ?name .
                }}
            """
        )
        names.extend(str(row[0]) for row in rows)

    name = ' '.join(names)
    return f'{orcid} ({stype}) -> {name}'

# De-duplicate the creator/contributor entries with dict.fromkeys instead of
# set(): it keeps the order in which the entries appear in the project data,
# so the rendered lines are the same on every kernel run (set iteration order
# of strings is not stable between interpreter runs).
contributors = {
    ont: {
        'creators_contributors': '\n'.join(
            orcid_resolve(entry)
            for entry in dict.fromkeys(item['creators_contributors']['items'])
        )
    }
    for ont, item in data.items()
}
# Transpose so that projects become rows; pp() renders the newlines as <br>.
df = pd.DataFrame(contributors).T
pp(df)

Unnamed: 0,creators_contributors
DIGITRUBBER,https://orcid.org/0000-0002-6601-2165 (literal) -> Christopher Mungall https://orcid.org/0000-0002-8688-6599 (literal) -> James Balhoff
DiProMag,Luana Caron Michael Feige Thomas Hilbig Günter Reiss Philipp Cimiano Moritz Blum Tapas Samanta Sonja Schöning Simon Bekemeier Martin Wortmann Andreas Hütten Basil Ell Christian Schröder Inga Ennen Alisa Chirkova Lennart Schwan
GlasDigital,Simon Stier (https://orcid.org/0000-0003-0410-3616) Ya-Fan Chen (https://orcid.org/0000-0003-4295-7815)
KNOW-NOW,"Baca Duque, Lui Felipe Ben Hassine, Sahar Guejiep Dowouo, Simplice"
KupferDigital,Hossein Beygi Nasrabadi (www.orcid.org/0000-0002-3092-0532)
LeBeDigital,"https://orcid.org/0000-0003-2445-6734 (literal) -> Birgit Meng https://orcid.org/0000-0003-0626-5002 (literal) -> Stephan Pirskawetz https://orcid.org/0009-0004-9700-2439 (literal) -> Aida Zoriyatkha https://orcid.org/0009-0006-4524-9143 (literal) -> Melissa Telong https://orcid.org/0009-0003-7121-0283 (literal) -> Mattheo Krüger Mattheo Krüger, Melissa Telong Donfack, Aida Zoriyatkha, Birgit Meng, Stephan Pirskawetz"
ODE_AM,"Thomas Bjarsch Mohamed Kamal, Jan Reimann Mohamed Kamal, Heiko Beinersdorf"
PMDao_MO,https://orcid.org/0000-0002-3717-7104 (literal) -> Bernd Bayerlein https://orcid.org/0000-0002-7094-5371 (literal) -> Markus Schilling
PMDao_TTO,https://orcid.org/0000-0002-3717-7104 (literal) -> Bernd Bayerlein https://orcid.org/0000-0002-7094-5371 (literal) -> Markus Schilling https://orcid.org/0000-0001-7192-7143 (literal) -> Jörg Waitelonis https://orcid.org/0000-0003-4971-3645 (literal) -> Philipp von Hartrott https://orcid.org/0000-0002-9014-2920 (literal) -> Henk Birkholz
SensoTwin,https://orcid.org/0009-0004-1208-3971 (uri) -> Ursula Pähler


## Namespaces


In [11]:
import requests
from ipywidgets import IntProgress
from IPython.display import display

# RDF serialisations that count as a machine-readable answer.
mime_types = ['text/turtle','application/rdf+xml','application/ld+json','application/n-triples']

# Result buckets, each keyed by namespace URL:
#   'accept'   - response media type is one of `mime_types`
#   'noaccept' - a response was received, but with another (or no) media type
#   'error'    - the request raised an exception
res = {
    'accept': {},
    'noaccept': {},
    'error': {}
}

# Every namespace used by any project, without duplicates. Sorted so that the
# request order and the row order of the exported sheet are reproducible.
all_namespaces = sorted(set(x for ds in data.values() for x in ds['namespaces']['items']))

f = IntProgress(min=0, max=len(all_namespaces))
display(f)

for x in all_namespaces:
    try:
        # Without a timeout a single unresponsive host would block the loop forever.
        req = requests.head(
            x,
            headers={'Accept': ','.join(mime_types)},
            allow_redirects=True,
            timeout=30,
        )
        content_type = req.headers.get('content-type', '')
        # Compare only the media type: servers commonly append parameters such
        # as '; charset=utf-8', and header values may differ in letter case.
        media_type = content_type.split(';', 1)[0].strip().lower()
        bucket = 'accept' if media_type in mime_types else 'noaccept'
        res[bucket][x] = {'status_code': req.status_code, 'content_type': content_type}
    except Exception as e:
        # Deliberately broad: one failing namespace must not abort the survey.
        # The error is stored as text rather than as an exception object.
        res['error'][x] = {'error': repr(e)}
    f.value += 1

pd.concat((pd.DataFrame(res['accept']).T, pd.DataFrame(res['noaccept']).T, pd.DataFrame(res['error']).T)).to_excel('requests_raw.xlsx')

IntProgress(value=0, max=114)

In [15]:
# Read 'requests.xlsx' into {index value: {column: cell}} and drop empty (NaN)
# cells, so that a key such as 'countas_tlo' is present only where it is filled in.
# The index values are compared with the project namespaces below.
tloaodict = {
    ns: {key: value for key, value in row.items() if not pd.isna(value)}
    for ns, row in pd.read_excel('requests.xlsx', index_col=0).T.to_dict().items()
}

# Usage matrix: rows are 'countas_tlo' labels, columns are projects, 1 = used.
# A label counts as used if ANY namespace carrying that label is used by the
# project. (Assigning the flag per namespace would let a later, unused
# namespace with the same label overwrite an earlier hit.)
tlo_usage = {}
for proj, item in data.items():
    used_namespaces = set(item['namespaces']['items'])
    flags = {}
    for ns, meta in tloaodict.items():
        if 'countas_tlo' in meta:
            label = meta['countas_tlo']
            flags[label] = max(flags.get(label, 0), int(ns in used_namespaces))
    tlo_usage[proj] = flags

dftlo = pd.DataFrame(tlo_usage)
dftlo.insert(loc=len(dftlo.columns), column='Sum', value=dftlo.sum(axis=1))
# Helper column so that ties in 'Sum' are ordered alphabetically by label.
dftlo['name'] = dftlo.index
dftlo = dftlo.sort_values(by=['Sum', 'name'], ascending=[False, True])
dftlo

Unnamed: 0,DIGITRUBBER,DiProMag,GlasDigital,KNOW-NOW,KupferDigital,LeBeDigital,ODE_AM,PMDao_MO,PMDao_TTO,SensoTwin,SmaDi,Sum,name
owl,1,1,1,1,1,1,1,1,1,1,1,11,owl
rdfs,1,1,1,1,1,1,1,1,1,1,1,11,rdfs
dcterms,1,1,1,1,1,1,0,1,1,1,1,10,dcterms
rdf,1,1,1,1,0,0,1,0,1,1,1,8,rdf
xml,1,1,1,1,1,0,1,0,1,1,0,8,xml
pmdco20,0,1,0,1,1,1,0,1,1,1,0,7,pmdco20
skos,1,0,1,1,1,0,1,1,1,0,0,7,skos
dcelements,1,0,0,1,0,0,1,1,1,1,0,6,dcelements
proc,0,0,0,1,1,1,0,0,1,1,0,5,proc
swrl,1,0,0,0,0,0,1,0,0,0,0,2,swrl


In [16]:
# Usage matrix: rows are 'countas_ao' labels, columns are projects, 1 = used.
# A label counts as used if ANY namespace carrying that label is used by the
# project. (Assigning the flag per namespace would let a later, unused
# namespace with the same label overwrite an earlier hit.)
ao_usage = {}
for proj, item in data.items():
    used_namespaces = set(item['namespaces']['items'])
    flags = {}
    for ns, meta in tloaodict.items():
        if 'countas_ao' in meta:
            label = meta['countas_ao']
            flags[label] = max(flags.get(label, 0), int(ns in used_namespaces))
    ao_usage[proj] = flags

dfao = pd.DataFrame(ao_usage)
dfao.insert(loc=len(dfao.columns), column='Sum', value=dfao.sum(axis=1))
# Rows are ordered alphabetically by label.
dfao = dfao.sort_index(ascending=True)
dfao

Unnamed: 0,DIGITRUBBER,DiProMag,GlasDigital,KNOW-NOW,KupferDigital,LeBeDigital,ODE_AM,PMDao_MO,PMDao_TTO,SensoTwin,SmaDi,Sum
DIGITRUBBER,1,0,0,0,0,0,0,0,0,0,0,1
DiProMag,0,1,0,0,0,0,0,0,0,0,0,1
GlasDigital,0,0,1,0,0,0,0,0,0,0,0,1
KNOW-NOW,0,0,0,1,0,0,0,0,0,0,0,1
KupferDigital,0,0,0,0,1,0,0,0,0,0,0,1
LeBeDigital,0,0,0,0,0,1,0,0,0,0,0,1
ODE_AM,0,0,0,0,0,0,1,0,0,0,0,1
PMDao_MO,0,0,0,0,0,0,0,1,0,0,0,1
PMDao_TTO,0,0,0,0,0,0,0,0,1,0,0,1
SensoTwin,0,0,0,0,0,0,0,0,0,1,0,1
