In [116]:
import sparql_dataframe
import seaborn as sns
import matplotlib.pyplot as plt
import dataframe_image as dfi
import pandas as pd
from orkg import ORKG
from datetime import datetime

#We are changing the url from orkg.org/orkg to orkg.org/ so if my query is not working this could be the issue
ENDPOINT_URL = "https://www.orkg.org/orkg/triplestore"

PREFIXES =  """
            PREFIX orkgr: <http://orkg.org/orkg/resource/>
            PREFIX orkgc: <http://orkg.org/orkg/class/>
            PREFIX orkgp: <http://orkg.org/orkg/predicate/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            """

query = """
        SELECT ?comparison, ?contribution, ?paper, COUNT(DISTINCT ?related_resource) AS ?numberOfRelatedResources, COUNT(DISTINCT ?related_figure) AS ?numberOfRelatedFigures, ?field_name
            WHERE {
                ?comparison a orkgc:Comparison;
                            orkgp:P26 ?DOI;
                            orkgp:compareContribution ?contribution.
                ?paper orkgp:P31 ?contribution.
                OPTIONAL{?comparison orkgp:hasSubject ?field.
                         ?field rdfs:label ?field_name}
                OPTIONAL{?comparison orkgp:RelatedResource ?related_resource;
                                     orkgp:RelatedFigure ?related_figure}
            }
            ORDER BY ?comparison
        """
data = sparql_dataframe.get(ENDPOINT_URL, PREFIXES+query)
data.head()

Unnamed: 0,comparison,contribution,paper,numberOfRelatedResources,numberOfRelatedFigures,field_name
0,http://orkg.org/orkg/resource/R107854,http://orkg.org/orkg/resource/R107624,http://orkg.org/orkg/resource/R107618,0,0,Learner-Interface Interaction
1,http://orkg.org/orkg/resource/R107854,http://orkg.org/orkg/resource/R107845,http://orkg.org/orkg/resource/R107843,0,0,Learner-Interface Interaction
2,http://orkg.org/orkg/resource/R107854,http://orkg.org/orkg/resource/R107665,http://orkg.org/orkg/resource/R107663,0,0,Learner-Interface Interaction
3,http://orkg.org/orkg/resource/R107854,http://orkg.org/orkg/resource/R107836,http://orkg.org/orkg/resource/R107834,0,0,Learner-Interface Interaction
4,http://orkg.org/orkg/resource/R108358,http://orkg.org/orkg/resource/R108130,http://orkg.org/orkg/resource/R108129,0,0,Geology


In [118]:
comparsions= data.drop_duplicates(subset=['comparison'])[['comparison', 'numberOfRelatedResources', 'numberOfRelatedFigures', 'field_name']]

result = pd.DataFrame(columns=['comparison', 'number_of_papers', 'number_of_contributions', 'number_of_resources', 'number_of_literals', 'number_of_predicates', 'number_of_related_resources', 'number_of_related_figures', 'field_name'])

orkg = ORKG(host="https://www.orkg.org/")

def count_RPL(resid):
    resources = set({})
    literals = set({})
    predicates = set({})

    for id in resid:
        statements = orkg.statements.bundle(thing_id=id).content['statements']
    
        for statement in statements:
            cls = statement['subject']['_class']
            if cls == 'resource':
                resources.add(statement['subject']['id'])
            else:
                literals.add(statement['subject']['id'])
            
            cls = statement['object']['_class']
            if cls == 'resource':
                resources.add(statement['object']['id'])
            else:
                literals.add(statement['object']['id'])
            
            predicates.add(statement['predicate']['id'])
    
    return len(resources), len(literals), len(predicates)

for index, row1 in comparsions.iterrows():
    contribution_list = set({})
    paper_list = set({})
    for index, row2 in data.iterrows():
        if row1['comparison'] == row2['comparison']:
            contribution_list.add(row2['contribution'].split('/')[-1])
            paper_list.add(row2['paper'])
    r, p, l = count_RPL(contribution_list)
    result = result.append({'comparison':row1['comparison'], 'number_of_papers': len(paper_list), 'number_of_contributions':len(contribution_list), 'number_of_resources': r, 'number_of_literals': l, 'number_of_predicates': p, 'number_of_related_resources': row1['numberOfRelatedResources'], 'number_of_related_figures': row1['numberOfRelatedFigures'], 'field_name': row1['field_name']}, ignore_index=True)
    #print(row1['comparison'], r, p, l)
    
now = datetime.now()
result.to_csv('query_result_' + now.strftime('%Y-%m-%d') + '.csv', encoding='utf-8')
result.head(16)

http://orkg.org/orkg/resource/R107854 29 1 5
http://orkg.org/orkg/resource/R108358 90 249 29
http://orkg.org/orkg/resource/R108601 6 24 7
http://orkg.org/orkg/resource/R108719 6 47 19
http://orkg.org/orkg/resource/R109041 10 126 18
http://orkg.org/orkg/resource/R109236 21 56 15
http://orkg.org/orkg/resource/R109546 17 40 12
http://orkg.org/orkg/resource/R109612 14 160 17
http://orkg.org/orkg/resource/R109904 15 35 4
http://orkg.org/orkg/resource/R110071 8 30 9
http://orkg.org/orkg/resource/R110124 26 25 9
http://orkg.org/orkg/resource/R110138 4 41 9
http://orkg.org/orkg/resource/R110188 4 13 5
http://orkg.org/orkg/resource/R110245 4 13 5
http://orkg.org/orkg/resource/R110361 43 21 15
http://orkg.org/orkg/resource/R110651 6 20 18
http://orkg.org/orkg/resource/R110655 23 28 15
http://orkg.org/orkg/resource/R110777 7 11 5
http://orkg.org/orkg/resource/R110991 7 30 7
http://orkg.org/orkg/resource/R111117 5 14 6
http://orkg.org/orkg/resource/R111151 7 6 5
http://orkg.org/orkg/resource/R1111

Unnamed: 0,comparison,number_of_papers,number_of_contributions,number_of_resources,number_of_literals,number_of_predicates,number_of_related_resources,number_of_related_figures,field_name
0,http://orkg.org/orkg/resource/R107854,4,4,29,5,1,0,0,Learner-Interface Interaction
1,http://orkg.org/orkg/resource/R108358,12,12,90,29,249,0,0,Geology
2,http://orkg.org/orkg/resource/R108601,4,4,6,7,24,0,0,Digital Communications and Networking
3,http://orkg.org/orkg/resource/R108719,3,3,6,19,47,0,0,Plant Pathology
4,http://orkg.org/orkg/resource/R109041,9,9,10,18,126,0,0,"Atomic, Molecular and Optical Physics"
5,http://orkg.org/orkg/resource/R109236,5,5,21,15,56,0,0,Geology
6,http://orkg.org/orkg/resource/R109546,4,4,17,12,40,0,0,Industrial and Organizational Psychology
7,http://orkg.org/orkg/resource/R109612,10,10,14,17,160,0,0,Oceanography
8,http://orkg.org/orkg/resource/R109904,12,12,15,4,35,0,0,Information Science
9,http://orkg.org/orkg/resource/R110071,4,4,8,9,30,0,0,Biomedical Engineering and Bioengineering


In [120]:
from sunau import AUDIO_FILE_ENCODING_LINEAR_16


resources = set({})
literals = set({})
predicates = set({})

tf_contrib_res_id = ['R171849','R172247','R172160', 'R172322', 'R162790', 'R162733', 'R162788', 'R145734', 'R145731', 'R175728']
knoll_contrib_res_id = []
runnwerth_contrib_res_id =[]
karras1_contrib_res_id = []
karras2_contrib_res_id = []
bioassay_contrib_res_id = []
auer1_contrib_res_id = []
auer2_contrib_res_id = []
auer3_contrib_res_id = []

for id in res_id:
    statements = orkg.statements.bundle(thing_id=id).content['statements']
    for statement in statements:
        cls = statement['subject']['_class']
        if cls == 'resource':
            resources.add(statement['subject']['id'])
        else:
            literals.add(statement['subject']['id'])
        
        cls = statement['object']['_class']
        if cls == 'resource':
            resources.add(statement['object']['id'])
        else:
            literals.add(statement['object']['id'])
        
        predicates.add(statement['predicate']['id'])

print(f'For resource({res_id}): you have distinct {len(resources)} resource, {len(literals)} literals, and {len(predicates)} predicate')

For resource(['R171849', 'R172247', 'R172160', 'R172322', 'R162790', 'R162733', 'R162788', 'R145734', 'R145731', 'R175728']): you have distinct 670 resource, 131 literals, and 30 predicate
