In [1]:
import os
import sys
from collections import defaultdict

# Make the project's `src` directory importable; the location is resolved
# relative to the current user's home directory.
sys.path.append(
    os.path.expanduser("~")
    + '/Desktop/topic_modeling/fine_grained_topic_modeling_for_misinformation/src/'
)
from cimple_querying import (
    build_SPARQL_wrapper,
    request,
    get_predicates_recordType,
    get_all_statements_with_predicate,
    get_all_subjects_objects_pairs_from_property_and_subject,
)

# Single SPARQL wrapper shared by every query cell below.
wrapper = build_SPARQL_wrapper('https://data.cimple.eu/sparql')

In [2]:
# Group the concept IRIs returned by the endpoint under a short namespace label.
# Prefixes are tested in order; the first match wins and anything unmatched
# is filed under 'others'.
namespace_prefixes = (
    ('schema', 'http://schema.org/'),
    ('w3', 'http://www.w3.org/'),
    ('dc', 'http://purl.org/'),
    ('openlinksw', 'http://www.openlinksw.com/'),
)
concept_types = defaultdict(list)
for row in request(wrapper, 'select distinct ?Concept where {[] a ?Concept} LIMIT 100'):
    iri = row['Concept']['value']
    label = next(
        (name for name, prefix in namespace_prefixes if iri.startswith(prefix)),
        'others',
    )
    # schema.org concepts are stored by their last path segment only;
    # every other namespace keeps the full IRI.
    concept_types[label].append(iri.split('/')[-1] if label == 'schema' else iri)


In [3]:
concept_types['schema']

['Review',
 'NewsArticle',
 'Organization',
 'SocialMediaPosting',
 'Rating',
 'ClaimReview',
 'Claim']

In [13]:
def extract_predicates(record_type, as_subject=True):
    """Fetch the predicates of a record type and split them by namespace.

    The query is built by ``get_predicates_recordType(record_type,
    as_subject=as_subject)`` and executed through ``request`` on the
    notebook-level ``wrapper``.

    Args:
        record_type: Record type forwarded to ``get_predicates_recordType``.
        as_subject: Forwarded unchanged to ``get_predicates_recordType``
            (presumably selects whether the record type is matched as the
            subject or the object of a statement -- confirm in
            ``cimple_querying``).

    Returns:
        A 3-tuple of lists of predicate IRIs, in result order:
        ``(cimple_predicates, schema_predicates, other)`` where the first
        holds IRIs starting with ``http://data.cimple.eu/ontology#``, the
        second IRIs starting with ``http://schema.org/`` and the third every
        remaining IRI.
    """
    cimple_predicates = []
    schema_predicates = []
    other = []
    query = get_predicates_recordType(record_type, as_subject=as_subject)
    for statement in request(wrapper, query):
        predicate = statement['predicate']['value']
        if predicate.startswith('http://data.cimple.eu/ontology#'):
            cimple_predicates.append(predicate)
        elif predicate.startswith('http://schema.org/'):
            schema_predicates.append(predicate)
        else:
            other.append(predicate)
    # The collected `other` list is returned as-is; it used to be reset to []
    # right before the return, which silently dropped every unmatched predicate.
    return cimple_predicates, schema_predicates, other


## Predicates of record types used as subjects

In [14]:
# For every schema.org record type, store its predicates grouped by namespace,
# as returned by extract_predicates(..., as_subject=True).
recordType_schema = defaultdict(dict)
# The loop variable is `record_type`, not `type`, so the builtin `type` is not
# shadowed in the notebook's global namespace.
for record_type in concept_types['schema']:
    cimple_predicates, schema_predicates, other_predicates = extract_predicates(
        record_type, as_subject=True
    )
    recordType_schema[record_type]['cimple_predicates'] = cimple_predicates
    recordType_schema[record_type]['schema_predicates'] = schema_predicates
    recordType_schema[record_type]['others'] = other_predicates

In [15]:
# List the schema.org predicates recorded for each record type.
print('schema_predicates')
# `record_type` instead of `type` keeps the builtin `type` usable afterwards.
for record_type, predicates in recordType_schema.items():
    print(record_type, ' -> ', predicates['schema_predicates'])


Review  ->  ['http://schema.org/mentions', 'http://schema.org/reviewRating', 'http://schema.org/itemReviewed', 'http://schema.org/reviewBody', 'http://schema.org/isBasedOnURL']
NewsArticle  ->  ['http://schema.org/articleBody', 'http://schema.org/headline', 'http://schema.org/mentions', 'http://schema.org/dateCreated', 'http://schema.org/author']
Organization  ->  ['http://schema.org/name', 'http://schema.org/url']
SocialMediaPosting  ->  ['http://schema.org/text', 'http://schema.org/mentions', 'http://schema.org/dateCreated']
Rating  ->  ['http://schema.org/name', 'http://schema.org/sameAs', 'http://schema.org/ratingValue', 'http://schema.org/author']
ClaimReview  ->  ['http://schema.org/url', 'http://schema.org/headline', 'http://schema.org/alternativeHeadline', 'http://schema.org/mentions', 'http://schema.org/reviewRating', 'http://schema.org/dateCreated', 'http://schema.org/author', 'http://schema.org/datePublished', 'http://schema.org/inLanguage', 'http://schema.org/itemReviewed']

In [16]:
# List the CIMPLE-ontology predicates recorded for each record type.
print('cimple_predicates')
# `record_type` instead of `type` keeps the builtin `type` usable afterwards.
for record_type, predicates in recordType_schema.items():
    print(record_type, ' -> ', predicates['cimple_predicates'])

Review  ->  ['http://data.cimple.eu/ontology#normalizedReviewRating', 'http://data.cimple.eu/ontology#readability_score']
NewsArticle  ->  ['http://data.cimple.eu/ontology#hasPoliticalLeaning', 'http://data.cimple.eu/ontology#hasSentiment', 'http://data.cimple.eu/ontology#readability_score', 'http://data.cimple.eu/ontology#hasEmotion', 'http://data.cimple.eu/ontology#promotesConspiracy', 'http://data.cimple.eu/ontology#mentionsConspiracy']
Organization  ->  []
SocialMediaPosting  ->  ['http://data.cimple.eu/ontology#related', 'http://data.cimple.eu/ontology#hasPoliticalLeaning', 'http://data.cimple.eu/ontology#hasSentiment', 'http://data.cimple.eu/ontology#readability_score', 'http://data.cimple.eu/ontology#hasEmotion', 'http://data.cimple.eu/ontology#promotesConspiracy', 'http://data.cimple.eu/ontology#mentionsConspiracy']
Rating  ->  []
ClaimReview  ->  ['http://data.cimple.eu/ontology#normalizedReviewRating', 'http://data.cimple.eu/ontology#hasPoliticalLeaning', 'http://data.cimple.

## Predicates of record types used as objects

In [17]:
# For every schema.org record type, store its predicates grouped by namespace,
# as returned by extract_predicates(..., as_subject=False).
# NOTE(review): this rebinds `recordType_schema`, replacing whatever an earlier
# cell stored under the same name; cells that print it show the latest run only.
recordType_schema = defaultdict(dict)
# The loop variable is `record_type`, not `type`, so the builtin `type` is not
# shadowed in the notebook's global namespace.
for record_type in concept_types['schema']:
    cimple_predicates, schema_predicates, other_predicates = extract_predicates(
        record_type, as_subject=False
    )
    recordType_schema[record_type]['cimple_predicates'] = cimple_predicates
    recordType_schema[record_type]['schema_predicates'] = schema_predicates
    recordType_schema[record_type]['others'] = other_predicates

In [18]:
# List the schema.org predicates recorded for each record type.
# `record_type` instead of `type` keeps the builtin `type` usable afterwards.
for record_type, predicates in recordType_schema.items():
    print(record_type, ' -> ', predicates['schema_predicates'])

Review  ->  []
NewsArticle  ->  []
Organization  ->  ['http://schema.org/author']
SocialMediaPosting  ->  ['http://schema.org/itemReviewed']
Rating  ->  ['http://schema.org/sameAs', 'http://schema.org/reviewRating']
ClaimReview  ->  []
Claim  ->  ['http://schema.org/itemReviewed']


In [19]:
# List the CIMPLE-ontology predicates recorded for each record type.
# `record_type` instead of `type` keeps the builtin `type` usable afterwards.
for record_type, predicates in recordType_schema.items():
    print(record_type, ' -> ', predicates['cimple_predicates'])

Review  ->  []
NewsArticle  ->  []
Organization  ->  []
SocialMediaPosting  ->  []
Rating  ->  ['http://data.cimple.eu/ontology#normalizedReviewRating']
ClaimReview  ->  ['http://data.cimple.eu/ontology#related']
Claim  ->  []
