In [11]:
# import statements
import pprint
from rdflib import Graph, Namespace, URIRef

In [3]:
# Build an empty in-memory graph and fill it from FoS_s.nt.
# No explicit format is passed here, so rdflib chooses the parser itself.
# The parse call stays last so the notebook echoes the loaded graph.
FoS = Graph()
FoS.parse(source="FoS_s.nt")

<Graph identifier=N5e395b149fee41398dbd80a17373c020 (<class 'rdflib.graph.Graph'>)>

In [58]:
# Build an empty in-memory graph and fill it from PaperAbstracts_s.nt,
# read with the Turtle parser. The parse call stays last so the notebook
# echoes the loaded graph.
papersAbs = Graph()
papersAbs.parse(source="PaperAbstracts_s.nt", format="turtle")

<Graph identifier=Ncf4a68a352ff48aea926ee476f429c7a (<class 'rdflib.graph.Graph'>)>

In [37]:
# Build an empty in-memory graph and fill it from Papers_s.nt,
# read with the Turtle parser. The parse call stays last so the notebook
# echoes the loaded graph.
papers = Graph()
papers.parse(source="Papers_s.nt", format="turtle")

<Graph identifier=Nc885275d4bb24ac6b2fd9c4cb53282e0 (<class 'rdflib.graph.Graph'>)>

In [6]:
# Build an empty in-memory graph and fill it from PaperFoS_s.nt,
# read with the Turtle parser. The parse call stays last so the notebook
# echoes the loaded graph.
papersFoS = Graph()
papersFoS.parse(source="PaperFoS_s.nt", format="turtle")

<Graph identifier=N54634bcbfdaf4452974cbf0ac5cd4f41 (<class 'rdflib.graph.Graph'>)>

In [15]:
# Build an empty in-memory graph and fill it from Journals_s.nt,
# read with the Turtle parser. The parse call stays last so the notebook
# echoes the loaded graph.
journals = Graph()
journals.parse(source="Journals_s.nt", format="turtle")

<Graph identifier=N0caa6b2bf9f94cd3bc72297bbed1d726 (<class 'rdflib.graph.Graph'>)>

In [87]:
# Get the unique paper URIs with the prefix 'http://mag.graph/entity/' (up to 10,000).
#
# `papers.subjects()` yields a subject once per matching triple, so the same
# paper shows up several times. Only URIs that are not yet in the set are
# counted; otherwise the limit would be reached after 10,000 triples rather
# than 10,000 distinct papers.
paper_uris = set()
counter = 0  # number of distinct paper URIs collected so far
for s in papers.subjects(predicate=None, object=None):
    uri = str(s)
    if uri.startswith('http://mag.graph/entity/') and uri not in paper_uris:
        paper_uris.add(uri)
        counter += 1
        if counter >= 10000:
            break

In [104]:
# Collect, for every URI in `paper_uris`, a flat dictionary with:
#   - one entry per predicate in `predicates` (keyed by the predicate IRI string),
#     where the journal entry holds the journal's foaf:name when one is found
#     and the journal URI otherwise;
#   - 'abstract': first object found for the paper in `papersAbs`, if any;
#   - 'concept_tags': sorted list of discipline names, if any were found.
# The per-paper dictionaries are stored in `papers_result`, keyed by paper URI.

# IRIs that are looked up repeatedly inside the loop
journal_predicate = URIRef('http://mag.graph/property/appearsInJournal')
name_predicate = URIRef('http://xmlns.com/foaf/0.1/name')
discipline_predicate = URIRef('http://purl.org/spar/fabio/hasDiscipline')

# Define the desired paper predicates
predicates = [
    URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),  # Paper type
    URIRef('http://purl.org/dc/terms/title'),  # Paper title
    journal_predicate,  # Journal appearance
]

# Create a dictionary to store all paper information
papers_result = {}

for paper in paper_uris:
    paper_ref = URIRef(paper)

    # Create a dictionary to store each result
    values = {}
    for predicate in predicates:
        result = list(papers.objects(subject=paper_ref, predicate=predicate))
        if not result:
            continue

        value = result[0]
        if predicate == journal_predicate:
            # The object of appearsInJournal is the journal's URI; the name
            # must be looked up with that URI as the subject, not the paper.
            journal_names = list(journals.objects(subject=value, predicate=name_predicate))
            if journal_names:
                value = journal_names[0]
            # else: keep the journal URI rather than failing on an empty list

        values[str(predicate)] = str(value)

    # Query for abstract of paper (any predicate in the abstracts graph)
    abstract = list(papersAbs.objects(subject=paper_ref))
    if abstract:
        values['abstract'] = str(abstract[0])

    # Query for disciplines and resolve each one to its name in the FoS graph
    disciplines_results = list(papersFoS.objects(subject=paper_ref, predicate=discipline_predicate))
    disciplines = set()
    for discipline in disciplines_results:
        discipline_names = list(FoS.objects(subject=discipline, predicate=name_predicate))
        if discipline_names:  # skip disciplines that have no name triple
            disciplines.add(str(discipline_names[0]))

    if disciplines:
        # Sorted so the stored list does not depend on set iteration order
        values['concept_tags'] = sorted(disciplines)

    papers_result[paper] = values


In [105]:
import time
import multiprocessing as mp
import mag_functions as F

# Given each paper and respective information, fetch proper concept tags.
#
# For every entry in `papers_result` an input record is assembled and sent to
# `F.get_tags`; the outcome is stored under the paper's 'results' key. When
# tagging fails, a placeholder with None values plus the error text is stored,
# so one bad paper does not stop the whole run and the failure stays visible.
for paper, paper_info in papers_result.items():
    information = {
        "title": paper_info.get('http://purl.org/dc/terms/title'),
        "doc_type": paper_info.get('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
        "journal": paper_info.get('http://mag.graph/property/appearsInJournal'),
        "abstract": paper_info.get('abstract'),
        "inverted_abstract": False
    }

    start_time = time.time()
    try:
        # Pass the record built above. The previous `input_json[count]` referred
        # to names that are never defined, so every call raised NameError and
        # was silently replaced by the placeholder.
        # NOTE(review): the call shape (list of records, 1) and the `[1][0]`
        # indexing are kept from the original -- confirm against mag_functions.
        results = F.get_tags([information], 1)[1][0]
        results['time'] = time.time() - start_time
    except Exception as exc:
        # Exception (not a bare except) so KeyboardInterrupt still stops the loop
        results = {"tags": None, "scores": None, "tag_ids": None, "error": repr(exc)}

    paper_info['results'] = results


{'title': 'The Distinctive Feature of Weak Interactions and Some of Its Subsequences (Impossibility of Generation of Masses and Absence of the MSW Effect', 'doc_type': 'http://mag.graph/class/Paper', 'journal': 'http://mag.graph/entity/2596664605', 'abstract': 'In the Quantum theory the wave functions form a full and orthonormalized functional space. For this reason we can use the equation on eigenfunctions and eigenstates and find the eigenenergies $E_n$ and eigenfunctions $Psi_n$ to determine the physical characteristics of the considered systems (or models). In the Quantum theory the observed values are the average value of operators. Since the wave functions create a full and orthonormalized space, the average values of operators coincide with eigenvalues of operators. This situation takes place in the case of strong and electromagnetic interactions. However, the average values of the weak interaction operators are equal to zero since only the left-handed components of spinors part

[rdflib.term.URIRef('http://mag.graph/entity/21774173'), rdflib.term.URIRef('http://mag.graph/entity/178790620'), rdflib.term.URIRef('http://mag.graph/entity/201194858'), rdflib.term.URIRef('http://mag.graph/entity/179104552'), rdflib.term.URIRef('http://mag.graph/entity/41339378'), rdflib.term.URIRef('http://mag.graph/entity/85551212'), rdflib.term.URIRef('http://mag.graph/entity/2778695967')]


[rdflib.term.URIRef('http://mag.graph/entity/1997309520')]
