In [16]:
import pandas as pd
import re
import csv
import rdflib
from rdflib import Graph, RDF, URIRef, Namespace, Literal, RDF, URIRef
from rdflib.namespace import XSD,RDF, FOAF

In [10]:
# Base IRI of the AcademicPapers ontology; every term below is minted under it.
base_uri = "http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#"

# The base IRI as a URIRef, so other cells can build terms with `ns + <name>`.
ns = URIRef(base_uri)

# Class IRIs (<base>Paper, <base>Author, <base>Year).
PAPER, AUTHOR, YEAR = (
    URIRef(f"{base_uri}{class_name}") for class_name in ("Paper", "Author", "Year")
)

# Empty RDF graph that cells using `g` add triples to.
g = Graph()

In [8]:
# Load the author/affiliation pairs; the writer loop further down in this cell
# reads the 'authorId' and 'affiliation' columns of this frame.
df = pd.read_csv('data/affiliated_with.csv')

# Convert affiliation names to a safe IRI format
def affiliation_to_iri(name):
    """Return the prefixed Turtle name ``ex:Affiliation_<slug>`` for an affiliation.

    The slug is built with the same rules as ``clean_affiliation_name`` so the
    object IRIs written to ``affiliated_with.ttl`` match the subject IRIs written
    to ``affiliations.ttl``:

    * every character that is not an ASCII letter, ASCII digit or whitespace is
      dropped (this includes ``_`` and accented letters);
    * every remaining whitespace character (space, tab, newline, ...) becomes
      ``_`` -- raw whitespace inside a prefixed name is not valid Turtle.

    Args:
        name: Affiliation name as a ``str``.

    Returns:
        The string ``'ex:Affiliation_' + slug``.

    Raises:
        TypeError: If ``name`` is not a string (e.g. a NaN from pandas).
    """
    slug = re.sub(r'[^a-zA-Z0-9\s]', '', name)
    # One underscore per whitespace character, so "A  B" still maps to "A__B".
    slug = re.sub(r'\s', '_', slug)
    return f'ex:Affiliation_{slug}'

# Prefix declarations written at the top of the .ttl file
prefixes = """
@prefix ex: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

"""

# Turtle documents are UTF-8, so the encoding is set explicitly instead of
# relying on the platform default (which is not UTF-8 everywhere).
with open('turtle2/affiliated_with.ttl', 'w', encoding='utf-8') as ttl_file:
    ttl_file.write(prefixes)

    # One "author ex:affiliated_with affiliation" statement per CSV row
    for index, row in df.iterrows():
        author_iri = f"ex:Author_{row['authorId']}"
        affiliation_iri = affiliation_to_iri(row['affiliation'])
        ttl_file.write(f"{author_iri} ex:affiliated_with {affiliation_iri} .\n")

print("Turtle file has been created.")


Turtle file has been created.


In [4]:
# Uses `g`, `ns` and `PAPER` created in the graph-setup cell; `csv` comes from
# the notebook's import cell.
with open('data/papers_details.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # Mint the paper IRI with the same "Paper_<id>" local name that the
        # relationship cells (written_by, citations, published_in, ...) and the
        # enriched paper export use, so these descriptions attach to the same
        # resources instead of to a parallel set of bare-id IRIs.
        paper_uri = URIRef(ns + "Paper_" + row['paperId'])

        # Type the resource and attach its descriptive literals
        g.add((paper_uri, RDF.type, PAPER))
        g.add((paper_uri, URIRef(ns + "title"), Literal(row['title'], datatype=XSD.string)))
        g.add((paper_uri, URIRef(ns + "abstract"), Literal(row['abstract'], datatype=XSD.string)))
        # An empty string is not a valid xsd:gYear lexical form, so rows
        # without a year get no year triple at all.
        if row['year']:
            g.add((paper_uri, URIRef(ns + "year"), Literal(row['year'], datatype=XSD.gYear)))
        g.add((paper_uri, URIRef(ns + "keywords"), Literal(row['keywords'], datatype=XSD.string)))
        g.add((paper_uri, URIRef(ns + "doi"), Literal(row['doi'], datatype=XSD.string)))

# Serialize graph to Turtle format and save
g.serialize(destination='turtle2/papers_details.ttl', format='turtle')
print("Serialized and saved the Turtle file.")



Serialized and saved the Turtle file.


In [12]:
# Ontology namespace, registered under the "ex" prefix on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
g = Graph()
g.bind("ex", EX)

with open('data/published_in_enriched_v2.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # Subject and object resources for this row
        paper = EX["Paper_{}".format(row['paper_id'])]
        venue = EX["Venue_{}".format(row['ss_venue_id'])]

        # paper -> venue link, plus the year as an xsd:integer-typed literal on the paper
        g.add((paper, EX["published_in"], venue))
        g.add((paper, EX["publish_year"], Literal(row['year'], datatype=rdflib.XSD.integer)))

# Write the graph out as Turtle
g.serialize(destination='turtle2/published_in_enriched_v2.ttl', format='turtle')


<Graph identifier=N9b14c377828f4ff4abbe2e6b5fd73b4e (<class 'rdflib.graph.Graph'>)>

In [13]:
# Ontology namespace, registered under the "ex" prefix on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
g = Graph()
g.bind("ex", EX)

with open('data/reviewed_by.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # One review -> author (reviewer) link per CSV row
        review = EX["Review_{}".format(row['review_id'])]
        author = EX["Author_{}".format(row['author_id'])]
        g.add((review, EX["reviewed_by"], author))

# Write the graph out as Turtle
g.serialize(destination='turtle2/reviewed_by.ttl', format='turtle')


<Graph identifier=N79971a9fbc654862addd0a8782f08391 (<class 'rdflib.graph.Graph'>)>

In [14]:
# Ontology namespace, registered under the "ex" prefix on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
g = Graph()
g.bind("ex", EX)

with open('data/review_on.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # One review -> paper link per CSV row
        review = EX["Review_{}".format(row['review_id'])]
        paper = EX["Paper_{}".format(row['paper_id'])]
        g.add((review, EX["reviewed_on"], paper))

# Write the graph out as Turtle
g.serialize(destination='turtle2/review_on.ttl', format='turtle')

<Graph identifier=N0dd5efee1d9441a899824892d8fa4f4f (<class 'rdflib.graph.Graph'>)>

In [15]:
def escape_literal(text):
    """Make ``text`` safe to place between double quotes in a Turtle literal.

    Missing values (anything ``pd.isna`` reports as NA, e.g. ``None`` or NaN)
    become the empty string. Otherwise, in a single pass over the string:

    * each backslash is doubled,
    * each double quote gets a leading backslash,
    * each line feed and each carriage return becomes one space.
    """
    if pd.isna(text):
        return ""
    # Per-character mapping; equivalent to escaping backslashes first and
    # quotes second, because the inserted backslashes are never revisited.
    substitutions = str.maketrans({'\\': '\\\\', '"': '\\"', '\n': ' ', '\r': ' '})
    return text.translate(substitutions)

# Enriched paper metadata; the loop below reads the paperId, title, abstract,
# year, keywords and doi columns.
df = pd.read_csv('data/papers_details_enriched.csv')

# Define the base namespace for your papers
base_ns = "http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#"

# Turtle documents are UTF-8, so the encoding is set explicitly instead of
# relying on the platform default.
with open('turtle2/papers_details_enriched.ttl', 'w', encoding='utf-8') as ttl_file:
    ttl_file.write('@prefix ns1: <{}> .\n'.format(base_ns))
    ttl_file.write('@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n')

    # One "ns1:Paper_<id> a ns1:Paper ; ..." description per DataFrame row
    for index, row in df.iterrows():
        paper_id = 'Paper_' + str(row['paperId'])

        # predicate/object pairs in output order; string values are escaped
        # (and missing ones become "") by escape_literal
        properties = [
            'ns1:title "{}"^^xsd:string'.format(escape_literal(row['title'])),
            'ns1:abstract "{}"^^xsd:string'.format(escape_literal(row['abstract'])),
        ]
        # "" is not a valid xsd:gYear lexical form, so a missing year is left
        # out instead of being written as an ill-typed empty literal.
        if pd.notna(row['year']):
            properties.append('ns1:year "{}"^^xsd:gYear'.format(int(row['year'])))
        properties.append('ns1:keywords "{}"^^xsd:string'.format(escape_literal(row['keywords'])))
        properties.append('ns1:doi "{}"^^xsd:string'.format(escape_literal(row['doi'])))

        # Same layout as before: pairs separated by " ;", the last closed by " ."
        ttl_file.write('ns1:{} a ns1:Paper ;\n'.format(paper_id))
        ttl_file.write('    ' + ' ;\n    '.join(properties) + ' .\n\n')

print("Turtle file has been created with sanitized literals.")


Turtle file has been created with sanitized literals.


In [18]:
def clean_affiliation_name(name):
    """Reduce an affiliation name to a slug usable as a Turtle local name.

    Every character that is not an ASCII letter, ASCII digit or whitespace is
    removed; every remaining whitespace character (space, tab, newline, ...)
    becomes ``_``. Previously only the plain space was replaced, so a tab or
    newline inside a name ended up verbatim in the IRI and broke the output.

    Args:
        name: Affiliation name as a ``str``.

    Returns:
        The slug, e.g. ``"Univ. of X"`` -> ``"Univ_of_X"``.
    """
    kept = re.sub(r'[^a-zA-Z0-9\s]', '', name)
    # One underscore per whitespace character, so "A  B" still maps to "A__B".
    return re.sub(r'\s', '_', kept)

# Read all affiliation rows up front
with open('data/affiliations.csv', 'r', encoding='utf-8') as csv_file:
    reader = csv.DictReader(csv_file)
    affiliations = list(reader)

# Open the Turtle file for writing
with open('turtle2/affiliations.ttl', 'w', encoding='utf-8') as ttl_file:
    ttl_file.write("@prefix ap: <http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#> .\n")
    ttl_file.write("@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n")
    ttl_file.write("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n")
    ttl_file.write("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n")
    ttl_file.write("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\n")

    # Iterate through each row and write data in turtle format
    for affiliation in affiliations:
        cleaned_name = clean_affiliation_name(affiliation['name'])
        uri = f"ap:Affiliation_{cleaned_name}"

        # Every value goes through escape_literal (defined earlier in this
        # notebook) before being placed between double quotes: an unescaped
        # quote, backslash or line break in the CSV would otherwise produce a
        # file that Turtle parsers reject. A missing field becomes "".
        name_lit = escape_literal(affiliation['name'])
        type_lit = escape_literal(affiliation['type'])
        # Multi-line addresses are flattened to one comma-separated line first
        address_lit = escape_literal(affiliation['address'].replace('\n', ', '))
        email_lit = escape_literal(affiliation['email'])
        phone_lit = escape_literal(affiliation['phone_number'])

        ttl_file.write(f"{uri} rdf:type ap:Affiliation .\n")
        ttl_file.write(f"{uri} ap:affiliation_name \"{name_lit}\"^^xsd:string .\n")
        ttl_file.write(f"{uri} ap:type \"{type_lit}\"^^xsd:string .\n")
        ttl_file.write(f"{uri} ap:address \"{address_lit}\"^^xsd:string .\n")
        ttl_file.write(f"{uri} ap:affiliation_email \"{email_lit}\"^^xsd:string .\n")
        ttl_file.write(f"{uri} ap:phone_number \"{phone_lit}\"^^xsd:string .\n")
        ttl_file.write("\n")

In [17]:
# Fresh graph for the author export
g = Graph()

# Ontology namespace, plus schema.org (its `email` term is used by add_author)
ex = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
schema = Namespace("http://schema.org/")

# Prefixes registered for the serialized output; `schema` is not bound here
g.bind("ex", ex)
g.bind("foaf", FOAF)

# Function for adding authors to the graph
def add_author(author_id, name, affiliation, email):
    """Add one author resource and its properties to the module-level graph ``g``.

    The author is always typed as ``ex:Author``. Name, e-mail and affiliation
    are only added when the value is non-empty, so blank CSV fields no longer
    produce empty-string literals (the conference cells guard their optional
    URL the same way).

    Args:
        author_id: Identifier used in the IRI ``ex:Author_<author_id>``.
        name: Author name, stored as ``foaf:name``.
        affiliation: Affiliation text, stored as a literal on
            ``ex:affiliated_with``.
        email: E-mail address, stored as ``schema:email``.
    """
    author_uri = ex[f"Author_{author_id}"]
    g.add((author_uri, RDF.type, ex.Author))
    if name:
        g.add((author_uri, FOAF.name, Literal(name)))
    if email:
        g.add((author_uri, schema.email, Literal(email)))
    if affiliation:
        # NOTE(review): the affiliated_with.ttl cell writes IRIs
        # (ex:Affiliation_...) as objects of this same predicate, while this
        # stores plain text -- confirm which form the ontology expects.
        g.add((author_uri, ex.affiliated_with, Literal(affiliation)))

# Feed every CSV row to add_author, then write the graph out
with open('data/authors.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        add_author(
            author_id=row['authorId'],
            name=row['name'],
            affiliation=row['affiliations'],
            email=row['email'],
        )

# Serialize the graph to Turtle format
g.serialize(destination='turtle2/authors.ttl', format='turtle')
print("Conversion to Turtle completed.")


Conversion to Turtle completed.


In [19]:
# Namespaces for this export, bound on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

g = Graph()

g.bind("ex", EX)
g.bind("foaf", FOAF)

with open('data/journals.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create a new journal resource
        journal = EX[f"Journal_{row['ss_venue_id']}"]

        # Add properties to the journal
        g.add((journal, RDF.type, EX.Journal))
        g.add((journal, EX.name, Literal(row['name'])))
        g.add((journal, EX.issn, Literal(row['issn'])))
        # Only link a homepage when the CSV has one: URIRef('') is an empty
        # relative IRI that serializes as <> and resolves to the document
        # itself. The conference cells apply the same guard.
        if row['url']:
            g.add((journal, FOAF.homepage, URIRef(row['url'])))

# Serialize the graph in Turtle format
g.serialize(destination='turtle2/journals.ttl', format='turtle')

<Graph identifier=N82fb78aa16a54082a818e856886f4346 (<class 'rdflib.graph.Graph'>)>

In [20]:
# Namespaces for this export, bound on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")

g = Graph()

g.bind("ex", EX)
g.bind("foaf", FOAF)

# `csv` is already imported in the notebook's import cell.
with open('data/journals_enriched.csv', 'r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create a new journal resource
        journal = EX[f"Journal_{row['ss_venue_id']}"]

        # Adding properties to the journal
        g.add((journal, RDF.type, EX.Journal))
        g.add((journal, EX.name, Literal(row['name'])))
        # Only link a homepage when the CSV has one: URIRef('') is an empty
        # relative IRI that serializes as <> and resolves to the document
        # itself. The conference cells apply the same guard.
        if row['url']:
            g.add((journal, FOAF.homepage, URIRef(row['url'])))
        g.add((journal, EX.year, Literal(row['year'], datatype=rdflib.XSD.integer)))
        g.add((journal, EX.volume, Literal(row['volume'], datatype=rdflib.XSD.integer)))

# Serialize the graph in Turtle format
g.serialize(destination='turtle2/journals_enriched.ttl', format='turtle')

<Graph identifier=N86c2818a93124bbbbb53dd06c1887f7d (<class 'rdflib.graph.Graph'>)>

In [21]:
# Namespaces for this export, bound on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
g = Graph()
g.bind("ex", EX)
g.bind("foaf", FOAF)

with open('data/conferences.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # Resource for this conference, typed and named
        conference = EX["Conference_{}".format(row['ss_venue_id'])]
        g.add((conference, RDF.type, EX["Conference"]))
        g.add((conference, EX["name"], Literal(row['name'])))

        # The homepage link is optional: rows with an empty url get none
        homepage = row['url']
        if homepage:
            g.add((conference, FOAF["homepage"], URIRef(homepage)))

# Write the graph out as Turtle
g.serialize(destination='turtle2/conferences.ttl', format='turtle')

<Graph identifier=Ne945a23762294853a24521ef4f8bb49d (<class 'rdflib.graph.Graph'>)>

In [22]:
# Namespaces for this export, bound on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
g = Graph()
g.bind("ex", EX)
g.bind("foaf", FOAF)

with open('data/conferences_enriched.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # Resource for this conference, typed and named
        conference = EX["Conference_{}".format(row['ss_venue_id'])]
        g.add((conference, RDF.type, EX["Conference"]))
        g.add((conference, EX["name"], Literal(row['name'])))

        # The homepage link is optional: rows with an empty url get none
        homepage = row['url']
        if homepage:
            g.add((conference, FOAF["homepage"], URIRef(homepage)))

        # City as a plain literal, year typed xsd:gYear, edition converted
        # with int() before being wrapped in a Literal
        g.add((conference, EX["city"], Literal(row['city'])))
        g.add((conference, EX["year"], Literal(row['year'], datatype=rdflib.XSD.gYear)))
        g.add((conference, EX["edition"], Literal(int(row['edition']))))

# Write the graph out as Turtle
g.serialize(destination='turtle2/conferences_enriched.ttl', format='turtle')

<Graph identifier=N1c351dae6246426894de8bed86ada878 (<class 'rdflib.graph.Graph'>)>

In [23]:
# Namespaces for this export. Note that this rebinds the notebook-level name
# `XSD` to a plain Namespace for the XML Schema IRI.
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
XSD = Namespace("http://www.w3.org/2001/XMLSchema#")
g = Graph()
g.bind("ex", EX)
g.bind("xsd", XSD)

with open('data/reviews.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # Resource for this review
        review = EX["Review_{}".format(row['review_id'])]

        # Type, decision and abstract as plain literals, date typed xsd:date
        g.add((review, RDF.type, EX["Review"]))
        g.add((review, EX["decision"], Literal(row['decision'])))
        g.add((review, EX["date"], Literal(row['date'], datatype=XSD["date"])))
        g.add((review, EX["abstract"], Literal(row['abstract'])))

# Write the graph out as Turtle
g.serialize(destination='turtle2/reviews.ttl', format='turtle')

<Graph identifier=Nf5e5e571f7c745ff93a57e078edd224f (<class 'rdflib.graph.Graph'>)>

In [24]:
# Ontology namespace, registered under the "ex" prefix on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
g = Graph()
g.bind("ex", EX)

with open('data/written_by.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # One paper -> author link per CSV row
        paper = EX["Paper_{}".format(row['paperId'])]
        author = EX["Author_{}".format(row['authorId'])]
        g.add((paper, EX["written_by"], author))

# Write the graph out as Turtle
g.serialize(destination='turtle2/written_by.ttl', format='turtle')


<Graph identifier=N646f8f8ba2ab49f088cd84c5081b3535 (<class 'rdflib.graph.Graph'>)>

In [25]:
# Ontology namespace, registered under the "ex" prefix on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
g = Graph()
g.bind("ex", EX)

with open('data/written_by_enriched.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # One paper -> author link per CSV row
        paper = EX["Paper_{}".format(row['paperId'])]
        author = EX["Author_{}".format(row['authorId'])]
        g.add((paper, EX["written_by"], author))

# Write the graph out as Turtle
g.serialize(destination='turtle2/written_by_enriched.ttl', format='turtle')

<Graph identifier=N51fc831f167b421cb4e618265a0ad231 (<class 'rdflib.graph.Graph'>)>

In [26]:
# Ontology namespace, registered under the "ex" prefix on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
g = Graph()
g.bind("ex", EX)

with open('data/citations.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # Both ends of the citation are Paper resources
        citing_paper = EX["Paper_{}".format(row['paperId'])]
        cited_paper = EX["Paper_{}".format(row['referenceId'])]

        # citing -> cited link. The year is attached to the citing paper
        # itself, not to the individual citation.
        # NOTE(review): a paper with several citation rows collects several
        # citation_year values -- confirm this is the intended modelling.
        g.add((citing_paper, EX["cites_to"], cited_paper))
        g.add((citing_paper, EX["citation_year"], Literal(row['year'], datatype=rdflib.XSD.integer)))

# Write the graph out as Turtle
g.serialize(destination='turtle2/citations.ttl', format='turtle')

<Graph identifier=N277339910be24621ae9acff317c6d65e (<class 'rdflib.graph.Graph'>)>

In [28]:
# Ontology namespace, registered under the "ex" prefix on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
g = Graph()
g.bind("ex", EX)

with open('data/published_in_enriched.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # Subject and object resources for this row
        paper = EX["Paper_{}".format(row['paper_id'])]
        venue = EX["Venue_{}".format(row['ss_venue_id'])]

        # paper -> venue link, plus the year as an xsd:integer-typed literal on the paper
        g.add((paper, EX["published_in"], venue))
        g.add((paper, EX["publish_year"], Literal(row['year'], datatype=rdflib.XSD.integer)))

# Write the graph out as Turtle
g.serialize(destination='turtle2/published_in_enriched.ttl', format='turtle')

<Graph identifier=N86b2d5d8cf9540cdae0791cfbbf3f90b (<class 'rdflib.graph.Graph'>)>

In [27]:
# Ontology namespace, registered under the "ex" prefix on a fresh graph
EX = Namespace("http://www.semanticweb.org/kocak/ontologies/2024/4/AcademicPapers#")
g = Graph()
g.bind("ex", EX)

with open('data/published_in.csv', 'r', encoding='utf-8') as file:
    for row in csv.DictReader(file):
        # One paper -> venue link per CSV row
        paper = EX["Paper_{}".format(row['paper_id'])]
        venue = EX["Venue_{}".format(row['ss_venue_id'])]
        g.add((paper, EX["published_in"], venue))

# Write the graph out as Turtle
g.serialize(destination='turtle2/published_in.ttl', format='turtle')

<Graph identifier=N02be96de63bb4bd29ac3954a9e613b23 (<class 'rdflib.graph.Graph'>)>