In [60]:
# import libraries
from rdflib import Graph,Literal,RDF,URIRef
from rdflib.namespace import FOAF,XSD,RDFS
from rdflib import Namespace
import pandas as pd
import string
import random
import numpy as np

# ignore the warning
import warnings
warnings.filterwarnings('ignore')

In [61]:
# Initialize Graph
g = Graph()

# Supporting Functions

In [62]:
KG_SDM = Namespace("http://kg_sdm.org/")
g.bind("kg_sdm",KG_SDM) 

In [63]:
def save_rdf_file(g,filename,rdf_format='ttl'):
    g.serialize(filename+'.'+rdf_format,format= rdf_format)

In [64]:
# we created this parser as there should not be any forbiden characters in the URI
def URLparse(url:str):
    url=url.replace("\'","_").replace("\"","_")
    for i in string.punctuation:
        url = url.replace(i,"_")
    url = url.replace(" ","_")
    return url

# Schema Design
* Once we have created the schema and the TBOX, it's time to populate the graph using triples. 
* On the image you can see the schema that we will be following in this ABOX
* We are showing the process of grouping the data from csv files and adding them to the knowledge graph.
* The upload will be done in parts, each part uploading a different part of the graph

![title](B1_TBOX-Sotiroski_Choudhary.png)

# ABOX Definition

## 1. Person

In [65]:
people_names = pd.read_csv('data_kg_sdm/authors.csv') # getting all the the people names from the authors file
people_names.rename(columns = {'name':'person_name'}, inplace=True)
people_names.head()

Unnamed: 0,ID,person_name
0,57218202833,Gautam A.
1,7004904337,Crandall J.W.
2,7005513246,Goodrich M.A.
3,6508306234,de Moura Oliveira P.B.
4,9277159100,Hedengren J.D.


In [66]:
# using the bridge table to connect the author with the school / institution
author_school = pd.read_csv('data_kg_sdm/author_belongs_school.csv')
author_school['author_ID'] = author_school['author_ID'].astype(int)
author_school.head()

Unnamed: 0,author_ID,org_ID
0,57218202833,1
1,7004904337,1
2,7005513246,1
3,6508306234,2
4,9277159100,3


In [67]:
# getting the data for each school
schools = pd.read_csv('data_kg_sdm/schools.csv')
schools.rename(columns = {'name':'school_name'}, inplace=True)
schools.head()

Unnamed: 0,ID,school_name
0,1,"Computer Science Department, Brigham Young Uni..."
1,2,"INESC-TEC Technology and Science, Campus da FE..."
2,3,"Department of Engineering, University of Trás..."
3,4,"Department of Chemical Engineering, Brigham Y..."
4,5,"Task Committee Secretary and Lead Engineer, Op..."


In [68]:
# merging the academics / people in one table with their related school
academics = pd.merge(schools, author_school, left_on='ID', right_on='org_ID')
academics = pd.merge(academics, people_names, left_on='author_ID', right_on='ID')
academics.head()

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
0,1,"Computer Science Department, Brigham Young Uni...",57218202833,1,57218202833,Gautam A.
1,1,"Computer Science Department, Brigham Young Uni...",7004904337,1,7004904337,Crandall J.W.
2,1,"Computer Science Department, Brigham Young Uni...",7005513246,1,7005513246,Goodrich M.A.
3,1,"Computer Science Department, Brigham Young Uni...",55078951500,1,55078951500,Bodily P.M.
4,1,"Computer Science Department, Brigham Young Uni...",57215280005,1,57215280005,Jacobsen A.J.


* Once we have all the people/academics we want to divide them depending on their role. 
We suppose 60% are Authors, 20% Reviewsrs, 10% Chair and Editor.

* Every variable will hold either the four types of academics which will be used in other parts of the notebook when uploading the triples

In [69]:
total = len(academics)
nb_auth = int(total * 0.6)
nb_rev = int(total * 0.2)
nb_chair_editor = int(total * 0.1)

In [70]:
authors = academics.loc[:nb_auth]
reviewers = academics.loc[nb_auth : nb_auth + nb_rev]
chair = academics.loc[nb_auth + nb_rev : nb_auth + nb_rev + nb_chair_editor ]
editor = academics.loc[ nb_auth + nb_rev + nb_chair_editor :nb_auth + nb_rev + nb_chair_editor + nb_chair_editor]

In [71]:
person_type_list = [authors,reviewers,chair,editor]
person_sub_type_ls = ["Author","Reviewers","Chair","Editor"]

Person = URIRef("http://kg_sdm.org/Person")

for i,person_tp in enumerate(person_type_list):
    # creating the URI for each person type
    preson_sub_type = URIRef(f"http://kg_sdm.org/{person_sub_type_ls[i]}")
    
    for name, school in zip(person_tp['person_name'], person_tp['school_name']):
        # assigning each person a URI
        parsed_name = URLparse(name)
        person_node = URIRef(f"http://kg_sdm.org/Person/{parsed_name}")
        # their literals
        name_lit = Literal(str(name))
        school_lit = Literal(str(school))

        # Connecting the nodes
        # add subclass type
        g.add((person_node, RDF.type, preson_sub_type))
        # add school
        g.add((person_node, KG_SDM.school, school_lit))
        # add name of person
        g.add((person_node, FOAF.name, name_lit))

In [72]:
# save_rdf_file(g,'person_links','ttl')

## 2. Submissions
In submission we store the articles from an author submitted to a conference

### a. Adding Articles(Papers)

In [73]:
# loading articles
articles = pd.read_csv('data_kg_sdm/articles.csv')

In [74]:
# The bridge table that connects the articles that an author wrote
author_article = pd.read_csv('data_kg_sdm/author_written_article.csv')
author_article['author_ID'] = author_article['author_ID'].astype(int)
author_article.head()

Unnamed: 0,author_ID,article_ID
0,57218202833,1
1,7004904337,1
2,7005513246,1
3,6508306234,2
4,9277159100,2


In [75]:
paper_type = ['Demo','FullPaper', 'Poster', 'Short']
keywords = ['ML', 'NLP', 'Database', 'Graph']

In [76]:
# As we don't have some of the data, we are generating the year randomly, 
# the paper type as well as if a paper is accepted or not.
# As we have to label the paper submissions as accepted or rejected, we take the first 500 articles as accepted

articles['year'] = [ random.randint(2000,2022) for i in range(len(articles))]
articles['type'] = [random.choice(paper_type) for i in range(len(articles))]
articles['keyword'] = [random.choice(keywords) for i in range(len(articles))]
articles['accepted'] = ""
articles['accepted'].loc[:500] = True
articles['accepted'].loc[500:] = False

In [77]:
articles.head()

Unnamed: 0,ID,title,volume,DOI,year,type,keyword,accepted
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,2016,Poster,ML,True
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,2002,FullPaper,ML,True
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,2018,Demo,Graph,True
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,2011,Poster,Database,True
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,2004,FullPaper,Database,True


In [78]:
for _,_, article_title, _, _ , year, paper_type, keyword, decision in articles.itertuples():
    # get the author node
    # author_node = URIRef(f"http://kg_sdm.org/Person/{URLparse(author_name)}")
    
    # create the submission onde
    submission_node = URIRef(f"http://kg_sdm.org/Submission/{URLparse(article_title)}")
    g.add((submission_node, RDF.type, KG_SDM.Submission))
    
    # author wrote a paper
    # g.add((author_node, KG_SDM.writes, submission_node))
    
    # data for submission
    paper_title_lit = Literal(str(article_title))
    paper_year_lit = Literal(int(year))
    g.add((submission_node, KG_SDM.paper_title, paper_title_lit))
    g.add((submission_node, KG_SDM.paper_year, paper_year_lit))
                            
    # adding the keywords
    keyword_node = URIRef(f"http://kg_sdm.org/{keyword}")
    g.add((submission_node, KG_SDM.related_to, keyword_node))

    # adding the paper type
    paper_node = URIRef(f"http://kg_sdm.org/{URLparse(paper_type)}")
    g.add((submission_node, KG_SDM.of_type, paper_node))

In [79]:
# only authors are writing the papers
authors.head()

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
0,1,"Computer Science Department, Brigham Young Uni...",57218202833,1,57218202833,Gautam A.
1,1,"Computer Science Department, Brigham Young Uni...",7004904337,1,7004904337,Crandall J.W.
2,1,"Computer Science Department, Brigham Young Uni...",7005513246,1,7005513246,Goodrich M.A.
3,1,"Computer Science Department, Brigham Young Uni...",55078951500,1,55078951500,Bodily P.M.
4,1,"Computer Science Department, Brigham Young Uni...",57215280005,1,57215280005,Jacobsen A.J.


In [80]:
# joining the authors and articles with the bridge table
article_pub = pd.merge(articles, author_article, left_on='ID', right_on='article_ID')
articles_publishedin = pd.merge(article_pub, authors, left_on='author_ID', right_on='author_ID')
articles_publishedin = articles_publishedin[['title', 'year', 'type', 'keyword', 'accepted', 'person_name', 'school_name']]

In [81]:
articles_publishedin

Unnamed: 0,title,year,type,keyword,accepted,person_name,school_name
0,Self-assessment of Proficiency of Intelligent ...,2016,Poster,ML,True,Gautam A.,"Computer Science Department, Brigham Young Uni..."
1,Self-assessment of Proficiency of Intelligent ...,2016,Poster,ML,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
2,Moderating operator influence in human-swarm s...,2012,Poster,NLP,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
3,Cooperating in long-term relationships with ti...,2007,Demo,Database,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
4,Information design in crowdfunding under thres...,2011,Short,ML,False,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
...,...,...,...,...,...,...,...
2844,Effect of the turbulence modeling in large-edd...,2004,Demo,Database,False,Arshad S.,"Brigham Young University, Provo, UT, United S..."
2845,Exploration of carbon-filled carbon nanotube v...,2011,FullPaper,Graph,False,Kowalski T.,"Department of Mechanical Engineering, Brigham..."
2846,Effect of leader placement on robotic swarm co...,2019,Demo,NLP,False,Butail S.,"Brigham Young University, Provo, UT 84602, U..."
2847,Thermophysical properties of thin fibers via p...,2006,FullPaper,ML,False,Glorieux C.,"Department of Mechanical Engineering, Brigham ..."


In [82]:
for _, article_title, _, _, _ , _, author_name, _ in articles_publishedin.itertuples():

    # get the author node
    author_node = URIRef(f"http://kg_sdm.org/Person/{URLparse(author_name)}")
    
    # create the submission onde
    submission_node = URIRef(f"http://kg_sdm.org/Submission/{URLparse(article_title)}")
    g.add((author_node, KG_SDM.writes, submission_node))

In [83]:
# save_rdf_file(g,'submission_links','ttl')

### b. DecisionProcess
Creating the reviewers, their votes and connecting them with the submission

In [84]:
reviewProcess = articles_publishedin.drop_duplicates(subset=['title'])
reviewProcess.head()

Unnamed: 0,title,year,type,keyword,accepted,person_name,school_name
0,Self-assessment of Proficiency of Intelligent ...,2016,Poster,ML,True,Gautam A.,"Computer Science Department, Brigham Young Uni..."
2,Moderating operator influence in human-swarm s...,2012,Poster,NLP,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
3,Cooperating in long-term relationships with ti...,2007,Demo,Database,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
4,Information design in crowdfunding under thres...,2011,Short,ML,False,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
7,Intent-based robotic path-replanning: When to ...,2006,FullPaper,NLP,True,Goodrich M.A.,"Computer Science Department, Brigham Young Uni..."


In [85]:
reviews = []
comments = []
rejacc = []

for i in range(len(reviewProcess)):
    # create N reviewers and comments
    N = random.randint(2,4)
    # assign N reviewers (get a random sample)
    rev_list = list(reviewers.sample(N)['person_name'])
    # for every reviewer get 
    acc = [random.random()>0.5 for i in range(N)]
    com_list = []
    for j in range(N):
        # generating a random comment
        comment = ''.join((random.choice('abcdefghijklmnopqrstuvwxyz') for i in range(5)))
        com_list.append(comment)
    reviews.append(rev_list)
    comments.append(com_list)
    rejacc.append(acc)
    
reviewProcess['reviewer_name'] = reviews
reviewProcess['comment'] = comments
reviewProcess['decision'] = rejacc

* As you may have noticed, in DecisionProcess we are randomly assigning the accepted/rejected decision for every reviewer, but we have manually created the accepted/rejected submission in the Submission node. 

* The reason why we did this is because it is more complicated to randomly generate the decisions and then connect with the Submisison node. And also because of the fact that it is artificial data and we won't need to query the accepted/rejected decision.

* With the way we did the graph, every article that was accepted can be found using a publication.


In [86]:
reviewProcess.head()

Unnamed: 0,title,year,type,keyword,accepted,person_name,school_name,reviewer_name,comment,decision
0,Self-assessment of Proficiency of Intelligent ...,2016,Poster,ML,True,Gautam A.,"Computer Science Department, Brigham Young Uni...","[Hirata C.M., Gandikota R., Solovjov V.P., Bak...","[kexqd, cbphv, aracf, bkwib]","[False, False, False, False]"
2,Moderating operator influence in human-swarm s...,2012,Poster,NLP,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni...","[Taylor B.A., Rossiter J.A., Raikes A.C.]","[yrpmu, gpdis, aqooh]","[False, True, True]"
3,Cooperating in long-term relationships with ti...,2007,Demo,Database,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni...","[Carilho M., Gammer B., Ward C.J., Lübeck A.]","[cecqm, jpelk, gvkry, gbxac]","[True, False, True, True]"
4,Information design in crowdfunding under thres...,2011,Short,ML,False,Crandall J.W.,"Computer Science Department, Brigham Young Uni...","[Wright G.A., Ganguly R., Tryon J.E., Carpente...","[pfkdi, sjmol, uzzuz, gwxvg]","[False, True, False, False]"
7,Intent-based robotic path-replanning: When to ...,2006,FullPaper,NLP,True,Goodrich M.A.,"Computer Science Department, Brigham Young Uni...","[Troyanskaya M., Dodson B., Milani A.S., Scott...","[vxdub, zxizq, ixdet, wrxxl]","[True, True, True, False]"


In [87]:
# because we were storing the comments and decisions and reviewers in an array
# we will use the explode function to have each reviewer connected to the paper
reviewProcess = reviewProcess.apply(pd.Series.explode)

In [88]:
for _, article_name, year, _, _, _, author_name, _, reviewer_name, comment, accepted in reviewProcess.itertuples():
    # get the submission node
    submission_node = URIRef(f"http://kg_sdm.org/Submission/{URLparse(article_name)}")

    # create the reviewProcess node
    review_process = URIRef(f"http://kg_sdm.org/DecisionProcess/{URLparse(reviewer_name+'_'+article_name)}")
    g.add((review_process, RDF.type, KG_SDM.DecisionProcess))
    
    # connect submission and review process
    g.add((submission_node, KG_SDM.goes_through, review_process))

    # get the reviewer node
    reviewer_node = URIRef(f"http://kg_sdm.org/Person/{URLparse(reviewer_name)}")
    g.add((reviewer_node, KG_SDM.participates_in, review_process))
    
    # add the literals
    comment_lit = Literal(str(comment))    
    decision_lit = Literal(bool(accepted))
    g.add((review_process, KG_SDM.comment, comment_lit))
    g.add((review_process, KG_SDM.decision, decision_lit))

## 3. Venues
Loading the Venue file which contains journal and conferences and assigning them

### a. Adding Conferences and Journals 

In [89]:
publications = pd.read_csv('data_kg_sdm/publications.csv')
publications.head()

Unnamed: 0,ID,name,Type
0,1,Advances in Intelligent Systems and Computing,Conference Paper
1,2,Lecture Notes in Electrical Engineering,Conference Paper
2,3,Journal of Hydraulic Engineering,Journal
3,4,Journal of Computing and Information Science i...,Journal
4,5,Telematics and Informatics,Conference Paper


In [90]:
journals = publications[publications['Type']=='Journal']
journals.head()

Unnamed: 0,ID,name,Type
2,3,Journal of Hydraulic Engineering,Journal
3,4,Journal of Computing and Information Science i...,Journal
8,9,International Journal of Electrical Power and ...,Journal
10,11,Journal of Cleaner Production,Journal
11,12,Journal of Manufacturing Processes,Journal


In [91]:
conferences = publications[publications['Type']!='Journal']
conferences.head()

Unnamed: 0,ID,name,Type
0,1,Advances in Intelligent Systems and Computing,Conference Paper
1,2,Lecture Notes in Electrical Engineering,Conference Paper
4,5,Telematics and Informatics,Conference Paper
5,6,Electric Power Systems Research,Conference Paper
6,7,Optical Fiber Technology,Conference Paper


In [92]:
# from conferences we split them in 60% worshops and 40% symposiums 
total = len(conferences)
workshops = conferences.iloc[:int(0.4*total):]
symposium = conferences.iloc[int(0.4*total):]

In [93]:
venue_type_list = [journals,workshops,symposium]
venue_sub_type_ls = ["Journal","Workshop","Symposium"]


for i,venue_tp in enumerate(venue_type_list):
    print(venue_sub_type_ls[i])
    venue_type = venue_sub_type_ls[i]
    venue_sub_type = URIRef(f"http://kg_sdm.org/{venue_type}")
    
    for index,row in venue_tp.iterrows():
        confname = row['name']
        
        # parsing conference
        conf_title = URLparse(confname)
        conf_node = URIRef(f"http://kg_sdm.org/Venue/{conf_title}")
        venue_lit = Literal(str(conf_title))

        # add subclass type
        g.add((conf_node, RDF.type, venue_sub_type))
        # add name of venue
        g.add((conf_node, KG_SDM.venue_title, venue_lit))



Journal
Workshop
Symposium


In [94]:
# save_rdf_file(g,'Venue','ttl')

### b. Adding submissions submitted in Venues

In [95]:
# read the articles
articles = pd.read_csv('data_kg_sdm/articles.csv')
articles.head()

Unnamed: 0,ID,title,volume,DOI
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448


In [96]:
# read all the Venues
publications = pd.read_csv('data_kg_sdm/publications.csv')
publications

Unnamed: 0,ID,name,Type
0,1,Advances in Intelligent Systems and Computing,Conference Paper
1,2,Lecture Notes in Electrical Engineering,Conference Paper
2,3,Journal of Hydraulic Engineering,Journal
3,4,Journal of Computing and Information Science i...,Journal
4,5,Telematics and Informatics,Conference Paper
...,...,...,...
399,400,CAD Computer Aided Design,Conference Paper
400,401,International Symposium on Advances in Computa...,Conference Paper
401,402,AIAA SPACE and Astronautics Forum and Expositi...,Conference Paper
402,403,IEEE Transactions on Control of Network Systems,Conference Paper


In [97]:
# load article venue merge link
article_publisher_link = pd.read_csv('data_kg_sdm/article_published_by.csv')
article_publisher_link

Unnamed: 0,article_ID,publisher_ID,year
0,1,198,2020
1,2,389,2020
2,3,216,2020
3,4,21,2018
4,5,133,2018
...,...,...,...
875,876,360,2017
876,877,229,2015
877,878,205,2015
878,879,95,2016


In [98]:
# merge article submitted to Venue
article_pub = pd.merge(articles, article_publisher_link, left_on='ID', right_on='article_ID')
articles_publishedin = pd.merge(article_pub, publications, left_on='publisher_ID', right_on='ID',how='left')
articles_publishedin

Unnamed: 0,ID_x,title,volume,DOI,article_ID,publisher_ID,year,ID_y,name,Type
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,1,198,2020,198,International Conference on Transportation and...,Conference Paper
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,2,389,2020,389,AAAI Workshop - Technical Report,Conference Paper
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,3,216,2020,216,Advances in Heat Transfer,Conference Paper
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,4,21,2018,21,Proceedings - 2020 IEEE 21st International Con...,Conference Paper
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,5,133,2018,133,IEEE Radiation Effects Data Workshop,Conference Paper
...,...,...,...,...,...,...,...,...,...,...
875,876,Formability of magnesium alloy AZ31B from room...,Part F8,10.1007/978-3-319-52392-7_91,876,360,2017,360,Proceedings - 2017 IEEE 17th International Con...,Conference Paper
876,877,A summary of data-aided equalizer experiments ...,,,877,229,2015,229,Science Robotics,Conference Paper
877,878,Joining Dissimilar Material Using Friction Sti...,,10.1007/978-3-319-52383-5_16,878,205,2015,205,"Journal of Guidance, Control, and Dynamics",Journal
878,879,A minimal realization technique for the dynami...,4,10.1109/TCNS.2015.2498468,879,95,2016,95,Geotechnical Special Publication,Conference Paper


In [99]:
# iterating over above combined dataframe and loading the submission submitted_to venues
for index,row in articles_publishedin.iterrows():
    year = row['year']
    year_literal = Literal(int(year))
    submissiontitle = row['title']
    confname = row['name']
    
    # parsing conference
    conf_title = URLparse(confname)
    conf_node = URIRef(f"http://kg_sdm.org/Venue/{conf_title}")

    # parsing submission
    sub_title = URLparse(submissiontitle)
    sub_node = URIRef(f"http://kg_sdm.org/Submission/{sub_title}")
    
    # connect conference and submission
    g.add((sub_node,KG_SDM.submitted_to,conf_node))



### c. Adding Submissions in Publication 

In [100]:
articles = pd.read_csv('data_kg_sdm/articles.csv')
articles['accepted'] = ""
articles['accepted'].loc[:500] = True
articles['accepted'].loc[500:] = False
articles.head()

Unnamed: 0,ID,title,volume,DOI,accepted
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,True
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,True
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,True
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,True
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,True


In [101]:
# load article venue merge link
article_publisher_link = pd.read_csv('data_kg_sdm/article_published_by.csv')
article_publisher_link

Unnamed: 0,article_ID,publisher_ID,year
0,1,198,2020
1,2,389,2020
2,3,216,2020
3,4,21,2018
4,5,133,2018
...,...,...,...
875,876,360,2017
876,877,229,2015
877,878,205,2015
878,879,95,2016


In [102]:
articles_publishedin = pd.merge(articles, article_publisher_link, left_on='ID', right_on='article_ID')
articles_publishedin

Unnamed: 0,ID,title,volume,DOI,accepted,article_ID,publisher_ID,year
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,True,1,198,2020
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,True,2,389,2020
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,True,3,216,2020
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,True,4,21,2018
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,True,5,133,2018
...,...,...,...,...,...,...,...,...
875,876,Formability of magnesium alloy AZ31B from room...,Part F8,10.1007/978-3-319-52392-7_91,False,876,360,2017
876,877,A summary of data-aided equalizer experiments ...,,,False,877,229,2015
877,878,Joining Dissimilar Material Using Friction Sti...,,10.1007/978-3-319-52383-5_16,False,878,205,2015
878,879,A minimal realization technique for the dynami...,4,10.1109/TCNS.2015.2498468,False,879,95,2016


#### I. Adding Journal publications in Volume

In [103]:
# Get all the journals  
articles_publishedin_journal = pd.merge(articles_publishedin, journals, left_on='publisher_ID', right_on='ID')
articles_publishedin_journal

Unnamed: 0,ID_x,title,volume,DOI,accepted,article_ID,publisher_ID,year,ID_y,name,Type
0,14,In-situ strain measurement of ballistic fabric...,59,10.1016/j.yofte.2020.102334,True,14,112,2020,112,International Journal of Modelling and Simulation,Journal
1,487,Understanding engineering and technology stude...,2018-June,,True,487,112,2016,112,International Journal of Modelling and Simulation,Journal
2,17,High-Speed Acoustic Impact-Echo Sounding of Co...,39,10.1007/s10921-020-00695-0,True,17,145,2018,145,Journal of Mechanics in Medicine and Biology,Journal
3,31,An iterative pose estimation algorithm based o...,7,10.1109/JAS.2020.1003222,True,31,351,2019,351,International Journal of Industrial Organization,Journal
4,34,Developable mechanisms on right conical surfaces,149,10.1016/j.mechmachtheory.2020.103813,True,34,287,2019,287,Journal of Homeland Security and Emergency Man...,Journal
...,...,...,...,...,...,...,...,...,...,...,...
211,761,Ground-based 3D radar imaging of trees using a...,6,10.3390/electronics6010011,False,761,355,2015,355,Journal of Mechanical Science and Technology,Journal
212,771,Theoretical estimates of maximum fields in sup...,30,10.1088/1361-6668/30/3/033002,False,771,74,2016,74,Journal of Nanobiotechnology,Journal
213,780,MMI waveguide based multispectral detection of...,,10.1109/IPCon.2016.7831137,False,780,4,2015,4,Journal of Computing and Information Science i...,Journal
214,816,Two-phase flow pressure drop in superhydrophob...,110,10.1016/j.ijheatmasstransfer.2017.03.055,False,816,65,2015,65,Journal of Microelectromechanical Systems,Journal


In [104]:
# Adding the journals in volumes only which are accepted (where decision = True )
for index,row in articles_publishedin_journal.iterrows():
    year = row['year']
    year_literal = Literal(int(year))
    submissiontitle = row['title']
    confname = row['name']
    decision = row['accepted']
    
    # parsing conference
    conf_title = URLparse(confname)
    conf_node = URIRef(f"http://kg_sdm.org/Venue/{conf_title}")

    
    # parsing submission
    sub_title = URLparse(submissiontitle)
    sub_node = URIRef(f"http://kg_sdm.org/Submission/{sub_title}")

    
    # parsing for publication
    # assuming only half of the articles got accepted
    if decision:
        Pub_title = conf_title+'_volume_'+str(random.randint(1, 5))
        Pub_title_lit = Literal(str(Pub_title))
        
        pub_node = URIRef(f"http://kg_sdm.org/Publication/{Pub_title}")
        g.add((pub_node, RDF.type, KG_SDM.Volume))
        g.add((sub_node,KG_SDM.published_in,pub_node))
        g.add((pub_node, KG_SDM.publication_title,Pub_title_lit))
        g.add((pub_node, KG_SDM.publication_year,year_literal))

#### II. Adding Conference publications in Proceddings

In [105]:
# get all the conferences
conferences = workshops.append(symposium)
conferences

Unnamed: 0,ID,name,Type
0,1,Advances in Intelligent Systems and Computing,Conference Paper
1,2,Lecture Notes in Electrical Engineering,Conference Paper
4,5,Telematics and Informatics,Conference Paper
5,6,Electric Power Systems Research,Conference Paper
6,7,Optical Fiber Technology,Conference Paper
...,...,...,...
399,400,CAD Computer Aided Design,Conference Paper
400,401,International Symposium on Advances in Computa...,Conference Paper
401,402,AIAA SPACE and Astronautics Forum and Expositi...,Conference Paper
402,403,IEEE Transactions on Control of Network Systems,Conference Paper


In [106]:
articles_publishedin_conf = pd.merge(articles_publishedin, conferences, left_on='publisher_ID', right_on='ID')
articles_publishedin_conf

Unnamed: 0,ID_x,title,volume,DOI,accepted,article_ID,publisher_ID,year,ID_y,name,Type
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,True,1,198,2020,198,International Conference on Transportation and...,Conference Paper
1,95,Using Cyclic Quadrilaterals to Design Cylindri...,83,10.1007/978-3-030-43929-3_14,True,95,198,2018,198,International Conference on Transportation and...,Conference Paper
2,474,Design and characterization of a package-less ...,17,10.1117/1.JMM.17.3.034501,True,474,198,2017,198,International Conference on Transportation and...,Conference Paper
3,651,Human factors in mission control centers,,10.1016/B978-0-08-101869-9.00013-3,False,651,198,2018,198,International Conference on Transportation and...,Conference Paper
4,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,True,2,389,2020,389,AAAI Workshop - Technical Report,Conference Paper
...,...,...,...,...,...,...,...,...,...,...,...
659,811,Optimization of origami-based tubes for lightw...,5B-2017,10.1115/DETC2017-67274,False,811,298,2015,298,ACS Synthetic Biology,Conference Paper
660,821,Foreword,,,False,821,122,2015,122,"2019 IEEE Photonics Conference, IPC 2019 - Pro...",Conference Paper
661,845,Modeling the coanda effect with FDS and STARCC...,,,False,845,153,2016,153,Composites Part A: Applied Science and Manufac...,Conference Paper
662,849,Split-vertex technique for thickness-accommoda...,5B-2017,10.1115/DETC2017-68018,False,849,128,2015,128,"Sensors and Actuators, B: Chemical",Conference Paper


In [107]:
# Adding the conference in Proceddings only which are accepted (where decision = True )
for index,row in articles_publishedin_conf.iterrows():
    year = row['year']
    year_literal = Literal(int(year))
    submissiontitle = row['title']
    confname = row['name']
    decision = row['accepted']
    
    # parsing conference
    conf_title = URLparse(confname)
    conf_node = URIRef(f"http://kg_sdm.org/Venue/{conf_title}")

    
    # parsing submission
    sub_title = URLparse(submissiontitle)
    sub_node = URIRef(f"http://kg_sdm.org/Submission/{sub_title}")

    
    # parsing for publication
    # assuming only half of the articles got accepted
    if decision:
        Pub_title = conf_title+'_proceddings'
        Pub_title_lit = Literal(str(Pub_title))
        
        pub_node = URIRef(f"http://kg_sdm.org/Publication/{Pub_title}")
        g.add((pub_node, RDF.type, KG_SDM.Proceddings))
        g.add((sub_node,KG_SDM.published_in,pub_node))
        g.add((pub_node, KG_SDM.publication_title,Pub_title_lit))
        g.add((pub_node, KG_SDM.publication_year,year_literal))

### d. Adding handlers

* conferences are handled by chair
* Journals are handled by editor

In [108]:
# As defined in person
chair.head()

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
1737,989,"Department of Physics and Astronomy, Brigham Y...",57204435650,989,57204435650,Laughlin E.
1738,989,"Department of Physics and Astronomy, Brigham Y...",57204415715,989,57204415715,Howe L.
1739,991,Department of Civil and Environmental Engineer...,57193610705,991,57193610705,Talbot M.
1740,994,"Aerion Technologies Corporation, Mechanical En...",15833172400,994,15833172400,Rajnarayan D.
1741,996,"New Product Development, Intuitive Surgical I...",55916540700,996,55916540700,Grames C.


In [109]:
# editor as defined in journal
editor.head()

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
1954,1205,"Dipartimento di Fisica, Università degli Studi...",6701695863,1205,6701695863,Citterio M.
1955,1206,"INFN Milano, via G. Celoria, 16, Milano, 2013...",37098602100,1206,37098602100,Camplani A.
1956,1206,"INFN Milano, via G. Celoria, 16, Milano, 2013...",7006631262,1206,7006631262,Lazzaroni M.
1957,1207,NSF Center for High Performance Reconfigurabl...,56404157200,1207,56404157200,Takai H.
1958,1208,"Brookhaven National Laboratory, UptonNY, Unit...",56424241200,1208,56424241200,Chen H.


In [110]:
journals['editors'] = editor.iloc[:len(journals)]['person_name'].values
journals.head()

Unnamed: 0,ID,name,Type,editors
2,3,Journal of Hydraulic Engineering,Journal,Citterio M.
3,4,Journal of Computing and Information Science i...,Journal,Camplani A.
8,9,International Journal of Electrical Power and ...,Journal,Lazzaroni M.
10,11,Journal of Cleaner Production,Journal,Takai H.
11,12,Journal of Manufacturing Processes,Journal,Chen H.


In [111]:
# getting list of chairs
chair_names = list(chair['person_name'].values)
# since we do not have chair in our data, generating random peopels from chair to handle the conferences
conf_chairs = [chair_names[random.randint(0,len(chair_names)-1)]  for i in range(len(conferences))]
# saving conference handlers
conferences['chair'] = conf_chairs
conferences

Unnamed: 0,ID,name,Type,chair
0,1,Advances in Intelligent Systems and Computing,Conference Paper,Szymanski R.
1,2,Lecture Notes in Electrical Engineering,Conference Paper,Gladstone R.
4,5,Telematics and Informatics,Conference Paper,Morales C.
5,6,Electric Power Systems Research,Conference Paper,Lopes C.V.
6,7,Optical Fiber Technology,Conference Paper,Sanders S.
...,...,...,...,...
399,400,CAD Computer Aided Design,Conference Paper,Jensen D.W.
400,401,International Symposium on Advances in Computa...,Conference Paper,Malaska M.J.
401,402,AIAA SPACE and Astronautics Forum and Expositi...,Conference Paper,Campbell E.
402,403,IEEE Transactions on Control of Network Systems,Conference Paper,Liu Z.Y.-C.


In [112]:
# adding the chair handlers
for index,row in conferences.iterrows():
    authorname = row['chair']
    confname = row['name']

    # parsing conference
    conf_title = URLparse(confname)
    conf_node = URIRef(f"http://kg_sdm.org/Venue/{conf_title}")
    venue_lit = Literal(str(conf_title))

    # parsing authors
    per_title = URLparse(authorname)
    per_node = URIRef(f"http://kg_sdm.org/Person/{per_title}")


    g.add((per_node, KG_SDM.handles, conf_node))

In [113]:
# adding the editor handlers
for index,row in journals.iterrows():
    authorname = row['editors']
    confname = row['name']

    # parsing conference
    conf_title = URLparse(confname)
    conf_node = URIRef(f"http://kg_sdm.org/Venue/{conf_title}")
    venue_lit = Literal(str(conf_title))

    # parsing authors
    per_title = URLparse(authorname)
    per_node = URIRef(f"http://kg_sdm.org/Person/{per_title}")


    g.add((per_node, KG_SDM.handles, conf_node))

## 4. Saving the Graph

In [114]:
save_rdf_file(g,"abox",rdf_format='ttl')

# 5. Statistics

In [116]:
print(f"Number of people\t{len(people_names)}")
print(f"Number of authors\t{len(authors)}")
print(f"Number of reviewers\t{len(reviewers)}")
print(f"Number of chairs\t{len(chair)}")
print(f"Number of editor\t{len(editor)}")
print(f"Number of submissions\t{len(articles_publishedin)}")
print(f"Number of reviews\t{len(reviewProcess)}")
print(f"Number of publications\t{len(publications)}")
print(f"Number of journals\t{len(journals)}")
print(f"Number of conferences \t{len(conferences)}")


Number of people	2172
Number of authors	1304
Number of reviewers	435
Number of chairs	218
Number of editor	218
Number of submissions	880
Number of reviews	2518
Number of publications	404
Number of journals	100
Number of conferences 	304


In [117]:
reviewers.head()

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
1303,565,"Brigham Young University, Chemistry and Bioch...",57209530205,565,57209530205,Hooper K.
1304,566,"Department of Mechanical Engineering, Brigham ...",57215116223,566,57215116223,Baker N.F.
1305,566,"Department of Mechanical Engineering, Brigham ...",57188658434,566,57188658434,Thomas J.J.
1306,566,"Department of Mechanical Engineering, Brigham ...",36682113800,566,36682113800,Dykes K.
1307,567,"National Renewable Energy Laboratory, Nationa...",57191952713,567,57191952713,Stanley A.P.J.
