In [1]:
import xml.etree.ElementTree as ET
import re
import os
import spacy
import scispacy

### XML to Text

In [36]:
def xml_to_text_all(directory_path='D:\\Drive\\SISE\\CelloGraph\\Dev\\Data\\XML',
                    output_directory='D:\\Drive\\SISE\\CelloGraph\\Dev\\Data\\Text'):
    """Write the paragraph text of every TEI XML file in a directory to a .txt file.

    For each ``*.xml`` file in ``directory_path`` the abstract paragraphs
    (teiHeader/profileDesc/abstract/div/p) are collected first, followed by the
    paragraphs of every numbered body section (text/body/div/p). Sections whose
    heading carries no ``n`` attribute (e.g. "Table 1") are skipped. Paragraph
    tags and bibliographic ``ref`` elements (including their text) are removed
    and each paragraph is written on its own line.

    The title/date/DOI lookup that used to run here was dropped: its result
    was never written anywhere.

    Args:
        directory_path: Directory holding the TEI XML files. Defaults to the
            location that used to be hard-coded.
        output_directory: Directory receiving one ``<name>.txt`` per input
            file (``<name>`` is the input file name without ``.xml``). It is
            created when missing.

    Returns:
        None. Output is written to disk; a file is only created for documents
        that yield at least one paragraph.

    Raises:
        FileNotFoundError: If ``directory_path`` does not exist.
        xml.etree.ElementTree.ParseError: If an ``.xml`` file is not well-formed.
    """
    # Namespace-aware lookups instead of slicing a fixed number of characters
    # off every tag name.
    tei = {'tei': 'http://www.tei-c.org/ns/1.0'}

    # ET.tostring() serialises TEI elements with the auto-generated 'ns0' prefix.
    # '\b' keeps the paragraph pattern from matching other tags starting with 'p'.
    paragraph_tag = re.compile(r'</?ns0:p\b[^>]*>')
    # temporary for removing ref tag: drops references together with their text,
    # including self-closing, attribute-less and multi-line ones.
    reference = re.compile(r'<ns0:ref\b[^>]*/>|<ns0:ref\b[^>]*>.*?</ns0:ref>', re.DOTALL)

    os.makedirs(output_directory, exist_ok=True)

    for xml_file in sorted(os.listdir(directory_path)):
        source_path = os.path.join(directory_path, xml_file)
        # Stray files (desktop.ini, notes, ...) and sub-directories are skipped
        # rather than aborting the whole run with a parse error.
        if not xml_file.lower().endswith('.xml') or not os.path.isfile(source_path):
            continue

        root = ET.parse(source_path).getroot()
        paragraph_elements = []

        # abstract paragraphs come first
        for abstract_div in root.findall('tei:teiHeader/tei:profileDesc/tei:abstract/tei:div', tei):
            paragraph_elements.extend(abstract_div.findall('tei:p', tei))

        # then the paragraphs of every numbered body section
        for section_div in root.findall('tei:text/tei:body/tei:div', tei):
            headings = section_div.findall('tei:head', tei)
            # The last heading decides, as before; a heading without a number
            # marks table/figure captions and similar unnumbered blocks.
            if headings and not headings[-1].get('n'):
                continue
            paragraph_elements.extend(section_div.findall('tei:p', tei))

        # TEXT EXTRACTION FOR ANNOTATION (before identifying sentences from paragraphs)
        paragraphs = []
        for element in paragraph_elements:
            text = ET.tostring(element, encoding='unicode')
            text = paragraph_tag.sub('', text)
            text = reference.sub('', text)
            paragraphs.append(text)

        if not paragraphs:
            continue

        output_file_name = xml_file[:-len('.xml')]
        output_path = os.path.join(output_directory, f'{output_file_name}.txt')
        # Opened once per document and truncated, so running the function again
        # rewrites the file instead of appending a second copy of the text.
        with open(output_path, 'w', encoding='utf-8') as output_file:
            output_file.writelines(paragraph + '\n' for paragraph in paragraphs)
    

In [37]:
# xml_to_text_all()

### XML Structure
- teiHeader
    - fileDesc
        - titleStmt
            - title
        - publicationStmt
            - date
        - sourceDesc
            - biblStruct
                - idno
    - encodingDesc
    - profileDesc

### List Structure
- [
    - [
        - section_number,
        - section_title,
        - {
            - paragraph_number:
                - {
                    - sentence_number: sentence_text,
                - },
        - }
    - ],
- ]

### XML to List

In [2]:
# loading xml and creating root

xml_file = 'D:\\Drive\\SISE\\CelloGraph\\Dev\\Data\\XML\\Wolf et al. - How the shape of fillers affects properties of nanocomposites.tei.xmlScitex.xml'
tree = ET.parse(xml_file)
root = tree.getroot()

# getting metadata (title, publication_date and doi)

# Namespace-aware paths replace slicing a fixed 29 characters off every tag name.
TEI_NAMESPACES = {'tei': 'http://www.tei-c.org/ns/1.0'}

metadata = []

# One [0, 'Metadata', {...}] record per <fileDesc>; a key is only present when the
# corresponding element exists, and the last matching element wins.
for file_desc in root.findall('tei:teiHeader/tei:fileDesc', TEI_NAMESPACES):
    publication_info = {}

    for title in file_desc.findall('tei:titleStmt/tei:title', TEI_NAMESPACES):
        publication_info['Title'] = title.text

    for date in file_desc.findall('tei:publicationStmt/tei:date', TEI_NAMESPACES):
        publication_info['Publication Date'] = date.text

    identifiers = file_desc.findall('tei:sourceDesc/tei:biblStruct/tei:idno', TEI_NAMESPACES)
    # <biblStruct> may list several identifiers (MD5, arXiv, ...). Prefer the one
    # typed as a DOI; fall back to the last identifier when none is typed that way.
    doi_identifiers = [idno for idno in identifiers if (idno.get('type') or '').lower() == 'doi']
    for idno in (doi_identifiers or identifiers)[-1:]:
        publication_info['DOI'] = idno.text

    metadata.append([0, 'Metadata', publication_info])

print(metadata)
# print(metadata[0][2]['DOI'])

[[0, 'Metadata', {'Title': 'How the shape of fillers affects the barrier properties of polymer/non-porous particles nanocomposites: A review', 'Publication Date': '03 April 2018', 'DOI': '10.1016/j.memsci.2018.03.085'}]]


In [3]:
# getting abstract

# Namespace-aware paths replace slicing a fixed 29 characters off every tag name.
TEI_NAMESPACES = {'tei': 'http://www.tei-c.org/ns/1.0'}

abstract = []

# One [0, 'Abstract', {paragraph_number: serialized <p>}] record per <div> of the
# abstract. Paragraphs are numbered from 1 in document order and kept as XML
# strings; the markup is stripped in the text-preprocessing step.
for abstract_div in root.findall('tei:teiHeader/tei:profileDesc/tei:abstract/tei:div', TEI_NAMESPACES):
    list_of_paragraphs = {
        paragraph_number: ET.tostring(paragraph, encoding='unicode')
        for paragraph_number, paragraph in enumerate(abstract_div.findall('tei:p', TEI_NAMESPACES), start=1)
    }
    abstract.append([0, 'Abstract', list_of_paragraphs])

abstract

[[0,
  'Abstract',
  {1: '<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite. It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties. Results revealed that this expected impact was not systematically achieved, even for impermeable lamellar fillers that usually displayed the highest aspect ratio. More specifically, an unexpected increase of the permeability in the nanocomposite was often observed. To explain this deviation of the \'ideal behavior\', this paper d

In [4]:
# getting other sections

# Namespace-aware paths replace slicing a fixed 29 characters off every tag name.
TEI_NAMESPACES = {'tei': 'http://www.tei-c.org/ns/1.0'}

list_of_sections = []

# One [section_number, section_name, [serialized <p>, ...]] record per numbered
# <div> of the body.
for section_div in root.findall('tei:text/tei:body/tei:div', TEI_NAMESPACES):
    section_number = ''
    section_name = ''

    # The last <head> of the div decides, as before.
    for head in section_div.findall('tei:head', TEI_NAMESPACES):
        # Read the 'n' attribute directly instead of slicing the repr of the
        # attribute dict: '1.' -> '1', '2.1.' -> '2.1', and a value without a
        # trailing dot is kept intact.
        numbering = head.get('n')
        section_number = numbering.rstrip('.') if numbering else 'NO_SECTION_NUMBER'
        section_name = head.text

    list_of_paragraphs = [
        ET.tostring(paragraph, encoding='unicode')
        for paragraph in section_div.findall('tei:p', TEI_NAMESPACES)
    ]

    # Unnumbered headings (i.e., Table 1, Table 2 .....) are skipped entirely,
    # whether or not they carry paragraphs.
    if section_number != 'NO_SECTION_NUMBER':
        list_of_sections.append([section_number, section_name, list_of_paragraphs])

list_of_sections

[['1',
  'Introduction',
  ['<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">In the objective of developing efficient and optimal packaging, one of the main challenges is to design and provide food packaging materials able to protect the food from the external environment and to maintain food quality and safety throughout its shelf life <ns0:ref type="bibr" target="#b0">[1,</ns0:ref><ns0:ref type="bibr" target="#b1">2]</ns0:ref>. The mass transfers are thus at the heart of the feature of the food packaging, especially transfers of water vapor, oxygen and/or carbon dioxide, which condition the rates of numerous reactions of food degradation (oxidation, microbial development, physiological reactions, etc.). The development of bulk nanocomposite structures by introducing nanoparticles, i.e. fillers having at least one dimension lower than 100 nm, in polymeric matrices appeared as one of the most promising directions in the development of packaging materials with advanced mass transfer prop

In [5]:
# # DON'T USE THIS PIECE OF CODE  (commented - 12 Nov 23)
# # HAVE TO ADJUST THIS CODE FOR TABLES/IMAGES
# # merging "paragraphs with no section number" to its previous section

# list_of_sections_length = len(list_of_sections)

# for i in range(list_of_sections_length-1,0,-1):
#     if list_of_sections[i][0] == 'NO_SECTION_NUMBER':
#         list_of_sections[i-1][2].extend(list_of_sections[i][2])
#         list_of_sections.remove(list_of_sections[i])
        
# list_of_sections

In [6]:
# Number each section's paragraphs from 1, in order, and attach the resulting
# {paragraph_number: paragraph_text} dictionary to the record as a 4th element.

for section in list_of_sections:
    numbered_paragraphs = dict(enumerate(section[2], start=1))
    section.append(numbered_paragraphs)

list_of_sections

[['1',
  'Introduction',
  ['<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">In the objective of developing efficient and optimal packaging, one of the main challenges is to design and provide food packaging materials able to protect the food from the external environment and to maintain food quality and safety throughout its shelf life <ns0:ref type="bibr" target="#b0">[1,</ns0:ref><ns0:ref type="bibr" target="#b1">2]</ns0:ref>. The mass transfers are thus at the heart of the feature of the food packaging, especially transfers of water vapor, oxygen and/or carbon dioxide, which condition the rates of numerous reactions of food degradation (oxidation, microbial development, physiological reactions, etc.). The development of bulk nanocomposite structures by introducing nanoparticles, i.e. fillers having at least one dimension lower than 100 nm, in polymeric matrices appeared as one of the most promising directions in the development of packaging materials with advanced mass transfer prop

In [7]:
# deleting 3rd element from each record of document

# Delete by position instead of list.remove(), which searches by value. The length
# check makes the cell safe to re-run: once a record is back to three elements the
# numbered-paragraph dictionary sits at index 2 and must be kept.
for section in list_of_sections:
    if len(section) == 4:
        del section[2]

list_of_sections

[['1',
  'Introduction',
  {1: '<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">In the objective of developing efficient and optimal packaging, one of the main challenges is to design and provide food packaging materials able to protect the food from the external environment and to maintain food quality and safety throughout its shelf life <ns0:ref type="bibr" target="#b0">[1,</ns0:ref><ns0:ref type="bibr" target="#b1">2]</ns0:ref>. The mass transfers are thus at the heart of the feature of the food packaging, especially transfers of water vapor, oxygen and/or carbon dioxide, which condition the rates of numerous reactions of food degradation (oxidation, microbial development, physiological reactions, etc.). The development of bulk nanocomposite structures by introducing nanoparticles, i.e. fillers having at least one dimension lower than 100 nm, in polymeric matrices appeared as one of the most promising directions in the development of packaging materials with advanced mass transfer p

In [8]:
# Build the document: abstract record(s) first, then the body sections.
# The new list holds the same record objects as `abstract` and `list_of_sections`.

document = [*abstract, *list_of_sections]
document

[[0,
  'Abstract',
  {1: '<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite. It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties. Results revealed that this expected impact was not systematically achieved, even for impermeable lamellar fillers that usually displayed the highest aspect ratio. More specifically, an unexpected increase of the permeability in the nanocomposite was often observed. To explain this deviation of the \'ideal behavior\', this paper d

In [9]:
# text preprocessing

# ET.tostring() serialises TEI elements with the auto-generated 'ns0' prefix.
# Opening and closing paragraph tags; '\b' keeps the pattern from matching other
# tags whose name merely starts with 'p'.
pattern_p_tag = re.compile(r'</?ns0:p\b[^>]*>')
# temporary for removing ref tag: drops each reference together with its text.
# Also covers self-closing refs, refs without attributes and refs spanning lines.
pattern_ref = re.compile(r'<ns0:ref\b[^>]*/>|<ns0:ref\b[^>]*>.*?</ns0:ref>', re.DOTALL)

# TODO: subscript / superscript formatting is not handled yet.
# NOTE(review): any other inline markup and XML character escapes (e.g. '&amp;')
# produced by ET.tostring() are left in the text as they are.

for record in document:
    for paragraph_number, paragraph_text in record[2].items():
        text = pattern_p_tag.sub('', paragraph_text)
        text = pattern_ref.sub('', text)
        record[2][paragraph_number] = text

document

[[0,
  'Abstract',
  {1: "More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite. It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties. Results revealed that this expected impact was not systematically achieved, even for impermeable lamellar fillers that usually displayed the highest aspect ratio. More specifically, an unexpected increase of the permeability in the nanocomposite was often observed. To explain this deviation of the 'ideal behavior', this paper discusses extensively the impact of the nanopartic

In [10]:
# # TEXT EXTRACTION FOR ANNOTATION (before identifying sentences from paragraphs)

# file_name = os.path.basename(xml_file)[:-4]

# for record in document:
#     for _, paragraph in record[2].items():
#         with open(f'output\\{file_name}.txt', 'a', encoding='utf-8') as output_file:
#             output_file.write(paragraph + '\n')

In [11]:
# Load spaCy's 'en_core_web_sm' pipeline and bind it to `nlp`.
# NOTE(review): spacy is already imported in the first cell, so the import below is
# redundant but harmless. `nlp` is rebound to a different pipeline in the
# "List to RDF" part, so any cell calling `nlp` uses whichever load ran last.

import spacy
nlp = spacy.load('en_core_web_sm')

In [12]:
# Split every paragraph into sentences with the loaded spaCy pipeline and attach
# {paragraph_number: {sentence_number: sentence_text}} to each record as a 4th
# element. Sentences are numbered from 1 within their paragraph.

for record in document:
    sentences_by_paragraph = {}

    for paragraph_number, paragraph_text in record[2].items():
        sentences_by_paragraph[paragraph_number] = {
            sentence_number: sentence.text            # Span -> plain string
            for sentence_number, sentence in enumerate(nlp(paragraph_text).sents, start=1)
        }

    record.append(sentences_by_paragraph)

In [13]:
# deleting 3rd element from each record of document

# Delete by position instead of list.remove(), which searches by value. The length
# check makes the cell safe to re-run: once a record is back to three elements the
# sentence dictionary sits at index 2 and must be kept.
for record in document:
    if len(record) == 4:
        del record[2]

In [14]:
# Put the metadata record in front of the abstract and sections. The check keeps a
# re-run of this cell from prepending the metadata a second time.
if not document or document[0][1] != 'Metadata':
    document = metadata + document
document

[[0,
  'Metadata',
  {'Title': 'How the shape of fillers affects the barrier properties of polymer/non-porous particles nanocomposites: A review',
   'Publication Date': '03 April 2018',
   'DOI': '10.1016/j.memsci.2018.03.085'}],
 [0,
  'Abstract',
  {1: {1: 'More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite.',
    2: 'It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties.',
    3: 'Results revealed that this expected impact was not systematically achieved, even for impermeable lamellar fillers that usua

In [15]:
# # TEXT EXTRACTION FOR ANNOTATION (after identifying sentences from paragraphs)

# document_length = len(document)
# text = ''

# for index in range(1, document_length):
#     for _, paragraph in document[index][2].items():
#         paragraph_text = ''
#         for _, sentence in paragraph.items():
#             paragraph_text = paragraph_text + sentence + ' '
        
#         with open('test_file2.txt', 'a', encoding='utf-8') as output_file:
#             output_file.write(paragraph_text + '\n')
# #         text = text + paragraph_text + '\n'
# # print(text)
# #         text = ''
# #         for _, sentence in paragraph.items():
# #             text = text + sentence + ' '
# #         print(text)
# # #         print()

# # for record in document:
# #     for _, paragraph in record[2].items():
# #         print(paragraph)
# #         print('----')
# # #         with open('test_file.txt', 'a', encoding='utf-8') as output_file:
# # #             output_file.write(paragraph + '\n')

### List to RDF

In [16]:
# Rebind `nlp` to a pipeline loaded from a local directory; from here on `nlp`
# no longer refers to 'en_core_web_sm'. The RDF cell below reads `doc.ents` from it.
# NOTE(review): absolute, user-specific path - presumably a custom-trained entity
# model; confirm and move the location to a configurable setting.
nlp = spacy.load('C:/Users/umayer/_dev/experiment/SINGLE_LABEL_CELLULOSIC_MODEL_ACC/model-best/')

In [230]:
# printing abstract and sections
#
# Prints the document (metadata, abstract, sections, sentences and the entities
# found in each sentence) as Turtle. Every subject is written as a run of lines
# ending in ';', with '.' after the last one, followed by a blank line.


def _turtle_escape(value):
    """Return `value` as text that is safe inside a single-quoted Turtle literal."""
    return (str(value)
            .replace('\\', '\\\\')     # backslash first, so later escapes are not doubled
            .replace("'", "\\'")
            .replace('\n', '\\n')
            .replace('\r', '\\r'))


def _print_statements(statements):
    """Print one subject: ' ;' after every line but the last, ' .' after the last."""
    for statement in statements[:-1]:
        print(f"{statement} ;")
    print(f"{statements[-1]} .")
    print()


doi = document[0][2]['DOI']
# The DOI becomes part of every resource name. '/' was already mapped to '_'; any
# other character outside [A-Za-z0-9._-] (e.g. parentheses) is mapped the same way
# so the prefixed names stay valid.
doi = re.sub(r'[^A-Za-z0-9._-]', '_', doi)
term_number = 1
labeling_schema = {0: 'ENTITY', 
                   1: 'CHEMICAL_ENTITY', 
                   2: 'MATERIAL_ENTITY', 
                   3: 'STRUCTURE_ENTITY', 
                   4: 'RELATIONSHIP', 
                   5: 'PROPERTY', 
                   6: 'PROCESS_OR_TECHNIQUE', 
                   7: 'APPLICATION', 
                   8: 'MEASUREMENT', 
                   9: 'ABBREVIATION'}
# Reverse lookup: label text -> label number.
label_numbers = {label_text: label_number for label_number, label_text in labeling_schema.items()}

print("@prefix SciPub: <http://spatialai.org/SciPub/v2.0#> .")
print("@prefix : <http://spatialai.org/SciPub/v2.0/data#> .")
print("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .")
print("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .")
print("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .")
print("@prefix owl: <http://www.w3.org/2002/07/owl#> .")
print()

for record_index, record in enumerate(document):
    if record[1] == 'Metadata':
        # NOTE(review): the date is printed exactly as extracted (e.g. '03 April 2018'),
        # which is not the YYYY-MM-DD lexical form xsd:date requires.
        _print_statements([
            ":Publication rdf:type SciPub:ScientificPublication",
            f"SciPub:title '{_turtle_escape(record[2]['Title'])}'^^xsd:string",
            f"SciPub:doi '{_turtle_escape(record[2]['DOI'])}'^^xsd:string",
            f"SciPub:publicationDate '{_turtle_escape(record[2]['Publication Date'])}'^^xsd:date",
        ])
        continue

    # The abstract and the body sections share one layout; they differ only in the
    # resource name, the rdf:type and the header text.
    is_abstract = record[1] == 'Abstract'
    part_id = f"{doi}_A" if is_abstract else f"{doi}_{record[0]}"
    paragraphs = record[2]

    # The part following this one; the last record points to 'EndOfDocument'.
    if record_index + 1 < len(document):
        next_section = document[record_index + 1][0]
    else:
        next_section = 'EndOfDocument'

    # RDF>> ABSTRACT/SECTION rdf:type Abstract/Section
    # RDF>> SECTION sp:headerText 'TEXT_OF_HEADER'^^xsd:string
    # RDF>> ABSTRACT/SECTION sp:directlyContainsDocumentPart PARAGRAPH
    if is_abstract:
        part_statements = [f":{part_id} rdf:type SciPub:Abstract"]
    else:
        part_statements = [f":{part_id} rdf:type SciPub:Section",
                           f"SciPub:headerText '{_turtle_escape(record[1])}'^^xsd:string"]

    paragraph_positions = [f":{part_id}-{paragraph_number}" for paragraph_number in paragraphs]
    if paragraph_positions:      # an empty object list would not be valid Turtle
        part_statements.append(f"SciPub:directlyContainsDocumentPart {', '.join(paragraph_positions)}")
    _print_statements(part_statements)

    for paragraph_number, paragraph_object in paragraphs.items():
        paragraph_id = f"{part_id}-{paragraph_number}"

        # RDF>> PARAGRAPH rdf:type Paragraph
        # RDF>> PARAGRAPH sp:directlyContainsDocumentPart SENTENCE
        paragraph_statements = [f":{paragraph_id} rdf:type SciPub:Paragraph"]
        sentence_positions = [f":{paragraph_id}-{sentence_number}" for sentence_number in paragraph_object]
        if sentence_positions:
            paragraph_statements.append(f"SciPub:directlyContainsDocumentPart {', '.join(sentence_positions)}")
        _print_statements(paragraph_statements)

        # RDF>> SENTENCE rdf:type Sentence
        # RDF>> SENTENCE sp:positionInParentDocumentPart 'POSITION'^^xsd:nonNegativeInteger
        # RDF>> SENTENCE sp:nextDocumentPart SENTENCE/PARAGRAPH/SECTION
        # RDF>> SENTENCE sp:sentenceText 'TEXT_OF_SENTENCE'^^xsd:string
        for sentence_number, sentence_text in paragraph_object.items():
            sentence_id = f"{paragraph_id}-{sentence_number}"

            # Sentences and paragraphs are numbered 1..n, so the last one has
            # a number equal to the count.
            if sentence_number == len(sentence_positions):
                if paragraph_number == len(paragraph_positions):
                    next_part = f"{doi}_{next_section}"
                else:
                    next_part = f"{part_id}-{paragraph_number + 1}"
            else:
                next_part = f"{paragraph_id}-{sentence_number + 1}"

            _print_statements([
                f":{sentence_id} rdf:type SciPub:Sentence",
                f"SciPub:positionInParentDocumentPart '{sentence_number}'^^xsd:nonNegativeInteger",
                f"SciPub:nextDocumentPart :{next_part}",
                f"SciPub:sentenceText '{_turtle_escape(sentence_text)}'^^xsd:string",
            ])

            # RDF>> LABELED_TERM rdf:type AtomicLabeledTerm/CompoundLabeledTerm
            # RDF>> LABELED_TERM sp:labeledTermText 'TEXT_OF_TERM'^^xsd:string
            # RDF>> LABELED_TERM sp:labeledTermIsContainedBy SENTENCE
            # RDF>> LABELED_TERM sp:offset 'STARTING_POSITION'^^xsd:nonNegativeInteger
            # RDF>> LABELED_TERM sp:length 'LENGTH'^^xsd:nonNegativeInteger
            # RDF>> LABELED_TERM sp:hasLabel LABEL
            # NOTE(review): the plan above names sp:labeledTermIsContainedBy, while the
            # predicate printed below is documentPartIsDirectlyContainedBy - confirm
            # which one the ontology expects.
            # Entities are detected on the raw sentence, so offset and length refer to
            # the text the literal denotes rather than to its escaped spelling.
            for ent in nlp(sentence_text).ents:
                # Labels missing from the schema fall back to the generic :Label0.
                label_number = label_numbers.get(ent.label_, 0)
                _print_statements([
                    f":LabeledTerm{term_number} rdf:type SciPub:AtomicLabeledTerm",  ## DEAL WITH ATOMIC / COMPOUND
                    f"SciPub:labeledTermText '{_turtle_escape(ent.text)}'^^xsd:string",
                    f"SciPub:documentPartIsDirectlyContainedBy :{sentence_id}",
                    f"SciPub:offset '{ent.start_char}'^^xsd:nonNegativeInteger",
                    f"SciPub:length '{ent.end_char - ent.start_char}'^^xsd:nonNegativeInteger",
                    f"SciPub:hasLabel :Label{label_number}",
                ])
                term_number += 1

    print('#=================== TEMPORARY SECTION DIVIDER ===================#')
    print()

for label_number, label_text in labeling_schema.items():
    _print_statements([
        f":Label{label_number} rdf:type SciPub:Label",
        f"SciPub:fromLabelingSchema :LabelingSchema{label_number}",
        f"SciPub:labelText '{label_text}'^^xsd:string",
    ])

for label_number in labeling_schema:
    print(f":LabelingSchema{label_number} rdf:type SciPub:LabelingSchema .")
    

@prefix SciPub: <http://spatialai.org/SciPub/v2.0#> .
@prefix : <http://spatialai.org/SciPub/v2.0/data#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .

:Publication rdf:type SciPub:ScientificPublication ;
SciPub:title 'How the shape of fillers affects the barrier properties of polymer/non-porous particles nanocomposites: A review'^^xsd:string ;
SciPub:doi '10.1016/j.memsci.2018.03.085'^^xsd:string ;
SciPub:publicationDate '03 April 2018'^^xsd:date .

:10.1016_j.memsci.2018.03.085_A rdf:type SciPub:Abstract ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_A-1 .

:10.1016_j.memsci.2018.03.085_A-1 rdf:type SciPub:Paragraph ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_A-1-1, :10.1016_j.memsci.2018.03.085_A-1-2, :10.1016_j.memsci.2018.03.085_A-1-3, :10.1016_j.memsci.2018.03.085_A-

:LabeledTerm33 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'matrix'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_1-1-4 ;
SciPub:offset '131'^^xsd:nonNegativeInteger ;
SciPub:length '6'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm34 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'physical obstacles'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_1-1-4 ;
SciPub:offset '151'^^xsd:nonNegativeInteger ;
SciPub:length '18'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm35 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'molecules'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_1-1-4 ;
SciPub:offset '215'^^xsd:nonNegativeInteger ;
SciPub:length '9'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_1-1-5 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '5'^^x

:LabeledTerm61 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_1-2-5 ;
SciPub:offset '52'^^xsd:nonNegativeInteger ;
SciPub:length '14'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_1-2-6 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '6'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_1-2-7 ;
SciPub:sentenceText 'Among these studies, some of them focused slightly on the mass transfer properties  but without deepening the topic.'^^xsd:string .

:10.1016_j.memsci.2018.03.085_1-2-7 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '7'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_1-2-8 ;
SciPub:sentenceText 'These studies generally focused on one type of nanoparticle, i.e. spherical nanoparticles or nano-platelets for instance, particularly for mem

:LabeledTerm88 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'quantitative models'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_1-3-5 ;
SciPub:offset '127'^^xsd:nonNegativeInteger ;
SciPub:length '19'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_1-3-6 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '6'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_1-3-7 ;
SciPub:sentenceText 'It must be specified that, in order to build up the database for analysis, which is of interest here, recent data for gas barrier in polymer based materials with dispersed graphene and graphene derivatives were not taken into consideration.'^^xsd:string .

:LabeledTerm89 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'polymer based materials'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_1-3-6 ;
SciPub:offset '133'^^xsd:nonNeg

:LabeledTerm111 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'polymer matrices'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_2-1-9 ;
SciPub:offset '26'^^xsd:nonNegativeInteger ;
SciPub:length '16'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm112 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanoparticles'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_2-1-9 ;
SciPub:offset '47'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm113 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanocomposites based'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_2-1-9 ;
SciPub:offset '120'^^xsd:nonNegativeInteger ;
SciPub:length '20'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_2-1-10 rdf:type SciPub:Sentence ;
SciPub:positionInParen


:10.1016_j.memsci.2018.03.085_3.1 rdf:type SciPub:Section ;
SciPub:headerText 'Theoretical background'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_3.1-1, :10.1016_j.memsci.2018.03.085_3.1-2, :10.1016_j.memsci.2018.03.085_3.1-3 .

:10.1016_j.memsci.2018.03.085_3.1-1 rdf:type SciPub:Paragraph ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_3.1-1-1, :10.1016_j.memsci.2018.03.085_3.1-1-2, :10.1016_j.memsci.2018.03.085_3.1-1-3, :10.1016_j.memsci.2018.03.085_3.1-1-4 .

:10.1016_j.memsci.2018.03.085_3.1-1-1 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.1-1-2 ;
SciPub:sentenceText 'A great amount of effort has been made in the literature to describe the effect on transport properties of the inclusion of non-conducting particles into a conductive matrix and the effect of particle shape has been a focus since the beginning of these studie

:LabeledTerm148 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'contour length'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.1-2-3 ;
SciPub:offset '130'^^xsd:nonNegativeInteger ;
SciPub:length '14'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm149 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'diffusing molecules'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.1-2-3 ;
SciPub:offset '156'^^xsd:nonNegativeInteger ;
SciPub:length '19'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm150 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'composite medium'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.1-2-3 ;
SciPub:offset '183'^^xsd:nonNegativeInteger ;
SciPub:length '16'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_3.1-2-4 rdf:type SciPub:Sentence ;
SciPub:positi

:LabeledTerm179 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.2-1-7 ;
SciPub:offset '32'^^xsd:nonNegativeInteger ;
SciPub:length '14'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_3.2-2 rdf:type SciPub:Paragraph ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_3.2-2-1, :10.1016_j.memsci.2018.03.085_3.2-2-2 .

:10.1016_j.memsci.2018.03.085_3.2-2-1 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.2-2-2 ;
SciPub:sentenceText 'In the following, the evolution of the relative permeability, i.e. the ratio of the composite permeability to the permeability of the neat matrix (P/P <sub>0</sub> ), was represented as a function of the nanoparticle volume fraction (φ <sub>vol</sub> ), which is considered as input parameter in all 

:LabeledTerm195 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'scatter graph'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-1-3 ;
SciPub:offset '13'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_3.3-1-4 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '4'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.3-1-5 ;
SciPub:sentenceText 'For comparison, 45 publications were found for isodimensional nanoparticles-based nanocomposites against 22 for elongated nanoparticles-based nanocomposites, resulting in about 380 and 120 permeability values respectively (Fig. ).'^^xsd:string .

:LabeledTerm196 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'isodimensional nanoparticles-based nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-1-4 ;
SciPub:offse

:LabeledTerm212 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'The</super'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-2-1 ;
SciPub:offset '9'^^xsd:nonNegativeInteger ;
SciPub:length '10'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm213 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'φ</super'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-2-1 ;
SciPub:offset '56'^^xsd:nonNegativeInteger ;
SciPub:length '8'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm214 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'range</super'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-2-1 ;
SciPub:offset '92'^^xsd:nonNegativeInteger ;
SciPub:length '12'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm215 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'higher</super'^^xsd:string

:LabeledTerm240 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'scatter graph'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-3-4 ;
SciPub:offset '16'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm241 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'P/P <sub>0</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-3-4 ;
SciPub:offset '100'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_3.3-3-5 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '5'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.3-4 ;
SciPub:sentenceText 'On the contrary, Fig.  shows that for nanoplatelets, except for one or two cases, P/P <sub>0</sub> values remain always below 1.'^^xsd:string .

:LabeledTerm242 rdf:type SciPub:

:LabeledTerm261 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'isodimensional nanoparticles'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-5-2 ;
SciPub:offset '51'^^xsd:nonNegativeInteger ;
SciPub:length '28'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm262 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanoparticles'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-5-2 ;
SciPub:offset '112'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm263 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanoplatelets'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.3-5-2 ;
SciPub:offset '157'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_3.3-6 rdf:type SciPub:Paragraph ;
SciPub:dir

:LabeledTerm280 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'quantitative analysis'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.4-1-1 ;
SciPub:offset '22'^^xsd:nonNegativeInteger ;
SciPub:length '21'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm281 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanoparticle shape'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.4-1-1 ;
SciPub:offset '78'^^xsd:nonNegativeInteger ;
SciPub:length '18'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm282 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'P/P <sub>0</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.4-1-1 ;
SciPub:offset '100'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm283 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 

:LabeledTerm305 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'volume'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.4-5-1 ;
SciPub:offset '177'^^xsd:nonNegativeInteger ;
SciPub:length '6'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_3.4-5-2 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '2'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.4-5-3 ;
SciPub:sentenceText 'The classes chosen for filler fractions were 0-5 vol%, 5-10 vol%, 10-20 vol% and 20-30 vol%.'^^xsd:string .

:LabeledTerm306 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText '5-10 vol%'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.4-5-2 ;
SciPub:offset '55'^^xsd:nonNegativeInteger ;
SciPub:length '9'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_3.4-5-3 rdf:type SciPub:Sentence ;
SciPub:positionI

:10.1016_j.memsci.2018.03.085_3.4-5-15 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '15'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.4-6 ;
SciPub:sentenceText 'We noted that studies conducted with such high amounts of platelets (&gt; 10 vol%) were scarce, which makes it difficult to generalize the effect of high contents of platelets on the permeability of resulting nanocomposites (Fig. ).'^^xsd:string .

:LabeledTerm324 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'contents'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.4-5-15 ;
SciPub:offset '154'^^xsd:nonNegativeInteger ;
SciPub:length '8'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm325 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_3.4-5-15 ;
SciPub:offset '209'^^xsd:nonNegativeInteger ;
Sc

:10.1016_j.memsci.2018.03.085_4.1-1-3 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '3'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.1-1-4 ;
SciPub:sentenceText 'for a large range of nanoparticles displaying various shapes, either isodimensional, elongated or nanoplatelets.'^^xsd:string .

:LabeledTerm345 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanoparticles'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1-1-3 ;
SciPub:offset '21'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm346 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'shapes'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1-1-3 ;
SciPub:offset '54'^^xsd:nonNegativeInteger ;
SciPub:length '6'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm347 rdf:type SciPub:AtomicLabeledTerm ;
SciPu

:LabeledTerm361 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'tortuosity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-2-1 ;
SciPub:offset '5'^^xsd:nonNegativeInteger ;
SciPub:length '10'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm362 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'volume'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-2-1 ;
SciPub:offset '44'^^xsd:nonNegativeInteger ;
SciPub:length '6'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm363 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'papers:'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-2-1 ;
SciPub:offset '173'^^xsd:nonNegativeInteger ;
SciPub:length '7'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.1.1-3 rdf:type SciPub:Paragraph ;
SciPub:directlyContainsDocumentPart :

:LabeledTerm392 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'size'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-4-12 ;
SciPub:offset '30'^^xsd:nonNegativeInteger ;
SciPub:length '4'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm393 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'native nanoparticle'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-4-12 ;
SciPub:offset '89'^^xsd:nonNegativeInteger ;
SciPub:length '19'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.1.1-4-13 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '13'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.1.1-4-14 ;
SciPub:sentenceText 'As an example, Angellier-Coussy et al.  found average values of in-situ size aspect ratio of 17 and 23 for 10.8 and 2.2 vol% of filler, respectively in wheat gluten/mont

:10.1016_j.memsci.2018.03.085_4.1.1-6-4 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '4'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.1.1-6-5 ;
SciPub:sentenceText 'Contrary to the in-situ size aspect ratio, which is rarely determined, the impact of the particle dispersion state on nanocomposite mass transfer properties has been largely demonstrated.'^^xsd:string .

:LabeledTerm419 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'size'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-6-4 ;
SciPub:offset '24'^^xsd:nonNegativeInteger ;
SciPub:length '4'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm420 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanocomposite mass'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-6-4 ;
SciPub:offset '118'^^xsd:nonNegativeInteger ;
SciPub:length '18'^^xsd:nonNegativeInteger

:10.1016_j.memsci.2018.03.085_4.1.1-8-10 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '10'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.1.1-8-11 ;
SciPub:sentenceText 'It is all the more difficult that particles display high aspect ratio in 2dimension such as platelets.'^^xsd:string .

:LabeledTerm449 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText '2dimension'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-8-10 ;
SciPub:offset '73'^^xsd:nonNegativeInteger ;
SciPub:length '10'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm450 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'platelets'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-8-10 ;
SciPub:offset '92'^^xsd:nonNegativeInteger ;
SciPub:length '9'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.1.1-8-11 rdf:type SciP

:LabeledTerm466 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanoparticles'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.2-1-4 ;
SciPub:offset '14'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm467 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'polymer crystallinity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.2-1-4 ;
SciPub:offset '31'^^xsd:nonNegativeInteger ;
SciPub:length '21'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm468 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'spherical particles'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.2-1-4 ;
SciPub:offset '85'^^xsd:nonNegativeInteger ;
SciPub:length '19'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm469 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermT

:LabeledTerm485 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'wheat gluten/montmorillonite'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.3-1-6 ;
SciPub:offset '8'^^xsd:nonNegativeInteger ;
SciPub:length '28'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.1.3-1-7 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '7'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2 ;
SciPub:sentenceText 'in soy protein/ montmorillonite nanocomposites.'^^xsd:string .

:LabeledTerm486 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'protein/ montmorillonite nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.3-1-7 ;
SciPub:offset '7'^^xsd:nonNegativeInteger ;
SciPub:length '39'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .


:10.1016_j.memsci.2018.03.085_4.2 rdf:type SciPub:Section ;
SciPub:heade

:10.1016_j.memsci.2018.03.085_4.2.1-1-5 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '5'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2.1-1-6 ;
SciPub:sentenceText 'to explain the constancy of O <sub>2</sub> permeability in wheat gluten/montmorillonite nanocomposites.'^^xsd:string .

:LabeledTerm504 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'O <sub>2</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.1-1-5 ;
SciPub:offset '28'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm505 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'wheat gluten/montmorillonite nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.1-1-5 ;
SciPub:offset '59'^^xsd:nonNegativeInteger ;
SciPub:length '43'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.20

:10.1016_j.memsci.2018.03.085_4.2.1-3 rdf:type SciPub:Paragraph ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4.2.1-3-1, :10.1016_j.memsci.2018.03.085_4.2.1-3-2, :10.1016_j.memsci.2018.03.085_4.2.1-3-3, :10.1016_j.memsci.2018.03.085_4.2.1-3-4, :10.1016_j.memsci.2018.03.085_4.2.1-3-5, :10.1016_j.memsci.2018.03.085_4.2.1-3-6, :10.1016_j.memsci.2018.03.085_4.2.1-3-7, :10.1016_j.memsci.2018.03.085_4.2.1-3-8 .

:10.1016_j.memsci.2018.03.085_4.2.1-3-1 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2.1-3-2 ;
SciPub:sentenceText 'Agglomeration could lead to the formation of "interfacial voids" at the particle/polymer matrix interface and to the formation of a preferential pathway for the migration of the molecular penetrant (Ismail et al. , Rafiq et al. ).'^^xsd:string .

:LabeledTerm542 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'Agglomeration'^^xsd:strin

:LabeledTerm567 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'volume'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.3-2-5 ;
SciPub:offset '20'^^xsd:nonNegativeInteger ;
SciPub:length '6'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm568 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'bad compatibility'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.3-2-5 ;
SciPub:offset '99'^^xsd:nonNegativeInteger ;
SciPub:length '17'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm569 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'compatibilizer'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.3-2-5 ;
SciPub:offset '129'^^xsd:nonNegativeInteger ;
SciPub:length '14'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.2.3-2-6 rdf:type SciPub:Sentence ;
SciPub:positionInPar

:LabeledTerm587 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'volume'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.3-3-5 ;
SciPub:offset '114'^^xsd:nonNegativeInteger ;
SciPub:length '6'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm588 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'evidenced'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.3-3-5 ;
SciPub:offset '124'^^xsd:nonNegativeInteger ;
SciPub:length '9'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.2.3-3-6 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '6'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2.4 ;
SciPub:sentenceText 'in spherical-based composites with titanium dioxide.'^^xsd:string .

:LabeledTerm589 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'spherical-based composites'^^xsd:string 

:LabeledTerm604 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'water vapor permeability'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.4-2-2 ;
SciPub:offset '73'^^xsd:nonNegativeInteger ;
SciPub:length '24'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm605 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'hydroxyl propyl methyl'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.4-2-2 ;
SciPub:offset '101'^^xsd:nonNegativeInteger ;
SciPub:length '22'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm606 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'microcrystalline cellulose nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.4-2-2 ;
SciPub:offset '134'^^xsd:nonNegativeInteger ;
SciPub:length '41'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm607 rdf:type SciPub:Ato

:LabeledTerm620 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'P/P <sub>0</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.3-1-2 ;
SciPub:offset '102'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.3-2 rdf:type SciPub:Paragraph ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4.3-2-1, :10.1016_j.memsci.2018.03.085_4.3-2-2, :10.1016_j.memsci.2018.03.085_4.3-2-3, :10.1016_j.memsci.2018.03.085_4.3-2-4, :10.1016_j.memsci.2018.03.085_4.3-2-5, :10.1016_j.memsci.2018.03.085_4.3-2-6 .

:10.1016_j.memsci.2018.03.085_4.3-2-1 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.3-2-2 ;
SciPub:sentenceText 'The most observed non-monotonic variation of P/P <sub>0</sub> is first a decrease of P/P <sub>0</sub> until reaching a threshold value of fill

:LabeledTerm647 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'purpose'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.4-1-4 ;
SciPub:offset '9'^^xsd:nonNegativeInteger ;
SciPub:length '7'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm648 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'diffusivity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.4-1-4 ;
SciPub:offset '48'^^xsd:nonNegativeInteger ;
SciPub:length '11'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm649 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'solubility coefficients'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.4-1-4 ;
SciPub:offset '67'^^xsd:nonNegativeInteger ;
SciPub:length '23'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.4-2 rdf:type SciPub:Paragraph ;
SciPub:directlyContainsDocu

:LabeledTerm667 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'solubility'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.4-3-4 ;
SciPub:offset '107'^^xsd:nonNegativeInteger ;
SciPub:length '10'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.4-3-5 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '5'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.4-4 ;
SciPub:sentenceText 'As mentioned above in § 3.2, increase of S could also be related to the progressive uncovering of active sorption sites in the polymer matrix as a consequence of particle addition and to the creation of specific sites at the particle/polymer interface that favor the sorption of molecular penetrants as hypothesized by Sadeghi et al.  and Suzuki et al. .'^^xsd:string .

:LabeledTerm668 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText '§ 3.2'^^xsd:string ;
SciPub:documentPa

:LabeledTerm691 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'P/P <sub>0</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-1-2 ;
SciPub:offset '71'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm692 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-1-2 ;
SciPub:offset '91'^^xsd:nonNegativeInteger ;
SciPub:length '14'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.5-1-3 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '3'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-1-4 ;
SciPub:sentenceText 'Fig. .'^^xsd:string .

:10.1016_j.memsci.2018.03.085_4.5-1-4 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '4'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPa

:LabeledTerm712 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'CO <'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-1-10 ;
SciPub:offset '18'^^xsd:nonNegativeInteger ;
SciPub:length '4'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm713 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText '/O'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-1-10 ;
SciPub:offset '34'^^xsd:nonNegativeInteger ;
SciPub:length '2'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.5-1-11 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '11'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-1-12 ;
SciPub:sentenceText '<sub>2</sub> and H <sub>2</sub>'^^xsd:string .

:LabeledTerm714 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'H <sub>2</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :

:LabeledTerm736 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'permselectivity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-3-3 ;
SciPub:offset '20'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.5-3-4 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '4'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-3-5 ;
SciPub:sentenceText 'Evolution of the H <sub>2</sub> 0/O <sub>2</sub> selectivity ratio* as a function of particle volume fraction in nanocomposite materials containing either elongated or layered particles.'^^xsd:string .

:LabeledTerm737 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'H <sub>2</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-3-4 ;
SciPub:offset '17'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;


:LabeledTerm763 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'permselectivity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-3-16 ;
SciPub:offset '13'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm764 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanoparticle'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-3-16 ;
SciPub:offset '54'^^xsd:nonNegativeInteger ;
SciPub:length '12'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm765 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'MMT'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-3-16 ;
SciPub:offset '68'^^xsd:nonNegativeInteger ;
SciPub:length '3'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm766 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'LDPE matrix'^^xsd:stri

:LabeledTerm778 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'permselectivity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-4-5 ;
SciPub:offset '13'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.5-4-6 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '6'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-4-7 ;
SciPub:sentenceText 'Among this group of studies, we noted a general trend toward an increase of permselectivity with nanoparticle volume fraction (4 studies  observing an increase against only one  observing a decrease).'^^xsd:string .

:LabeledTerm779 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'studies'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-4-6 ;
SciPub:offset '20'^^xsd:nonNegativeInteger ;
SciPub:length '7'^^xsd:nonNegativeInt

:10.1016_j.memsci.2018.03.085_4.5-4-22 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '22'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-4-23 ;
SciPub:sentenceText 'for O <sub>2</sub> /N'^^xsd:string .

:LabeledTerm812 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'O <sub>2</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-4-22 ;
SciPub:offset '4'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm813 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText '/N'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-4-22 ;
SciPub:offset '19'^^xsd:nonNegativeInteger ;
SciPub:length '2'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.5-4-23 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '23'^^xsd:nonNegativeInteger ;
SciPub:nextDocu

:LabeledTerm855 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanoparticle content'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-4-34 ;
SciPub:offset '42'^^xsd:nonNegativeInteger ;
SciPub:length '20'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.5-5 rdf:type SciPub:Paragraph ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4.5-5-1, :10.1016_j.memsci.2018.03.085_4.5-5-2, :10.1016_j.memsci.2018.03.085_4.5-5-3, :10.1016_j.memsci.2018.03.085_4.5-5-4, :10.1016_j.memsci.2018.03.085_4.5-5-5, :10.1016_j.memsci.2018.03.085_4.5-5-6, :10.1016_j.memsci.2018.03.085_4.5-5-7 .

:10.1016_j.memsci.2018.03.085_4.5-5-1 rdf:type SciPub:Sentence ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-5-2 ;
SciPub:sentenceText 'The comparative examination of permeability and selectivity variation of data collected in the dat

:LabeledTerm888 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'nanocomposites'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-6-6 ;
SciPub:offset '37'^^xsd:nonNegativeInteger ;
SciPub:length '14'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm889 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'permselectivity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-6-6 ;
SciPub:offset '191'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm890 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'tortuosity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-6-6 ;
SciPub:offset '297'^^xsd:nonNegativeInteger ;
SciPub:length '10'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm891 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'gas molecules

:LabeledTerm905 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'O/O <sub>2</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-6-16 ;
SciPub:offset '0'^^xsd:nonNegativeInteger ;
SciPub:length '15'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm906 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'O <sub>2</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-6-16 ;
SciPub:offset '137'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm907 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'H <sub>2</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.5-6-16 ;
SciPub:offset '199'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:10.1016_j.memsci.2018.03.085_4.5-6-17 rdf:type SciPub:Sentence ;
SciPub:positionInPa

:LabeledTerm932 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'tortuosity'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_5-2-3 ;
SciPub:offset '113'^^xsd:nonNegativeInteger ;
SciPub:length '10'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm933 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'sorption'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_5-2-3 ;
SciPub:offset '125'^^xsd:nonNegativeInteger ;
SciPub:length '8'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm934 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'polymer matrix'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_5-2-3 ;
SciPub:offset '135'^^xsd:nonNegativeInteger ;
SciPub:length '14'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .

:LabeledTerm935 rdf:type SciPub:AtomicLabeledTerm ;
SciPub:labeledTermText 'crystallinity'^^xsd:string

# Connect to GraphDB

In [56]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import json

In [231]:
# Query the graph database for every sentence and its labeled terms

# Repository endpoint (host name looks machine-specific -- TODO confirm before sharing)
repository_url = "http://LAPTOP-S7TVD5I4:7200/repositories/TestRepo"

# One result row per (sentence, labeled term) pair. The OPTIONAL block keeps
# sentences without any labeled term: those rows carry only ?sentenceID,
# ?sentenceText and ?systemDateTime.
sentence_annotation_query = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX : <http://spatialai.org/SciPub/v2.0#>
    PREFIX SciPub: <http://spatialai.org/SciPub/v2.0#>

    SELECT ?sentenceID ?sentenceText ?labeledTermText ?offset ?length ?labelText (NOW() AS ?systemDateTime)
    WHERE {
        ?paragraphID rdf:type SciPub:Paragraph ;
                     SciPub:directlyContainsDocumentPart ?sentenceID .

        ?sentenceID SciPub:sentenceText ?sentenceText .

        OPTIONAL {
            ?labeledTermID SciPub:documentPartIsDirectlyContainedBy ?sentenceID ;
                           SciPub:labeledTermText ?labeledTermText ;
                           SciPub:offset ?offset ;
                           SciPub:length ?length ;
                           SciPub:hasLabel ?labelID .

            ?labelID rdf:type SciPub:Label ;
                     SciPub:labelText ?labelText .
        }
    }
"""

sparql = SPARQLWrapper(repository_url)
sparql.setQuery(sentence_annotation_query)

# Ask for a JSON response, run the query and decode the result
sparql.setReturnFormat(JSON)
query_result = sparql.query()
fetched_data = query_result.convert()


In [266]:
# convert query result from JSON to dataframe

column_header = ['sentence_id', 'sentence_text', 'labeled_term_text', 'offset', 'length', 'label_text', 'system_datetime']

# SPARQL result variable -> dataframe column (same order as column_header)
binding_to_column = {
    'sentenceID': 'sentence_id',
    'sentenceText': 'sentence_text',
    'labeledTermText': 'labeled_term_text',
    'offset': 'offset',
    'length': 'length',
    'labelText': 'label_text',
    'systemDateTime': 'system_datetime',
}


def _format_system_datetime(raw_datetime):
    """Turn an xsd:dateTime string into 'YYYY-MM-DD hh:mm:ss'.

    Only the fixed-width 19-character date/time prefix is kept, so the result
    does not depend on how the fractional seconds or the timezone suffix
    ('Z', '+02:00', none) are written. Slicing from the end ([:-10]) only
    worked for a 3-digit fraction followed by a '+hh:mm' offset.
    """
    return raw_datetime[:19].replace('T', ' ')


column_values = {column: [] for column in column_header}

for record in fetched_data['results']['bindings']:
    # 7 bindings: sentence with a labeled term; 3 bindings: the OPTIONAL part
    # of the query did not match (sentence without labeled term).
    if len(record) not in (7, 3):
        print('Different record length (should be 7 or 3) found in SPARQL result (JSON format).')
        continue

    for variable, column in binding_to_column.items():
        if variable in record:
            value = record[variable]['value']
            if variable == 'systemDateTime':
                value = _format_system_datetime(value)
        else:
            # unbound variable -> empty string placeholder
            value = ''
        column_values[column].append(value)

# Every accepted record contributes exactly one value per column, so the
# columns always have equal length and the frame can be built in one step.
fetched_data_df = pd.DataFrame(column_values, columns=column_header)

# Keep the per-column lists bound under their original names; the cell that
# builds the annotation JSON reads some of them.
sentence_id = column_values['sentence_id']
sentence_text = column_values['sentence_text']
labeled_term_text = column_values['labeled_term_text']
offset = column_values['offset']
length = column_values['length']
label_text = column_values['label_text']
system_datetime = column_values['system_datetime']

In [274]:
fetched_data_df

Unnamed: 0,sentence_id,sentence_text,labeled_term_text,offset,length,label_text,system_datetime
0,http://spatialai.org/SciPub/v2.0/data#10.1016_...,More than 1000 published experimental data of ...,gas,46,3,ENTITY,2023-12-06 02:02:48
1,http://spatialai.org/SciPub/v2.0/data#10.1016_...,More than 1000 published experimental data of ...,O 2,51,3,ENTITY,2023-12-06 02:02:48
2,http://spatialai.org/SciPub/v2.0/data#10.1016_...,More than 1000 published experimental data of ...,CO,59,2,ENTITY,2023-12-06 02:02:48
3,http://spatialai.org/SciPub/v2.0/data#10.1016_...,More than 1000 published experimental data of ...,H 2 O,77,5,ENTITY,2023-12-06 02:02:48
4,http://spatialai.org/SciPub/v2.0/data#10.1016_...,More than 1000 published experimental data of ...,nanocomposites,100,14,ENTITY,2023-12-06 02:02:48
...,...,...,...,...,...,...,...
1027,http://spatialai.org/SciPub/v2.0/data#10.1016_...,"It was concluded that tortuosity, the main mec...",nanocomposites,98,14,ENTITY,2023-12-06 02:02:48
1028,http://spatialai.org/SciPub/v2.0/data#10.1016_...,"It was concluded that tortuosity, the main mec...",nanoparticles,154,13,ENTITY,2023-12-06 02:02:48
1029,http://spatialai.org/SciPub/v2.0/data#10.1016_...,"It was concluded that tortuosity, the main mec...",nanocomposite barrier properties,171,32,ENTITY,2023-12-06 02:02:48
1030,http://spatialai.org/SciPub/v2.0/data#10.1016_...,This is particularly important knowing that mo...,nanocomposites,108,14,ENTITY,2023-12-06 02:02:48


In [268]:
# create JSON from dataframe
#
# Result layout:
#   {'classes': [],
#    'annotations': [[sentence_id, sentence_text, {'entities': [entity, ...]}], ...]}
# where entity is [start, end, [label, [['Candidate', datetime, 'NER Model']]]],
# or the all-empty placeholder ['', '', ['', [['', '', '']]]] for a row that has
# no labeled term.

distinct_sentence_id = list(fetched_data_df['sentence_id'].unique())
fetched_data_json = {'classes': [], 'annotations': []}

# sort=False keeps sentences in order of first appearance (same order as
# distinct_sentence_id). Values are read from the dataframe rows themselves
# rather than from the module-level lists of the previous cell, so the cell
# does not depend on those lists staying aligned with the dataframe index,
# and the loop variable no longer overwrites the `sentence_id` list.
for current_sentence_id, sentence_rows in fetched_data_df.groupby('sentence_id', sort=False):
    entities = []

    for row in sentence_rows.itertuples(index=False):
        if row.offset == '' or row.length == '' or row.label_text == '':
            entities.append(['', '', ['', [['', '', '']]]])
        else:
            start = int(row.offset)
            end = start + int(row.length)
            entities.append([start, end, [row.label_text, [['Candidate', row.system_datetime, 'NER Model']]]])

    fetched_data_json['annotations'].append([
        current_sentence_id,
        sentence_rows['sentence_text'].iloc[0],
        {'entities': entities},
    ])


In [262]:
# Serialize the annotation dictionary to a JSON string (displayed as the cell output)
json.dumps(fetched_data_json)

'{"classes": [], "annotations": [["http://spatialai.org/SciPub/v2.0/data#10.1016_j.memsci.2018.03.085_A-1-1", "More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite.", {"entities": [[46, 49, ["ENTITY", [["Candidate", "2023-12-06 02:02:48", "NER Model"]]]], [51, 54, ["ENTITY", [["Candidate", "2023-12-06 02:02:48", "NER Model"]]]], [59, 61, ["ENTITY", [["Candidate", "2023-12-06 02:02:48", "NER Model"]]]], [77, 82, ["ENTITY", [["Candidate", "2023-12-06 02:02:48", "NER Model"]]]], [100, 114, ["ENTITY", [["Candidate", "2023-12-06 02:02:48", "NER Model"]]]], [157, 175, ["ENTITY", [["Candidate", "2023-12-06 02:02:48", "NER Model"]]]], [247, 261, ["ENTITY", [["Candidate", "2023-12-06 02:02:48", "NER Model"]]]], [315, 328, [

In [247]:
# Sample sentence for a quote-handling check; inside a double-quoted literal \' is just '
a = "To explain this deviation of the \'ideal behavior\', this paper discusses extensively the impact of the nanoparticle shape on the nanocomposite permeability along with structural aspects, related to both the particle nature and size, and the nanocomposite processing routes."

In [249]:
# "\'" and "'" are the same one-character string, so this returns a string equal to `a`
a.replace("\'", "'")

"To explain this deviation of the 'ideal behavior', this paper discusses extensively the impact of the nanoparticle shape on the nanocomposite permeability along with structural aspects, related to both the particle nature and size, and the nanocomposite processing routes."

In [250]:
# Display `a`; str.replace above returned a new string and left `a` itself unchanged
a

"To explain this deviation of the 'ideal behavior', this paper discusses extensively the impact of the nanoparticle shape on the nanocomposite permeability along with structural aspects, related to both the particle nature and size, and the nanocomposite processing routes."