In [1]:
import xml.etree.ElementTree as ET
import re
import os
import spacy
import scispacy

### XML to Text

In [None]:
# THIS FUNCTION MUST BE ALIGNED WITH CORRESPONDING CODE CELLS

# TEI namespace of the parsed files; naming it replaces the former `tag[29:]`
# slice, which hard-coded the length of the '{namespace-uri}' tag prefix.
_TEI_NS = {'tei': 'http://www.tei-c.org/ns/1.0'}

# Markup clean-up applied, in this order, to every serialised <p> element:
# drop the paragraph tags, rename the reference tags to plain <ref>, then
# remove each reference together with its text.
_MARKUP_SUBSTITUTIONS = (
    (re.compile(r'<ns0:p[^>]+>'), ''),
    (re.compile('</ns0:p>'), ''),
    (re.compile(r'<ns0:ref[^>]+>'), '<ref>'),
    (re.compile('</ns0:ref>'), '</ref>'),
    (re.compile(r'<ref>.*?</ref>'), ''),          # temporary for removing ref tag
)


def _section_number(head):
    """Return the section number stored in a <head> element's `n` attribute.

    A trailing '.' is dropped ('2.1.' -> '2.1'); a head without an `n`
    attribute yields the marker 'NO_SECTION_NUMBER'.
    """
    number = head.get('n')
    if number is None:
        return 'NO_SECTION_NUMBER'
    return number[:-1] if number.endswith('.') else number


def _extract_paragraphs(root):
    """Return the serialised <p> elements of the abstract and the numbered body sections."""
    paragraphs = []

    # abstract paragraphs come first
    for abstract_div in root.iterfind('tei:teiHeader/tei:profileDesc/tei:abstract/tei:div', _TEI_NS):
        paragraphs.extend(abstract_div.iterfind('tei:p', _TEI_NS))

    for section_div in root.iterfind('tei:text/tei:body/tei:div', _TEI_NS):
        section_number = ''
        for head in section_div.iterfind('tei:head', _TEI_NS):
            section_number = _section_number(head)    # the last <head> wins
        # commented logic skips NO_SECTION_NUMBER with no paragraphs, i.e., Table 1, Table 2, .....
        # if section_number == 'NO_SECTION_NUMBER' and not bool(list_of_paragraphs):
        if section_number != 'NO_SECTION_NUMBER':
            paragraphs.extend(section_div.iterfind('tei:p', _TEI_NS))

    return [ET.tostring(paragraph, encoding='unicode') for paragraph in paragraphs]


def _strip_markup(text):
    """Apply every clean-up substitution to one serialised paragraph."""
    for pattern, replacement in _MARKUP_SUBSTITUTIONS:
        text = pattern.sub(replacement, text)
    return text


def xml_to_text_all(directory_path='D:\\Drive\\SISE\\CelloGraph\\Dev\\Data\\XML',
                    output_directory='D:\\Drive\\SISE\\CelloGraph\\Dev\\Data\\Text'):
    """Convert every TEI XML file in `directory_path` to a plain-text file.

    For each file the abstract paragraphs and the paragraphs of every body
    section that is not marked 'NO_SECTION_NUMBER' are written, one per
    line, to '<output_directory>/<file name without .xml>.txt'.

    Parameters:
        directory_path: folder containing the XML files; defaults to the
            path this function previously hard-coded.
        output_directory: existing folder that receives the .txt files;
            defaults to the previously hard-coded path.

    Changes from the earlier version:
        * the output file is written once per document in 'w' mode, so
          running the conversion again no longer appends a second copy of
          the text to the existing file;
        * directory entries that are not '.xml' files are skipped instead
          of being handed to the XML parser;
        * paths are built with os.path.join instead of a literal backslash.

    As before, no output file is created for a document that yields no
    paragraphs. The unused metadata extraction was dropped.
    """
    for xml_file in os.listdir(directory_path):
        source_path = os.path.join(directory_path, xml_file)
        if not (os.path.isfile(source_path) and xml_file.lower().endswith('.xml')):
            continue

        root = ET.parse(source_path).getroot()
        paragraphs = [_strip_markup(text) for text in _extract_paragraphs(root)]
        if not paragraphs:
            continue

        output_file_name = os.path.splitext(xml_file)[0]
        output_path = os.path.join(output_directory, output_file_name + '.txt')
        with open(output_path, 'w', encoding='utf-8') as output_file:
            for paragraph in paragraphs:
                output_file.write(paragraph + '\n')
    

In [None]:
# xml_to_text_all()

### XML Structure
- teiHeader
    - fileDesc
        - titleStmt
            - title
        - publicationStmt
            - date
        - sourceDesc
            - biblStruct
                - idno
    - encodingDesc
    - profileDesc

### List Structure
- [
    - [
        - section_number,
        - section_title,
        - {
            - paragraph_number:
                - {
                    - sentence_number: sentence_text,
                - },
        - }
    - ],
- ]

### XML to List

In [2]:
# LOADING XML AND CREATING ROOT

xml_file = 'D:\\Drive\\SISE\\CelloGraph\\Dev\\Data\\XML\\Wolf et al. - How the shape of fillers affects properties of nanocomposites.tei.xmlScitex.xml'
tree = ET.parse(xml_file)
root = tree.getroot()

# EXTRACTING METADATA (available data: title, publication_date and doi)

# TEI namespace of the parsed file; naming it replaces the former `tag[29:]`
# slice, which hard-coded the length of the '{namespace-uri}' tag prefix.
TEI_NS = {'tei': 'http://www.tei-c.org/ns/1.0'}

# metadata key -> path of the element holding its value, relative to <fileDesc>
METADATA_PATHS = {
    'Title': 'tei:titleStmt/tei:title',
    'Publication Date': 'tei:publicationStmt/tei:date',
    'DOI': 'tei:sourceDesc/tei:biblStruct/tei:idno',
}

# one [0, 'Metadata', {...}] record per <fileDesc> found in the header
metadata = []

for file_desc in root.iterfind('tei:teiHeader/tei:fileDesc', TEI_NS):
    publication_info = {}
    for key, path in METADATA_PATHS.items():
        for node in file_desc.iterfind(path, TEI_NS):
            # when several elements match (e.g. more than one <idno>), the last one wins
            publication_info[key] = node.text
    metadata.append([0, 'Metadata', publication_info])

print(metadata)

[[0, 'Metadata', {'Title': 'How the shape of fillers affects the barrier properties of polymer/non-porous particles nanocomposites: A review', 'Publication Date': '03 April 2018', 'DOI': '10.1016/j.memsci.2018.03.085'}]]


In [3]:
# EXTRACTING ABSTRACT AND NUMBERING PARAGRAPHS

# TEI namespace of the parsed file; naming it replaces the former `tag[29:]`
# slice, which hard-coded the length of the '{namespace-uri}' tag prefix.
TEI_NS = {'tei': 'http://www.tei-c.org/ns/1.0'}

# one [0, 'Abstract', {paragraph_number: serialised <p>}] record per abstract <div>
abstract = []

for abstract_div in root.iterfind('tei:teiHeader/tei:profileDesc/tei:abstract/tei:div', TEI_NS):
    # paragraphs are kept as serialised XML and numbered from 1 in document order
    list_of_paragraphs = {
        paragraph_number: ET.tostring(paragraph, encoding='unicode')
        for paragraph_number, paragraph in enumerate(abstract_div.iterfind('tei:p', TEI_NS), start=1)
    }
    abstract.append([0, 'Abstract', list_of_paragraphs])

print(abstract)

[[0, 'Abstract', {1: '<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite. It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties. Results revealed that this expected impact was not systematically achieved, even for impermeable lamellar fillers that usually displayed the highest aspect ratio. More specifically, an unexpected increase of the permeability in the nanocomposite was often observed. To explain this deviation of the \'ideal behavior\', this paper discu

In [4]:
# EXTRACTING OTHER SECTIONS

# TEI namespace of the parsed file; naming it replaces the former `tag[29:]`
# slice, which hard-coded the length of the '{namespace-uri}' tag prefix.
TEI_NS = {'tei': 'http://www.tei-c.org/ns/1.0'}

# one [section_number, section_name, [serialised <p>, ...]] record per kept <div>
list_of_sections = []

for section_div in root.iterfind('tei:text/tei:body/tei:div', TEI_NS):
    section_number = ''
    section_name = ''

    for head in section_div.iterfind('tei:head', TEI_NS):
        # The number is read from the `n` attribute directly; it used to be
        # sliced out of str(head.attrib), which only worked when `n` was the
        # sole attribute and ended with a '.'.
        number_attribute = head.get('n')
        if number_attribute is None:
            section_number = 'NO_SECTION_NUMBER'
        elif number_attribute.endswith('.'):
            section_number = number_attribute[:-1]      # '2.1.' -> '2.1'
        else:
            section_number = number_attribute
        section_name = head.text

    list_of_paragraphs = [
        ET.tostring(paragraph, encoding='unicode')
        for paragraph in section_div.iterfind('tei:p', TEI_NS)
    ]

    # commented logic skips NO_SECTION_NUMBER with no paragraphs, i.e., Table 1, Table 2, .....
    # if section_number == 'NO_SECTION_NUMBER' and not bool(list_of_paragraphs):
    if section_number != 'NO_SECTION_NUMBER':
        list_of_sections.append([section_number, section_name, list_of_paragraphs])

print(list_of_sections)

[['1', 'Introduction', ['<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">In the objective of developing efficient and optimal packaging, one of the main challenges is to design and provide food packaging materials able to protect the food from the external environment and to maintain food quality and safety throughout its shelf life <ns0:ref type="bibr" target="#b0">[1,</ns0:ref><ns0:ref type="bibr" target="#b1">2]</ns0:ref>. The mass transfers are thus at the heart of the feature of the food packaging, especially transfers of water vapor, oxygen and/or carbon dioxide, which condition the rates of numerous reactions of food degradation (oxidation, microbial development, physiological reactions, etc.). The development of bulk nanocomposite structures by introducing nanoparticles, i.e. fillers having at least one dimension lower than 100 nm, in polymeric matrices appeared as one of the most promising directions in the development of packaging materials with advanced mass transfer properti

In [None]:
# # DON'T USE THIS PIECE OF CODE  (commented - 12 Nov 23)
# # HAVE TO ADJUST THIS CODE FOR TABLES/IMAGES
# # merging "paragraphs with no section number" to its previous section

# list_of_sections_length = len(list_of_sections)

# for i in range(list_of_sections_length-1,0,-1):
#     if list_of_sections[i][0] == 'NO_SECTION_NUMBER':
#         list_of_sections[i-1][2].extend(list_of_sections[i][2])
#         list_of_sections.remove(list_of_sections[i])
        
# list_of_sections

In [5]:
# NUMBERING PARAGRAPHS OF SECTIONS 
# paragraphs are sequentially added in a dictionary
# dictionary is added with each respective record as a 4th element

for section in list_of_sections:
    # Only a record that still has its original 3-element shape
    # [number, name, paragraph list] is extended, so running this cell a
    # second time no longer appends another dictionary to every record.
    if len(section) == 3 and isinstance(section[2], list):
        list_of_paragraphs = dict(enumerate(section[2], start=1))
        section.append(list_of_paragraphs)

print(list_of_sections)

[['1', 'Introduction', ['<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">In the objective of developing efficient and optimal packaging, one of the main challenges is to design and provide food packaging materials able to protect the food from the external environment and to maintain food quality and safety throughout its shelf life <ns0:ref type="bibr" target="#b0">[1,</ns0:ref><ns0:ref type="bibr" target="#b1">2]</ns0:ref>. The mass transfers are thus at the heart of the feature of the food packaging, especially transfers of water vapor, oxygen and/or carbon dioxide, which condition the rates of numerous reactions of food degradation (oxidation, microbial development, physiological reactions, etc.). The development of bulk nanocomposite structures by introducing nanoparticles, i.e. fillers having at least one dimension lower than 100 nm, in polymeric matrices appeared as one of the most promising directions in the development of packaging materials with advanced mass transfer properti

In [6]:
# DELETING THE 3RD ELEMENT FROM EACH RECORD

for section in list_of_sections:
    # The raw paragraph list is dropped only while the record still has four
    # elements [number, name, paragraph list, numbered paragraphs]; running
    # this cell again therefore leaves the numbered dictionary in place
    # instead of deleting it as well.
    if len(section) == 4:
        del section[2]

print(list_of_sections)

[['1', 'Introduction', {1: '<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">In the objective of developing efficient and optimal packaging, one of the main challenges is to design and provide food packaging materials able to protect the food from the external environment and to maintain food quality and safety throughout its shelf life <ns0:ref type="bibr" target="#b0">[1,</ns0:ref><ns0:ref type="bibr" target="#b1">2]</ns0:ref>. The mass transfers are thus at the heart of the feature of the food packaging, especially transfers of water vapor, oxygen and/or carbon dioxide, which condition the rates of numerous reactions of food degradation (oxidation, microbial development, physiological reactions, etc.). The development of bulk nanocomposite structures by introducing nanoparticles, i.e. fillers having at least one dimension lower than 100 nm, in polymeric matrices appeared as one of the most promising directions in the development of packaging materials with advanced mass transfer prope

In [7]:
# MERGING ABSTRACT WITH OTHER SECTIONS

# abstract records first, followed by the body sections in their existing order
document = [*abstract, *list_of_sections]
print(document)

[[0, 'Abstract', {1: '<ns0:p xmlns:ns0="http://www.tei-c.org/ns/1.0">More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite. It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties. Results revealed that this expected impact was not systematically achieved, even for impermeable lamellar fillers that usually displayed the highest aspect ratio. More specifically, an unexpected increase of the permeability in the nanocomposite was often observed. To explain this deviation of the \'ideal behavior\', this paper discu

In [8]:
# PREPROCESSING TEXT

starting_p_tag_pattern = r'<ns0:p[^>]+>'
ending_p_tag_pattern = '</ns0:p>'
starting_ref_tag_pattern = r'<ns0:ref[^>]+>'
ending_ref_tag_pattern = '</ns0:ref>'
ref_pattern = r'<ref>.*?</ref>'          # temporary for removing ref tag

# (pattern, replacement) pairs, applied in this order: the paragraph tags are
# dropped, the reference tags are renamed to plain <ref>, and finally each
# reference is removed together with its text
substitutions = [
    (starting_p_tag_pattern, ''),
    (ending_p_tag_pattern, ''),
    (starting_ref_tag_pattern, '<ref>'),
    (ending_ref_tag_pattern, '</ref>'),
    (ref_pattern, ''),
]

for record in document:
    paragraphs = record[2]
    # the cleaned text overwrites the serialised paragraph under the same number
    for paragraph_number in list(paragraphs):
        text = paragraphs[paragraph_number]
        for pattern, replacement in substitutions:
            text = re.sub(pattern, replacement, text)
        paragraphs[paragraph_number] = text

print(document)

[[0, 'Abstract', {1: "More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite. It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties. Results revealed that this expected impact was not systematically achieved, even for impermeable lamellar fillers that usually displayed the highest aspect ratio. More specifically, an unexpected increase of the permeability in the nanocomposite was often observed. To explain this deviation of the 'ideal behavior', this paper discusses extensively the impact of the nanoparticle s

In [None]:
# # EXTRACTING TEXT AND WRITING IN FILE

# file_name = os.path.basename(xml_file)[:-4]

# for record in document:
#     for _, paragraph in record[2].items():
#         with open(f'output\\{file_name}.txt', 'a', encoding='utf-8') as output_file:
#             output_file.write(paragraph + '\n')

In [None]:
# # NOT IN USE NOW! WILL BE DELETED!!
# # import spacy and load en_core_web_sm model

# import spacy
# nlp = spacy.load('en_core_web_sm')

In [None]:
# # NOT IN USE NOW! WILL BE DELETED!!
# # organizing sentences sequentially in a dictionary
# # adding dictionary with each respective record as a 4th element

# for record in document:
#     text_content = record[2]
#     list_of_paragraphs = {}
    
#     for paragraph_number, paragraph_text in text_content.items():
#         list_of_sentences = {}
#         doc = nlp(paragraph_text)
        
#         for sentence_number, sentence_text in enumerate(doc.sents, start=1):
#             list_of_sentences[sentence_number] = sentence_text.text            # .text added to convert into text
        
#         list_of_paragraphs[paragraph_number] = list_of_sentences
        
#     record.append(list_of_paragraphs)

In [None]:
# # NOT IN USE NOW! WILL BE DELETED!!
# # deleting 3rd element from each record of document

# for record in document:
#     record.remove(record[2])  

In [9]:
# MERGING METADATA WITH OTHER DOCUMENT PARTS

# The metadata record is prepended only when `document` does not already start
# with one; re-running this cell used to stack another copy of it in front.
already_merged = bool(document) and document[0][1] == 'Metadata'
if not already_merged:
    document = metadata + document
print(document)

[[0, 'Metadata', {'Title': 'How the shape of fillers affects the barrier properties of polymer/non-porous particles nanocomposites: A review', 'Publication Date': '03 April 2018', 'DOI': '10.1016/j.memsci.2018.03.085'}], [0, 'Abstract', {1: "More than 1000 published experimental data of gas (O 2 and CO 2 ) and vapor (H 2 O) permeability in nanocomposites containing either spherical, elongated or platelet particles were collected, assorted and compared in order to decipher the role of particle shape on the reduction of the relative permeability of the nanocomposite. It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties. Results revealed that this expected impact was not systematically achieved, even for impermeable lamellar fillers that usually displayed the highest aspect ratio.

### List to RDF

In [10]:
# LOADING THE NLP MODEL

model_path = 'C:/Users/umayer/_dev/experiment'
model_name = 'SINGLE_LABEL_CELLULOSIC_MODEL_ACC/model-best'

# the pipeline is loaded from '<model_path>/<model_name>'
model_location = '/'.join([model_path, model_name])
nlp = spacy.load(model_location)

In [50]:
# WRITING RDF TRIPLES FOR DOCUMENT

doi = document[0][2]['DOI']
doi = doi.replace('/', '_')
dev_schema = {0: 'ENTITY', 
              1: 'CHEMICAL_ENTITY', 
              2: 'MATERIAL_ENTITY', 
              3: 'STRUCTURE_ENTITY', 
              4: 'RELATIONSHIP', 
              5: 'PROPERTY', 
              6: 'PROCESS_OR_TECHNIQUE', 
              7: 'APPLICATION', 
              8: 'MEASUREMENT', 
              9: 'ABBREVIATION'}

# label text -> label number, used to resolve the label predicted for each entity
# (the previous code looked labels up in `labeling_schema`, a name this
# notebook never defines)
label_numbers = {label_text: label_number for label_number, label_text in dev_schema.items()}


def turtle_literal(value, datatype):
    """Return `value` as a single-quoted Turtle literal typed `xsd:<datatype>`.

    Backslashes, single quotes and line breaks are escaped so that the value
    cannot terminate or corrupt the literal.
    """
    escaped = (str(value)
               .replace('\\', '\\\\')
               .replace("'", "\\'")
               .replace('\n', '\\n')
               .replace('\r', '\\r'))
    return f"'{escaped}'^^xsd:{datatype}"


def print_statement(subject_and_type, predicates):
    """Print one Turtle statement followed by a blank line.

    `subject_and_type` is the opening 'subject rdf:type Class' part and
    `predicates` a list of 'predicate object' strings. The parts are joined
    with ' ;' and closed with ' .', so an empty predicate list still yields
    a well-formed statement.
    """
    print(' ;\n'.join([subject_and_type] + predicates) + ' .')
    print()


def print_paragraphs(part_id, paragraphs, next_part_id):
    """Print the triples of every paragraph of one document part and of its labeled terms.

    part_id: Turtle identifier of the containing abstract or section.
    paragraphs: {paragraph_number: paragraph_text} of that part.
    next_part_id: identifier the last paragraph points to as its successor.
    """
    last_paragraph_number = len(paragraphs)

    for paragraph_number, paragraph_text in paragraphs.items():
        paragraph_id = f"{part_id}-{paragraph_number}"

        # The model runs on the unescaped text, so offset and length refer to
        # the characters of the literal itself; escaping happens only on output.
        doc = nlp(paragraph_text)
        labeled_terms = [
            (f"{paragraph_id}-{term_number}", ent)
            for term_number, ent in enumerate(doc.ents, start=1)
        ]

        if paragraph_number == last_paragraph_number:
            next_id = next_part_id
        else:
            next_id = f"{part_id}-{paragraph_number + 1}"

        # RDF>> PARAGRAPH rdf:type SciPub:Paragraph
        # RDF>> PARAGRAPH SciPub:positionInParentDocumentPart 'paragraph_number'^^xsd:nonNegativeInteger
        # RDF>> PARAGRAPH SciPub:nextDocumentPart SciPub:Paragraph/SciPub:Section
        # RDF>> PARAGRAPH SciPub:paragraphText 'paragraph_text'^^xsd:string
        # RDF>> PARAGRAPH SciPub:directlyContainsDocumentPart SciPub:LabeledTerm
        paragraph_predicates = [
            f"SciPub:positionInParentDocumentPart {turtle_literal(paragraph_number, 'nonNegativeInteger')}",
            f"SciPub:nextDocumentPart {next_id}",
            f"SciPub:paragraphText {turtle_literal(paragraph_text, 'string')}",
        ]
        if labeled_terms:
            # omitted when the paragraph has no entities: a predicate without an object is invalid
            term_ids_joined = ', '.join(term_id for term_id, _ in labeled_terms)
            paragraph_predicates.append(f"SciPub:directlyContainsDocumentPart {term_ids_joined}")
        print_statement(f"{paragraph_id} rdf:type SciPub:Paragraph", paragraph_predicates)

        # RDF>> LABELED_TERM rdf:type SciPub:LabeledTerm
        # RDF>> LABELED_TERM sp:labeledTermText 'term_text'^^xsd:string
        # RDF>> LABELED_TERM sp:labeledTermIsContainedBy SciPub:Paragraph
        # RDF>> LABELED_TERM sp:offset 'starting_position'^^xsd:nonNegativeInteger
        # RDF>> LABELED_TERM sp:length 'length'^^xsd:nonNegativeInteger
        # RDF>> LABELED_TERM sp:hasLabel SciPub:Label
        for term_id, ent in labeled_terms:
            # Each term uses its own entity's label; labels missing from the
            # schema fall back to label 0, declared below as :Label0.
            label_number = label_numbers.get(ent.label_, 0)
            print_statement(f"{term_id} rdf:type SciPub:LabeledTerm", [    # DEAL WITH ATOMIC / COMPOUND
                f"SciPub:labeledTermText {turtle_literal(ent.text, 'string')}",
                f"SciPub:documentPartIsDirectlyContainedBy {paragraph_id}",
                f"SciPub:offset {turtle_literal(ent.start_char, 'nonNegativeInteger')}",
                f"SciPub:length {turtle_literal(ent.end_char - ent.start_char, 'nonNegativeInteger')}",
                f"SciPub:hasLabel :Label{label_number}",
            ])


print("@prefix SciPub: <http://spatialai.org/SciPub/v2.0#> .")
print("@prefix : <http://spatialai.org/SciPub/v2.0/data#> .")
print("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .")
print("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .")
print("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .")
print("@prefix owl: <http://www.w3.org/2002/07/owl#> .")
print()

for position, record in enumerate(document):

    # Metadata and abstract records carry the integer 0 as their number while
    # body sections carry strings, so a body section that happens to be titled
    # 'Metadata' or 'Abstract' is still treated as a section.

    # RDF>> PUBLICATION rdf:type SciPub:ScientificPublication
    # RDF>> PUBLICATION SciPub:title 'title'^^xsd:string
    # RDF>> PUBLICATION SciPub:doi 'doi'^^xsd:string
    # RDF>> PUBLICATION SciPub:publicationDate 'publication_date'^^xsd:date
    if record[0] == 0 and record[1] == 'Metadata':
        print_statement(":Publication rdf:type SciPub:ScientificPublication", [
            f"SciPub:title {turtle_literal(record[2]['Title'], 'string')}",
            f"SciPub:doi {turtle_literal(record[2]['DOI'], 'string')}",
            # NOTE(review): the date is emitted exactly as extracted (e.g. '03 April 2018'),
            # which is not the YYYY-MM-DD form xsd:date expects
            f"SciPub:publicationDate {turtle_literal(record[2]['Publication Date'], 'date')}",
        ])
        continue

    # number of the record that follows this one, or the end marker for the last record
    if position + 1 < len(document):
        next_section = document[position + 1][0]
    else:
        next_section = 'EndOfDocument'
    next_part_id = f":{doi}_{next_section}"

    if record[0] == 0 and record[1] == 'Abstract':
        # RDF>> ABSTRACT rdf:type SciPub:Abstract
        # RDF>> ABSTRACT sp:directlyContainsDocumentPart SciPub:Paragraph
        part_id = f":{doi}_A"
        part_type = 'SciPub:Abstract'
        part_predicates = []
    else:
        # RDF>> SECTION rdf:type SciPub:Section
        # RDF>> SECTION SciPub:headerText 'section_header'^^xsd:string
        # RDF>> SECTION sp:directlyContainsDocumentPart SciPub:Paragraph
        section_number = record[0]
        part_id = f":{doi}_{section_number}"
        part_type = 'SciPub:Section'
        part_predicates = [f"SciPub:headerText {turtle_literal(record[1], 'string')}"]

    paragraph_ids = [f"{part_id}-{paragraph_number}" for paragraph_number in record[2]]
    if paragraph_ids:
        paragraph_ids_joined = ', '.join(paragraph_ids)
        part_predicates.append(f"SciPub:directlyContainsDocumentPart {paragraph_ids_joined}")
    print_statement(f"{part_id} rdf:type {part_type}", part_predicates)

    print_paragraphs(part_id, record[2], next_part_id)

    print('#========================= SECTION DIVIDER =========================#')
    print()

for label_number, label_text in dev_schema.items():
    print_statement(f":Label{label_number} rdf:type SciPub:Label", [
        "SciPub:fromLabelingSchema :dev_schema",
        f"SciPub:labelText {turtle_literal(label_text, 'string')}",
    ])


print(f":dev_schema rdf:type SciPub:LabelingSchema .")

@prefix SciPub: <http://spatialai.org/SciPub/v2.0#> .
@prefix : <http://spatialai.org/SciPub/v2.0/data#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .

:Publication rdf:type SciPub:ScientificPublication ;
SciPub:title 'How the shape of fillers affects the barrier properties of polymer/non-porous particles nanocomposites: A review'^^xsd:string ;
SciPub:doi '10.1016/j.memsci.2018.03.085'^^xsd:string ;
SciPub:publicationDate '03 April 2018'^^xsd:date .

:10.1016_j.memsci.2018.03.085_A rdf:type SciPub:Abstract ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_A-1 .

:10.1016_j.memsci.2018.03.085_A-1 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_1 ;
SciPub:paragraphText 'More than 1000 published experime

:10.1016_j.memsci.2018.03.085_1-1 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_1-2 ;
SciPub:paragraphText 'In the objective of developing efficient and optimal packaging, one of the main challenges is to design and provide food packaging materials able to protect the food from the external environment and to maintain food quality and safety throughout its shelf life . The mass transfers are thus at the heart of the feature of the food packaging, especially transfers of water vapor, oxygen and/or carbon dioxide, which condition the rates of numerous reactions of food degradation (oxidation, microbial development, physiological reactions, etc.). The development of bulk nanocomposite structures by introducing nanoparticles, i.e. fillers having at least one dimension lower than 100 nm, in polymeric matrices appeared as one of the most promising directions in the development of packaging m

:10.1016_j.memsci.2018.03.085_1-3 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '3'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_1-4 ;
SciPub:paragraphText 'In order to better correlate the mass transfer properties in nanocomposite materials with their multi-scale structure, the objective of the present review is to decipher and comprehensively discuss the role of the nanoparticle shape (either isodimensional, elongated or layered) on the modulation of the mass transfer properties in nanocomposites, as a function of filler volume fraction and in the light of the nanocomposite structure achieved. For that purpose, more than 1000 values (i.e. about 170 articles) of the 1995-2015 period containing measured values of O <sub>2</sub> , CO <sub>2</sub> and H <sub>2</sub> O permeability in polymer-based nanocomposites were collected from the available literature and capitalized in a dedicated on-line database . All data could be uploaded fro

:10.1016_j.memsci.2018.03.085_2-2 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '2'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.1 ;
SciPub:paragraphText 'Depending on the processing conditions, the particle shape and the affinity between constituents, i.e. the polymer matrix and the nanoparticle, different morphologies can be obtained. For isodimensional and elongated nanoparticles, three arrangements can be observed, i.e. (i) well-dispersed, (ii) agglomerated, or (iii) percolating systems. For platelets, specific names are given to the various possible morphologies: (i) micro-composite, (ii) intercalated nanocomposite or (iii) fully exfoliated nanocomposites. The micro-composite structure corresponds to '^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_2-2-1, :10.1016_j.memsci.2018.03.085_2-2-2, :10.1016_j.memsci.2018.03.085_2-2-3, :10.1016_j.memsci.2018.03.085_2-2-4, :10.1016_j.memsci.2018.03.085_

:10.1016_j.memsci.2018.03.085_3.1-2 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '2'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.1-3 ;
SciPub:paragraphText 'Wiener  in 1912 extended the results to the case of dilute dispersion of long cylinders in the matrix, all aligned in the direction normal to the overall flux. While exact solutions are also available for the case of spheroids (dilute mixture, perfect alignment), empirical relations as well as numerical result have been discussed for the case of different shapes of the inclusions. Among the best, known results of this kind are those obtained by Nielsen  in 1967 following empirical ideas about the increase in contour length of path of diffusing molecules in the composite medium due to the presence of impermeable inclusions in the form of bricks. The specific expression for effective conductivity as function of particle concentration in the composite as derived from the above m

:10.1016_j.memsci.2018.03.085_3.2-2 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '2'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.2-3 ;
SciPub:paragraphText 'In the following, the evolution of the relative permeability, i.e. the ratio of the composite permeability to the permeability of the neat matrix (P/P <sub>0</sub> ), was represented as a function of the nanoparticle volume fraction (φ <sub>vol</sub> ), which is considered as input parameter in all mathematical models. This implied to systematically calculate φ <sub>vol</sub> values from weight fractions (φ <sub>wt</sub> ) values given in the original papers, based on the knowledge of the true density of each constituent (Eq. ( )):'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_3.2-2-1, :10.1016_j.memsci.2018.03.085_3.2-2-2, :10.1016_j.memsci.2018.03.085_3.2-2-3, :10.1016_j.memsci.2018.03.085_3.2-2-4, :10.1016_j.memsci.2018.03.085_3.2-2-5, :1

:10.1016_j.memsci.2018.03.085_3.3-2 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '2'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.3-3 ;
SciPub:paragraphText '• <super>The</super> <super>investigated</super> <super>φ</super> <super>vol</super> <super>range</super> <super>is</super> <super>higher</super> <super>for</super> <super>isodimensional</super> <super>and</super> <super>elon-</super> gated nanoparticles than for platelets with maximal φ <sub>vol</sub> values of 40 vol% and 45 vol% respectively (Fig. ), against 15 vol% for nanoplatelets (Fig. ). Only 3 publications (i.e. 11 permeability values) were in the range of 15-40 vol% for nanoplatelets-based nanocomposites (not shown in Fig. ). This feature could be ascribed to the fact that usually, only a small weight fraction of nanoplatelets (generally 1-5 wt%) is necessary to obtain a significant effect on the material functional properties.'^^xsd:string ;
SciPub:directlyContains

:10.1016_j.memsci.2018.03.085_3.3-5 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '5'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.3-6 ;
SciPub:paragraphText '• <super>For</super> <super>all</super> <super>the</super> <super>kinds</super> <super>of</super> <super>nanoparticles,</super> <super>some</super> <super>non-monotonic</super> <super>variations</super> <super>of</super> P/P <sub>0</sub> are noted with simultaneous increase and decrease of the relative permeability for the nanocomposites. It represents approximately 12% of the studies for isodimensional nanoparticles, 21% of the study for elongated nanoparticles and only 7% of the studies for nanoplatelets.'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_3.3-5-1, :10.1016_j.memsci.2018.03.085_3.3-5-2, :10.1016_j.memsci.2018.03.085_3.3-5-3, :10.1016_j.memsci.2018.03.085_3.3-5-4, :10.1016_j.memsci.2018.03.085_3.3-5-5, :10.1016_j.memsci.2018.03.0

:10.1016_j.memsci.2018.03.085_3.4-1 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.4-2 ;
SciPub:paragraphText 'In order to perform a quantitative analysis of the efficiency of each type of nanoparticle shape on P/P <sub>0</sub> , all the permeability data collected in the literature were gathered on a histogram displaying the percentage of values per class of modulation, i.e. per range of P/P <sub>0</sub> values. The maximum number of permeability values collected for isodimensional (43%), elongated (29%) and layered (30%) nanoparticles is obtained for the classes 1 &lt; P/P <sub>0</sub> &lt; 2, 1 &lt; P/P <sub>0</sub> &lt; 2, and 0.6 &lt; P/P <sub>0</sub> &lt; 0.8 respectively, confirming that layered nanoparticles were more prone to improve the barrier properties of a given polymer (Fig. ).'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_3.4-1-1, :10

:10.1016_j.memsci.2018.03.085_3.4-5 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '5'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_3.4-6 ;
SciPub:paragraphText 'To verify if the conclusions drawn from this global analysis were confirmed for all the tested filler contents, permeability values were also apportioned into classes of filler volume fraction in addition to the classes of P/P <sub>0</sub> values (Fig. ). The classes chosen for filler fractions were 0-5 vol%, 5-10 vol%, 10-20 vol% and 20-30 vol%. For this analysis, the range for P/P <sub>0</sub> was limited to a maximal value of 3, considering that a higher increase of the permeability ratio was an exceptional behavior. In the case of isodimensional nanoparticles, exactly the same evolution of P/P <sub>0</sub> is obtained for each filler fraction, with an Fig. . Evolution of the relative permeability P/P <sub>0</sub> as a function of particle volume fraction in nanocomposite 

:10.1016_j.memsci.2018.03.085_4-1 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.1 ;
SciPub:paragraphText 'In the following section, a case by case qualitative explanation of the modulation of the permeability according to particle morphology will be proposed by focusing on the nanostructure obtained and its relationship with mass transfer properties. Three cases will be considered: (1) "ideal" case of monotonic decrease of P/P <sub>0</sub> , (2) monotonic increase of P/P <sub>0</sub> and (3) non-monotonic variation of P/P 0 . A critical discussion relying on the mechanisms proposed by the different authors will be offered.'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4-1-1 .

:10.1016_j.memsci.2018.03.085_4-1-1 rdf:type SciPub:LabeledTerm ;
SciPub:labeledTermText 'P/P <sub>0</sub'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.me

:10.1016_j.memsci.2018.03.085_4.1.1-3 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '3'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.1.1-4 ;
SciPub:paragraphText 'For spherical particles: dispersion and distribution states of nanoparticles within the polymer matrix. Concretely, it could be related to two quantitative parameters, i.e. the in-situ size aspect ratio of the particle and the orientation of nanoparticles.'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4.1.1-3-1, :10.1016_j.memsci.2018.03.085_4.1.1-3-2, :10.1016_j.memsci.2018.03.085_4.1.1-3-3, :10.1016_j.memsci.2018.03.085_4.1.1-3-4 .

:10.1016_j.memsci.2018.03.085_4.1.1-3-1 rdf:type SciPub:LabeledTerm ;
SciPub:labeledTermText 'nanoparticles'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.1.1-3 ;
SciPub:offset '63'^^xsd:nonNegativeInteger ;
SciPub:length '13'^^xsd:nonNegativeInteger ;
SciPub:hasLabe

:10.1016_j.memsci.2018.03.085_4.1.1-5 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '5'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.1.1-6 ;
SciPub:paragraphText 'In practice, the knowledge of the in-situ size aspect ratio is thus necessary to fully understand and predict the impact of tortuosity on mass transfer properties. It is usually evaluated using image analysis of TEM pictures when nanoparticles can be easily distinguished from the matrix as is the case for montmorillonite . This analysis is tricky and time consuming. Indeed, a large number of images must be analyzed to obtain a significant representativeness of the whole material. In addition, it is well-known that TEM analysis has some inherent limitations due to the fact that it projects three-dimensional (3D) objects onto a two-dimensional (2D) plane, resulting in TEM cross-sections that do not necessarily represent the actual size of all the platelets . Furthermore, an

:10.1016_j.memsci.2018.03.085_4.1.1-7 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '7'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.1.1-8 ;
SciPub:paragraphText 'It is worth noting that the achievement of a good dispersion state is all the more difficult as the filler content increases due to agglomeration phenomena, as evidenced in many systems such as agar-(Rhim et al. ), polyester-(Bharadwaj et al. ), polyamide-(Picard et al. ) or polyimide-(Chang et al. ) based nanocomposites with montmorillonite as nanoparticles. Chemical modification of nanoparticles does not always prevent agglomeration, as evidenced by Zhu et al.  with modified SiO <sub>2</sub> particles in poly (vinyl chloride) matrix.'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4.1.1-7-1, :10.1016_j.memsci.2018.03.085_4.1.1-7-2, :10.1016_j.memsci.2018.03.085_4.1.1-7-3, :10.1016_j.memsci.2018.03.085_4.1.1-7-4, :10.1016_j.memsci.2018.03

:10.1016_j.memsci.2018.03.085_4.1.3-1 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2 ;
SciPub:paragraphText 'The chemical modification of the nanoparticle surface is generally performed to reach a better filler/matrix affinity and thus a better dispersion of nanofillers within the polymer matrix. A side effect of this higher affinity is a decrease of the availability of the sorption sites in the polymer. It is particularly true for highly interactive molecules such as water vapor that interacts though hydrogen bonding with hydrophilic sites. These sites bonded with the nanoparticles are not any more available for water sorption. Such changes in specific interactions between the migrant and the polymer were proposed to explain the decrease of water vapor permeability observed by Tunc et al.  in wheat gluten/montmorillonite or by Lee et al.  in soy protein/ montmorillonite nanocomposi

:10.1016_j.memsci.2018.03.085_4.2.1-1 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2.1-2 ;
SciPub:paragraphText 'In some cases, the dispersion state of nanoparticles did not allow to achieve a sufficient tortuosity effect to slow down the diffusion of migrating molecules, especially for small molecules such as gases. Such an explanation is widely proposed to explain why values of permeability did not change or only slightly decreased in the nanocomposite as compared to the neat matrix. This has been, for example, proposed by Tunc et al.  to explain the constancy of O <sub>2</sub> permeability in wheat gluten/montmorillonite nanocomposites. In the same article, authors however demonstrated that the achieved tortuosity was nevertheless sufficient for decreasing permeability of bigger molecules such as aroma compounds. This highlighted that the nature, molecular weight and/or steric hi

:10.1016_j.memsci.2018.03.085_4.2.1-3 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '3'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2.3 ;
SciPub:paragraphText 'Agglomeration could lead to the formation of "interfacial voids" at the particle/polymer matrix interface and to the formation of a preferential pathway for the migration of the molecular penetrant (Ismail et al. , Rafiq et al. ). This is further detailed below in the section dedicated to the "interphase". 4.2.2.2. Interphase. An interphase, i.e. a "third compartment" with its own properties, is generated at the particles/polymer interface due to the establishment of either weak or strong interfacial interactions between the particles and the polymer matrix. Depending on its nature and thickness (or volume fraction), the interphase could significantly participate to the overall mass transfer properties and strongly influence the overall permeability of the composite as evid

:10.1016_j.memsci.2018.03.085_4.2.3-2 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '2'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2.3-3 ;
SciPub:paragraphText 'Such hypothesis of polymer matrix modification with increase of free volume was speculated by several authors for isodimensional particles-based composites with titanium dioxide (Matteucci et al. , Moghadam et al. ) or silica nanoparticles (Dougnac et al. , Romero et al. ) and in elongated particles-based nanocomposites with carbon nanotube (Murali et al. ). An increase of free volume was also hypothesized to occur when using compatibilizing agents due to bad compatibility between the compatibilizer and the surface treated particles (Mittal et al. ). The addition of nanoparticles in a polymer matrix could also hinder the crystallization process if chain mobility is hindered, leading thus to a decrease of the overall crystallinity of the polymer that could contribute to en

:10.1016_j.memsci.2018.03.085_4.2.4-1 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.2.4-2 ;
SciPub:paragraphText 'The increase of P/P <sub>0</sub> could be related to the establishment of specific interactions between the molecular penetrant and the material constituents, as largely observed for water vapor and hygroscopic nanoparticles such as silica nanoparticles (Bracho et al. , Dougnac et al. ) or microcrystalline cellulose (Dogan et al. ), and also for O <sub>2</sub> and organo-modified montmorillonite (Chang et al. ), for O <sub>2</sub> or modified silica nanoparticles (Iwata et al. ) or CO <sub>2</sub> and titanium dioxide nanoparticles (Matteucci et al. ).'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4.2.4-1-1, :10.1016_j.memsci.2018.03.085_4.2.4-1-2, :10.1016_j.memsci.2018.03.085_4.2.4-1-3, :10.1016_j.memsci.2018.03.085_4.2.4-1-4, :10.1016

:10.1016_j.memsci.2018.03.085_4.2.4-3 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '3'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.3 ;
SciPub:paragraphText 'Fig.  is providing summary of the two previous Sections 4.1 and 4.2 with key take home messages about the main mechanisms explaining monotonic increase of decrease of the ratio P/P 0 .'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4.2.4-3-1 .

:10.1016_j.memsci.2018.03.085_4.2.4-3-1 rdf:type SciPub:LabeledTerm ;
SciPub:labeledTermText 'Sections 4.1'^^xsd:string ;
SciPub:documentPartIsDirectlyContainedBy :10.1016_j.memsci.2018.03.085_4.2.4-3 ;
SciPub:offset '47'^^xsd:nonNegativeInteger ;
SciPub:length '12'^^xsd:nonNegativeInteger ;
SciPub:hasLabel :Label0 .


:10.1016_j.memsci.2018.03.085_4.3 rdf:type SciPub:Section ;
SciPub:headerText 'Non-monotonic variation of P/P '^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.0

:10.1016_j.memsci.2018.03.085_4.4-1 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '1'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.4-2 ;
SciPub:paragraphText 'In this section, it is considered that the properties of the polymer matrix are not affected by the incorporation of nanoparticles and that nanocomposites are two-phases systems with a perfect adhesion at the particle/matrix interface. This makes the relation P = D × S, where P, D and S are respectively the permeability, diffusion and solubility coefficients, valid . In order to explain the complex evolution of P/P <sub>0</sub> as a function of filler content, some authors tried to decompose the permeability into diffusivity and solubility and to relate the apparent evolution of P/P <sub>0</sub> to change in D or S or both. For that purpose, they generally determined the diffusivity and/or solubility coefficients independently of the permeability.'^^xsd:string ;
SciPub:direct

:10.1016_j.memsci.2018.03.085_4.4-3 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '3'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.4-4 ;
SciPub:paragraphText 'For example, O <sub>2</sub> /CO <sub>2</sub> solubility was found to remain constant in poly(amide-6-b-ethylene oxide)  or brominated poly(2,6-diphenyl-1,4-phenylene oxide)  filled with silica nanoparticles while CO <sub>2</sub> solubility was found to increase with the addition of silica particles in polybenzimidazole . Despite the fact that the same nanoparticle was used in these three studies, the effect on gas solubility obtained was not. This demonstrated that if the particles could be considered as impermeable (D=0), it is not true for their solubility. As mentioned above in § 3.2, increase of S could also be related to the progressive uncovering of active sorption sites in the polymer matrix as a consequence of particle addition and to the creation of specific sites at

:10.1016_j.memsci.2018.03.085_4.4-5 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '5'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5 ;
SciPub:paragraphText 'To sum up, following analysis of all aforementioned experimental evidences, we can conclude that there is generally a competition between an increase of the solubility and a decrease of the diffusivity. Therefore, non-monotonic changes of P/P <sub>0</sub> , in peculiar, decrease of P/ P <sub>0</sub> followed by an increase for higher particle volume fraction, could be generally ascribed to an increase of S that could, from a threshold value of particle volume fraction, counteract the decrease of D.'^^xsd:string ;
SciPub:directlyContainsDocumentPart :10.1016_j.memsci.2018.03.085_4.4-5-1, :10.1016_j.memsci.2018.03.085_4.4-5-2, :10.1016_j.memsci.2018.03.085_4.4-5-3, :10.1016_j.memsci.2018.03.085_4.4-5-4, :10.1016_j.memsci.2018.03.085_4.4-5-5, :10.1016_j.memsci.2018.03.085_4.4-5-6,

:10.1016_j.memsci.2018.03.085_4.5-3 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '3'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-4 ;
SciPub:paragraphText 'Logically, CO <sub>2</sub> /O <sub>2</sub> permselectivity of layered particles-based nanocomposites was centered around a mean value of 4, which is usually the permselectivity of oil-based polymers used as neat matrix in most of the studies (Fig. ). In most cases, this permselectivity did not change so Fig. . Evolution of the H <sub>2</sub> 0/O <sub>2</sub> selectivity ratio* as a function of particle volume fraction in nanocomposite materials containing either elongated or layered particles. *note that for the sake of clarity, the ratio between selectivity of the nanocomposite to that of the neat polymer was preferred in the representation, instead of true selectivity values. much as a function of the nanoparticle volume fraction (up to 8 vol/vol % for the study of Jacquelo

:10.1016_j.memsci.2018.03.085_4.5-4 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '4'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-5 ;
SciPub:paragraphText 'For isodimensional particles-based nanocomposites, the CO <sub>2</sub> /O <sub>2</sub> permselectivity is also centered around a value of 5 (Fig. ) and kept almost constant as a function of filler volume fraction. Nevertheless, a group of 5 studies displays very high CO <sub>2</sub> /O <sub>2</sub> permselectivity (&gt; 15). Among this group of studies, we noted a general trend toward an increase of permselectivity with nanoparticle volume fraction (4 studies  observing an increase against only one  observing a decrease). Variation of CO <sub>2</sub> /O <sub>2</sub> permselectivity in the aforementioned studies was principally due to the fact that O <sub>2</sub> and CO <sub>2</sub> permeability did not decrease as much as a function of filler content resulting in a nonconstan

:10.1016_j.memsci.2018.03.085_4.5-6 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '6'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_4.5-7 ;
SciPub:paragraphText 'where α <sub>NC</sub> and α <sub>M</sub> are CO <sub>2</sub> /O <sub>2</sub> permselectivity in nanocomposite and neat matrix, respectively. Despite the scattering, data in Fig.  put in evidence that, both for the case of isodimensional particles and platelets, relative permeability higher than one is typically accompanied by a significant deviation of nanocomposite permselectivity from the value of neat matrix, while an interval exists for relative permeability of nanocomposite moderately lower than 1, for which the deviation in permselectivity from the value of neat matrix is negligible. Significantly, the above interval for relative permeability extends to lower values for the case of nanocomposites including platelets with respect to the case of isodimensional particles. 

:10.1016_j.memsci.2018.03.085_5-2 rdf:type SciPub:Paragraph ;
SciPub:positionInParentDocumentPart '2'^^xsd:nonNegativeInteger ;
SciPub:nextDocumentPart :10.1016_j.memsci.2018.03.085_5-3 ;
SciPub:paragraphText 'This review has revealed that despite numerous and intensive studies on the optimization of nanocomposite processing and structure in the perspective of modulating their barrier properties, the expected decreasing effect on the permeability is often not so high as expected and sometimes completely the opposite. This review confirmed that layered nanoparticles are more efficient than isodimensional and elongated nanoparticles to decrease the relative permeability (P/P <sub>0</sub> ). But, this effect is difficult to anticipate due to numerous interfering mechanisms that provoke modifications of tortuosity, sorption, polymer matrix (crystallinity, free volume, molar mass), creation of an interphase, voids and cracks, and more that lead to unexpected behaviors of the permeability in

In [13]:
# Printing abstract and sections: set up the values shared by the export loop
# below and emit the Turtle namespace prefixes.

# Identifier stem for the generated node names: the DOI stored at
# document[0][2]['DOI'], with '/' replaced by '_'.
doi = document[0][2]['DOI'].replace('/', '_')

# Running counter used to number the ':LabeledTerm<N>' nodes.
term_number = 1

# Labeling schema: label number -> label text.
labeling_schema = dict(enumerate((
    'ENTITY',
    'CHEMICAL_ENTITY',
    'MATERIAL_ENTITY',
    'STRUCTURE_ENTITY',
    'RELATIONSHIP',
    'PROPERTY',
    'PROCESS_OR_TECHNIQUE',
    'APPLICATION',
    'MEASUREMENT',
    'ABBREVIATION',
)))

# Namespace declarations, one per line, followed by a single blank line.
print("\n".join((
    "@prefix SciPub: <http://spatialai.org/SciPub/v2.0#> .",
    "@prefix : <http://spatialai.org/SciPub/v2.0/data#> .",
    "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .",
    "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .",
    "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .",
    "@prefix owl: <http://www.w3.org/2002/07/owl#> .",
)))
print()

for record in document:
    if record[1] == 'Metadata':
        print(f":Publication rdf:type SciPub:ScientificPublication ;")
        print(f"SciPub:title '{record[2]['Title']}'^^xsd:string ;")
        print(f"SciPub:doi '{record[2]['DOI']}'^^xsd:string ;")
        print(f"SciPub:publicationDate '{record[2]['Publication Date']}'^^xsd:date .")
        print()
    
    elif record[1] == 'Abstract':
        index = document.index(record) + 1
        next_section = document[index][0]
        paragraph_positions = []
        
        # RDF>> ABSTRACT rdf:type Abstract
        print(f":{doi}_A rdf:type SciPub:Abstract ;")
        
        # RDF>> ABSTRACT sp:directlyContainsDocumentPart PARAGRAPH
        for paragraph_number, paragraph_object in record[2].items():
            position = ':' + doi + '_A' + '-' + str(paragraph_number)
            paragraph_positions.append(position)
            
        joined_paragraph_positions = ', '.join(paragraph_positions)
        print(f"SciPub:directlyContainsDocumentPart {joined_paragraph_positions} .")
        print()
            
        # RDF>> PARAGRAPH rdf:type Paragraph
        for element in paragraph_positions:
            print(f"{element} rdf:type SciPub:Paragraph ;")
            
        for paragraph_number, paragraph_object in record[2].items():
            sentence_positions = []
            
            # RDF>> PARAGRAPH sp:directlyContainsDocumentPart SENTENCE
            for sentence_number, sentence_text in paragraph_object.items():
                position = ':' + doi + '_A' + '-' + str(paragraph_number) + '-' + str(sentence_number)
                sentence_positions.append(position)
            
            joined_sentence_positions = ', '.join(sentence_positions)
            print(f"SciPub:directlyContainsDocumentPart {joined_sentence_positions} .")
            print()
            
            # RDF>> SENTENCE rdf:type Sectence
            # RDF>> SENTENCE sp:positionInParentDocumentPart 'POSITION'^^xsd:nonNegativeInteger
            # RDF>> SENTENCE sp:nextDocumentPart SENTENCE/PARAGRAPH/SECTION
            # RDF>> SENTENCE sp:sentenceText 'TEXT_OF_SENTENCE'^^xsd:string
            for sentence_number, sentence_text in paragraph_object.items():
                
                # replacing ' with \' in text
                if "'" in sentence_text:
                    sentence_text = sentence_text.replace("'", r"\'")
                    
                print(f":{doi}_A-{paragraph_number}-{sentence_number} rdf:type SciPub:Sectence ;")
                print(f"SciPub:positionInParentDocumentPart '{sentence_number}'^^xsd:nonNegativeInteger ;")
                
                if sentence_number == len(sentence_positions):
                    if paragraph_number == len(paragraph_positions):
                        print(f"SciPub:nextDocumentPart :{doi}_{next_section} ;")
                    else:
                        print(f"SciPub:nextDocumentPart :{doi}_A-{paragraph_number+1} ;")
                else:
                    print(f"SciPub:nextDocumentPart :{doi}_A-{paragraph_number}-{sentence_number+1} ;")
                    
                print(f"SciPub:sentenceText '{sentence_text}'^^xsd:string .")
                print()
                
                # RDF>> LABELED_TERM rdf:type AtomicLabeledTerm/CompoundLabeledTerm
                # RDF>> LABELED_TERM sp:labeledTermText 'TEXT_OF_TERM'^^xsd:string
                # RDF>> LABELED_TERM sp:labeledTermIsContainedBy SENTENCE
                # RDF>> LABELED_TERM sp:offset 'STARTING_POSITION'^^xsd:nonNegativeInteger
                # RDF>> LABELED_TERM sp:length 'LENGTH'^^xsd:nonNegativeInteger
                # RDF>> LABELED_TERM sp:hasLabel LABEL
                doc = nlp(sentence_text)
                for ent in doc.ents:
                    print(f":LabeledTerm{term_number} rdf:type SciPub:AtomicLabeledTerm ;")  ## DEAL WITH ATOMIC / COMPOUND
                    print(f"SciPub:labeledTermText '{ent.text}'^^xsd:string ;")
                    print(f"SciPub:documentPartIsDirectlyContainedBy :{doi}_A-{paragraph_number}-{sentence_number} ;")
                    print(f"SciPub:offset '{ent.start_char}'^^xsd:nonNegativeInteger ;")
                    print(f"SciPub:length '{ent.end_char - ent.start_char}'^^xsd:nonNegativeInteger ;")
                    
                    for label_number, label_text in labeling_schema.items():
                        if label_text == ent.label_:
                            print(f"SciPub:hasLabel :Label{label_number} .")
                            break
                        else:
                            print(f"SciPub:hasLabel :Label_0 .")
                            break
                            
                    term_number += 1
                    print()
            
        print('#=================== TEMPORARY SECTION DIVIDER ===================#')
        print()
    
    else:
        section_number = record[0]
        section_name = record[1]
        index = document.index(record) + 1
        paragraph_positions = []
        
        if index == len(document):
            next_section = 'EndOfDocument'
        else:
            next_section = document[index][0]
        
        # RDF>> 
        # RDF>> 
        print(f":{doi}_{section_number} rdf:type SciPub:Section ;")
        print(f"SciPub:headerText '{record[1]}'^^xsd:string ;")

        # RDF>> SECTION sp:directlyContainsDocumentPart PARAGRAPH
        for paragraph_number, paragraph_object in record[2].items():
            position = ':' + doi + '_' + str(section_number) + '-' + str(paragraph_number)
            paragraph_positions.append(position)
        
        joined_paragraph_positions = ', '.join(paragraph_positions)
        print(f"SciPub:directlyContainsDocumentPart {joined_paragraph_positions} .")
        print()  
        
        for paragraph_number, paragraph_object in record[2].items():
            sentence_positions = []
            
            # RDF>> PARAGRAPH rdf:type Paragraph
            # RDF>> PARAGRAPH sp:directlyContainsDocumentPart SENTENCE
            for sentence_number, sentence_text in paragraph_object.items():
                position = ':' + doi + '_' + str(section_number) + '-' + str(paragraph_number) + '-' + str(sentence_number)
                sentence_positions.append(position)
            
            joined_sentence_positions = ', '.join(sentence_positions)
            print(f":{doi}_{section_number}-{paragraph_number} rdf:type SciPub:Paragraph ;")
            print(f"SciPub:directlyContainsDocumentPart {joined_sentence_positions} .")
            print()
            
            # RDF>> SENTENCE rdf:type Sectence
            # RDF>> SENTENCE sp:positionInParentDocumentPart 'POSITION'^^xsd:nonNegativeInteger
            # RDF>> SENTENCE sp:nextDocumentPart SENTENCE/PARAGRAPH/SECTION
            # RDF>> SENTENCE sp:sentenceText 'TEXT_OF_SENTENCE'^^xsd:string
            for sentence_number, sentence_text in paragraph_object.items():
                
                # replacing ' with \' in text
                if "'" in sentence_text:
                    sentence_text = sentence_text.replace("'", r"\'")
                    
                print(f":{doi}_{section_number}-{paragraph_number}-{sentence_number} rdf:type SciPub:Sentence ;")
                print(f"SciPub:positionInParentDocumentPart '{sentence_number}'^^xsd:nonNegativeInteger ;")
                
                if sentence_number == len(sentence_positions):
                    if paragraph_number == len(paragraph_positions):
                        print(f"SciPub:nextDocumentPart :{doi}_{next_section} ;")
                    else:
                        print(f"SciPub:nextDocumentPart :{doi}_{section_number}-{paragraph_number+1} ;")
                else:
                    print(f"SciPub:nextDocumentPart :{doi}_{section_number}-{paragraph_number}-{sentence_number+1} ;")
                    
                print(f"SciPub:sentenceText '{sentence_text}'^^xsd:string .")
                print()
                
                # RDF>> LABELED_TERM rdf:type AtomicLabeledTerm/CompoundLabeledTerm
                # RDF>> LABELED_TERM sp:labeledTermText 'TEXT_OF_TERM'^^xsd:string
                # RDF>> LABELED_TERM sp:labeledTermIsContainedBy SENTENCE
                # RDF>> LABELED_TERM sp:offset 'STARTING_POSITION'^^xsd:nonNegativeInteger
                # RDF>> LABELED_TERM sp:length 'LENGTH'^^xsd:nonNegativeInteger
                # RDF>> LABELED_TERM sp:hasLabel LABEL
                doc = nlp(sentence_text)
                for ent in doc.ents:
                    print(f":LabeledTerm{term_number} rdf:type SciPub:AtomicLabeledTerm ;")  ## DEAL WITH ATOMIC / COMPOUND
                    print(f"SciPub:labeledTermText '{ent.text}'^^xsd:string ;")
                    print(f"SciPub:documentPartIsDirectlyContainedBy :{doi}_{section_number}-{paragraph_number}-{sentence_number} ;")
                    print(f"SciPub:offset '{ent.start_char}'^^xsd:nonNegativeInteger ;")
                    print(f"SciPub:length '{ent.end_char - ent.start_char}'^^xsd:nonNegativeInteger ;")
                    
                    for label_number, label_text in labeling_schema.items():
                        if label_text == ent.label_:
                            print(f"SciPub:hasLabel :Label{label_number} .")
                            break
                        else:
                            print(f"SciPub:hasLabel :Label_0 .")
                            break
                            
                    term_number += 1
                    print()
            
        print('#=================== TEMPORARY SECTION DIVIDER ===================#')
        print()
        
# Emit one SciPub:Label individual per entry of labeling_schema.
# RDF>> LABEL rdf:type Label
# RDF>> LABEL sp:fromLabelingSchema LABELING_SCHEMA
# RDF>> LABEL sp:labelText 'TEXT_OF_LABEL'^^xsd:string
for label_number, label_text in labeling_schema.items():
    # Escape apostrophes the same way sentence text is escaped before printing,
    # so a label containing ' cannot terminate the single-quoted literal early.
    escaped_label_text = str(label_text).replace("'", r"\'")

    print(f":Label{label_number} rdf:type SciPub:Label ;")
    print(f"SciPub:fromLabelingSchema :LabelingSchema{label_number} ;")
    print(f"SciPub:labelText '{escaped_label_text}'^^xsd:string .")
    print()

# Emit the LabelingSchema individual referenced by each label above.
# RDF>> LABELING_SCHEMA rdf:type LabelingSchema
# Iterating the dict directly yields its keys; the original unpacked the unused
# value into `_`, which at notebook top level shadows IPython's last-output variable.
for label_number in labeling_schema:
    print(f":LabelingSchema{label_number} rdf:type SciPub:LabelingSchema .")
    

# Connect to GraphDB

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import json

In [None]:
# connect and query graph database

# specify the repository
# The endpoint defaults to the original development host. Set the
# SCIPUB_SPARQL_ENDPOINT environment variable to point the notebook at another
# GraphDB instance without editing this cell.
sparql_endpoint = os.environ.get(
    "SCIPUB_SPARQL_ENDPOINT",
    "http://LAPTOP-S7TVD5I4:7200/repositories/SciPub",
)
sparql = SPARQLWrapper(sparql_endpoint)

# SPARQL query
# Selects every sentence directly contained in a SciPub:Paragraph together with
# its text. The OPTIONAL group adds labeled-term text, offset, length and label
# text when a labeled term points at the sentence, so a result row binds either
# all seven variables or only sentenceID, sentenceText and systemDateTime.
sparql.setQuery("""
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX : <http://spatialai.org/SciPub/v2.0#>
    PREFIX SciPub: <http://spatialai.org/SciPub/v2.0#>

    SELECT ?sentenceID ?sentenceText ?labeledTermText ?offset ?length ?labelText (NOW() AS ?systemDateTime)
    WHERE {
        ?paragraphID rdf:type SciPub:Paragraph ;
                     SciPub:directlyContainsDocumentPart ?sentenceID .

        ?sentenceID SciPub:sentenceText ?sentenceText .

        OPTIONAL {
            ?labeledTermID SciPub:documentPartIsDirectlyContainedBy ?sentenceID ;
                           SciPub:labeledTermText ?labeledTermText ;
                           SciPub:offset ?offset ;
                           SciPub:length ?length ;
                           SciPub:hasLabel ?labelID .

            ?labelID rdf:type SciPub:Label ;
                     SciPub:labelText ?labelText .
        }
    }
""")

# convert results to JSON
sparql.setReturnFormat(JSON)
fetched_data = sparql.query().convert()


In [None]:
# convert query result from JSON to dataframe

def _format_system_datetime(raw_datetime):
    """Return the 'YYYY-MM-DD hh:mm:ss' part of an xsd:dateTime string.

    For four-digit years the first 19 characters of the lexical form are the
    date and the time down to whole seconds. Slicing from the left therefore
    works whether or not fractional seconds or a timezone suffix follow, unlike
    trimming a fixed number of characters from the right.
    """
    return raw_datetime[:19].replace('T', ' ')


column_header = ['sentence_id', 'sentence_text', 'labeled_term_text', 'offset', 'length', 'label_text', 'system_datetime']
fetched_data_df = pd.DataFrame(columns=column_header)

# One list per output column, kept at module level under their original names
# so that later cells reading them keep working.
sentence_id = []
sentence_text = []
labeled_term_text = []
offset = []
length = []
label_text = []
system_datetime = []

# SPARQL variable name -> list collecting its values.
sentence_targets = {'sentenceID': sentence_id, 'sentenceText': sentence_text}
term_targets = {
    'labeledTermText': labeled_term_text,
    'offset': offset,
    'length': length,
    'labelText': label_text,
}

for record in fetched_data['results']['bindings']:
    record_size = len(record)

    # 7 bindings: sentence with a labeled term; 3 bindings: sentence only.
    if record_size not in (3, 7):
        print('Different record length (should be 7 or 3) found in SPARQL result (JSON format).')
        continue

    for key, value in record.items():
        if key in sentence_targets:
            sentence_targets[key].append(value['value'])
        elif key == 'systemDateTime':
            system_datetime.append(_format_system_datetime(value['value']))
        elif record_size == 7 and key in term_targets:
            term_targets[key].append(value['value'])

    # Sentences without a labeled term get empty placeholders so that every
    # column list grows by exactly one entry per record.
    if record_size == 3:
        for target in term_targets.values():
            target.append('')

column_values = dict(zip(
    column_header,
    (sentence_id, sentence_text, labeled_term_text, offset, length, label_text, system_datetime),
))

# Fill the frame only when all columns ended up with the same number of rows;
# otherwise leave it empty and report the mismatch.
if len({len(values) for values in column_values.values()}) == 1:
    for column_name, values in column_values.items():
        fetched_data_df[column_name] = values
else:
    print('Different length found for dataframe columns.')

In [None]:
# Display the assembled query-result DataFrame as this cell's output.
fetched_data_df

In [None]:
# create JSON from dataframe
#
# Builds {'classes': [], 'annotations': [...]} where each annotation is
# [sentence_id, sentence_text, {'entities': [...]}] and each entity is
# [start, end, [label_text, [['Candidate', system_datetime, 'NER Model']]]].
# Rows without a labeled term produce a placeholder entity made of empty strings.

distinct_sentence_id = list(fetched_data_df['sentence_id'].unique())
fetched_data_json = {'classes': [], 'annotations': []}

# The loop variable is deliberately not called `sentence_id`, so the column list
# of that name is not overwritten with a single id.
for current_sentence_id in distinct_sentence_id:
    fetched_data_sub_df = fetched_data_df.loc[fetched_data_df['sentence_id'] == current_sentence_id]

    # Read entity fields from the rows of the sub-frame itself rather than from
    # module-level lists indexed by row label; the result no longer depends on
    # those lists still being aligned with the DataFrame index.
    entities = []
    for row in fetched_data_sub_df.itertuples(index=False):
        if row.offset == '' or row.length == '' or row.label_text == '':
            entities.append(['', '', ['', [['', '', '']]]])
        else:
            start = int(row.offset)
            end = start + int(row.length)
            entities.append([start, end, [row.label_text, [['Candidate', row.system_datetime, 'NER Model']]]])

    fetched_data_json['annotations'].append([
        current_sentence_id,
        # every row of the sub-frame belongs to the same sentence; take the first
        fetched_data_sub_df['sentence_text'].iloc[0],
        {'entities': entities},
    ])


In [None]:
# Serialize the annotation dictionary to a JSON string (shown as the cell output).
json.dumps(fetched_data_json)

In [None]:
# Scratch sample sentence. Inside a double-quoted Python literal \' is simply an
# apostrophe, so the string stored in `a` contains no backslashes.
a = "To explain this deviation of the \'ideal behavior\', this paper discusses extensively the impact of the nanoparticle shape on the nanocomposite permeability along with structural aspects, related to both the particle nature and size, and the nanocomposite processing routes."

In [None]:
# NOTE(review): "\'" and "'" are the same one-character string in Python, so this
# call returns a string equal to `a`; matching a literal backslash followed by an
# apostrophe would need "\\'" as the search pattern.
a.replace("\'", "'")

In [None]:
# Scratch sample sentence (rebinds `a`); the following cell slices it by character position.
a = "It is well known that inclusion of homogeneously dispersed and oriented impermeable fillers with high aspect ratio, such as platelets or elongated particles, should significantly increase the diffusion path of gas and vapors and yield to improve barrier properties."

In [None]:
# Spot-check of a 6-character span: characters at positions 218 through 223 of `a`.
a[218:224]