In [4]:
import os
import xml.etree.ElementTree as ET

from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher
# Connection settings may be overridden through environment variables so the
# credentials do not have to be edited in the source. The fallbacks are the
# values this script has always used for a local Neo4j instance.
graph = Graph(
    os.environ.get("NEO4J_URI", "neo4j://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "neo4j123"),
    ),
)

# Load the XML file
tree = ET.parse('uniprot.xml')

# Get the root element
root = tree.getroot()

# Namespace map used by every find()/findall() lookup on the UniProt document
ns = {"uniprot": "http://uniprot.org/uniprot"}

 
def uniport_data():
    """Load one UniProt entry from the parsed XML tree into Neo4j.

    Reads the module-level ``root`` element (with the ``ns`` namespace map)
    and writes to the module-level ``graph``. It creates:

    * a ``Protein`` node holding the entry name and, as ``id``, the
      ``Q9Y261`` accession (empty string when the file lacks it);
    * a ``FullName`` node linked by ``HAS_FULL_NAME``;
    * one ``Gene`` node per gene name, linked by ``FROM_GENE`` with the
      name's ``type`` attribute stored as ``status`` on the relationship;
    * one ``Organism`` node per organism name, linked by ``PRESENT_IN``;
    * one ``Reference`` node per reference, linked by ``HAS_REFERENCE``,
      plus a ``Person`` node per listed author linked by ``HAS_AUTHOR``;
    * one ``Feature`` node per feature, linked by ``HAS_FEATURE`` with the
      ``position`` attribute (when the location has one) on the relationship.

    Everything is written with ``graph.create`` on freshly built nodes, so
    nothing is merged with data already in the database.

    Raises:
        AttributeError: if a mandatory element (entry name, protein,
            recommendedName, fullName, organism, its dbReference, or a
            reference's citation) is missing, because ``find`` returns None.
    """
    entry_name = root.find('.//uniprot:name', ns).text
    # Keep only the accession this loader targets.
    accessions = [
        acc.text
        for acc in root.findall(".//uniprot:accession", ns)
        if acc.text == 'Q9Y261'
    ]

    ## protein node
    # Labels are passed without a leading colon: py2neo uses the string as
    # the label name verbatim.
    prot = root.find('.//uniprot:protein', ns)
    prot_node = Node("Protein", name=entry_name, id="".join(accessions))
    graph.create(prot_node)

    ## fullName node
    recommended_name = prot.find('.//uniprot:recommendedName', ns)
    rec_full_name = recommended_name.find('.//uniprot:fullName', ns).text
    full_name_node = Node("FullName", name=rec_full_name)
    graph.create(full_name_node)
    graph.create(Relationship(prot_node, 'HAS_FULL_NAME', full_name_node))

    ## gene nodes
    # The gene element is optional; an entry without one simply gets no
    # Gene nodes.
    gene = root.find(".//uniprot:gene", ns)
    gene_names = gene.findall(".//uniprot:name", ns) if gene is not None else []
    for gene_name in gene_names:
        rel = Relationship(prot_node, "FROM_GENE", Node("Gene", name=gene_name.text))
        # The name's type attribute (compared against 'primary' originally).
        rel['status'] = gene_name.get("type")
        graph.create(rel)

    ## organism nodes
    organism = root.find(".//uniprot:organism", ns)
    taxonomy_id = organism.find(".//uniprot:dbReference", ns).get('id')
    for organism_name in organism.findall(".//uniprot:name", ns):
        organism_node = Node("Organism", name=organism_name.text, taxonomy_id=taxonomy_id)
        graph.create(Relationship(prot_node, "PRESENT_IN", organism_node))

    ## references
    for reference in root.findall(".//uniprot:reference", ns):
        citation = reference.find(".//uniprot:citation", ns)
        # Not every citation carries a title; store none rather than crash.
        title = citation.find(".//uniprot:title", ns)
        ref_node = Node(
            "Reference",
            id=reference.get('key'),
            type=citation.get('type'),
            name=citation.get('name'),
            volume=citation.get('volume'),
            title=title.text if title is not None else None,
        )
        graph.create(Relationship(prot_node, "HAS_REFERENCE", ref_node))

        # The author list is optional as well; skip authors when absent.
        author_list = citation.find(".//uniprot:authorList", ns)
        if author_list is None:
            continue
        for person in author_list.findall(".//uniprot:person", ns):
            # The name must be a keyword argument: positional arguments to
            # Node are labels, not properties.
            person_node = Node("Person", name=person.get('name'))
            graph.create(Relationship(ref_node, "HAS_AUTHOR", person_node))

    ## features
    for feat in root.findall(".//uniprot:feature", ns):
        # Only locations with a <position> child contribute a position; the
        # last such location wins, and the value stays None otherwise.
        position = None
        for loc in feat.findall(".//uniprot:location", ns):
            pos_elem = loc.find(".//uniprot:position", ns)
            if pos_elem is not None:
                position = pos_elem.get('position')

        feat_node = Node('Feature', type=feat.get('type'), name=feat.get('description'))
        rel = Relationship(prot_node, "HAS_FEATURE", feat_node)
        rel['position'] = position
        graph.create(rel)

    print('data is dumped into neo4g')
    

# Run the import: build the protein graph in Neo4j from the parsed UniProt XML.
uniport_data()


data is dumped into neo4g


In [None]:

# Fetch every node, together with each outgoing relationship and its target
# node when one exists, using a single Cypher query.

query = """
MATCH (n)
OPTIONAL MATCH (n)-[r]->(m)
RETURN n, r, m
"""
results = graph.run(query)

# Walk the result rows and print each non-null column under its caption.
for record in results:
    node, rel, dest = record['n'], record['r'], record['m']
    captioned = (("Node:", node), ("Relationship:", rel), ("Destination:", dest))
    for caption, entity in captioned:
        if entity is not None:
            print(caption, entity)