In [1]:
# from neomodel import config

# from neomodel import (config, StructuredNode, StringProperty, IntegerProperty,
#     UniqueIdProperty, RelationshipTo)
# config.DATABASE_URL = 'bolt://neo4j:WBrtpKCUW28e@44.206.130.87:7687'
# config.DATABASE_URL = 'http://44.206.130.87:7474'

In [8]:
from __future__ import annotations
from neomodel import (StructuredNode, StructuredRel,
                    OneOrMore, ZeroOrOne,ZeroOrMore,One,
                    RelationshipTo,RelationshipFrom,
                    StringProperty, FloatProperty,
                    IntegerProperty,UniqueIdProperty)

from neomodel import config, db


### ###    
    
class HasregRel(StructuredRel):
    """Properties stored on ``HasReg`` edges (declared between ``Fasta`` and ``Region``)."""

    # Float identifier of the region; the loader derives it from get_props.
    regid = FloatProperty(required=True)
    # Integer flag; the loader passes the 1/0 value returned by get_props.
    iselemental = IntegerProperty(required=True)


class HasmemberRel(StructuredRel):
    """Edge model for ``HasMember`` relationships; it carries no properties."""


class HasantiRel(StructuredRel):
    """Edge model for ``HasAnti`` relationships; it carries no properties."""


class HasdownstreamRel(StructuredRel):
    """Properties stored on ``HasDownstream`` edges between regions."""

    # Both values are supplied by the caller of Region.connect_last_region.
    begin_at = IntegerProperty(required=True)
    nested = IntegerProperty(required=True)


class HasdownstreamsetRel(StructuredRel):
    """Properties stored on ``hasDownstreamSet`` edges between region sets."""

    # Starts at 1 and is incremented by Region.connect_last_region whenever
    # the same set-to-set edge is encountered again.
    degree = IntegerProperty(required=True)
    
## #####


class Fasta(StructuredNode):
    """Node holding the original record of a full nucleotide sequence.

    Both ``name`` and ``seq`` are mandatory string properties.
    """

    name = StringProperty(required=True)
    seq = StringProperty(required=True)

    # Outgoing HasReg edges to the regions defined on this sequence.
    hasreg = RelationshipTo(
        "Region",
        "HasReg",
        cardinality=OneOrMore,
        model=HasregRel,
    )
    # The two ends of the optional Fasta -> Fasta HasAnti edge.
    hasanti = RelationshipTo(
        "Fasta",
        "HasAnti",
        cardinality=ZeroOrOne,
        model=HasantiRel,
    )
    isanti = RelationshipFrom(
        "Fasta",
        "HasAnti",
        cardinality=ZeroOrOne,
        model=HasantiRel,
    )
    
class Genome(Fasta):
    """A complete genome sequence; extends ``Fasta`` with provenance fields."""

    # Mandatory; the loader in this notebook passes 'GenBank'.
    source = StringProperty(required=True)
    # Declared with a unique index.
    accession = StringProperty(unique_index=True)
    # Optional free text.
    annotation = StringProperty()
    # NOTE(review): the original left a bare "circular" marker here --
    # presumably a planned property; confirm before adding it.

from typing import Optional

class Region(StructuredNode):
    """A region in a genome, delimited by the offsets ``b`` and ``e``.

    Subclasses override ``connect_regionset`` to attach the region to the
    matching kind of region set.
    """

    name = StringProperty(required=True)
    b = IntegerProperty(required=True)
    e = IntegerProperty(required=True)

    # Incoming edges: the sequence this region was cut from, and its set.
    regfrom = RelationshipFrom(
        "Fasta", "HasReg",
        cardinality=One, model=HasregRel,
    )
    memberof = RelationshipFrom(
        "RegionSet", "HasMember",
        cardinality=One, model=HasmemberRel,
    )

    # Both ends of the Region -> Region HasDownstream edge.
    hasdownstream = RelationshipTo(
        "Region", "HasDownstream",
        cardinality=One, model=HasdownstreamRel,
    )
    downstreamof = RelationshipFrom(
        "Region", "HasDownstream",
        cardinality=One, model=HasdownstreamRel,
    )

    @property
    def seq(self):
        """Return the ``[b:e]`` slice of the ``seq`` of the first ``regfrom`` node."""
        parent_seq: str = self.regfrom[0].seq
        return parent_seq[self.b:self.e]

    def connect_regionset(self, regionprops: dict, rel_props: dict):
        """Attach this region to its region set; subclasses must implement it."""
        raise NotImplementedError

    def connect_fasta(self, fastaprops: dict, rel_props: dict):
        """Get or create a ``Genome`` from ``fastaprops`` and link it via ``regfrom``."""
        parent = Genome.get_or_create(fastaprops)[0]
        self.regfrom.connect(parent, rel_props)

    def connect_fasta_shortcut(self, genome: Genome, regid: float, iselemental: int):
        """Link an already loaded ``genome`` via ``regfrom`` with the given edge values."""
        edge_props = {"regid": regid, "iselemental": iselemental}
        self.regfrom.connect(genome, properties=edge_props)

    def connect_last_region(self, last_region: Optional[Region],
                            begin_at: int = 0, nested: int = 0):
        """Chain this region after ``last_region`` and update the set-level edge.

        Does nothing when ``last_region`` is ``None``. Otherwise this region
        is connected as downstream of ``last_region``, and the
        ``hasdownstreamset`` edge between the two regions' sets is created
        with ``degree`` 1, or has its ``degree`` incremented if it exists.

        Raises:
            TypeError: if ``last_region`` is neither ``None`` nor a ``Region``.
        """
        if last_region is None:
            return
        if not isinstance(last_region, Region):
            raise TypeError(f'last_region has wrong type: {type(last_region)}')

        self.downstreamof.connect(
            last_region,
            properties={"begin_at": begin_at, "nested": nested},
        )

        upstream_set: RegionSet = last_region.memberof[0]
        own_set: RegionSet = self.memberof[0]
        set_edge: HasdownstreamsetRel = upstream_set.hasdownstreamset.relationship(own_set)
        if set_edge is not None:
            # The set-level edge already exists: count one more occurrence.
            set_edge.degree = set_edge.degree + 1
            set_edge.save()
            return
        upstream_set.hasdownstreamset.connect(own_set, properties={"degree": 1})
    
    
    
class FuncDomain(Region):
    """A ``Region`` whose set is a ``FuncDomainSet`` and whose neighbours are ``DomainLinkage`` nodes."""

    memberof = RelationshipFrom(
        "FuncDomainSet", "HasMember",
        cardinality=One, model=HasmemberRel,
    )
    hasdownstream = RelationshipTo(
        "DomainLinkage", "HasDownstream",
        cardinality=One, model=HasdownstreamRel,
    )
    downstreamof = RelationshipFrom(
        "DomainLinkage", "HasDownstream",
        cardinality=One, model=HasdownstreamRel,
    )

    def connect_regionset(self,regionprops:dict,rel_props:dict={}):
        """Get or create the ``FuncDomainSet`` for ``regionprops`` and join it.

        ``rel_props`` is accepted for signature compatibility and is not used.
        """
        self.memberof.connect(FuncDomainSet.get_or_create(regionprops)[0])

    
class HmmFuncDomain(FuncDomain):
    """A ``FuncDomain`` that also records two mandatory HMM coordinates."""

    # The loader fills these from the "hmmStart" / "hmmEnd" CSV columns.
    hmmstart = IntegerProperty(required=True)
    hmmend = IntegerProperty(required=True)
    
    
class DomainLinkage(Region):
    """A ``Region`` whose set is a ``DomainLinkageSet`` and whose neighbours are ``FuncDomain`` nodes."""

    memberof = RelationshipFrom(
        "DomainLinkageSet", "HasMember",
        cardinality=One, model=HasmemberRel,
    )
    hasdownstream = RelationshipTo(
        "FuncDomain", "HasDownstream",
        cardinality=One, model=HasdownstreamRel,
    )
    downstreamof = RelationshipFrom(
        "FuncDomain", "HasDownstream",
        cardinality=One, model=HasdownstreamRel,
    )

    def connect_regionset(self,regionprops:dict,rel_props:dict={}):
        """Get or create the ``DomainLinkageSet`` for ``regionprops`` and join it.

        ``rel_props`` is accepted for signature compatibility and is not used.
        """
        self.memberof.connect(DomainLinkageSet.get_or_create(regionprops)[0])
    
class RegionSet(StructuredNode):
    """A named set of ``Region`` nodes, linked to other sets by ``hasDownstreamSet`` edges."""

    name = StringProperty(required=True)

    # Both ends of the set -> set hasDownstreamSet edge.
    hasdownstreamset = RelationshipTo(
        "RegionSet", "hasDownstreamSet",
        cardinality=ZeroOrMore, model=HasdownstreamsetRel,
    )
    downstreamsetof = RelationshipFrom(
        "RegionSet", "hasDownstreamSet",
        cardinality=ZeroOrMore, model=HasdownstreamsetRel,
    )
    # Outgoing HasMember edges to the regions that belong to this set.
    hasmember = RelationshipTo(
        "Region", "HasMember",
        cardinality=OneOrMore, model=HasmemberRel,
    )
    
class FuncDomainSet(RegionSet):
    """A ``RegionSet`` of functional domains, identified by its accession.

    Its ``hasDownstreamSet`` neighbours are ``DomainLinkageSet`` nodes.
    """

    # Indexed but deliberately NOT unique: the loader in this notebook
    # creates every set with source='Pfam', so a uniqueness constraint on
    # this property would reject every set after the first one.
    source = StringProperty(index=True)
    # The accession is the per-set identifier, so it keeps its unique index.
    accession = StringProperty(unique_index=True)
    # Optional free text.
    annotation = StringProperty()

    hasdownstreamset=RelationshipTo("DomainLinkageSet", "hasDownstreamSet", cardinality=ZeroOrMore, model=HasdownstreamsetRel)
    downstreamsetof=RelationshipFrom("DomainLinkageSet", "hasDownstreamSet", cardinality=ZeroOrMore, model=HasdownstreamsetRel)
    
class DomainLinkageSet(RegionSet):
    """A ``RegionSet`` of domain linkages; its neighbouring sets are ``FuncDomainSet`` nodes."""

    # Unlike the base class, both directions are limited to ZeroOrOne.
    hasdownstreamset = RelationshipTo(
        "FuncDomainSet", "hasDownstreamSet",
        cardinality=ZeroOrOne, model=HasdownstreamsetRel,
    )
    downstreamsetof = RelationshipFrom(
        "FuncDomainSet", "hasDownstreamSet",
        cardinality=ZeroOrOne, model=HasdownstreamsetRel,
    )



# class Genome(StructuredNode):
#     source = StringProperty()
#     name = StringProperty()
#     accession = StringProperty()
#     seq = StringProperty()
#     hasreg = RelationshipTo("Region", "HasReg", cardinality=OneOrMore, model="HasregRel")


# class HasregRel(StructuredRel):
#     regid = IntegerProperty()


# class FuncDomain(StructuredNode):
#     e = IntegerProperty()
#     b = IntegerProperty()
#     name = StringProperty()




RelationshipClassRedefined: Relationship of type HasReg redefined as <class '__main__.HasregRel'>.
HasReg --> <class '__main__.HasregRel'>
HasAnti --> <class '__main__.HasantiRel'>
Fasta --> <class '__main__.Fasta'>
Fasta,Genome --> <class '__main__.Genome'>
HasMember --> <class '__main__.HasmemberRel'>
HasDownstream --> <class '__main__.HasdownstreamRel'>
Region --> <class '__main__.Region'>
FuncDomain,Region --> <class '__main__.FuncDomain'>
HmmFuncDomain,FuncDomain,Region --> <class '__main__.HmmFuncDomain'>
Region,DomainLinkage --> <class '__main__.DomainLinkage'>
hasDownstreamSet --> <class '__main__.HasdownstreamsetRel'>
RegionSet --> <class '__main__.RegionSet'>
FuncDomainSet,RegionSet --> <class '__main__.FuncDomainSet'>
DomainLinkageSet,RegionSet --> <class '__main__.DomainLinkageSet'>


In [2]:
import os

# The connection string (including the password) is read from the
# environment so that credentials are never stored in the notebook.
# Export it first, e.g.  NEO4J_BOLT_URL=bolt://<user>:<password>@<host>:7687
# A missing variable raises KeyError here rather than failing later.
config.DATABASE_URL = os.environ["NEO4J_BOLT_URL"]
# Smoke test: a trivial query that fails fast if the server is unreachable.
results, meta = db.cypher_query("RETURN 'Hello World' as message")

In [3]:
from neomodel import clear_neo4j_database
# WARNING: destructive. Clears the connected database, and also drops its
# constraints and indexes as requested by the two flags. Run on purpose only.
clear_neo4j_database(db,clear_constraints=True,clear_indexes=True)

In [4]:
import pandas as pd
# NOTE(review): a later cell iterates over `acan`, which is only defined by
# the commented-out line below -- re-enable it before running that cell.
# acan=pd.read_csv('zika/zika-domains-acan.csv')
# Domain table used by the loader cells; path is relative to the working dir.
domains=pd.read_csv('zika/zika-domains.csv')
# with db.transaction:
#     Person(name='Bob').save()


In [5]:
from pathlib import Path
from Bio.Seq import Seq

def get_seq(k:str,fasta_dir='CoreData/genome_fasta'):
    """Return the last line of ``<fasta_dir>/<k>:genome.fasta``, stripped.

    Args:
        k: Genome key used to build the file name ``f"{k}:genome.fasta"``.
        fasta_dir: Directory that contains the FASTA files.

    Returns:
        The final line of the file with surrounding whitespace removed.
        NOTE(review): this is the whole sequence only if the file stores it
        on a single line -- confirm against the data files.

    Raises:
        FileNotFoundError: if the file does not exist.
        IndexError: if the file is empty.
    """
    fasta_path=Path(fasta_dir)/f"{k}:genome.fasta"
    # The context manager closes the handle; the previous bare open() left
    # one file descriptor open per genome.
    with open(fasta_path) as handle:
        lines=handle.readlines()
    return lines[-1].strip()

def get_rc_seq(seq:str):
    """Return the reverse complement of ``seq`` as a plain ``str``.

    Args:
        seq: Nucleotide sequence text.

    Returns:
        The reverse-complemented sequence as a string.
    """
    # str() is the public way to get the text of a Seq; the previous
    # `._data.decode()` relied on a private attribute being a bytes object.
    return str(Seq(seq).reverse_complement())


In [6]:
def get_props(last_b:int,last_e:int,b:int,e:int,id:int,sub_id:int):
    """Derive edge values, region ids and counters for the next domain.

    Args:
        last_b, last_e: Start and end of the previous domain.
        b, e: Start and end of the current domain; ``b`` must exceed ``last_b``.
        id: Running integer id counter.
        sub_id: Running sub-id counter, used while domains overlap.

    Returns:
        ``(begin_at, nested, this_lk_id, this_dm_id, linkb, iselemental,
        id, sub_id)``, where ``id`` and ``sub_id`` are the updated counters.
    """
    assert b>last_b

    if b >= last_e:
        # Starts at or after the previous end: integer ids, id advances by
        # two, sub_id restarts at 1, and the linkage begins at last_e.
        return (0, 0, float(id), float(id + 1), last_e, 1, id + 2, 1)

    # Starts before the previous end: ids become "<id>.<sub_id>" floats,
    # sub_id advances by two, and the linkage begins at b.
    linkage_id = float(f"{id}.{sub_id}")
    domain_id = float(f"{id}.{sub_id+1}")
    is_nested = 0 if e > last_e else 1
    return (b - last_e, is_nested, linkage_id, domain_id, b, 0, id, sub_id + 2)

In [7]:
from typing import Union,Optional
# Load every genome and its chain of linkage / functional-domain regions.
# Per genome: get or create the Genome node, walk its domains in order of
# `start`, and for each domain create the linkage that precedes it and the
# domain itself. A final linkage runs from the last domain to genome_length.
for g,subd in domains.groupby('genome_name'):
    # Sort into a new frame rather than in place: the group is a slice of
    # `domains`, and an in-place sort can raise SettingWithCopyWarning.
    subd=subd.sort_values(by='start')
    genome:Genome=Genome.get_or_create(dict(
            name=g,
            seq=get_seq(g),
            source='GenBank',
            accession=g.split('|')[-1]))[0]

    # Chain state must persist across the rows of one genome. It was
    # previously reset inside the row loop, so every row saw
    # last_region=None and id=0, and regions were never linked together.
    last_b,last_e,id,sub_id=0,0,0,1
    last_region:Optional[Region]=None
    last_domain_name='BEGIN'

    for _,s in subd.iterrows():
        # One transaction per domain row.
        with db.transaction:
            b,e=s['start'],s['end']
            # maxsplit=1 keeps any further ':' inside the annotation text
            # instead of failing the two-name unpacking.
            this_domain_name,annot=s['domain_annotation'].split(':',1)
            (begin_at,nested,this_lk_id,this_dm_id,
                linkb,iselemental,id,sub_id)=get_props(
                last_b,last_e,b,e,id,sub_id)

            # Linkage region that ends where the current domain starts.
            linkname=f'Linkage:{last_domain_name}:{this_domain_name}'
            linkage=DomainLinkage.create(dict(
                name=f'{s["genome_name"]}:{linkname}',
                b=linkb,e=b))[0]
            linkage.connect_fasta_shortcut(genome,this_lk_id,iselemental)
            linkage.connect_regionset(dict(
                name=linkname))
            linkage.connect_last_region(last_region,begin_at,nested)
            last_region=linkage

            # The functional domain itself, chained after the linkage.
            funcdomain_name=f'Funcdomain:{this_domain_name}'
            funcdomain=HmmFuncDomain.get_or_create(dict(
                name=f'{s["genome_name"]}:{funcdomain_name}',
                b=b,hmmstart=s["hmmStart"],
                e=e,hmmend=s["hmmEnd"]))[0]
            funcdomain.save()
            funcdomain.connect_fasta_shortcut(genome,this_dm_id,iselemental)
            funcdomain.connect_regionset(dict(
                        name=this_domain_name,source='Pfam',
                        annotation=annot,
                        accession=s['domain_accession']))
            funcdomain.connect_last_region(last_region,begin_at,nested)

            # Advance the chain state for the next row.
            last_region=funcdomain
            last_b,last_e=b,e
            last_domain_name=this_domain_name

    # Closing linkage from the last domain to the end of the genome.
    # `s` is still the last row of this genome (groups are never empty).
    with db.transaction:
        this_domain_name='END'
        b,e=s['genome_length'],-1
        (begin_at,nested,this_lk_id,this_dm_id,
                linkb,iselemental,id,sub_id)=get_props(
                last_b,last_e,b,e,id,sub_id)
        linkname=f'Linkage:{last_domain_name}:{this_domain_name}'
        linkage=DomainLinkage.create(dict(
            name=f'{s["genome_name"]}:{linkname}',
            b=linkb,e=b))[0]
        linkage.connect_fasta_shortcut(genome,this_lk_id,iselemental)
        linkage.connect_regionset(dict(
            name=linkname))
        linkage.connect_last_region(last_region,begin_at,nested)
        last_region=linkage
        
    

KeyboardInterrupt: 

In [6]:
# Create or update one FuncDomainSet per row of `acan`, in one transaction,
# and keep the resulting nodes in `domainset_dict` keyed by set name.
# NOTE(review): `acan` is not defined by any live statement visible in this
# notebook; the only mention is a commented-out read_csv of
# 'zika/zika-domains-acan.csv'. This cell raises NameError on a fresh kernel.
with db.transaction:
    domainset_dict={}
    for _,s in acan.iterrows():
        # The 'annotation' column is expected to read "<name>:<description>".
        name,annot=s['annotation'].split(':')
        domainset=FuncDomainSet.create_or_update(dict(
            name=name,
            source='Pfam',
            annotation=annot,
            accession=s['accession']))[0]
        domainset.save()
        domainset_dict[name]=domainset 

In [7]:
# Spot check: fetch the FuncDomainSet that matches the given properties.
# NOTE(review): `name`, `annot` and `s` are not assigned in this cell --
# presumably they are left over from the last iteration of an earlier loop,
# so the result depends on execution order.
FuncDomainSet.nodes.get(**dict(
            name=name,
            annotation=annot,
            accession=s['accession']))

<FuncDomainSet: {'name': 'DUF3640', 'source': 'Pfam', 'accession': 'PF12342', 'annotation': 'Protein of unknown function (DUF3640)', 'element_id_property': '4:16158f05-17a5-4aaf-b109-4117be641c45:0'}>

In [7]:
# Create or update one Genome per distinct genome_name among the rows whose
# strand is 'SENSE', all inside a single transaction.
with db.transaction:
    genome_dict={}
    for g in domains[domains['strand']=='SENSE']['genome_name'].unique():
        g:str
        # The value stored is the raw return of create_or_update; it is not
        # indexed with [0] here, unlike the other cells.
        genome_dict[g]=Genome.create_or_update(dict(
            name=g,
            seq=get_seq(g),
            source='GenBank',
            accession=g.split('|')[-1]))
    
    # g in domains[domains['strand']!='SENSE']['genome_name'].unique():

In [6]:
from typing import Union,Optional

# Scratch setup for stepping through the loader by hand.
linkageset_dict={}

linkage_dict={}
domain_dict={}

# Break on the first iteration so `g` and `subd` hold the first genome group.
for g,subd in domains.groupby('genome_name'):
    break

In [7]:
# Rebind to a sorted copy instead of sorting in place: `subd` is a group
# taken from `domains`, and an in-place sort on such a slice can raise
# SettingWithCopyWarning.
subd=subd.sort_values(by='start')

In [28]:
# Reset the chain state consumed by get_props and the linking cells.
last_b,last_e=0,0
id=0
sub_id=1
last_region:Optional[Region]=None
last_domain_name='BEGIN'
# A separate Genome.nodes.get_or_none(name=g) lookup used to precede this
# call; its result was overwritten immediately, so the extra query is gone.
genome:Genome=Genome.get_or_create(dict(
            name=g,
            seq=get_seq(g),
            source='GenBank',
            accession=g.split('|')[-1]))[0]

In [9]:
# Break on the first iteration so `s` holds the first row of `subd`, then
# display that row.
for _,s in subd.iterrows():
    break
s

genome_name                                    AHFV||AF331718
genome_length                                           10685
domain_accession                                      PF01003
strand                                                  SENSE
start                                                     130
end                                                       460
hmmStart                                                    2
hmmEnd                                                    115
evalue                                                    0.0
domain_annotation    Flavi_capsid:Flavivirus capsid protein C
Name: 1530, dtype: object

In [10]:
# Domain coordinates of the current row.
b,e=s['start'],s['end']
# 'domain_annotation' reads "<name>:<description>"; the two-name unpacking
# fails if the text contains more than one ':'.
this_domain_name,annot=s['domain_annotation'].split(':')


In [29]:
# Compute the edge values and region ids for the current row. This also
# rebinds `id` and `sub_id` to the updated counters returned by get_props.
(begin_at,nested,this_lk_id,this_dm_id,
    linkb,iselemental,id,sub_id)=get_props(
    last_b,last_e,b,e,id,sub_id)

In [12]:
# Get or create the linkage region that ends at the current domain's start,
# then attach it to `genome` through its incoming HasReg edge.
linkname=f'Linkage:{last_domain_name}:{this_domain_name}'
linkage=DomainLinkage.get_or_create(dict(
    name=f'{s["genome_name"]}:{linkname}',
    b=linkb,e=b))[0]
linkage.save()
linkage.regfrom.connect(genome,
        properties={"regid":this_lk_id,"iselemental":iselemental})


# genome.hasreg.connect(linkage,
#         properties={"regid":this_lk_id,"iselemental":iselemental})


<__main__.HasregRel at 0x7f229d0e1040>

In [13]:
# Get or create the DomainLinkageSet named after the linkage and make the
# linkage a member of it.
linkageset=DomainLinkageSet.get_or_create(dict(name=linkname))[0]
linkage.memberof.connect(linkageset)
# if  not linkname in linkageset_dict:
#     linkageset_dict[linkname]=DomainLinkageSet(
#         name=linkname)
    

# linkageset.hasmember.connect(linkage)


<__main__.HasmemberRel at 0x7f229758a660>

In [14]:
# Chain the linkage after the previous region via the shared helper.
# The hand-written version connected `hasdownstreamset` without the required
# `degree` property and never incremented it for a repeated set-to-set edge.
# Region.connect_last_region does both, and is a no-op when last_region is
# None. It needs `linkage` to be a member of its set already.
linkage.connect_last_region(last_region,begin_at,nested)
    # linkageset.downstreamsetof.connect(last_regionset)

In [15]:
# The linkage becomes the predecessor of the functional domain created next.
last_region=linkage

In [16]:
# Get or create the functional-domain region for the current row and attach
# it to `genome` through its incoming HasReg edge.
# NOTE(review): this scratch cell uses FuncDomain, while the full loader
# cell uses HmmFuncDomain with hmmstart/hmmend.
funcdomain_name=f'Funcdomain:{this_domain_name}'
funcdomain=FuncDomain.get_or_create(dict(
    name=f'{s["genome_name"]}:{funcdomain_name}',
    b=b,e=e))[0]
funcdomain.save()
funcdomain.regfrom.connect(genome,
        properties={"regid":this_dm_id,"iselemental":iselemental})
# genome.hasreg.connect(funcdomain,
#         properties={"regid":this_dm_id,"iselemental":iselemental})



# Get or create the FuncDomainSet described by the current row.
domainset=FuncDomainSet.get_or_create(dict(
            name=this_domain_name,
            source='Pfam',
            annotation=annot,
            accession=s['domain_accession']))[0]

# Membership is connected from the set side here (HasMember, set -> region).
domainset.hasmember.connect(funcdomain)
# funcdomain.memberof.connect(domainset)

    # if last_regionset.

<__main__.HasmemberRel at 0x7f22975899d0>

In [21]:
# Chain the functional domain after the previous region, then create the
# set-level hasDownstreamSet edge or bump its degree.
if isinstance(last_region,Region):
    # NOTE(review): this condition looks inverted -- disconnect is attempted
    # only when NO relationship to last_region exists. `downstreamof` is also
    # declared with cardinality=One, which may not permit disconnect at all;
    # confirm the intended behaviour before relying on this cell.
    if not funcdomain.downstreamof.relationship(last_region):
        funcdomain.downstreamof.disconnect(last_region)
    # NOTE(review): begin_at/nested are hard-coded to 0 here, whereas the
    # loader passes the values returned by get_props.
    funcdomain.downstreamof.connect(last_region,
        properties={"begin_at":0,"nested":0})
    
    
    last_regionset:RegionSet=last_region.memberof[0]
    rel=last_regionset.hasdownstreamset.relationship(domainset)
    if rel is None:
        # First time these two sets are linked: start the counter at 1.
        last_regionset.hasdownstreamset.connect(domainset,
        properties={"degree":1})
    else:
        # The edge already exists: count one more occurrence.
        rel.degree=rel.degree+1
        rel.save()
    


'agtgctctcgtttcagacaacgtgagtggcgctttgtttgtacttccttggtgggaaagttttgaagcgttaacgtgttgaggaaaagacagcttaggagaacaagagctggggatggccaaaggagccgtccttaaaggaaaggggggcggtccccctaggcgagtgccgaaagagaccgcaaaaaagacgcgtcaaggaccaggccgattgccaaatggactggtgttgatgcgcatgatgggagtgttgtggcatatgatcgccgggacggccaggagtccgattctcaagcgattctgggcgacagttccggtgcggcaggccatcgcagcgctccgcaaaattagaaagacagttggactgctactggactctctaaacagaagaagagggaagagaaggtcaaccactgggcttctcacatcaatcttgctggcctgcctggcgacactggtgatctccgcgacaattcgcagagagagaacaggggacatggtgatcagggcagaaggaaaggacgctgccacgcaagtggaagtcgtgaatggaacgtgcatcattctcgccacagacatggggagttggtgtgatgattcaatcatgtacgagtgcgtcactattgactcgggagaagaaccagttgatgtggactgtttctgcaggggcgtcgagcgggtgtccctggaatacgggaggtgtgggaagccagttggcggcagaagcaggaggtcggtgtcgattccagtgcatgctcatagtgatcttaccggaagagggcataagtggcttaggggggactcagtcaagacgcatctgacacgtgtggaaggctgggtatggaagaataagctcctgacgatggccttttgtgcagttgtgtggatggtcacagacagcttaccgacaaggttcattgtcataacagtggccctttgtctggctcccacatatgccactcggtgcacacacctgcagaaccgggactttgtttcaggg

In [None]:

# Connect the same HasMember edge from both ends: `memberof` is the incoming
# side on the region, `hasmember` the outgoing side on the set.
# NOTE(review): both calls describe the same set -> region edge, so one of
# them is presumably redundant; confirm.
linkage.memberof.connect(linkageset)
linkageset.hasmember.connect(linkage)
    

In [None]:
# Advance the chain state for the next row. This was previously assigned to
# a misspelt `lastb`, which left `last_b` (the name get_props reads) stale.
last_b,last_e=b,e
last_domain_name=this_domain_name
# NOTE(review): get_props already returns the advanced `id`, so this extra
# increment presumably double-counts; confirm before reusing this cell.
id+=1

In [5]:
# Rebind to a sorted copy instead of sorting in place: `subd` is a group
# taken from `domains`, and an in-place sort on such a slice can raise
# SettingWithCopyWarning.
subd=subd.sort_values(by='start')
# Preview the first two rows.
subd.iloc[:2]


Unnamed: 0,genome_name,genome_length,domain_accession,strand,start,end,hmmStart,hmmEnd,evalue,domain_annotation
1530,AHFV||AF331718,10685,PF01003,SENSE,130,460,2,115,3.4e-23,Flavi_capsid:Flavivirus capsid protein C
1527,AHFV||AF331718,10685,PF01570,SENSE,517,736,16,87,3e-11,Flavi_propep:Flavivirus polyprotein propeptide


In [4]:
# Break on the first iteration so `k` and `subd` hold the first genome group.
for k,subd in domains.groupby('genome_name'):
    break