# Calculate the node degrees of EcoIN and core-EcoIN, then annotate the 20 nodes with the highest out-degree

# Import the required packages

In [1]:
import networkx as nx
import numpy as np
import pandas as pd
from EcoIN_analysis_function import *

# Input files

In [2]:
# Network data files (loaded later through initial_network).
total_file = './EcoIN.txt'            # all EcoIN data
core_total_file = './core-EcoIN.txt'  # high-quality EcoIN data (core-EcoIN)

# Annotation tables passed to the *_anontation helpers when labelling nodes.
total_reaction_file = './reaction_information.txt'  # reaction annotation information
total_gene_file = './ecogene_information.txt'       # gene annotation information
total_met_file = './metabolite_information.txt'     # metabolite annotation information


# Output files

In [3]:
# Every result file produced by this notebook lives under this directory.
run_file = './network_analysis/'
# Create the output directory (create_file is not defined in this notebook;
# presumably it comes from the EcoIN_analysis_function star-import).
create_file(run_file)

# Per-node degree tables, one per network.
EcoIN_node_analysis_degree = run_file + 'EcoIN_node_analysis_degree.txt'
core_EcoIN_node_analysis_degree = run_file + 'core_EcoIN_node_analysis_degree.txt'

# Top-20 out-degree outputs for EcoIN: raw rows, annotated rows, per-edge-type detail.
EcoIN_TOP20_out_Degree_file = run_file + 'EcoIN_TOP20_out_Degree_file.txt'
EcoIN_TOP20_out_Degree_file_ano = run_file + 'EcoIN_TOP20_out_Degree_file_ano.txt'
EcoIN_TOP20_out_Degree_file_detail = run_file + 'EcoIN_TOP20_out_Degree_file_detail.txt'

# Same three outputs for core-EcoIN (note: the file names use the 'coreEcoIN_' prefix).
core_EcoIN_TOP20_out_Degree_file = run_file + 'coreEcoIN_TOP20_out_Degree_file.txt'
core_EcoIN_TOP20_out_Degree_file_ano = run_file + 'coreEcoIN_TOP20_out_Degree_file_ano.txt'
core_EcoIN_TOP20_out_Degree_file_detail = run_file + 'coreEcoIN_TOP20_out_Degree_file_detail.txt'

./network_analysis/


# Network initialization

In [4]:
# Build one directed graph per data set. Each call hands a fresh, empty DiGraph
# plus the data file to initial_network and keeps whatever that helper returns.
# NOTE(review): initial_network is not defined in this notebook; later cells
# read an 'edgetype' attribute from the edges, so it presumably sets one — confirm.
DG_total = initial_network(nx.DiGraph(), total_file)
DG_core = initial_network(nx.DiGraph(), core_total_file)


# Out-degree analysis

Calculate the in-degree, out-degree, and total degree of every node in EcoIN and core-EcoIN

In [5]:
# Write one tab-separated row per node, first for EcoIN and then for core-EcoIN:
#     node id <TAB> annotation <TAB> in-degree <TAB> out-degree <TAB> total degree
# The annotation is the concatenation of the reaction, gene and metabolite
# lookups for the node (presumably at most one of them is non-empty — confirm
# against EcoIN_analysis_function).
for graph, degree_path in (
    (DG_total, EcoIN_node_analysis_degree),
    (DG_core, core_EcoIN_node_analysis_degree),
):
    # `with` guarantees the file is flushed and closed even if one of the
    # annotation helpers raises part-way through the node loop.
    with open(degree_path, 'w') as outFile:
        for eachnode in graph.nodes():
            anostr = (
                reaction_anontation(eachnode, total_reaction_file)
                + gene_anontation(eachnode, total_gene_file)
                + met_anontation(eachnode, total_met_file)
            )
            outstr = '\t'.join([
                eachnode,
                anostr,
                str(graph.in_degree(eachnode)),
                str(graph.out_degree(eachnode)),
                str(graph.degree(eachnode)),
            ]) + '\n'
            outFile.write(outstr)

Select the 20 nodes with the highest out-degree in EcoIN and core-EcoIN

In [6]:
# The degree tables were written without a header row, so column names are
# supplied here. For each network: load the table, keep the 20 rows with the
# largest out_degree, and write them back out headerless and tab-separated.
degree_columns = ['node_id', 'node', 'in_degree', 'out_degree', 'total_degree']

for degree_table_path, top20_path in (
    (EcoIN_node_analysis_degree, EcoIN_TOP20_out_Degree_file),
    (core_EcoIN_node_analysis_degree, core_EcoIN_TOP20_out_Degree_file),
):
    lc = pd.read_csv(degree_table_path, sep='\t', names=degree_columns)
    TOP20_out_Degree = lc.sort_values(["out_degree"], ascending=False).head(20)
    TOP20_out_Degree.to_csv(
        top20_path,
        sep='\t',
        mode='w',
        header=False,
        index=False,
    )

Node annotation for EcoIN

In [7]:
# For every row of the EcoIN top-20 out-degree table, write:
#   * to the "_ano" file:    annotation <TAB> the original row, unchanged
#   * to the "_detail" file: annotation <TAB> node id <TAB> count <TAB> edge type,
#     one line per distinct 'edgetype' found on the node's outgoing edges.
#
# All three files are managed by a single `with`, so they are closed even if a
# lookup raises. The input is opened first: if it is missing, the two output
# files are not created or truncated.
with open(EcoIN_TOP20_out_Degree_file) as top20_rows, \
        open(EcoIN_TOP20_out_Degree_file_detail, 'w') as outFile, \
        open(EcoIN_TOP20_out_Degree_file_ano, 'w') as outFile2:
    for eachdata in top20_rows:
        data = eachdata.split('\t')
        node_id = data[0]  # first column of the top-20 table is the node id

        anostr = (
            reaction_anontation(node_id, total_reaction_file)
            + gene_anontation(node_id, total_gene_file)
            + met_anontation(node_id, total_met_file)
        )
        # eachdata still carries its trailing newline, so none is appended here.
        outFile2.write(anostr + '\t' + eachdata)

        # Tally the outgoing edges by 'edgetype' in a single pass instead of
        # calling list.count() once per distinct type.
        edge_type_counts = {}
        for successor in DG_total.successors(node_id):
            edge_type = DG_total.get_edge_data(node_id, successor)['edgetype']
            edge_type_counts[edge_type] = edge_type_counts.get(edge_type, 0) + 1

        # Emit edge types in sorted order so the detail file is identical from
        # run to run (iterating a set of strings is not order-stable).
        for edge_type in sorted(edge_type_counts):
            outstr = (anostr + '\t' + node_id + '\t'
                      + str(edge_type_counts[edge_type]) + '\t' + edge_type + '\n')
            outFile.write(outstr)



Node annotation for core-EcoIN

In [8]:
# For every row of the core-EcoIN top-20 out-degree table, write:
#   * to the "_ano" file:    annotation <TAB> the original row, unchanged
#   * to the "_detail" file: annotation <TAB> node id <TAB> count <TAB> edge type,
#     one line per distinct 'edgetype' found on the node's outgoing edges.
#
# All three files are managed by a single `with`, so they are closed even if a
# lookup raises. The input is opened first: if it is missing, the two output
# files are not created or truncated.
with open(core_EcoIN_TOP20_out_Degree_file) as top20_rows, \
        open(core_EcoIN_TOP20_out_Degree_file_detail, 'w') as outFile, \
        open(core_EcoIN_TOP20_out_Degree_file_ano, 'w') as outFile2:
    for eachdata in top20_rows:
        data = eachdata.split('\t')
        node_id = data[0]  # first column of the top-20 table is the node id

        anostr = (
            reaction_anontation(node_id, total_reaction_file)
            + gene_anontation(node_id, total_gene_file)
            + met_anontation(node_id, total_met_file)
        )
        # eachdata still carries its trailing newline, so none is appended here.
        outFile2.write(anostr + '\t' + eachdata)

        # Tally the outgoing edges by 'edgetype' in a single pass instead of
        # calling list.count() once per distinct type.
        edge_type_counts = {}
        for successor in DG_core.successors(node_id):
            edge_type = DG_core.get_edge_data(node_id, successor)['edgetype']
            edge_type_counts[edge_type] = edge_type_counts.get(edge_type, 0) + 1

        # Emit edge types in sorted order so the detail file is identical from
        # run to run (iterating a set of strings is not order-stable).
        for edge_type in sorted(edge_type_counts):
            outstr = (anostr + '\t' + node_id + '\t'
                      + str(edge_type_counts[edge_type]) + '\t' + edge_type + '\n')
            outFile.write(outstr)

