## Save weighted networks as GraphML

In [37]:
import matplotlib.pyplot as plt 
import networkx as nx
import pandas as pd
import numpy as np

from os import listdir
from os.path import isfile, join
 

### Model as networks

In [33]:
#reactions = pd.read_csv('UNIMIB_data/ReactionMetabolites_list/R22_reactions.txt',sep="\t")
# Tab-separated metabolite table, loaded once at module level.
# read_network() uses its 'ID' column to tell metabolite nodes apart from
# all other nodes.
metabolites= pd.read_csv('UNIMIB_data/ReactionMetabolites_list/R22_metabolites.txt',sep="\t")


def read_network(filename):
    """Build an undirected weighted graph from a tab-separated edge list.

    The file is parsed with ``pd.read_csv(filename, sep="\\t")``. Columns 0
    and 1 hold the two endpoints of each edge and column 2 its weight, which
    is stored as a float under the edge attribute ``'peso'``.

    Endpoints found in the module-level ``metabolites['ID']`` column are
    inserted first and every other endpoint afterwards, so the node order of
    the graph is "metabolites, then the rest".

    The whole graph is returned; it is not reduced to its largest connected
    component.
    """
    edge_table = pd.read_csv(filename, sep="\t")
    sources = edge_table.iloc[:, 0]
    targets = edge_table.iloc[:, 1]
    weights = edge_table.iloc[:, 2]

    # One flat, re-indexed series holding every endpoint of every edge.
    endpoints = pd.concat([sources, targets], ignore_index=True)
    is_metabolite = endpoints.isin(metabolites['ID'])

    graph = nx.Graph()  # undirected
    graph.add_nodes_from(endpoints[is_metabolite])   # metabolite nodes
    graph.add_nodes_from(endpoints[~is_metabolite])  # all other nodes
    for source, target, weight in zip(sources, targets, weights):
        graph.add_edge(source, target, peso=float(weight))
    return graph
    
def threshold_network_bigger_than(G, thre):
    """Return the largest connected component of G after edge thresholding.

    Only edges whose ``'peso'`` attribute is ``>= thre`` are kept. Connected
    components are computed on those edges alone, and the largest one is
    returned as a read-only edge-subgraph view of ``G``, so node and edge
    attributes (including ``'peso'``) are the ones stored in ``G``.

    Parameters:
        G: networkx graph whose edges all carry a ``'peso'`` attribute.
        thre: inclusive lower bound on ``'peso'`` for an edge to be kept.

    Returns:
        A subgraph view of ``G`` containing exactly the kept edges of the
        largest component and the nodes they touch.

    Raises:
        Exception: if no edge reaches the threshold.
    """
    strong_edges = [(u, v) for u, v, attrs in G.edges(data=True)
                    if attrs['peso'] >= thre]
    if not strong_edges:
        raise Exception('filtered graph is empty')

    # Components are computed on the thresholded edges only; on ties max()
    # keeps the first largest component, as sorted(..., reverse=True)[0] did.
    largest = max(nx.connected_components(nx.Graph(strong_edges)), key=len)

    # G.subgraph(largest) would be node-induced and would bring back every
    # below-threshold edge of G joining two nodes of the component. Selecting
    # by edge keeps the threshold enforced in the returned graph.
    # Both endpoints of a kept edge lie in the same component, so testing one
    # endpoint is enough.
    component_edges = [(u, v) for u, v in strong_edges if u in largest]
    return G.edge_subgraph(component_edges)
         

### Main 

In [49]:
# Edge-weight cut-offs; one graph is written per (label, patient, cut-off).
thre = [10**-4, 10**-3, 10**-2, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

path = 'approaches/weigth/'
labels = ['cancer', 'normal']

# Patient files are listed from the cancer folder only; the normal folder is
# expected to contain the same file names.
cancer_dir = 'approaches/weigth/cancer'
pacientes = [name for name in listdir(cancer_dir) if isfile(join(cancer_dir, name))]

num_pacientes = len(pacientes)
num_thre = len(thre)  # number of ">= threshold" cut-offs

for label in labels:
    print(label)
    for paciente in pacientes:
        G = read_network(path + label + '/' + paciente)  # full weighted graph
        print(paciente)
        # Output name stem: the input file name minus its last 4 characters
        # (the ".txt" suffix).
        stem = "nets_generated/" + label + '/' + paciente[0:-4]
        for cutoff in thre:
            Gthre = threshold_network_bigger_than(G, cutoff)  # >= threshold
            file = stem + "_thre=" + str(cutoff) + ".graphml"
            nx.write_graphml(Gthre, file)


cancer
TCGA_E2_A15K.txt
TCGA_BH_A1F0.txt
TCGA_A7_A0DB.txt
TCGA_BH_A1FU.txt
TCGA_E9_A1RB.txt
TCGA_BH_A0H9.txt
TCGA_BH_A0H5.txt
TCGA_BH_A0DV.txt
TCGA_BH_A0DH.txt
TCGA_BH_A1EO.txt
TCGA_BH_A18Q.txt
TCGA_A7_A13G.txt
TCGA_BH_A0DO.txt
TCGA_A7_A0CH.txt
TCGA_BH_A0HA.txt
TCGA_BH_A0E1.txt
TCGA_BH_A0BW.txt
TCGA_BH_A18U.txt
TCGA_BH_A18V.txt
TCGA_E9_A1R7.txt
TCGA_E2_A1LH.txt
TCGA_BH_A0E0.txt
TCGA_BH_A1F2.txt
TCGA_BH_A0BV.txt
TCGA_E2_A153.txt
TCGA_BH_A0B7.txt
TCGA_BH_A1FM.txt
TCGA_BH_A203.txt
TCGA_BH_A0DK.txt
TCGA_E9_A1N4.txt
TCGA_BH_A0DT.txt
TCGA_BH_A0B3.txt
TCGA_E9_A1RI.txt
TCGA_E9_A1NA.txt
TCGA_BH_A1F8.txt
TCGA_BH_A0AY.txt
TCGA_BH_A1FH.txt
TCGA_E9_A1NG.txt
TCGA_BH_A18S.txt
TCGA_BH_A0DG.txt
TCGA_E9_A1N5.txt
TCGA_BH_A1EN.txt
TCGA_BH_A0BQ.txt
TCGA_A7_A0CE.txt
TCGA_E9_A1RD.txt
TCGA_BH_A0DQ.txt
TCGA_E2_A15M.txt
TCGA_E2_A158.txt
TCGA_BH_A1EW.txt
TCGA_BH_A0C0.txt
TCGA_E9_A1NF.txt
TCGA_BH_A18L.txt
TCGA_BH_A0BZ.txt
TCGA_E2_A1IG.txt
TCGA_BH_A0B5.txt
TCGA_E9_A1RH.txt
TCGA_AC_A2FF.txt
TCGA_BH_A0C3.txt
TCGA_BH