In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
import time
import random
import seaborn as sns
from glob import glob
import os
import networkx as nx
import generators
import collections

In [2]:
def graph_import(directory):
    """Load every edge-list file stored one level below ``directory``.

    Each non-hidden entry of ``directory`` is treated as a folder of edge-list
    files, and every file inside it is read with ``nx.read_edgelist``.

    Parameters
    ----------
    directory : str
        Folder whose sub-folders contain the edge-list files.

    Returns
    -------
    collections.defaultdict
        Maps each sub-folder name to the list of graphs read from it.
        Sub-folders that contain no file get no key. An empty ``directory``
        yields an empty mapping.
    """
    dict_of_graphs = collections.defaultdict(list)

    # The key is derived from the globbed path itself. Pairing glob() results
    # with os.listdir() by position is unsafe: glob('*') skips dot-entries
    # such as ".ipynb_checkpoints" while listdir() returns them, and neither
    # call promises any particular order.
    # Sorting makes the load order reproducible across machines.
    for graph_dir in sorted(glob(os.path.join(directory, '*'))):
        dir_name = os.path.basename(graph_dir)

        for edgelist in sorted(glob(os.path.join(graph_dir, '*'))):
            dict_of_graphs[dir_name].append(nx.read_edgelist(edgelist))

    return dict_of_graphs

In [3]:
# Load the generated graphs of each model family; every variable holds the
# mapping returned by graph_import for that family's folder.
rbfm_graphs, mixture_graphs, shm_graphs, uv_graphs = map(
    graph_import,
    (
        "./model_graphs/rbfm",
        "./model_graphs/mixture",
        "./model_graphs/shm",
        "./model_graphs/uvflower",
    ),
)

In [4]:
def aux_f(g):
    """Return the largest connected component of ``g`` without self-loops.

    The result is relabelled with consecutive integers via
    ``nx.convert_node_labels_to_integers``. The input graph is never modified.

    Parameters
    ----------
    g : networkx.Graph
        Undirected graph to clean up.

    Returns
    -------
    networkx.Graph
        New graph holding the largest connected component, self-loops removed
        and nodes relabelled as integers. A graph without nodes is returned as
        an (empty) copy.
    """
    # nx.is_connected raises NetworkXPointlessConcept for a graph with no
    # nodes, so that case is answered up front.
    if g.number_of_nodes() == 0:
        return g.copy()

    if nx.is_connected(g):
        largest_comp = g
    else:
        # max() returns the first component of maximal size, which is the same
        # one a stable descending sort would put first, without sorting all.
        largest_nodes = max(nx.connected_components(g), key=len)
        largest_comp = g.subgraph(largest_nodes).copy()

    if nx.number_of_selfloops(largest_comp) > 0:
        if largest_comp is g:
            # Copy before editing so the caller's graph keeps its self-loops.
            largest_comp = g.copy()
        largest_comp.remove_edges_from(list(nx.selfloop_edges(largest_comp)))

    return nx.convert_node_labels_to_integers(largest_comp)


def network_import_nx(directory):
    """Read every edge-list file in ``directory`` into a cleaned graph.

    For files whose name contains ``'.inp'`` the node pair is taken from the
    second and third whitespace-separated fields of each line; for all other
    files it is taken from the first two fields. Lines are parsed with
    ``nx.parse_edgelist`` using ``'%'`` as the comment marker, and the result
    is passed through ``aux_f``.

    Parameters
    ----------
    directory : str
        Folder containing the edge-list files.

    Returns
    -------
    list of (str, networkx.Graph)
        ``(file name, graph)`` pairs in sorted path order. An empty folder
        yields an empty list.
    """
    graphs_and_names = []

    # The file name is taken from the globbed path itself. Matching glob()
    # output against os.listdir() by index can pair a file with another
    # file's name, because glob('*') skips dot-entries and neither call
    # guarantees an order.
    for path in sorted(glob(os.path.join(directory, '*'))):
        file_name = os.path.basename(path)

        with open(path, 'r', encoding="ISO-8859-1") as edge_data:
            edge_lines = edge_data.readlines()

        if '.inp' not in file_name:
            first_field = 0
        else:
            print('inp')
            first_field = 1
        edge_list = [
            ' '.join(line.split()[first_field:first_field + 2])
            for line in edge_lines
        ]

        # Integer labels are assigned before aux_f extracts the component.
        graph = nx.convert_node_labels_to_integers(
            nx.parse_edgelist(edge_list, comments='%')
        )
        graphs_and_names.append((file_name, aux_f(graph)))

    return graphs_and_names


def network_import_nx_metabolic(directory):
    """Read every edge-list file in ``directory`` into a cleaned graph.

    Files whose name contains ``'_tab'`` are split on tab characters; all
    other files are split on arbitrary whitespace. In both cases the first two
    fields of each line form the node pair. Lines are parsed with
    ``nx.parse_edgelist`` using ``'%'`` as the comment marker, and the result
    is passed through ``aux_f``.

    Parameters
    ----------
    directory : str
        Folder containing the edge-list files.

    Returns
    -------
    list of (str, networkx.Graph)
        ``(file name, graph)`` pairs in sorted path order. An empty folder
        yields an empty list.
    """
    graphs_and_names = []

    # The file name is taken from the globbed path itself. Matching glob()
    # output against os.listdir() by index can pair a file with another
    # file's name, because glob('*') skips dot-entries and neither call
    # guarantees an order.
    for path in sorted(glob(os.path.join(directory, '*'))):
        file_name = os.path.basename(path)

        with open(path, 'r') as edge_data:
            edge_lines = edge_data.readlines()

        if '_tab' not in file_name:
            edge_list = [' '.join(line.split()[0:2]) for line in edge_lines]
        else:
            edge_list = [
                ' '.join(line.strip("\n").split("\t")[0:2])
                for line in edge_lines
            ]

        # Integer labels are assigned before aux_f extracts the component.
        graph = nx.convert_node_labels_to_integers(
            nx.parse_edgelist(edge_list, comments='%')
        )
        graphs_and_names.append((file_name, aux_f(graph)))

    return graphs_and_names


def network_matrix_import(directory):
    """Read whitespace-separated numeric matrices and turn them into graphs.

    Every file in ``directory`` whose name matches ``*.*`` is parsed as a
    numeric matrix. Each positive entry ``(i, j)`` becomes an edge between row
    node ``i`` and column node ``number_of_rows + j``. The resulting graph is
    passed through ``aux_f``.

    Parameters
    ----------
    directory : str
        Folder containing the matrix files.

    Returns
    -------
    list of (str, networkx.Graph)
        ``(file name, graph)`` pairs in sorted path order. A folder without a
        matching file yields an empty list.
    """
    def g_from_sh(matrix_list_of_list):
        """Build the row/column bipartite graph of a matrix's positive entries."""
        m = np.array(matrix_list_of_list)
        n_rows = m.shape[0]
        g = nx.Graph()
        # np.argwhere walks the matrix in row-major order, i.e. the order of a
        # nested row/column loop; .tolist() keeps node labels as Python ints.
        g.add_edges_from(
            (i, n_rows + j) for i, j in np.argwhere(m > 0).tolist()
        )
        return g

    graphs_and_names = []

    # The file name is taken from the globbed path itself. os.listdir() is not
    # filtered by '*.*' and has no guaranteed order, so indexing it with the
    # glob position can attach another entry's name to a matrix.
    for path in sorted(glob(os.path.join(directory, '*.*'))):
        with open(path, 'r') as matrix_data:
            # Blank lines would make the row lengths ragged; skip them.
            matrix = [line.split() for line in matrix_data if line.strip()]

        # The builtin float replaces the np.float alias removed in NumPy 1.24.
        array = np.array(matrix).astype(float)
        graphs_and_names.append(
            (os.path.basename(path), aux_f(g_from_sh(array)))
        )

    return graphs_and_names

In [5]:
# Gather the real-world networks of every category into one flat list of
# (file name, graph) pairs, in the order the folders are listed here.
graphs_real = [
    *network_import_nx('./Cheminformatics'),
    *network_import_nx_metabolic("./metabolic"),
    *network_import_nx("./Brain"),
    *network_import_nx("./infrastructural"),
    *network_import_nx("./foodweb/edgelist"),
    *network_matrix_import("./foodweb/adj"),
    *network_import_nx("./social"),
]

inp


___

### Ezek még hasznosak lehetnek:

In [6]:
## These helpers compute the number of nodes from the model parameters
def num_of_nodes_rbfm(n, m):
    """Return the node count for parameters ``n`` and ``m``.

    For ``m == 1`` the value is ``2 + (3**n - 1)``; otherwise it is
    ``2 + (m / (m + 1)) * ((2*m + 3)**n - 1)``, which is a float because of
    the true division.
    """
    if m != 1:
        ratio = m / (m + 1)
        growth = (2 * m + 3) ** n - 1
        return 2 + (ratio * growth)

    return 2 + (3 ** n - 1)

def num_of_nodes_shm(n, m):
    """Return the node count ``2 + ((2*m + 1)**n - 1)`` for parameters ``n`` and ``m``."""
    growth = (2 * m + 1) ** n - 1
    return 2 + growth

def num_of_nodes_uv(u, v, n):
    """Return the node count for parameters ``u``, ``v`` and ``n``.

    With ``w = u + v`` the value is
    ``w**n * (w - 2) / (w - 1) + w / (w - 1)``; the result is a float because
    of the true divisions.
    """
    total = u + v
    denominator = total - 1
    scaled_power = (total ** n) * ((total - 2) / denominator)
    offset = total / denominator
    return scaled_power + offset

In [7]:
# Load the rbfm table, discard its first column (presumably a saved index --
# TODO confirm) and add the node count V computed from param1 and param2.
rbfm = (
    pd.read_excel("final_database/rbfm_final.xlsx")
    .pipe(lambda frame: frame.drop(frame.columns[0], axis=1))
    .assign(
        V=lambda frame: frame.apply(
            lambda row: num_of_nodes_rbfm(row['param1'], row['param2']),
            axis=1,
        )
    )
)

In [8]:
# Load the mixture table and discard its first column (presumably a saved
# index -- TODO confirm).
mixture = pd.read_excel("final_database/mixture_final.xlsx").pipe(
    lambda frame: frame.drop(frame.columns[0], axis=1)
)

In [9]:
# Load the shm table, discard its first column (presumably a saved index --
# TODO confirm) and add the node count V computed from param1 and param2.
shm = (
    pd.read_excel("final_database/shm_final.xlsx")
    .pipe(lambda frame: frame.drop(frame.columns[0], axis=1))
    .assign(
        V=lambda frame: frame.apply(
            lambda row: num_of_nodes_shm(row['param1'], row['param2']),
            axis=1,
        )
    )
)

In [10]:
# Load the uvflower table, discard its first column (presumably a saved index
# -- TODO confirm) and add the node count V computed from param1..param3.
uv = (
    pd.read_excel("final_database/uvflower_final.xlsx")
    .pipe(lambda frame: frame.drop(frame.columns[0], axis=1))
    .assign(
        V=lambda frame: frame.apply(
            lambda row: num_of_nodes_uv(
                row['param1'], row['param2'], row['param3']
            ),
            axis=1,
        )
    )
)

In [11]:
# Load the real-network table, discard its first column (presumably a saved
# index -- TODO confirm) and flag rows whose R exceeds 0.65 with fractality 1.
real = (
    pd.read_excel("final_database/real_final.xlsx")
    .pipe(lambda frame: frame.drop(frame.columns[0], axis=1))
    .assign(fractality=lambda frame: np.where(frame['R'] > 0.65, 1, 0))
)