# Muestreando Grafos

## Cargar archivos csv
Utilizamos la base de datos de Marvel (https://www.kaggle.com/csanhueza/the-marvel-universe-social-network)

In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import networkx as nx
import time

In [None]:
# Load the hero-network CSV into a DataFrame; its "hero1"/"hero2" columns are
# used further down as the edge endpoints of the graph.
h = pd.read_csv('../data/hero-network.csv')

Revisamos la información del DataFrame

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
h.info()

Transformamos el DataFrame en un Grafo Dirigido

In [None]:
# Build a directed graph: every row of `h` contributes an edge hero1 -> hero2.
G = nx.from_pandas_edgelist(h, source="hero1", target="hero2", create_using=nx.DiGraph())
# nx.info() was removed in NetworkX 3.0, so print the summary explicitly; this
# form works on every NetworkX version.
print(f"{type(G).__name__} with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

Crear la función `get_top_nodes`, que devuelve los `num` elementos con los valores más altos de un diccionario

In [None]:
def get_top_nodes(cdict, num=5):
    """Return the `num` entries of `cdict` with the largest values.

    Args:
        cdict: Mapping of node -> score (for example a centrality dictionary).
        num: Maximum number of entries to return. Defaults to 5.

    Returns:
        A new dict with at most `num` items, ordered from the highest to the
        lowest value. Entries with equal values keep their original relative
        order. An empty dict is returned when `num` is zero or negative.
    """
    # Guard explicitly: a negative slice bound would otherwise drop items from
    # the tail instead of returning nothing.
    if num <= 0:
        return {}
    ranked = sorted(cdict.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked[:num])

## Muestreo de Grafos
#### Método de Nodos Aleatorios

In [None]:
import random

# Dedicated, seeded generator so the sampled subgraph is reproducible across
# runs without touching the global `random` state.
rng = random.Random(42)
# random.sample() requires a sequence: passing the NodeView directly raises
# TypeError on Python >= 3.11. The sample size is capped at the number of
# available nodes so a small graph does not raise ValueError.
node_pool = list(G.nodes)
sampled_nodes = rng.sample(node_pool, min(700, len(node_pool)))
# Subgraph induced by the sampled nodes (only edges between sampled nodes remain).
SG = G.subgraph(sampled_nodes)
# nx.info() was removed in NetworkX 3.0; print the summary explicitly.
print(f"{type(SG).__name__} with {SG.number_of_nodes()} nodes and {SG.number_of_edges()} edges")

Nos quedamos con el componente fuertemente conexo más grande de la muestra

In [None]:
# Keep the unfiltered sample under its own name before SG is rebound below.
SA = SG
# Node set of the largest strongly connected component (the one with most nodes).
SC = max(nx.strongly_connected_components(SA), key=len)
# From here on SG refers to that component only.
SG = SA.subgraph(SC)
# nx.info() was removed in NetworkX 3.0; print the summary explicitly.
print(f"{type(SG).__name__} with {SG.number_of_nodes()} nodes and {SG.number_of_edges()} edges")

## Librería Graph_Sampling 
Puede instalarse desde: https://github.com/Ashish7129/Graph_Sampling
#### Método Snowball

In [None]:
import Graph_Sampling

# Snowball sampler provided by the Graph_Sampling package.
objects = Graph_Sampling.Snowball()
# NOTE(review): 200 and 10 are presumably the target sample size and the
# snowball fan-out parameter — confirm against the Graph_Sampling documentation.
SN = objects.snowball(G, 200, 10)
# nx.info() was removed in NetworkX 3.0; print the summary explicitly.
print(f"{type(SN).__name__} with {SN.number_of_nodes()} nodes and {SN.number_of_edges()} edges")

#### Método ForestFire

In [None]:
# Forest Fire sampler provided by the Graph_Sampling package.
objectf = Graph_Sampling.ForestFire()
# NOTE(review): 200 is presumably the target sample size — confirm against the
# Graph_Sampling documentation.
SF = objectf.forestfire(G, 200)
# nx.info() was removed in NetworkX 3.0; print the summary explicitly.
print(f"{type(SF).__name__} with {SF.number_of_nodes()} nodes and {SF.number_of_edges()} edges")

## Métricas de Nodo
#### Grado

Obtener el grado de cada nodo (se convierte en diccionario en la celda siguiente)

In [None]:
# Degree view over the sampled subgraph SG; it is converted to a dict in the
# next cell before ranking.
gdeg=SG.degree()

In [None]:
# Show the nodes with the highest degree (get_top_nodes defaults to the top 5).
get_top_nodes(dict(gdeg))

#### In-Degree

In [None]:
# In-degree view of the sampled subgraph.
indeg = SG.in_degree()
# The view yields (node, in_degree) pairs; turn them into a dict and rank.
get_top_nodes({node: deg for node, deg in indeg})

#### Out-Degree

In [None]:
# Out-degree view of the sampled subgraph.
outdeg = SG.out_degree()
# The view yields (node, out_degree) pairs; turn them into a dict and rank.
get_top_nodes({node: deg for node, deg in outdeg})

#### Degree Centrality

In [None]:
# Degree centrality for every node of the sampled subgraph.
degree_centrality = nx.degree_centrality(SG)
# Store each score on its node under the 'dc' attribute (read back when plotting).
nx.set_node_attributes(SG, values=degree_centrality, name='dc')
get_top_nodes(cdict=degree_centrality)

In [None]:
plt.figure(figsize=(32, 18))
# spring_layout starts from random positions; fix the seed so the layout is
# reproducible. `pos` is reused by every later plot of SG in this notebook.
pos = nx.spring_layout(SG, seed=42)
nx.draw_networkx(
    SG,
    pos=pos,
    width=0.1,
    # Node area proportional to the 'dc' (degree centrality) attribute.
    node_size=[5000 * v for v in nx.get_node_attributes(SG, 'dc').values()],
)

#### Betweenness

In [None]:
# Betweenness centrality for every node of the sampled subgraph.
betweenness_centrality = nx.betweenness_centrality(SG)
# Store each score on its node under the 'bc' attribute (read back when plotting).
nx.set_node_attributes(SG, values=betweenness_centrality, name='bc')
get_top_nodes(cdict=betweenness_centrality)

In [None]:
plt.figure(figsize=(32, 18))
# Draw SG with the shared layout `pos`; node area scales with the 'bc' attribute.
nx.draw_networkx(
    SG,
    pos=pos,
    node_size=[score * 10000 for score in nx.get_node_attributes(SG, 'bc').values()],
    width=0.1,
)

#### Closeness

In [None]:
# Closeness centrality for every node of the sampled subgraph.
closeness_centrality = nx.closeness_centrality(SG)
# Store each score on its node under the 'cc' attribute (read back when plotting).
nx.set_node_attributes(SG, values=closeness_centrality, name='cc')
get_top_nodes(cdict=closeness_centrality)

In [None]:
plt.figure(figsize=(32, 18))
# Draw SG with the shared layout `pos`; node area scales with the 'cc' attribute.
nx.draw_networkx(
    SG,
    pos=pos,
    node_size=[score * 1000 for score in nx.get_node_attributes(SG, 'cc').values()],
    width=0.1,
)

#### Eigenvector Centrality

In [None]:
# Eigenvector centrality for every node of the sampled subgraph.
eigenvector_centrality = nx.eigenvector_centrality(SG)
# Store each score on its node under the 'ec' attribute (read back when plotting).
nx.set_node_attributes(SG, values=eigenvector_centrality, name='ec')
get_top_nodes(cdict=eigenvector_centrality)

In [None]:
plt.figure(figsize=(32, 18))
# Draw SG with the shared layout `pos`; node area scales with the 'ec' attribute.
nx.draw_networkx(
    SG,
    pos=pos,
    node_size=[score * 10000 for score in nx.get_node_attributes(SG, 'ec').values()],
    width=0.1,
)

#### PageRank Centrality

In [None]:
# PageRank score for every node of the sampled subgraph.
pagerank_centrality = nx.pagerank(SG)
# Store each score on its node under the 'pr' attribute (read back when plotting).
nx.set_node_attributes(SG, values=pagerank_centrality, name='pr')
get_top_nodes(cdict=pagerank_centrality)

In [None]:
plt.figure(figsize=(32, 18))
# Draw SG with the shared layout `pos`; node area scales with the 'pr' attribute.
nx.draw_networkx(
    SG,
    pos=pos,
    node_size=[score * 100000 for score in nx.get_node_attributes(SG, 'pr').values()],
    width=0.1,
)

#### Densidad

In [None]:
# Density of the full graph G.
# NOTE(review): every metric above is computed on the sampled subgraph SG, while
# this one uses G — confirm that measuring the full graph is intended.
nx.density(G)

Elaborado por Luis Cajachahua bajo licencia MIT (2021)