# Generación de Features de Grafo

## Imports & Load Data

In [1]:
import warnings
# Suppress every warning for the rest of the session (no category filter is
# given, so this also hides deprecation warnings from pandas / networkx).
warnings.filterwarnings('ignore')

In [2]:
import geopandas as gpd
import networkx as nx

import numpy as np
import pandas as pd

import pickle

In [3]:
# Load endpoint (GDB layers pickle).
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# this project's own pipeline.
# The context manager closes the file even if unpickling raises.
with open('./data/1_GDB_Layers.pkl', 'rb') as infile:
    GDB = pickle.load(infile)

In [4]:
# Load the full graph pickle.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
filename = './data/22_Full_Graph.pkl'
# The context manager closes the file even if unpickling raises.
with open(filename, 'rb') as infile:
    MTG = pickle.load(infile)

In [5]:
# Load the dictionary of graphs (iterated by key in the metrics section).
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
filename = './data/22_CTMT_Graph_Dict.pkl'
# The context manager closes the file even if unpickling raises.
with open(filename, 'rb') as infile:
    CTMT_GR = pickle.load(infile)

In [6]:
# Load the UNTRD connections table (provides the COD_ID / UNTRD_PCON columns
# used in the metrics section).
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
filename = './data/22_UNTRD_Conections.pkl'
# The context manager closes the file even if unpickling raises.
with open(filename, 'rb') as infile:
    UNTRD_CONS = pickle.load(infile)

## Métricas

In [7]:
# Peek at the raw UNTRD_PCON values (the node ids looked up in the graphs).
UNTRD_CONS['UNTRD_PCON'].values

array(['27156187', '27132900', '27137461', ..., '27142620', '27144485',
       '27144240'], dtype=object)

In [8]:
## TODO: IMPROVE EFFICIENCY !!!

In [9]:
# Show the keys of the graph dictionary; the metrics loop processes one graph per key.
CTMT_GR.keys()

dict_keys(['27117493', '80345445', '27117484', '27117498', '80345446', '27117512', '27117496', '27117500', '27117492', '27117510', '27117490', '27117503', '27117515', '27117508', '27117499', '27117514', '27117509', '27117513', '27117511', '27117507', '80345444', '80345447', '27117487', '27117497', '27117495', '27117488', '27117489', '27117486', '27117485', '27117483'])

In [10]:
# Node ids for which metrics are wanted: the UNTRD_PCON column of UNTRD_CONS.
untrd_nodes = UNTRD_CONS.UNTRD_PCON.values

# One metrics frame per graph, concatenated once after the loop. Calling
# pd.concat on every iteration re-copies all rows accumulated so far.
per_graph_frames = []

for G_key, G in CTMT_GR.items():
    # Degree is requested only for the UNTRD nodes; the index of this frame
    # decides which rows survive the (left) joins below.
    # NOTE(review): relies on networkx skipping requested nodes that are not
    # in G -- confirm against the installed networkx version.
    degree_dict = dict(G.degree(untrd_nodes))
    df = pd.DataFrame.from_dict(degree_dict, orient='index', columns=['DEGREE'])

    # These metrics are computed over every node of G; the join keeps only
    # the rows already present in `df`.
    node_metrics = {
        'NEIG_DEGREE': nx.average_neighbor_degree(G),
        'BET_CEN': nx.betweenness_centrality(G),
        'CLO_CEN': nx.closeness_centrality(G),
        'PAGE_RANK': nx.pagerank(G),
    }
    for column, values in node_metrics.items():
        df = df.join(pd.DataFrame.from_dict(values, orient='index', columns=[column]))

    per_graph_frames.append(df)

    print(f'Grafo {G_key} procesado')

# pd.concat raises on an empty list, so fall back to an empty frame (which is
# what the accumulate-in-loop version produced when there were no graphs).
graph_metrics_df = pd.concat(per_graph_frames) if per_graph_frames else pd.DataFrame()

Grafo 27117493 procesado
Grafo 80345445 procesado
Grafo 27117484 procesado
Grafo 27117498 procesado
Grafo 80345446 procesado
Grafo 27117512 procesado
Grafo 27117496 procesado
Grafo 27117500 procesado
Grafo 27117492 procesado
Grafo 27117510 procesado
Grafo 27117490 procesado
Grafo 27117503 procesado
Grafo 27117515 procesado
Grafo 27117508 procesado
Grafo 27117499 procesado
Grafo 27117514 procesado
Grafo 27117509 procesado
Grafo 27117513 procesado
Grafo 27117511 procesado
Grafo 27117507 procesado
Grafo 80345444 procesado
Grafo 80345447 procesado
Grafo 27117487 procesado
Grafo 27117497 procesado
Grafo 27117495 procesado
Grafo 27117488 procesado
Grafo 27117489 procesado
Grafo 27117486 procesado
Grafo 27117485 procesado
Grafo 27117483 procesado


In [11]:
# Turn the node-id index into a regular 'UNTRD_PCON' column so it can be used
# as the merge key.
# Guarded so that re-running this cell is a no-op: an unconditional
# reset_index would fail trying to insert a column that already exists.
if 'UNTRD_PCON' not in graph_metrics_df.columns:
    graph_metrics_df = graph_metrics_df.rename_axis('UNTRD_PCON').reset_index()

In [12]:
# Spot-check five random rows of the per-node metrics table.
graph_metrics_df.sample(n=5)

Unnamed: 0,UNTRD_PCON,DEGREE,NEIG_DEGREE,BET_CEN,CLO_CEN,PAGE_RANK
2488,27140435,1,2.0,0.0,0.015211,0.001537
1883,27147080,2,2.5,0.02338,0.005864,0.000774
1666,27131363,2,2.0,0.049962,0.005341,0.000755
3563,27150613,2,2.5,0.006214,0.005147,0.000385
3503,27149407,2,2.5,0.059657,0.005555,0.000353


In [14]:
# (rows, columns) of the per-node metrics table.
graph_metrics_df.shape

(3882, 6)

In [15]:
# (rows, columns) of the connections table, for comparison with the metrics table.
# NOTE(review): in the recorded outputs the metrics table has more rows (3882)
# than this one (3848) -- presumably some nodes appear in more than one graph;
# confirm, since such nodes yield several rows per COD_ID after the merge.
UNTRD_CONS.shape

(3848, 6)

In [16]:
# Attach the graph metrics to each COD_ID via its UNTRD_PCON node id.
# Inner join: rows without a match on either side are dropped.
untrd_keys = UNTRD_CONS[['COD_ID', 'UNTRD_PCON']]
UNTRD_GraphMetrics = untrd_keys.merge(graph_metrics_df, how='inner', on='UNTRD_PCON')

In [18]:
# Remove the key column now that it has served as the merge key.
# Non-inplace drop with errors='ignore' so re-running the cell does not raise
# a KeyError once the column is already gone.
UNTRD_GraphMetrics = UNTRD_GraphMetrics.drop(columns='UNTRD_PCON', errors='ignore')

In [19]:
# Spot-check five random rows of the final COD_ID-level metrics table.
UNTRD_GraphMetrics.sample(n=5)

Unnamed: 0,COD_ID,DEGREE,NEIG_DEGREE,BET_CEN,CLO_CEN,PAGE_RANK
3257,26876676,2,2.0,0.009257,0.00328,0.000258
333,26877139,2,2.0,0.042952,0.002924,0.000258
2593,26877238,2,2.0,0.086185,0.00787,0.000651
1775,26879783,2,2.0,0.213628,0.04507,0.005489
3521,26878769,1,2.0,0.0,0.00526,0.000457


## Exportar exit point

In [20]:
# Persist the COD_ID-level graph metrics as this notebook's exit point.
# (Plain string: the original f-string had no placeholders.)
filename = './data/221_Graph_Metrics.pkl'
# The context manager flushes and closes the file even if pickling raises.
with open(filename, 'wb') as outfile:
    pickle.dump(UNTRD_GraphMetrics, outfile)