# Generación de Features de Grafo

## Imports & Load Data

In [3]:
import warnings
# Installs a process-wide "ignore" filter with no category restriction, so every
# warning is silenced, including deprecation and future warnings raised by the
# libraries imported below.
warnings.filterwarnings('ignore')

In [4]:
import geopandas as gpd
import networkx as nx

import numpy as np
import pandas as pd

import pickle

In [5]:
# Load the pickled object stored in ./data/1_GDB_Layers.pkl.
# The context manager closes the file even when unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('./data/1_GDB_Layers.pkl', 'rb') as infile:
    GDB = pickle.load(infile)

In [6]:
# Load the pickled object stored in 22_Full_Graph.pkl into MTG.
# The context manager closes the file even when unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
filename = './data/22_Full_Graph.pkl'
with open(filename, 'rb') as infile:
    MTG = pickle.load(infile)

In [7]:
# Load the pickled object stored in 22_CTMT_Graph_Dict.pkl into CTMT_GR
# (used further down as a mapping from a key to a graph).
# The context manager closes the file even when unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
filename = './data/22_CTMT_Graph_Dict.pkl'
with open(filename, 'rb') as infile:
    CTMT_GR = pickle.load(infile)

In [8]:
# Load the pickled table stored in 22_UNTRD_Conections.pkl into UNTRD_CONS.
# The context manager closes the file even when unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
filename = './data/22_UNTRD_Conections.pkl'
with open(filename, 'rb') as infile:
    UNTRD_CONS = pickle.load(infile)

## Métricas

In [9]:
# Display the raw array of UNTRD_PCON values (the node ids queried in the graphs later on).
UNTRD_CONS['UNTRD_PCON'].values

array(['27156187', '27132900', '27137461', ..., '27142620', '27144485',
       '27144240'], dtype=object)

In [10]:
# TODO: improve efficiency

In [11]:
# Display the keys of CTMT_GR; the metrics loop iterates over exactly these keys.
CTMT_GR.keys()

dict_keys(['27117493', '80345445', '27117484', '27117498', '80345446', '27117512', '27117496', '27117500', '27117492', '27117510', '27117490', '27117503', '27117515', '27117508', '27117499', '27117514', '27117509', '27117513', '27117511', '27117507', '80345444', '80345447', '27117487', '27117497', '27117495', '27117488', '27117489', '27117486', '27117485', '27117483'])

In [63]:
# Build one table of node-level graph metrics for the UNTRD connection nodes,
# processing every graph stored in CTMT_GR.
untrd_nodes = UNTRD_CONS.UNTRD_PCON.values

# Per-graph tables are collected here and concatenated once after the loop.
# Growing a DataFrame with pd.concat on every iteration re-copies all the rows
# accumulated so far each time.
metric_frames = []

for G_key, G in CTMT_GR.items():
    # G.degree(nbunch) only reports nodes that exist in G, so this dict also
    # selects which of the untrd nodes belong to the current graph.
    degree_dict = dict(G.degree(untrd_nodes))
    df = pd.DataFrame.from_dict(degree_dict, orient='index', columns=['DEGREE'])

    # The remaining metrics are computed for every node of G; the left joins
    # below keep only the rows already present in df.
    # NOTE(review): betweenness_centrality is presumably the dominant cost; its
    # `k` parameter offers a sampled approximation if exact values are not needed.
    node_metrics = {
        'NEIG_DEGREE': nx.average_neighbor_degree(G),
        'BET_CEN': nx.betweenness_centrality(G),
        'CLO_CEN': nx.closeness_centrality(G),
        'PAGE_RANK': nx.pagerank(G),
    }
    for column, values in node_metrics.items():
        df = df.join(pd.DataFrame.from_dict(values, orient='index', columns=[column]))

    metric_frames.append(df)

    print(f'Grafo {G_key} procesado')

# NOTE(review): a node that is present in more than one graph produces one row
# per graph here, since concat does not de-duplicate — confirm this is intended.
# An empty CTMT_GR yields an empty frame instead of a concat error.
graph_metrics_df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

Grafo 27117493 procesado
Grafo 80345445 procesado
Grafo 27117484 procesado
Grafo 27117498 procesado
Grafo 80345446 procesado
Grafo 27117512 procesado
Grafo 27117496 procesado
Grafo 27117500 procesado
Grafo 27117492 procesado
Grafo 27117510 procesado
Grafo 27117490 procesado
Grafo 27117503 procesado
Grafo 27117515 procesado
Grafo 27117508 procesado
Grafo 27117499 procesado
Grafo 27117514 procesado
Grafo 27117509 procesado
Grafo 27117513 procesado
Grafo 27117511 procesado
Grafo 27117507 procesado
Grafo 80345444 procesado
Grafo 80345447 procesado
Grafo 27117487 procesado
Grafo 27117497 procesado
Grafo 27117495 procesado
Grafo 27117488 procesado
Grafo 27117489 procesado
Grafo 27117486 procesado
Grafo 27117485 procesado
Grafo 27117483 procesado


In [80]:
# Turn the node-id index into a regular 'UNTRD_PCON' column.
# The guard makes the cell safe to re-run: once the column exists, a second
# reset_index would raise while trying to insert 'UNTRD_PCON' again.
if 'UNTRD_PCON' not in graph_metrics_df.columns:
    graph_metrics_df = graph_metrics_df.rename_axis('UNTRD_PCON').reset_index()

In [81]:
# Show five randomly chosen rows as a spot check; no random_state is passed,
# so the selection differs between runs.
graph_metrics_df.sample(5)

Unnamed: 0,UNTRD_PCON,DEGREE,NEIG_DEGREE,BET_CEN,CLO_CEN,PAGE_RANK
2059,27141347,1,2.0,0.0,0.00934,0.000505
3452,27157663,1,2.0,0.0,0.007596,0.000242
3364,27136104,2,2.0,0.030464,0.009735,0.000641
987,27151806,2,2.0,0.162772,0.003675,0.000255
2438,27135190,1,2.0,0.0,0.017134,0.00152


In [82]:
# (rows, columns) of the metrics table.
graph_metrics_df.shape

(3882, 6)

In [84]:
# (rows, columns) of the UNTRD connections table.
UNTRD_CONS.shape

(3848, 6)

In [86]:
# Attach the graph metrics to each (COD_ID, UNTRD_PCON) pair.
# The merge uses the default inner join, so rows without a match on
# UNTRD_PCON on either side are dropped.
# NOTE(review): the shapes displayed earlier in this notebook (3882 metric rows
# vs 3848 connection rows) suggest UNTRD_PCON may not be unique in the metrics
# table, which would repeat COD_ID rows here — verify before relying on row counts.
untrd_keys = UNTRD_CONS[['COD_ID', 'UNTRD_PCON']]
UNTRD_GraphMetrics = untrd_keys.merge(graph_metrics_df, on='UNTRD_PCON')

In [93]:
# Show five randomly chosen rows of the merged table as a spot check; no
# random_state is passed, so the selection differs between runs.
UNTRD_GraphMetrics.sample(5)

Unnamed: 0,COD_ID,UNTRD_PCON,DEGREE,NEIG_DEGREE,BET_CEN,CLO_CEN,PAGE_RANK
2269,26877675,27141708,1,2.0,0.0,0.015678,0.001499
339,26877151,27161885,1,3.0,0.0,0.002743,0.000147
38,26880326,27143966,2,1.5,0.004662,0.020339,0.00264
686,26880277,27154932,2,2.0,0.003742,0.005142,0.000477
103,26878984,27132624,2,2.0,0.253389,0.005947,0.00061


## Exportar exit point

In [87]:
# Persist the merged metrics table as a pickle file.
# The context manager flushes and closes the file even if pickle.dump raises,
# so a failed dump does not leave an open handle behind.
filename = './data/221_Graph_Metrics.pkl'
with open(filename, 'wb') as outfile:
    pickle.dump(UNTRD_GraphMetrics, outfile)