# Caso: Análisis de CDR de una Empresa de Telecomunicaciones

## Cargar archivos csv
Utilizamos un Call Detail Record de ejemplo
##### Clientes llamantes: 199
##### Clientes llamados: 400
##### Horizonte de tiempo: De Nov-2012 a Ene-2014 (15 meses)

In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import networkx as nx
import time

In [None]:
h = pd.read_csv('./data/llamadas.csv')

Revisamos la información del DataFrame

In [None]:
h.info()

Transformamos el DataFrame en un Grafo dirigido con pesos (cantidad de llamadas y duración)

In [None]:
# Build a directed graph from the call records: one edge per
# (fromuserid -> touserid) row, carrying the "numbercalls" and
# "secondscalls" columns as edge attributes.
G = nx.from_pandas_edgelist(h, source="fromuserid", target="touserid", edge_attr=["numbercalls","secondscalls"], create_using=nx.DiGraph())
# NOTE(review): nx.info was removed in NetworkX 3.0 -- confirm the installed
# version; print(G) is the documented replacement on newer releases.
print(nx.info(G))

Crear la función get_top_nodes, que devuelve las entradas con los valores más altos de un diccionario

In [None]:
def get_top_nodes(cdict, num=5):
    """Return the ``num`` entries of ``cdict`` with the highest values.

    Args:
        cdict: Mapping of keys (for example node ids) to comparable scores.
        num: Maximum number of entries to return. Defaults to 5.

    Returns:
        A new dict with at most ``num`` items, ordered from the highest to
        the lowest value. Entries with equal values keep their original
        relative order. An empty dict is returned when ``num`` is zero or
        negative.
    """
    if num <= 0:
        # Nothing requested; this also prevents a negative slice from
        # silently returning "all but the last" entries.
        return {}
    ranked = sorted(cdict.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked[:num])

#### Grado

Guardar el grado de cada nodo (una vista DegreeView que se puede convertir a diccionario con dict())

In [None]:
gdeg=G.degree()

In [None]:
G.degree(350024,weight="secondscalls")

In [None]:
get_top_nodes(dict(gdeg))

In [None]:
# nx.degree_histogram(G) already returns how many nodes have each degree, so
# feeding it to plt.hist would bin those frequencies instead of the degrees.
# Histogram the per-node degrees directly to show the degree distribution.
plt.hist([degree for _, degree in G.degree()])

In [None]:
# Print the summary NetworkX reports for node 350159.
# NOTE(review): nx.info (including its per-node form) was removed in
# NetworkX 3.0 -- confirm the installed version before relying on this cell.
print(nx.info(G,350159))

In [None]:
# Draw the whole graph on a large canvas, sizing every node at 100x the
# degree stored for it in gdeg.
plt.figure(figsize=(80, 45))
nx.draw_networkx(
    G,
    node_size=[100 * degree for _, degree in gdeg],
    width=0.1,
)
plt.show()

#### In-Degree

In [None]:
indeg=G.in_degree()
get_top_nodes(dict(indeg))

#### Out-Degree

In [None]:
outdeg=G.out_degree()
get_top_nodes(dict(outdeg))

In [None]:
# Persist the degree, in-degree and out-degree of every node as the node
# attributes 'd', 'id' and 'od'.
nx.set_node_attributes(G, values=dict(gdeg), name='d')
nx.set_node_attributes(G, values=dict(indeg), name='id')
nx.set_node_attributes(G, values=dict(outdeg), name='od')

In [None]:
# Recompute the three degree views weighted by the "secondscalls" edge
# attribute. This rebinds gdeg / indeg / outdeg, replacing the unweighted
# views created in earlier cells.
gdeg = G.degree(weight="secondscalls")
indeg = G.in_degree(weight="secondscalls")
outdeg = G.out_degree(weight="secondscalls")

# Store the weighted values as node attributes 'wd', 'wid' and 'wod'.
nx.set_node_attributes(G, values=dict(gdeg), name='wd')
nx.set_node_attributes(G, values=dict(indeg), name='wid')
nx.set_node_attributes(G, values=dict(outdeg), name='wod')

#### Hubs and Authorities Algorithm
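La puntuación de Authority de un nodo se calcula sumando las puntuaciones de Hub de los nodos que lo enlazan (enlaces entrantes). La puntuación de Hub de un nodo se calcula sumando las puntuaciones de Authority de los nodos a los que apunta (enlaces salientes).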

In [None]:
hub, aut = nx.hits(G)

In [None]:
get_top_nodes(aut)

In [None]:
get_top_nodes(hub)

In [None]:
# Store the HITS scores as node attributes. The 'aut' attribute must come
# from the authority scores; it was previously filled with the hub scores.
nx.set_node_attributes(G, hub, 'hub')
nx.set_node_attributes(G, aut, 'aut')

#### Degree Centrality

In [None]:
# Compute degree centrality for every node, keep it as node attribute 'dc',
# and display the five highest-scoring nodes.
degree_centrality = nx.degree_centrality(G)
nx.set_node_attributes(G, values=degree_centrality, name='dc')
get_top_nodes(degree_centrality)

In [None]:
# Draw the whole graph with each node sized at 1000x its 'dc' attribute.
plt.figure(figsize=(80, 45))
nx.draw_networkx(
    G,
    node_size=[1000 * score for score in nx.get_node_attributes(G, 'dc').values()],
    width=0.1,
)

#### Betweenness

In [None]:
# Compute betweenness centrality using "secondscalls" as the edge weight and
# keep it as node attribute 'bc'.
# NOTE(review): NetworkX uses this weight as an edge length when finding
# shortest paths, so pairs with more call seconds count as farther apart --
# confirm that is the intended interpretation.
betweenness_centrality = nx.betweenness_centrality(
    G,
    weight="secondscalls",
)
nx.set_node_attributes(G, values=betweenness_centrality, name='bc')

In [None]:
get_top_nodes(betweenness_centrality)

In [None]:
# Print the summary NetworkX reports for node 350146.
# NOTE(review): nx.info (including its per-node form) was removed in
# NetworkX 3.0 -- confirm the installed version before relying on this cell.
print(nx.info(G,350146))

In [None]:
list(G.successors(350146))

In [None]:
list(G.predecessors(350146))

In [None]:
# Build the breadth-first tree rooted at node 350146, limited to two hops,
# and draw it on its own figure.
S = nx.bfs_tree(G, source=350146, depth_limit=2)
plt.figure(figsize=(64, 36))
nx.draw_networkx(S)

In [None]:
# Draw the whole graph with each node sized at 10000x its 'bc' attribute.
plt.figure(figsize=(80, 45))
nx.draw_networkx(
    G,
    node_size=[10000 * score for score in nx.get_node_attributes(G, 'bc').values()],
    width=0.1,
)

#### Closeness

In [None]:
# Compute closeness centrality, using the "secondscalls" edge attribute as
# the edge distance, and keep it as node attribute 'cc'.
# NOTE(review): with call seconds as the distance, heavier-traffic links make
# nodes farther apart -- confirm that is the intended interpretation.
closeness_centrality = nx.closeness_centrality(
    G,
    distance="secondscalls",
)
nx.set_node_attributes(G, values=closeness_centrality, name='cc')

In [None]:
get_top_nodes(closeness_centrality)

In [None]:
# Build the breadth-first tree rooted at node 350058, limited to two hops,
# and draw it on its own figure.
S = nx.bfs_tree(G, source=350058, depth_limit=2)
plt.figure(figsize=(64, 36))
nx.draw_networkx(S)

In [None]:
# Draw the graph layer by layer (edges, nodes, labels) on a spring layout.
# Nodes are coloured by their 'cc' attribute and sized at 100000x that value.
# The layout is kept in `pos` so that later cells can reuse it.
plt.figure(figsize=(80, 45))
pos = nx.spring_layout(G)
ec = nx.draw_networkx_edges(G, pos=pos, width=0.1)
nc = nx.draw_networkx_nodes(
    G,
    pos=pos,
    node_color=list(nx.get_node_attributes(G, 'cc').values()),
    node_size=[100000 * score for score in nx.get_node_attributes(G, 'cc').values()],
)
lb = nx.draw_networkx_labels(G, pos=pos)

#### Eigenvector Centrality

In [None]:
# Compute eigenvector centrality weighted by the "secondscalls" edge
# attribute and keep it as node attribute 'ec'.
eigenvector_centrality = nx.eigenvector_centrality(
    G,
    weight="secondscalls",
)
nx.set_node_attributes(G, values=eigenvector_centrality, name='ec')

In [None]:
get_top_nodes(eigenvector_centrality)

In [None]:
# Build the breadth-first tree rooted at node 350009, limited to two hops,
# and draw it on its own figure.
S = nx.bfs_tree(G, source=350009, depth_limit=2)
plt.figure(figsize=(64, 36))
nx.draw_networkx(S)

In [None]:
# Draw the whole graph with each node sized at 20000x its 'ec' attribute.
plt.figure(figsize=(80, 45))
nx.draw_networkx(
    G,
    node_size=[20000 * score for score in nx.get_node_attributes(G, 'ec').values()],
    width=0.1,
)

#### PageRank Centrality

In [None]:
# Compute PageRank weighted by the "secondscalls" edge attribute and keep it
# as node attribute 'pr'.
pagerank_centrality = nx.pagerank(
    G,
    weight="secondscalls",
)
nx.set_node_attributes(G, values=pagerank_centrality, name='pr')

In [None]:
get_top_nodes(pagerank_centrality)

In [None]:
# Build the breadth-first tree rooted at node 350055, limited to two hops,
# and draw it on its own figure.
S = nx.bfs_tree(G, source=350055, depth_limit=2)
plt.figure(figsize=(64, 36))
nx.draw_networkx(S)

In [None]:
# Draw the whole graph with each node sized at 50000x its 'pr' attribute,
# reusing the node positions stored in `pos` by an earlier cell.
plt.figure(figsize=(80, 45))
nx.draw_networkx(
    G,
    pos=pos,
    node_size=[50000 * score for score in nx.get_node_attributes(G, 'pr').values()],
    width=0.1,
)

## Métricas de Grafo

#### All Shortest Path

In [None]:
list(nx.all_shortest_paths(G,350166,350024))

In [None]:
nx.shortest_path_length(G,350166,350024)

#### Average Path Length

In [None]:
nx.average_shortest_path_length(G)

#### Diámetro

In [None]:
##nx.diameter(G)

#### Densidad

In [None]:
nx.density(G)

#### Local Clustering Coefficient

In [None]:
nx.average_clustering(G)

## Calculando el Valor de Red para cada Cliente

Extraer las métricas por nodo a un dataframe de Pandas

In [None]:
graph = G.nodes(data = True)

In [None]:
def func(graph):
    """Flatten ``(node, attributes)`` pairs into a column-oriented dict.

    Args:
        graph: Iterable of ``(node_id, attr_dict)`` pairs, such as the view
            returned by ``G.nodes(data=True)``. It is consumed only once, so
            one-shot iterators are accepted as well.

    Returns:
        A dict with a ``'fromuserid'`` list of node ids plus one list per
        attribute key found on the first node, all aligned by position.
        For empty input only the (empty) ``'fromuserid'`` column is returned.

    Raises:
        KeyError: If a later node lacks one of the first node's attributes.
    """
    # Materialise once: the input is traversed several times below.
    rows = list(graph)
    data = {'fromuserid': [row[0] for row in rows]}
    if not rows:
        # No nodes means there is no first node to read attribute names from.
        return data
    # The first node's attribute names define the remaining columns.
    for key in list(rows[0][1].keys()):
        data[key] = [row[1][key] for row in rows]
    return data

In [None]:
# Turn the per-node metrics into a DataFrame ordered by 'fromuserid', with a
# fresh 0..n-1 index.
df = (
    pd.DataFrame(func(graph))
    .sort_values(by=['fromuserid'])
    .reset_index(drop=True)
)

In [None]:
df.head()

In [None]:
df.info()

Exportamos las métricas por nodo a un archivo CSV para utilizarlas fuera de este notebook

In [None]:
# Write the per-node metrics to CSV without the DataFrame index.
# NOTE(review): this writes under '../data' while the input was read from
# './data' -- confirm the parent-directory path is intentional.
df.to_csv(r'../data/ind_grafo.csv',index=False)

Elaborado por Luis Cajachahua bajo licencia MIT (2021)