## Importar datos

In [69]:
import pandas as pd

df = pd.read_csv("BI.csv")
# Eliminar cliente inusual
df = df[df["ClientID"] != "C442030"]
# Transforma a formato de fecha
df["DATE"] = pd.to_datetime(df["DATE"], format="%d/%m/%Y")
df

Unnamed: 0,DATE,ClientID,MOVEMENT,BALANCE_IN,TRANSFEROR,BALANCE_OUT,RECEPIENT,AGENT ID,STATUS,CHANNEL,aux,IN,OUT,NUM_MOVEMENTS
0,2017-09-30,C689751,in,1073.05,Competitor 4,0.00,Competitor 9,A6022,Inactive,CHANNEL 5,1,1,0,2
1,2018-12-31,C689751,out,0.00,Competitor 4,1066.17,Competitor 7,A6022,Inactive,CHANNEL 5,1,0,1,2
2,2019-08-31,C689750,in,667984.98,Competitor 12,0.00,Competitor 9,A17817,Active,CHANNEL 4,1,1,0,1
3,2017-07-31,C689749,in,15180.39,Competitor 14,0.00,Competitor 9,A24328,Active,CHANNEL 4,1,1,0,1
4,2020-03-31,C689748,in,142252.63,Competitor 7,0.00,Competitor 12,A12016,Active,CHANNEL 1,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749197,2017-03-31,C5,out,0.00,Competitor 8,294338.39,Competitor 14,A30677,Inactive,CHANNEL 5,1,0,1,1
749198,2019-08-31,C4,in,19651.88,Competitor 2,0.00,Competitor 2,A14667,Active,CHANNEL 1,1,1,0,1
749199,2017-01-31,C3,out,0.00,Competitor 7,175529.62,Competitor 2,A18024,Inactive,CHANNEL 5,1,0,1,1
749200,2018-02-28,C2,out,0.00,Competitor 3,0.00,Competitor 2,A13087,Inactive,CHANNEL 5,1,0,1,1


## Filtrado de clientes

In [73]:
# Filtrar clientes con aux > 2
clients = df.groupby(by=["ClientID"]).sum().sort_values(by="aux", ascending=False)
clients = clients[clients["aux"] > 2]
# Guarda todos los clientes de interes en una lista
list_clients = clients.index
clients

Unnamed: 0_level_0,BALANCE_IN,BALANCE_OUT,aux,IN,OUT,NUM_MOVEMENTS
ClientID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
C360972,1035163.00,696410.57,5,3,2,25
C165158,577092.85,362948.07,5,3,2,25
C223529,898740.62,1439215.51,5,2,3,25
C573382,889565.16,1301891.35,5,2,3,25
C35393,1721634.47,1918932.29,4,2,2,16
...,...,...,...,...,...,...
C570913,992882.56,2245904.92,3,1,2,9
C16785,176997.59,356925.32,3,1,2,9
C83361,933737.69,2057644.43,3,1,2,9
C166463,12130.49,9759.98,3,2,1,9


## Dataframe solo con los clientes de interés

In [74]:
# Nuevo dataframe con los clientes filtrados
new_df = df.set_index("ClientID").loc[list_clients]
new_df = new_df.reset_index()
new_df

Unnamed: 0,ClientID,DATE,MOVEMENT,BALANCE_IN,TRANSFEROR,BALANCE_OUT,RECEPIENT,AGENT ID,STATUS,CHANNEL,aux,IN,OUT,NUM_MOVEMENTS
0,C360972,2016-04-30,in,322991.31,Competitor 14,0.00,Competitor 9,A22369,Inactive,CHANNEL 4,1,1,0,5
1,C360972,2016-08-31,out,0.00,Competitor 14,336472.77,Competitor 13,A22369,Inactive,CHANNEL 5,1,0,1,5
2,C360972,2017-10-31,in,357612.30,Competitor 13,0.00,Competitor 13,A11624,Active,CHANNEL 1,1,1,0,5
3,C360972,2018-02-28,out,0.00,Competitor 13,359937.80,Competitor 13,A11624,Active,CHANNEL 1,1,0,1,5
4,C360972,2019-08-31,in,354559.39,Competitor 13,0.00,Competitor 13,A22380,Active,CHANNEL 4,1,1,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4962,C166463,2018-10-31,out,0.00,Competitor 14,9759.98,Competitor 7,A22656,Active,CHANNEL 4,1,0,1,3
4963,C166463,2019-06-30,in,9969.74,Competitor 7,0.00,Competitor 7,A22656,Active,CHANNEL 4,1,1,0,3
4964,C65686,2016-07-31,out,0.00,Competitor 6,128997.22,Competitor 13,A17081,Inactive,CHANNEL 5,1,0,1,3
4965,C65686,2018-04-30,in,164694.08,Competitor 13,0.00,Competitor 13,A24597,Active,CHANNEL 4,1,1,0,3


## Lista de agentes de interés

In [75]:
# Lista de agentes contenidas en el nuevo dataframe
agents = new_df["AGENT ID"].unique()
print(agents)
print("Agentes totales:",len(agents))

['A10014' 'A10020' 'A10058' ... 'A9915' 'A9921' 'A9922']
Agentes totales: 1379


## Función intersección de clientes

In [80]:
def intersection(lst1, lst2):
    """Retorna una lista de los clientes comunes entre dos agentes.
    Argumentos de entrada
    lst1: ingresa la lista de clientes del agente1
    lst2: ingresa la lista de clientes del agente2
    """
    lst1 = set(lst1)
    lst2 = set(lst2)
    return list(lst1.intersection(lst2))

In [81]:
# Prueba de la función
common_client = intersection(new_df[new_df["AGENT ID"] == agents[322]]["ClientID"],
             new_df[new_df["AGENT ID"] == agents[987]]["ClientID"])
common_client

['C193655',
 'C263611',
 'C456913',
 'C55371',
 'C546766',
 'C98418',
 'C32642',
 'C377193',
 'C371780',
 'C262884',
 'C91420',
 'C444331',
 'C509697',
 'C205139']

## Iteración para todos los clientes

In [104]:
from time import time
table_list = []
x = 0
n = len(agents)
t = time()

# Itera sobre todas las combinaciones de agentes
for i in range(n):
    for j in range(i+1,n):
        # Encuentra los clientes en común
        common_client = intersection(new_df[new_df["AGENT ID"] == agents[i]]["ClientID"],
                                     new_df[new_df["AGENT ID"] == agents[j]]["ClientID"])
        # Cuenta los clientes en común
        x = len(common_client)
        # Agrega a la tabla si hay al menos un cliente en comun
        if x > 0:
            table_list.append([agents[i],agents[j],x])
    # Imprime cada 10 agentes el estatus del progreso
    if i % 10 == 0:
        print("\rAgentes analizados: {}/{}".format(i,n),end="")

print("\nFinalizado en {:4.2f} min".format((time()-t)/60))

Agentes analizados: 1370/1379
Finalizado en 971.51 min


## Tabla final

In [106]:
table = pd.DataFrame(table_list, columns=[
                     "AGENTID1", "AGENTID2", "CLIENTS"])
table.sort_values("CLIENTS",ascending=False)

Unnamed: 0,AGENTID1,AGENTID2,CLIENTS
486,A16558,A25272,14
592,A17700,A23944,9
942,A21662,A6098,9
596,A17700,A30621,8
466,A16483,A21856,8
...,...,...,...
441,A15818,A23867,1
438,A15805,A19046,1
437,A15772,A23911,1
436,A15772,A20993,1


In [107]:
table.to_csv("tabla_red_social.csv")

## Social Network

In [153]:
import pandas as pd
table = pd.read_csv("tabla_red_social.csv")
table.drop(["Unnamed: 0"], axis=1, inplace=True)
table.sort_values("CLIENTS", ascending=False)

Unnamed: 0,AGENTID1,AGENTID2,CLIENTS
486,A16558,A25272,14
592,A17700,A23944,9
942,A21662,A6098,9
596,A17700,A30621,8
466,A16483,A21856,8
...,...,...,...
441,A15818,A23867,1
438,A15805,A19046,1
437,A15772,A23911,1
436,A15772,A20993,1


In [157]:
import networkx as nx

G = nx.from_pandas_edgelist(table,
                            source="AGENTID1",
                            target="AGENTID2",
                            edge_attr="CLIENTS")

In [169]:
from pyvis.network import Network

nt = Network(height="800px",width="800px",notebook=True)
nt.from_nx(G)
nt.show("social_net.html")