# Análisis de Redes Complejas para la Identificación de Patrones de Comportamiento en las Ventas

In [1]:
pip install "notebook>=5.3" "ipywidgets>=7.5"

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install jupyterlab "ipywidgets>=7.5"

Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
#import plotly.graph_objects as go

In [4]:
import cudf
import cugraph
import networkx as nx
import plotly.graph_objs as go
from cuml.manifold import UMAP

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------



In [5]:
import plotly.io as pio

## Identificación de Patrones en las Redes de Productos

In [6]:
df_ofertas_relampago = pd.read_csv('/home/insightlab/Documents/MercadoLibre/arquivos/ofertas_relampago.csv', encoding='latin1')

In [7]:
# Collect every pair of product domains that share the same OFFER_START_DATE.
#
# The domains of each group are sorted before pairing so that a pair is always
# emitted in one canonical orientation (A < B). `unique()` keeps order of
# appearance, so without sorting the same pair could be produced as (A, B) in
# one group and (B, A) in another, and its co-occurrence count would be split
# across two rows of the (undirected) edge list.
product_pairs = []

for _, group in df_ofertas_relampago.groupby('OFFER_START_DATE'):
    # Missing domains are dropped: they are not a product and would make the
    # sort fail on mixed str/float values.
    products = sorted(group['DOMAIN_ID'].dropna().unique())
    if len(products) > 1:
        product_pairs.extend(itertools.combinations(products, 2))

In [8]:
# Load the product pairs into a cuDF DataFrame (one row per co-occurrence).
pair_columns = ['Product_A', 'Product_B']
df_pairs = cudf.DataFrame(product_pairs, columns=pair_columns)

# Number of times each (Product_A, Product_B) pair appears, as a 'count' column.
pair_sizes = df_pairs.groupby(pair_columns).size()
df_pairs_count = pair_sizes.reset_index(name='count')

In [9]:
# Build the cuGraph graph from the pair-count edge list; the co-occurrence
# count is attached to each edge as its attribute.
G_cugraph = cugraph.Graph()
G_cugraph.from_cudf_edgelist(
    df_pairs_count,
    source='Product_A',
    destination='Product_B',
    edge_attr='count',
)

In [10]:
# Compute betweenness centrality on the GPU with cuGraph.
betweenness_centrality = cugraph.centrality.betweenness_centrality(G_cugraph)

# Sort by the 'betweenness_centrality' column, highest first.
betweenness_sorted = betweenness_centrality.sort_values('betweenness_centrality', ascending=False)

# Keep the 10 most critical nodes (highest centrality).
top_10_critical_nodes = betweenness_sorted.head(10)

# Move the edge list back to the CPU (pandas) for NetworkX / Plotly.
df_pairs_count_cpu = df_pairs_count.to_pandas()

# CPU-side NetworkX graph built from the same edge list.
G = nx.Graph()

# Add the weighted edges. The graph is undirected, so if the edge list holds
# both orientations of a pair ((A, B) and (B, A)) their counts are summed
# instead of the later row silently overwriting the earlier weight.
edge_rows = df_pairs_count_cpu[['Product_A', 'Product_B', 'count']].itertuples(index=False, name=None)
for product_a, product_b, pair_count in edge_rows:
    if G.has_edge(product_a, product_b):
        G[product_a][product_b]['weight'] += pair_count
    else:
        G.add_edge(product_a, product_b, weight=pair_count)

In [11]:
# Display the betweenness-centrality table returned by cuGraph
# (columns: betweenness_centrality, vertex).
betweenness_centrality

Unnamed: 0,betweenness_centrality,vertex
0,0.000000,MLM-MANICURE_PRACTICE_HANDS_AND_FINGERS
1,0.000000,MLM-SCULPTURED_NAIL_FORMS
2,0.000000,MLM-WATERPROOF_CAMERA_CASES
3,0.000000,MLM-CAKE_LEVELERS
4,0.000000,MLM-MOTOR_STARTERS
...,...,...
1261,0.002488,MLM-BODYWEIGHT_SCALES
1262,0.002550,MLM-CELLPHONE_ACCESSORIES
1263,0.002428,MLM-MICROPHONES
1264,0.002448,MLM-HATS_AND_CAPS


In [12]:
# Display the 10 vertices with the highest betweenness centrality.
top_10_critical_nodes

Unnamed: 0,betweenness_centrality,vertex
288,0.003702,MLM-WRISTWATCHES
289,0.003702,MLM-SURGICAL_AND_INDUSTRIAL_MASKS
290,0.003702,MLM-WALLETS
291,0.003702,MLM-SHORTS
292,0.003702,MLM-SANDALS_AND_FLIP_FLOPS
293,0.003702,MLM-PANTS
294,0.003702,MLM-HEADPHONES
295,0.003684,MLM-SNEAKERS
296,0.003677,MLM-AEROBICS_AND_FITNESS_EQUIPMENT
297,0.003654,MLM-ACTION_FIGURES


In [13]:
# Build the co-occurrence network on the CPU (pandas + NetworkX).
#
# NOTE: this cell rebinds `df_pairs` and `df_pairs_count` to pandas objects;
# earlier cells bind the same names to cuDF objects, so cells that expect the
# cuDF versions must be run before this one.
product_pairs = []

for _, group in df_ofertas_relampago.groupby('OFFER_START_DATE'):
    # Sort the domains so every pair is emitted in one canonical orientation
    # (A < B). `unique()` keeps order of appearance, so without sorting the
    # same pair could appear as (A, B) and (B, A); its count would be split
    # and, in the undirected graph below, one weight would overwrite the other.
    # Missing domains are dropped: they are not a product and would break the
    # sort on mixed str/float values.
    products = sorted(group['DOMAIN_ID'].dropna().unique())
    if len(products) > 1:
        product_pairs.extend(itertools.combinations(products, 2))

# One row per co-occurrence.
df_pairs = pd.DataFrame(product_pairs, columns=['Product_A', 'Product_B'])

# Number of times each pair of products appears together.
df_pairs_count = df_pairs.groupby(['Product_A', 'Product_B']).size().reset_index(name='count')

# Build the undirected network; the co-occurrence count becomes the edge
# 'weight'. Edges are added in bulk instead of row by row with iterrows().
G_nx = nx.Graph()
G_nx.add_weighted_edges_from(
    df_pairs_count[['Product_A', 'Product_B', 'count']].itertuples(index=False, name=None)
)

In [15]:
# Set Plotly's default renderer to 'browser' for every subsequent fig.show().
pio.renderers.default = 'browser' 

In [16]:
# Draw the product network: node positions come from a 2-D UMAP embedding of
# the adjacency matrix, and the 10 nodes with the highest betweenness
# centrality are highlighted in red.

# Marker sizes (in px). Critical nodes are scaled between the two values so
# they are never drawn smaller than a regular node.
DEFAULT_NODE_SIZE = 5
CRITICAL_NODE_MAX_SIZE = 20

# Fix the node order once so adjacency rows, UMAP rows and plot coordinates
# all refer to the same node.
node_list = list(G_nx.nodes())

# Adjacency matrix of the graph as a SciPy sparse array.
adj_matrix = nx.to_scipy_sparse_array(G_nx, nodelist=node_list)

# 2-D layout computed with UMAP on the GPU. UMAP is stochastic, so the seed is
# fixed to make the layout reproducible across runs.
umap_model = UMAP(n_components=2, random_state=42)
layout = umap_model.fit_transform(adj_matrix.toarray())

# Map every node to its (x, y) position.
pos_umap = {node: layout[i] for i, node in enumerate(node_list)}

# Edge coordinates: each segment is (x0, x1, None) so Plotly breaks the line
# between consecutive edges.
edge_x = []
edge_y = []
for source_node, target_node in G_nx.edges():
    x0, y0 = pos_umap[source_node]
    x1, y1 = pos_umap[target_node]
    edge_x.extend([float(x0), float(x1), None])
    edge_y.extend([float(y0), float(y1), None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

# Betweenness centrality computed with cuGraph.
betweenness_centrality = cugraph.centrality.betweenness_centrality(G_cugraph)
top_10_critical_nodes = betweenness_centrality.sort_values('betweenness_centrality', ascending=False).head(10)

# Copy the results to the host ONCE; the per-node loop below then uses plain
# dict/set lookups instead of a device-to-host transfer and a boolean-mask
# query on the GPU for every node.
betweenness_cpu = betweenness_centrality.to_pandas()
betweenness_by_vertex = dict(zip(betweenness_cpu['vertex'], betweenness_cpu['betweenness_centrality']))
critical_vertices = set(top_10_critical_nodes['vertex'].to_pandas())
max_critical_value = max((betweenness_by_vertex[v] for v in critical_vertices), default=0)

# Node coordinates, colors and sizes.
node_x = []
node_y = []
node_color = []
node_size = []

for node in node_list:
    x, y = pos_umap[node]
    node_x.append(float(x))
    node_y.append(float(y))

    if node in critical_vertices:
        node_color.append('red')  # critical nodes
        # Scale relative to the most central node. The raw centralities are
        # tiny (order 1e-3), so `1000 * value` produced markers smaller than
        # the default size and made critical nodes harder to see.
        if max_critical_value > 0:
            relative = betweenness_by_vertex[node] / max_critical_value
        else:
            relative = 0.0
        node_size.append(DEFAULT_NODE_SIZE + (CRITICAL_NODE_MAX_SIZE - DEFAULT_NODE_SIZE) * relative)
    else:
        node_color.append('blue')  # other nodes
        node_size.append(DEFAULT_NODE_SIZE)

# Node trace. Colors are categorical ('red' / 'blue'), so no colorscale or
# colorbar is configured: those options only take effect when marker.color is
# a numerical array.
node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    text=[f'<br>{node}' for node in node_list],
    mode='markers',
    hoverinfo='text',
    marker=dict(
        color=node_color,
        size=node_size,
        line=dict(width=2)))

# Build the figure. The title font is set through `title.font` rather than the
# legacy `titlefont_size` attribute, which newer Plotly releases no longer accept.
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title=dict(
                        text='<br>Network graph accelerated with UMAP and GPU',
                        font=dict(size=16)),
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20, l=5, r=5, t=40),
                    annotations=[dict(
                        text="Python code: <a href='https://plotly.com/'> Plotly.com</a>",
                        showarrow=False,
                        xref="paper", yref="paper",
                        x=0.005, y=-0.002)],
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

# Show the figure.
fig.show()