In [None]:
import networkx as nx
import pandas as pd
import random
import matplotlib.pyplot as plt
import math

In [None]:
def topo_pos(G, y_offset=0.1):
    """Compute drawing positions that lay a graph out by topological generation.

    Every generation yielded by ``nx.topological_generations(G)`` becomes one
    row: generation ``i`` is placed at height ``-i``, so earlier generations
    sit higher on the plot. Within a row the ``j``-th node is placed at
    ``x = j - len(row) / 2`` and lifted by ``j * y_offset`` so that neighbouring
    labels do not share exactly the same baseline.

    Parameters
    ----------
    G : graph
        Anything accepted by ``nx.topological_generations`` (per the NetworkX
        documentation this needs to be a directed acyclic graph).
    y_offset : float, default 0.1
        Vertical stagger applied per node index inside a row. ``0`` gives
        perfectly flat rows; the default reproduces the previous hard-coded
        behaviour.

    Returns
    -------
    dict
        Mapping ``node -> (x, y)`` suitable for the ``pos`` argument of the
        ``nx.draw_networkx*`` functions.
    """
    pos_dict = {}
    for i, node_list in enumerate(nx.topological_generations(G)):
        # Shift the row left by half its length so rows of different sizes
        # line up around a common vertical axis.
        x_offset = len(node_list) / 2
        for j, name in enumerate(node_list):
            pos_dict[name] = (j - x_offset, -i + j * y_offset)

    return pos_dict

In [None]:
# Load the raw table; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/Base.csv")
df.columns

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Keep only the rows whose fraud_bool flag equals 1; every layer below is
# built from this subset.
fraud_df = df.loc[df["fraud_bool"] == 1]
fraud_df.shape

In [None]:
# Layer 1: one edge from a synthetic root node ('Source') to each distinct
# value of the `source` column, weighted by the number of fraud rows with
# that value.
df_layer_1 = (
    fraud_df
    .groupby('source')
    .size()
    # Name the count column directly instead of renaming the implicit `0`.
    .reset_index(name='weight')
    .rename(columns={'source': 'destination'})
    .assign(source='Source', layer='layer_1')
    .loc[:, ['source', 'destination', 'weight', 'layer']]
)
df_layer_1

In [None]:
# Layer 2: source -> device_os edges, weighted by the number of fraud rows
# for each (source, device_os) pair.
df_layer_2 = (
    fraud_df[['source', 'device_os']]
    .groupby(['source', 'device_os'])
    .size()
    .reset_index(name='weight')
    .rename(columns={'device_os': 'destination'})
    .assign(layer='layer_2')
)
df_layer_2

In [None]:
# Layer 3: device_os -> payment_type edges, weighted by the number of fraud
# rows for each (device_os, payment_type) pair.
df_layer_3 = (
    fraud_df[['device_os', 'payment_type']]
    .groupby(['device_os', 'payment_type'])
    .size()
    .reset_index(name='weight')
    .rename(columns={'device_os': 'source', 'payment_type': 'destination'})
    .assign(layer='layer_3')
)
df_layer_3

In [None]:
# Layer 4: payment_type -> keep_alive_session edges, weighted by the number
# of fraud rows for each pair. The 0/1 flag is relabelled to 'No'/'Yes' so it
# reads as a node name in the graph.
# `.assign` builds a new frame instead of writing a column into a slice of
# `fraud_df`, which avoids pandas' chained-assignment (SettingWithCopy) path.
df_layer_4 = (
    fraud_df[['payment_type', 'keep_alive_session']]
    .assign(
        keep_alive_session=lambda d: d['keep_alive_session'].map({0: 'No', 1: 'Yes'})
    )
    .groupby(['payment_type', 'keep_alive_session'])
    .size()
    .reset_index(name='weight')
    .rename(columns={'payment_type': 'source', 'keep_alive_session': 'destination'})
    .assign(layer='layer_4')
)
df_layer_4



In [None]:
# Layer 5: keep_alive_session -> session-length bucket edges, weighted by the
# number of fraud rows for each pair.

# Bucket edges in minutes. The last bucket is open-ended so that its
# '>60 mins' label is accurate; with a finite upper edge, longer sessions
# would fall outside every bin and be dropped as NaN by the groupby.
session_bins = [0, 5, 15, 30, 60, float('inf')]
session_labels = ['< 5 Mins', '5-15 mins', '15-30 mins', '30-60 mins', '>60 mins']

df_layer_5 = (
    fraud_df
    # Only rows with a strictly positive session length are bucketed.
    .loc[
        fraud_df['session_length_in_minutes'] > 0,
        ['keep_alive_session', 'session_length_in_minutes'],
    ]
    # `.assign` returns a new frame, so no column is written into a slice of
    # `fraud_df` (avoids the chained-assignment / SettingWithCopy path).
    .assign(
        keep_alive_session=lambda d: d['keep_alive_session'].map({0: 'No', 1: 'Yes'}),
        session_length=lambda d: pd.cut(
            x=d['session_length_in_minutes'],
            bins=session_bins,
            labels=session_labels,
        ),
    )
    # `session_length` is categorical; observed=False is spelled out so the
    # result (every bucket listed, including empty ones) does not depend on
    # the pandas version's default.
    .groupby(['keep_alive_session', 'session_length'], observed=False)
    .size()
    .reset_index(name='weight')
    .rename(columns={'keep_alive_session': 'source', 'session_length': 'destination'})
    .assign(layer='layer_5')
)
df_layer_5

In [None]:
# Stack the five per-layer edge tables into one edge list
# (each input keeps its own row index).
graph_df = pd.concat(
    [
        df_layer_1,
        df_layer_2,
        df_layer_3,
        df_layer_4,
        df_layer_5,
    ]
)

In [None]:
# Turn every edge row into a (source, destination, weight) tuple, the shape
# that `add_weighted_edges_from` is given below.
graph_layer = (
    graph_df
    .loc[:, ['source', 'destination', 'weight']]
    .apply(tuple, axis=1)
    .tolist()
)
graph_layer

In [None]:
# Build a directed graph from the (source, destination, weight) tuples and
# lay it out one topological generation per row.
G = nx.DiGraph()
G.add_weighted_edges_from(graph_layer)
pos = topo_pos(G)
labels = nx.get_edge_attributes(G, 'weight')

# Node degree drives both the colour (through the colormap) and the size.
node_size_dict = dict(G.degree)
node_color = [node_size_dict[v] for v in G]
node_size = [400 * degree for degree in node_size_dict.values()]

# Edge width is the edge weight scaled down by a fixed factor.
edge_width = [0.0010 * weight for _, _, weight in G.edges(data='weight')]

fig, ax = plt.subplots(figsize=(10, 10))
nx.draw_networkx(
    G,
    pos,
    ax=ax,
    font_size=6,
    node_color=node_color,
    node_size=node_size,
    alpha=0.6,
    width=edge_width,
    font_weight="bold",
    cmap=plt.cm.tab20_r,
)
nx.draw_networkx_edge_labels(
    G,
    pos,
    edge_labels=labels,
    label_pos=0.6,
    font_size=6,
    alpha=1,
    rotate=False,
)

ax.set_title("Fraudulent Transaction Flow")
fig.tight_layout()
plt.savefig('Fraudulent_Transaction_Flow.png')
plt.show()

In [None]:
# Same plot as the full graph, but with a few device_os nodes removed
# (removing a node also removes every edge attached to it).
G = nx.DiGraph()
G.add_weighted_edges_from(graph_layer)
G.remove_nodes_from(['linux', 'macintosh', 'other'])
pos = topo_pos(G)
labels = nx.get_edge_attributes(G, 'weight')

# Node degree drives both the colour (through the colormap) and the size.
node_size_dict = dict(G.degree)
node_color = [node_size_dict[v] for v in G]
node_size = [400 * degree for degree in node_size_dict.values()]

# Edge width is the edge weight scaled down by a fixed factor.
edge_width = [0.0010 * weight for _, _, weight in G.edges(data='weight')]

fig, ax = plt.subplots(figsize=(10, 10))

nx.draw_networkx(
    G,
    pos,
    ax=ax,
    font_size=6,
    node_color=node_color,
    node_size=node_size,
    alpha=0.6,
    width=edge_width,
    font_weight="bold",
    cmap=plt.cm.tab20_r,
)
nx.draw_networkx_edge_labels(
    G,
    pos,
    edge_labels=labels,
    label_pos=0.6,
    font_size=6,
    alpha=1,
    rotate=False,
)

ax.set_title("Fraudulent Transaction Flow")
fig.tight_layout()
# NOTE(review): this is the same output path the full-graph cell writes, so
# running both cells leaves only this filtered image on disk - confirm that
# is intended or give this figure its own file name.
plt.savefig('Fraudulent_Transaction_Flow.png')
plt.show()
# list(G.edges)


In [None]:
# NOTE(review): this import lives in the last cell rather than in the import
# cell at the top of the notebook; consider moving it there.
from matplotlib import colormaps
# Exploratory helper: show the entries of matplotlib's colormap registry
# (presumably the names usable as `cmap=` in the plots above).
list(colormaps)