In [1]:
import pandas as pd
import networkx as nx
import community as community_louvain  # Louvain method from the python-louvain package
import json

In [2]:
# Step 1: Load the CSV file
def load_data(csv_file):
    """Read the call records stored in ``csv_file`` into a DataFrame.

    The file is parsed with pandas' default ``read_csv`` settings and
    returned as-is; no cleaning or preprocessing is applied here.

    Args:
        csv_file: Path (or any other input accepted by ``pd.read_csv``)
            of the CSV file to read.

    Returns:
        pandas.DataFrame: the parsed contents of the file.
    """
    return pd.read_csv(csv_file)

In [3]:
# Step 2: Build the Graph with call times as edge attributes
def build_graph(df):
    """Build an undirected call graph from a table of call records.

    Args:
        df: DataFrame with the columns ``nodeNaam1``, ``nodeNaam2``,
            ``begintijd`` and ``eindtijd``. Every value in those columns
            must be convertible with ``int()``.

    Returns:
        networkx.Graph: one edge per distinct node pair found in the rows,
        carrying the integer edge attributes ``begintijd`` and ``eindtijd``.
        Node ids are stored as Python ``int``.

    Raises:
        KeyError: if ``df`` has rows but one of the required columns is missing.
        ValueError, TypeError: if a value cannot be converted by ``int()``
            (for example NaN or ``None``).

    Note:
        ``nx.Graph`` keeps a single edge per node pair, so when the same pair
        occurs in several rows only the times of the last such row are kept.
    """
    G = nx.Graph()
    if len(df) == 0:
        # No rows means no edges; skip the column lookup so that an empty
        # frame (even one without the expected columns) yields an empty graph.
        return G

    # Iterate column-wise instead of using iterrows(): iterrows() builds one
    # Series per row and coerces the whole row to a single dtype, so any float
    # column in the frame would turn large integer ids/timestamps into floats
    # (losing precision above 2**53) before int() is applied. It is also slow.
    columns = ('nodeNaam1', 'nodeNaam2', 'begintijd', 'eindtijd')
    records = zip(*(df[name] for name in columns))
    for node1, node2, begintijd, eindtijd in records:
        G.add_edge(
            int(node1),
            int(node2),
            begintijd=int(begintijd),
            eindtijd=int(eindtijd),
        )
    return G

In [4]:
# Step 3: Detect Communities Using Louvain Method and Store Call Times
def detect_communities_with_louvain(G, random_state=None):
    """Partition ``G`` with the Louvain method and list each community's edges.

    Args:
        G: Graph whose edges carry ``begintijd`` and ``eindtijd`` attributes.
            Node ids must be orderable and convertible with ``int()``.
        random_state: Optional seed (or random state object) forwarded to
            ``community_louvain.best_partition`` so that a run can be
            reproduced. With the default ``None`` the call is made exactly as
            before and the partition may differ between runs.

    Returns:
        list[dict]: one dict per community, in order of first appearance in
        the partition, with the keys

        * ``community_id`` - 1-based sequential id,
        * ``nodes`` - the member nodes,
        * ``edges`` - dicts ``{'node1', 'node2', 'begintijd', 'eindtijd'}``
          (all ``int``) for every edge whose two endpoints are both in the
          community, reported once with ``node1 < node2``.

        Edges that connect two different communities are not reported.
    """
    # Only forward the seed when one was requested, so the call stays valid
    # for python-louvain releases that do not know `random_state`.
    louvain_kwargs = {} if random_state is None else {'random_state': random_state}
    partition = community_louvain.best_partition(G, **louvain_kwargs)

    # Group nodes by Louvain label. Dicts preserve insertion order, so ids
    # are handed out in the order in which each label is first seen.
    community_dict = {}
    for node, label in partition.items():
        if label not in community_dict:
            community_dict[label] = {
                'community_id': len(community_dict) + 1,
                'nodes': [],
                'edges': []
            }
        community_dict[label]['nodes'].append(node)

    # Collect intra-community edges by walking each member's adjacency rather
    # than testing every pair of members, which was quadratic in community size.
    for community in community_dict.values():
        members = community['nodes']
        position = {node: index for index, node in enumerate(members)}
        for node1 in members:
            # `node1 < node2` reports each undirected edge once and skips
            # self-loops. Sorting by position in `members` reproduces the
            # edge order of a plain nested scan over the member list.
            partners = [
                node2 for node2 in G[node1]
                if node2 in position and node1 < node2
            ]
            partners.sort(key=position.__getitem__)
            for node2 in partners:
                call_times = G[node1][node2]
                community['edges'].append({
                    'node1': int(node1),
                    'node2': int(node2),
                    'begintijd': int(call_times['begintijd']),
                    'eindtijd': int(call_times['eindtijd'])
                })

    return list(community_dict.values())

In [5]:
def store_communities_with_call_times(communities, output_file):
    """Write ``communities`` to ``output_file`` as JSON indented by 4 spaces.

    Args:
        communities: JSON-serializable object to store (in this notebook, the
            list returned by ``detect_communities_with_louvain``).
        output_file: Path of the file to create or overwrite.

    Raises:
        TypeError: if ``communities`` contains a value ``json`` cannot encode.
        ValueError: if ``communities`` contains a circular reference.
        OSError: if ``output_file`` cannot be opened for writing.
    """
    # Serialize first: opening the file in 'w' mode truncates it, so encoding
    # while the file is open would destroy an existing result (and leave a
    # partial JSON document behind) whenever serialization fails midway.
    payload = json.dumps(communities, indent=4)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(payload)


In [6]:
# Full pipeline: CSV -> graph -> Louvain communities -> JSON
def community_detection_with_louvain(csv_file, output_file):
    """Run every stage of the analysis for one input file.

    The stages are chained in this order: ``load_data`` reads ``csv_file``,
    ``build_graph`` turns the records into a graph,
    ``detect_communities_with_louvain`` partitions it, and
    ``store_communities_with_call_times`` writes the result to ``output_file``.

    Args:
        csv_file: Path of the CSV file with the call records.
        output_file: Path of the JSON file to write.

    Returns:
        None. The result is only written to ``output_file``.
    """
    call_graph = build_graph(load_data(csv_file))
    store_communities_with_call_times(
        detect_communities_with_louvain(call_graph),
        output_file,
    )

In [None]:
# Scratch note (input file name): data/avgRela50_data_50000_20.csv

In [7]:
# Run the full pipeline on avgRela50_data_50000_20.csv and write the detected
# communities to 50K_20.json.
csv_file_path = '../data/avgRela50_data_50000_20.csv'  # input CSV, relative to the working directory
output_communities_file = '50K_20.json'  # JSON output, created in the working directory
community_detection_with_louvain(csv_file_path, output_communities_file)

In [8]:
# Run the full pipeline on full_data_1000.csv and write the detected
# communities to 10K.json.
# NOTE(review): the input is named "full_data_1000" while the output is named
# "10K.json" - confirm which size is intended.
csv_file_path = '../data/full_data_1000.csv'  # input CSV, relative to the working directory
output_communities_file = '10K.json'  # JSON output, created in the working directory
community_detection_with_louvain(csv_file_path, output_communities_file)

In [9]:
# Run the full pipeline on small_test2_data.csv and write the detected
# communities to Small_test.json.
csv_file_path = '../data/small_test2_data.csv'  # input CSV, relative to the working directory
output_communities_file = 'Small_test.json'  # JSON output, created in the working directory
community_detection_with_louvain(csv_file_path, output_communities_file)

In [8]:
# Run the full pipeline on full_data_20000.csv and write the detected
# communities to 20K.json.
csv_file_path = '../data/full_data_20000.csv'  # input CSV, relative to the working directory
output_communities_file = '20K.json'  # JSON output, created in the working directory
community_detection_with_louvain(csv_file_path, output_communities_file)