In [1]:
import pandas as pd
import networkx as nx
from networkx import connected_components
import json
from itertools import combinations

In [29]:
def load_data(csv_file):
    """Read the call records CSV into a pandas DataFrame.

    Parameters
    ----------
    csv_file : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``csv_file``, using ``read_csv`` defaults.
    """
    return pd.read_csv(csv_file)

In [30]:
def build_graph(df):
    """Build an undirected call graph from a DataFrame of call records.

    Every row contributes one edge between ``nodeNaam1`` and ``nodeNaam2``
    that carries the row's ``begintijd`` and ``eindtijd`` as edge
    attributes. All four values are passed through ``int()`` so the graph
    only holds plain Python ints.

    Parameters
    ----------
    df : pandas.DataFrame
        Call records. Unless the frame is empty it must provide the columns
        ``nodeNaam1``, ``nodeNaam2``, ``begintijd`` and ``eindtijd``.

    Returns
    -------
    networkx.Graph
        The call graph; an empty graph when ``df`` has no rows.

    Raises
    ------
    KeyError
        If a required column is missing from a non-empty frame.
    ValueError, TypeError
        If a value cannot be converted by ``int()`` (for example NaN).

    Notes
    -----
    ``nx.Graph`` stores a single edge per node pair, so a later row for the
    same pair replaces the call times recorded by an earlier row.
    """
    G = nx.Graph()
    if df.empty:
        # No rows to add; this also covers a frame that has no columns.
        return G

    # Iterate the columns directly instead of ``df.iterrows()``: iterrows
    # upcasts each row to one common dtype, so an unrelated float column
    # would turn large integer ids/timestamps into lossy float64 values.
    columns = (df['nodeNaam1'], df['nodeNaam2'], df['begintijd'], df['eindtijd'])
    for node1, node2, begintijd, eindtijd in zip(*columns):
        G.add_edge(
            int(node1),
            int(node2),
            begintijd=int(begintijd),
            eindtijd=int(eindtijd),
        )
    return G

In [36]:
#Version 2
def detect_communities_with_call_times(G):
    """Split the call graph into connected components and list their calls.

    Parameters
    ----------
    G : networkx.Graph
        Undirected graph whose edges carry ``begintijd`` and ``eindtijd``
        attributes.

    Returns
    -------
    list of dict
        One dict per connected component with the keys

        * ``'nodes'`` - the component's nodes converted with ``int()``;
        * ``'edges'`` - one dict per edge with the keys ``'node1'``,
          ``'node2'``, ``'begintijd'`` and ``'eindtijd'`` (all ``int``).

        Every undirected edge is listed exactly once and self-loops are
        left out.

    Raises
    ------
    KeyError
        If an edge lacks the ``begintijd`` or ``eindtijd`` attribute.
    """
    communities = []

    # connected_components yields each connected set of nodes as one community.
    for community in connected_components(G):
        edges = []
        processed = set()

        for node1 in community:
            # Every neighbour of node1 is in the same component by definition,
            # so walking the adjacency is enough; no all-pairs scan is needed.
            for node2, call_times in G[node1].items():
                # Skip self-loops, and skip neighbours that were already
                # handled as node1: that edge was emitted from the other
                # side, and (a, b) / (b, a) are the same undirected edge.
                if node2 == node1 or node2 in processed:
                    continue
                edges.append({
                    'node1': int(node1),
                    'node2': int(node2),
                    'begintijd': int(call_times['begintijd']),
                    'eindtijd': int(call_times['eindtijd']),
                })
            processed.add(node1)

        communities.append({
            # Plain ints keep the result JSON-serialisable, like the edges.
            'nodes': [int(node) for node in community],
            'edges': edges,
        })
    return communities

In [32]:
def store_communities_with_call_times(communities, output_file):
    """Write the detected communities to ``output_file`` as indented JSON.

    Parameters
    ----------
    communities : list
        JSON-serialisable community records.
    output_file : str or path-like
        Destination path; it is opened in text write mode, so an existing
        file is overwritten.
    """
    with open(output_file, 'w') as handle:
        json.dump(communities, fp=handle, indent=4)

In [33]:
def community_detection_with_call_times(csv_file, output_file):
    """Run the whole pipeline: load, build graph, detect, store.

    Parameters
    ----------
    csv_file : str
        Path of the CSV file handed to ``load_data``.
    output_file : str
        Path handed to ``store_communities_with_call_times`` for the result.
    """
    # CSV -> DataFrame -> call graph
    call_graph = build_graph(load_data(csv_file))

    # call graph -> communities -> output file
    store_communities_with_call_times(
        detect_communities_with_call_times(call_graph),
        output_file,
    )

In [38]:
# Input CSV and output JSON locations for this run; point csv_file_path at
# the actual CSV file before executing the cell.
output_communities_file = 'communities_with_call_times_1000.json'
csv_file_path = './data/full_data_1000.csv'
community_detection_with_call_times(
    csv_file=csv_file_path,
    output_file=output_communities_file,
)