# Import Package

In [2]:
import pandas as pd
import networkx as nx
from community import community_louvain

# Load Data

In [3]:
if __name__ == '__main__':
    train_data_df = pd.read_csv('train.csv')
    test_data_df = pd.read_csv('test.csv')

# Create Graph

In [4]:
    node_pairs = [ pair for pair in zip(train_data_df['Node1'], train_data_df['Node2'])]
    G = nx.Graph()
    G.add_edges_from(node_pairs)
    print('# of nodes:', G.number_of_nodes())
    print('# of edges:', G.number_of_edges())

# of nodes: 317080
# of edges: 1049866


# Community Detection with Fast Unfolding

In [5]:
    partition = community_louvain.best_partition(G, random_state=42)

In [6]:
    print(partition)

{317079: 0, 317078: 0, 317077: 0, 315631: 0, 307192: 0, 302247: 0, 301873: 0, 280285: 0, 266078: 0, 256076: 0, 254194: 0, 254137: 0, 127965: 0, 180713: 0, 127964: 0, 127963: 0, 127962: 0, 315027: 0, 314233: 0, 314193: 0, 314133: 0, 314073: 0, 314072: 0, 313528: 95, 312880: 0, 312327: 0, 311943: 0, 311766: 0, 311576: 2, 311157: 0, 310992: 0, 310788: 0, 310427: 0, 310368: 0, 310215: 0, 309911: 0, 127961: 0, 307929: 0, 307237: 3, 305206: 0, 304974: 0, 304215: 0, 303760: 0, 303175: 0, 302210: 0, 301953: 0, 301169: 0, 300213: 0, 299830: 0, 298333: 0, 296394: 0, 296205: 0, 295173: 0, 294724: 0, 293985: 0, 293944: 0, 290201: 0, 289284: 4, 289252: 0, 127960: 0, 127959: 2, 284914: 0, 284653: 5, 284359: 0, 281233: 0, 280549: 0, 280412: 0, 279007: 0, 278482: 0, 276638: 0, 276019: 0, 275536: 0, 275228: 0, 275082: 0, 274473: 0, 271789: 0, 270710: 0, 270709: 0, 270352: 0, 269200: 0, 268299: 6, 268298: 0, 268197: 0, 265007: 0, 127958: 0, 259963: 0, 127957: 0, 127956: 0, 236358: 0, 234918: 0, 233831: 

In [7]:
    community_louvain.modularity(partition, G)

0.8202227853323101

In [8]:
    test_node_pairs = [ pair for pair in zip(test_data_df['Node1'], test_data_df['Node2'])]
    predictions = [ 1 if partition[node_pair[0]] == partition[node_pair[1]] else 0 for node_pair in test_node_pairs ] 

In [9]:
    result = pd.DataFrame(columns=('Id', 'Category'))
    ans_list = list()
    for index, ans in enumerate(predictions):
        res = {
            'Id': index,
            'Category': ans
        }
        ans_list.append(res)
    result = result.append(ans_list)

In [10]:
    result.head()

Unnamed: 0,Id,Category
0,0,0
1,1,0
2,2,1
3,3,1
4,4,1


In [11]:
    result.to_csv('communitiy_detection_ans.csv', index=False)