In [None]:
import json
import os
import zipfile

import networkx as nx
import numpy as np
import pandas as pd
from kaggle.api.kaggle_api_extended import KaggleApi
from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

import algorithm.kcomm.graph_kClusterAlgorithm_functions as QCD
import algorithm.kcomm.graphFileUtility_functions as GFU

In [None]:
# Create the Kaggle API client and authenticate it. The resulting `api` object
# is reused later in the notebook to download the competition data and to
# submit the generated file.
# NOTE(review): no credentials are set in this notebook, so they are presumably
# supplied by the environment (e.g. a kaggle.json file) — confirm before running.
api = KaggleApi()
api.authenticate()

In [3]:
# Kaggle competition slug; it also names the sub-directory holding the data.
competition = 'cm4ai-community-detection-benchmark'

# Root folder for downloaded data, created on first run.
data_dir = "../data"
os.makedirs(data_dir, exist_ok=True)

# <data_dir>/<competition>: the per-competition location used by later cells.
data_path = os.path.join(data_dir, competition)


In [4]:
# Folder that receives the files written by this notebook (created if missing).
output_dir = "../output"
os.makedirs(name=output_dir, exist_ok=True)

In [5]:
# Download every file of the competition into `data_dir`.
# `competition` is deliberately NOT redefined here: it is set once in the
# path-setup cell, where `data_path` is derived from it, so keeping a single
# definition guarantees the download and the extraction path cannot drift apart.
# force=True re-downloads the archive even if a previous copy exists.
api.competition_download_files(competition, path=data_dir, force=True)

In [6]:
# Unpack the downloaded archive (<data_path>.zip) into the <data_path> directory.
zip_file_path = f"{data_path}.zip"
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=data_path)

In [7]:
# --- Run configuration -------------------------------------------------------
# Profile name and sub-QUBO size; both are handed to QCD.clusterHybrid below.
run_profile = "defaults"
qsize = 64

# Threshold passed to QCD.build_mod and QCD.makeQubo.
threshold = 0.2

# Base values passed to QCD.set_penalty_constant.
beta0 = 5
gamma0 = -250

# Options bundle consumed by the GFU helpers (plotting / partition file).
args_dict = dict(output_dir=output_dir)

In [8]:
# Name of the benchmark graph to process; also used as the label of the run.
run_label = "graph1"

# Build the path from `data_path` (the directory the archive was extracted to)
# rather than repeating the competition directory as a literal, so the location
# stays correct if the data directory or competition ever changes.
input_graph = os.path.join(data_path, run_label)

# Load the graph from its edge-list file (<input_graph>.edgelist).
G = nx.read_edgelist(f"{input_graph}.edgelist")

In [None]:
# Sparse adjacency matrix of the graph plus its basic size figures.
A = nx.adjacency_matrix(G)
num_nodes = nx.number_of_nodes(G)
num_edges = nx.number_of_edges(G)

# Show only a summary and a small corner: densifying the whole matrix needs
# O(num_nodes^2) memory and floods the cell output on anything but tiny graphs.
preview = min(num_nodes, 8)
print (f"\nAdjacency matrix: shape={A.shape}, nonzeros={A.nnz}")
print (f"Top-left {preview}x{preview} corner:\n", A[:preview, :preview].todense())

# Upper bound on the number of communities to search for.
# TODO: What should we put for num_parts?
#       (earlier idea: num_parts = len(np.unique(gt_arr)) when ground truth is available)
num_parts = 32
num_blocks = num_parts
print (f"\nQuantum Community Detection: Up to {num_parts} communities")
print (f"Graph has {num_nodes} nodes and {num_edges} edges")

In [10]:
# Derive the three constants (beta, gamma, GAMMA) that are later passed to
# QCD.makeQubo, from the graph size, the number of blocks and the base values
# beta0 / gamma0 set in the configuration cell.
# NOTE(review): the actual formula lives in QCD.set_penalty_constant and is not
# visible from this notebook.
beta, gamma, GAMMA  = QCD.set_penalty_constant(num_nodes, num_blocks, beta0, gamma0)

In [None]:
# Build the modularity matrix (and its companion total `mtotal`) from the
# adjacency matrix, the threshold and the edge count.
mtotal, modularity = QCD.build_mod(A, threshold, num_edges)
print ("\nModularity matrix: \n", modularity)

# Report the value range of the matrix next to the threshold passed to build_mod.
for caption, value in (
    ("min value = ", modularity.min()),
    ("max value = ", modularity.max()),
    ("threshold = ", threshold),
):
    print (caption, value)

In [None]:
# Assemble the QUBO `Q` from the graph, the modularity matrix, the three
# constants returned by set_penalty_constant, the problem sizes and the threshold.
# `Q` is the input of the clustering cell below.
Q = QCD.makeQubo(G, modularity, beta, gamma, GAMMA, num_nodes, num_parts, num_blocks, threshold)


In [None]:
# Run summary: problem dimensions known before solving. The same dict is handed
# to the solver and post-processing cells and gains 'modularity_metric' later.
result = {
    'num_clusters': num_parts,
    'nodes': num_nodes,
    'edges': num_edges,
    'size': num_nodes * num_parts,   # nodes x communities
    'subqubo_size': qsize,
}
result

In [None]:
# Run k-clustering with Hybrid/D-Wave using ocean
# `ss` is the solver output consumed by QCD.process_solution in the next cell.
# NOTE(review): `result` is passed in and displayed right after the call, so
# clusterHybrid presumably records run details in it — verify in the QCD module.
ss = QCD.clusterHybrid(Q, num_parts, qsize, run_label, run_profile, result)
result

In [None]:
# Process solution
# Turns the solver output `ss` into `part_number`, which the following cells use
# to compute the modularity metric, draw the clusters and write the partition file.
# NOTE(review): presumably a per-node community assignment — confirm against
# QCD.process_solution.
part_number = QCD.process_solution(ss, G, num_blocks, num_nodes, num_parts, result)

In [None]:
# Score the partition with QCD.calcModularityMetric and store the value in the
# run summary under 'modularity_metric'.
mmetric = QCD.calcModularityMetric(mtotal, modularity, part_number)
result['modularity_metric'] = mmetric

In [None]:
# draw graph clusters and save .png
# NOTE(review): args_dict only carries 'output_dir', so the image is presumably
# written there — confirm in GFU.showClusters.
GFU.showClusters(part_number, G, args_dict)

In [None]:
# Persist the partition to disk; the next cell reads it back from
# <output_dir>/comm<num_parts>.txt.
GFU.write_partFile(part_num=part_number, Dim=num_nodes, nparts=num_parts, args_dict=args_dict)

In [None]:
# Read the partition file back into Python structures:
#   communities - list of [node, community] string pairs, one per data line
#   pred_arr    - the community id of every data line, as int
columns = ["node_id", "comm_id"]
communities = []
pred_arr = []

comm_file_path = os.path.join(args_dict['output_dir'], f"comm{num_parts}.txt")
with open(comm_file_path) as comm_file:
    # The first line is not data (treated as a header); the default keeps an
    # empty file from raising StopIteration.
    next(comm_file, None)
    for line in comm_file:
        # Split on any run of whitespace, matching the submission cell, instead
        # of relying on an exact two-space separator.
        fields = line.split()
        if not fields:
            # Blank (e.g. trailing) lines carry no assignment.
            continue
        communities.append(fields)
        pred_arr.append(int(fields[1]))

pred_arr[:10]

In [None]:
# Convert the whitespace-separated partition file into the comma-separated
# submission file <output_dir>/submission.csv.
# NOTE(review): no header row is written; if the competition expects one
# (e.g. "node_id,comm_id"), it has to be added here — confirm against the
# competition's sample submission.
submission_file_path = os.path.join(args_dict['output_dir'], 'submission.csv')
with open(comm_file_path, 'r') as infile, open(submission_file_path, 'w') as submission_file:
    # Skip the first line; the default avoids StopIteration on an empty file.
    next(infile, None)

    for line in infile:
        # Split on any whitespace run.
        fields = line.split()
        if not fields:
            # Do not turn blank input lines into empty CSV rows.
            continue
        submission_file.write(",".join(fields) + "\n")

In [None]:
# Submit the file.
# KaggleApi.competition_submit takes a submission message as a required
# argument; omitting it makes the call fail with a TypeError before anything
# is uploaded. The message records which graph and community count were used.
submission_message = f"Quantum community detection: {run_label}, up to {num_parts} communities"
api.competition_submit(
    file_name=submission_file_path,
    message=submission_message,
    competition=competition,
)

print("Submission complete!")