In [1]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

from Qommunity.samplers.hierarchical.advantage_sampler import AdvantageSampler
from Qommunity.searchers.hierarchical_searcher import HierarchicalSearcher
# from Qommunity.iterative_searcher import IterativeSearcher

In [2]:
# Fixed seed so the random test graph (and every result derived from it)
# is the same after a kernel restart.
SEED = 42
G = nx.powerlaw_cluster_graph(n=100, m=1, p=0.1, seed=SEED)
# The three values below are forwarded unchanged to AdvantageSampler.
num_reads = 100
version = ""  # NOTE(review): empty string presumably selects a default solver version - confirm
region = "na-west-1"

### Usage

In [3]:
# Sampler used in the "clique embedding" experiment; one option per line so
# the configuration is easy to diff against the heuristic variant further down.
advantage = AdvantageSampler(
    G,
    num_reads=num_reads,
    version=version,
    region=region,
    use_clique_embedding=True,
    elapse_times=True,
    return_sampleset_metadata=True,
)

In [4]:
# iterative_searcher = IterativeSearcher(advantage)
# iterative_searcher.sampler

In [5]:
from Qommunity.samplers.hierarchical.hierarchical_sampler import (
    HierarchicalSampler,
)
from Qommunity.searchers.hierarchical_searcher import (
    HierarchicalSearcher,
)
import networkx as nx
from time import time
from tqdm import tqdm
import numpy as np
import warnings

from Qommunity.samplers.hierarchical.advantage_sampler import AdvantageSampler
import pickle

# Name of the keyword argument that is looked up in / injected into the kwargs
# forwarded to `hierarchical_community_search` to request sampleset metadata.
SAMPLESET_METADATA_KEYARG = "return_sampleset_metadata"

class MethodArgsWarning(Warning):
    """Warning category for keyword arguments a method cannot honour."""

    def __init__(self, msg):
        # Delegate straight to the base Warning initialiser with the message.
        Warning.__init__(self, msg)


# tqdm-friendly replacement for warnings.showwarning
def warn(message, category, filename, lineno, file=None, line=None):
    """Print a warning through ``tqdm.write``.

    The signature mirrors ``warnings.showwarning``; only ``message`` is used,
    the remaining arguments are accepted and ignored.
    """
    text = "Warning: " + str(message)
    tqdm.write(text)


# Install `warn` as the process-wide warning printer (this affects every
# warning shown after this cell runs, not only MethodArgsWarning) ...
warnings.showwarning = warn
# ... and make sure MethodArgsWarning is shown every time it is raised.
warnings.simplefilter("always", MethodArgsWarning)


class IterativeHierarchicalSearcher:
    def __init__(self, sampler: HierarchicalSampler) -> None:
        self.sampler = sampler
        self.searcher = HierarchicalSearcher(self.sampler)

    def _default_saving_path(self) -> str:
        return (
            f"{self.sampler.__class__.__name__}"
            + "-network_size_"
            + f"{self.sampler.G.number_of_nodes()}"
        )

    def _verify_kwargs(self, kwargs) -> dict:
        kwargs_unhandled = ["division_tree", "return_modularities"]
        kwargs_warning = []
        for kwarg in kwargs_unhandled:
            if kwarg in kwargs:
                kwargs.pop(kwarg, None)
                kwargs_warning.append(kwarg)
        if kwargs_warning:
            msg = ", ".join(kwargs_warning)
            warnings.warn(
                f"in order to get {msg} run "
                + " IterativeSearcher.run_with_sampleset_info()"
            )

        return kwargs

    def run(
        self,
        num_runs: int,
        save_results: bool = True,
        saving_path: str | None = None,
        elapse_times: bool = True,
        iterative_verbosity: int = 0,
        # return_sampleset_metadata: bool = False,
        **kwargs,
    ):
        kwargs = self._verify_kwargs(kwargs)

        if iterative_verbosity >= 1:
            print("Starting community detection iterations")

        if save_results and saving_path is None:
            saving_path = self._default_saving_path()

        modularities = np.zeros((num_runs))
        communities = np.empty((num_runs), dtype=object)
        times = np.zeros((num_runs))
        
        # List instead of samplesets_data = np.empty((num_runs), dtype=object)
        # To prevent jupyter notebook kernel crashes
        # as handling big objects is not efficient with numpy dtype=object arrs
        samplesets_data = []

        for iter in tqdm(range(num_runs)):
            elapsed = time()
            result = self.searcher.hierarchical_community_search(**kwargs)
            times[iter] = time() - elapsed

            if SAMPLESET_METADATA_KEYARG in kwargs:
                result, sampleset_metadata = result

            try:
                modularity_score = nx.community.modularity(
                    self.searcher.sampler.G,
                    result,
                    resolution=self.sampler.resolution,
                )
            except Exception as e:
                print(f"iteration: {iter} exception: {e}")
                modularity_score = -1

            communities[iter] = result
            modularities[iter] = modularity_score
            samplesets_data[iter] = sampleset_metadata

            if save_results:
                np.save(f"{saving_path}_modularities", modularities)
                np.save(f"{saving_path}_communities", communities)
                if elapse_times:
                    np.save(f"{saving_path}_times", times)
                # if return_sampleset_metadata:
                with open(f"samplesets_data.pkl", "wb") as f:
                    pickle.dump(samplesets_data, f)

            if iterative_verbosity >= 1:
                print(f"Iteration {iter} completed")

        if elapse_times and sampleset_metadata:
            return communities, modularities, times, sampleset_metadata
        if elapse_times:
            return communities, modularities, times
        # if return_sampleset_metadata:
            return communities, modularities, samplesets_data
        return communities, modularities

    def run_with_sampleset_info(
        self,
        num_runs: int,
        save_results: bool = True,
        saving_path: str | None = None,
        iterative_verbosity: int = 0,
        return_sampleset_metadata: bool = True,
        **kwargs,
    ):

        if iterative_verbosity >= 1:
            print("Starting community detection iterations")

        if save_results and saving_path is None:
            saving_path = self._default_saving_path()

        modularities = np.zeros((num_runs))
        communities = np.empty((num_runs), dtype=object)
        times = np.zeros((num_runs))
        division_modularities = np.empty((num_runs), dtype=object)
        division_trees = np.empty((num_runs), dtype=object)
        # samplesets_data = np.empty((num_runs), dtype=object)
        samplesets_data = []

        if return_sampleset_metadata:
            kwargs[SAMPLESET_METADATA_KEYARG] = True

        for iter in tqdm(range(num_runs)):
            elapsed = time()
            result = self.searcher.hierarchical_community_search(
                return_modularities=True,
                division_tree=True,
                **kwargs,
            )

            # np.save(f"results_{iter}.npy", result)
            with open(f"results_{iter}.pkl", "wb") as f:
                pickle.dump(result, f)
            # LOADED_RES_MEDATADA_MOCK = np.load("results.npy", allow_pickle=True)
            # result = LOADED_RES_MEDATADA_MOCK

            # Currently only AdvantageSampler among the hierarchical solvers
            # provides sampleset metadata.
            if isinstance(self.sampler, AdvantageSampler) and self.sampler.return_sampleset_metadata and return_sampleset_metadata:
                (
                    communities_result,
                    div_tree,
                    div_modularities,
                    sampleset_data,
                ) = result
            else:
                (
                    communities_result,
                    div_tree,
                    div_modularities,
                ) = result
            times[iter] = time() - elapsed
            division_trees[iter] = div_tree
            division_modularities[iter] = div_modularities
            # samplesets_data[iter] = sampleset_data
            samplesets_data.append(sampleset_data)

            try:
                modularity_score = nx.community.modularity(
                    self.searcher.sampler.G,
                    communities_result,
                    resolution=self.sampler.resolution,
                )
            except Exception as e:
                print(f"iteration: {iter} exception: {e}")
                modularity_score = -1

            communities[iter] = communities_result
            modularities[iter] = modularity_score

            if save_results:
                np.save(f"{saving_path}_modularities", modularities)
                np.save(f"{saving_path}_communities", communities)
                np.save(f"{saving_path}_times", times)
                np.save(f"{saving_path}_division_trees", division_trees)
                np.save(
                    f"{saving_path}_division_modularities",
                    division_modularities,
                )
                with open(f"samplesets_data.pkl", "wb") as f:
                    pickle.dump(samplesets_data, f)
                # np.save(f"{saving_path}_samplesets_data", samplesets_data)
                

            if iterative_verbosity >= 1:
                print(f"Iteration {iter} completed")

        dtypes = [
            ("communities", object),
            ("modularity", np.float_),
            ("time", np.float_),
            ("division_tree", object),
            ("division_modularities", object),
        ]
        sampleset_components = [
            communities,
            modularities,
            times,
            division_trees,
            division_modularities,
        ]

        if return_sampleset_metadata:
            dtypes.append(("samplesets_data", object))
            sampleset_components.append(samplesets_data)

        sampleset = np.rec.fromarrays(
            sampleset_components,
            dtype=dtypes,
        )

        return sampleset

        # if not return_sampleset_metadata:
        #     return sampleset

        # results_processed = self._process_results(sampleset)

        # return results_processed

    # def _process_results(self, sampleset):
    #     dtype = [si.dwave_sampleset_metadata for si in sampleset[0].samplesets_data][
    #         0
    #     ].dtype.descr
    #     dwave_sampleset_metadata = np.array(
    #         [
    #             np.concatenate(
    #                 [
    #                     np.array([r], dtype=dtype)
    #                     for r in [
    #                         si.dwave_sampleset_info
    #                         for si in sampleset[run].samplesets_data
    #                     ]
    #                 ]
    #             ).view(np.recarray)
    #             for run in range(len(sampleset))
    #         ],
    #         dtype=object,
    #     )

    #     dtype = [si.time_measurements for si in sampleset[0].samplesets_data][
    #         0
    #     ].dtype.descr
    #     time_measurements = np.array(
    #         [
    #             np.concatenate(
    #                 [
    #                     np.array([r], dtype=dtype)
    #                     for r in [
    #                         si.time_measurements
    #                         for si in sampleset[run].samplesets_data
    #                     ]
    #                 ]
    #             ).view(np.recarray)
    #             for run in range(len(sampleset))
    #         ],
    #         dtype=object,
    #     )

    #     results_procesed_dtypes = sampleset.dtype.descr
    #     results_procesed_dtypes.pop()
    #     results_procesed_dtypes.append(("dwave_sampleset_metadata", object))
    #     results_procesed_dtypes.append(("time_measurements", object))
    #     results_procesed_dtypes

    #     results_processed_componenets = [
    #         sampleset.communities,
    #         sampleset.modularity,
    #         sampleset.time,
    #         sampleset.division_tree,
    #         sampleset.division_modularities,
    #         dwave_sampleset_metadata,
    #         time_measurements,
    #     ]

    #     results_processed = np.rec.fromarrays(
    #         results_processed_componenets,
    #         dtype=results_procesed_dtypes,
    #     )

    #     return results_processed


In [6]:
# NOTE(review): leftover mock. The name is not referenced by any other visible
# cell, and the visible saving code does not write a `*_samplesets_data.npy`
# file (the corresponding np.save call is commented out), so this line fails
# on a fresh checkout. `allow_pickle=True` also unpickles arbitrary objects -
# only load files you trust. Confirm whether this cell is still needed.
LOADED_RES_MEDATADA_MOCK = np.load("advantage_clique_samples_samplesets_data.npy", allow_pickle=True)

In [7]:
# Wrap the clique-embedding sampler in the notebook-local iterative searcher.
iterative_searcher = IterativeHierarchicalSearcher(advantage)

In [8]:
# Five hierarchical runs with sampleset metadata collected per run.
# Long-running cell: the recorded progress bar below shows about 8.5 minutes
# for the 5 runs.
res = iterative_searcher.run_with_sampleset_info(
    num_runs=5,
    save_results=True,
    saving_path="advantage_clique_samples",
    iterative_verbosity=1,
    return_sampleset_metadata=True,
)

Starting community detection iterations


  0%|          | 0/5 [00:00<?, ?it/s]

 20%|██        | 1/5 [01:38<06:35, 98.97s/it]

Iteration 0 completed


 40%|████      | 2/5 [03:11<04:45, 95.04s/it]

Iteration 1 completed


 60%|██████    | 3/5 [04:51<03:15, 97.55s/it]

Iteration 2 completed


 80%|████████  | 4/5 [06:39<01:41, 101.74s/it]

Iteration 3 completed


100%|██████████| 5/5 [08:35<00:00, 103.08s/it]

Iteration 4 completed





In [12]:
# Show the full record array (one record per run).
# NOTE(review): the repr is very large; consider displaying a slice instead.
res

rec.array([(list([[10, 40, 41, 43, 47, 52, 54, 56, 87, 96], [4], [11, 12, 24, 44, 48, 49, 51, 53, 55, 65, 66, 70, 72, 82, 97, 98], [38, 86], [0, 13, 20, 39, 75, 92, 95], [16, 23, 32, 35, 50, 58, 80, 85], [14, 33, 57, 76, 79, 89, 90, 91, 99], [22, 27, 29, 34, 62, 77, 84], [1, 5, 18, 25, 36, 37, 42, 45, 69, 71, 73, 78, 81, 83, 88, 93], [6, 21, 28, 30, 31, 46, 60, 63, 94], [2, 3, 7, 8, 9, 15, 17, 19, 26, 59, 61, 64, 67, 68, 74]]), 0.77757372,  98.96595883, list([[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]], [[0, 4, 10, 11, 12, 13, 14, 16, 20, 23, 24, 32, 33, 35, 38, 39, 40, 41, 43, 44, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 65, 66, 70, 72

In [10]:
# Pull every field of the record array out into its own per-run array.
communities = res.communities
modularities = res.modularity
times = res.time
division_trees = res.division_tree
division_modularities = res.division_modularities
sampleset_datas = res.samplesets_data

In [11]:
# qpu_access_time summed over all rows of the first run's D-Wave metadata.
# NOTE(review): units are presumably microseconds (D-Wave timing) - confirm.
sampleset_datas[0].dwave_sampleset_metadata.qpu_access_time.sum()

628284.4

In [17]:
# Show the per-run metadata objects (one HierarchicalRunMetadata per run).
# NOTE(review): the repr is very large; consider displaying a single element.
sampleset_datas

array([HierarchicalRunMetadata(dwave_sampleset_metadata=rec.array([(24068., 20., 200.1 , 39836.36, 2299.64, 15768.36, 20.58, 218., 218.),
                  (13604., 20.,  95.46, 29366.76, 1456.24, 15762.76, 20.58, 138., 138.),
                  (20296., 20., 162.38, 36059.16, 1920.84, 15763.16, 20.58,  24.,  24.),
                  (12660., 20.,  86.02, 28423.56, 1379.44, 15763.56, 20.58,   1.,   1.),
                  (10628., 20.,  65.7 , 26391.96, 1386.04, 15763.96, 20.58,   1.,   1.),
                  (12412., 20.,  83.54, 28175.16, 1314.84, 15763.16, 20.58,  28.,  28.),
                  (14990., 20., 109.32, 30753.56, 1036.44, 15763.56, 20.58,  61.,  61.),
                  (12412., 20.,  83.54, 28175.56, 1349.44, 15763.56, 20.58,   1.,   1.),
                  (12412., 20.,  83.54, 28175.96, 1526.04, 15763.96, 20.58,   1.,   1.),
                  (20884., 20., 168.26, 36645.56, 1300.44, 15761.56, 20.58,   1.,   1.),
                  (20296., 20., 162.38, 36057.96, 1590.04, 15

In [19]:
# Print the total qpu_access_time of every run, one line per run.
for run in sampleset_datas:
    # Sum over all rows of this run's metadata record array.
    run_qpu_access_time = run.dwave_sampleset_metadata.qpu_access_time.sum()
    print(run_qpu_access_time)

858568.8800000001
912949.2000000003
885029.64
885543.6400000001
884779.6400000001


In [20]:
# Same totals as the loop above, collected into a list (one entry per run).
qpu_access_times_per_hierarachical_run = [run.dwave_sampleset_metadata.qpu_access_time.sum() for run in sampleset_datas]
qpu_access_times_per_hierarachical_run

[858568.8800000001,
 912949.2000000003,
 885029.64,
 885543.6400000001,
 884779.6400000001]

In [16]:
# Access pattern for a single run: every record of `res` exposes these fields.
# The loop-local names carry a `run_` prefix so they do not overwrite the
# whole-experiment arrays (`communities`, `modularities`, ...) unpacked from
# `res` in an earlier cell.
for run in res:
    run_communities = run.communities
    run_modularity = run.modularity
    run_div_tree = run.division_tree
    run_div_modularities = run.division_modularities
    run_dwave_sampleset_metadata = run.samplesets_data.dwave_sampleset_metadata
    run_time_measurements = run.samplesets_data.time_measurements

First run

Total time of "find_clique_embedding":

In [1]:
# The time measurements live on the run's `samplesets_data` object, not
# directly on the record. Requires `res` from the run cell above.
res[0].samplesets_data.time_measurements.find_clique_embedding_time.sum()

NameError: name 'res' is not defined

In [None]:
# Per-sampleset qpu_access_time of the second run, read through the record's
# `samplesets_data` field.
res[1].samplesets_data.dwave_sampleset_metadata.qpu_access_time

array([39833.56, 29476.36, 35895.16, 35666.76, 24015.16, 36644.76,
       28392.36, 35576.76, 28437.16, 28173.16, 29365.96, 36489.16,
       28173.56, 36057.16, 26389.96, 35666.76, 35894.76, 28421.16,
       28173.96, 24016.36, 22365.16, 22365.16, 28129.56, 28436.76])

### Heuristic

In [None]:
# Heuristic-embedding variant. Keyword names match the sampler construction
# at the top of the notebook (`elapse_times`, `return_sampleset_metadata`).
advantage = AdvantageSampler(
    G,
    num_reads=num_reads,
    version=version,
    region=region,
    use_clique_embedding=False,
    elapse_times=True,
    return_sampleset_metadata=True,
)

In [None]:
# `IterativeSearcher` is never imported in this notebook; use the class
# defined above.
iterative_searcher = IterativeHierarchicalSearcher(advantage)

In [None]:
# Two hierarchical runs with the heuristic embedding.
# Long-running cell: the recorded progress bar below shows about 15 minutes.
res_heu = iterative_searcher.run_with_sampleset_info(
    num_runs=2,
    save_results=True,
    saving_path="heuristic_advantage_clique_samples",
    iterative_verbosity=1,
    return_sampleset_metadata=True,
)

Starting community detection iterations


 50%|█████     | 1/2 [04:56<04:56, 296.86s/it]

Iteration 0 completed


100%|██████████| 2/2 [14:59<00:00, 449.94s/it]

Iteration 1 completed





In [None]:
# Embedding time per sampleset of the first heuristic run.
res_heu[0].samplesets_data.time_measurements.embedding_composite_time

array([0.2807632, 0.2406278, 0.3359402, 0.302822 , 0.1841993, 0.1517898,
       0.2029491, 0.320833 , 0.2347661, 0.1915635, 0.2737376, 0.2755818,
       0.1963269, 0.2202596, 0.3618938, 0.4035531, 0.4405374, 0.427501 ,
       0.3805251, 0.3727842, 0.3873692, 0.456362 , 0.4334511, 0.3993097])

In [None]:
# All time measurements of the first heuristic run.
res_heu[0].samplesets_data.time_measurements

rec.array([(0.2807632, 59.6475311), (0.2406278, 16.8091243),
           (0.3359402,  7.4627778), (0.302822 ,  0.6165701),
           (0.1841993,  0.7094622), (0.1517898,  0.0663413),
           (0.2029491,  0.2232563), (0.320833 ,  0.1497798),
           (0.2347661,  0.1103587), (0.1915635,  0.7009316),
           (0.2737376,  0.2508542), (0.2755818,  0.1312484),
           (0.1963269,  0.0945934), (0.2202596, 38.5239291),
           (0.3618938,  4.9918162), (0.4035531,  0.2753941),
           (0.4405374,  2.4519983), (0.427501 ,  0.1697537),
           (0.3805251,  1.2791192), (0.3727842,  1.8661805),
           (0.3873692,  0.2791576), (0.456362 ,  0.1886594),
           (0.4334511,  0.1984597), (0.3993097,  0.5174587)],
          dtype=[('embedding_composite_time', '<f8'), ('sample_time', '<f8')])

In [None]:
# Total embedding time of the first heuristic run.
res_heu[0].samplesets_data.time_measurements.embedding_composite_time.sum()

7.4754465000587516

In [None]:
# Total sampling time of the first heuristic run.
res_heu[0].samplesets_data.time_measurements.sample_time.sum()

137.71475569996983

In [None]:
# Sampling time per sampleset of the first heuristic run.
res_heu[0].samplesets_data.time_measurements.sample_time

array([59.6475311, 16.8091243,  7.4627778,  0.6165701,  0.7094622,
        0.0663413,  0.2232563,  0.1497798,  0.1103587,  0.7009316,
        0.2508542,  0.1312484,  0.0945934, 38.5239291,  4.9918162,
        0.2753941,  2.4519983,  0.1697537,  1.2791192,  1.8661805,
        0.2791576,  0.1886594,  0.1984597,  0.5174587])

In [81]:
# NOTE(review): repeats an earlier cell (embedding time per sampleset of the
# first heuristic run); consider removing one of the two.
res_heu[0].samplesets_data.time_measurements.embedding_composite_time

array([0.2807632, 0.2406278, 0.3359402, 0.302822 , 0.1841993, 0.1517898,
       0.2029491, 0.320833 , 0.2347661, 0.1915635, 0.2737376, 0.2755818,
       0.1963269, 0.2202596, 0.3618938, 0.4035531, 0.4405374, 0.427501 ,
       0.3805251, 0.3727842, 0.3873692, 0.456362 , 0.4334511, 0.3993097])