In [1]:
# import necessary packages

import numpy as np
from pyqubo import Array, Constraint
import dimod
from dimod import ExactSolver
from dwave.samplers import SimulatedAnnealingSampler

In [2]:
# Create a synthetic dataset for clustering
np.random.seed(42)
data = np.array([
    [1, 2], [2, 3], [3, 4],  # Cluster 1
    [10, 10], [11, 11], [12, 12],  # Cluster 2
    [25, 30], [24, 28], [30, 25]  # Cluster 3
])

In [3]:
# Number of data points and clusters
num_points = len(data)
num_clusters = 3

In [4]:
# Define binary variables for each point-cluster assignment
x = Array.create('x', shape=(num_points, num_clusters), vartype='BINARY')

In [5]:
# Objective function: Minimize the number of clusters used
objective = -sum(sum(x[i, j] for j in range(num_clusters)) for i in range(num_points))

In [6]:
# Constraint 1: Each point must be assigned to exactly one cluster
constraints = []
for i in range(num_points):
    constraints.append(Constraint((sum(x[i, j] for j in range(num_clusters)) - 1) ** 2, label=f'one_cluster_{i}'))

In [7]:
# Constraint 2: Add distance terms to encourage clustering based on proximity
for i in range(num_points):
    for j in range(num_points):
        if i != j:
            dist = np.linalg.norm(data[i] - data[j])
            for k in range(num_clusters):
                objective += dist * x[i, k] * x[j, k]

In [8]:
# Combine objective and constraints
model = objective + sum(constraints)

In [9]:
# Compile the model to a Binary Quadratic Model (BQM)
bqm = model.compile().to_bqm()

In [10]:
# Use a simulated annealing solver to find the solution
sampler = SimulatedAnnealingSampler()
response = sampler.sample(bqm)

In [11]:
# Get the best solution
best_solution = response.first.sample

In [12]:
# Print cluster assignments
clusters = [[] for _ in range(num_clusters)]
for i in range(num_points):
    for j in range(num_clusters):
        if best_solution[f'x[{i}][{j}]'] == 1:
            clusters[j].append(i)

In [13]:
for idx, cluster in enumerate(clusters):
    print(f"Cluster {idx + 1}: {[data[i] for i in cluster]}")

Cluster 1: [array([2, 3])]
Cluster 2: [array([10, 10])]
Cluster 3: [array([1, 2])]
