In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram
from qiskit import QuantumCircuit, QuantumRegister, ClassicalRegister, transpile
from qiskit_aer import AerSimulator
%matplotlib inline

In [2]:
# !pip install numpy pandas matplotlib scipy qiskit qiskit-aer


In [3]:
# !pip install qiskit==1.0.1 qiskit-aer==0.14.2 qiskit-algorithms==0.3.0 qiskit-machine-learning==0.7.2

In [4]:
# Quantum simulator backend
backend = AerSimulator()

In [5]:
import numpy as np

# Function to normalize data for quantum state preparation
def normalize_data(data):
    # Calculate the norm of the data (Euclidean norm or L2 norm)
    norm = np.linalg.norm(data)
    
    # Check if the norm is not zero to avoid division by zero error
    if norm != 0:
        # Normalize the data by dividing each element by the norm
        normalized_data = data / norm
    else:
        # If norm is zero, return the original data (or handle this case appropriately)
        normalized_data = data
        
    return normalized_data


In [6]:
def quantum_state_prep(qc, qr, data):
    # Normalize the data (assuming 'data' is a list of data points)
    normalized_data = [normalize_data(d) for d in data]
    
    # Iterate over the data and apply the RY rotation for each normalized data point
    for i, d in enumerate(normalized_data):
        # Calculate the rotation angle theta, based on the normalized data point
        # For simplicity, let's use the arccos of the first element of the normalized data as theta
        # This is just an example, adjust according to your application
        theta = 2 * np.arccos(np.real(d[0]))  # Assuming d is a complex number, adjust as needed
        
        # Apply the RY rotation gate on the quantum register
        qc.ry(theta, qr[i])

In [7]:
import numpy as np
from qiskit import QuantumRegister, ClassicalRegister, QuantumCircuit, transpile
# from qiskit.providers.aer import AerSimulator

# Global variable to prevent re-drawing the circuit multiple times
print_circuit_once = False
# backend = AerSimulator()  # Choose your quantum simulator or real quantum hardware here

# Quantum Inner Product Estimation using Swap Test
def swap_test(data_point, centroid):
    global print_circuit_once 
    
    # Initialize quantum registers
    ancilla = QuantumRegister(1, 'ancilla')  # Ancilla qubit for swap test
    qr_x = QuantumRegister(len(data_point), 'x')  # Data point register
    qr_y = QuantumRegister(len(centroid), 'y')  # Centroid register
    cr = ClassicalRegister(1, 'result')  # Classical register to store result
    
    # Create the quantum circuit
    qc = QuantumCircuit(ancilla, qr_x, qr_y, cr)
    
    # Prepare the quantum states for data_point and centroid
    # Normalizing the quantum states
    data_point_normalized = normalize_data(data_point)
    centroid_normalized = normalize_data(centroid)
    
    # Apply the necessary rotations to encode the data_point and centroid into quantum states
    for i, val in enumerate(data_point_normalized):
        theta_x = 2 * np.arccos(np.real(val))  # Rotation angle for data_point
        qc.ry(theta_x, qr_x[i])
    
    for i, val in enumerate(centroid_normalized):
        theta_y = 2 * np.arccos(np.real(val))  # Rotation angle for centroid
        qc.ry(theta_y, qr_y[i])

    # Apply Hadamard gate to the ancilla qubit
    qc.h(ancilla[0])
    
    # Apply controlled swap gates between the data_point and centroid registers
    for i in range(len(data_point)):
        qc.cswap(ancilla[0], qr_x[i], qr_y[i])
    
    # Measure the ancilla qubit
    qc.measure(ancilla, cr)

    # Draw the circuit once
    if not print_circuit_once:
        qc.draw(output='mpl')
        print_circuit_once = True
    
    # Simulate the quantum circuit
    transpiled_qc = transpile(qc, backend)
    result = backend.run(transpiled_qc, shots=1024).result()
    counts = result.get_counts()
    
    # Calculate the probability of measuring '0'
    p0 = counts.get('0', 0) / 1024  # Probability of measuring 0
    
    # The inner product squared is related to the probability of measuring '0'
    inner_product = 2*p0-1
    return np.sqrt(inner_product)


In [8]:
import numpy as np

# Quantum Distance Estimation using the formula for squared Euclidean distance
def quantum_distance(data_point, centroid):
    # Normalize the data points and centroids (use the normalize_data function)
    norm_x = np.linalg.norm(data_point) ** 2  # Squared norm of data_point
    norm_y = np.linalg.norm(centroid) ** 2  # Squared norm of centroid

    # Use the swap_test function to compute the inner product between data_point and centroid
    inner_product = swap_test(data_point, centroid) ** 2  # Square the result of the swap test

    # Compute the squared Euclidean distance using the formula
    distance_squared = norm_x**2 + norm_y**2 - 2 * inner_product *norm_x*norm_y

    return np.sqrt(distance_squared)


In [9]:
import numpy as np

# Quantum Agglomerative Clustering with Dendrogram Tracking
def quantum_agglomerative_clustering_with_dendrogram(data):
    # Initialize clusters (each data point is its own cluster)
    clusters = [[i] for i in range(len(data))]
    cluster_sizes = {i: 1 for i in range(len(data))}
    cluster_labels = list(range(len(data)))
    linkage_matrix = []
    
    # Initialize distance matrix (quantum distance between each pair of points)
    distances = np.zeros((len(data), len(data)))
    for i in range(len(data)):
        for j in range(i + 1, len(data)):
            # Calculate the quantum distance for each pair of points
            distances[i, j] = distances[j, i] = quantum_distance(data[i], data[j])  # Use the quantum_distance function
    
    current_cluster_label = len(data)
    
    while len(clusters) > 1:
        # Find the pair of clusters with the smallest distance to merge
        min_dist = float('inf')
        merge_pair = None
        for i, cluster1 in enumerate(clusters):
            for j, cluster2 in enumerate(clusters):
                if i != j:
                    # Compute the average distance between the two clusters
                    dist = average_cluster_distance(clusters[i], clusters[j], distances)
                    if dist < min_dist:
                        min_dist = dist
                        merge_pair = (i, j)
        
        # Check if merge_pair is None and handle it
        if merge_pair is None:
            print("No valid pair found for merging. Exiting.")
            break
        
        # Merge the two closest clusters
        i, j = merge_pair
        size_i = cluster_sizes[cluster_labels[i]]
        size_j = cluster_sizes[cluster_labels[j]]
        
        # Add the merging step to the linkage matrix
        linkage_matrix.append([cluster_labels[i], cluster_labels[j], min_dist, size_i + size_j])
        
        # Update the clusters
        cluster_sizes[current_cluster_label] = size_i + size_j
        cluster_labels[i] = current_cluster_label
        current_cluster_label += 1
        
        clusters[i].extend(clusters[j])
        del clusters[j]
        del cluster_labels[j]

    return np.array(linkage_matrix)

# Function to compute the average distance between two clusters
def average_cluster_distance(cluster1, cluster2, distances):
    total_distance = 0
    count = 0
    
    for idx1 in cluster1:
        for idx2 in cluster2:
            total_distance += distances[idx1, idx2]
            count += 1
    
    return total_distance / count if count > 0 else 0


In [10]:
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram

def plot_dendrogram(linkage_matrix, data):
    # Ensure the linkage matrix has n-1 rows
    print("Shape of linkage matrix:", linkage_matrix.shape)
    
    # Check if the number of labels matches the number of data points (n)
    n = len(data)  # number of data points
    print(f"Number of data points: {n}")
    
    # Plot the dendrogram
    plt.figure(figsize=(20, 15))  # Set the figure size for better readability
    dendrogram(linkage_matrix, labels=[str(i) for i in range(n)])  # Ensure number of labels matches number of data points
    plt.title("Quantum-Enhanced Dendrogram")
    plt.xlabel("Data Points (Indices)")
    plt.ylabel("Quantum Distance")
    plt.show()


In [11]:
from sklearn.preprocessing import LabelEncoder

def load_csv_data(file_path):
    df = pd.read_csv(file_path)

    # Extract numerical columns
    data = df.select_dtypes(include=[float, int]).values

    # Example: Handle categorical columns (if any) by encoding them
    label_encoder = LabelEncoder()
    for column in df.select_dtypes(include=[object]).columns:
        df[column] = label_encoder.fit_transform(df[column])

    # Return both numerical and encoded data
    return df.select_dtypes(include=[float, int]).values


In [12]:
if __name__ == "__main__":
    print_circuit_once = False

    # Load real-world data from CSV
    file_path = 'data_banknote_authentication.csv'  # Update with the actual file path
    data = load_csv_data(file_path)

    # Normalize the data for quantum preparation
    data = normalize_data(data)

    # Run Quantum Agglomerative Clustering with Dendrogram Tracking
    linkage_matrix = quantum_agglomerative_clustering_with_dendrogram(data)

    # Plot the quantum-enhanced dendrogram
    plot_dendrogram(linkage_matrix, data)


  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return np.sqrt(inner_product)
  return

KeyboardInterrupt: 