In [12]:
import numpy as np
import cupy as cp
from cuml.cluster import KMeans  # 使用 cuML 的 KMeans 实现

def density_cluster_kmeans(n_clusters=10):
    # 加载特征矩阵
    feature_matrix = np.load('/CSCI2952X/imagenet_features.npy')
    feature_matrix = feature_matrix.astype(np.float32)
    
    # 将特征矩阵转换为 GPU 上的格式
    feature_matrix_gpu = cp.asarray(feature_matrix)  # 使用 CuPy 将 NumPy 数组转换为 GPU 数组
    
    # 使用 GPU 加速的 KMeans
    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=1000)  # 参数可根据需要调整
    labels = kmeans.fit_predict(feature_matrix_gpu)

    # 获取簇中心并将其保存为文件
    cluster_centers = kmeans.cluster_centers_.get()  # 将簇中心从 GPU 转移回 CPU
    np.save('cluster_centers.npy', cluster_centers)  # 保存为 cluster_centers.npy 文件

    # 将 GPU 上的标签数组转换回 CPU
    labels = labels.get()

    # 打印聚类结果
    print(f'K-Means 簇数量: {n_clusters}')
    print(f'每个数据点的标签: {labels}')

    # 统计每个簇的大小
    unique, counts = np.unique(labels, return_counts=True)
    cluster_sizes = dict(zip(unique, counts))
    print(f'每个簇的大小: {cluster_sizes}')

# 运行替换后的聚类函数
density_cluster_kmeans(n_clusters=1000)

K-Means 簇数量: 1000
每个数据点的标签: [ 66  66  66 ... 932  50 503]
每个簇的大小: {0: 1989, 1: 1100, 2: 1195, 3: 1585, 4: 1181, 5: 762, 6: 1118, 7: 1420, 8: 847, 9: 1124, 10: 1253, 11: 1542, 12: 1090, 13: 1220, 14: 1005, 15: 1409, 16: 1524, 17: 1269, 18: 1413, 19: 1542, 20: 1768, 21: 1886, 22: 1298, 23: 1322, 24: 1383, 25: 1512, 26: 1453, 27: 854, 28: 1726, 29: 800, 30: 1515, 31: 1284, 32: 2082, 33: 638, 34: 1181, 35: 727, 36: 1646, 37: 979, 38: 732, 39: 956, 40: 551, 41: 1442, 42: 1628, 43: 1132, 44: 1122, 45: 898, 46: 1193, 47: 1054, 48: 825, 49: 1571, 50: 2586, 51: 1238, 52: 1472, 53: 1773, 54: 1291, 55: 1275, 56: 1437, 57: 1093, 58: 1232, 59: 1274, 60: 1939, 61: 1582, 62: 1401, 63: 923, 64: 1032, 65: 1258, 66: 1174, 67: 1291, 68: 1453, 69: 1249, 70: 2168, 71: 1769, 72: 690, 73: 2151, 74: 666, 75: 1494, 76: 988, 77: 2275, 78: 1865, 79: 1232, 80: 1453, 81: 3230, 82: 1949, 83: 1099, 84: 879, 85: 1270, 86: 1282, 87: 1909, 88: 1333, 89: 2046, 90: 893, 91: 1334, 92: 1198, 93: 1062, 94: 1784, 95: 1850, 9

In [None]:
import numpy as np
import cupy as cp  
from cuml.cluster import DBSCAN  # 使用 cuML 的 DBSCAN 实现

def density_cluster():
    feature_matrix = np.load('/CSCI2952X/imagenet_features.npy')
    feature_matrix = feature_matrix.astype(np.float32)
    
    # 将特征矩阵转换为 GPU 上的格式
    feature_matrix_gpu = cp.asarray(feature_matrix)  # 使用 CuPy 将 NumPy 数组转换为 GPU 数组
    
    # 使用 GPU 加速的 DBSCAN
    dbscan = DBSCAN(eps=1, min_samples=100)  # 可根据需要调整参数
    labels = dbscan.fit_predict(feature_matrix_gpu)

    # 将 GPU 上的标签数组转换回 CPU
    labels = labels.get()

    # 打印聚类结果
    num_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    print(f'发现的簇数量: {num_clusters}')
    print(f'每个数据点的标签: {labels}')

    # 统计每个簇的大小
    unique, counts = np.unique(labels, return_counts=True)
    cluster_sizes = dict(zip(unique, counts))
    print(f'每个簇的大小: {cluster_sizes}')


In [13]:

import torch
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14_reg_lc')
model.eval()


Using cache found in /home/peiyuan/.cache/torch/hub/facebookresearch_dinov2_main


_LinearClassifierWrapper(
  (backbone): DinoVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (blocks): ModuleList(
      (0-11): 12 x NestedTensorBlock(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): MemEffAttention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=Fals

In [14]:
from tqdm import tqdm
import numpy as np
import torch
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image

# Load precomputed cluster centers
cluster_centers = np.load('cluster_centers.npy')
cluster_centers = torch.tensor(cluster_centers, dtype=torch.float32)

# Define a function to clamp image values to [0, 1]
def clamp_image(img):
    return img.clamp(0, 1)

# Normalization transform (for input to the model)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Iterate over each cluster center
for i, target_center in enumerate(cluster_centers):
    # Initialize an input image with random values
    input_image = torch.randn((1, 3, 224, 224), requires_grad=True)

    # Set up an optimizer for the current image
    optimizer = optim.Adam([input_image], lr=0.01)

    # Gradient-based optimization loop
    for step in tqdm(range(20), desc=f'Optimizing for cluster {i}'):
        optimizer.zero_grad()
        
        # Normalize the input image and pass it through the model
        normalized_image = normalize(input_image.squeeze(0)).unsqueeze(0)
        output = model(normalized_image)  # Ensure 'model' is properly defined
        
        # Compute the mean squared error loss with the target cluster center
        loss = torch.nn.functional.mse_loss(output, target_center)
        print(f'Cluster {i}, Step {step}, Loss: {loss.item()}')
        
        # Backpropagate and optimize
        loss.backward()
        optimizer.step()
        
        # Clamp image values to keep them in the range [0, 1]
        input_image.data = clamp_image(input_image.data)

    # Save the generated image for the current cluster center
    generated_image = input_image.detach().squeeze(0).permute(1, 2, 0).numpy()
    generated_image = (generated_image * 255).astype(np.uint8)  # Convert to 0-255 range
    Image.fromarray(generated_image).save(f'generated_image_cluster_{i}.png')


  loss = torch.nn.functional.mse_loss(output, target_center)


Cluster 0, Step 0, Loss: 12.391698837280273


Optimizing for cluster 0:   5%|▌         | 1/20 [00:00<00:09,  1.99it/s]

Cluster 0, Step 1, Loss: 8.085114479064941


Optimizing for cluster 0:  10%|█         | 2/20 [00:01<00:10,  1.74it/s]

Cluster 0, Step 2, Loss: 7.4083967208862305


Optimizing for cluster 0:  15%|█▌        | 3/20 [00:01<00:10,  1.68it/s]

Cluster 0, Step 3, Loss: 6.728322982788086


Optimizing for cluster 0:  20%|██        | 4/20 [00:02<00:09,  1.67it/s]

Cluster 0, Step 4, Loss: 6.108248233795166


Optimizing for cluster 0:  25%|██▌       | 5/20 [00:02<00:09,  1.66it/s]

Cluster 0, Step 5, Loss: 5.387615203857422


Optimizing for cluster 0:  30%|███       | 6/20 [00:03<00:08,  1.65it/s]

Cluster 0, Step 6, Loss: 4.602733612060547


Optimizing for cluster 0:  35%|███▌      | 7/20 [00:04<00:07,  1.65it/s]

Cluster 0, Step 7, Loss: 4.009591102600098


Optimizing for cluster 0:  40%|████      | 8/20 [00:04<00:07,  1.64it/s]

Cluster 0, Step 8, Loss: 3.5333447456359863


Optimizing for cluster 0:  45%|████▌     | 9/20 [00:05<00:06,  1.64it/s]

Cluster 0, Step 9, Loss: 3.2467446327209473


Optimizing for cluster 0:  50%|█████     | 10/20 [00:06<00:06,  1.63it/s]

Cluster 0, Step 10, Loss: 3.0989649295806885


Optimizing for cluster 0:  55%|█████▌    | 11/20 [00:06<00:05,  1.62it/s]

Cluster 0, Step 11, Loss: 2.907315254211426


Optimizing for cluster 0:  60%|██████    | 12/20 [00:07<00:04,  1.62it/s]

Cluster 0, Step 12, Loss: 2.697659730911255


Optimizing for cluster 0:  65%|██████▌   | 13/20 [00:07<00:04,  1.62it/s]

Cluster 0, Step 13, Loss: 2.5199391841888428


Optimizing for cluster 0:  70%|███████   | 14/20 [00:08<00:03,  1.63it/s]

Cluster 0, Step 14, Loss: 2.3732781410217285


Optimizing for cluster 0:  75%|███████▌  | 15/20 [00:09<00:03,  1.66it/s]

Cluster 0, Step 15, Loss: 2.236978769302368


Optimizing for cluster 0:  80%|████████  | 16/20 [00:09<00:02,  1.65it/s]

Cluster 0, Step 16, Loss: 2.0868077278137207


Optimizing for cluster 0:  85%|████████▌ | 17/20 [00:10<00:01,  1.64it/s]

Cluster 0, Step 17, Loss: 1.943324327468872


Optimizing for cluster 0:  90%|█████████ | 18/20 [00:10<00:01,  1.63it/s]

Cluster 0, Step 18, Loss: 1.8073451519012451


Optimizing for cluster 0:  95%|█████████▌| 19/20 [00:11<00:00,  1.63it/s]

Cluster 0, Step 19, Loss: 1.6737275123596191


Optimizing for cluster 0: 100%|██████████| 20/20 [00:12<00:00,  1.65it/s]
Optimizing for cluster 1:   0%|          | 0/20 [00:00<?, ?it/s]

Cluster 1, Step 0, Loss: 8.694229125976562


Optimizing for cluster 1:   5%|▌         | 1/20 [00:00<00:11,  1.61it/s]

Cluster 1, Step 1, Loss: 6.7000274658203125


Optimizing for cluster 1:  10%|█         | 2/20 [00:01<00:11,  1.62it/s]

Cluster 1, Step 2, Loss: 6.140081405639648


Optimizing for cluster 1:  15%|█▌        | 3/20 [00:01<00:10,  1.62it/s]

Cluster 1, Step 3, Loss: 5.405917644500732


Optimizing for cluster 1:  20%|██        | 4/20 [00:02<00:09,  1.62it/s]

Cluster 1, Step 4, Loss: 4.533076286315918


Optimizing for cluster 1:  25%|██▌       | 5/20 [00:03<00:09,  1.63it/s]

Cluster 1, Step 5, Loss: 4.046082973480225


Optimizing for cluster 1:  30%|███       | 6/20 [00:03<00:08,  1.63it/s]

Cluster 1, Step 6, Loss: 3.6787939071655273


Optimizing for cluster 1:  35%|███▌      | 7/20 [00:04<00:07,  1.63it/s]

Cluster 1, Step 7, Loss: 3.3146657943725586


Optimizing for cluster 1:  40%|████      | 8/20 [00:04<00:07,  1.63it/s]

Cluster 1, Step 8, Loss: 3.0591447353363037


Optimizing for cluster 1:  45%|████▌     | 9/20 [00:05<00:06,  1.63it/s]

Cluster 1, Step 9, Loss: 2.923186779022217


Optimizing for cluster 1:  50%|█████     | 10/20 [00:06<00:06,  1.63it/s]

Cluster 1, Step 10, Loss: 2.6968085765838623


Optimizing for cluster 1:  55%|█████▌    | 11/20 [00:06<00:05,  1.73it/s]

Cluster 1, Step 11, Loss: 2.528254985809326


Optimizing for cluster 1:  60%|██████    | 12/20 [00:07<00:04,  1.69it/s]

Cluster 1, Step 12, Loss: 2.3534226417541504


Optimizing for cluster 1:  65%|██████▌   | 13/20 [00:07<00:03,  1.77it/s]

Cluster 1, Step 13, Loss: 2.1972827911376953


Optimizing for cluster 1:  70%|███████   | 14/20 [00:08<00:03,  1.73it/s]

Cluster 1, Step 14, Loss: 2.0445380210876465


Optimizing for cluster 1:  75%|███████▌  | 15/20 [00:09<00:03,  1.67it/s]

Cluster 1, Step 15, Loss: 1.8734688758850098


Optimizing for cluster 1:  80%|████████  | 16/20 [00:09<00:02,  1.63it/s]


KeyboardInterrupt: 