In [1]:
from operator import is_
import os
import sys

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

import matplotlib.pyplot as plt
import numpy as np

import clip
sys.path.append("../")
from Metrics import base_kmeans_model_evaluation, kmeans_with_init, cosine_kmeans_with_init
from networks import CustomCLIP, load_clip_to_cpu
from lr_scheduler import ConstantWarmupScheduler

import argparse

# parser
clip_backbone="ViT-L/14"
batch_size=50
total_epoch=5
lr=3e-4
scheduler_operate=False
repeat=4
is_test_dataset = False
cosine_sim=True
n_ctx=16
total_dpoch = repeat
# parser

print(clip.available_models())
device = torch.device('cuda') if torch.cuda.is_available() else "cpu"
print('Device:', device)
print('Current cuda device:', torch.cuda.current_device())
print('Count of using GPUs:', torch.cuda.device_count())

backbone_name = clip_backbone
clip_model, preprocess = load_clip_to_cpu(backbone_name)


if is_test_dataset:
    fixed_testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                                 download=True, transform=preprocess)
    fixed_testloader = torch.utils.data.DataLoader(fixed_testset, batch_size=batch_size,
                                                   shuffle=False)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=preprocess)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False)
else:
    fixed_cset1 = torchvision.datasets.CIFAR10(root='./data', train=True,
                                               download=True, transform=preprocess)
    fixed_cset2 = torchvision.datasets.CIFAR10(root='./data', train=False,
                                               download=True, transform=preprocess)
    fixed_testset = torch.utils.data.ConcatDataset([fixed_cset1, fixed_cset2])

    fixed_testloader = torch.utils.data.DataLoader(fixed_testset, batch_size=batch_size,
                                                   shuffle=False)

    cset1 = torchvision.datasets.CIFAR10(root='./data', train=True,
                                         download=True, transform=preprocess)
    cset2 = torchvision.datasets.CIFAR10(root='./data', train=False,
                                         download=True, transform=preprocess)
    testset = torch.utils.data.ConcatDataset([cset1, cset2])

    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False)

print("dataset_length:", len(testset))
if is_test_dataset:
    num_classes = len(fixed_testloader.dataset.classes)
else:
    num_classes = len(cset1.classes)
model = CustomCLIP(clip_model, num_classes, n_ctx=n_ctx)

for name, param in model.named_parameters():
    if "prompt_learner" not in name:
        param.requires_grad_(False)

model.to(device)

# device_count = torch.cuda.device_count()
# if device_count > 1:
#     print(
#         f"Multiple GPUs detected (n_gpus={device_count}), use all of them!")
#     model = nn.DataParallel(model)

print('done')

criterion = F.cross_entropy
# optimizer = torch.optim.SGD(
#     model.parameters(),
#     lr=lr,
#     momentum=0.9,
#     weight_decay=5e-4,
#     dampening=0,
#     nesterov=False,
# )
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=lr,
    weight_decay=5e-4,
    betas=(0.9, 0.999),
)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, float(10)
)
scheduler = ConstantWarmupScheduler(
    optimizer, lr_scheduler, 1,
    1e-5
)
if not scheduler_operate:
    scheduler = ConstantWarmupScheduler(
        optimizer, lr_scheduler, 1,
        lr
    )
# initialization
# clip_model.to(device)
# with torch.no_grad():
#     first_centroids, label, acc = base_kmeans_model_evaluation(
#         clip_model, shortloader, num_classes)

# testloader label update with k-mean clsuter result

# shortset.targets = label
# trainloader = torch.utils.data.DataLoader(shortset, batch_size=batch_size,
#                                           shuffle=True)

train_cluster = np.load('../npy_folder/train_cluster.npy')
test_cluster = np.load('../npy_folder/test_cluster.npy')
concat_cluster = np.load('../npy_folder/concat_cluster.npy')
if is_test_dataset:
    testset.targets = test_cluster.tolist()
else:
    cset1.targets = concat_cluster[:len(cset1)].tolist()
    cset2.targets = concat_cluster[len(cset1):].tolist()
    testset = torch.utils.data.ConcatDataset([cset1, cset2])

testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=True)


# real_testset = torchvision.datasets.CIFAR10(root='./data', train=False,
#                                             download=True, transform=preprocess)

# testloader = torch.utils.data.DataLoader(
#     real_testset, batch_size=batch_size, shuffle=False)

total_epoch = 200
sm = nn.Softmax(dim=1)
for epoch in range(total_epoch):
    for dpoch in range(total_dpoch):
        score = 0
        total_loss = 0
        print('==================================================================')
        print('dpoch:', dpoch + total_dpoch * epoch)
        for i, data in enumerate(testloader, 0):
            inputs, targets = data
            inputs, targets = inputs.to(device), targets.to(device)
            output = model(inputs)
            loss = criterion(output, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            pred = torch.argmax(output, dim=1)
            score += torch.sum(pred == targets)
            total_loss += loss.item()
            if i % 50 == 49:
                print('train_iter:', i, '/', len(testloader))
                print('target:',targets[0])
                print("output:",output[0])
                print("softmax_output:",sm(output)[0])
                print("accuracy:",score/(i*batch_size))
        print("train_accuracy: ", score/len(testloader.dataset))

    with torch.no_grad():
        model.eval()

        prompts = model.prompt_learner()
        tokenized_prompts = model.tokenized_prompts
        text_centroids = model.text_encoder(
            prompts, tokenized_prompts)
        # no normalized
        if cosine_sim:
            knn = cosine_kmeans_with_init
        else:
            knn = kmeans_with_init
        new_label, acc, nmi = knn(
            model, fixed_testloader, num_classes, text_centroids)
        print("val_acc", acc)
        print("val_nmi", nmi)

        model.train()
# new cluster assignments
    if is_test_dataset:
        testset.targets = new_label.tolist()
        testloader = torch.utils.data.DataLoader(
            testset, batch_size=batch_size, shuffle=True)
    else:
        cset1.targets = new_label[:len(cset1)].tolist()
        cset2.targets = new_label[len(cset1):].tolist()
        testset = torch.utils.data.ConcatDataset([cset1, cset2])
        testloader = torch.utils.data.DataLoader(
            testset, batch_size=batch_size, shuffle=True)
# model reinitialization
    model = CustomCLIP(clip_model, num_classes, n_ctx=n_ctx)

    for name, param in model.named_parameters():
        if "prompt_learner" not in name:
            param.requires_grad_(False)

    model.to(device)

    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     lr=lr,
    #     momentum=0.9,
    #     weight_decay=5e-4,
    #     dampening=0,
    #     nesterov=False,
    # )
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=lr,
        weight_decay=5e-4,
        betas=(0.9, 0.999),
    )
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(10)
    )
    scheduler = ConstantWarmupScheduler(
        optimizer, lr_scheduler, 1,
        1e-5
    )
    if not scheduler_operate:
        scheduler = ConstantWarmupScheduler(
            optimizer, lr_scheduler, 1,
            lr
        )


['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14']
Device: cuda
Current cuda device: 0
Count of using GPUs: 4
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
dataset_length: 60000
Initializing class-specific contexts
Initial context: "X X X X X X X X X X X X X X X X"
Number of context words (tokens): 16
done
dpoch: 0
train_iter: 49 / 1200
target: tensor(6, device='cuda:0')
output: tensor([5.1004e-06, 5.2345e-05, 3.3932e-04, 1.0942e-04, 2.2136e-05, 9.4139e-04,
        9.9570e-01, 1.5703e-04, 1.0682e-05, 2.6593e-03], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0854, 0.0854, 0.0854, 0.0854, 0.0854, 0.0855, 0.2311, 0.0854, 0.0854,
        0.0856], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.7735, device='cuda:0')
train_iter: 99 / 1200
target: tensor(5, device='cuda:0')
output: tensor([2.5734e-05, 1.825

train_iter: 899 / 1200
target: tensor(9, device='cuda:0')
output: tensor([2.2184e-04, 2.0207e-05, 1.8856e-05, 7.4577e-06, 1.1355e-05, 8.0612e-04,
        5.0844e-07, 2.5320e-06, 3.7843e-05, 9.9887e-01], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0854, 0.0854, 0.0854, 0.0854, 0.0854, 0.0854, 0.0854, 0.0854, 0.0854,
        0.2317], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9612, device='cuda:0')
train_iter: 949 / 1200
target: tensor(6, device='cuda:0')
output: tensor([7.2596e-04, 3.2882e-06, 4.7790e-07, 9.8052e-07, 6.2542e-08, 6.9473e-06,
        9.9924e-01, 1.5455e-06, 7.8110e-06, 1.1444e-05], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0854, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.2318, 0.0853, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9621, device='cuda:0')
train_iter: 999 / 1200
target: tensor(8, device='cuda:0')
output: tensor([3.2096e-07, 7.9622e-

train_iter: 599 / 1200
target: tensor(0, device='cuda:0')
output: tensor([9.9995e-01, 1.4702e-08, 1.6905e-07, 4.0391e-09, 2.8051e-07, 4.3473e-05,
        1.2524e-08, 2.4953e-08, 1.2327e-06, 5.8000e-09], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.2320, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9825, device='cuda:0')
train_iter: 649 / 1200
target: tensor(0, device='cuda:0')
output: tensor([9.9998e-01, 2.6346e-07, 4.9539e-08, 1.6319e-07, 7.4886e-07, 1.7454e-05,
        1.1729e-06, 9.0340e-07, 9.1802e-09, 1.0441e-06], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.2320, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9825, device='cuda:0')
train_iter: 699 / 1200
target: tensor(8, device='cuda:0')
output: tensor([8.6234e-08, 2.7613e-

train_iter: 299 / 1200
target: tensor(8, device='cuda:0')
output: tensor([3.9667e-06, 3.5485e-08, 3.7083e-08, 2.4782e-09, 6.3588e-07, 1.9693e-04,
        7.0953e-06, 6.2374e-05, 9.9973e-01, 1.4511e-07], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0854, 0.0853, 0.0853, 0.2319,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9863, device='cuda:0')
train_iter: 349 / 1200
target: tensor(7, device='cuda:0')
output: tensor([3.1774e-05, 2.5468e-09, 3.6922e-08, 8.6929e-07, 2.4102e-07, 1.0587e-07,
        1.2168e-07, 9.9996e-01, 3.7968e-06, 5.7011e-06], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.2320, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9859, device='cuda:0')
train_iter: 399 / 1200
target: tensor(6, device='cuda:0')
output: tensor([2.5507e-08, 2.2996e-

train_iter: 49 / 1200
target: tensor(4, device='cuda:0')
output: tensor([1.3814e-07, 1.0671e-07, 7.6465e-08, 2.9695e-07, 1.0000e+00, 6.0484e-08,
        9.5792e-08, 6.1462e-08, 1.1756e-10, 3.0961e-06], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0853, 0.0853, 0.0853, 0.0853, 0.2320, 0.0853, 0.0853, 0.0853, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(1.0057, device='cuda:0')
train_iter: 99 / 1200
target: tensor(3, device='cuda:0')
output: tensor([3.6941e-05, 1.1950e-06, 8.3964e-09, 9.8973e-01, 5.2766e-08, 5.3493e-07,
        6.0587e-07, 2.8990e-08, 1.6409e-06, 1.0229e-02], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0855, 0.0855, 0.0855, 0.2299, 0.0855, 0.0855, 0.0855, 0.0855, 0.0855,
        0.0863], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9925, device='cuda:0')
train_iter: 149 / 1200
target: tensor(3, device='cuda:0')
output: tensor([2.4446e-07, 5.2080e-07

train_iter: 999 / 1200
target: tensor(5, device='cuda:0')
output: tensor([1.8081e-07, 2.1546e-08, 6.0442e-09, 9.2175e-10, 9.4908e-09, 1.0000e+00,
        1.1044e-09, 4.3306e-10, 9.1026e-11, 1.4459e-09], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.2320, 0.0853, 0.0853, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9855, device='cuda:0')
train_iter: 1049 / 1200
target: tensor(4, device='cuda:0')
output: tensor([8.0633e-10, 9.9253e-09, 1.7410e-09, 1.4281e-07, 1.0000e+00, 1.3383e-08,
        1.2834e-09, 1.9695e-09, 5.2261e-11, 8.4051e-10], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0853, 0.0853, 0.0853, 0.0853, 0.2320, 0.0853, 0.0853, 0.0853, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9855, device='cuda:0')
train_iter: 1099 / 1200
target: tensor(4, device='cuda:0')
output: tensor([6.3131e-07, 9.5504

train_iter: 699 / 1200
target: tensor(3, device='cuda:0')
output: tensor([2.0842e-05, 9.2416e-06, 5.2015e-07, 9.9991e-01, 1.1233e-05, 3.6456e-05,
        2.9963e-06, 9.8924e-06, 4.2318e-07, 1.7312e-06], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0853, 0.0853, 0.0853, 0.2320, 0.0853, 0.0853, 0.0853, 0.0853, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9478, device='cuda:0')
train_iter: 749 / 1200
target: tensor(6, device='cuda:0')
output: tensor([8.1385e-07, 3.1500e-06, 9.6458e-06, 3.9472e-06, 1.4708e-05, 7.0808e-04,
        9.9924e-01, 1.3471e-05, 4.5688e-07, 6.2560e-06], device='cuda:0',
       grad_fn=<SelectBackward0>)
softmax_output: tensor([0.0853, 0.0853, 0.0853, 0.0853, 0.0853, 0.0854, 0.2318, 0.0853, 0.0853,
        0.0853], device='cuda:0', grad_fn=<SelectBackward0>)
accuracy: tensor(0.9501, device='cuda:0')
train_iter: 799 / 1200
target: tensor(2, device='cuda:0')
output: tensor([4.6790e-06, 5.0565e-

KeyboardInterrupt: 

In [3]:
with torch.no_grad():
    model.eval()

    prompts = model.prompt_learner()
    tokenized_prompts = model.tokenized_prompts
    text_centroids = model.text_encoder(
        prompts, tokenized_prompts)

In [4]:
text_centroids

tensor([[-0.6734,  0.4417, -0.5501,  ...,  0.6089,  0.2828, -0.5241],
        [-0.1357,  0.3524, -0.2181,  ..., -0.1356, -0.4981, -0.3242],
        [ 0.2851, -0.0177, -0.3161,  ...,  0.2290, -0.2723, -0.5159],
        ...,
        [-0.3672, -0.1307, -0.0663,  ...,  0.0161,  0.3462, -0.5525],
        [ 0.0424, -0.1261, -0.2594,  ...,  0.1181,  0.3776,  0.0690],
        [-0.2455,  0.1425,  0.4739,  ...,  0.1930,  0.1103, -0.0168]],
       device='cuda:0')

In [3]:
import numpy as np
x = np.array([9.9989e-01, 2.5081e-05, 6.0233e-07, 7.8063e-07, 5.0855e-05, 5.3249e-06,
        2.3968e-06, 1.6679e-06, 3.4923e-07, 2.1723e-05])

In [6]:
x = np.round(x,4)
x

array([9.999e-01, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e-04, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00])