In [3]:
import torch
from tqdm import tqdm
from triplet_pytorch import *

In [6]:
LEARNING_RATE = 0.005
DEVICE = get_default_device()

model = ResNet_Triplet()
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(),lr = LEARNING_RATE)
criterion = TripletLoss()

checkpoint = torch.load("trained_model.pth")

model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimzier_state_dict'])


train_results = []
labels = []

model.eval()
with torch.no_grad():
    for img, _, _, label in tqdm(train_dl):
        train_results.append(model(img.to(DEVICE)).cpu().numpy())
        labels.append(label)
        
train_results = np.concatenate(train_results)
labels = np.concatenate(labels)
print(train_results.shape)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 809/809 [03:18<00:00,  4.08it/s]

(51744, 2)





In [7]:
import matplotlib.pyplot as plt

plt.figure(figsize=(15, 10), facecolor="azure")
for label in np.unique(labels):
    tmp = train_results[labels==label]
    plt.scatter(tmp[:, 0], tmp[:, 1], label=label)

plt.legend()
plt.show()


# PCA -> K-means

In [1]:
from tqdm import tqdm
from triplet_pytorch import *

from sklearn.cluster import KMeans
from sklearn.decomposition import IncrementalPCA

In [10]:

# data
data_dir = "../../rawdata/logos"
file_paths = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if os.path.isfile(os.path.join(data_dir, f))]
# clustering
pca_dim = 50
n_clusters = 64

IMAGE_SIZE = 256
LEARNING_RATE = 0.005
DEVICE = get_default_device()

features_hook = None
def hook_fn(module, input, output):
    global features_hook
    features_hook = output


model = ResNet_Triplet()
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(),lr = LEARNING_RATE)
criterion = TripletLoss()
checkpoint = torch.load("trained_model.pth")
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimzier_state_dict'])

model.Feature_Extractor.fc[0].register_forward_hook(hook_fn)

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

features = []
for path in tqdm(file_paths[:3]):
    with Image.open(path).convert("RGB") as img:
        tensor = transform(img).unsqueeze(0)
        with torch.no_grad():
            feature = model.Feature_Extractor(tensor.cuda())
        features.append(feature.reshape(-1).cpu().numpy())

100%|███████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 31.01it/s]


In [4]:
pickle_path = '../../rawdata/tripletmining_features.pickle'
with open(pickle_path, 'wb') as f:
    pickle.dump(features, f)

In [12]:
features[1].shape

(10,)

In [5]:
pca = IncrementalPCA(n_components=pca_dim, batch_size=512, whiten=True)
reduced = pca.fit_transform(features)

kmeans = KMeans(n_clusters=n_clusters, random_state=0)
pseudo_labels = list(kmeans.fit_predict(reduced))


result = {
    "Filenames": file_paths,
    "Labels": pseudo_labels
}

pickle_path = '../../rawdata/tripletmining.pickle'
with open(pickle_path, 'wb') as f:
    pickle.dump(result, f)

def open_pickle(pickle_path):
    with open(pickle_path, "rb") as f:
        data = pickle.load(f)
    return data


data = open_pickle(pickle_path)
print(data["Labels"][:3])

ValueError: n_components=50 invalid for n_features=2, need more rows than columns for IncrementalPCA processing