In [59]:
import os
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA
import random
import shutil
import torch
import torchvision.transforms as transforms
import gc

In [60]:
# Release memory left over from earlier work in this kernel before starting.
# empty_cache() asks PyTorch to give back cached CUDA memory it is no longer using.
# NOTE(review): running gc.collect() first would presumably let memory held by
# just-collected tensors be released as well — confirm before reordering, since
# the last expression of the cell is also what the notebook displays.
torch.cuda.empty_cache()
# gc.collect() is the cell's last expression, so its return value (the number
# of unreachable objects found) is shown as the cell output.
gc.collect()

2617

In [61]:
# Restrict this process to the listed GPUs (comma-separated device IDs).
# NOTE(review): CUDA_VISIBLE_DEVICES is presumably only honoured if it is set
# before CUDA is first initialised in this kernel — confirm the cell order.
cuda_devices = "0,1"
os.environ.update(CUDA_VISIBLE_DEVICES=cuda_devices)

In [62]:
# Report which compute devices PyTorch can actually use.
#
# Devices are enumerated with torch.cuda.device_count() instead of parsing the
# CUDA_VISIBLE_DEVICES string: inside the process the visible GPUs are
# renumbered from 0, so a mask such as "2,3" would make
# torch.cuda.get_device_name(2) fail even though two GPUs are usable.
if torch.cuda.is_available():
    print("Using GPUs:")
    for device_id in range(torch.cuda.device_count()):
        # `device` is kept as a notebook-level name (it ends up pointing at the
        # last visible GPU, as before) in case later cells refer to it.
        device = torch.device(f"cuda:{device_id}")
        print(f"  Device {device_id}: {torch.cuda.get_device_name(device_id)}")
else:
    print("Using CPU")

Using GPUs:
  Device 0: NVIDIA GeForce RTX 3090
  Device 1: NVIDIA GeForce RTX 3090


In [63]:
# Root directory of the dataset to evaluate. The cells below list its entries
# with os.listdir and read the images found inside each sub-directory, so it
# is expected to hold one sub-directory per category.
in_dir = '/mnt/lts/nis_lab_research/data/class_data/neg/far_shah_b1-b5_b8_train_neg'

In [64]:
# First-pass listing: every entry found in in_dir, ordered by name.
# NOTE(review): the following cell reassigns cat_list (keeping only non-empty
# directories), so this value is short-lived.
cat_list = os.listdir(in_dir)
cat_list.sort()

In [65]:
# Category names: the entries of in_dir that are directories containing at
# least one item, in alphabetical order.
cat_list = sorted(
    name
    for name in os.listdir(in_dir)
    if os.path.isdir(os.path.join(in_dir, name))
    and os.listdir(os.path.join(in_dir, name))
)

In [66]:
# Display the category names. A category's position in this list is what the
# classification loop later compares against the model's predicted class index.
cat_list

['Accept Button',
 'Advertisement',
 'Alert Notification',
 'Allow Button',
 'Checkbox',
 'Click Captcha',
 'Close Button',
 'Download Button',
 'Email Input Box',
 'General Button',
 'General Input Box',
 'Image Captcha',
 'Login Button',
 'Logo',
 'Name Input Box',
 'Password Input Box',
 'Phone Input Box',
 'Play Button',
 'Popup',
 'Random',
 'Search Button',
 'Search Input Box',
 'Submit Button',
 'Text Captcha',
 'Toggle Button',
 'Update Button',
 'Video']

In [67]:
# Load images
def load_images(directory):
    """Load every PNG image found directly inside ``directory``.

    Each image is converted to RGB and resized to 224x224 pixels.

    Args:
        directory: Path of the folder to scan. Sub-folders are not searched.

    Returns:
        A tuple ``(images, filenames)`` of two parallel lists: the loaded PIL
        images and, for each one, its full path (``directory`` joined with the
        file name).
    """
    images = []
    filenames = []
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for filename in sorted(os.listdir(directory)):
        # Only PNG files are loaded; compare case-insensitively so that
        # files named "*.PNG" are not silently skipped.
        if not filename.lower().endswith('.png'):
            continue
        path = os.path.join(directory, filename)
        # The context manager closes the underlying file deterministically
        # instead of relying on garbage collection.
        with Image.open(path) as raw:
            # Convert to RGB *before* resizing: Pillow always uses
            # nearest-neighbour resampling for palette ("P") and bilevel ("1")
            # images, so resizing first would degrade those PNGs.
            img = raw.convert('RGB').resize((224, 224))
        images.append(img)
        filenames.append(path)
    return images, filenames

In [68]:
# Preprocessing applied to each PIL image before it is fed to the model:
#   1. resize to 224x224,
#   2. convert to a tensor,
#   3. normalise each channel with the given mean / std
#      (presumably the usual ImageNet statistics — confirm against training).
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [69]:
# Load the trained classifier from disk, move it to the GPU and switch it to
# inference mode. The recorded output below shows a DataParallel-wrapped ResNet.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints that come from a trusted source.
# NOTE(review): recent PyTorch releases reportedly default torch.load to
# weights_only=True, which would reject a fully pickled model like this one —
# confirm against the installed version.
model = torch.load('/mnt/lts/nis_lab_research/data/pth/far_shah_b1-b5_b8_train_neg_ep25.pth')
model.to('cuda')
# eval() is the last expression of the cell, so the returned model is displayed.
model.eval()

DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
      

In [70]:
# Root output folder; one sub-folder per category is created beneath it later.
cluster_directory = './clusters'
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists test.
os.makedirs(cluster_directory, exist_ok=True)

In [71]:
# Classify every image of every category and copy it into
# <cluster_directory>/<category>/correct or .../incorrect, depending on
# whether the model's top-1 prediction equals the category's index.
#
# The expected class index is the category's position in cat_list.
# NOTE(review): this assumes the model's output indices follow the same
# alphabetical category order as cat_list — confirm against the training setup.
for i, cat in enumerate(cat_list):
    cat_path = os.path.join(in_dir, cat)
    images, filenames = load_images(cat_path)

    # Always make sure both output folders exist. Creating them only when the
    # category folder is missing breaks on a partially created tree: with no
    # "correct" folder, shutil.copy would write each image to a *file* named
    # "correct", each copy overwriting the previous one.
    cat_dir = os.path.join(cluster_directory, cat)
    correct_dir = os.path.join(cat_dir, "correct")
    incorrect_dir = os.path.join(cat_dir, "incorrect")
    os.makedirs(correct_dir, exist_ok=True)
    os.makedirs(incorrect_dir, exist_ok=True)

    for img, src_path in zip(images, filenames):
        # Add a batch dimension of 1 and move the tensor to the GPU.
        batch = transform(img).unsqueeze(0).to('cuda')

        # Inference only: no gradients are needed.
        with torch.no_grad():
            output = model(batch)
        # Index of the highest score along dim 1 = predicted class.
        pred = int(output.argmax(dim=1).item())

        # load_images already returns full paths, so copy from them directly
        # rather than joining them onto cat_path a second time.
        dest_dir = correct_dir if pred == i else incorrect_dir
        shutil.copy(src_path, dest_dir)
        
    
    