In [23]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import numpy as np
import torch
from torch.utils.data import DataLoader
from collections import defaultdict
import torchvision.models as models
from sklearn.decomposition import PCA


Load , Resize and Normalise the images 

In [24]:

# Define transformations for feature extraction
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize for ImageNet
])

# Load CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='../Data/RawData', train=True, download=False, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='../Data/RawData', train=False, download=False, transform=transform)

# Function to filter the dataset
def select_images(dataset, num_images_per_class):
    class_dict = defaultdict(list)
    
    # Organize images by class
    for idx, (image, label) in enumerate(dataset):
        class_dict[label].append((image, label))
        
        # Stop if we have collected enough images for this class
        if len(class_dict[label]) >= num_images_per_class:
            continue
    
    # Select the first num_images_per_class for each class
    selected_images = []
    for label in range(10):  # CIFAR-10 has 10 classes
        selected_images.extend(class_dict[label][:num_images_per_class])
    
    return selected_images

# Select 500 training images and 100 test images per class
num_train_images_per_class = 500
num_test_images_per_class = 100

selected_train_images = select_images(train_dataset, num_train_images_per_class)
selected_test_images = select_images(test_dataset, num_test_images_per_class)

# Convert selected images to separate lists of images and labels
train_images, train_labels = zip(*selected_train_images)
test_images, test_labels = zip(*selected_test_images)

# Convert to tensors
train_images = torch.stack(train_images)
train_labels = torch.tensor(train_labels)
test_images = torch.stack(test_images)
test_labels = torch.tensor(test_labels)

print(f'Selected {len(train_images)} training images and {len(test_images)} test images.')


Selected 5000 training images and 1000 test images.


Process The Images with Resnet

In [25]:
# Create a custom dataset for the selected images
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

# Create custom datasets
train_custom_dataset = CustomDataset(train_images, train_labels)
test_custom_dataset = CustomDataset(test_images, test_labels)

# Create DataLoader for training and test datasets
train_loader = DataLoader(train_custom_dataset, batch_size=32, shuffle=True, num_workers=0)
test_loader = DataLoader(test_custom_dataset, batch_size=32, shuffle=False, num_workers=0)

# Load the pre-trained ResNet-18 model
model = models.resnet18(pretrained=True)

# Remove the last fully connected layer
model = torch.nn.Sequential(*(list(model.children())[:-1]))

# Set the model to evaluation mode
model.eval()

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def extract_features(data_loader):
    features_list = []
    labels_list = []
    
    for images, labels in data_loader:
        images = images.to(device)  # Move images to GPU
        labels = labels.to(device)  # Move labels to GPU (if needed, but not required for feature extraction)
        with torch.no_grad():  # No gradient computation needed
            features = model(images)  # Forward pass through the model
            features = features.view(features.size(0), -1)  # Flatten the features
        
        features_list.append(features)
        labels_list.append(labels)
    
    return torch.cat(features_list), torch.cat(labels_list)


# Extract features for training and test images
train_features, train_labels = extract_features(train_loader)
test_features, test_labels = extract_features(test_loader)

print(f'Extracted feature vectors: {train_features.shape} for training images and {test_features.shape} for test images.')




Extracted feature vectors: torch.Size([5000, 512]) for training images and torch.Size([1000, 512]) for test images.


Reshape Images with PCA

In [26]:
# Convert features to NumPy arrays for PCA
train_features_np = train_features.cpu().numpy()  # Convert to numpy array if using GPU
test_features_np = test_features.cpu().numpy()    # Convert to numpy array if using GPU

# Initialize PCA
pca = PCA(n_components=50)  # We want to reduce to 50 components

# Fit PCA on training features
pca.fit(train_features_np)

# Transform both training and test features
train_features_reduced = pca.transform(train_features_np)
test_features_reduced = pca.transform(test_features_np)

# Convert back to PyTorch tensors if needed
train_features_reduced_tensor = torch.tensor(train_features_reduced)
test_features_reduced_tensor = torch.tensor(test_features_reduced)

# Print shapes of the reduced features
print(f'Reduced feature vectors shape: {train_features_reduced_tensor.shape} for training images')
print(f'Reduced feature vectors shape: {test_features_reduced_tensor.shape} for test images')

Reduced feature vectors shape: torch.Size([5000, 50]) for training images
Reduced feature vectors shape: torch.Size([1000, 50]) for test images


Save Images with Labels

In [27]:
# Save training features and labels as a dictionary
torch.save({
    'features': train_features_reduced_tensor,
    'labels': train_labels
}, '../Data/ProcessedData/train_data.pth')

# Save test features and labels as a dictionary
torch.save({
    'features': test_features_reduced_tensor,
    'labels': test_labels
}, '../Data/ProcessedData/test_data.pth')


Load the data to test if the saving process is correct.

In [28]:
# Load training features and labels
train_data = torch.load('../Data/ProcessedData/train_data.pth')
train_features_loaded = train_data['features']
train_labels_loaded = train_data['labels']

# Load test features and labels
test_data = torch.load('../Data/ProcessedData/train_data.pth')
test_features_loaded = test_data['features']
test_labels_loaded = test_data['labels']

# Check if the loaded data is correct
print(f'Loaded Training Features Shape: {train_features_loaded.shape}')
print(f'Loaded Training Labels Shape: {train_labels_loaded.shape}')
print(f'Loaded Test Features Shape: {test_features_loaded.shape}')
print(f'Loaded Test Labels Shape: {test_labels_loaded.shape}')

# Optionally, you can check the first few items to verify the contents
print(f'First training feature vector: {train_features_loaded[0]}')
print(f'First training label: {train_labels_loaded[0]}')
print(f'First test feature vector: {test_features_loaded[0]}')
print(f'First test label: {test_labels_loaded[0]}')



Loaded Training Features Shape: torch.Size([5000, 50])
Loaded Training Labels Shape: torch.Size([5000])
Loaded Test Features Shape: torch.Size([5000, 50])
Loaded Test Labels Shape: torch.Size([5000])
First training feature vector: tensor([ 5.9841e+00, -3.6731e+00, -1.4204e+00,  1.4349e+00, -2.7493e+00,
         4.4789e-01, -2.9624e-01,  3.0293e-01, -2.0248e+00,  1.3278e+00,
         2.4916e+00,  5.6473e-03,  6.6921e-01, -1.8713e+00, -1.0279e+00,
         1.4384e+00,  1.5166e+00,  8.8583e-01, -2.6691e+00, -1.0197e+00,
         2.2547e+00, -5.5619e-01,  3.2156e+00, -4.9057e-01, -1.1145e-01,
         8.6599e-01,  1.2758e+00, -1.2245e-01, -3.3557e-01,  7.2982e-01,
        -1.7007e+00, -1.6288e+00, -1.1737e+00, -1.6764e+00, -1.5883e+00,
        -7.5804e-02, -5.3719e-01, -8.8505e-01, -3.0720e+00,  1.1160e+00,
         3.0222e-01,  2.0894e-01, -8.8151e-01, -5.7156e-01,  4.7041e-01,
         1.5313e+00, -7.0307e-01, -1.3531e+00,  7.1378e-01,  1.7253e+00])
First training label: 8
First test fea

  train_data = torch.load('../Data/ProcessedData/train_data.pth')
  test_data = torch.load('../Data/ProcessedData/train_data.pth')
