# Dupimage VGG19 Partial Compose Preprocessing

From KSC 2021

# California-ND Sample Preprocessing

In [1]:
import os
from tqdm import tqdm
import torch
from data_loaders import ImageDataLoader
from model import VGGPartialCompose

In [2]:
root = "/datasets/sample-california-nd-2022-03-31/"
images_dir = root + "images/"
preprocessed_dir = root + "preprocessed/"
n_components = 20
batch_size = 128

# Get dataloader
images_dataloader = ImageDataLoader(root=images_dir, batch_size=batch_size)

# Get device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Contents features
features = None
vgg_partial_compose = VGGPartialCompose().to(device)
vgg_partial_compose.eval()
with torch.no_grad():
    for i, (data, _) in enumerate(tqdm(images_dataloader)):
        data = data.to(device)
        output = vgg_partial_compose(data)
        if features is None:
            features = output
        else:
            features = torch.cat([features, output], dim=0)

torch.save(features, preprocessed_dir + 'vgg-partial-compose.pt')

100%|██████████| 6/6 [00:02<00:00,  2.42it/s]


# MFND-IND Preprocessing

In [4]:
root = "/datasets/sample-mfnd-ind-2022-03-31/"
images_dir = root + "images/"
preprocessed_dir = root + "preprocessed/"
n_components = 20
batch_size = 128

# Get dataloader
images_dataloader = ImageDataLoader(root=images_dir, batch_size=batch_size)

# Get device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Contents features
features = None
vgg_partial_compose = VGGPartialCompose().to(device)
vgg_partial_compose.eval()
with torch.no_grad():
    for i, (data, _) in enumerate(tqdm(images_dataloader)):
        data = data.to(device)
        output = vgg_partial_compose(data)
        if features is None:
            features = output
        else:
            features = torch.cat([features, output], dim=0)

torch.save(features, preprocessed_dir + 'vgg-partial-compose.pt')

100%|██████████| 146/146 [01:21<00:00,  1.79it/s]


# MFND-ALL Preprocessing

In [2]:
root = "/datasets/sample-mfnd-all-2022-03-31/"
images_dir = root + "images/"
preprocessed_dir = root + "preprocessed/"
n_components = 20
batch_size = 128

# Get dataloader
images_dataloader = ImageDataLoader(root=images_dir, batch_size=batch_size)

# Get device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Contents features
features = None
vgg_partial_compose = VGGPartialCompose().to(device)
vgg_partial_compose.eval()
with torch.no_grad():
    for i, (data, _) in enumerate(tqdm(images_dataloader)):
        data = data.to(device)
        output = vgg_partial_compose(data)
        if features is None:
            features = output
        else:
            features = torch.cat([features, output], dim=0)

100%|██████████| 662/662 [14:04<00:00,  1.28s/it]


---

In [14]:
#def cupca_embedding(features: torch.Tensor, n_components):
n_components = 20

# Check singularity
if 10 * features.shape[0] < features.shape[1]:
    high_singularity = True
    features.transpose_()
    features.transpose_(0, 1)

# Centering
features.sub_(features.mean(axis=0))    

# Get covariance matrix
cov_mat = torch.matmul(features.T, features)
eigvalues, eigvec = torch.linalg.eig(cov_mat)

TypeError: transpose_() missing 2 required positional argument: "dim0", "dim1"

In [None]:
cov_mat = torch.matmul(features.T, features)
_, eigvec = torch.linalg.eig(cov_mat)

eigvec.shape

In [None]:
# Dimensionality reduction with PCA
features_mean = features.mean(axis=0)
features_std = features.std(axis=0)
_, _, transpose_matrix = torch.pca_lowrank(features, q=n_components, center=True)

# Normalization
features.subtract_(features_mean)
features.divide_(features_std)

# Memory reduction
torch.cuda.empty_cache()
features = features@transpose_matrix


# Create sample dataset directory
if not os.path.exists(preprocessed_dir):
    os.makedirs(preprocessed_dir)