<a href="https://colab.research.google.com/github/mtrefilek/cs762/blob/main/Feature_Extractor_(ScatterNet).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch
from kymatio.torch import Scattering2D
# Current working directory with backslashes turned into forward slashes;
# the dataset and feature paths below are built by appending to this string.
cwd = os.getcwd().replace('\\','/')

In [None]:
import numpy as np
import pickle
from copy import deepcopy

In [None]:
# Input image geometry per dataset as (height, width, channels).
# get_scatter_transform() looks the selected dataset up here, so every
# dataset the loading cell can handle needs an entry.
SHAPES = {
    "CIFAR10": (32, 32, 3),
    # CIFAR-100 images have the same 32x32 RGB layout as CIFAR-10; without
    # this entry selecting 'CIFAR100' fails with a KeyError.
    "CIFAR100": (32, 32, 3),
    "FMNIST": (28, 28, 1),
    "MNIST": (28, 28, 1)
}

# Number of images pushed through the feature extractor at once.
n_batch = 128

In [None]:
def get_scatter_transform(dataset, J=2, L=8):
    """Build the scattering transform for `dataset` and describe its output size.

    Args:
        dataset: Key into ``SHAPES``; an unknown name raises ``KeyError``.
        J: Scale parameter handed to ``Scattering2D``. Defaults to 2, the
            value that was previously hard-coded.
        L: Number of wavelet orientations handed to ``Scattering2D``.
            Defaults to 8, which reproduces the previous coefficient count.

    Returns:
        A tuple ``(scattering, K, (h_out, w_out))`` where ``scattering`` is the
        ``Scattering2D`` instance, ``K`` is the number of coefficient maps
        summed over all input channels, and ``(h_out, w_out)`` is the spatial
        size of each map.
    """
    height, width, channels = SHAPES[dataset]
    scattering = Scattering2D(J=J, shape=(height, width), L=L)
    # A second-order scattering transform produces
    # 1 + L*J + L^2 * J*(J-1)/2 maps per input channel (81 for J=2, L=8).
    # J*(J-1) is always even, so the integer division is exact.
    maps_per_channel = 1 + L * J + (L * L * J * (J - 1)) // 2
    K = maps_per_channel * channels
    # Every map is subsampled by 2**J along both spatial axes.
    stride = 2 ** J
    return scattering, K, (height // stride, width // stride)

In [None]:
# Dataset to extract features from.
# The loading cell below has branches for 'MNIST', 'FMNIST', 'CIFAR10' and
# 'CIFAR100'; the chosen name must also be a key of SHAPES.
# 'PlantDisease', 'EuroSAT', 'ChestXRay', 'ISIC2018' and 'TinyImageNet' have no
# branch in this notebook.
DSET_NAME = 'MNIST'

In [None]:
# Tag embedded in the file names of the saved feature archives.
FEATURE_EXTRACTOR_NAME = 'scatternet'

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Scattering transform for the selected dataset together with its output
# layout: K coefficient maps, each of spatial size h x w.
model, K, (h, w) = get_scatter_transform(DSET_NAME)
# Length of one flattened feature vector.
d_dim = h * w * K
model.to(device)

Scattering2D()

In [None]:
### Dataset Preparation
from torchvision import datasets, transforms
from glob import glob, iglob
from PIL import Image

def batch(iterable, n=1):
    """Yield consecutive slices of `iterable`, each holding at most `n` items.

    `iterable` must support ``len()`` and slicing. The final slice is shorter
    when the length is not a multiple of `n`.
    """
    total = len(iterable)
    for start in range(0, total, n):
        # Slicing clamps the stop index to the sequence length, so the last
        # chunk is simply whatever remains.
        yield iterable[start:start + n]

# Where torchvision stores the raw datasets and where features are written.
dset_path = cwd + '/dataset'
feature_path = cwd+'/extracted_features/'


def _images_and_labels(dataset):
    """Split a dataset yielding (image, label) pairs into two parallel lists."""
    imgs, labels = [], []
    for (img, label) in dataset:
        imgs.append(img)
        labels.append(label)
    return imgs, labels


def _load_meta(path):
    """Unpickle the metadata file at `path`, closing the file afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code. It is only applied
    # to the metadata files inside the dataset directory; do not point it at
    # untrusted files.
    with open(path, 'rb') as meta_file:
        return pickle.load(meta_file)


# Each branch binds trainset/testset, unpacks them into imgs_*/labels_* lists
# and sets classnames. An unrecognised DSET_NAME leaves all of them unset.
if DSET_NAME=='MNIST': # MNIST
    trainset = datasets.MNIST(root=dset_path, train=True, download=True, transform=transforms.ToTensor())
    testset = datasets.MNIST(root=dset_path, train=False, download=True, transform=transforms.ToTensor())
    imgs_tr, labels_tr = _images_and_labels(trainset)
    imgs_tst, labels_tst = _images_and_labels(testset)
    classnames = [str(a) for a in range(10)]

elif DSET_NAME=='FMNIST': # Fashion-MNIST
    # download=True added for consistency with the other branches; without it
    # the constructor fails when the files are not already under dset_path.
    trainset = datasets.FashionMNIST(root=dset_path, train=True, download=True, transform=transforms.ToTensor())
    testset = datasets.FashionMNIST(root=dset_path, train=False, download=True, transform=transforms.ToTensor())
    imgs_tr, labels_tr = _images_and_labels(trainset)
    imgs_tst, labels_tst = _images_and_labels(testset)
    classnames = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

elif DSET_NAME=='CIFAR10': # CIFAR-10
    trainset = datasets.CIFAR10(root=dset_path, train=True, download=True, transform=transforms.ToTensor())
    # Was misspelled `testtest`, so `testset` was never bound in this branch.
    testset = datasets.CIFAR10(root=dset_path, train=False, download=True, transform=transforms.ToTensor())
    imgs_tr, labels_tr = _images_and_labels(trainset)
    imgs_tst, labels_tst = _images_and_labels(testset)
    meta = _load_meta(dset_path+'/cifar-10-batches-py/batches.meta')
    classnames = meta['label_names']

elif DSET_NAME=='CIFAR100': # CIFAR-100
    trainset = datasets.CIFAR100(root=dset_path, train=True, download=True, transform=transforms.ToTensor())
    testset = datasets.CIFAR100(root=dset_path, train=False, download=True, transform=transforms.ToTensor())
    imgs_tr, labels_tr = _images_and_labels(trainset)
    imgs_tst, labels_tst = _images_and_labels(testset)
    meta = _load_meta(dset_path+'/cifar-100-python/meta')
    classnames = meta['fine_label_names']

def _extract_features(imgs, phase):
    """Run the feature extractor over `imgs` and return one feature row per image.

    Args:
        imgs: List of image tensors; consecutive slices are stacked into batches
            of at most `n_batch` images.
        phase: Word inserted into the progress message ('Training' or 'Test').

    Returns:
        Array of shape (len(imgs), d_dim) holding the flattened model output
        for every image, in input order.
    """
    n_imgs = len(imgs)
    # Preallocate the result (float64, as before) and fill it batch by batch.
    # Growing the matrix with np.concatenate re-copied every earlier row on
    # each step.
    features = np.zeros((n_imgs, d_dim))
    n_done = 0
    with torch.no_grad():  # pure feature extraction, no gradients needed
        for img_batch in batch(imgs, n=n_batch):
            inputs = torch.stack(img_batch).to(device)
            batch_features = model(inputs).cpu().detach().numpy().reshape(len(img_batch), d_dim)
            features[n_done:n_done + len(img_batch)] = batch_features
            n_done += len(img_batch)
            print('Extracting {0} Features: {1:.2f}% done'.format(phase, 100*n_done/n_imgs) )
    return features


if DSET_NAME in ('MNIST', 'FMNIST', 'CIFAR10', 'CIFAR100'):
    # --- training split ---
    n_tr = len(imgs_tr)
    feature_matrix_tr = _extract_features(imgs_tr, 'Training')

    # Group the rows by class label: entry i holds the features of class i.
    n_cls = np.max(labels_tr)+1
    labels_tr = np.array(labels_tr)
    feature_matrices_tr = []
    for i in range(n_cls):
        feature_matrices_tr.append(feature_matrix_tr[labels_tr==i])

    # --- test split ---
    n_tst = len(imgs_tst)
    feature_matrix_tst = _extract_features(imgs_tst, 'Test')

    n_cls = np.max(labels_tst)+1
    labels_tst = np.array(labels_tst)
    feature_matrices_tst = []
    for i in range(n_cls):
        # Index the TEST matrix with the test-label mask. This previously
        # indexed feature_matrix_tr, whose row count does not match the mask.
        feature_matrices_tst.append(feature_matrix_tst[labels_tst==i])
    
### Save Features
def _as_saveable(matrices):
    """Pack a list of per-class feature matrices into one array for np.savez.

    When every matrix has the same shape the result is the regular stacked
    array np.savez would have built from the list itself. When the shapes
    differ (classes with different sample counts) the result is a 1-D object
    array with one matrix per element. Recent NumPy versions refuse to build
    that implicitly from a ragged list and raise ValueError instead.
    """
    if len({m.shape for m in matrices}) <= 1:
        return np.asarray(matrices)
    ragged = np.empty(len(matrices), dtype=object)
    for idx, matrix in enumerate(matrices):
        ragged[idx] = matrix
    return ragged


if DSET_NAME in ('MNIST', 'FMNIST', 'CIFAR10', 'CIFAR100'):
    fname_tr = DSET_NAME+'_'+FEATURE_EXTRACTOR_NAME+'_train.npz'
    fname_tst = DSET_NAME+'_'+FEATURE_EXTRACTOR_NAME+'_test.npz'

    # np.savez does not create missing directories.
    os.makedirs(feature_path, exist_ok=True)

    # Object arrays are stored pickled; reading them back requires
    # np.load(..., allow_pickle=True).
    np.savez(feature_path + fname_tr, feature_matrices = _as_saveable(feature_matrices_tr), classnames = classnames)
    np.savez(feature_path + fname_tst, feature_matrices = _as_saveable(feature_matrices_tst), classnames = classnames)

Extracting Training Features: 0.21% done
Extracting Training Features: 0.43% done
Extracting Training Features: 0.64% done
Extracting Training Features: 0.85% done
Extracting Training Features: 1.07% done
Extracting Training Features: 1.28% done
Extracting Training Features: 1.49% done
Extracting Training Features: 1.71% done
Extracting Training Features: 1.92% done
Extracting Training Features: 2.13% done
Extracting Training Features: 2.35% done
Extracting Training Features: 2.56% done
Extracting Training Features: 2.77% done
Extracting Training Features: 2.99% done
Extracting Training Features: 3.20% done
Extracting Training Features: 3.41% done
Extracting Training Features: 3.63% done
Extracting Training Features: 3.84% done
Extracting Training Features: 4.05% done
Extracting Training Features: 4.27% done
Extracting Training Features: 4.48% done
Extracting Training Features: 4.69% done
Extracting Training Features: 4.91% done
Extracting Training Features: 5.12% done
Extracting Train

Extracting Training Features: 42.24% done
Extracting Training Features: 42.45% done
Extracting Training Features: 42.67% done
Extracting Training Features: 42.88% done
Extracting Training Features: 43.09% done
Extracting Training Features: 43.31% done
Extracting Training Features: 43.52% done
Extracting Training Features: 43.73% done
Extracting Training Features: 43.95% done
Extracting Training Features: 44.16% done
Extracting Training Features: 44.37% done
Extracting Training Features: 44.59% done
Extracting Training Features: 44.80% done
Extracting Training Features: 45.01% done
Extracting Training Features: 45.23% done
Extracting Training Features: 45.44% done
Extracting Training Features: 45.65% done
Extracting Training Features: 45.87% done
Extracting Training Features: 46.08% done
Extracting Training Features: 46.29% done
Extracting Training Features: 46.51% done
Extracting Training Features: 46.72% done
Extracting Training Features: 46.93% done
Extracting Training Features: 47.1

Extracting Training Features: 84.05% done
Extracting Training Features: 84.27% done
Extracting Training Features: 84.48% done
Extracting Training Features: 84.69% done
Extracting Training Features: 84.91% done
Extracting Training Features: 85.12% done
Extracting Training Features: 85.33% done
Extracting Training Features: 85.55% done
Extracting Training Features: 85.76% done
Extracting Training Features: 85.97% done
Extracting Training Features: 86.19% done
Extracting Training Features: 86.40% done
Extracting Training Features: 86.61% done
Extracting Training Features: 86.83% done
Extracting Training Features: 87.04% done
Extracting Training Features: 87.25% done
Extracting Training Features: 87.47% done
Extracting Training Features: 87.68% done
Extracting Training Features: 87.89% done
Extracting Training Features: 88.11% done
Extracting Training Features: 88.32% done
Extracting Training Features: 88.53% done
Extracting Training Features: 88.75% done
Extracting Training Features: 88.9

NameError: name 'feature_matrix' is not defined

In [None]:
# Rebuild the per-class training feature matrices from feature_matrix_tr.
# Kept as an explicit for-loop: the cells below read the loop variable `i`
# after it finishes.
n_cls = 1 + np.max(labels_tr)
labels_tr = np.array(labels_tr)
feature_matrices_tr = list()
for i in range(n_cls):
    rows_of_class_i = feature_matrix_tr[labels_tr==i]
    feature_matrices_tr.append(rows_of_class_i)

In [None]:
# Sanity check: number of per-class feature matrices in the list.
len(feature_matrices_tr)

10

In [None]:
# Sanity check: total number of rows summed over all per-class matrices.
np.sum([len(f) for f in feature_matrices_tr])

60000

In [None]:
# NOTE(review): this is the length of the boolean mask, which always equals
# len(labels_tr). The number of samples in class i would be np.sum(labels_tr==i).
len(labels_tr==i)

60000

In [None]:
# Sanity check: number of training labels.
len(labels_tr)

60000

In [None]:
# Inspect the loop variable left behind by the last `for i in range(n_cls)` loop.
i

9