In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 8.8 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 13.4 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 56.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 54.4 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling P

In [None]:
# Imports
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import models
from torch.utils.data.sampler import SubsetRandomSampler
from numpy.lib.function_base import append
import numpy as np
from transformers import ConvNextFeatureExtractor, ConvNextForImageClassification

In [None]:
# Pick the compute device: the first CUDA GPU when one is visible, the CPU otherwise.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Bare expression so the notebook displays which device was selected.
device

device(type='cuda', index=0)

In [None]:
#Seed Randomizers
# Seed both NumPy (used for the index shuffle) and torch (used by the samplers).
random_seed = 42
np.random.seed(random_seed)
torch_rng = torch.manual_seed(random_seed)

#Classes
classes = ('Airplane', 'Car', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck')

#Randomly Splitting Train set into Training and Validation
# Both handles read the CIFAR-10 training split; no transform is attached here
# because the untransformed images are needed for the statistics below.
train_data = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)
val_data = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)

# Shuffle every sample position once, then cut the permutation at 45000:
# positions before the cut are used for training, the remainder for validation.
indices = list(range(len(train_data)))
np.random.shuffle(indices)
train_indices, val_indices = indices[:45000], indices[45000:]
train_sample, val_sample = SubsetRandomSampler(train_indices), SubsetRandomSampler(val_indices)

#Get mean and std from just the training set
# Each dataset item is indexed as (image, label); only the image is used for the statistics.
train_sample_data = [train_data[item] for item in train_sample]

# Concatenate the per-image arrays along the first axis so that reducing over
# axes (0, 1) leaves one value per entry of the last axis.
img_arr = np.concatenate([np.asarray(image) for image, _ in train_sample_data])

# Divide by 255 to express the statistics on a 0-1 scale.
pixel_axes = (0, 1)
train_mean = np.mean(img_arr, axis=pixel_axes) / 255
train_std = np.std(img_arr, axis=pixel_axes) / 255
print(train_mean, train_std)

#Set Train and Test/Validation Image Transformers For Data Augmentation
# Candidate augmentations paired with the selection weight handed to RandomChoice.
augmentation_choices = [
    (transforms.RandomRotation(degrees=45), 0.05),
    (transforms.GaussianBlur(kernel_size=3), 0.05),
    (transforms.RandomHorizontalFlip(p=1), 0.05),
    (transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), 0.05),  # from ConvNeXt
    (transforms.RandomVerticalFlip(p=0), 0.8),  # flip probability 0, i.e. the "leave as is" option
]
augmentation_ops = [op for op, _ in augmentation_choices]
augmentation_weights = [weight for _, weight in augmentation_choices]

# Training pipeline: resize, one weighted-random augmentation, centre crop,
# tensor conversion, then normalisation with the training-set statistics.
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomChoice(transforms=augmentation_ops, p=augmentation_weights),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=train_mean, std=train_std),
])

# Evaluation pipeline: the same steps without the augmentation stage.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=train_mean, std=train_std),
])

#Reload/Transform/DataLoader
def _load_cifar10(train, transform):
    """Open CIFAR-10 from ./data (downloading if needed) with `transform` attached."""
    return torchvision.datasets.CIFAR10(root='./data', train=train, download=True, transform=transform)


# Training and validation both read the training split; the samplers passed to the
# loaders below decide which indices each one draws from.
train_data = _load_cifar10(train=True, transform=train_transform)
val_data = _load_cifar10(train=True, transform=test_transform)

batch_size = 8
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    sampler=train_sample,
    num_workers=2,
)
# Validation is served one image per batch.
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=1,
    sampler=val_sample,
    num_workers=2,
)

# The held-out test split is read in dataset order, one image per batch.
test_data = _load_cifar10(train=False, transform=test_transform)
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

# gen_data = datasets.ImageFolder(root='./Generalization_Images', transform=test_transform)
# gen_loader = torch.utils.data.DataLoader(gen_data, batch_size=1, shuffle=False, num_workers=2)

# Load Feature Extractor ConvNext
# feature_extractor = ConvNextFeatureExtractor.from_pretrained("facebook/convnext-tiny-224")
# model = ConvNextForImageClassification.from_pretrained("facebook/convnext-tiny-224")
# The download above is no longer needed: the model is restored from a local file instead.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
# map_location remaps the stored tensors onto `device`, so a file saved on a GPU
# still loads when this notebook runs on a CPU-only machine.
model = torch.load("Model_ConvNeXt_97", map_location=device)

# Freeze every weight: the network is only used for forward passes here.
for param in model.parameters():
    param.requires_grad = False

# Rebuild the classifier head without its last child module, so the model emits
# the output of the layer before it.
# NOTE(review): assumes model.classifier is a container with child modules - confirm
# against the saved model; a bare layer has no children and would yield an empty Sequential.
new_classifier = nn.Sequential(*list(model.classifier.children())[:-1])
model.classifier = new_classifier

# Switch to eval mode after the swap so the newly created container is covered too.
model.eval()

model.to(device)

# Saving Extracted Features - Tabular Modeling
def extract_save(data_loader, dataset, batch_size=1):
    """Run every batch through the global ``model`` and save features and labels.

    Each batch's ``.logits`` output and its labels are collected, joined along
    the sample axis and written with ``torch.save`` as NumPy arrays to
    ``./{dataset}_extracted_features.pt`` and ``./{dataset}_extracted_labels.pt``.
    Row ``i`` of the feature array belongs to entry ``i`` of the label array, and
    rows follow the order in which the loader yielded the samples.

    Args:
        data_loader: Iterable of batches; item 0 of a batch is the image tensor
            and item 1 is the label tensor.
        dataset: Name used as the prefix of the two output files.
        batch_size: Unused. Kept so existing calls keep working; batches are
            concatenated, so their sizes no longer need to be known or equal.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    feature_batches = []
    label_batches = []

    with torch.no_grad():
        for data in data_loader:
            images = data[0].to(device)
            # Move results to the CPU right away so outputs for the whole dataset
            # do not pile up in device memory.
            feature_batches.append(model(images).logits.cpu())
            label_batches.append(data[1].cpu())

    if not feature_batches:
        raise ValueError(f'data_loader for {dataset!r} produced no batches')

    # Concatenate instead of stack: stacking requires every batch to have the
    # same size and fails when the final batch is smaller.
    x = torch.cat(feature_batches, dim=0).numpy()
    y = torch.cat(label_batches, dim=0).numpy().flatten()

    torch.save(x, f'./{dataset}_extracted_features.pt')
    torch.save(y, f'./{dataset}_extracted_labels.pt')
    print(f'---extracted {dataset} saved---')

# Run the extraction once per split. The validation and test loaders serve one
# image per batch, so they are paired with a batch size of 1.
for split_loader, split_name, split_batch_size in (
    (train_loader, 'train', batch_size),
    (val_loader, 'val', 1),
    (test_loader, 'test', 1),
):
    extract_save(split_loader, split_name, batch_size=split_batch_size)
# extract_save(gen_loader, 'gen')

# # Saving DataLoaders - CNN Modeling
# torch.save(train_loader, './train_cnn.pt')
# torch.save(val_loader, './val_cnn.pt')
# torch.save(test_loader, './test_cnn.pt')
# torch.save(gen_loader, './gen_cnn.pt')

# print(f'---DataLoaders Saved---')

# # Saving Mean and Std for Normalizing Images
# torch.save(train_mean, './normalizer_mean.pt')
# torch.save(train_std, './normalizer_std.pt')

# print(f'---Normalizing Data Saved---')

# Marker printed once every split has been processed.
print('===Finished===')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
[0.49147618 0.48220086 0.44667191] [0.24713163 0.24367339 0.26168631]
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
---extracted train saved---
---extracted val saved---
---extracted test saved---
===Finished===


In [None]:
# # Saving DataLoaders - CNN Modeling
# torch.save(train_loader, './train_cnn.pt')
# torch.save(val_loader, './val_cnn.pt')
# torch.save(test_loader, './test_cnn.pt')
# # torch.save(gen_loader, './gen_cnn.pt')

# print(f'---DataLoaders Saved---')

# # Saving Mean and Std for Normalizing Images
# torch.save(train_mean, './normalizer_mean.pt')
# torch.save(train_std, './normalizer_std.pt')

# print(f'---Normalizing Data Saved---')

---DataLoaders Saved---
---Normalizing Data Saved---


In [None]:
# # Getting xtest, ytest from the model
# xtest = []
# ytest = []

# with torch.no_grad():
#     for data in test_loader:
#         images, labels = data[0].to(device), data[1].to(device)
#         ytest.append(labels)
#         xtest.append(model(images))


In [None]:
# # Getting xtrain, ytrain from the model
# xtrain = []
# ytrain = []

# with torch.no_grad():
#     for data in train_loader:
#         images, labels = data[0].to(device), data[1].to(device)
#         ytrain.append(labels)
#         xtrain.append(model(images))

In [None]:
# # Saving xtest, ytest to use in another file
# torch.save(xtest, 'xtestconvnext.pt')
# torch.save(ytest, 'ytestconvnext.pt')

In [None]:
# # Saving xtrain, ytrain to use in another file
# torch.save(xtrain, 'xtrainconvnext.pt')
# torch.save(ytrain, 'ytrainconvnext.pt')