## Extract Image Features

In [None]:
pip install transformers

In [None]:
import gc
import os
import csv
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

from PIL import Image

import torch
from torch.utils.data import DataLoader, TensorDataset, Dataset

from transformers import ViTFeatureExtractor, ViTForImageClassification

import torchvision.models as models
import torchvision.transforms as transforms

In [None]:
import sys

# Google Drive folder that holds the project. It is appended (not inserted) so
# that later cells can read it back as sys.path[-1] when building data paths.
project_dir = '/content/drive/MyDrive/Colab Notebooks/FML Project'
sys.path.append(project_dir)

## Set Device

In [None]:
# Pick the compute device: a CUDA GPU when PyTorch can see one, else the CPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print("GPU is available." if use_gpu else "Using CPU.")

GPU is available.


## Load Image Data

In [None]:
# The project folder was appended to sys.path earlier, so it is the last entry;
# the image array lives in its Data/ subfolder.
image_data_path = sys.path[-1] + '/Data/image_data.npy'
image_data = np.load(image_data_path)
image_data.shape

(18543, 100, 100, 3)

## Load Pre-trained Model

In [None]:
# Load an ImageNet-pretrained ResNet-50. Recent torchvision releases select the
# checkpoint through the `weights=` enum and deprecate `pretrained=True`
# (which maps to IMAGENET1K_V1); fall back to the legacy flag on older
# releases that do not ship the enum.
resnet50_weights = getattr(models, "ResNet50_Weights", None)
if resnet50_weights is not None:
    backbone = models.resnet50(weights=resnet50_weights.IMAGENET1K_V1)
else:
    backbone = models.resnet50(pretrained=True)

# Drop the last child module (the fully connected classifier) so the network
# outputs the pooled feature map instead of class logits.
modules = list(backbone.children())[:-1]
resnet = torch.nn.Sequential(*modules)
resnet.to(device)

## Prepare Dataset

In [None]:
# Preprocessing for the ImageNet-pretrained ResNet-50:
#   1. resize to 224x224,
#   2. convert the PIL image to a float tensor scaled to [0, 1],
#   3. normalise each channel with the ImageNet mean/std the pretrained
#      weights expect.
# NOTE: this replaces a generic 0.5/0.5 normalisation, so features saved with
# the old setting are not comparable and must be regenerated.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

In [None]:
# Convert every raw array to a PIL image and run it through `transform`.
# NOTE(review): the `* 255` rescale assumes image_data holds floats in [0, 1];
# confirm against how image_data.npy was written.
# NOTE: all transformed images are kept in memory at once (one 3x224x224
# float tensor per image).
image_dataset = [
    transform(Image.fromarray((image * 255).astype(np.uint8)))
    for image in tqdm(image_data)
]

100%|██████████| 18543/18543 [00:29<00:00, 620.97it/s]


## Extract Features

In [None]:
# Batches are drawn in the original order (shuffle=False), so row i of the
# extracted features corresponds to image i of the dataset.
batch_size = 16
image_loader = DataLoader(
    dataset=image_dataset,
    batch_size=batch_size,
    num_workers=2,
    shuffle=False,
)

In [None]:
# Put the network in evaluation mode before running inference.
resnet.eval()

# Run every batch through the truncated ResNet without tracking gradients and
# move each result back to the CPU before collecting it.
batch_outputs = []
with torch.no_grad():
    for images in tqdm(image_loader):
        pooled = resnet(images.to(device))
        batch_outputs.append(pooled.cpu())

# Stitch the per-batch outputs together along the batch axis.
features = torch.cat(batch_outputs, dim=0)
print(features.shape)

100%|██████████| 1159/1159 [01:05<00:00, 17.58it/s]

torch.Size([18543, 2048, 1, 1])





In [None]:
# Collapse the trailing 1x1 spatial dims left by the pooling layer:
# (N, 2048, 1, 1) -> (N, 2048). flatten(start_dim=1) always keeps the batch
# axis, whereas a bare squeeze() would also drop it when N == 1. Re-running
# this cell on an already flattened tensor is a no-op.
features = features.flatten(start_dim=1)
features.shape

torch.Size([18543, 2048])

In [None]:
# Persist the feature matrix next to the input data in the project's Data/
# folder (the project folder is the last sys.path entry).
features_path = sys.path[-1] + "/Data/image_features.pt"
torch.save(features, features_path)
print('Image features shape: ', features.shape)

Image features shape:  torch.Size([18543, 2048])
