## Library Imports

In [1]:
from time import time
# Captured before anything else runs so the final cell can report the total
# wall-clock run time of the notebook.
notebook_start_time = time()

In [2]:
import os
import re
import random as r
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
from torch.nn.utils import weight_norm as WN
from torchvision import models, transforms

import imgaug
from imgaug import augmenters

import warnings
warnings.filterwarnings("ignore")

## Constants and Utilities

In [3]:
def breaker(num=50, char="*") -> None:
    """Print a separator made of ``num`` copies of ``char``.

    The separator is preceded and followed by a newline, so it is visually
    set apart from surrounding output.
    """
    separator = num * char
    print("".join(["\n", separator, "\n"]))


def get_targets(path: str) -> np.ndarray:
    """Load the ``Pawpularity`` column of ``<path>/train.csv`` as a column vector.

    Args:
        path: Directory that contains ``train.csv``.

    Returns:
        Array of shape ``(n_rows, 1)`` holding a copy of the column's values.
    """
    csv_file = os.path.join(path, "train.csv")
    frame = pd.read_csv(csv_file, engine="python")
    scores = frame["Pawpularity"].copy()
    # reshape(-1, 1) turns the flat column into one target per row.
    return scores.values.reshape(-1, 1)


def head(x, no_of_ele=5) -> None:
    """Print the first ``no_of_ele`` items of the sliceable object ``x``."""
    leading = x[:no_of_ele]
    print(leading)

    
def show(image: np.ndarray) -> None:
    """Display ``image`` in a new 9x6 matplotlib figure with the axes hidden."""
    figure_size = (9, 6)
    plt.figure(figsize=figure_size)
    plt.imshow(image)
    plt.axis("off")  # hide ticks and frame so only the picture is visible
    plt.show()

    
def get_augment(seed: int):
    """Build the seeded imgaug pipeline used to create the augmented images.

    Seeds imgaug's global RNG with ``seed`` and returns a ``SomeOf`` augmenter
    wrapping three children: a horizontal flip (p=0.5), a vertical flip
    (p=0.5) and an affine transform (scale 0.75-1.25, translation within
    +/-10 percent, rotation within +/-45 degrees).

    Args:
        seed: Seed passed to ``imgaug.seed`` and to the ``Affine`` and
            ``SomeOf`` augmenters.

    Returns:
        The configured ``SomeOf`` augmenter.
    """
    imgaug.seed(seed)

    # Children are created in the same order as they are listed below.
    flip_horizontal = augmenters.HorizontalFlip(p=0.5)
    flip_vertical = augmenters.VerticalFlip(p=0.5)
    affine = augmenters.Affine(
        scale=(0.75, 1.25),
        translate_percent=(-0.1, 0.1),
        rotate=(-45, 45),
        seed=seed,
    )

    # NOTE(review): n=None presumably means "apply all children" -- confirm
    # against the imgaug SomeOf documentation.
    return augmenters.SomeOf(
        None,
        [flip_horizontal, flip_vertical, affine],
        seed=seed,
    )

In [4]:
# Seed handed to get_augment() when the augmented image set is generated.
SEED = 49

# Width of each feature vector requested from get_features().
# NOTE(review): 1664 presumably matches the DenseNet-169 feature width -- confirm.
NUM_FEATURES = 1664

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessing applied to every image before it reaches the model:
# convert to a tensor, then normalize each channel with the given mean / std
# (the commonly used ImageNet statistics).
TRANSFORM_PRE = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225],
    ),
])

## Dataset Template

In [5]:
class DS(Dataset):
    """Dataset over an in-memory, indexable collection of images.

    Args:
        images: Indexable collection (e.g. a NumPy array whose first axis
            enumerates the images). ``None`` is treated as an empty dataset.
        transform: Optional callable applied to each image on access. When
            ``None`` the image is returned unchanged.
    """

    def __init__(self, images=None, transform=None):
        self.images    = images
        self.transform = transform

    def __len__(self):
        # The constructor defaults to images=None; report an empty dataset
        # instead of failing on a missing ``.shape`` attribute.
        if self.images is None:
            return 0
        return len(self.images)

    def __getitem__(self, idx):
        item = self.images[idx]
        # The constructor defaults to transform=None; calling None would
        # raise TypeError, so pass the raw item through in that case.
        if self.transform is None:
            return item
        return self.transform(item)

## Build Dataloader

In [6]:
def build_dataloader(images: np.ndarray, transform=None, batch_size: int = 64):
    """Wrap ``images`` in a ``DS`` dataset and return a sequential DataLoader.

    Args:
        images: Array of images; forwarded to ``DS``.
        transform: Per-image transform; forwarded to ``DS``.
        batch_size: Number of images per batch. Defaults to 64, the value
            that was previously hard-coded.

    Returns:
        A DataLoader over the dataset with ``shuffle=False``, so batches
        follow the order of ``images``.
    """
    dataset = DS(images=images, transform=transform)
    return DL(dataset, batch_size=batch_size, shuffle=False)

## Build Model

In [7]:
def build_model(pretrained: bool):
    """Create a DenseNet-169 based feature extractor.

    Args:
        pretrained: Forwarded to ``models.densenet169``. When true, every
            parameter of the returned model has ``requires_grad`` set to False.

    Returns:
        An ``nn.Module`` whose forward pass runs the backbone (minus its last
        child), a 1x1 adaptive average pool and a flatten.
    """

    class ImageModel(nn.Module):
        """DenseNet-169 without its last child, followed by pooling and flatten."""

        def __init__(self, pretrained=False):
            super().__init__()

            backbone = models.densenet169(pretrained=pretrained, progress=True)
            # Keep every child of the backbone except the final one
            # (in torchvision's DenseNet that is the ``classifier`` layer).
            kept_children = list(backbone.children())[:-1]
            self.features = nn.Sequential(*kept_children)
            if pretrained:
                self.freeze()

            # NOTE(review): torchvision's DenseNet.forward applies a ReLU
            # between the feature stack and the pooling step; this extractor
            # pools the raw feature maps instead -- confirm that is intended.
            self.features.add_module(
                "Adaptive Average Pool", nn.AdaptiveAvgPool2d(output_size=(1, 1))
            )
            self.features.add_module("Flatten", nn.Flatten())

        def freeze(self):
            """Disable gradient tracking for every parameter of the model."""
            for weight in self.parameters():
                weight.requires_grad = False

        def forward(self, x):
            return self.features(x)

    return ImageModel(pretrained=pretrained)

## Acquire Features Helper

In [8]:
def get_features(model=None, dataloader=None, num_features=None, device=None):
    """Run ``model`` over every batch of ``dataloader`` and stack the outputs.

    Args:
        model: Module that maps a batch to features.
        dataloader: Iterable yielding input batches (tensors).
        num_features: Width of each feature vector; every model output is
            reshaped to ``(-1, num_features)``.
        device: Device to run on. Defaults to the notebook-wide ``DEVICE``.

    Returns:
        NumPy array of shape ``(n_samples, num_features)``; the shape is
        ``(0, num_features)`` when ``dataloader`` yields nothing.
    """
    # Resolved at call time so callers that omit ``device`` keep using DEVICE.
    target_device = DEVICE if device is None else device

    model.to(target_device)
    model.eval()

    # Start from an empty (0, num_features) tensor: an empty dataloader still
    # yields a correctly shaped result, and no dummy row has to be sliced off.
    collected = [torch.zeros(0, num_features)]
    with torch.no_grad():
        for X in dataloader:
            output = model(X.to(target_device))
            # Move each batch to the CPU right away so device memory does not
            # grow with the dataset, and concatenate only once at the end
            # instead of re-copying the running result on every batch.
            collected.append(output.reshape(-1, num_features).cpu())

    return torch.cat(collected, dim=0).detach().numpy()

## Save Features

In [9]:
def save_features(debug: bool = False):
    """Extract DenseNet-169 features for the raw and augmented images and save them.

    Loads ``../input/pet-finder-images/Images.npy`` and builds an augmented
    copy with ``get_augment(SEED)``.

    Args:
        debug: When true, only display one randomly chosen image next to its
            augmented version and return without extracting or saving
            anything. Defaults to False, which matches the previous behavior
            (the flag used to be a local constant fixed to False, leaving the
            preview branch unreachable).

    Side effects (when ``debug`` is false):
        Writes ``./UA_features.npy`` (features of the original images),
        ``./A_features.npy`` (features of the augmented images) and
        ``./Targets.npy`` (targets read from
        ``../input/petfinder-pawpularity-score``).
    """
    images = np.load("../input/pet-finder-images/Images.npy")

    augment = get_augment(SEED)
    augmented_images = augment(images=images)

    if debug:
        # Visual sanity check of the augmentation on one random sample.
        index = r.randint(0, images.shape[0] - 1)
        show(images[index])
        show(augmented_images[index])
        return

    dataloader = build_dataloader(images=images, transform=TRANSFORM_PRE)
    augmented_dataloader = build_dataloader(images=augmented_images, transform=TRANSFORM_PRE)

    # One frozen, pretrained model is shared by both passes.
    model = build_model(pretrained=True)

    features = get_features(model, dataloader, num_features=NUM_FEATURES)
    augmented_features = get_features(model, augmented_dataloader, num_features=NUM_FEATURES)

    targets = get_targets("../input/petfinder-pawpularity-score")

    np.save("./UA_features.npy", features)
    np.save("./A_features.npy", augmented_features)
    np.save("./Targets.npy", targets)

# Run the feature extraction once; see save_features for the files it reads and writes.
save_features()

Downloading: "https://download.pytorch.org/models/densenet169-b2777c0a.pth" to /root/.cache/torch/hub/checkpoints/densenet169-b2777c0a.pth


  0%|          | 0.00/54.7M [00:00<?, ?B/s]

## End

In [10]:
breaker()
# Minutes elapsed since notebook_start_time was recorded in the first cell.
elapsed_minutes = (time() - notebook_start_time) / 60
print("Notebook Run Time : {:.2f} minutes".format(elapsed_minutes))
breaker()


**************************************************

Notebook Run Time : 1.38 minutes

**************************************************

