In [None]:
import sys
import importlib
from types import SimpleNamespace
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from scipy.special import softmax
from joblib import Parallel, delayed
import seaborn as sns

# Make the project's source and config directories importable from this notebook.
sys.path.append("../src")
sys.path.append("../configs")

# Overwrite the kernel's argv before the project modules are imported below.
# NOTE(review): presumably one of those modules parses `--config` at import
# time to build the `args` object used by later cells — confirm against ../src.
sys.argv = ['--config', 'config1']

from models import *
from loss import *
from train import *
from data import *

def gpu_unravel(batch):
    """Split a batch into its input/target dicts and move every value to the GPU.

    Args:
        batch: A two-element iterable ``(input_dict, target_dict)``; every
            dict value must expose a ``.cuda()`` method.

    Returns:
        A tuple ``(input_dict, target_dict)`` of new dicts with the same keys,
        whose values are the results of calling ``.cuda()`` on the originals.
    """
    inputs, targets = batch
    inputs_on_gpu = {key: value.cuda() for key, value in inputs.items()}
    targets_on_gpu = {key: value.cuda() for key, value in targets.items()}
    return inputs_on_gpu, targets_on_gpu


# Alias under which the embedding loop looks up its batch-unpacking function.
dict_unravel = gpu_unravel

In [None]:
# Run identifiers: `name` tags the output file, `csv` selects which CSV to embed.
name = "config1"
csv = "train"
pretrained_weights = "../models/config1_ckpt_10.pth"

# Load the image list and attach two constant columns.
# NOTE(review): presumably GLRDataset reads `img_folder` to locate the images
# and `target` is only a placeholder label — confirm against ../src/data.py.
train = pd.read_csv(f"../embeddings/{csv}.csv").assign(
    img_folder="/ssd/kaggle-landmark/input/train/",
    target=0,
)

In [None]:
# Preprocessing applied to every image: resize with SmallestMaxSize(512),
# then take a 512x512 center crop. The pipeline itself runs with p=1.0.
aug = A.Compose(
    [
        A.SmallestMaxSize(512),
        A.CenterCrop(height=512, width=512, always_apply=False, p=1.0),
    ],
    p=1.0,
)

In [None]:
# Dataset over the loaded CSV, using the normalization named in the run config.
val_ds = GLRDataset(train, aug=aug, normalization=args.normalization)

# A sequential sampler makes the loader visit dataset indices in order, so the
# n-th embedding row corresponds to the n-th dataset item.
batch_size = 512
val_dl = DataLoader(
    dataset=val_ds,
    batch_size=batch_size,
    sampler=SequentialSampler(val_ds),
    collate_fn=collate_fn,
    num_workers=32,
    pin_memory=True,
)

In [None]:
# Build the network from the run configuration, switch it to evaluation mode
# and move it to the GPU.
model = Net(args)
model.eval()
model.cuda()

# Restore the checkpoint while `model` is still the bare Net: the state dict is
# loaded into it before the DataParallel wrapper is applied below.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
model.load_state_dict(torch.load(pretrained_weights))

# Wrap the loaded network in DataParallel for inference.
model = nn.DataParallel(model)


In [None]:
def get_embeddings(dl, model, embedding_dim=512):
    """Run ``model`` over every batch of ``dl`` and collect the embeddings.

    Args:
        dl: Loader yielding batches accepted by ``dict_unravel``; must support
            ``len()`` and expose ``dataset`` (used to size the output).
        model: Callable invoked as ``model(input_dict, get_embeddings=True)``
            that returns a mapping with an ``"embeddings"`` tensor of shape
            ``(batch, embedding_dim)``.
        embedding_dim: Width of one embedding vector. Defaults to 512, the
            value that was previously hard-coded.

    Returns:
        A ``(len(dl.dataset), embedding_dim)`` NumPy array (float64, the
        ``np.zeros`` default) whose rows follow the order in which the loader
        yields samples. Rows the loader never yields remain zero.
    """
    embeddings = np.zeros((len(dl.dataset), embedding_dim))

    # Running write position. Advancing it by the size of each batch actually
    # received keeps the rows correct for any loader batch size, instead of
    # depending on a notebook-global `batch_size` matching the loader.
    offset = 0

    with torch.no_grad():
        for batch in tqdm(dl, total=len(dl)):
            input_dict, _ = dict_unravel(batch)

            # Call the module rather than .forward() so registered hooks run.
            outs = model(input_dict, get_embeddings=True)["embeddings"]
            batch_embeddings = outs.detach().cpu().numpy()

            end = offset + batch_embeddings.shape[0]
            embeddings[offset:end, :] = batch_embeddings
            offset = end

    return embeddings

# Run the model over the whole loader and keep the result as a NumPy array.
embeddings = get_embeddings(val_dl, model)

In [None]:
# Sanity check: expected to be (number of dataset samples, 512).
embeddings.shape

In [30]:
# np.save appends ".npy" itself because the file name carries no extension.
np.save(f"../embeddings/{name}_{csv}_embeddings", embeddings)