In [1]:
import argparse
import os
import shutil
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from torchvision.models import *

from triplet_whale_loader import *
from tripletnet import Tripletnet
from configure import *

config = Config()

Using GPU


In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.base_model = resnet50(pretrained=True)
        self.base_model.fc = nn.Linear(in_features=2048, out_features=128, bias=True)
        # print(self.base_model)

    def forward(self, x):
        # print("X_shape {}".format(x.shape))
        return self.base_model(x)

In [3]:
model = Net()
tnet = Tripletnet(model)
if config.USE_GPU:
    tnet.cuda()

In [4]:
# optionally resume from a checkpoint
resume = "./runs/WhaleTriplet/checkpoint.pth.tar"
if os.path.isfile(resume):
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)
    # args.start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    tnet.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})"
            .format(resume, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(resume))

=> loading checkpoint './runs/WhaleTriplet/checkpoint.pth.tar'


RuntimeError: While copying the parameter named embeddingnet.base_model.fc.weight, whose dimensions in the model are torch.Size([128, 2048]) and whose dimensions in the checkpoint are torch.Size([1000, 2048]).

In [5]:
import glob
train_files = glob.glob("../../input/train/*.jpg")
test_files = glob.glob("../../input/test/*.jpg")

In [6]:
test_transform = transforms.Compose([
    transforms.Resize(config.SZ),
    transforms.ToTensor(),
])

def load_image(path):
    img = default_image_loader(path)
    img = test_transform(img)
    img = img.numpy()
    return img

In [7]:
def data_generator(fpaths, batch=16):
    i = 0
    for path in fpaths:
        if i == 0:
            imgs = []
            fnames = []
        i += 1
        img = load_image(path)
        imgs.append(img)
        fnames.append(os.path.basename(path))
        if i == batch:
            i = 0
            imgs = np.array(imgs)
            yield fnames, imgs

    if i < batch:
        imgs = np.array(imgs)
        yield fnames, imgs

    raise StopIteration()

In [8]:
train_preds = []
train_file_names = []
i = 1
for fnames, imgs in tqdm(data_generator(train_files, batch=32), total=len(train_files) // 32):
    i += 1
    imgs = torch.from_numpy(imgs)
    if config.USE_GPU:
        imgs = imgs.cuda()
    imgs = Variable(imgs)
    predicts = tnet.get_embedded(imgs)
    predicts = predicts.cpu().data.numpy().tolist()
    train_preds += predicts
    train_file_names += fnames

train_preds = np.array(train_preds)

  0%|          | 0/307 [00:00<?, ?it/s]

Variable containing:
-0.0893  0.0567  0.1261  ...  -0.2041  0.2087 -0.0320
-0.0596  0.0323  0.1305  ...  -0.2116  0.1858 -0.0492
-0.1289  0.0715  0.1337  ...  -0.2039  0.2231 -0.0079
          ...             ⋱             ...          
-0.1197  0.0831  0.1238  ...  -0.1902  0.2328 -0.0242
-0.0983  0.0539  0.1238  ...  -0.2083  0.2063 -0.0301
-0.0518  0.0242  0.1385  ...  -0.2153  0.1948 -0.0454
[torch.cuda.FloatTensor of size 32x128 (GPU 0)]



  0%|          | 1/307 [00:01<09:03,  1.77s/it]

Variable containing:
-0.0784  0.0350  0.1329  ...  -0.2153  0.1945 -0.0292
-0.1009  0.0584  0.1270  ...  -0.2116  0.2009 -0.0212
-0.1115  0.0640  0.1295  ...  -0.2005  0.2066 -0.0299
          ...             ⋱             ...          
-0.0996  0.0617  0.1275  ...  -0.2005  0.2183 -0.0306
-0.1297  0.0914  0.1253  ...  -0.1954  0.2170 -0.0219
-0.1092  0.0761  0.1207  ...  -0.1995  0.2202 -0.0310
[torch.cuda.FloatTensor of size 32x128 (GPU 0)]



  1%|          | 2/307 [00:02<06:49,  1.34s/it]

Variable containing:
-0.0970  0.0693  0.1176  ...  -0.2057  0.2293 -0.0394
-0.1179  0.0814  0.1221  ...  -0.1944  0.2408 -0.0356
-0.0732  0.0355  0.1298  ...  -0.2135  0.2035 -0.0369
          ...             ⋱             ...          
-0.1070  0.0767  0.1191  ...  -0.2025  0.2176 -0.0256
-0.0851  0.0498  0.1353  ...  -0.2180  0.1902 -0.0236
-0.0868  0.0432  0.1333  ...  -0.2168  0.1954 -0.0176
[torch.cuda.FloatTensor of size 32x128 (GPU 0)]



  1%|          | 3/307 [00:03<05:59,  1.18s/it]

Variable containing:
-0.0974  0.0626  0.1255  ...  -0.2031  0.2098 -0.0321
-0.1004  0.0549  0.1308  ...  -0.2067  0.2077 -0.0250
-0.1033  0.0560  0.1250  ...  -0.2108  0.2135 -0.0168
          ...             ⋱             ...          
-0.1333  0.0962  0.1216  ...  -0.1989  0.2273 -0.0173
-0.1091  0.0593  0.1315  ...  -0.2002  0.2061 -0.0341
-0.1171  0.0916  0.1211  ...  -0.1884  0.2391 -0.0356
[torch.cuda.FloatTensor of size 32x128 (GPU 0)]



  1%|▏         | 4/307 [00:04<05:38,  1.12s/it]

KeyboardInterrupt: 

In [None]:
print(tnet)

In [None]:
test_preds = []
test_file_names = []
for fnames, imgs in tqdm(data_generator(test_files, batch=32), total=len(test_files) // 32):
    imgs = torch.from_numpy(imgs)
    if config.USE_GPU:
        imgs = imgs.cuda()
    imgs = Variable(imgs)
    predicts = tnet.embeddingnet(imgs)
    predicts = predicts.cpu().data.numpy().tolist()
    test_preds += predicts
    test_file_names += fnames

test_preds = np.array(test_preds)

In [None]:
def load_a_image(path):
    img = default_image_loader(path)
    img = test_transform(img)
    return img.unsqueeze(0)

In [None]:
def img_to_class(img):
    tmp = config.TRAIN_DF.loc[config.TRAIN_DF["Image"] == img]
    l = tmp.Id.values[0]
    return l

In [None]:
import operator
def nearest(arr, train_preds):
    dist_dict = {}
    arr = np.array(arr)
    for i, t in enumerate(train_preds):
        fname = train_file_names[i]
        k = img_to_class(fname)
        t = np.array(t)
        dist = np.sqrt(np.sum(np.power((t-arr), 2), axis=0))
        if not k in dist_dict:
            dist_dict[k] = dist
        else:
            if dist_dict[k] > dist:
                dist_dict[k] = dist
    dist_dict = sorted(dist_dict.items(), key=operator.itemgetter(1))
    dist_dict = list(dist_dict)
    # print(dist_dict)
    return [cls[0] for cls in dist_dict[:5]]

In [None]:
for i, row in tqdm(config.TEST_DF.iterrows()):
    fname = row["Image"]
    fname = f"{config.TEST_DIR}/{fname}"
    img = load_a_image(fname)
    if config.USE_GPU:
        img = img.cuda()
    img = Variable(img)
    predict = tnet.embeddingnet(img)
    predict = predict.cpu().data.numpy().tolist()
    clss = nearest(predict[0], train_preds)
    string = ""
    for c in clss:
        string += c + " "
    row["Id"] = string

In [None]:
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=6)
neigh.fit(train_preds)

distances_test, neighbors_test = neigh.kneighbors(test_preds)
distances_test, neighbors_test = distances_test.tolist(), neighbors_test.tolist()

In [None]:
preds_str = []

file_id_mapping = {k: v for k, v in zip(config.TRAIN_DF.Image.values, config.TRAIN_DF.Id.values)}

for filepath, distance, neighbour_ in zip(test_file_names, distances_test, neighbors_test):
    sample_result = []
    sample_classes = []
    for d, n in zip(distance, neighbour_):
        train_file = train_files[n].split(os.sep)[-1]
        class_train = file_id_mapping[train_file]
        sample_classes.append(class_train)
        sample_result.append((class_train, d))

    if "new_whale" not in sample_classes:
        sample_result.append(("new_whale", 0.1))
    sample_result.sort(key=lambda x: x[1])
    sample_result = sample_result[:5]
    preds_str.append(" ".join([x[0] for x in sample_result]))

df = pd.DataFrame(preds_str, columns=["Id"])
df['Image'] = [x.split(os.sep)[-1] for x in test_file_names]

In [None]:
df.to_csv('triplet_resnet50_new_model.csv', index=False)