In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import copy
import re

import torch
import torch.nn as nn
import torch.utils.data as Data

import torchvision
from torch.utils import model_zoo
import torchvision.transforms as transforms

from PIL import Image

import skimage
from skimage import transform as tf
from skimage import io

In [1]:
import torch
import torchvision

# Instantiate torchvision's ResNet-101 with pretrained weights for inspection.
# NOTE: the name `model` is rebound later by the ResNet-18 embedding model cell.
model = torchvision.models.resnet101(pretrained=True)

In [19]:
# Inspect the child module at index 5 of the network
# (a Sequential of Bottleneck blocks, per the output below).
list(model.children())[5]

Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (downsample): Sequential(
      (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): Bottleneck(
    (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(128, 

In [3]:
# Device configuration
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_tp = 200 # Number of classes
num_im = 500 # Number of images in each class
total_im = num_tp * num_im # Total image count (classes x images per class)
num_epochs = 30
batch_size = 100
learning_rate = 1e-3 # Passed to the Adam optimizer as `lr`
embedding_size = 512 # Output width of the replacement `fc` layer

In [4]:
# Build model
# Problem: how to replace the last linear layer so that an embedding vector is returned instead of class scores
# Define pre-trained model ResNet18
def resnet18(pretrained=True):
    """Build a ResNet made of BasicBlock units with a [2, 2, 2, 2] layer layout.

    Args:
        pretrained (bool): When true, a state dict is fetched with
            ``model_zoo.load_url`` (using ``model_dir="./"``) and loaded into
            the network before it is returned.

    Returns:
        The constructed ``torchvision.models.resnet.ResNet`` instance.
    """
    resnet_module = torchvision.models.resnet
    net = resnet_module.ResNet(resnet_module.BasicBlock, [2, 2, 2, 2])
    if not pretrained:
        return net

    state_dict = model_zoo.load_url(
        'https://download.pytorch.org/models/resnet18-5c106cde.pth',
        model_dir="./",
    )
    net.load_state_dict(state_dict)
    return net

# Build the pretrained network once and work on a deep copy, so `mod` keeps
# its original `fc` layer while `model` gets the replacement below.
mod = resnet18()
model = copy.deepcopy(mod)
# Swap the final fully connected layer for one that maps 512 features to an
# `embedding_size`-dimensional output.
model.fc = nn.Linear(512, embedding_size, bias=True) # Adapt for our model
model = model.to(device)

# Build criterion and optimizer
# Triplet margin loss with its default arguments; Adam over every model parameter.
criterion = nn.TripletMarginLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [5]:
class TinyImageDataset(Data.Dataset):
    """Dataset over image paths serving triplets (training) or labelled images.

    Args:
        impath: Indexable collection of entries. In training mode each entry is
            one string holding at least three tab-separated image paths (it may
            end with a line terminator, e.g. when produced by ``readlines()``);
            otherwise each entry is a single image path.
        label: Indexable collection of labels aligned with ``impath``. Only read
            when ``train`` is False.
        train (bool): True for triplet mode, False for (image, label) mode.
    """

    def __init__(self, impath, label=None, train=True):
        self.path = impath
        self.label = label
        self.train = train

    def __getitem__(self, index):
        """
        Args:
        index (int): Index of Dataset

        Return:
        if train:
            img_triplet (list of 3 np.array, each 224 x 224 x 3)
        else: (raises TypeError if label==None and train==False)
            img (np.array, 224 x 224 x 3), label (str)

        Raises:
        IndexError: in training mode, if the entry holds fewer than three
            tab-separated paths.
        """
        if self.train:
            # Entries read with readlines() keep their trailing newline; strip
            # the line terminator so the last path of the triplet stays valid.
            triplet_line = self.path[index].rstrip("\r\n")
            img_triplet_path = triplet_line.split("\t")
            # Only the first three fields are used.
            # presumably ordered (query, positive, negative) — verify against the triplet file
            img_triplet = [io.imread(img_triplet_path[i]) for i in range(3)]
            return img_triplet
        else:
            img = io.imread(self.path[index])
            label = self.label[index]
            return img, label

    def __len__(self):
        """Return the number of entries in ``impath``."""
        return len(self.path)

    def __repr__(self):
        return "Triplet_Generate_TinyImageNet200"

In [6]:
# Load validation set
# NOTE(review): ImageFolder expects one sub-directory per class under the given
# root — confirm that `valfolder` is organised that way.
valfolder = "tiny-imagenet-200/val/images/"
valdataset = torchvision.datasets.ImageFolder(valfolder, transform=transforms.ToTensor()) 
# batch_size=1 with shuffle=False walks the images one at a time in dataset order.
val_loader = Data.DataLoader(valdataset, batch_size=1, shuffle=False, num_workers=8)
# Load training set for convenience of embedding
trainfolder = "tiny-imagenet-200/train"
train_embed_dataset = torchvision.datasets.ImageFolder(trainfolder, transform=transforms.ToTensor()) 
train_embed_loader = Data.DataLoader(train_embed_dataset, batch_size=1, shuffle=False, num_workers=8)  
print("Datasets Loaded!")

Datasets Loaded!


In [13]:
# Inspect the (image path, class index) pairs collected by ImageFolder.
train_embed_dataset.imgs

[('tiny-imagenet-200/train\\n01443537\\images\\n01443537_0.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_1.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_10.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_100.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_101.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_102.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_103.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_104.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_105.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_106.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_107.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_108.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_109.JPEG', 0),
 ('tiny-imagenet-200/train\\n01443537\\images\\n01443537_11.JPEG', 0)

In [38]:
# Plot training loss
# Map storages to the CPU so a checkpoint saved on a GPU machine can still be
# opened on a machine without CUDA; only the stored loss history is read here.
# NOTE: torch.load unpickles the file — only open checkpoints from a trusted source.
final = torch.load("best_checkpoint_final.pth.tar", map_location="cpu")
loss = final["Train_Loss"]

plt.plot(loss,color='blue',linestyle = '-')
plt.xlabel("Epoch")
plt.ylabel("Loss")
# `bottom` is the supported keyword for the lower y-limit; the old `ymin`
# alias is rejected by newer Matplotlib releases.
plt.ylim(bottom=0)
plt.title("Train loss of Image Ranking")
plt.savefig("Train_Loss_Plot.png")

In [11]:
def Euclid_dist(x, y, axis=None):
    """Euclidean (L2) distance between ``x`` and ``y``.

    Args:
        x, y: Arrays or array-likes whose shapes broadcast against each other.
        axis: Axis (or axes) along which the squared differences are summed.
            The default ``None`` sums over every element and yields a single
            scalar, which is the original behaviour. Passing e.g. ``axis=1``
            with a 2-D ``x`` and a 1-D ``y`` returns one distance per row.

    Returns:
        A NumPy scalar when ``axis`` is None, otherwise an array of distances.
    """
    diff = np.subtract(x, y)
    return np.sqrt(np.sum(np.square(diff), axis=axis))

# Sanity check on random data: indices of the 10 rows of `b` closest to `a`.
a = np.random.randn(10)
b = np.random.randn(100,10)
dist = [Euclid_dist(a,bb) for bb in b]
np.argsort(dist)[:10]

array([91, 56, 14,  0, 58, 27,  5,  2, 88, 94], dtype=int64)

In [19]:
# Same nearest-neighbour query at a larger scale: 100000 candidate rows of
# dimension 4096 against a single query vector `b`.
# NOTE: `a` alone holds 100000 x 4096 float64 values (roughly 3.3 GB), and the
# distances are computed one row at a time in a Python loop.
a = np.random.randn(100000,4096)
b = np.random.randn(4096)
dist = [Euclid_dist(aa,b) for aa in a]
np.argsort(dist)[:10]

In [None]:
# Test if "model" is working (will break locally because of small memory)
# Read the triplet list inside a context manager so the file handle is closed.
with open("ImagePath\\Epoch_{}_Triplet.txt".format(1), "r") as file1:
    tm = file1.readlines()
train_dataset = TinyImageDataset(impath = tm, train = True)
train_loader = Data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Forward passes only: disabling autograd avoids storing activations for a
# backward pass that never happens in this smoke test.
with torch.no_grad():
    for i, image_triplet in enumerate(train_loader):
        # The dataset yields channel-last images (H x W x 3 per its docstring),
        # so batches arrive as (N, H, W, C). The convolutional model expects
        # (N, C, H, W) on the same device as its weights.
        image_qr = image_triplet[0].float().permute(0, 3, 1, 2).to(device)
        image_ps = image_triplet[1].float().permute(0, 3, 1, 2).to(device)
        image_ng = image_triplet[2].float().permute(0, 3, 1, 2).to(device)

        embed_qr = model(image_qr)
        print(i)
        if i>=10:
            break

In [12]:
# Test nn.TripletMarginLoss
# Check that averaging the per-sample losses reproduces the default loss value.
a = torch.randn((10,20))
b = torch.randn((10,20))
c = torch.randn((10,20))

# NOTE(review): this rebinds the module-level names `criterion` and `loss`
# that other cells also assign — re-run those cells before relying on them.
# NOTE(review): `reduce=` is deprecated in newer PyTorch in favour of
# `reduction='none'` — confirm the installed version before changing it.
criterion = nn.TripletMarginLoss(reduce=False)
loss = criterion(a,b,c)
criterion2 = nn.TripletMarginLoss()
loss2 = criterion2(a,b,c)
loss.mean(), loss2

(tensor(1.1751), tensor(1.1751))

In [16]:
# Test NearestNeighbors
from sklearn.neighbors import NearestNeighbors
a = np.random.randn(100,5)
b = np.random.randn(100)  # overwritten below before it is ever read

# Index the 100 reference rows; each query will return 10 neighbours.
neigh = NearestNeighbors(n_neighbors=10)
neigh.fit(a.tolist())

b = np.random.randn(20,5)
# With return_distance=False only the neighbour indices (into `a`) are returned,
# one row of 10 indices per query row.
c = neigh.kneighbors(b.tolist(),return_distance=False)

In [20]:
# Test Counter
from collections import Counter

# Turn each neighbour index into a pseudo-label by integer division by 10.
cc = c // 10
for line in cc:
    count = Counter(line)
    mx = np.max(list(count.values()))  # highest frequency within this row
    # Positions (in the Counter's key order) of every label reaching that frequency.
    locmx = [x for x,val in enumerate(count.values()) if val==mx]
    # All most-frequent labels for the row; ties are kept rather than broken.
    arglabel = np.array(list(count.keys()))[locmx]
    print(arglabel)

[0 7]
[9]
[7 8 9]
[7 4 6]
[6 1]
[0]
[4]
[7 3 5 4]
[0 8 2]
[6 5 4]
[9 6 4]
[4 2]
[0 8 2]
[1 8 2]
[7]
[3 0]
[9 8 0 1]
[3 1 7 6]
[9 1 7 5]
[9 4 3 2]


In [4]:
# Test Counter
# Majority vote among the 30 nearest rows of `a` to the query `b`, with labels
# derived from the row index as index // num_im.
a = np.random.randn(10000,128)
b = np.random.randn(128)
bb = np.tile(b, reps=(10000,1))  # not read below; `b - a` broadcasts directly
dist = np.sum((b-a)**2, axis=1)  # squared Euclidean distance to every row of `a`
argdistidx = np.argsort(dist)[:30]
argdistlabel = argdistidx // num_im

from collections import Counter
count = Counter(argdistlabel)
mx = np.max(list(count.values()))  # highest vote count
# Positions (in the Counter's key order) of every label reaching that count.
locmx = [x for x,val in enumerate(count.values()) if val==mx]
arglabel = np.array(list(count.keys()))[locmx]  # winning label(s); ties are kept

In [None]:
# Compute time of picture transforming in skimage
import os  # imported here so the path handling below works on a fresh kernel
import skimage
from skimage import transform as tf
from skimage import io

trainfolder = "tiny-imagenet-200/train"
trainall = os.listdir(trainfolder)
# The per-class image directory is named "images" on disk (see the ImageFolder
# paths listed earlier); the lower-case spelling also works on case-sensitive
# filesystems.
traincls = os.path.join(trainfolder, os.listdir(trainfolder)[130], "images")

a = os.listdir(traincls)
picpath = os.path.join(traincls,a[10])

# Time reading the image and resizing it to 224 x 224 separately.
start = datetime.datetime.now()
pic = io.imread(picpath)
opentime = datetime.datetime.now()
pic = tf.resize(pic,output_shape=(224,224))
resztime = datetime.datetime.now()
print(opentime-start, resztime-opentime)

In [None]:
# Compare two read_pic methods in PIL and skimage; skimage is faster
import os  # imported here so the path handling below works on a fresh kernel
from skimage import io

trainfolder = "tiny-imagenet-200/train"
trainall = os.listdir(trainfolder)
# The per-class image directory is named "images" on disk (see the ImageFolder
# paths listed earlier); the lower-case spelling also works on case-sensitive
# filesystems.
traincls = os.path.join(trainfolder, os.listdir(trainfolder)[2], "images")

trans_rz = transforms.Resize(size=(224,224))

a = os.listdir(traincls)
picpath = os.path.join(traincls,a[10])

# Time Image.open and io.imread on the same file, one after the other.
start = datetime.datetime.now()
pic = Image.open(picpath)
pic1time = datetime.datetime.now()
pic2 = io.imread(picpath)
pic2time = datetime.datetime.now()
print(pic1time-start, pic2time-pic1time)

In [None]:
# Compute time of picture transforming in PIL
import os
import torchvision.transforms as transforms

trainfolder = "tiny-imagenet-200/train"
trainall = os.listdir(trainfolder)
# The per-class image directory is named "images" on disk (see the ImageFolder
# paths listed earlier); the lower-case spelling also works on case-sensitive
# filesystems.
traincls = os.path.join(trainfolder, os.listdir(trainfolder)[120], "images")

trans_rz = transforms.Resize(size=(224,224))

a = os.listdir(traincls)
picpath = os.path.join(traincls,a[0])

# Time opening, resizing and saving separately.
start = datetime.datetime.now()
pic = Image.open(picpath)
opentime = datetime.datetime.now()
pic = trans_rz(pic)
transtime = datetime.datetime.now()
# WARNING: this writes the resized image back to `picpath`, replacing the
# original training image on disk.
pic.save(picpath,"JPEG")
savetime = datetime.datetime.now()
print(opentime-start, transtime-opentime, savetime-transtime)