In [1]:
import os
import random
from pathlib import Path
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset
import torch
from torch import nn
import torch.optim as optim

In [11]:
class TripletData(Dataset):
    """Dataset that yields random (anchor, positive, negative) image triplets.

    Images are looked up recursively under ``path/<class name>/`` for every
    class name in ``class_mapping``. The index given to ``__getitem__`` only
    selects the positive class; the images themselves are drawn at random, so
    the same index returns a different triplet on each call.

    Args:
        path: Root directory containing one sub-directory per class.
        transform: Callable applied to every opened (RGB) PIL image.
    """

    def __init__(self, path, transform):
        self.path = path
        self.transform = transform
        self.image_extensions = ["jpg", "jpeg", "png", "gif", "bmp", "tiff"]
        self.class_mapping = {
            1: "crescentes",
            2: "hipercellularity",
            3: "membranous",
            4: "normal",
            5: "Podocitopatia",
            6: "sclerosis",
        }
        # Derived from the mapping so the class count cannot drift away from it.
        self.categories_num = len(self.class_mapping)
        # class name -> sorted list of image paths, filled lazily on first use
        # so the directory tree is not re-scanned for every single sample.
        self._image_cache = {}

    def _list_images(self, category):
        """Return the image files of ``category``, scanning the disk only once."""
        if category not in self._image_cache:
            data_dir = Path(os.path.join(self.path, category))
            # Sorted so that a seeded ``random`` picks the same files regardless
            # of the order in which the file system enumerates them.
            self._image_cache[category] = sorted(
                file for file in data_dir.glob('**/*')
                if file.suffix.lower()[1:] in self.image_extensions
            )
        return self._image_cache[category]

    def _load(self, file):
        """Open ``file``, force three RGB channels, apply the transform, close the file."""
        with Image.open(file) as image:
            # PNG/GIF/TIFF files may be palette, grayscale or RGBA; the
            # downstream pipeline works on 3-channel input.
            return self.transform(image.convert("RGB"))

    def __getitem__(self, idx):
        """Return ``[anchor, positive, negative]`` as transformed images.

        Raises:
            ValueError: the positive class has fewer than two images.
            IndexError: the randomly chosen negative class has no images.
        """
        # our positive class for the triplet (class keys are 1-based)
        positive_category = self.class_mapping[idx % self.categories_num + 1]

        # choosing our pair of positive images (im1, im2)
        positives = self._list_images(positive_category)
        if len(positives) < 2:
            raise ValueError(
                "Class '{}' needs at least 2 images under '{}', found {}".format(
                    positive_category, self.path, len(positives)
                )
            )
        im1, im2 = random.sample(positives, 2)

        # choosing a negative class and negative image (im3)
        negative_category = random.choice(
            [name for name in self.class_mapping.values() if name != positive_category]
        )
        negatives = self._list_images(negative_category)
        if not negatives:
            raise IndexError(
                "Class '{}' has no images under '{}'".format(negative_category, self.path)
            )
        im3 = random.choice(negatives)

        return [self._load(im1), self._load(im2), self._load(im3)]

    # we'll put some value that we want since there can be far too many triplets possible
    # multiples of the number of images/ number of categories is a good choice
    def __len__(self):
        """Nominal epoch length: 50 randomly drawn triplets per class."""
        return self.categories_num * 50



class TripletLoss(nn.Module):
    """Margin-based triplet loss over squared Euclidean distances.

    For each row the loss is ``max(d(a, p) - d(a, n) + margin, 0)`` where
    ``d`` is the sum of squared differences along dimension 1; the per-row
    values are averaged into a single scalar.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def calc_euclidean(self, x1, x2):
        """Return the squared Euclidean distance (no square root), reduced over dim 1."""
        difference = x1 - x2
        return torch.sum(difference.pow(2), dim=1)

    def forward(self, anchor, positive, negative):
        """Return the mean hinge of (positive distance - negative distance + margin)."""
        to_positive = self.calc_euclidean(anchor, positive)
        to_negative = self.calc_euclidean(anchor, negative)
        violation = to_positive - to_negative + self.margin
        return torch.mean(torch.relu(violation))

In [12]:
from efficientnet_pytorch import EfficientNet

class Identity(nn.Module):
    """Parameter-free module whose forward pass returns its input unchanged.

    Assigning an instance over a layer of an existing network disables that
    layer while keeping the network's attribute structure intact.
    """

    def __init__(self):
        super().__init__()

    def forward(self, inputs):
        # Pass-through: the very same object is handed back to the caller.
        return inputs



# Pretrained EfficientNet-B0 used as the embedding backbone. Only the final
# `_fc` layer is replaced by a pass-through, so the network returns whatever
# feeds that layer; `_avg_pooling` and `_dropout` are deliberately left as they are.
model = EfficientNet.from_pretrained(
    'efficientnet-b0',
)
model._fc = Identity()

Loaded pretrained weights for efficientnet-b0


In [13]:
# Root folder holding one sub-directory per class. Set TRAIN_DATA_DIR to run on
# another machine; the original location is kept as the default.
path = os.environ.get(
    "TRAIN_DATA_DIR",
    "C:/Users/Maods/Documents/Development/Mestrado/terumo/apps/renal-pathology-retrieval/data/02_data_split/train_data/",
)
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    # NOTE(review): these look like the commonly used CIFAR-10 channel statistics,
    # while the backbone is loaded with pretrained (presumably ImageNet) weights.
    # Left unchanged because any inference-time transform must match - confirm.
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_data = TripletData(path, transform)
train_loader = torch.utils.data.DataLoader(dataset = train_data, batch_size=20, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

epochs = 100
# Our base model
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
triplet_loss = TripletLoss()
train_loss = []  # one entry per epoch: sum of the batch losses

# Training
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    for x1, x2, x3 in train_loader:
        optimizer.zero_grad()
        # anchor, positive and negative go through the same network
        e1 = model(x1.to(device))
        e2 = model(x2.to(device))
        e3 = model(x3.to(device))

        loss = triplet_loss(e1, e2, e3)
        loss.backward()
        optimizer.step()

        # Accumulate a plain float: adding the tensor itself would keep every
        # batch's autograd graph alive until the end of the epoch.
        epoch_loss += loss.item()

    train_loss.append(epoch_loss)

    print("Train Loss: {}".format(epoch_loss))

Train Loss: 225.2099609375
Train Loss: 157.10498046875
Train Loss: 84.47178649902344
Train Loss: 55.5457878112793
Train Loss: 41.929073333740234
Train Loss: 32.78470230102539
Train Loss: 27.710474014282227
Train Loss: 25.589614868164062
Train Loss: 20.47807502746582
Train Loss: 20.723966598510742
Train Loss: 21.62590789794922
Train Loss: 17.63516616821289
Train Loss: 17.389251708984375
Train Loss: 16.75670051574707
Train Loss: 16.653018951416016
Train Loss: 17.93628692626953
Train Loss: 15.407001495361328
Train Loss: 16.676788330078125
Train Loss: 16.034626007080078
Train Loss: 13.864738464355469
Train Loss: 13.505125045776367
Train Loss: 14.332818984985352
Train Loss: 15.217344284057617
Train Loss: 13.774264335632324
Train Loss: 14.844779968261719
Train Loss: 15.75804615020752
Train Loss: 14.27849292755127
Train Loss: 14.023294448852539
Train Loss: 12.201282501220703
Train Loss: 12.542526245117188
Train Loss: 10.421916007995605
Train Loss: 13.3844575881958
Train Loss: 16.1210670471191

In [24]:
import gc

# Run Python's garbage collector, then ask PyTorch to release the unused
# memory held by its CUDA caching allocator.
gc.collect()
torch.cuda.empty_cache()

In [1]:
import matplotlib.pyplot as plt

# Requires `train_loss` from the training cell: run that cell first in the
# same kernel session, otherwise this cell raises NameError.
fig, ax = plt.subplots()
# Epochs are numbered from 1 on the x-axis.
ax.plot(range(1, len(train_loss) + 1), train_loss)
ax.set(
    title="Training loss per epoch",
    xlabel="Epoch",
    ylabel="Summed triplet loss",
);

NameError: name 'train_loss' is not defined

ModuleNotFoundError: No module named 'faiss'

: 