In [6]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset
import os
from pathlib import Path
from PIL import Image
from torch import nn

In [69]:
class FolderDataset(Dataset):
    """
    Creates a PyTorch dataset from folder, returning two tensor images.
    Args: 
    main_dir : directory where images are stored.
    transform (optional) : torchvision transforms to be applied while making dataset
    """

    def __init__(self, main_dir, transform=None):
        self.main_dir = main_dir
        self.transform = transform
        self.all_imgs = self._get_images_path(main_dir)

    def __len__(self):
        return len(self.all_imgs)

    def __getitem__(self, idx):
        img_loc = os.path.join(self.main_dir, self.all_imgs[idx])
        image = Image.open(img_loc).convert("RGB")

        if self.transform is not None:
            tensor_image = self.transform(image)

        return tensor_image, tensor_image
    
    def _get_images_path(self, main_dir):
        image_extensions = ["jpg", "jpeg", "png", "gif", "bmp", "tiff"]
        data_dir = Path(main_dir)
        images_path = [file for file in data_dir.glob(
            '**/*') if file.suffix.lower()[1:] in image_extensions]
        return images_path
    

In [70]:
class ConvEncoder(nn.Module):
    """
    A simple Convolutional Encoder Model
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 16, (3, 3), padding=(1, 1))
        self.relu1 = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d((2, 2))

        self.conv2 = nn.Conv2d(16, 32, (3, 3), padding=(1, 1))
        self.relu2 = nn.ReLU(inplace=True)
        self.maxpool2 = nn.MaxPool2d((2, 2))

        self.conv3 = nn.Conv2d(32, 64, (3, 3), padding=(1, 1))
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool3 = nn.MaxPool2d((2, 2))

        self.conv4 = nn.Conv2d(64, 128, (3, 3), padding=(1, 1))
        self.relu4 = nn.ReLU(inplace=True)
        self.maxpool4 = nn.MaxPool2d((2, 2))

        self.conv5 = nn.Conv2d(128, 256, (3, 3), padding=(1, 1))
        self.relu5 = nn.ReLU(inplace=True)
        self.maxpool5 = nn.MaxPool2d((2, 2))

    def forward(self, x):
        # Downscale the image with conv maxpool etc.
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)

        x = self.conv3(x)
        x = self.relu3(x)
        x = self.maxpool3(x)

        x = self.conv4(x)
        x = self.relu4(x)
        x = self.maxpool4(x)

        x = self.conv5(x)
        x = self.relu5(x)
        x = self.maxpool5(x)
        
        return x

In [71]:
class ConvDecoder(nn.Module):
    """
    A simple Convolutional Decoder Model
    """

    def __init__(self):
        super().__init__()
        self.deconv1 = nn.ConvTranspose2d(256, 128, (2, 2), stride=(2, 2))
        self.relu1 = nn.ReLU(inplace=True)

        self.deconv2 = nn.ConvTranspose2d(128, 64, (2, 2), stride=(2, 2))
        self.relu2 = nn.ReLU(inplace=True)

        self.deconv3 = nn.ConvTranspose2d(64, 32, (2, 2), stride=(2, 2))
        self.relu3 = nn.ReLU(inplace=True)

        self.deconv4 = nn.ConvTranspose2d(32, 16, (2, 2), stride=(2, 2))
        self.relu4 = nn.ReLU(inplace=True)

        self.deconv5 = nn.ConvTranspose2d(16, 3, (2, 2), stride=(2, 2))
        self.relu5 = nn.ReLU(inplace=True)

    def forward(self, x):
         # Upscale the image with convtranspose etc.
        x = self.deconv1(x)
        x = self.relu1(x)

        x = self.deconv2(x)
        x = self.relu2(x)

        x = self.deconv3(x)
        x = self.relu3(x)

        x = self.deconv4(x)
        x = self.relu4(x)

        x = self.deconv5(x)
        x = self.relu5(x)
        return x

In [50]:


model = ConvEncoder()
model(torch.rand((1,3,224,224))).shape

torch.Size([1, 256, 7, 7])

In [13]:
from efficientnet_pytorch import EfficientNet

In [63]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x



model = EfficientNet.from_pretrained('efficientnet-b0')
# model._avg_pooling = Identity()
# model._dropout = Identity()
model._fc = Identity()



Loaded pretrained weights for efficientnet-b0


In [54]:
# model = nn.Sequential(*list(model.children())[:-2])
model._avg_pooling = nn.AdaptiveAvgPool2d(1)
model._fc = nn.Sequential(
    nn.Flatten(),
    nn.Linear(1280, 256 * 7 * 7)  # Adjust based on the specific EfficientNet variant
)
model._fc=Identity()

In [66]:
class CustomEncoder(nn.Module):
    def __init__(self, model,embedding_size):
        super(CustomEncoder, self).__init__()
        self.encoder = model
        self.encoder._avg_pooling = nn.AdaptiveAvgPool2d(1)  # Modify average pooling
        self.encoder._fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(embedding_size, 256 * 7 * 7)  # Adjust based on the specific EfficientNet variant
        )  # Modify the output layer

    def forward(self, x):
        return self.encoder(x).view(x.shape[0], 256, 7, 7)

# Create an instance of the custom encoder
custom_encoder = CustomEncoder(model=model, embedding_size=1280)

# Test the encoder with a random input of size (batch_size, channels, height, width)
input_tensor = torch.randn(150, 3, 224, 224)  # Adjust the input size as needed
output = custom_encoder(input_tensor)
print(output.shape)  

torch.Size([150, 256, 7, 7])


In [67]:
def train_step(encoder, decoder, train_loader, loss_fn, optimizer, device):
    """
    Performs a single training step
    Args:
    encoder: A convolutional Encoder. E.g. torch_model ConvEncoder
    decoder: A convolutional Decoder. E.g. torch_model ConvDecoder
    train_loader: PyTorch dataloader, containing (images, images).
    loss_fn: PyTorch loss_fn, computes loss between 2 images.
    optimizer: PyTorch optimizer.
    device: "cuda" or "cpu"
    Returns: Train Loss
    """
    #  Set networks to train mode.
    encoder.train()
    decoder.train()

    for batch_idx, (train_img, target_img) in enumerate(train_loader):
        # Move images to device
        train_img = train_img.to(device)
        target_img = target_img.to(device)
        
        # Zero grad the optimizer
        optimizer.zero_grad()
        # Feed the train images to encoder
        enc_output = encoder(train_img)
        # The output of encoder is input to decoder !
        dec_output = decoder(enc_output)
        
        # Decoder output is reconstructed image
        # Compute loss with it and orginal image which is target image.
        loss = loss_fn(dec_output, target_img)
        # Backpropogate
        loss.backward()
        # Apply the optimizer to network by calling step.
        optimizer.step()
    # Return the loss
    return loss.item()

def val_step(encoder, decoder, val_loader, loss_fn, device):
    """
    Performs a single training step
    Args:
    encoder: A convolutional Encoder. E.g. torch_model ConvEncoder
    decoder: A convolutional Decoder. E.g. torch_model ConvDecoder
    val_loader: PyTorch dataloader, containing (images, images).
    loss_fn: PyTorch loss_fn, computes loss between 2 images.
    device: "cuda" or "cpu"
    Returns: Validation Loss
    """
    
    # Set to eval mode.
    encoder.eval()
    decoder.eval()
    
    # We don't need to compute gradients while validating.
    with torch.no_grad():
        for batch_idx, (train_img, target_img) in enumerate(val_loader):
            # Move to device
            train_img = train_img.to(device)
            target_img = target_img.to(device)

            # Again as train. Feed encoder the train image.
            enc_output = encoder(train_img)
            # Decoder takes encoder output and reconstructs the image.
            dec_output = decoder(enc_output)

            # Validation loss for encoder and decoder.
            loss = loss_fn(dec_output, target_img)
    # Return the loss
    return loss.item()

In [80]:
import torch
import torchvision.transforms as T
import torch.optim as optim
import sys

transforms = T.Compose([T.ToTensor()]) # Normalize the pixels and convert to tensor.
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize((0.4914, 0.4822, 0.4465),
                            (0.2023, 0.1994, 0.2010)),
])

path= "C:/Users/Maods/Documents/Development/Mestrado/terumo/apps/renal-pathology-retrieval/data/02_data_split/train_data/"
full_dataset = FolderDataset(path, transform) # Create folder dataset.

train_size = 0.75
val_size = 1 - train_size

# Split data to train and test
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size]) 

# Create the train dataloader
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
 
# Create the validation dataloader
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32)



loss_fn = nn.MSELoss() # We use Mean squared loss which computes difference between two images.

encoder = ConvEncoder() # Our encoder model
decoder = ConvDecoder() # Our decoder model

device = "cuda"  # GPU device

# Shift models to GPU
encoder.to(device)
decoder.to(device)

# Both the enocder and decoder parameters
autoencoder_params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = optim.Adam(autoencoder_params, lr=1e-3) # Adam Optimizer

max_loss = sys.maxsize

# Time to Train !!!
EPOCHS = 10
# Usual Training Loop
for epoch in range(EPOCHS):
        train_loss = train_step(encoder, decoder, train_loader, loss_fn, optimizer, device=device)
        
        print(f"Epochs = {epoch}, Training Loss : {train_loss}")
        
        val_loss = val_step(encoder, decoder, val_loader, loss_fn, device=device)
        
        print(f"Epochs = {epoch}, Validation Loss : {val_loss}")

        # Simple Best Model saving
        if val_loss < max_loss:
            print("Validation Loss decreased, saving new best model")
            torch.save(encoder.state_dict(), "encoder_model.pt")
            torch.save(decoder.state_dict(), "decoder_model.pt")






# Create the full dataloader
# full_loader = torch.utils.data.DataLoader(full_dataset, batch_size=32)

Epochs = 0, Training Loss : 0.526820182800293
Epochs = 0, Validation Loss : 0.4606035351753235
Validation Loss decreased, saving new best model
Epochs = 1, Training Loss : 0.3397727310657501
Epochs = 1, Validation Loss : 0.4182150959968567
Validation Loss decreased, saving new best model
Epochs = 2, Training Loss : 0.2586904764175415
Epochs = 2, Validation Loss : 0.389679491519928
Validation Loss decreased, saving new best model
Epochs = 3, Training Loss : 0.21819019317626953
Epochs = 3, Validation Loss : 0.3790033459663391
Validation Loss decreased, saving new best model
Epochs = 4, Training Loss : 0.2112281173467636
Epochs = 4, Validation Loss : 0.3613903522491455
Validation Loss decreased, saving new best model
Epochs = 5, Training Loss : 0.3766658306121826
Epochs = 5, Validation Loss : 0.3549455404281616
Validation Loss decreased, saving new best model


KeyboardInterrupt: 

In [82]:
full_dataset = FolderDataset(path, transform) # Create folder dataset.

train_size = 0.75
val_size = 1 - train_size

# Split data to train and test
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size]) 

# Create the train dataloader
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
 
# Create the validation dataloader
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32)



loss_fn = nn.MSELoss() # We use Mean squared loss which computes difference between two images.

encoder = CustomEncoder(model=EfficientNet.from_pretrained('efficientnet-b0'), embedding_size=1280) # Our encoder model
decoder = ConvDecoder() # Our decoder model

device = "cuda"  # GPU device

# Shift models to GPU
encoder.to(device)
decoder.to(device)

# Both the enocder and decoder parameters
autoencoder_params = list(encoder.parameters()) + list(decoder.parameters())
optimizer = optim.Adam(autoencoder_params, lr=1e-3) # Adam Optimizer

max_loss = sys.maxsize

# Time to Train !!!
EPOCHS = 5
# Usual Training Loop
for epoch in range(EPOCHS):
        train_loss = train_step(encoder, decoder, train_loader, loss_fn, optimizer, device=device)
        
        print(f"Epochs = {epoch}, Training Loss : {train_loss}")
        
        val_loss = val_step(encoder, decoder, val_loader, loss_fn, device=device)
        
        print(f"Epochs = {epoch}, Validation Loss : {val_loss}")

        # Simple Best Model saving
        if val_loss < max_loss:
            print("Validation Loss decreased, saving new best model")
            torch.save(encoder.state_dict(), "encoder_model.pt")
            torch.save(decoder.state_dict(), "decoder_model.pt")

Loaded pretrained weights for efficientnet-b0
Epochs = 0, Training Loss : 0.713015079498291
Epochs = 0, Validation Loss : 0.7330640554428101
Validation Loss decreased, saving new best model
Epochs = 1, Training Loss : 0.34706607460975647
Epochs = 1, Validation Loss : 0.7095859050750732
Validation Loss decreased, saving new best model
Epochs = 2, Training Loss : 0.5655218362808228
Epochs = 2, Validation Loss : 0.6460226774215698
Validation Loss decreased, saving new best model
Epochs = 3, Training Loss : 0.48382991552352905
Epochs = 3, Validation Loss : 0.6189921498298645
Validation Loss decreased, saving new best model
Epochs = 4, Training Loss : 0.40977224707603455
Epochs = 4, Validation Loss : 0.6004781126976013
Validation Loss decreased, saving new best model


In [99]:

encoder = ConvEncoder()
encoder(torch.rand((1,3,224,224)))


encoder = CustomEncoder(model=EfficientNet.from_pretrained('efficientnet-b0'), embedding_size=1280)
output =encoder(torch.rand((40,3,224,224)))

Loaded pretrained weights for efficientnet-b0


In [101]:
output.reshape((output.shape[0], -1)).shape

torch.Size([40, 12544])

In [102]:
encoder = EfficientNet.from_pretrained('efficientnet-b0')
encoder._fc = Identity()
output = encoder(torch.rand((40,3,224,224)))

Loaded pretrained weights for efficientnet-b0


In [105]:
output.reshape((output.shape[0], -1)).shape

torch.Size([40, 1280])