In [1]:
import matplotlib.pyplot as plt # plotting library
import numpy as np # this module is useful to work with numerical arrays
import pandas as pd
import random
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader,random_split, Dataset
from typing import Any, Tuple
from PIL import Image
from torch import nn


class ImageNetDataset(Dataset):
    """Dataset yielding two differently sized grayscale views of the same image.

    Images are addressed by a flat integer index. The index is split into a
    folder number and a position inside that folder, ``IMAGES_PER_FOLDER``
    images per folder, and the file path is assembled from ``root`` and those
    two numbers (see ``_image_path``).

    Each item is a ``(small, big)`` pair: the image is converted to a
    3-channel grayscale image, resized to ``small_size`` x ``small_size`` and
    to ``big_size`` x ``big_size``, and both results are passed through
    ``transforms.ToTensor()``.

    Args:
        root: Path prefix to which the folder number is appended directly.
        multiplier: Number of folders covered; the dataset length is
            ``multiplier * IMAGES_PER_FOLDER``.
        small_size: Edge length of the first (smaller) returned view.
        big_size: Edge length of the second (larger) returned view.
    """

    # Number of consecutive indices mapped onto one folder.
    IMAGES_PER_FOLDER = 10000

    def __init__(self, root="ILSVRC2017_DET/ILSVRC/Data/DET/train/ILSVRC2014_train_000", multiplier=1, small_size=128, big_size=256):
        self.convert_tensor = transforms.ToTensor()
        self.root = root
        self.multiplier = multiplier
        # Built once here instead of on every __getitem__ call.
        self.to_grayscale = transforms.Grayscale(num_output_channels=3)
        self.small_transform = transforms.Compose([
            transforms.Resize((small_size, small_size))
        ])
        self.big_transform = transforms.Compose([
            transforms.Resize((big_size, big_size))
        ])

    def __len__(self):
        return self.multiplier * self.IMAGES_PER_FOLDER

    def _image_path(self, idx):
        """Build the file path for the flat, non-negative index ``idx``."""
        folder_index, in_folder_index = divmod(idx, self.IMAGES_PER_FOLDER)
        # NOTE(review): the folder number is appended unpadded after a prefix
        # ending in "000", so the name gets one character longer once
        # folder_index reaches 10 -- confirm against the on-disk layout before
        # using multiplier > 10.
        return self.root + str(folder_index) + "/ILSVRC2014_train_000" + str(folder_index) + "{:04d}".format(in_folder_index) + ".jpeg"

    def __getitem__(self, idx)  -> Tuple[Any, Any]:
        """Return the ``(small, big)`` tensor pair for index ``idx``.

        Negative indices count from the end of the dataset.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        length = len(self)
        if idx < 0:
            idx += length
        if not 0 <= idx < length:
            raise IndexError("index {} is out of range for a dataset of length {}".format(idx, length))

        # Image.open is lazy and keeps the file open; the context manager
        # closes it as soon as the grayscale copy has been produced.
        with Image.open(self._image_path(idx)) as image:
            image = self.to_grayscale(image)

        small_image = self.small_transform(image)
        big_image = self.big_transform(image)
        return self.convert_tensor(small_image), self.convert_tensor(big_image)

# Number of image pairs per batch.
batch_size = 100

# Converter from tensors back to PIL images (not used in the cells shown here).
to_img_transform = transforms.ToPILImage()

# Dataset with its default root / multiplier / sizes.
train_dataset = ImageNetDataset()

# Only the batch size is customised; every other DataLoader option keeps its
# default value.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size)

In [2]:
class Encoder(nn.Module):
    """Convolutional encoder mapping an image batch to a flat latent vector.

    Four stride-2 3x3 convolutions (3 -> 6 -> 9 -> 12 -> 15 channels by
    default) are followed by a flatten and two linear layers
    (features -> 1000 -> ``encoded_space_dim``).

    Args:
        encoded_space_dim: Size of the output vector per sample.
        image_size: Height and width of the (square) input images. It is used
            to size the first linear layer.
        in_channels: Number of channels of the input images.

    Raises:
        ValueError: If ``image_size`` is smaller than 1.

    With the default arguments the module is identical to the previous
    hard-coded version (3 x 256 x 256 input, 15 * 16 * 16 features, 500
    outputs), including sub-module names and indices.
    """

    def __init__(self, encoded_space_dim=500, image_size=256, in_channels=3):
        super().__init__()
        if image_size < 1:
            raise ValueError("image_size must be a positive integer, got {}".format(image_size))

        ### Convolutional section
        self.encoder_cnn = nn.Sequential(
            nn.Conv2d(in_channels, 6, 3, stride=2, padding=1),
            nn.BatchNorm2d(6),
            nn.ReLU(True),
            nn.Conv2d(6, 9, 3, stride=2, padding=1),
            nn.BatchNorm2d(9),
            nn.ReLU(True),
            nn.Conv2d(9, 12, 3, stride=2, padding=1),
            # NOTE(review): this stage applies ReLU before BatchNorm, unlike
            # the two stages above. Order kept as-is so behaviour and
            # state-dict keys do not change.
            nn.ReLU(True),
            nn.BatchNorm2d(12),
            nn.Conv2d(12, 15, 3, stride=2, padding=1),
            nn.ReLU(True)
        )

        # Spatial size after the four convolutions above
        # (kernel 3, stride 2, padding 1): out = (in + 2*1 - 3) // 2 + 1.
        feature_size = image_size
        for _ in range(4):
            feature_size = (feature_size + 2 * 1 - 3) // 2 + 1

        ### Flatten layer
        self.flatten = nn.Flatten(start_dim=1)
        ### Linear section
        self.encoder_lin = nn.Sequential(
            nn.Linear(15 * feature_size * feature_size, 1000),
            nn.ReLU(True),
            nn.Linear(1000, encoded_space_dim)
        )

    def forward(self, x):
        """Encode ``x`` (batch, in_channels, image_size, image_size) to (batch, encoded_space_dim)."""
        x = self.encoder_cnn(x)
        x = self.flatten(x)
        x = self.encoder_lin(x)
        return x

class Decoder(nn.Module):
    """Decoder mapping a flat latent vector back to an image batch.

    Two linear layers (``encoded_space_dim`` -> 1000 -> features) are followed
    by an unflatten to 15 feature maps and four stride-2 transposed
    convolutions (15 -> 12 -> 9 -> 6 -> ``out_channels``). A sigmoid is
    applied to the result in ``forward``.

    Args:
        encoded_space_dim: Size of the input vector per sample.
        image_size: Height and width of the (square) output images. Each of
            the four transposed convolutions doubles the spatial size, so this
            must be a positive multiple of 16.
        out_channels: Number of channels of the output images.

    Raises:
        ValueError: If ``image_size`` is not a positive multiple of 16.

    With the default arguments the module is identical to the previous
    hard-coded version (500 inputs, 15 x 16 x 16 features, 3 x 256 x 256
    output), including sub-module names and indices.
    """

    def __init__(self, encoded_space_dim=500, image_size=256, out_channels=3):
        super().__init__()
        if image_size < 16 or image_size % 16 != 0:
            raise ValueError("image_size must be a positive multiple of 16, got {}".format(image_size))

        # Each ConvTranspose2d below (kernel 3, stride 2, padding 1,
        # output_padding 1) maps a side length n to 2 * n.
        feature_size = image_size // 16

        self.decoder_lin = nn.Sequential(
            nn.Linear(encoded_space_dim, 1000),
            nn.ReLU(True),
            nn.Linear(1000, 15 * feature_size * feature_size),
            nn.ReLU(True)
        )

        self.unflatten = nn.Unflatten(dim=1,
                                      unflattened_size=(15, feature_size, feature_size))

        self.decoder_conv = nn.Sequential(
            nn.ConvTranspose2d(15, 12, 3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(12),
            nn.ReLU(True),
            nn.ConvTranspose2d(12, 9, 3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(9),
            nn.ReLU(True),
            nn.ConvTranspose2d(9, 6, 3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(6),
            nn.ReLU(True),
            nn.ConvTranspose2d(6, out_channels, 3, stride=2, padding=1, output_padding=1)
        )

    def forward(self, x):
        """Decode ``x`` (batch, encoded_space_dim) to (batch, out_channels, image_size, image_size)."""
        x = self.decoder_lin(x)
        x = self.unflatten(x)
        x = self.decoder_conv(x)
        # Squash the output into the open interval (0, 1).
        x = torch.sigmoid(x)
        return x

In [3]:
### Define the loss function
loss_fn = torch.nn.MSELoss()

### Learning rate of the optimizer (shared by the encoder and the decoder)
lr= 0.001

### Set the random seed for reproducible results
torch.manual_seed(0)

### Initialize the two networks
# NOTE(review): `d` is not used by any cell shown here; kept in case a later
# cell reads it.
d = 32

encoder = Encoder()
decoder = Decoder()

# Check if the GPU is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Selected device: {device}')

# Move both the encoder and the decoder to the selected device *before* the
# optimizer is created, so the optimizer is built from the parameters as they
# live on the training device.
encoder = encoder.to(device)
decoder = decoder.to(device)

### Define an optimizer (both for the encoder and the decoder!)
params_to_optimize = [
    {'params': encoder.parameters()},
    {'params': decoder.parameters()}
]

optim = torch.optim.Adam(params_to_optimize, lr=lr, weight_decay=1e-05)

Selected device: cuda


In [4]:
def train_epoch(encoder, decoder, device, dataloader, loss_fn, optimizer, verbose=True):
    """Run one training pass of the autoencoder over ``dataloader``.

    Every batch from ``dataloader`` must be a 2-tuple; only its second element
    is used. That element is moved to ``device``, passed through ``encoder``
    then ``decoder``, and the reconstruction is compared against the same
    element with ``loss_fn``. One optimizer step is taken per batch.

    Args:
        encoder: Module producing the latent code.
        decoder: Module reconstructing the input from the latent code.
        device: Device the batches are moved to.
        dataloader: Iterable of ``(first, second)`` batches.
        loss_fn: Callable ``loss_fn(reconstruction, target)`` returning a
            scalar tensor.
        optimizer: Optimizer holding the parameters to update.
        verbose: If true (the default), print the loss of every batch.

    Returns:
        The mean of the per-batch losses as a ``numpy.float32``; NaN when
        ``dataloader`` yields no batches.
    """
    # Set train mode for both the encoder and the decoder
    encoder.train()
    decoder.train()
    batch_losses = []
    # The first element of each pair is not needed for reconstruction training.
    for _, big_image_batch in dataloader:
        # Move tensor to the proper device
        big_image_batch = big_image_batch.to(device)
        # Encode, then decode
        decoded_data = decoder(encoder(big_image_batch))
        # Evaluate loss against the input itself
        loss = loss_fn(decoded_data, big_image_batch)
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Read the scalar once and reuse it for logging and averaging.
        batch_loss = loss.item()
        if verbose:
            print('\t partial train loss (single batch): %f' % batch_loss)
        batch_losses.append(batch_loss)

    if not batch_losses:
        # Avoid numpy's "mean of empty slice" warning; the result is NaN anyway.
        return np.float32("nan")
    # float32 keeps the result type of the previous implementation.
    return np.mean(np.asarray(batch_losses, dtype=np.float32))


In [5]:
# Per-epoch loss history: one independent, initially empty list per split.
diz_loss = {split_key: [] for split_key in ('train_loss', 'val_loss')}

In [6]:
# Number of full passes over the training loader.
num_epochs = 1

# Validation (test_epoch) and reconstruction plots (plot_ae_outputs) are
# disabled in this cell, so the "val loss" field of the summary line is a
# constant 0 placeholder and diz_loss['val_loss'] is left untouched.
for epoch in range(num_epochs):
    train_loss = train_epoch(
        encoder=encoder,
        decoder=decoder,
        device=device,
        dataloader=train_loader,
        loss_fn=loss_fn,
        optimizer=optim,
    )
    summary_line = '\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs,train_loss,0)
    print(summary_line)
    diz_loss['train_loss'].append(train_loss)

15
16
16
3840
	 partial train loss (single batch): 0.084571
15
16
16
3840
	 partial train loss (single batch): 0.087806
15
16
16
3840
	 partial train loss (single batch): 0.082230
15
16
16
3840
	 partial train loss (single batch): 0.075418
15
16
16
3840
	 partial train loss (single batch): 0.085779
15
16
16
3840
	 partial train loss (single batch): 0.080308
15
16
16
3840
	 partial train loss (single batch): 0.082636
15
16
16
3840
	 partial train loss (single batch): 0.080300
15
16
16
3840
	 partial train loss (single batch): 0.077888
15
16
16
3840
	 partial train loss (single batch): 0.081843
15
16
16
3840
	 partial train loss (single batch): 0.076886
15
16
16
3840
	 partial train loss (single batch): 0.078376
15
16
16
3840
	 partial train loss (single batch): 0.074969
15
16
16
3840
	 partial train loss (single batch): 0.078492
15
16
16
3840
	 partial train loss (single batch): 0.072491
15
16
16
3840
	 partial train loss (single batch): 0.073737
15
16
16
3840
	 partial train loss (sing