In [1]:
import os
import sys
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from skimage import data
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from visdom import Visdom

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.utils.data
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torchvision.utils import save_image
from torchsummary import summary

In [2]:
viz = Visdom()

Setting up a new session...


In [3]:
def init_params(model):
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.constant_(m.bias, 0)

In [4]:
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()
    def forward(self, x):
        batch_size = x.shape[0]
        return x.view(batch_size, -1)

In [5]:
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        # Encoder
        self.conv1 = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(3, 32, kernel_size=2, stride=2, padding=0)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(16 * 16 * 32, 128)

        # Latent space
        self.fc21 = nn.Linear(128, 20)
        self.fc22 = nn.Linear(128, 20)

        # Decoder
        self.fc3 = nn.Linear(args.hidden_size, args.intermediate_size)
        self.fc4 = nn.Linear(args.intermediate_size, 8192)
        self.deconv1 = nn.ConvTranspose2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.deconv2 = nn.ConvTranspose2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.deconv3 = nn.ConvTranspose2d(32, 32, kernel_size=2, stride=2, padding=0)
        self.conv5 = nn.Conv2d(32, 3, kernel_size=3, stride=1, padding=1)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def encode(self, x):
        out = self.relu(self.conv1(x))
        out = self.relu(self.conv2(out))
        out = self.relu(self.conv3(out))
        out = self.relu(self.conv4(out))
        out = out.view(out.size(0), -1)
        h1 = self.relu(self.fc1(out))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        if self.training:
            std = logvar.mul(0.5).exp_()
            eps = Variable(std.data.new(std.size()).normal_())
            return eps.mul(std).add_(mu)
        else:
            return mu

    def decode(self, z):
        h3 = self.relu(self.fc3(z))
        out = self.relu(self.fc4(h3))
        # import pdb; pdb.set_trace()
        out = out.view(out.size(0), 32, 16, 16)
        out = self.relu(self.deconv1(out))
        out = self.relu(self.deconv2(out))
        out = self.relu(self.deconv3(out))
        out = self.sigmoid(self.conv5(out))
        return out

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

    
vae = VAE()
init_params(vae)
vae.cuda()
summary(vae, (3, 224, 224))

  after removing the cwd from sys.path.


CAE(
  (encoder): Sequential(
    (0): Conv2d(3, 12, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(12, 12, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(12, 24, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (decoder): Sequential(
    (0): ConvTranspose2d(24, 12, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (1): ReLU()
    (2): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))
    (3): ConvTranspose2d(12, 12, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (4): ReLU()
    (5): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))
    (6): ConvTranspose2d(12, 3, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (7): Tanh()
  )
)

In [None]:
def loss_function(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x.view(-1, 32 * 32 * 3),
                                 x.view(-1, 32 * 32 * 3), size_average=False)

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + KLD


In [6]:
def to_img(x):
    x = 0.5*(x+1)
    x = x.clamp(0, 1)
    x = x.view(x.size(0), 3, 224, 224)
    return x

In [7]:
normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
train_ds = datasets.ImageFolder(
    'dataset/normals_cut_224_pt/',
    transforms.Compose([
        #transforms.Resize(224),
        transforms.ToTensor(),
        normalize,
    ])
)

train_loader = DataLoader(
    train_ds,
    batch_size=16,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

test_ds = datasets.ImageFolder(
    'dataset/normals_cut_224_pt/',
    transforms.Compose([
        #transforms.Resize(224),
        transforms.ToTensor(),
        normalize,
    ])
)

test_loader = DataLoader(
    test_ds,
    batch_size=16,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

In [8]:
torch.backends.cudnn.benchmark = True

In [9]:
criterion = loss_function
optimizer = torch.optim.Adam(vae.parameters(), lr=0.01, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 40, gamma=0.1)

In [10]:
train_loss_list = list()
for epoch in range(100):
    train_loss = 0
    
    vae.train()
    for i, (imgs, _) in enumerate(train_loader):
        imgs = imgs.cuda()
        
        recon_batch, mu, logvar = vae(imgs)
        loss = criterion(recon_batch, imgs, mu, logvar)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    avg_train_loss = train_loss / len(train_loader.dataset)
    
    # visualize
    viz.line(X=np.array([epoch]), Y=np.array([avg_train_loss]), win='loss', name='train_loss', update='append')
    
    print('epoch: {}, train_loss: {:.3f}'
          .format(epoch, avg_train_loss))
    
    if (epoch+1) % 20 == 0:
        pic = to_img(outputs.cpu().data)
        save_image(pic, f'./result/autoencoder{epoch+1}.png')
    
    scheduler.step()    
    

TypeError: forward() missing 1 required positional argument: 'indices'

In [None]:
cae.eval()
z = list()
z_label = list()
for i, (imgs, labels) in enumerate(test_loader):
        imgs = imgs.view(imgs.size(0), -1).cuda()
        features = cae.encoder(imgs).data.cpu().numpy()
        
        z.extend(features)
        z_label.extend(labels.numpy())
        
z = np.array(z, dtype=torch.dtype)
z_label = np.array(z_label, dtype=torch.dtype)
z.shape

In [None]:
z

In [None]:
feature_ae = pd.DataFrame({'label': z_label})
feature_ae

In [None]:
feature_ae['type'] = feature_ae['label'].apply(lambda x: 'a' if x == 0 else 'h')
color_code = {'a': '#FF0000', 'h': '#0000FF'}
feature_ae['color'] = feature_ae['type'].apply(lambda x: color_code[x])
feature_ae

In [None]:
tsne_z = TSNE(n_components=2).fit_transform(z)
tsne_z.shape

In [None]:
plt.figure(figsize=(10, 10))
plt.scatter(tsne_z[:, 0], tsne_z[:, 1], color=feature_ae['color'])
plt.title('t-SNE')
plt.xlabel('The first score')
plt.ylabel('The second score')
plt.savefig('result/tsne/cae.jpg')

In [None]:
pca_z = PCA(n_components=2).fit_transform(z)
pca_z.shape

In [None]:
plt.figure(figsize=(10, 10))
plt.scatter(pca_z[:, 0], pca_z[:, 1], color=feature_ae['color'])
plt.title('PCA')
plt.xlabel('The first principal component score')
plt.ylabel('The second principal component score')

In [None]:
kmeans = KMeans(n_clusters=2).fit(z)

In [None]:
pred = kmeans.labels_
pred

In [None]:
#Acc: 47+40/153 = 0.568
#Acc: 54+35/153 = 0.582
count = [{'a': 0, 'h': 0} for _ in range(2)]
for i, label in enumerate(pred):
    count[label][feature_ae['type'][i]] += 1
count