In [1]:
import os
import sys
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from skimage import data
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from visdom import Visdom

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torchvision.utils import save_image
from torchsummary import summary

In [2]:
viz = Visdom()

Setting up a new session...


In [3]:
def init_params(model):
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.constant_(m.bias, 0)

In [4]:
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()
    def forward(self, x):
        batch_size = x.shape[0]
        return x.view(batch_size, -1)

In [5]:
class CAE(nn.Module):
    def __init__(self):
        super(CAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 512, 3, stride=2, padding=1),
            nn.ReLU(),
            #nn.BatchNorm2d(512),
            nn.Conv2d(512, 256, 3, stride=2, padding=1),
            #nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.Conv2d(256, 128, 3, stride=2, padding=1),
            #nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.Conv2d(128, 64, 3, stride=2, padding=1),
            #nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.Conv2d(64, 32, 3, stride=2, padding=1),
            #nn.BatchNorm2d(32),
            nn.ReLU(True),
            #nn.Conv2d(32, 4, 1),
            #nn.BatchNorm2d(16),
            nn.ReLU(True),
        )
        
        self.decoder = nn.Sequential(
            #nn.Conv2d(4, 32, 1),
            nn.ReLU(True),
            #nn.ConvTranspose2d(16, 32, 2, stride=2, padding=0),
            #nn.BatchNorm2d(32),
            #nn.ReLU(True),
            nn.ConvTranspose2d(32, 64, 2, stride=2, padding=0),
            #nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 128, 2, stride=2, padding=0),  # b, 1, 28, 28
            #nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 256, 2, stride=2, padding=0),  # b, 1, 28, 28
            #nn.BatchNorm2d(3),
            nn.ReLU(True),
            nn.ConvTranspose2d(256, 512, 2, stride=2, padding=0),
            nn.ReLU(),
            #nn.BatchNorm2d(512),
            nn.ConvTranspose2d(512, 3, 2, stride=2, padding=0),
            nn.Tanh()
        )
        
        '''
            nn.Conv2d(12, 12, 3, stride=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(12, 24, 3, stride=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            
        )
        
        self.decoder = nn.Sequential(
            
            nn.ConvTranspose2d(24, 12, 3, stride=3, padding=1),  # b, 16, 5, 5
            nn.ReLU(),
            nn.MaxUnpool2d(2, 2),
            nn.ConvTranspose2d(12, 12, 3, stride=3, padding=1),  # b, 8, 15, 15
            nn.ReLU(),
            nn.MaxUnpool2d(2, 2),
        '''
        
        
        
    def forward(self, x, indices=None):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    
cae = CAE()
init_params(cae)
cae.cuda()
summary(cae, (3, 224, 224))

  after removing the cwd from sys.path.


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [-1, 512, 112, 112]          14,336
              ReLU-2        [-1, 512, 112, 112]               0
            Conv2d-3          [-1, 256, 56, 56]       1,179,904
              ReLU-4          [-1, 256, 56, 56]               0
            Conv2d-5          [-1, 128, 28, 28]         295,040
              ReLU-6          [-1, 128, 28, 28]               0
            Conv2d-7           [-1, 64, 14, 14]          73,792
              ReLU-8           [-1, 64, 14, 14]               0
            Conv2d-9             [-1, 32, 7, 7]          18,464
             ReLU-10             [-1, 32, 7, 7]               0
             ReLU-11             [-1, 32, 7, 7]               0
             ReLU-12             [-1, 32, 7, 7]               0
  ConvTranspose2d-13           [-1, 64, 14, 14]           8,256
             ReLU-14           [-1, 64,

class CAE(nn.Module):
    '''
    def __init__(self):
        super(CAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 256, 3, stride=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 128, 3, stride=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 64, 3, stride=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            
        )
        self.decoder = nn.Sequential(
            
            nn.ConvTranspose2d(24, 12, 3, stride=3, padding=1),  # b, 16, 5, 5
            nn.ReLU(),
            nn.MaxUnpool2d(2, 2),
            nn.ConvTranspose2d(12, 12, 3, stride=3, padding=1),  # b, 8, 15, 15
            nn.ReLU(),
            nn.MaxUnpool2d(2, 2),
            nn.ConvTranspose2d(12, 3, 3, stride=3, padding=1),  # b, 1, 28, 28
            nn.Tanh()
        )
        
        
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    '''
    def __init__(self):
        super(CAE, self).__init__()
        slef.encoder = nn.Sequential(
        )
    
cae = CAE()
init_params(cae)
cae.cuda()
#summary(cae, (3, 224, 224))

In [6]:
def to_img(x):
    x = 0.5*(x+1)
    x = x.clamp(0, 1)
    #x = x.view(x.size(0), 3, 224, 224)
    return x

In [7]:
normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
train_ds = datasets.ImageFolder(
    'dataset/normals_cut_224_pt/',
    transforms.Compose([
        #transforms.Resize(224),
        #transforms.RandomHorizontalFlip(),
        #transforms.RandomRotation(180),
        #transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
)

train_loader = DataLoader(
    train_ds,
    batch_size=16,
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

test_ds = datasets.ImageFolder(
    'dataset/normals_cut_224_pt/',
    transforms.Compose([
        #transforms.Resize(224),
        transforms.ToTensor(),
        normalize,
    ])
)

test_loader = DataLoader(
    test_ds,
    batch_size=16,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

In [8]:
torch.backends.cudnn.benchmark = True

In [9]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(cae.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 150, gamma=0.1)

In [10]:
train_loss_list = list()
for epoch in range(200):
    train_loss = 0
    
    cae.train()
    for i, (imgs, _) in enumerate(train_loader):
        imgs = imgs.cuda()
        
        outputs = cae(imgs)
        loss = criterion(outputs, imgs)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    avg_train_loss = train_loss / len(train_loader.dataset)
    
    # visualize
    viz.line(X=np.array([epoch]), Y=np.array([avg_train_loss]), win='loss', name='train_loss', update='append')
    
    print('epoch: {}, train_loss: {:.7f}'
          .format(epoch, avg_train_loss))
    
    if (epoch+1) % 20 == 0:
        pic = to_img(outputs.cpu().data[0])
        save_image(pic, f'./result/cae{epoch+1}.png', padding=0)
    
    scheduler.step()    
    

RuntimeError: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR

In [None]:
cae.eval()
z = list()
z_label = list()
for i, (imgs, labels) in enumerate(test_loader):
        imgs = imgs.cuda()
        features = cae.encoder(imgs).data.cpu().numpy()
        #features = np.reshape(len(features), -1)
        z.extend(features)
        z_label.extend(labels.numpy())
        
z = np.array(z).reshape(len(z), -1).astype(torch.dtype)
z_label = np.array(z_label, dtype=torch.dtype)
z.shape

In [None]:
z

In [None]:
feature_ae = pd.DataFrame({'label': z_label})
feature_ae

In [None]:
feature_ae['type'] = feature_ae['label'].apply(lambda x: 'a' if x == 0 else 'h')
color_code = {'a': '#FF0000', 'h': '#0000FF'}
feature_ae['color'] = feature_ae['type'].apply(lambda x: color_code[x])
feature_ae

In [None]:
tsne_z = TSNE(n_components=2).fit_transform(z)
tsne_z.shape

In [None]:
plt.figure(figsize=(10, 10))
plt.scatter(tsne_z[:, 0], tsne_z[:, 1], color=feature_ae['color'])
plt.title('t-SNE')
plt.xlabel('The first score')
plt.ylabel('The second score')
plt.savefig('result/tsne/cae.jpg')

In [None]:
pca_z = PCA(n_components=2).fit_transform(z)
pca_z.shape

In [None]:
plt.figure(figsize=(10, 10))
plt.scatter(pca_z[:, 0], pca_z[:, 1], color=feature_ae['color'])
plt.title('PCA')
plt.xlabel('The first principal component score')
plt.ylabel('The second principal component score')

In [None]:
kmeans = KMeans(n_clusters=2).fit(z)

In [None]:
pred = kmeans.labels_
pred

In [None]:

#Acc: 54+35/153 = 0.582
#Acc: 49+43/153 = 0.601
#Acc: 45+50/153 = 0.621 256->128->64
count = [{'a': 0, 'h': 0} for _ in range(2)]
for i, label in enumerate(pred):
    count[label][feature_ae['type'][i]] += 1
count