# **Homework 8 - Anomaly Detection**

If there are any questions, please contact mlta-2022spring-ta@googlegroups.com

Slide:    [Link]()　Kaggle: [Link](https://www.kaggle.com/c/ml2022spring-hw8)

# Set up the environment


## Package installation

In [14]:
# # Training progress bar
# !pip install -q qqdm

## Downloading data

In [3]:
# !wget https://github.com/MachineLearningHW/HW8_Dataset/releases/download/v1.0.0/data.zip

In [4]:
# !unzip data.zip

# Import packages

In [1]:
import os
# Expose only GPU index 7 to CUDA. This cell runs before the cell that imports
# torch. NOTE(review): the index is specific to the original machine — adjust
# it (or remove this line) when running elsewhere.
os.environ["CUDA_VISIBLE_DEVICES"] = "7"

In [2]:
import random
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.models as models
from torch.optim import Adam, AdamW
from qqdm import qqdm, format_str
import pandas as pd

# Loading data

In [3]:
# Load the training and testing image arrays from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which is unsafe for untrusted files — confirm it is really needed.
train = np.load('data/trainingset.npy', allow_pickle=True)
test = np.load('data/testingset.npy', allow_pickle=True)

# Show the shape of each split, one per line.
print(train.shape, test.shape, sep='\n')

(100000, 64, 64, 3)
(19636, 64, 64, 3)


## Random seed
Set the random seed to a certain value for reproducibility.

In [4]:
def same_seeds(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (plus the CUDA
    generators when CUDA is available), then puts cuDNN into deterministic
    mode with benchmarking switched off.

    Args:
        seed: Seed value handed to each generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


same_seeds(16)

# Autoencoder

# Models & loss

In [5]:
class fcn_autoencoder(nn.Module):
    """Fully connected autoencoder over flattened 64x64x3 images.

    The encoder maps 12288 -> 64 -> 32 -> 16 -> 3 with ReLU between the linear
    layers (none after the final projection). The decoder mirrors it,
    3 -> 16 -> 32 -> 64 -> 12288, with ReLU between layers and a final Tanh.

    An earlier variant that was tried used the widths
    12288 -> 128 -> 64 -> 12 -> 3 (and the mirrored decoder).
    """

    def __init__(self):
        super().__init__()
        widths = [64 * 64 * 3, 64, 32, 16, 3]

        # Encoder: Linear + ReLU pairs, without an activation on the code.
        enc_layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            enc_layers += [nn.Linear(n_in, n_out), nn.ReLU()]
        enc_layers.pop()
        self.encoder = nn.Sequential(*enc_layers)

        # Decoder: same widths in reverse, ending in Tanh instead of ReLU.
        mirrored = widths[::-1]
        dec_layers = []
        for n_in, n_out in zip(mirrored[:-1], mirrored[1:]):
            dec_layers += [nn.Linear(n_in, n_out), nn.ReLU()]
        dec_layers[-1] = nn.Tanh()
        self.decoder = nn.Sequential(*dec_layers)

    def forward(self, x):
        """Encode ``x``, zero latent component 2, and decode the result."""
        latent = self.encoder(x)
        # NOTE(review): the third latent component is overwritten with 0 on
        # every call; this looks like a latent-manipulation experiment —
        # confirm it is meant to stay on during training and inference.
        latent[:, 2] = 0
        return self.decoder(latent)


class conv_autoencoder(nn.Module):
    """Convolutional autoencoder with batch normalisation.

    Encoder: two convolutions (3 -> 128 -> 256 channels), each followed by
    BatchNorm2d and ReLU.
    Decoder: two transposed convolutions (256 -> 128 -> 3); the first is
    followed by BatchNorm2d and ReLU, the last by Tanh.
    Every (transposed) convolution uses a 4x4 kernel, stride 2 and padding 1.

    Earlier variants that were tried used three stages without batch norm,
    with 3 -> 12 -> 24 -> 48 and 3 -> 16 -> 32 -> 64 channels.
    """

    def __init__(self):
        super().__init__()
        # Geometry shared by all convolutions and transposed convolutions.
        geometry = dict(kernel_size=4, stride=2, padding=1)

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 128, **geometry),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, **geometry),
            nn.BatchNorm2d(256),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, **geometry),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 3, **geometry),
            nn.Tanh(),
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))
    
    
class VAE(nn.Module):
    """Convolutional variational autoencoder.

    A shared trunk of three convolutions (3 -> 32 -> 64 -> 128 channels) feeds
    two parallel heads (128 -> 256 channels each) that produce the latent mean
    and log-variance. The decoder is four transposed convolutions
    (256 -> 128 -> 64 -> 32 -> 3) ending in Tanh. All (transposed) convolutions
    use a 4x4 kernel, stride 2 and padding 1.

    An earlier, narrower variant used 3 -> 12 -> 24 channels in the trunk and
    24 -> 48 in the heads, with a 48 -> 24 -> 12 -> 3 decoder.

    NOTE(review): both heads end in ReLU, so ``mu`` and ``logvar`` can never be
    negative — confirm this restriction is intended.
    """

    def __init__(self):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        # Head producing the latent mean.
        self.enc_out_1 = nn.Sequential(
            nn.Conv2d(128, 256, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        # Head producing the latent log-variance.
        self.enc_out_2 = nn.Sequential(
            nn.Conv2d(128, 256, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        )

    def encode(self, x):
        """Return ``(mu, logvar)`` computed from the shared trunk features."""
        h1 = self.encoder(x)
        return self.enc_out_1(h1), self.enc_out_2(h1)

    def reparametrize(self, mu, logvar):
        """Sample ``z = mu + eps * exp(0.5 * logvar)`` with ``eps ~ N(0, 1)``.

        The noise is drawn with ``torch.randn_like`` so it always has the same
        device and dtype as the inputs. Choosing the device from
        ``torch.cuda.is_available()`` would break a CPU-resident model on a
        machine that happens to have a GPU.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def decode(self, z):
        """Map a latent sample back to image space."""
        return self.decoder(z)

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for the input batch ``x``."""
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar


def loss_vae(recon_x, x, mu, logvar, criterion):
    """Return the VAE loss: reconstruction term plus KL divergence.

    Args:
        recon_x: Reconstructed (generated) images.
        x: Original images.
        mu: Latent mean.
        logvar: Latent log-variance.
        criterion: Callable computing the reconstruction loss from
            ``(recon_x, x)``.

    Returns:
        ``criterion(recon_x, x)`` plus
        ``-0.5 * sum(1 + logvar - mu^2 - exp(logvar))``.
    """
    reconstruction = criterion(recon_x, x)
    # Per-element KL integrand: 1 + logvar - mu^2 - exp(logvar).
    kl_terms = -(mu.pow(2) + logvar.exp()) + 1 + logvar
    kl_divergence = torch.sum(kl_terms) * -0.5
    return reconstruction + kl_divergence

In [6]:
class multi_fcn(nn.Module):
    """Autoencoder with five fully connected encoders and one shared decoder.

    Each encoder maps a flattened 64x64x3 image to a 3-dimensional code using
    a different stack of hidden widths. The five codes are concatenated into a
    15-dimensional latent vector, which the single decoder
    (15 -> 64 -> 1024 -> 12288, ending in Tanh) maps back to image space.

    During training, Gaussian noise with standard deviation 0.2 is added to
    the concatenated latent vector. In ``eval()`` mode no noise is added, so
    reconstructions (and the anomaly scores derived from them) are
    deterministic. The noise is created on the latent's own device, so the
    module also runs on CPU.
    """

    def __init__(self):
        super(multi_fcn, self).__init__()

        self.encoder1 = nn.Sequential(
            nn.Linear(64 * 64 * 3, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 3)
        )

        self.encoder2 = nn.Sequential(
            nn.Linear(64 * 64 * 3, 1024),
            nn.ReLU(),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 3)
        )

        self.encoder3 = nn.Sequential(
            nn.Linear(64 * 64 * 3, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 3)
        )

        self.encoder4 = nn.Sequential(
            nn.Linear(64 * 64 * 3, 1024),
            nn.ReLU(),
            nn.Linear(1024, 64),
            nn.ReLU(),
            nn.Linear(64, 3)
        )

        self.encoder5 = nn.Sequential(
            nn.Linear(64 * 64 * 3, 256),
            nn.ReLU(),
            nn.Linear(256, 192),
            nn.ReLU(),
            nn.Linear(192, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 3)
        )

        # The decoder input width is 5 encoders x 3 latent dimensions each.
        self.decoder = nn.Sequential(
            nn.Linear(15, 64),
            nn.ReLU(),
            nn.Linear(64, 1024),
            nn.ReLU(),
            nn.Linear(1024, 64 * 64 * 3),
            nn.Tanh()
        )

    def forward(self, x):
        """Reconstruct ``x`` from the concatenated codes of all five encoders.

        Args:
            x: Batch of flattened images with 64 * 64 * 3 features per row.

        Returns:
            Tensor with the same shape as ``x``, with values in [-1, 1].
        """
        encoders = (
            self.encoder1,
            self.encoder2,
            self.encoder3,
            self.encoder4,
            self.encoder5,
        )
        latent = torch.cat([encoder(x) for encoder in encoders], 1)

        # Latent noise is a training-time regulariser only. randn_like keeps
        # the noise on the same device and dtype as the latent vector.
        if self.training:
            latent = latent + torch.randn_like(latent) * 0.2

        return self.decoder(latent)

# Dataset module

Module for loading and preprocessing the data. The transform used here rescales image pixel values from [0, 255] to [-1.0, 1.0].


In [7]:
def _scale_to_unit_range(x):
    """Cast ``x`` to float32 and map pixel values from [0, 255] to [-1.0, 1.0]."""
    return 2. * x.to(torch.float32) / 255. - 1.


class CustomTensorDataset(TensorDataset):
    """Image dataset over a single tensor, with a built-in pixel transform.

    If the last dimension of ``tensors`` has size 3, the tensor is treated as
    channels-last and permuted with ``(0, 3, 1, 2)`` to channels-first. Each
    item is passed through ``self.transform`` before being returned.

    The transform is a named module-level function rather than a lambda so
    that dataset instances can be pickled, which DataLoader worker processes
    require on platforms that start workers by spawning.

    Args:
        tensors: A single tensor holding all images; indexing its first
            dimension yields one image.
    """

    def __init__(self, tensors):
        if tensors.shape[-1] == 3:
            tensors = tensors.permute(0, 3, 1, 2)
        self.tensors = tensors
        self.transform = _scale_to_unit_range

    def __getitem__(self, index):
        """Return the item(s) at ``index`` mapped to [-1.0, 1.0]."""
        x = self.tensors[index]

        if self.transform:
            # mapping images to [-1.0, 1.0]
            x = self.transform(x)

        return x

    def __len__(self):
        """Return the number of images (size of the first dimension)."""
        return len(self.tensors)

# Training

## Configuration


In [8]:
# Training hyperparameters
# Training hyperparameters
num_epochs = 500
batch_size = 10000
learning_rate = 1e-3

# Build training dataloader (batches are drawn in random order)
x = torch.from_numpy(train)
train_dataset = CustomTensorDataset(x)

train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)

# Model: select a model type from {'fcn', 'cnn', 'vae', 'multi_fcn'}
model_type = 'multi_fcn'
# Map each name to its class and instantiate only the selected one, so the
# three unused networks are never built (and never consume the seeded RNG).
model_classes = {'fcn': fcn_autoencoder, 'cnn': conv_autoencoder, 'vae': VAE, 'multi_fcn': multi_fcn}
model = model_classes[model_type]().cuda()

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
# print(model)

## Training loop

In [10]:
best_loss = np.inf
model.train()

qqdm_train = qqdm(range(num_epochs), desc=format_str('bold', 'Description'))
for epoch in qqdm_train:

    tot_loss = []
    for data in train_dataloader:
        # --- load: move the batch to the GPU; the fully connected models
        # take one flat feature vector per image.
        img = data.float().cuda()
        if model_type in ['fcn', 'multi_fcn']:
            img = img.view(img.shape[0], -1)

        # (Input-noise augmentation was tried here and is disabled:
        #  img = img + torch.randn_like(img) * 0.2)

        # --- forward: the VAE returns (reconstruction, mu, logvar).
        output = model(img)
        if model_type in ['vae']:
            loss = loss_vae(output[0], img, output[1], output[2], criterion)
        else:
            loss = criterion(output, img)

        # --- backward: gradient step on this batch.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        tot_loss.append(loss.item())

    # --- checkpoint whenever the epoch's mean loss is the best seen so far.
    mean_loss = np.mean(tot_loss)
    if mean_loss < best_loss:
        best_loss = mean_loss
        torch.save(model, 'best_model_{}.pt'.format(model_type))

    # --- progress-bar info.
    qqdm_train.set_infos({
        'epoch': f'{epoch + 1:.0f}/{num_epochs:.0f}',
        'loss': f'{mean_loss:.4f}',
    })

    # --- always keep the most recent model as well.
    torch.save(model, 'last_model_{}.pt'.format(model_type))

 [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m                                                                                       
 [99m0/[93m500[0m[0m  [99m        -        [0m  [99m   -    [0m                                                                                     
[1mDescription[0m   0.0% |                                                                                                   |[K[F[K[F [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m    [1mepoch[0m   [1mloss[0m                                                                       
 [99m1/[93m500[0m[0m  [99m00:00:15<[93m02:05:39[0m[0m  [99m0.07it/s[0m  [99m1/500[0m  [99m0.2831[0m                                                                      
[1mDescription[0m   0.2% |                                                                                                   |[K[F[K[F [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m    [1mepoch[0

# Inference
The trained model is loaded and used to generate an anomaly score for every test image.

## Initialize
- dataloader
- model
- prediction file

In [11]:
eval_batch_size = 200

# Where the anomaly scores are written.
out_file = 'prediction.csv'

# Unreduced squared error, so a score can be computed per image.
eval_loss = nn.MSELoss(reduction='none')

# Testing dataloader: fixed order, so scores line up with the test-set rows.
data = torch.tensor(test, dtype=torch.float32)
test_dataset = CustomTensorDataset(data)
test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(
    test_dataset,
    sampler=test_sampler,
    batch_size=eval_batch_size,
    num_workers=1,
)

# Load the trained model. The best-loss checkpoint is used; switch to
# f'last_model_{model_type}.pt' to evaluate the final epoch instead.
# NOTE(review): torch.load unpickles the whole model object here; recent
# PyTorch releases default to weights_only=True — confirm against the
# installed version.
checkpoint_path = f'best_model_{model_type}.pt'
model = torch.load(checkpoint_path)
model.eval()

In [12]:
# Anomaly score per test image: square root of the summed squared
# reconstruction error.
anomality = []
with torch.no_grad():
    for data in test_dataloader:
        img = data.float().cuda()
        if model_type in ['fcn', 'multi_fcn']:
            # Fully connected models take one flat vector per image.
            img = img.view(img.shape[0], -1)

        output = model(img)
        if model_type in ['vae']:
            # The VAE returns (reconstruction, mu, logvar).
            output = output[0]

        # Sum the element-wise error over every non-batch dimension.
        squared_error = eval_loss(output, img)
        if model_type in ['fcn', 'multi_fcn']:
            loss = squared_error.sum(-1)
        else:
            loss = squared_error.sum([1, 2, 3])
        anomality.append(loss)

anomality = torch.cat(anomality, axis=0)
anomality = torch.sqrt(anomality).reshape(len(test), 1).cpu().numpy()

# One row per test image, with the row index written as the 'ID' column.
df = pd.DataFrame(anomality, columns=['score'])
df.to_csv(out_file, index_label = 'ID')

In [None]:
# ref: https://colab.research.google.com/github/smartgeometry-ucl/dl4g/blob/master/variational_autoencoder.ipynb

import torchvision
import matplotlib.pyplot as plt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def to_img(x):
    """Map values from the model's [-1, 1] range to the displayable [0, 1].

    The dataset transform scales pixels to [-1, 1] and every model ends in
    Tanh, so clamping alone would discard the negative half of the range.
    """
    return ((x + 1) / 2).clamp(0, 1)

def show_image(img):
    """Plot a single channels-first image tensor given in the [-1, 1] range."""
    img = to_img(img)
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

def visualise_output(images, model):
    """Run ``images`` through ``model`` and plot a grid of reconstructions.

    Args:
        images: Batch of channels-first 64x64 images in the [-1, 1] range.
        model: Trained autoencoder matching the global ``model_type``.
    """
    with torch.no_grad():

        images = images.to(device)
        batch = images.shape[0]

        # Both fully connected model types take one flat vector per image.
        if model_type in ['fcn', 'multi_fcn']:
            images = images.view(batch, -1)

        images = model(images)

        # The VAE returns (reconstruction, mu, logvar).
        if model_type in ['vae']:
            images = images[0]

        images = images.cpu().reshape(batch, 3, 64, 64)
        images = to_img(images)
        np_imagegrid = torchvision.utils.make_grid(images[1:50], 10, 5).numpy()
        plt.imshow(np.transpose(np_imagegrid, (1, 2, 0)))
        plt.show()

# next(iterator) is the supported way to fetch one batch; iterators have no
# .next() method in Python 3 or in recent PyTorch releases.
images = next(iter(test_dataloader))

print('original images:')
# pad_value=-1 keeps the grid padding black after the [-1, 1] -> [0, 1] rescale.
show_image(torchvision.utils.make_grid(images[1:50], 10, 5, pad_value=-1))
plt.show()

print('model reconstruction:')
visualise_output(images, model)

In [None]:
# Fetch the first test batch; next(iterator) replaces the .next() method,
# which does not exist in Python 3 or in recent PyTorch releases.
images = next(iter(test_dataloader))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
with torch.no_grad():
    images = images.to(device)
    # Both fully connected model types take one flat vector per image.
    if model_type in ['fcn', 'multi_fcn']:
        images = images.view(images.shape[0], -1)
    images = model(images)
    # The VAE returns (reconstruction, mu, logvar).
    if model_type in ['vae']:
        images = images[0]

images = images.cpu()
# -1 infers the batch size instead of hard-coding it.
images = images.reshape(-1, 3, 64, 64)

img = images[8]
# Model outputs lie in [-1, 1] (Tanh); rescale to [0, 1] so imshow does not
# clip away the negative half of the range.
plt.imshow(((img + 1) / 2).clamp(0, 1).permute(1, 2, 0).detach().numpy())
plt.show()

In [34]:
# Passing each baseline takes about 5 hours of training (200-300 epochs).
# The fully connected network (FCN) passes the baselines more easily.
# Raising the dimension with epoch=2000 and batch_size=128 makes results worse.



# ======================================================== #



# strong

# The TAs used three FCNs:
# build three different encoder architectures (only the encoders differ; there is a single decoder),
# concatenate the three resulting representations directly,
# which gives a longer vector,
# and feed that straight into the decoder.
# e.g., if each encoder outputs 3 dimensions, the stacked vector has 9, so the decoder's first layer must take 9 inputs.



# ======================================================== #



# report

# fully connected
# Passing an image through the encoder gives a 3-dimensional vector, e.g., 2, 4, 6.
# Feeding that to the decoder should roughly give back the original image.
# The idea is to multiply one dimension by 2 (e.g., it becomes 4, 4, 6) and then reconstruct, e.g., z[1] = z[1] * 2,
# and see whether anything looks different.

# What gets modified is the encoder output;
# what gets compared is the decoder output.

# In forward:
# x = self.encoder(x)
# x[:, i] = x[:, i] * 2 or x[:, i] = 0 -> because the first dimension indexes the images in the batch
# x = self.decoder(x)