In [4]:
#%cd drive/MyDrive/Maroua/ab_interview/

/content/drive/MyDrive/Maroua/ab_interview


# Approach A : GAN 

## Motivation :
Inspired by the dummy approach one can detect the anomalies if we know the true data generating distribution of the non anomalous data by just evaluating the likelihood of a point.The problem with the dummy approach is that we are trying to fit a very complicated distribution that is clearly multimodal(many numbers) with a simple gaussian that is unimodal.

We should therefore seek a stronger model able to estimate the distribution of the data manifold .In the GAN training process the discriminator gets better and better at detecting data that is on the manifold from data that is outside the manifold and we could therefore use it to detect the points that are anomalies. The descriminator can therefore be used as a proxy for the likelihood estimation of the manifold. 



## Loadings Libs


In [5]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.distributions import Normal
from sklearn.metrics import roc_auc_score
from tqdm import tqdm
from torch.autograd import Variable
from dataset import MnistAnomaly
import torch.nn.functional as F
import matplotlib.pyplot as plt
import random
import numpy as np
np.random.seed(0)
random.seed(0)
torch.manual_seed(69)
device = "cuda" if torch.cuda.is_available() else "cpu"

## Model implementation

In [9]:
from torch import nn
latent_dim = 32


# The class for the generator part of the GAN
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            nn.ConvTranspose2d(latent_dim, 64 * 8, 4, 1, 1, bias=False),
            nn.BatchNorm2d(64 * 8),
            nn.ReLU(True),
            nn.ConvTranspose2d(64 * 8, 64 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(64 * 4, 64 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(64 * 2, 64, 4, 2, 2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 1, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, input):
        output = self.main(input)
        return output  

# The class of the Discriminator part of the GAN
class Descriminator(nn.Module):
    def __init__(self):
        super(Descriminator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(1, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 64 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64 * 2, 64 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64 * 4, 64 * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64 * 8, 1, 1, 1, 0, bias=False),
            nn.Sigmoid(),
            nn.Flatten()
            # Output: 1
        )

    def forward(self, input):
        output = self.main(input)
        return output

# Initizialisation of the weights of the models
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        torch.nn.init.zeros_(m.bias)    

# Scoring anomaly of samples using the descriminator
def get_scores(des,x_test):
    with torch.no_grad():
        likelihood = des(x_test.to(device))
    return likelihood

## Training 

In [15]:
aucs = []

for i in range(10):
    abnormal_digit = [i]
    train_set = MnistAnomaly(
        root=".", train=True, transform=transforms.ToTensor(), anomaly_categories=abnormal_digit,download=True
    )

    test_set = MnistAnomaly(
        root=".", train=False, transform=transforms.ToTensor(), anomaly_categories=abnormal_digit,download=True
    )

    device = "cuda" if torch.cuda.is_available() else "cpu"


    batch_size = 2048
    train_loader = DataLoader(train_set, batch_size=batch_size)
    print(device)
    des = Descriminator().to(device)
    gen = Generator().to(device)
    des.apply(weights_init)
    gen.apply(weights_init)
    learning_rate = 0.0002
    G_optimizer = torch.optim.Adam(gen.parameters(), lr = learning_rate, betas=(0.5, 0.999))
    D_optimizer = torch.optim.Adam(des.parameters(), lr = learning_rate, betas=(0.5, 0.999))
    criterion = torch.nn.BCELoss()

    for i in tqdm(range(30)):
      #epoch = tqdm(range(len(train_loader)))
      D_loss_list, G_loss_list = [], []
      for batch in range(len(train_loader)):
        D_optimizer.zero_grad()
        batch_images, _ = next(iter(train_loader))
        real_target = Variable(torch.ones(batch_images.size(0),1).to(device))
        fake_target = Variable(torch.zeros(batch_images.size(0),1).to(device))
        real_des_pred = des(batch_images.to(device)+0.05*torch.randn(batch_images.shape).to(device))
        real_des_loss = criterion(real_des_pred,real_target)
        real_des_loss.backward()
        noise = torch.randn(batch_images.size(0),latent_dim,1,1)
        batch_gen_images = gen(noise.to(device))
        gen_des_pred = des(batch_gen_images.detach()+0.05*torch.randn(batch_images.shape).to(device))
        fake_des_loss = criterion(gen_des_pred,fake_target)
        fake_des_loss.backward()
        D_total_loss = real_des_loss + fake_des_loss
        D_loss_list.append(D_total_loss.item())
        D_optimizer.step()
        G_optimizer.zero_grad()
        gen_output = des(batch_gen_images)
        G_loss = criterion(gen_output, real_target)
        G_loss_list.append(G_loss.item())
        G_loss.backward()
        G_optimizer.step()
        total_loss = (G_loss.item()+D_total_loss.item())/2
        #epoch.set_postfix({"epoch":i,"train_loss_D ":D_total_loss.item(),"train_loss_G":G_loss.item(),"total loss ":(G_loss.item()+D_total_loss.item())/2})
    # test model
    test_loader = DataLoader(test_set, batch_size=len(test_set))
    x_test, y_test = next(iter(test_loader))

    # compute score
    score_test = get_scores(des,x_test)

    # compute rocauc
    roc_auc = roc_auc_score(y_test, score_test.cpu())
    print(roc_auc)
    aucs.append(roc_auc)
print("roc_auc per digit:")
print(["{:0.3f} ".format(auc) for auc in aucs])
print("average roc_auc:")
print("{:0.3f}".format(torch.tensor(aucs).mean()))

cuda


100%|██████████| 30/30 [07:50<00:00, 15.68s/it]


0.6080182926829267
cuda


100%|██████████| 30/30 [07:50<00:00, 15.68s/it]


0.6665394028389623
cuda


100%|██████████| 30/30 [07:50<00:00, 15.69s/it]


0.5966293699735148
cuda


100%|██████████| 30/30 [07:50<00:00, 15.70s/it]


0.7639928303175145
cuda


100%|██████████| 30/30 [07:50<00:00, 15.67s/it]


0.7037192869296482
cuda


100%|██████████| 30/30 [07:49<00:00, 15.65s/it]


0.7762918101860878
cuda


100%|██████████| 30/30 [07:49<00:00, 15.65s/it]


0.8081386838225142
cuda


100%|██████████| 30/30 [07:49<00:00, 15.64s/it]


0.814721350990804
cuda


100%|██████████| 30/30 [07:49<00:00, 15.65s/it]


0.6988501959431822
cuda


100%|██████████| 30/30 [07:49<00:00, 15.65s/it]


0.7156012967046994
roc_auc per digit:
['0.608 ', '0.667 ', '0.597 ', '0.764 ', '0.704 ', '0.776 ', '0.808 ', '0.815 ', '0.699 ', '0.716 ']
average roc_auc:
0.715


# Approach B  : Auto Encoder

## Motivation : 
In this approach we use an auto encoder for a different purpose than reconstruction. Indeed , we fit the auto encoder to learn how to reconstruct the normal data and we hope that when encountering an anomalous point the model will struggle to reconstruct it and therefore that will be our clue that the point is an anomaly.

In [None]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.distributions import Normal
from sklearn.metrics import roc_auc_score

from dataset import MnistAnomaly

## Model implementation 

In [None]:
import torch.nn.functional as F

class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder,self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)  
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv5 = nn.Conv2d(256, 512, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.t_conv = nn.ConvTranspose2d(512, 256, 3, stride=1)
        self.t_conv1 = nn.ConvTranspose2d(256, 128, 2, stride=1)
        self.t_conv2 = nn.ConvTranspose2d(128, 64, 2, stride=1)
        self.t_conv3 = nn.ConvTranspose2d(64, 32, 2, stride=2)
        self.t_conv4 = nn.ConvTranspose2d(32, 1, 2, stride=2)

    def forward(self,x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = self.pool(x)
        x = F.relu(self.t_conv(x))
        x = F.relu(self.t_conv1(x))
        x = F.relu(self.t_conv2(x))
        x = F.relu(self.t_conv3(x))
        x = self.t_conv4(x)
        return x

In [None]:
def get_scores(autoencoder,x_test):
    with torch.no_grad():
        reconstructed_test = autoencoder(x_test.to(device))
        differences = x_test - reconstructed_test.cpu()
        diff_norm = torch.norm(differences,dim=[2,3]).view(-1)
    return F.sigmoid(-diff_norm)


## Training :
I trained the model for each anomaly class for 400 epochs on a colab pro P100 GPU . 

PS: i reordered the anomaly cases to start with the problematic ones (1,7,9) to iterate faster on the solution.

In [None]:
from tqdm import tqdm
from torch.optim import AdamW

def compress(x):
    return x.reshape(len(x), -1).sum(-1)

aucs = []
ordered = [1,9,7,2,3,4,5,6,8,0]
for i in ordered:
    abnormal_digit = [i]
    train_set = MnistAnomaly(
        root=".", train=True, transform=transforms.ToTensor(), anomaly_categories=abnormal_digit,download=True
    )

    test_set = MnistAnomaly(
        root=".", train=False, transform=transforms.ToTensor(), anomaly_categories=abnormal_digit,download=True
    )
    train_loader = DataLoader(train_set, batch_size=len(train_set))
    x, _ = next(iter(train_loader))
    x = compress(x)
    sd,m = x.std(),x.mean()
    device = "cuda" if torch.cuda.is_available() else "cpu"


    batch_size = 2048
    train_loader = DataLoader(train_set, batch_size=batch_size)

    autoencoder = AutoEncoder().to(device)
    optimizer = AdamW(params=autoencoder.parameters())
    criterion = nn.MSELoss()

    for _ in tqdm(range(100)):
      #epoch = tqdm(range(len(train_loader)))
      epoch = range(len(train_loader))
      for batch in epoch:
        batch_images, _ = next(iter(train_loader))
        #reconstructed_images = autoencoder.forward(batch_images.to(device)+(sd*torch.randn(batch_images.shape)+m).to(device))
        reconstructed_images = autoencoder.forward(batch_images.to(device))
        loss = criterion(batch_images.to(device),reconstructed_images)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #epoch.set_postfix({"train_loss : ":loss.item()})

    # test model
    test_loader = DataLoader(test_set, batch_size=len(test_set))
    x_test, y_test = next(iter(test_loader))

    # compute score
    score_test = get_scores(autoencoder,x_test)

    # compute rocauc
    roc_auc = roc_auc_score(y_test, score_test)
    print(roc_auc)
    aucs.append(roc_auc)
print("roc_auc per digit:")
print(["{:0.3f} ".format(auc) for auc in aucs])
print("average roc_auc:")
print("{:0.3f}".format(torch.tensor(aucs).mean()))

100%|██████████| 400/400 [34:05<00:00,  5.11s/it]


tensor(0.1325)




0.3737317719786022


100%|██████████| 400/400 [34:07<00:00,  5.12s/it]


tensor(0.1325)




0.5636228674440324


100%|██████████| 400/400 [34:12<00:00,  5.13s/it]


tensor(0.1325)




0.7007698290921518


100%|██████████| 400/400 [34:12<00:00,  5.13s/it]


tensor(0.1325)




0.9290432519760181


100%|██████████| 400/400 [34:32<00:00,  5.18s/it]


tensor(0.1325)




0.8408352514895538


100%|██████████| 400/400 [34:23<00:00,  5.16s/it]


tensor(0.1325)




0.7850067007871562


100%|██████████| 400/400 [34:22<00:00,  5.16s/it]


tensor(0.1325)




0.8458249388011524


100%|██████████| 400/400 [34:25<00:00,  5.16s/it]


tensor(0.1325)




0.9216806722883097


100%|██████████| 400/400 [34:24<00:00,  5.16s/it]


tensor(0.1325)




0.8556437574135591


100%|██████████| 400/400 [34:22<00:00,  5.16s/it]


tensor(0.1325)
0.8625040725824697
roc_auc per digit:
['0.374 ', '0.564 ', '0.701 ', '0.929 ', '0.841 ', '0.785 ', '0.846 ', '0.922 ', '0.856 ', '0.863 ']
average roc_auc:
0.768




# Results: 
 We see that the approach B achieves very interesting results (>80 AUC) on many anomaly cases.But we see that the approach finds some difficulties on some anomaly cases especially the 1 and 9 cases.

 For the 1 case , it is quite a surprise that the AUC is far lower than 0.5 meaning that inverting our classification scheme would lead to a better result.One explanation that i found by investgating the recontructions is that it easier to reconstruct the number one as it is mainly as straight line.


## Ideas to explore :

* trying to train a Normalizing Flow. This approach is similar to the GAN as i would train a generative model to generate my normal data but the good part in Normalizing Flows is that is allows exact likelihood computing through the chained change of variable formula. Calculating this likelihood would allow to differentiate the normal from anomalous data.
* Trying a Siamese Convolutional Network trained on the "normal classes" and missing the anomalous one. When spotting this class, identifying it as an unknown/new one.
* A paper i wanted to explore and test if i had more time : https://arxiv.org/pdf/2004.07657v4.pdf