In [None]:
# Mount Google Drive so the dataset and the saved detector under
# /content/drive/MyDrive are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
pip install alibi-detect

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting alibi-detect
  Downloading alibi_detect-0.11.0-py3-none-any.whl (337 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.9/337.9 KB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
Collecting transformers<5.0.0,>=4.0.0
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m109.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m70.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m19.1 MB/s[0m eta [36m0:00

In [None]:
import os
import logging
import matplotlib.pyplot as plt
import numpy as np
import cv2
from PIL import Image

import tensorflow as tf
tf.keras.backend.clear_session()
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Dense, Layer, Reshape, InputLayer
from tqdm import tqdm

In [None]:
from alibi_detect.models.tensorflow import elbo
from alibi_detect.od import OutlierVAE
from alibi_detect.utils.fetching import fetch_detector
from alibi_detect.utils.perturbation import apply_mask
from alibi_detect.saving import save_detector, load_detector
from alibi_detect.utils.visualize import plot_instance_score, plot_feature_outlier_image

In [None]:
# Root folder of the training images; the loading cell expects one
# sub-folder per class directly underneath it.
image_directory = '/content/drive/MyDrive/Outlier Detection/data/train/'
# Side length (pixels) every training image is resized to.
SIZE = 64
# NOTE(review): not referenced by any later cell shown in this notebook.
dataset = []

In [None]:
from sklearn.model_selection import train_test_split
import os

def load_labeled_images(root_dir, class_names, size):
    """Load the ``.jpg`` images of each class folder as fixed-size arrays.

    Parameters
    ----------
    root_dir : str
        Directory that contains one sub-folder per class.
    class_names : iterable of str
        Names of the class sub-folders to read; each name is also used as the
        label of every image found inside that folder.
    size : int
        Width and height (in pixels) every image is resized to.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked image arrays and the matching class-name labels.

    Notes
    -----
    Pixels are kept in the channel order returned by ``cv2.imread`` (OpenCV
    decodes to BGR); no colour conversion is applied before the array is
    wrapped in a PIL image for resizing.
    """
    images, labels = [], []
    for class_name in class_names:
        class_path = os.path.join(root_dir, class_name)
        # Sorted listing keeps the sample order stable between runs.
        for image_name in sorted(os.listdir(class_path)):
            # splitext copes with names that have no dot or several dots;
            # indexing split('.')[1] raised IndexError on the former and read
            # the wrong component on the latter. The match is case-insensitive
            # so '.JPG' files are picked up too.
            if os.path.splitext(image_name)[1].lower() != '.jpg':
                continue
            image_path = os.path.join(class_path, image_name)
            pixels = cv2.imread(image_path)
            if pixels is None:
                # cv2.imread signals an unreadable/corrupt file with None
                # instead of raising; skip it rather than crash in PIL.
                logging.warning("Skipping unreadable image: %s", image_path)
                continue
            resized = Image.fromarray(pixels, 'RGB').resize((size, size))
            images.append(np.array(resized))
            labels.append(class_name)
    return np.array(images), np.array(labels)


# Only directories are class folders; stray files next to them are ignored.
classes = sorted(
    entry for entry in os.listdir(image_directory)
    if os.path.isdir(os.path.join(image_directory, entry))
)
train_data, train_labels = load_labeled_images(image_directory, classes, SIZE)

In [None]:
# Flatten every image into a single feature row, giving the 2-D
# (samples, features) matrix that is handed to SMOTE.fit_resample below.
# nx, ny, nz are kept so the image layout can be restored afterwards.
# Note: this overwrites train_data, so the cell cannot be re-run on its own.
nsamples, nx, ny, nz = train_data.shape
flat_width = nx * ny * nz
train_data = np.reshape(train_data, (nsamples, flat_width))
train_data.shape

(2178, 12288)

### SMOTE Oversampling

In [None]:
from imblearn.over_sampling import SMOTE
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the SMOTE oversampling object.
# SMOTE synthesises new minority-class rows using random choices, so the seed
# is pinned to make the oversampled set (and anything trained on it)
# reproducible from a fresh kernel.
smote = SMOTE(sampling_strategy='minority', random_state=42)

# Apply SMOTE to the flattened (n_samples, n_features) training matrix
X_train_oversampled, y_train_oversampled = smote.fit_resample(train_data, train_labels)

# Restore the (n_samples, nx, ny, nz) image layout recorded before flattening
X_train_oversampled = X_train_oversampled.reshape(-1, nx, ny, nz)
print(X_train_oversampled.shape)

y_train_oversampled = y_train_oversampled.reshape(-1,)
print(y_train_oversampled.shape)

(2324, 64, 64, 3)
(2324,)


In [None]:
# Cast to float32 and divide by 255 before the data is fed to the detector
# (presumably 8-bit pixel values, giving a [0, 1] range — TODO confirm).
# This rebinds X_train_oversampled in place: running the cell a second time
# without re-running the SMOTE cell would divide by 255 again.
X_train_oversampled = X_train_oversampled.astype('float32') / 255.

### Data Augmentation

In [None]:
# Augmentation settings: small random rotations, zooms, shifts and shears,
# plus left-right mirroring; pixels exposed by a transform are filled from
# the nearest edge value.
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)

# Fit the generator on the oversampled training set.
# NOTE(review): no featurewise/ZCA option is enabled above, so this fit may be
# unnecessary — confirm against the ImageDataGenerator documentation.
datagen.fit(X_train_oversampled)

In [None]:
# Draw exactly `num_augmented` randomly transformed images (with their labels)
# from the generator.
batch_size = 32
num_augmented = 100  # Number of augmented images to generate
# A fixed seed makes the shuffling and the random transforms repeatable.
augmented_data = datagen.flow(
    X_train_oversampled, y_train_oversampled, batch_size=batch_size, seed=42
)
augmented_images = []
augmented_labels = []
num_collected = 0
# Keep pulling batches until enough images are collected. The previous
# `range(num_augmented // batch_size)` loop rounded down and therefore produced
# only 96 images for a request of 100.
while num_collected < num_augmented:
    x_batch, y_batch = next(augmented_data)
    if len(x_batch) == 0:
        break  # nothing to draw from; avoid looping forever
    augmented_images.append(x_batch)
    augmented_labels.append(y_batch)
    num_collected += len(x_batch)
# The last batch may overshoot the target, so trim to the requested count.
augmented_images = np.concatenate(augmented_images, axis=0)[:num_augmented]
augmented_labels = np.concatenate(augmented_labels, axis=0)[:num_augmented]

In [None]:
# Append the augmented samples (and their labels) after the oversampled
# training set; both arrays are joined along the sample axis (axis 0, the
# default for np.concatenate).
image_parts = (X_train_oversampled, augmented_images)
label_parts = (y_train_oversampled, augmented_labels)
X_train_augmented = np.concatenate(image_parts)
y_train_augmented = np.concatenate(label_parts)

In [None]:
# Sanity check: images and labels should report the same number of samples.
X_train_augmented.shape, y_train_augmented.shape

((2420, 64, 64, 3), (2420,))

### Load or define outlier detector

In [None]:
# Size of the VAE latent vector; also used as the decoder's input width and
# passed to OutlierVAE.
latent_dim = 1024

# Convolutional encoder: three stride-2 'same' convolutions, each halving the
# spatial resolution (64x64x3 -> 32x32x64 -> 16x16x128 -> 8x8x512).
encoder_net = tf.keras.Sequential(
    [
        InputLayer(input_shape=(64, 64, 3)),
        Conv2D(64, 4, strides=2, padding='same', activation=tf.nn.relu),
        Conv2D(128, 4, strides=2, padding='same', activation=tf.nn.relu),
        Conv2D(512, 4, strides=2, padding='same', activation=tf.nn.relu),
    ]
)
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table.
encoder_net.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 64)        3136      
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 16, 128)       131200    
                                                                 
 conv2d_2 (Conv2D)           (None, 8, 8, 512)         1049088   
                                                                 
Total params: 1,183,424
Trainable params: 1,183,424
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Shape of the encoder's last convolutional output (8 x 8 x 512). The decoder
# projects the latent vector to this many units and reshapes to it before
# upsampling back to image size.
dense_dim = [8, 8, 512]

In [None]:
# Decoder: mirror of the encoder. The latent vector is projected to
# prod(dense_dim) units, reshaped to dense_dim, then upsampled by three
# stride-2 'same' transposed convolutions (8 -> 16 -> 32 -> 64 pixels per
# side). The final layer has 3 channels with a sigmoid activation.
decoder_layers = [
    InputLayer(input_shape=(latent_dim,)),
    Dense(np.prod(dense_dim)),
    Reshape(target_shape=dense_dim),
    Conv2DTranspose(256, 4, strides=2, padding='same', activation=tf.nn.relu),
    Conv2DTranspose(64, 4, strides=2, padding='same', activation=tf.nn.relu),
    Conv2DTranspose(3, 4, strides=2, padding='same', activation='sigmoid'),
]
decoder_net = tf.keras.Sequential(decoder_layers)

In [None]:
# Build the VAE-based outlier detector from the encoder/decoder defined above
# (a ready-made VAE model could be passed instead of the two separate nets).
# NOTE(review): the detector is later saved under a "thresh_10" directory and
# the recorded outputs report od.threshold == 0.1, which disagrees with the
# 0.20 set here — confirm which threshold is intended.
od = OutlierVAE(
    encoder_net=encoder_net,
    decoder_net=decoder_net,
    latent_dim=latent_dim,
    score_type='mse',   # outlier score = MSE of the reconstruction error
    threshold=0.20,     # scores at or above this are flagged downstream
    samples=2,
)



In [None]:
# Train the detector with the ELBO loss for 25 epochs, without progress output.
# NOTE(review): training uses X_train_oversampled; the X_train_augmented array
# assembled in the augmentation cells is never passed to the detector —
# confirm whether the augmented set was meant to be used here.
od.fit(
    X_train_oversampled,
    loss_fn=elbo,
    cov_elbo={'sim': .05},
    epochs=25,
    verbose=False,
)

In [None]:
from alibi_detect.saving import save_detector, load_detector

# Persist the trained outlier detector to Drive so the evaluation cells can
# reload it without retraining.
# NOTE(review): the directory is named "thresh_10" while the detector above is
# constructed with threshold=0.20 — confirm the name still matches the model.
save_detector(od, "/content/drive/MyDrive/Outlier Detection/data/thresh_10/")



Current threshold value is:  0.1


### Evaluate on test data

In [None]:
# Reload the detector saved by the training section; this rebinds `od`, so
# the cells below score with the persisted model rather than the in-memory one.
od = load_detector("/content/drive/MyDrive/Outlier Detection/data/thresh_10")



In [None]:
# Show the threshold stored in the loaded detector; the cells below flag an
# image as an outlier when its instance score is >= this value.
od.threshold

0.1

In [None]:
from alibi_detect.saving import save_detector, load_detector
import pandas as pd
import cv2
import numpy as np
import os
import warnings
warnings.filterwarnings('ignore')

# Score every .jpg under the test directory and collect one row per image.
result_columns = ['filename', 'Outlier Score', 'Is Outlier']

test_dir = '/content/drive/MyDrive/Outlier Detection/data/test/'

# List all class folders in the data directory
class_folders = [os.path.join(test_dir, folder) for folder in os.listdir(test_dir)]

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: DataFrame.append copied the whole frame on every call and no longer
# exists in pandas >= 2.0.
records = []

# preprocess the images and score them
for folder in class_folders:
    if not os.path.isdir(folder):
        continue  # stray files next to the class folders
    for img in os.listdir(folder):
        # splitext handles names with no dot (split('.')[1] raised IndexError)
        # and names with several dots; the match is case-insensitive.
        if os.path.splitext(img)[1].lower() != '.jpg':
            continue
        image_path = os.path.join(folder, img)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for unreadable files instead of raising.
            logging.warning("Skipping unreadable image: %s", image_path)
            continue
        # NOTE(review): the training cell resizes with PIL's Image.resize while
        # this cell uses cv2.resize — confirm both yield comparable inputs.
        image = cv2.resize(image, (64, 64))
        image = image / 255.
        image = np.expand_dims(image, axis=0)  # add the batch axis

        # Only the instance-level score is needed; the separate od.predict
        # call was dropped because its result was never used.
        _, iscore = od.score(image, outlier_perc=5, batch_size=16)
        score = float(iscore[0])
        records.append({
            'filename': img,
            'Outlier Score': score,
            'Is Outlier': int(score >= od.threshold),
        })

# Passing the columns explicitly keeps the layout even when no image was found.
results_df = pd.DataFrame(records, columns=result_columns)
results_df

Unnamed: 0,filename,Outlier Score,Is Outlier
0,199773781.jpg,0.032362,0
1,183330277.jpg,0.048528,0
2,183331658.jpg,0.057214,0
3,183330274.jpg,0.049467,0
4,199773955.jpg,0.032371,0
...,...,...,...
2173,198671169.jpg,0.053198,0
2174,224749368.jpg,0.053904,0
2175,224755455.jpg,0.022977,0
2176,224754767.jpg,0.022994,0


In [None]:
# Show only the images that were flagged as outliers.
results_df.loc[results_df['Is Outlier'].eq(1)]

Unnamed: 0,filename,Outlier Score,Is Outlier
14,231316658.jpg,0.101615,1
17,175559398.jpg,0.104023,1
40,227896614.jpg,0.112172,1
61,200089107.jpg,0.127574,1
83,225767415.jpg,0.105515,1
...,...,...,...
1889,230148017.jpg,0.110676,1
1899,231941862.jpg,0.101226,1
1909,232125443.jpg,0.109094,1
2139,231656363.jpg,0.156422,1


### Test on single image

In [None]:
from urllib.request import urlopen
from alibi_detect.saving import save_detector, load_detector
import pandas as pd
import cv2
import numpy as np

# load the saved model
od = load_detector("/content/drive/MyDrive/Outlier Detection/data/thresh_10/")

# load image from URL
url = 'https://encrypted-tbn2.gstatic.com/shopping?q=tbn:ANd9GcSYVy2C1Y1KGOfBop-hGFWnzJulZ5V5ulRiflvfEDqjW7gnO-wJYYAb1ma_QTq7WQ&usqp=CAc'
# The context manager closes the HTTP response even if reading fails, and the
# timeout keeps the cell from hanging indefinitely on a stalled connection.
with urlopen(url, timeout=30) as resp:
    image = np.asarray(bytearray(resp.read()), dtype="uint8")
image = cv2.imdecode(image, cv2.IMREAD_COLOR)
if image is None:
    # cv2.imdecode returns None when the bytes are not a decodable image.
    raise ValueError(f"Could not decode an image from {url}")

# Same preprocessing as the test-set cell: resize to 64x64, divide by 255,
# and add a leading batch axis.
image = cv2.resize(image, (64, 64))
image = image / 255.
image = np.expand_dims(image, axis=0)

# Only the instance-level score is used; the separate od.predict call was
# dropped because its result was never read.
_, iscore = od.score(image, outlier_perc=5, batch_size=16)
print("Outlier score:", iscore[0])
print("Is this image an outlier (0 for NO and 1 for YES)?", int(iscore[0] >= od.threshold))



Outlier score: 0.16582707551797385
Is this image an outlier (0 for NO and 1 for YES)? 1
