In [8]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import cv2
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from keras.layers import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
from zipfile import *
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
drive.mount("/content/drive")
!gdown --id '15CHt2ueS4c7emHpmzFHC3c0TGd51Mnvz' --output train.zip

# with ZipFile('train.zip', 'r') as zip_ref:
#   zip_ref.extractall('/content/drive/MyDrive/')

Mounted at /content/drive
Downloading...
From (original): https://drive.google.com/uc?id=15CHt2ueS4c7emHpmzFHC3c0TGd51Mnvz
From (redirected): https://drive.google.com/uc?id=15CHt2ueS4c7emHpmzFHC3c0TGd51Mnvz&confirm=t&uuid=a4d759dc-a348-483d-a36e-fe95878e06a5
To: /content/train.zip
100% 898M/898M [00:13<00:00, 65.2MB/s]


In [12]:
data_dir = '/content/drive/MyDrive'
train_dir = os.path.join(data_dir, 'train')
img_size = (128, 128)
def load_data_in_batches(directory, img_size, batch_size=1000):
    X = []
    y = []
    for folder in os.listdir(directory):
        folder_path = os.path.join(directory, folder)
        for filename in os.listdir(folder_path):
            img_path = os.path.join(folder_path, filename)
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, img_size, interpolation=cv2.INTER_AREA)
                img = img.reshape(1, -1)  # Ensure each image has 2 dimensions
                X.append(img)
                y.append(folder)
                if len(X) >= batch_size:
                    yield np.array(X), np.array(y)
                    X, y = [], []
    if X:
        yield np.array(X), np.array(y)

le = LabelEncoder()
X = np.empty((0, img_size[0] * img_size[1] * 3))
y = []

for X_batch, y_batch in load_data_in_batches(train_dir, img_size):
    y_batch = le.fit_transform(y_batch)
    X = np.concatenate((X, X_batch), axis=0)
    y = np.concatenate((y, y_batch))

y = le.fit_transform(y)

base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = BatchNormalization()(x)
x = BatchNormalization()(x)
model = Model(inputs=base_model.input, outputs=x)

X_features = model.predict(X)
X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=42)

model = IsolationForest(contamination=0.1)
model.fit(X_train)
y_pred = model.predict(X_test)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 3 dimension(s)

In [None]:
anomaly_scores = model.decision_function(X_train)
anomalies = (anomaly_scores < 0).astype(int)
# 0==normal , 1==anormal
print(anomalies)

In [None]:
#Visualization
plt.figure(figsize=(10, 8))
plt.scatter(X_test[y_pred == 1, 0], X_test[y_pred == 1, 1], c='b', label='Normal')
plt.scatter(X_test[y_pred == -1, 0], X_test[y_pred == -1, 1], c='r', label='Anomaly')
plt.title('Anomaly Detection Results')
plt.legend()
plt.show()

#Evaluation
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('F1-score:', f1_score(y_test, y_pred))