In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import cv2
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from keras.layers import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
from zipfile import *
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
drive.mount("/content/drive")
!gdown --id '15CHt2ueS4c7emHpmzFHC3c0TGd51Mnvz' --output train.zip

# with ZipFile('train.zip', 'r') as zip_ref:
#   zip_ref.extractall('/content/drive/MyDrive/')

Mounted at /content/drive
Downloading...
From (original): https://drive.google.com/uc?id=15CHt2ueS4c7emHpmzFHC3c0TGd51Mnvz
From (redirected): https://drive.google.com/uc?id=15CHt2ueS4c7emHpmzFHC3c0TGd51Mnvz&confirm=t&uuid=fd544545-b1f0-406c-b923-1b4f1ed786b5
To: /content/train.zip
100% 898M/898M [00:11<00:00, 77.2MB/s]


In [None]:
# Load every readable image under <data_dir>/train/<class_folder>/ as a 224x224
# array; the name of the containing folder is used as the image's label.
data_dir = '/content/drive/MyDrive'
train_dir = os.path.join(data_dir, 'train')
X = []
y = []
skipped_paths = []  # files OpenCV could not decode or resize, kept for inspection

# Sorted listings make the sample order (and therefore the seeded train/test split
# further down) reproducible; os.listdir alone returns entries in arbitrary order.
for folder in sorted(os.listdir(train_dir)):
    folder_path = os.path.join(train_dir, folder)
    if not os.path.isdir(folder_path):
        # Stray files next to the class folders are not classes.
        continue
    for filename in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, filename)
        # cv2.imread signals an unreadable / non-image file by returning None
        # rather than raising.
        img = cv2.imread(img_path)
        if img is None:
            skipped_paths.append(img_path)
            continue
        try:
            img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
        except cv2.error:
            # Stay best-effort on bad images, but only for OpenCV failures so that
            # unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
            skipped_paths.append(img_path)
            continue
        X.append(img)
        y.append(folder)

print(f'Loaded {len(X)} images, skipped {len(skipped_paths)} unreadable files')
if not X:
    raise RuntimeError(f'No readable images found under {train_dir}')

# Encode the folder names as integer class ids and stack the images into one array.
le = LabelEncoder()
y = le.fit_transform(y)
X = np.array(X)

#Use VGG16 transfer learning
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the backbone: it is only used as a fixed feature extractor here.
for layer in base_model.layers:
    layer.trainable = False

#Add batch normalization
# NOTE(review): these two layers are never trained in this notebook, so they run
# with their initial statistics -- presumably a near-identity rescaling. Confirm
# they are intended.
x = base_model.output
x = BatchNormalization()(x)
x = BatchNormalization()(x)
model = Model(inputs=base_model.input, outputs=x)

# The ImageNet VGG16 weights expect inputs that went through preprocess_input,
# which takes RGB images. X was filled by cv2.imread, i.e. BGR channel order, so
# the channel axis is reversed first. Working batch by batch avoids holding a
# float32 copy of the whole dataset in memory at once.
FEATURE_BATCH_SIZE = 32
if len(X) == 0:
    raise ValueError('X is empty: there are no images to extract features from')

feature_batches = []
for start in range(0, len(X), FEATURE_BATCH_SIZE):
    batch_rgb = X[start:start + FEATURE_BATCH_SIZE, ..., ::-1].astype('float32')
    feature_batches.append(model.predict_on_batch(preprocess_input(batch_rgb)))
X_features = np.concatenate(feature_batches, axis=0)

# scikit-learn estimators take a 2-D (n_samples, n_features) matrix, whereas the
# VGG16 output built with include_top=False is a convolutional feature map with
# more than two dimensions. Collapse everything after the sample axis into one
# feature vector per image; this is a no-op if the features are already 2-D.
X_flat = np.asarray(X_features).reshape(len(X_features), -1)

X_train, X_test, y_train, y_test = train_test_split(X_flat, y, test_size=0.2, random_state=42)

#Use IsolationForest for anomaly detection
# random_state pins the randomly built trees so reruns flag the same samples.
# Note that `model` is rebound here from the Keras feature extractor to the detector.
model = IsolationForest(contamination=0.1, random_state=42)
model.fit(X_train)
# y_pred holds +1 for samples judged normal and -1 for samples judged anomalous.
y_pred = model.predict(X_test)

# Visualization
# Scatter the test samples on their first two feature columns only, coloured by
# the detector's verdict (+1 = normal, -1 = anomaly).
is_normal = y_pred == 1
is_anomaly = y_pred == -1

fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(X_test[is_normal, 0], X_test[is_normal, 1], c='b', label='Normal')
ax.scatter(X_test[is_anomaly, 0], X_test[is_anomaly, 1], c='r', label='Anomaly')
ax.set_title('Anomaly Detection Results')
# Label the axes so the figure can be read without the surrounding code.
ax.set_xlabel('Feature 0')
ax.set_ylabel('Feature 1')
ax.legend()
plt.show()

# Evaluation: score the detector's predictions against the held-out labels.
# NOTE(review): y_test holds the LabelEncoder class ids derived from the folder
# names, while y_pred holds the detector's +1 / -1 flags (the plot above filters
# on exactly those two values), so these metrics compare two different label
# spaces. Presumably y_test has to be mapped to +1 (normal) / -1 (anomalous)
# first -- confirm which folders count as anomalous. With the default
# average='binary', precision_score / recall_score are expected to reject such
# mixed label sets -- verify.
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('F1-score:', f1_score(y_test