<a href="https://colab.research.google.com/github/123prashanth123/Fault-Detection-System/blob/main/All%20ML%20Classifiers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook Setup

In [None]:
from IPython.display import clear_output

# Install the augmentation dependencies into the notebook runtime.
# `-q` keeps pip quiet; `--upgrade` pulls the latest imgaug release.
!pip install -q --upgrade imgaug
!pip install -q imagecorruptions

# Wipe whatever the install commands printed so the cell output stays clean.
clear_output()

In [None]:
import torch
from torch import nn
from torchvision import models, transforms
from torch.utils.data import DataLoader as DL
from torch.utils.data import Dataset

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

import cv2
import re
import os
import random as r
import numpy as np
import matplotlib.pyplot as plt
import pickle
import imgaug
from imgaug import augmenters
from time import time

In [None]:
def breaker():
    """Print a row of 50 asterisks with a blank line above and below it."""
    separator = "*" * 50
    print("\n{}\n".format(separator))

def head(x=None, no_of_ele=5):
    """Print the leading ``no_of_ele`` items of the sliceable ``x``.

    ``x`` must support slicing; the default ``None`` is only a placeholder
    and is not a usable argument.
    """
    preview = x[:no_of_ele]
    print(preview)

In [None]:
# Per-image preprocessing applied before feature extraction: convert the image
# array to a tensor, then normalize each channel with the ImageNet statistics
# used by torchvision's pretrained models (the blue-channel mean is 0.406).
TRANSFORM = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 0  # shared seed for augmentation, shuffling and the classifiers
SIZE = 224  # side length (in pixels) every image is resized to
FEATURE_VECTOR_LENGTH = 2048  # width of the preallocated feature matrix
SPLIT = 0.2  # fraction of the samples held out for validation

# Build Feature Extractor Model

In [None]:
def build_feature_extractor():
    """Create the frozen-in-eval-mode VGG16-BN feature extractor on DEVICE.

    The pretrained ``vgg16_bn`` network is truncated by dropping its last two
    top-level child modules; a 2x2 adaptive average pool and a flatten layer
    are then appended so that each image maps to a flat feature vector.

    Returns:
        The extractor module, already moved to ``DEVICE`` and switched to
        evaluation mode.
    """
    class Model(nn.Module):
        def __init__(self):
            super().__init__()

            pretrained = models.vgg16_bn(pretrained=True, progress=True)
            # Keep everything except the final two top-level children.
            kept_children = list(pretrained.children())[:-2]

            backbone = nn.Sequential(*kept_children)
            backbone.add_module("Adaptive Average Pool", nn.AdaptiveAvgPool2d(output_size=(2, 2)))
            backbone.add_module("Flatten", nn.Flatten())
            self.model = backbone

        def forward(self, x):
            features = self.model(x)
            return features

    extractor = Model().to(DEVICE)
    extractor.eval()
    return extractor

# Build Features

In [None]:
def build_features(num_samples: int, batch_size: int, path: str, p_name: str, n_name: str):
    """Build a shuffled, labelled feature set from one positive and one negative image.

    Both images are read from ``path``, converted to RGB and resized to
    ``SIZE`` x ``SIZE``. Each is augmented ``num_samples`` times, the augmented
    images are pushed through the feature extractor in batches, and every
    resulting feature vector is min-max scaled to [0, 1].

    Args:
        num_samples: Number of augmented copies generated per image.
        batch_size: Batch size used when running the feature extractor.
        path: Directory containing both images.
        p_name: File name of the positive image (label 1).
        n_name: File name of the negative image (label 0).

    Returns:
        ``(features, labels)`` where ``features`` is an array of shape
        ``(2 * num_samples, FEATURE_VECTOR_LENGTH)`` and ``labels`` is the
        matching ``uint8`` array of 1s (positive) and 0s (negative). Both are
        shuffled with the same permutation.

    Raises:
        FileNotFoundError: If either image cannot be read from disk.
    """
    breaker()
    print("Building Features ...")

    class FEDS(Dataset):
        """Dataset that applies ``transform`` to each image of the array ``X``."""

        def __init__(self, X=None, transform=None):
            self.X = X
            self.transform = transform

        def __len__(self):
            return self.X.shape[0]

        def __getitem__(self, idx):
            return self.transform(self.X[idx])

    def normalize(x):
        """Min-max scale every row of ``x`` independently to [0, 1]."""
        lo = x.min(dim=1, keepdim=True)[0]
        hi = x.max(dim=1, keepdim=True)[0]
        span = hi - lo
        # A constant row has zero range; divide by 1 so it maps to 0, not NaN.
        span[span == 0] = 1
        return (x - lo) / span

    def preprocess(image: np.ndarray) -> np.ndarray:
        """Convert a BGR image to RGB and resize it to SIZE x SIZE."""
        return cv2.resize(src=cv2.cvtColor(src=image, code=cv2.COLOR_BGR2RGB), dsize=(SIZE, SIZE), interpolation=cv2.INTER_AREA)

    def load_image(name: str) -> np.ndarray:
        """Read ``name`` from ``path`` and preprocess it, failing loudly if unreadable."""
        full_path = os.path.join(path, name)
        image = cv2.imread(full_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError("Unable to read image: {}".format(full_path))
        return preprocess(image)

    def extract_features(extractor, images: np.ndarray) -> np.ndarray:
        """Run ``images`` through ``extractor`` in batches and return scaled features."""
        loader = DL(FEDS(X=images, transform=TRANSFORM), batch_size=batch_size, shuffle=False)
        feats = torch.zeros(num_samples, FEATURE_VECTOR_LENGTH).to(DEVICE)
        start = 0
        with torch.no_grad():
            for batch in loader:
                output = extractor(batch.to(DEVICE))
                stop = start + output.shape[0]
                feats[start:stop, :] = output
                start = stop
        return normalize(feats).detach().cpu().numpy()

    imgaug.seed(SEED)
    dataset_augment = augmenters.Sequential([
        augmenters.HorizontalFlip(p=0.25),
        augmenters.VerticalFlip(p=0.25),
        augmenters.SomeOf(5, [
                augmenters.blur.GaussianBlur(sigma=(0, 5), seed=SEED),
                augmenters.blur.MedianBlur(k=(1, 7), seed=SEED),
                augmenters.size.Crop(percent=(0, 0.1), seed=SEED),
                augmenters.geometric.Affine(rotate=(-45, 45), scale=(0.5, 1.2),translate_percent=(-0.2, 0.2), seed=SEED),
                augmenters.geometric.Rot90(k=(1, 3), seed=SEED),
                augmenters.arithmetic.Dropout(p=(0, 0.05), seed=SEED),
                augmenters.arithmetic.SaltAndPepper(p=(0, 0.05), seed=SEED),
                augmenters.color.MultiplyBrightness(mul=(0.5, 1.5)),
                augmenters.color.MultiplySaturation(mul=(0, 5), seed=SEED),
                augmenters.iaa_convolutional.Sharpen(alpha=(0.75, 1), lightness=(0.75, 1.25), seed=SEED),
                augmenters.iaa_convolutional.Emboss(alpha=(0.75, 1), strength=(0.75, 1.25), seed=SEED),
                augmenters.contrast.CLAHE(seed=SEED),
                augmenters.contrast.GammaContrast(gamma=(0.2, 5), seed=SEED), 
                ])
            ])

    # Load both images first, then augment positive before negative so the
    # augmentation random stream is consumed in a fixed order.
    p_image = load_image(p_name)
    n_image = load_image(n_name)
    p_images = np.array(dataset_augment(images=[p_image for _ in range(num_samples)]))
    n_images = np.array(dataset_augment(images=[n_image for _ in range(num_samples)]))

    fea_extractor = build_feature_extractor()

    p_features = extract_features(fea_extractor, p_images)
    n_features = extract_features(fea_extractor, n_images)

    features = np.concatenate((p_features, n_features), axis=0)
    labels = np.concatenate((np.ones((len(p_features), ), dtype=np.uint8), np.zeros((len(n_features), ), dtype=np.uint8)), axis=0)

    # Shuffle features and labels with one shared permutation so each row is
    # guaranteed to stay aligned with its label.
    np.random.seed(SEED)
    order = np.random.permutation(len(labels))
    features, labels = features[order], labels[order]

    # Release the extractor and image buffers before the classifiers are trained.
    del fea_extractor, p_images, n_images, p_image, n_image
    torch.cuda.empty_cache()

    return features, labels

# Machine Learning Models Setup

In [None]:
class MLModels(object):
    """Build, train, evaluate and pickle one classifier chosen by short name.

    The first three characters of ``model_name`` (case-insensitive) select
    the estimator:

        lgr - LogisticRegression      gnb - GaussianNB
        knc - KNeighborsClassifier    dtc - DecisionTreeClassifier
        rfc - RandomForestClassifier  xgc - XGBClassifier

    Attributes are ``model_name`` (the name as given) and ``model`` (the
    estimator instance).
    """

    def __init__(self, model_name: str):
        """Instantiate the estimator selected by ``model_name``.

        Raises:
            NotImplementedError: If ``model_name`` does not start with one of
                the supported three-letter codes.
        """
        self.model_name = model_name
        self.model = None

        # "gpu_hist" needs a CUDA device; use the CPU histogram method otherwise.
        xgb_tree_method = "gpu_hist" if torch.cuda.is_available() else "hist"

        # Factories are lazy so only the requested estimator is constructed.
        factories = {
            # The default iteration limit is too low for this feature set
            # (the solver stops with a convergence warning), so raise it.
            "lgr": lambda: LogisticRegression(random_state=SEED, max_iter=1000),
            "gnb": lambda: GaussianNB(),
            "knc": lambda: KNeighborsClassifier(),
            "dtc": lambda: DecisionTreeClassifier(random_state=SEED),
            "rfc": lambda: RandomForestClassifier(random_state=SEED),
            "xgc": lambda: XGBClassifier(tree_method=xgb_tree_method, random_state=SEED),
        }

        factory = factories.get(self.model_name[:3].lower())
        if factory is None:
            raise NotImplementedError("Invalid Model Name")
        self.model = factory()

    def fit_model(self, features, labels, save_dir="/content/"):
        """Train on a split of the data, report validation accuracy and save the model.

        Args:
            features: Feature matrix, one row per sample.
            labels: Label for each row of ``features``.
            save_dir: Directory the pickled model ``<model_name>_Model.pkl``
                is written to.

        Returns:
            The accuracy on the held-out ``SPLIT`` fraction of the data.
        """
        breaker()
        print("Running -{}- Model ...".format(self.model_name))
        tr_feat, va_feat, tr_label, va_label = train_test_split(features, labels, test_size=SPLIT, shuffle=True, random_state=SEED)
        self.model.fit(tr_feat, tr_label)
        y_pred = self.model.predict(va_feat)
        accuracy = accuracy_score(va_label, y_pred)
        breaker()
        print("-{}- Accuracy : {:.5f}".format(self.model_name, accuracy))
        filename = "{}_Model.pkl".format(self.model_name)
        # The context manager guarantees the file is flushed and closed.
        with open(os.path.join(save_dir, filename), "wb") as fp:
            pickle.dump(self.model, fp)
        return accuracy

# Main

In [None]:
def main():
    """Extract features from the two reference images and train every classifier.

    Builds 15000 augmented samples per image (batch size 128) from
    ``/content/P.png`` and ``/content/N.png``, then fits and evaluates each
    supported model in turn on the resulting features.
    """
    features, labels = build_features(
        num_samples=15000,
        batch_size=128,
        path="/content/",
        p_name="P.png",
        n_name="N.png",
    )

    for model_name in ("lgr", "gnb", "knc", "dtc", "rfc", "xgc"):
        classifier = MLModels(model_name=model_name)
        classifier.fit_model(features, labels)
        print("\t\t ---------- ")

    breaker()


main()


**************************************************

Building Features ...

**************************************************

Running -lgr- Model ...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



**************************************************

-lgr- Accuracy : 0.96833
		 ---------- 

**************************************************

Running -gnb- Model ...

**************************************************

-gnb- Accuracy : 0.86500
		 ---------- 

**************************************************

Running -knc- Model ...

**************************************************

-knc- Accuracy : 0.93583
		 ---------- 

**************************************************

Running -dtc- Model ...

**************************************************

-dtc- Accuracy : 0.83183
		 ---------- 

**************************************************

Running -rfc- Model ...

**************************************************

-rfc- Accuracy : 0.94867
		 ---------- 

**************************************************

Running -xgc- Model ...

**************************************************

-xgc- Accuracy : 0.93867
		 ---------- 

**************************************************

