In [1]:
!git clone https://github.com/mahmoudan0s/Automated-Material-Stream-Identification-System-MSI-.git

Cloning into 'Automated-Material-Stream-Identification-System-MSI-'...
remote: Enumerating objects: 12844, done.[K
remote: Counting objects: 100% (20/20), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 12844 (delta 1), reused 16 (delta 1), pack-reused 12824 (from 2)[K
Receiving objects: 100% (12844/12844), 430.27 MiB | 21.54 MiB/s, done.
Resolving deltas: 100% (228/228), done.
Updating files: 100% (8165/8165), done.


In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import os
from skimage.io import imread, imshow
from skimage.transform import resize
from skimage.feature import hog
from skimage import data, exposure

In [3]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [4]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [13]:
from torchvision import datasets, transforms

SVMtransform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

train_data = datasets.ImageFolder("/content/Automated-Material-Stream-Identification-System-MSI-/data/split/train", transform=SVMtransform)
val_data  = datasets.ImageFolder("/content/Automated-Material-Stream-Identification-System-MSI-/data/split/val",  transform=SVMtransform)

In [53]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_data, batch_size=32, shuffle=False)
val_loader   = DataLoader(val_data, batch_size=32, shuffle=False)


In [54]:
import torch
import torchvision.models as models
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"

model = models.efficientnet_b0(weights="DEFAULT")
model.classifier = nn.Identity()   # remove final FC layer
model = model.to(device)
model.eval()


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [55]:
def extract_features(dataloader):
    features = []
    labels = []

    with torch.no_grad():
        for imgs, lbls in dataloader:
            imgs = imgs.to(device)
            feats = model(imgs)
            feats = feats.cpu().numpy()

            features.append(feats)
            labels.append(lbls.numpy())

    features = np.concatenate(features)
    labels = np.concatenate(labels)

    return features, labels


In [56]:
train_X, train_y = extract_features(train_loader)
val_X,   val_y   = extract_features(val_loader)

print(train_X.shape)  # (num_images, 1280)
print(val_X.shape)

(3300, 1280)
(401, 1280)


In [43]:
test_data  = datasets.ImageFolder("/content/Automated-Material-Stream-Identification-System-MSI-/data/split/test",  transform=SVMtransform)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [44]:
test_X, test_y = extract_features(test_loader)

In [57]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_X = scaler.fit_transform(train_X)
val_X = scaler.transform(val_X)

In [58]:
from sklearn.decomposition import PCA

pca = PCA(n_components=256)
train_pca = pca.fit_transform(train_X)
val_pca = pca.transform(val_X)
test_pca  = pca.transform(test_X)

In [59]:
knn = KNeighborsClassifier(n_neighbors=5, metric='cosine',weights='distance')
knn.fit(train_pca, train_y)
knn_pred = knn.predict(val_pca)
print("KNN Accuracy:", accuracy_score(val_y, knn_pred))

KNN Accuracy: 0.9251870324189526


In [60]:
UNKNOWN_LABEL = 6

# ---- Unknown Prediction Function ----
def knn_predict_with_unknown(X, knn_model, threshold):
    preds = []
    confidences = []

    distances, neighbors = knn_model.kneighbors(X, return_distance=True)

    for i in range(len(X)):
        mean_dist = distances[i].mean()

        if mean_dist > threshold:
            preds.append(UNKNOWN_LABEL)
            confidences.append(mean_dist)
        else:
            labels = knn_model._y[neighbors[i]]
            pred = np.bincount(labels).argmax()
            preds.append(pred)
            confidences.append(mean_dist)

    return np.array(preds), np.array(confidences)

# ---- Choose Threshold ----
distances, _ = knn.kneighbors(val_pca, return_distance=True)
mean_distances = distances.mean(axis=1)
THRESHOLD = mean_distances.max() * 1.2

print("Chosen Threshold:", THRESHOLD)

# ---- Validation ----
val_pred, val_conf = knn_predict_with_unknown(val_pca, knn, THRESHOLD)

from sklearn.metrics import accuracy_score
print("Validation Accuracy:", accuracy_score(val_y, val_pred))
print("Unknown samples:", (val_pred == UNKNOWN_LABEL).sum())


Chosen Threshold: 0.68741363
Validation Accuracy: 0.9251870324189526
Unknown samples: 0


In [49]:
BEST_THRESHOLD = 9

test_pred, _ = knn_predict_with_unknown(
    test_pca,
    knn,
    threshold=BEST_THRESHOLD
)

from sklearn.metrics import classification_report

print(classification_report(test_y, test_pred))

              precision    recall  f1-score   support

           0       0.98      0.85      0.91        48
           1       0.91      0.94      0.93        54
           2       0.97      0.90      0.93        39
           3       0.88      0.98      0.93        47
           4       0.93      0.98      0.95        42
           5       0.97      0.97      0.97        40

    accuracy                           0.94       270
   macro avg       0.94      0.94      0.94       270
weighted avg       0.94      0.94      0.94       270

