In [1]:
# Fetch the project repository; the image dataset read by the cells below
# lives under data/split inside this checkout.
!git clone https://github.com/mahmoudan0s/Automated-Material-Stream-Identification-System-MSI-.git

Cloning into 'Automated-Material-Stream-Identification-System-MSI-'...
remote: Enumerating objects: 12866, done.[K
remote: Counting objects: 100% (42/42), done.[K
remote: Compressing objects: 100% (38/38), done.[K
remote: Total 12866 (delta 5), reused 33 (delta 4), pack-reused 12824 (from 2)[K
Receiving objects: 100% (12866/12866), 434.86 MiB | 23.77 MiB/s, done.
Resolving deltas: 100% (232/232), done.
Updating files: 100% (8197/8197), done.


In [2]:
import torch
from torchvision import datasets,models, transforms
from torch.utils.data import DataLoader
from PIL import Image
from pathlib import Path
import numpy as np
import torch.nn as nn

In [3]:
# Root of the pre-split dataset inside the cloned repository.
BASE_DIR = Path("/content/Automated-Material-Stream-Identification-System-MSI-/data/split")
# One sub-directory per split, derived from the common root in a single step.
TRAIN_DIR, VAL_DIR, TEST_DIR = (BASE_DIR / split for split in ("train", "val", "test"))

In [4]:
# Spatial size every image is resized to before entering the backbone.
INPUT_SIZE = (224, 224)  # EfficientNet-B0
# Number of images per forward pass during feature extraction.
BATCH_SIZE = 32
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"


In [5]:
# Deterministic preprocessing shared by every split: resize, convert to a
# tensor, then normalise each channel with the given mean / std.
_preprocess_steps = [
    transforms.Resize(INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
]
transform = transforms.Compose(_preprocess_steps)

In [6]:
# ImageFolder derives the class of each image from its parent directory name.
train_data = datasets.ImageFolder(root=TRAIN_DIR, transform=transform)
val_data = datasets.ImageFolder(root=VAL_DIR, transform=transform)

In [7]:
# Features are extracted once and never trained on through these loaders, so a
# fixed (unshuffled) order keeps rows aligned with the dataset order.
_loader_kwargs = dict(batch_size=BATCH_SIZE, shuffle=False)
train_loader = DataLoader(train_data, **_loader_kwargs)
val_loader = DataLoader(val_data, **_loader_kwargs)

In [8]:
# Show the class folder names in index order; position i in this list is the
# integer label i used by the classifiers below.
print(train_data.classes)

['0_glass', '1_paper', '2_cardboard', '3_plastic', '4_metal', '5_trash']


In [9]:
# EfficientNet-B0 with torchvision's default pretrained weights, used here as
# a feature extractor.
model = models.efficientnet_b0(weights="DEFAULT")
# Replace the classification head with a pass-through layer.
model.classifier = nn.Identity()
# eval() and to() both return the module itself, so the chained call switches
# to inference mode, moves the weights to DEVICE, and still displays the
# module as the cell output.
model.eval().to(DEVICE)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


100%|██████████| 20.5M/20.5M [00:00<00:00, 133MB/s]


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [10]:
def extract_features(dataloader, net=None, device=None):
    """Run image batches through a CNN backbone and return pooled features.

    Each batch is passed through ``net.features``, globally average-pooled to
    one vector per image, and collected on the CPU as NumPy arrays.

    Args:
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        net: Module exposing a ``.features`` sub-module. Defaults to the
            notebook-level ``model`` so existing one-argument calls still work.
        device: Device the image batches are moved to. Defaults to the
            notebook-level ``DEVICE``.

    Returns:
        ``(features, labels)`` NumPy arrays, concatenated over all batches in
        loader order; ``features`` has one row per image.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    # Lazy fallbacks: the globals are only looked up when no override is given.
    net = model if net is None else net
    device = DEVICE if device is None else device

    feature_batches = []
    label_batches = []

    # Force inference mode for the pass (BatchNorm must use its running
    # statistics), then restore whatever mode the caller had set.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for imgs, lbls in dataloader:
                feats = net.features(imgs.to(device))
                # Global average pool: (B, C, H, W) -> (B, C).
                feats = torch.nn.functional.adaptive_avg_pool2d(feats, (1, 1)).flatten(1)
                feature_batches.append(feats.cpu().numpy())
                # .cpu() is a no-op for CPU labels and makes GPU labels safe.
                label_batches.append(lbls.cpu().numpy())
    finally:
        net.train(was_training)

    if not feature_batches:
        # np.concatenate([]) would raise a far less helpful message.
        raise ValueError("dataloader yielded no batches; nothing to extract")

    return np.concatenate(feature_batches), np.concatenate(label_batches)


In [11]:
# map() is consumed left to right by the unpacking, so the train split is
# processed first and the val split second.
(train_X, train_y), (val_X, val_y) = map(extract_features, (train_loader, val_loader))

print("Train features:", train_X.shape)  # (num_train_images, 1280)
print("Val features:", val_X.shape)      # (num_val_images, 1280)


Train features: (3296, 1280)
Val features: (404, 1280)


In [12]:
# Held-out test split, prepared with the same preprocessing and loader
# settings as the train / val splits.
test_data = datasets.ImageFolder(root=TEST_DIR, transform=transform)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)
test_X, test_y = extract_features(test_loader)
print("Test features:", test_X.shape)

Test features: (270, 1280)


In [13]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Baseline: RBF-kernel SVM trained directly on the raw backbone features.
# fit() returns the estimator, so construction and training can be chained.
svm = SVC(kernel='rbf', C=10, gamma='scale').fit(train_X, train_y)

pred = svm.predict(val_X)
print("SVM Accuracy:", accuracy_score(val_y, pred))

SVM Accuracy: 0.943069306930693


In [14]:
# Linear-kernel SVM on the same raw features, for comparison with the RBF run.
linearSVM = SVC(kernel='linear', C=1).fit(train_X, train_y)
pred = linearSVM.predict(val_X)
print("SVM Accuracy:", accuracy_score(val_y, pred))

SVM Accuracy: 0.9232673267326733


In [15]:
from sklearn.decomposition import PCA

# Reduce the backbone features to 256 dimensions.
# With this many samples/features and n_components=256, scikit-learn's default
# svd_solver='auto' selects the randomized solver, so a fixed random_state is
# required for the projection (and the PCA pickled later) to be reproducible.
pca = PCA(n_components=256, random_state=42)
# Fit on the training split only; validation data is merely projected so no
# information from it leaks into the components.
train_pca = pca.fit_transform(train_X)
val_pca = pca.transform(val_X)

In [16]:
# Same RBF configuration as the raw-feature baseline, but trained on the PCA
# projection. A separate estimator is used instead of refitting `svm` in
# place, so the baseline model stays valid for its original inputs and this
# cell can be re-run in any order without changing what `svm` expects.
svm_pca = SVC(kernel='rbf', C=10, gamma='scale')
svm_pca.fit(train_pca, train_y)
pred = svm_pca.predict(val_pca)
print("SVM Accuracy:", accuracy_score(val_y, pred))

SVM Accuracy: 0.9455445544554455


In [17]:
# Final classifier: RBF SVM on PCA features with probability estimates
# enabled (needed by predict_proba for the Unknown-rejection step).
svm = SVC(
    kernel="rbf",
    C=10,
    gamma="scale",
    probability=True,
    class_weight="balanced",
    # probability=True calibrates via an internal, shuffled cross-validation;
    # fixing the seed makes the probabilities (and therefore the threshold
    # analysis and the saved model) reproducible across runs.
    random_state=42,
)

svm.fit(train_pca, train_y)

In [18]:
# Standard prediction without any Unknown rejection.
val_pred = svm.predict(val_pca)
print("Closed-set Val Accuracy:", accuracy_score(y_true=val_y, y_pred=val_pred))

Closed-set Val Accuracy: 0.9455445544554455


In [19]:
def predict_with_unknown(X, svm, threshold=0.45, unknown_label=6):
    """
    Classify samples, relabelling low-confidence ones as Unknown.

    X: numpy array (N, feature_dim)  → features after PCA
    svm: trained SVM (probability=True)
    threshold: confidence threshold; a sample whose highest class
        probability is strictly below it is labelled unknown_label
    unknown_label: label for Unknown class; must not be one of svm.classes_

    returns:
        predictions: integer numpy array of labels (including Unknown)
        confidences: numpy array of max probabilities

    raises:
        ValueError: if unknown_label is already a class the SVM was trained on
    """
    # A colliding label would make rejected samples indistinguishable from
    # genuine predictions of that class.
    if unknown_label in svm.classes_:
        raise ValueError(
            f"unknown_label={unknown_label!r} collides with a trained class label"
        )

    probs = svm.predict_proba(X)            # shape (N, num_classes)
    max_probs = probs.max(axis=1)
    # Map the argmax column back to the actual class label.
    preds = svm.classes_[probs.argmax(axis=1)]

    # Vectorised rejection: keep the predicted class unless confidence is low.
    final_preds = np.where(max_probs < threshold, unknown_label, preds.astype(int))

    return final_preds, max_probs


In [20]:
# Reject validation samples whose top class probability is below 0.7.
pred_unknown, confs = predict_with_unknown(val_pca, svm, threshold=0.7)

In [21]:
# Accuracy after rejection. A sample relabelled as Unknown only counts as
# correct if its true label is also the Unknown label.
print("Open-set Val Accuracy:", accuracy_score(y_true=val_y, y_pred=pred_unknown))

Open-set Val Accuracy: 0.9084158415841584


In [22]:
# Sweep the rejection threshold and report validation accuracy at each value.
candidate_thresholds = [0.3, 0.35, 0.4, 0.45, 0.5,0.6,0.7]
for t in candidate_thresholds:
    # Only the labels are needed here; the confidences are discarded.
    pred_t = predict_with_unknown(val_pca, svm, threshold=t)[0]
    acc = accuracy_score(val_y, pred_t)
    print(f"Threshold={t:.2f} → Acc={acc:.4f}")

Threshold=0.30 → Acc=0.9455
Threshold=0.35 → Acc=0.9455
Threshold=0.40 → Acc=0.9455
Threshold=0.45 → Acc=0.9431
Threshold=0.50 → Acc=0.9356
Threshold=0.60 → Acc=0.9208
Threshold=0.70 → Acc=0.9084


In [23]:
# Project the held-out test features with the PCA fitted on the training split.
test_pca = pca.transform(test_X)

# Final evaluation at the chosen rejection threshold; confidences are unused.
test_pred = predict_with_unknown(test_pca, svm, threshold=0.45)[0]

print("Final Test Accuracy:", accuracy_score(test_y, test_pred))


Final Test Accuracy: 0.9333333333333333


In [24]:
import joblib

# Persist both artefacts: the SVM was trained on PCA-projected features, so
# the fitted PCA is needed alongside it to prepare inputs at inference time.
joblib.dump(value=svm, filename="svm_model2.pkl")
joblib.dump(value=pca, filename="pca_model2.pkl")

['pca_model2.pkl']

In [32]:
from sklearn.metrics import classification_report

# Display names in label order: indices 0-5 follow the dataset's class folders
# and index 6 is the Unknown label produced by predict_with_unknown.
report_names = [
    "Glass", "Paper", "Cardboard",
    "Plastic", "Metal", "Trash", "Unknown"
]

print(classification_report(
    test_y,
    test_pred,
    # Pin the label set explicitly: without it, scikit-learn infers labels
    # from the data and raises when no sample was predicted as Unknown
    # (6 observed classes vs. 7 target names).
    labels=list(range(len(report_names))),
    target_names=report_names,
    # The Unknown class has no true samples in the test set; report 0 for its
    # undefined metrics instead of emitting a warning per metric.
    zero_division=0,
))

              precision    recall  f1-score   support

       Glass       0.93      0.90      0.91        48
       Paper       0.95      0.96      0.95        54
   Cardboard       0.97      0.90      0.93        39
     Plastic       0.90      0.91      0.91        47
       Metal       0.95      0.95      0.95        42
       Trash       0.97      0.97      0.97        40
     Unknown       0.00      0.00      0.00         0

    accuracy                           0.93       270
   macro avg       0.81      0.80      0.80       270
weighted avg       0.94      0.93      0.94       270



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
