In [1]:
import os

# The notebook is expected to live in a nested 'SVM' folder two levels below
# the project root; when launched from there, move up so that the relative
# paths used by later cells resolve against the project root.
started_in_svm_folder = os.getcwd().endswith('SVM')
if started_in_svm_folder:
    os.chdir('../..')
    print("Directory changed to Project Root.")

print("Current Working Directory:", os.getcwd())

Directory changed to Project Root.
Current Working Directory: d:\college\Senior Year\Machine Learning\Material-Stream-Identification-System


In [2]:
import os
import cv2
import numpy as np
from skimage.feature import hog, local_binary_pattern
from sklearn.preprocessing import LabelEncoder

# Root folder holding one sub-directory of images per class.
# Built with os.path.join so the path also resolves on non-Windows hosts.
augmentedDir = os.path.join("data", "augmented")

# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# (and therefore the seeded train/test split downstream) identical across runs.
classes = sorted(
    d for d in os.listdir(augmentedDir)
    if os.path.isdir(os.path.join(augmentedDir, d))
)
print("Classes found:", classes)

# --- Feature-extraction settings -------------------------------------------
IMG_SIZE = (128, 128)              # every image is resized to this before extraction
LBP_RADIUS = 1
LBP_N_POINTS = 8 * LBP_RADIUS
LBP_METHOD = 'uniform'
HOG_PIXELS_PER_CELL = (8, 8)
HOG_CELLS_PER_BLOCK = (2, 2)
HOG_ORIENTATIONS = 9
COLOR_BINS = 16                    # bins per HSV channel


def extract_features(img):
    """Build the HOG + LBP + HSV-histogram feature vector for one image.

    Parameters
    ----------
    img : numpy.ndarray
        Colour image in BGR channel order, as returned by ``cv2.imread``.

    Returns
    -------
    numpy.ndarray
        1-D vector: HOG descriptor, followed by the normalised LBP histogram
        (``LBP_N_POINTS + 2`` bins), followed by the normalised H, S and V
        histograms (``COLOR_BINS`` bins each).
    """
    img = cv2.resize(img, IMG_SIZE)

    # HOG and LBP both operate on a single-channel image.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    hog_feat = hog(
        gray,
        orientations=HOG_ORIENTATIONS,
        pixels_per_cell=HOG_PIXELS_PER_CELL,
        cells_per_block=HOG_CELLS_PER_BLOCK,
        block_norm='L2-Hys',
        transform_sqrt=True,
        feature_vector=True
    )

    # Histogram of LBP codes over integer bin edges 0 .. LBP_N_POINTS + 2.
    lbp = local_binary_pattern(gray, LBP_N_POINTS, LBP_RADIUS, LBP_METHOD)
    (lbp_hist, _) = np.histogram(lbp.ravel(),
                                 bins=np.arange(0, LBP_N_POINTS + 3),
                                 range=(0, LBP_N_POINTS + 2))
    lbp_hist = lbp_hist.astype("float")
    lbp_hist /= (lbp_hist.sum() + 1e-6)   # epsilon guards against division by zero

    # Per-channel HSV histograms (OpenCV hue spans [0, 180)), normalised jointly.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h_hist = cv2.calcHist([hsv], [0], None, [COLOR_BINS], [0, 180])
    s_hist = cv2.calcHist([hsv], [1], None, [COLOR_BINS], [0, 256])
    v_hist = cv2.calcHist([hsv], [2], None, [COLOR_BINS], [0, 256])
    color_hist = np.concatenate([h_hist, s_hist, v_hist]).ravel()
    color_hist = color_hist / (color_hist.sum() + 1e-6)

    return np.concatenate([hog_feat, lbp_hist, color_hist])


#  Feature Extraction
X = []
y = []
skipped = []   # paths cv2 could not decode, reported after the loop

for cls in classes:
    folder = os.path.join(augmentedDir, cls)
    files = sorted(
        f for f in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, f))
    )
    print(f"Processing class '{cls}' with {len(files)} images...")

    for file in files:
        img_path = os.path.join(folder, file)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for unreadable / non-image files.
            skipped.append(img_path)
            continue

        X.append(extract_features(img))
        y.append(cls)

if skipped:
    print(f"Warning: skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

X = np.array(X)
y = np.array(y)

# Encode the string class names as integer ids.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

print("Feature extraction done!")
print("X shape:", X.shape)
print("y shape:", y.shape)
print("y_encoded shape:", y_encoded.shape)
print("Class mapping:", dict(zip(le.classes_, le.transform(le.classes_))))


Classes found: ['cardboard', 'glass', 'metal', 'paper']
Processing class 'cardboard' with 1540 images...
Processing class 'glass' with 1540 images...
Processing class 'metal' with 1540 images...
Processing class 'paper' with 1540 images...
Feature extraction done!
X shape: (6160, 8158)
y shape: (6160,)
y_encoded shape: (6160,)
Class mapping: {np.str_('cardboard'): np.int64(0), np.str_('glass'): np.int64(1), np.str_('metal'): np.int64(2), np.str_('paper'): np.int64(3)}


In [3]:


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import numpy as np


def _print_split_shapes():
    """Print the current shapes of the train and test feature matrices."""
    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)


# Sanity check on what the feature-extraction cell produced.
print("Original X shape:", X.shape)
print("Original y shape:", y.shape)

# Encode the string class names as integer ids and show the mapping.
le = LabelEncoder()
y_encoded = le.fit_transform(y)
class_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print("Class mapping:", class_mapping)
print("y_encoded shape:", y_encoded.shape)

# Stratified 80/20 hold-out split with a fixed seed.
# NOTE(review): the samples come from the augmented-image folder; if several
# augmented variants of one source image exist, a random split can place them
# on both sides and inflate the test score -- confirm how the data was built.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

print("Before scaling:")
_print_split_shapes()

# Standardise to zero mean / unit variance. The statistics are learned from the
# training split only and then applied unchanged to the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("After scaling:")
_print_split_shapes()


Original X shape: (6160, 8158)
Original y shape: (6160,)
Class mapping: {np.str_('cardboard'): np.int64(0), np.str_('glass'): np.int64(1), np.str_('metal'): np.int64(2), np.str_('paper'): np.int64(3)}
y_encoded shape: (6160,)
Before scaling:
X_train shape: (4928, 8158)
X_test shape: (1232, 8158)
After scaling:
X_train shape: (4928, 8158)
X_test shape: (1232, 8158)


In [4]:
# Scaling was done in the previous step; start training the model.

In [31]:
from sklearn.svm import SVC
import numpy as np


# RBF-kernel SVM (SVC's default kernel).
# probability=True makes SVC fit an internal cross-validated calibration that
# shuffles the data; random_state pins that shuffling so predict_proba (and the
# thresholded accuracy computed from it) is reproducible between runs.
model = SVC(C=1000, gamma='scale', probability=True, random_state=42)

model.fit(X_train, y_train)

# One row per test sample, one column per class (columns follow model.classes_).
y_pred_prob = model.predict_proba(X_test)




In [35]:
from sklearn.metrics import accuracy_score

# Minimum top-class probability required to accept a prediction.
Confidence_Threshold = 0.5
# Label given to rejected (low-confidence) samples. It never occurs in y_test,
# so every rejection counts as an error in the accuracy printed below.
UNKNOWN_CLASS_ID = 6

# Diagnostics only (not used below): highest probability in the whole matrix
# and its position in the flattened array.
max_val = np.max(y_pred_prob)
max_val_idx = np.argmax(y_pred_prob)

# Columns of predict_proba follow model.classes_, so translate the arg-max
# column index into the actual class label instead of assuming they coincide.
top_prob = y_pred_prob.max(axis=1)
top_label = model.classes_[y_pred_prob.argmax(axis=1)]

# Kept as a plain list because a later cell calls y_pred_classes.count(...).
y_pred_classes = np.where(
    top_prob >= Confidence_Threshold, top_label, UNKNOWN_CLASS_ID
).tolist()

print(accuracy_score(y_test, y_pred_classes))

0.864448051948052


# Recorded accuracies for the RBF SVM models
rbf , c= 1 , gamma = 'scale' , threshold = 0.7 -> accuracy = 0.77
rbf , c= 1 , gamma = 'scale' , threshold = 0.5 -> accuracy = 0.849
rbf , c= 1 , gamma = 'scale' , threshold = 0.4 -> accuracy = 0.86

rbf , c= 10 , gamma = 'scale' , threshold = 0.7 -> accuracy = 0.81
rbf , c= 10 , gamma = 'scale' , threshold = 0.6 -> accuracy = 0.84
rbf , c= 10 , gamma = 'scale' , threshold = 0.5 -> accuracy = 0.86

rbf , c= 10 , gamma = 'auto' , threshold = 0.7 -> accuracy = 0.80
rbf , c= 10 , gamma = 'auto' , threshold = 0.6 -> accuracy = 0.84
rbf , c= 10 , gamma = 'auto' , threshold = 0.5 -> accuracy = 0.86

rbf , c= 100 , gamma = 'scale' , threshold = 0.7 -> accuracy = 0.80
rbf , c= 100 , gamma = 'scale' , threshold = 0.6 -> accuracy = 0.84
rbf , c= 100 , gamma = 'scale' , threshold = 0.5 -> accuracy = 0.86

rbf , c= 1000 , gamma = 'scale' , threshold = 0.7 -> accuracy = 0.81
rbf , c= 1000 , gamma = 'scale' , threshold = 0.6 -> accuracy = 0.84
rbf , c= 1000 , gamma = 'scale' , threshold = 0.5 -> accuracy = 0.86






In [44]:

# Linear-kernel SVM trained on the same scaled training split.
# probability=True makes SVC fit an internal cross-validated calibration that
# shuffles the data; random_state pins that shuffling so predict_proba is
# reproducible between runs.
model2 = SVC(kernel='linear', C=1, probability=True, random_state=42)

model2.fit(X_train, y_train)

# One row per test sample, one column per class (columns follow model2.classes_).
y_pred_prob2 = model2.predict_proba(X_test)



In [47]:
# Evaluate the linear SVM with a confidence threshold.
# `hist` is imported for inspecting how the predicted probabilities are
# distributed (e.g. hist(y_pred_prob2.max(axis=1))); no plot is drawn here.
from matplotlib.pyplot import hist
# Imported here so this cell does not depend on another cell's import.
from sklearn.metrics import accuracy_score

# Try different thresholds
Confidence_Threshold = 0.5
# Label given to rejected (low-confidence) samples. It never occurs in y_test,
# so every rejection counts as an error in the accuracy printed below.
UNKNOWN_CLASS_ID = 6

# Diagnostics only (not used below): highest probability in the whole matrix
# and its position in the flattened array.
max_val2 = np.max(y_pred_prob2)
max_val_idx2 = np.argmax(y_pred_prob2)

# Columns of predict_proba follow model2.classes_, so translate the arg-max
# column index into the actual class label instead of assuming they coincide.
top_prob2 = y_pred_prob2.max(axis=1)
top_label2 = model2.classes_[y_pred_prob2.argmax(axis=1)]

y_pred_classes2 = np.where(
    top_prob2 >= Confidence_Threshold, top_label2, UNKNOWN_CLASS_ID
).tolist()

print(accuracy_score(y_test, y_pred_classes2))

0.8287337662337663


# recorded accuracies for linear SVM 

linear  , C= 1  , 0.7 -> 0.74
linear  , C= 1  , 0.6 -> 0.78
linear  , C= 1  , 0.5 -> 0.82

linear  , C= 100  , 0.7 -> 0.75
linear  , C= 100  , 0.6 -> 0.79
linear  , C= 100  , 0.5 -> 0.83

# check if confidence threshold is suitable or not 

In [40]:
# Rejection rate: percentage of test samples whose prediction was replaced by
# the "Unknown" label 6 because it fell below the confidence threshold.
total_samples = len(y_test)
n_unknown = sum(1 for predicted in y_pred_classes if predicted == 6)
rejection_rate = n_unknown / total_samples * 100

# Rejected samples are included in the accuracy (label 6 is compared to y_test).
print(f"Accuracy: {accuracy_score(y_test, y_pred_classes):.4f}")
print(f"Rejection Rate: {rejection_rate:.2f}%")

Accuracy: 0.8644
Rejection Rate: 5.03%


# Save the model

In [43]:
import joblib

# Target files, written relative to the current working directory.
model_filename = 'svm_model_final.pkl'
scaler_filename = 'scaler_final.pkl'

# Persist the classifier together with the scaler fitted on the training split.
# `model` is the RBF SVM; substitute `model2` here if the linear one is preferred.
# NOTE(review): the LabelEncoder (`le`) is not saved in this cell -- confirm the
# inference code has another way to map class ids back to class names.
joblib.dump(value=model, filename=model_filename)
joblib.dump(value=scaler, filename=scaler_filename)

print(f"Model saved to {model_filename}")
print(f"Scaler saved to {scaler_filename}")

Model saved to svm_model_final.pkl
Scaler saved to scaler_final.pkl
