# Step 4: Train classifiers
1. SVM → src/train_svm.py
    - Choose kernel: linear, rbf, or poly.
    - Use cross-validation to tune C and gamma.
2. k-NN → src/train_knn.py
    - Choose k (e.g., 3, 5, 7) and distance metric (euclidean/manhattan).
    - Optionally use distance-weighted voting.
3. Save trained models in models/.
### Notebook for reference: 04_train_svm_knn.ipynb.
# Step 5: Handle 'Unknown' class (ID 6)
Implement in src/inference.py:
    - For SVM: Use decision function confidence; reject low-confidence predictions.
    - For k-NN: Reject if nearest neighbors are too far or no clear majority.
- Test this mechanism on a small validation set.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier


# 1. Load extracted features


In [2]:

# Load the pre-extracted feature matrix and the matching label vector from
# .npy files in the current working directory.
X = np.load("features.npy")
y = np.load("labels.npy")

# Print both shapes as a quick sanity check that the arrays were read.
print("Loaded feature matrix shape:", X.shape)
print("Loaded labels shape:", y.shape)


Loaded feature matrix shape: (20503, 25774)
Loaded labels shape: (20503,)


# 2. Train-test split


In [3]:
# Hold out 20% of the samples as a test set. `stratify=y` keeps the class
# proportions the same in both splits, and `random_state=42` makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


# 3. Create and train KNN model


In [4]:
# k-NN baseline, fitted on the training features exactly as loaded
# (no scaling is applied to them in this notebook).
knn = KNeighborsClassifier(
    n_neighbors=3,      # k: number of neighbours that vote; a tunable hyperparameter
    weights='distance', # closer neighbours get a larger vote (inverse-distance weighting)
    metric='euclidean'  # same distance as sklearn's default (minkowski with p=2)
)

knn.fit(X_train, y_train)

0,1,2
,n_neighbors,3
,weights,'distance'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'euclidean'
,metric_params,
,n_jobs,


# 4. Evaluate


In [5]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict the held-out test split with the fitted k-NN model.
y_pred = knn.predict(X_test)

# Overall fraction of correctly classified test samples.
acc = accuracy_score(y_test, y_pred)
print("\nAccuracy:", acc)

# Per-class precision / recall / F1 and support.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Rows are true classes, columns are predicted classes.
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Accuracy: 0.6098512557912704

Classification Report:
              precision    recall  f1-score   support

           0       0.40      0.87      0.55       543
           1       0.81      0.59      0.68       847
           2       0.80      0.56      0.66       693
           3       0.65      0.51      0.57       987
           4       0.59      0.66      0.62       798
           5       0.67      0.50      0.57       233

    accuracy                           0.61      4101
   macro avg       0.65      0.61      0.61      4101
weighted avg       0.67      0.61      0.62      4101


Confusion Matrix:
[[470   7   6  30  18  12]
 [ 74 498  56  66 145   8]
 [ 85  50 388  31 133   6]
 [401  11   6 505  44  20]
 [ 99  31  25 108 523  12]
 [ 35  16   6  33  26 117]]


# Unknown class: distance threshold for rejecting an image as 'Unknown' (ID 6) when it is blurred or does not match any known class

In [6]:
import numpy as np

def knn_predict_with_unknown(model, x, distance_threshold=0.4,
                             unknown_label=6, min_confidence=None):
    """Classify one feature vector with k-NN, rejecting outliers as unknown.

    A sample is rejected when the mean distance to its k nearest training
    neighbours exceeds ``distance_threshold`` or, if ``min_confidence`` is
    given, when the neighbours do not agree strongly enough on one class.

    Args:
        model: Fitted k-NN classifier exposing ``kneighbors`` and ``predict``
            (and ``predict_proba`` when ``min_confidence`` is used), e.g.
            ``sklearn.neighbors.KNeighborsClassifier``.
        x: A single feature vector, either 1-D ``(n_features,)`` or a
            single-row 2-D array ``(1, n_features)``.
        distance_threshold: Maximum allowed mean neighbour distance. It is
            expressed in the units of the model's metric, so it must be tuned
            for the feature scaling actually used.
        unknown_label: Value returned for rejected samples (class ID 6 by
            default).
        min_confidence: Optional minimum for the largest value returned by
            ``model.predict_proba``. ``None`` (default) disables this check.

    Returns:
        ``unknown_label`` if the sample is rejected, otherwise the first
        element of ``model.predict`` for the sample.

    Raises:
        ValueError: If ``x`` is neither 1-D nor a single-row 2-D array.
    """
    query = np.asarray(x)
    if query.ndim == 1:
        # Estimators expect a 2-D (n_samples, n_features) input.
        query = query.reshape(1, -1)
    elif query.ndim != 2 or query.shape[0] != 1:
        raise ValueError(
            "x must be a single feature vector of shape (n_features,) or "
            f"(1, n_features); got shape {query.shape}"
        )

    # Only the distances are needed; the neighbour indices are ignored.
    distances, _ = model.kneighbors(query)
    if np.mean(distances) > distance_threshold:
        return unknown_label

    # Optional "no clear majority" rejection: the winning class must reach
    # the requested share of the (possibly distance-weighted) vote.
    if min_confidence is not None:
        if np.max(model.predict_proba(query)) < min_confidence:
            return unknown_label

    return model.predict(query)[0]


# 3. Train SVM model


In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

svc = SVC()

# Scale inside a pipeline so the scaler is re-fitted on the training part of
# every CV fold (no leakage from the validation part) and is applied
# automatically at prediction time. This matches the standardized features
# used for the SVM trained further down in this notebook.
svc_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svc", svc),
])

# Define parameter grid. Keys use the `<step>__<param>` form so they are
# routed to the "svc" step of the pipeline.
# NOTE(review): this is 12 candidates x 5 folds = 60 fits plus a final refit;
# on a large, high-dimensional training set that can take a very long time --
# consider a smaller grid or a subsample if the search has to be interrupted.
param_grid = {
    'svc__C': [0.1, 1, 10],
    'svc__gamma': ['scale', 0.01, 0.1, 1],
    'svc__kernel': ['rbf']
}

grid = GridSearchCV(
    estimator=svc_pipeline,
    param_grid=param_grid,
    cv=5,             # 5-fold cross-validation
    scoring='accuracy',
    n_jobs=-1         # use all available CPU cores
)

grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)
print("Best CV score:", grid.best_score_)

# Use best model: the pipeline refitted on the whole training split with the
# best parameters, so it scales X_test itself before predicting.
best_svc = grid.best_estimator_

y_pred = best_svc.predict(X_test)



KeyboardInterrupt



In [14]:
from sklearn.svm import SVC

# RBF-kernel SVM configured by hand (no hyperparameter search).
model = SVC(
    kernel='rbf',
    C=1.0,                 # sklearn's default; regularization strength is inversely proportional to C
    gamma='scale',
    class_weight='balanced' # weights classes inversely to their frequency in y
)


In [15]:
#scale features
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same
# transformation to the test split so no test statistics enter training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the SVM on the standardized features and predict the held-out split.
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)


# 4. Evaluation


In [16]:
# `y_pred` holds the SVM predictions on the scaled test split computed in the
# previous cell; do not re-predict on the unscaled X_test here.

print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.6571567910265789

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.75      0.78       543
           1       0.63      0.60      0.62       847
           2       0.60      0.57      0.59       693
           3       0.69      0.80      0.74       987
           4       0.59      0.63      0.61       798
           5       0.68      0.39      0.50       233

    accuracy                           0.66      4101
   macro avg       0.67      0.62      0.64      4101
weighted avg       0.66      0.66      0.65      4101


Confusion Matrix:
 [[406  27  15  61  32   2]
 [ 19 508 136  63 115   6]
 [  7 107 396  82  94   7]
 [ 35  47  36 789  65  15]
 [ 22  81  65 113 505  12]
 [ 13  35  11  36  47  91]]


# 5. Save models


### How to use the trained model later
```python
import joblib
knn = joblib.load("knn_waste_classifier.pkl")

# predict on a new feature vector
prediction = knn.predict([new_feature_vector])
print("Predicted waste class:", prediction[0])
```


In [None]:
import joblib

# Persist the fitted k-NN classifier to the current working directory.
joblib.dump(knn, "knn_waste_classifier.pkl")
print("\nModel saved as knn_waste_classifier.pkl")

In [None]:
# The SVM was trained on StandardScaler-transformed features, so the fitted
# scaler is saved alongside it: at inference time new feature vectors must go
# through scaler.transform() before model.predict().
joblib.dump(model, "svm_waste_classifier.pkl")
joblib.dump(scaler, "svm_scaler.pkl")
print("\nSaved model: svm_waste_classifier.pkl")
print("Saved scaler: svm_scaler.pkl")