Upload `180.zip` and `clean_data_only.zip` to the working directory before running the cells.

In [None]:
!unzip 180.zip

Archive:  180.zip
   creating: 180/
   creating: 180/battery/
  inflating: 180/battery/battery1.jpg  
  inflating: 180/battery/battery10.jpg  
  inflating: 180/battery/battery100.jpg  
  inflating: 180/battery/battery101.jpg  
  inflating: 180/battery/battery102.jpg  
  inflating: 180/battery/battery103.jpg  
  inflating: 180/battery/battery104.jpg  
  inflating: 180/battery/battery105.jpg  
  inflating: 180/battery/battery106.jpg  
  inflating: 180/battery/battery107.jpg  
  inflating: 180/battery/battery108.jpg  
  inflating: 180/battery/battery109.jpg  
  inflating: 180/battery/battery11.jpg  
  inflating: 180/battery/battery110.jpg  
  inflating: 180/battery/battery111.jpg  
  inflating: 180/battery/battery112.jpg  
  inflating: 180/battery/battery113.jpg  
  inflating: 180/battery/battery114.jpg  
  inflating: 180/battery/battery115.jpg  
  inflating: 180/battery/battery116.jpg  
  inflating: 180/battery/battery117.jpg  
  inflating: 180/battery/battery118.jpg  
  inflating: 180/b

In [None]:
!unzip clean_data_only.zip

Archive:  clean_data_only.zip
   creating: clean_data_only/
  inflating: clean_data_only/10.jpg  
  inflating: clean_data_only/100.jpg  
  inflating: clean_data_only/1002.jpg  
  inflating: clean_data_only/1010.jpg  
  inflating: clean_data_only/1013.jpg  
  inflating: clean_data_only/1044.jpg  
  inflating: clean_data_only/105.jpg  
  inflating: clean_data_only/1057.jpg  
  inflating: clean_data_only/1062.jpg  
  inflating: clean_data_only/1085.jpg  
  inflating: clean_data_only/1089.jpg  
  inflating: clean_data_only/109.jpg  
  inflating: clean_data_only/1097.jpg  
  inflating: clean_data_only/1103.jpg  
  inflating: clean_data_only/1110.jpg  
  inflating: clean_data_only/1111.jpg  
  inflating: clean_data_only/1117.jpg  
  inflating: clean_data_only/1126.jpg  
  inflating: clean_data_only/1128.jpg  
  inflating: clean_data_only/1132.jpg  
  inflating: clean_data_only/1135.jpg  
  inflating: clean_data_only/1137.jpg  
  inflating: clean_data_only/1148.jpg  
  inflating: clean_data_o

# Binary classification using classical ML (80-20 split)

In [None]:
import os
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Paths for datasets, relative to the working directory (the folders are created by the unzip cells)
garbage_dir = '180'        # garbage images, one subfolder per category (e.g. 180/battery/)
non_garbage_dir = 'clean_data_only'  # non-garbage images, stored directly in this folder

def _is_image_file(filename):
    """Return True when ``filename`` ends in .jpg, .png or .jpeg (case-insensitive)."""
    return filename.lower().endswith(('.jpg', '.png', '.jpeg'))


def _load_gray_image(img_path, image_size):
    """Read one image as grayscale and resize it to ``image_size``."""
    img = imread(img_path, as_gray=True)
    return resize(img, image_size)


def load_images_and_labels(garbage_dir, non_garbage_dir, image_size=(180, 180)):
    """Load the binary garbage / non-garbage dataset as grayscale images.

    Files are visited in sorted order. ``os.walk`` and ``os.listdir`` return
    entries in arbitrary, filesystem-dependent order, so without sorting the
    sample order (and therefore any seeded train/test split made from it)
    would differ from one machine to the next.

    Args:
        garbage_dir: Root folder of garbage images; searched recursively.
            Every image found is labelled 1.
        non_garbage_dir: Folder of non-garbage images; only files directly
            inside it are read. Every image found is labelled 0.
        image_size: Size passed to ``resize`` for every image. Defaults to
            (180, 180).

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: the resized
        grayscale images (garbage first, then non-garbage) and their labels.
    """
    images = []
    labels = []

    # Garbage images (label=1), collected recursively
    for root, dirs, files in os.walk(garbage_dir):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for file in sorted(files):
            if _is_image_file(file):
                images.append(_load_gray_image(os.path.join(root, file), image_size))
                labels.append(1)

    # Non-garbage images (label=0), top level only
    for file in sorted(os.listdir(non_garbage_dir)):
        if _is_image_file(file):
            images.append(_load_gray_image(os.path.join(non_garbage_dir, file), image_size))
            labels.append(0)

    return images, labels

# Read every image together with its binary label
X, y = load_images_and_labels(garbage_dir, non_garbage_dir)

# Describe each image by its HOG feature vector
features = list(map(
    lambda img: hog(img, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2)),
    X,
))

# Hold out 20% of the samples for testing; the fixed seed keeps the shuffle repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train SVM classifier: a linear-kernel SVC fitted on the HOG features of the 80% training split
binary80_model = SVC(kernel='linear')
binary80_model.fit(X_train, y_train)


In [None]:
# Accuracy on the samples the model was fitted on (a sanity check, not a measure of generalisation)
train_preds = binary80_model.predict(X_train)
train_acc = accuracy_score(y_train, train_preds)
print(f"Training Accuracy: {train_acc*100:.2f}%")


Training Accuracy: 100.00%


In [None]:
# Evaluate on the held-out 20% test split; `preds` is reused later by the metrics cell
preds = binary80_model.predict(X_test)
acc = accuracy_score(y_test, preds)
print(f"Test Accuracy: {acc*100:.2f}%")


Test Accuracy: 97.99%


# Multiclass classification of images labelled as garbage (label 1)


In [None]:
# Garbage categories; a category's position in this list is its integer class label
subclass_names = ['battery', 'glass', 'metal', 'organic', 'paper', 'plastic']
subclass_to_idx = dict(zip(subclass_names, range(len(subclass_names))))


In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class GarbageSubclassDataset(Dataset):
    """Dataset of garbage images labelled by the subfolder they are stored in.

    ``garbage_root`` is expected to contain one subfolder per category. Only
    subfolders whose name is a key of the class mapping are used; any other
    entry in ``garbage_root`` is skipped. Folders and files are visited in
    sorted order so that a given index always refers to the same image,
    regardless of the order in which the filesystem lists directory entries.

    Args:
        garbage_root: Folder containing the per-category subfolders.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``.
        class_to_idx: Optional mapping from subfolder name to integer label.
            Defaults to the module-level ``subclass_to_idx``.

    Attributes:
        image_paths: Paths of all images found (.jpg, .jpeg, .png).
        labels: Integer label of each entry in ``image_paths``.
    """

    def __init__(self, garbage_root, transform=None, class_to_idx=None):
        if class_to_idx is None:
            class_to_idx = subclass_to_idx
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for subclass in sorted(os.listdir(garbage_root)):
            subclass_path = os.path.join(garbage_root, subclass)
            if not os.path.isdir(subclass_path):
                continue
            label = class_to_idx.get(subclass)
            if label is None:
                # Folder name is not a known category
                continue
            for image_file in sorted(os.listdir(subclass_path)):
                if image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    self.image_paths.append(os.path.join(subclass_path, image_file))
                    self.labels.append(label)

    def __len__(self):
        """Return the number of images found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at ``idx``.

        The image is opened, converted to RGB, and passed through
        ``transform`` when one was given.
        """
        # convert() returns a new image, so the source file can be closed right away
        with Image.open(self.image_paths[idx]) as source:
            img = source.convert('RGB')
        if self.transform:
            img = self.transform(img)
        label = self.labels[idx]
        return img, label

In [None]:
# GarbageCNN's first fully connected layer is sized for 180x180 inputs (32 * 45 * 45 features),
# so enforce that size here instead of relying on every file on disk already having it.
transform = transforms.Compose([
    transforms.Resize((180, 180)),
    transforms.ToTensor(),
])
garbage_dataset = GarbageSubclassDataset('180', transform=transform)
garbage_loader = torch.utils.data.DataLoader(garbage_dataset, batch_size=32, shuffle=True)
print(f"Total garbage subclass images: {len(garbage_dataset)}")

Total garbage subclass images: 4650


In [None]:
# CNN Model
class GarbageCNN(nn.Module):
    """Small CNN that classifies 3-channel 180x180 images into garbage subclasses.

    Layout: two (3x3 convolution -> ReLU -> 2x2 max-pool) stages followed by
    two fully connected layers. Each pooling halves the spatial size
    (180 -> 90 -> 45), which is where the ``32 * 45 * 45`` input width of
    ``fc1`` comes from.

    Args:
        num_classes: Number of output logits. Defaults to
            ``len(subclass_names)``.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            num_classes = len(subclass_names)
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.fc1 = nn.Linear(32 * 45 * 45, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a (batch, 3, 180, 180) input."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 32 * 45 * 45),
        # this never folds extra pixels into the batch dimension: an input of the wrong
        # spatial size fails at fc1 with a shape error instead of producing extra rows.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [None]:


# Training setup: use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

cnnmodel = GarbageCNN()
cnnmodel = cnnmodel.to(device)

# Cross-entropy over the class logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnnmodel.parameters(), lr=0.001)



In [None]:
# Training loop: 10 passes over garbage_loader, reporting per-epoch statistics
for epoch in range(10):
    cnnmodel.train()
    total_loss, correct, total = 0.0, 0, 0

    for images, labels in garbage_loader:
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step
        optimizer.zero_grad()
        outputs = cnnmodel(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Running statistics; total_loss accumulates the per-batch loss values
        predicted = outputs.argmax(dim=1)
        total_loss += loss.item()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    print(f"Epoch {epoch+1}, Loss: {total_loss:.3f}, Accuracy: {100 * correct / total:.2f}%")


Epoch 1, Loss: 194.655, Accuracy: 46.77%
Epoch 2, Loss: 135.263, Accuracy: 64.62%
Epoch 3, Loss: 104.036, Accuracy: 74.24%
Epoch 4, Loss: 77.647, Accuracy: 81.08%
Epoch 5, Loss: 56.270, Accuracy: 86.80%
Epoch 6, Loss: 36.379, Accuracy: 91.53%
Epoch 7, Loss: 23.801, Accuracy: 94.92%
Epoch 8, Loss: 14.662, Accuracy: 96.84%
Epoch 9, Loss: 11.704, Accuracy: 97.51%
Epoch 10, Loss: 8.132, Accuracy: 98.39%


# saving both binary80_model and cnnmodel for future use


In [None]:
import joblib
# Persist the fitted 80-20 SVM to 'binary80_model.joblib' in the working directory
joblib.dump(binary80_model, 'binary80_model.joblib')


['binary80_model.joblib']

In [None]:
# Save only the CNN's state_dict (its parameters), not the model object itself
torch.save(cnnmodel.state_dict(), 'cnnmodel.pth')

# **binary classification using classical ml 70-30**

In [None]:
# Reuse the images, labels and HOG features already computed for the 80-20 experiment.
# Both experiments use the same folders and the same HOG settings, so reading every image
# and recomputing every descriptor a second time would only repeat that work.
X_full, y_full = X, y
features_full = features

# Split dataset into train and test with 70-30 ratio
X_train_70, X_test_30, y_train_70, y_test_30 = train_test_split(features_full, y_full, test_size=0.3, random_state=42)




In [None]:
# Train SVM classifier: a linear-kernel SVC fitted on the HOG features of the 70% training split
binary7030_model = SVC(kernel='linear')
binary7030_model.fit(X_train_70, y_train_70)


In [None]:
# Accuracy on the samples the model was fitted on (a sanity check, not a measure of generalisation)
train7030_preds = binary7030_model.predict(X_train_70)
train7030_acc = accuracy_score(y_train_70, train7030_preds)
print(f"Training Accuracy: {train7030_acc*100:.2f}%")


Training Accuracy: 100.00%


In [None]:
# Evaluate on the held-out 30% test split; `preds7030` is reused later by the metrics cell
preds7030 = binary7030_model.predict(X_test_30)
acc7030 = accuracy_score(y_test_30, preds7030)
print(f"Test Accuracy: {acc7030*100:.2f}%")


Test Accuracy: 98.01%


# **metrics for binary 8020, binary 7030 and cnn model**

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import torch

def _classification_metrics(y_true, y_pred, average):
    """Return [accuracy, precision, recall, F1] for the given labels and predictions."""
    return [
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average=average),
        recall_score(y_true, y_pred, average=average),
        f1_score(y_true, y_pred, average=average),
    ]


def _comparison_table(binary_column, binary_metrics, cnn_metrics):
    """Build the raw metrics dict and its 4-decimal string DataFrame for one binary model vs the CNN."""
    data = {
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        binary_column: list(binary_metrics),
        'CNN Subclass Model': list(cnn_metrics),
    }
    table = pd.DataFrame(data)
    for column in (binary_column, 'CNN Subclass Model'):
        table[column] = table[column].apply(lambda x: f"{x:.4f}")
    return data, table


# CNN subclass model evaluation (multi-class)
# NOTE(review): garbage_loader is the same loader the CNN was trained on, so the CNN
# figures below are training-set metrics, whereas the SVM figures come from held-out
# test splits. The two columns of each table are therefore not directly comparable.
cnnmodel.eval()
all_labels = []
all_preds = []
with torch.no_grad():
    for images, labels in garbage_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = cnnmodel(images)
        _, predicted = torch.max(outputs, 1)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Binary SVC model metrics (80-20 split)
acc_bin_80, prec_bin_80, recall_bin_80, f1_bin_80 = _classification_metrics(y_test, preds, 'binary')

# Binary SVC model metrics (70-30 split)
acc_bin_70, prec_bin_70, recall_bin_70, f1_bin_70 = _classification_metrics(y_test_30, preds7030, 'binary')

# CNN subclass model metrics (multi-class, support-weighted average over the classes)
acc_sub, prec_sub, recall_sub, f1_sub = _classification_metrics(all_labels, all_preds, 'weighted')

# Table for the 80-20 split
metrics_data_80, df_metrics_80 = _comparison_table(
    'Binary 8020 Model',
    [acc_bin_80, prec_bin_80, recall_bin_80, f1_bin_80],
    [acc_sub, prec_sub, recall_sub, f1_sub],
)
print("Metrics for Binary 80-20 Model vs CNN Subclass Model:")
print(df_metrics_80.to_string(index=False))
print("\n")

# Table for the 70-30 split
metrics_data_70, df_metrics_70 = _comparison_table(
    'Binary 7030 Model',
    [acc_bin_70, prec_bin_70, recall_bin_70, f1_bin_70],
    [acc_sub, prec_sub, recall_sub, f1_sub],
)
print("Metrics for Binary 70-30 Model vs CNN Subclass Model:")
print(df_metrics_70.to_string(index=False))

Metrics for Binary 80-20 Model vs CNN Subclass Model:
   Metric Binary 8020 Model CNN Subclass Model
 Accuracy            0.9799             0.9692
Precision            0.9830             0.9721
   Recall            0.9778             0.9692
 F1 Score            0.9804             0.9695


Metrics for Binary 70-30 Model vs CNN Subclass Model:
   Metric Binary 7030 Model CNN Subclass Model
 Accuracy            0.9801             0.9692
Precision            0.9843             0.9721
   Recall            0.9766             0.9692
 F1 Score            0.9804             0.9695
