In [1]:
import os
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt

# Set the dataset
path = Path("garbage-classification-6-classes-775class")

In [2]:
#listing classes of dataset
import os
classes=os.listdir(path)
print("Classes (subfolders):", os.listdir(path))

Classes (subfolders): ['battery', 'glass', 'metal', 'organic', 'paper', 'plastic']


In [3]:
#number of images per class
for cls in classes:
    count = len(os.listdir(os.path.join(path, cls)))
    print(f"{cls}: {count} images")

battery: 775 images
glass: 775 images
metal: 775 images
organic: 775 images
paper: 775 images
plastic: 775 images


In [4]:
#printing sample image names in each class
for cls in classes[:6]: 
    print(f"\nSample files from '{cls}':")
    print(os.listdir(os.path.join(path, cls))[:5])



Sample files from 'battery':
['battery1.jpg', 'battery10.jpg', 'battery100.jpg', 'battery101.jpg', 'battery102.jpg']

Sample files from 'glass':
['glass1.jpg', 'glass10.jpg', 'glass100.jpg', 'glass101.jpg', 'glass102.jpg']

Sample files from 'metal':
['metal1.jpg', 'metal10.jpg', 'metal100.jpg', 'metal101.jpg', 'metal102.jpg']

Sample files from 'organic':
['biological1.jpg', 'biological10.jpg', 'biological100.jpg', 'biological101.jpg', 'biological102.jpg']

Sample files from 'paper':
['paper1.jpg', 'paper10.jpg', 'paper100.jpg', 'paper101.jpg', 'paper102.jpg']

Sample files from 'plastic':
['plastic1.jpg', 'plastic10.jpg', 'plastic100.jpg', 'plastic101.jpg', 'plastic102.jpg']


In [5]:
from PIL import Image
import os

for class_name in classes:
    class_path = os.path.join(path, class_name)
    image_files = [f for f in os.listdir(class_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

    if image_files:
        img_path = os.path.join(class_path, image_files[0])
        with Image.open(img_path) as img:
            print(f"{class_name}: Sample image size = {img.size}")


battery: Sample image size = (280, 180)
glass: Sample image size = (285, 177)
metal: Sample image size = (512, 384)
organic: Sample image size = (236, 214)
paper: Sample image size = (512, 384)
plastic: Sample image size = (512, 384)


In [6]:
#checking if any corrupt or unreadable images are present across all the classes
from PIL import Image, UnidentifiedImageError

total_images = 0
corrupt_images = []

for cls in classes:
    class_path = os.path.join(path, cls)
    for filename in os.listdir(class_path):
        total_images += 1
        file_path = os.path.join(class_path, filename)
        try:
            with Image.open(file_path) as img:
                img.verify()  # Check if image is readable
        except (UnidentifiedImageError, IOError, OSError):
            corrupt_images.append(file_path)

print(f"\nTotal images checked: {total_images}")
print(f"Corrupt or unreadable images: {len(corrupt_images)}")

if corrupt_images:
    print("Examples of corrupt images:")
    print(corrupt_images[:5])
else:
    print("No corrupt or missing image files found.")



Total images checked: 4650
Corrupt or unreadable images: 0
No corrupt or missing image files found.


In [7]:
import os
from PIL import Image

inp, out = "garbage-classification-6-classes-775class", "180"
for root, _, files in os.walk(inp):
    for f in files:
        if f.lower().endswith(('jpg','jpeg','png')):
            img = Image.open(os.path.join(root,f)).convert("RGB").resize((180,180))
            path = os.path.join(out, os.path.relpath(root, inp))
            os.makedirs(path, exist_ok=True)
            img.save(os.path.join(path,f))


In [8]:
import os
from PIL import Image

inp, out = "garbage-classification-6-classes-775class", "180_240"
for root, _, files in os.walk(inp):
    for f in files:
        if f.lower().endswith(('jpg','jpeg','png')):
            img = Image.open(os.path.join(root,f)).convert("RGB").resize((180,240))
            path = os.path.join(out, os.path.relpath(root, inp))
            os.makedirs(path, exist_ok=True)
            img.save(os.path.join(path,f))


In [9]:
import os
from PIL import Image

inp, out = "garbage-classification-6-classes-775class", "240"
for root, _, files in os.walk(inp):
    for f in files:
        if f.lower().endswith(('jpg','jpeg','png')):
            img = Image.open(os.path.join(root,f)).convert("RGB").resize((240,240))
            path = os.path.join(out, os.path.relpath(root, inp))
            os.makedirs(path, exist_ok=True)
            img.save(os.path.join(path,f))


REMOVING NOISE AND SHARPENING TO GET CLEAR IMAGES 

In [10]:
import os
import cv2

input_folder = "180"
blur_threshold = 100.0  # Adjustable: lower values => blurrier

images_to_sharpen = []

for cls in os.listdir(input_folder):
    class_folder = os.path.join(input_folder, cls)
    for filename in os.listdir(class_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            img_path = os.path.join(class_folder, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            # Blur detection using Laplacian variance
            lap_var = cv2.Laplacian(img, cv2.CV_64F).var()
            # Noise estimation using local standard deviation 
            noise = cv2.meanStdDev(img)[1]
            # Add images with low sharpness or high noise
            if lap_var < blur_threshold or noise > 40: 
                images_to_sharpen.append((img_path, lap_var, noise))

print(f"Images needing sharpening: {len(images_to_sharpen)}")
print("Sample (path, Laplacian variance, noise std):")
for item in images_to_sharpen[:10]:
    print(item)


Images needing sharpening: 3372
Sample (path, Laplacian variance, noise std):
('180\\battery\\battery1.jpg', np.float64(2782.6017871932636), array([[52.23167754]]))
('180\\battery\\battery10.jpg', np.float64(2714.8811417609745), array([[82.95641478]]))
('180\\battery\\battery100.jpg', np.float64(3609.0465080932786), array([[54.48993637]]))
('180\\battery\\battery101.jpg', np.float64(1976.7722471412512), array([[54.84393361]]))
('180\\battery\\battery102.jpg', np.float64(603.4087654320988), array([[61.8621533]]))
('180\\battery\\battery103.jpg', np.float64(2238.971140671201), array([[73.1895068]]))
('180\\battery\\battery104.jpg', np.float64(4280.087612138966), array([[68.62434711]]))
('180\\battery\\battery105.jpg', np.float64(1929.6409816491387), array([[87.2343758]]))
('180\\battery\\battery106.jpg', np.float64(2552.8253241417083), array([[69.42657299]]))
('180\\battery\\battery107.jpg', np.float64(3064.2860800325793), array([[99.27825427]]))


In [11]:
import cv2
import numpy as np

# images_to_sharpen is list of (img_path, sharpness, noise)
for img_path, lap_var, noise in images_to_sharpen:
    img = cv2.imread(img_path)
    if img is None:
        continue
    # Sharpen using kernel
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]], np.float32)
    sharpened = cv2.filter2D(img, -1, kernel)
    cv2.imwrite(img_path, sharpened)  # Overwrites original


In [12]:
import os
import cv2

input_folder = "180_240"
blur_threshold = 100.0  

images_to_sharpen_2 = []

for cls in os.listdir(input_folder):
    class_folder = os.path.join(input_folder, cls)
    for filename in os.listdir(class_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            img_path = os.path.join(class_folder, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            # Blur detection using Laplacian variance
            lap_var = cv2.Laplacian(img, cv2.CV_64F).var()
            # Noise estimation using local standard deviation 
            noise = cv2.meanStdDev(img)[1]
            # Add images with low sharpness or high noise
            if lap_var < blur_threshold or noise > 40:  
                images_to_sharpen_2.append((img_path, lap_var, noise))

print(f"Images needing sharpening: {len(images_to_sharpen_2)}")
print("Sample (path, Laplacian variance, noise std):")
for item in images_to_sharpen_2[:10]:
    print(item)


Images needing sharpening: 3420
Sample (path, Laplacian variance, noise std):
('180_240\\battery\\battery1.jpg', np.float64(1347.7084151615013), array([[51.67560315]]))
('180_240\\battery\\battery10.jpg', np.float64(1819.0780090449243), array([[82.97455175]]))
('180_240\\battery\\battery100.jpg', np.float64(2563.960155341221), array([[54.56804265]]))
('180_240\\battery\\battery101.jpg', np.float64(1330.7422619834533), array([[54.82029483]]))
('180_240\\battery\\battery102.jpg', np.float64(544.9568518518519), array([[61.92790656]]))
('180_240\\battery\\battery103.jpg', np.float64(1737.1857166018306), array([[73.19009086]]))
('180_240\\battery\\battery104.jpg', np.float64(3452.548028891782), array([[68.77324186]]))
('180_240\\battery\\battery105.jpg', np.float64(1785.51599998071), array([[87.26819117]]))
('180_240\\battery\\battery106.jpg', np.float64(2561.098946115719), array([[69.85137146]]))
('180_240\\battery\\battery107.jpg', np.float64(2678.598240740741), array([[99.50670402]]))


In [14]:
import cv2
import numpy as np

# images_to_sharpen is list of (img_path, sharpness, noise)
for img_path, lap_var, noise in images_to_sharpen_2:
    img = cv2.imread(img_path)
    if img is None:
        continue
    # Sharpen using kernel
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]], np.float32)
    sharpened_2 = cv2.filter2D(img, -1, kernel)
    cv2.imwrite(img_path, sharpened_2)  # Overwrites original; 


In [17]:
import os
import cv2

input_folder = "240"
blur_threshold = 100.0  

images_to_sharpen_3 = []

for cls in os.listdir(input_folder):
    class_folder = os.path.join(input_folder, cls)
    for filename in os.listdir(class_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            img_path = os.path.join(class_folder, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            # Blur detection using Laplacian variance
            lap_var = cv2.Laplacian(img, cv2.CV_64F).var()
            # Noise estimation using local standard deviation 
            noise = cv2.meanStdDev(img)[1]
            # Add images with low sharpness or high noise
            if lap_var < blur_threshold or noise > 40:  # noise threshold = 40
                images_to_sharpen_3.append((img_path, lap_var, noise))

print(f"Images needing sharpening: {len(images_to_sharpen_3)}")
print("Sample (path, Laplacian variance, noise std):")
for item in images_to_sharpen_3[:10]:
    print(item)


Images needing sharpening: 3127
Sample (path, Laplacian variance, noise std):
('240\\battery\\battery1.jpg', np.float64(12283.124003847175), array([[62.61201561]]))
('240\\battery\\battery10.jpg', np.float64(14437.189914887153), array([[89.5264954]]))
('240\\battery\\battery100.jpg', np.float64(32849.52552689887), array([[73.71578853]]))
('240\\battery\\battery101.jpg', np.float64(12021.215621870177), array([[62.40818436]]))
('240\\battery\\battery102.jpg', np.float64(3752.4603819444446), array([[64.33212223]]))
('240\\battery\\battery103.jpg', np.float64(9174.856684020244), array([[80.52539843]]))
('240\\battery\\battery104.jpg', np.float64(32434.232752237654), array([[83.08665625]]))
('240\\battery\\battery105.jpg', np.float64(7090.823432406202), array([[91.83200555]]))
('240\\battery\\battery106.jpg', np.float64(14343.698429194395), array([[75.67789687]]))
('240\\battery\\battery107.jpg', np.float64(16378.340919979444), array([[104.46836398]]))


In [16]:
import cv2
import numpy as np

for img_path, lap_var, noise in images_to_sharpen_3:
    img = cv2.imread(img_path)
    if img is None:
        continue
    # Sharpen using kernel
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]], np.float32)
    sharpened_3= cv2.filter2D(img, -1, kernel)
    cv2.imwrite(img_path, sharpened_3)  # Overwrites original;


LABELLING THE CLASSES IN THE DATASET

In [10]:
import os

data_dir = "180"  
class_names = sorted(os.listdir(data_dir))
class_to_label = {name: idx for idx, name in enumerate(class_names)}
print(class_to_label)


{'battery': 0, 'glass': 1, 'metal': 2, 'organic': 3, 'paper': 4, 'plastic': 5}


In [11]:
import os

data_dir = "180_240"  
class_names = sorted(os.listdir(data_dir))
class_to_label = {name: idx for idx, name in enumerate(class_names)}
print(class_to_label)


{'battery': 0, 'glass': 1, 'metal': 2, 'organic': 3, 'paper': 4, 'plastic': 5}


In [12]:
import os

data_dir = "240"  
class_names = sorted(os.listdir(data_dir))
class_to_label = {name: idx for idx, name in enumerate(class_names)}
print(class_to_label)


{'battery': 0, 'glass': 1, 'metal': 2, 'organic': 3, 'paper': 4, 'plastic': 5}
