In [18]:
# Load the Dataset (Only Loading for Now)
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset location on disk
dataset_path = "TrashType_Image_Dataset"  # Replace with actual path

# Image dimensions and batch size shared by both iterators
img_height, img_width = 128, 128
batch_size = 32

# Plain generator: no augmentation or rescaling at this stage. It only
# holds back 20% of the images for validation (the other 80% are for training).
datagen = ImageDataGenerator(validation_split=0.2)

# Keyword arguments common to the training and validation iterators
flow_kwargs = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

# Training portion of the split
train_data = datagen.flow_from_directory(dataset_path, subset='training', **flow_kwargs)

# Validation portion of the split
val_data = datagen.flow_from_directory(dataset_path, subset='validation', **flow_kwargs)

# Show the class-name -> index mapping reported by the training iterator
print("Class indices:", train_data.class_indices)


Found 1343 images belonging to 6 classes.
Found 332 images belonging to 6 classes.
Class indices: {'cardboard': 0, 'glass': 1, 'metal': 2, 'paper': 3, 'plastic': 4, 'trash': 5}


In [20]:
# Handle Missing or Corrupted Images

import os
from PIL import Image

dataset_path = "TrashType_Image_Dataset"  # Replace with actual path
corrupt_count = 0

# Walk every class folder and delete files that Pillow cannot open/verify.
# WARNING: this cell is destructive - it removes files from dataset_path.
for folder in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, folder)

    # Only sub-directories are scanned; stray top-level files are ignored
    if not os.path.isdir(folder_path):
        continue

    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)

        # Skip nested directories and other non-regular entries: opening one
        # raises an OSError, and the os.remove() in the handler below would
        # then fail on a directory and abort the whole scan.
        if not os.path.isfile(file_path):
            continue

        try:
            # Image.open keeps the file open; the context manager guarantees
            # the handle is released even when verify() raises, so the file
            # is no longer held open by this process when it is removed.
            with Image.open(file_path) as img:
                img.verify()  # Verify image integrity
        except (IOError, SyntaxError):
            print(f"Corrupted image found and removed: {file_path}")
            os.remove(file_path)
            corrupt_count += 1

print(f"✅ Done. Total corrupted images removed: {corrupt_count}")


✅ Done. Total corrupted images removed: 0


In [21]:
# Check for Empty Folders

# Gather every sub-directory of dataset_path that has no entries in it
empty_folders = [
    candidate
    for candidate in (
        os.path.join(dataset_path, entry) for entry in os.listdir(dataset_path)
    )
    if os.path.isdir(candidate) and not os.listdir(candidate)
]

# Report the outcome: either the all-clear, or one offending path per line
if not empty_folders:
    print("✅ No empty folders found.")
else:
    print("⚠️ Empty class folders found:")
    print("\n".join(empty_folders))


✅ No empty folders found.


In [22]:
# Remove Duplicate Images from Dataset
import os
import imagehash
from PIL import Image

# Hashes of every image kept so far. The set is shared across all class
# folders, so an image whose hash was already seen in a different class is
# removed as well.
# NOTE(review): imagehash.average_hash is a perceptual hash, so visually
# similar (not only byte-identical) images can share a hash value and be
# deleted - confirm this is the intended notion of "duplicate".
seen_hashes = set()
duplicates_removed = 0

# os.listdir returns entries in arbitrary order; sorting makes the choice of
# which copy survives (the first one in sorted order) reproducible across
# runs and machines.
for folder in sorted(os.listdir(dataset_path)):
    folder_path = os.path.join(dataset_path, folder)

    if not os.path.isdir(folder_path):
        continue

    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)

        # Nested directories / non-regular entries are not images; skip them
        # instead of letting them surface as processing errors.
        if not os.path.isfile(file_path):
            continue

        try:
            # Hash inside the context manager so the file handle is closed
            # before the file is (possibly) removed.
            with Image.open(file_path) as img:
                hash_val = imagehash.average_hash(img)

            if hash_val in seen_hashes:
                os.remove(file_path)
                print(f"🗑️ Removed duplicate: {file_path}")
                duplicates_removed += 1
            else:
                seen_hashes.add(hash_val)

        except Exception as e:
            # Best effort: report the unreadable file and keep scanning
            print(f"⚠️ Error processing {file_path}: {e}")

print(f"\n✅ Duplicate removal complete. Total duplicates removed: {duplicates_removed}")



✅ Duplicate removal complete. Total duplicates removed: 0


In [24]:
# Resize images to a uniform size (e.g., 128x128 or 224x224)
from PIL import Image
import os

# Resize every image under dataset_path in place to one uniform RGB size.
# WARNING: this cell overwrites the original files.

# Choose target size
target_size = (128, 128)  # or (224, 224)

resized_count = 0
failed_count = 0

for folder in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, folder)

    if not os.path.isdir(folder_path):
        continue

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        try:
            with Image.open(file_path) as img:
                # Already uniform: skip, so re-running the cell does not
                # re-encode (and further degrade) files it already processed.
                if img.mode == "RGB" and img.size == target_size:
                    continue

                # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is
                # the same filter under the name that still exists. With the
                # old name every file raised AttributeError, which the
                # handler below used to swallow silently.
                img_resized = img.convert("RGB").resize(target_size, Image.LANCZOS)

            # Save only after the source handle is closed, since the output
            # path is the same file that was just read.
            img_resized.save(file_path)
            resized_count += 1

        except Exception as e:
            # Best effort: keep going, but make the failure visible
            failed_count += 1
            print(f"⚠️ Could not resize {file_path}: {e}")

print(f"✅ Resizing complete. Images resized: {resized_count}, failed: {failed_count}")

In [28]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

dataset_path = "TrashType_Image_Dataset"

# Image size and batch size used by both iterators
img_height, img_width = 128, 128
batch_size = 32

# Generator that scales pixel values by 1/255 and reserves 20% of the
# images for validation
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Build the training iterator first, then the validation iterator; the two
# calls differ only in the `subset` they request.
train_data, val_data = (
    datagen.flow_from_directory(
        dataset_path,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        subset=subset_name,
    )
    for subset_name in ('training', 'validation')
)


Found 1343 images belonging to 6 classes.
Found 332 images belonging to 6 classes.
