# Exploration of WASTE Classification Dataset

This notebook is used for exploring the WASTE Classification dataset. We will analyze the dataset structure, visualize some samples, and perform preliminary data analysis.

In [1]:
import os
import matplotlib.pyplot as plt
import cv2
import numpy as np
from glob import glob

# Define paths to the dataset (relative to the notebook's working directory)
train_path = '../data/raw/TRAIN/'
test_path = '../data/raw/TEST/'

# List all .jpg images in the training and test sets (one sub-folder per class).
# glob returns files in a file-system-dependent order, so sort the results to
# keep every downstream cell (e.g. "first n samples") reproducible across runs.
train_images = sorted(glob(os.path.join(train_path, '*/*.jpg')))
test_images = sorted(glob(os.path.join(test_path, '*/*.jpg')))

# Display the number of images in each set
print(f'Training images: {len(train_images)}')
print(f'Test images: {len(test_images)}')

In [2]:
# Function to display a sample of images
def display_sample_images(images, n=5):
    """Show up to ``n`` images from ``images`` side by side in one row.

    Args:
        images: Sequence of image file paths to read with ``cv2.imread``.
        n: Maximum number of images to show. When ``images`` holds fewer
            than ``n`` paths, only the available ones are shown.

    Paths that ``cv2.imread`` cannot read are reported and their slot in
    the row is left empty instead of raising.
    """
    # Clamp to what is actually available so short lists do not raise IndexError.
    sample = list(images[:max(n, 0)])
    if not sample:
        print('No images to display.')
        return

    plt.figure(figsize=(15, 5))
    for i, path in enumerate(sample):
        img = cv2.imread(path)
        # cv2.imread signals a missing/corrupt file by returning None, not by raising.
        if img is None:
            print(f'Skipping unreadable image: {path}')
            continue
        # OpenCV loads images as BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.subplot(1, len(sample), i + 1)
        plt.imshow(img)
        plt.axis('off')
    plt.show()

# Preview the first five images of the training set
display_sample_images(images=train_images, n=5)

## Preliminary Data Analysis

In this section, we analyze how the training images are distributed across classes.

In [3]:
# Count the number of images per class in the training set
class_counts = {}
for img in train_images:
    # The class label is the name of the image's parent folder. Use os.path
    # rather than splitting on '/' so this also works with Windows separators.
    label = os.path.basename(os.path.dirname(img))
    class_counts[label] = class_counts.get(label, 0) + 1

# Order classes alphabetically so the bar order does not depend on file listing order
class_counts = dict(sorted(class_counts.items()))

# Plot the class distribution
plt.figure(figsize=(10, 5))
plt.bar(list(class_counts.keys()), list(class_counts.values()))
plt.title('Class Distribution in Training Set')
plt.xlabel('Class')
plt.ylabel('Number of Images')
plt.xticks(rotation=45)
plt.show()