In [1]:
import os
import shutil
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img
from sklearn.model_selection import train_test_split
import PIL
from PIL import Image, UnidentifiedImageError 

# Define paths
original_dataset_dir = 'Dataset'  # Folder with all images categorized by subfolders (classes)
output_base_dir = 'output_dataset'
train_dir = os.path.join(output_base_dir, 'train')
test_dir = os.path.join(output_base_dir, 'test')

# Define parameters
test_size = 0.2  # Proportion of data to be used for testing
IMG_SIZE = (224, 224)  # Resize images for consistency
AUGMENTATIONS = 10  # Number of augmented images per original image

# Create output directories
def create_dir_structure(base_dir, classes):
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    for class_name in classes:
        os.makedirs(os.path.join(base_dir, class_name), exist_ok=True)

# Split dataset into training and testing
def split_dataset(base_dir, output_train_dir, output_test_dir, test_ratio):
    classes = os.listdir(base_dir)
    create_dir_structure(output_train_dir, classes)
    create_dir_structure(output_test_dir, classes)

    for class_name in classes:
        class_dir = os.path.join(base_dir, class_name)
        images = os.listdir(class_dir)
        train_images, test_images = train_test_split(images, test_size=test_ratio, random_state=42)

        for img in train_images:
            shutil.copy(os.path.join(class_dir, img), os.path.join(output_train_dir, class_name))
        for img in test_images:
            shutil.copy(os.path.join(class_dir, img), os.path.join(output_test_dir, class_name))

# Augment training data
def augment_images(input_dir, output_dir, augmentations):
    datagen = ImageDataGenerator(
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    for class_name in os.listdir(input_dir):
        class_dir = os.path.join(input_dir, class_name)
        output_class_dir = os.path.join(output_dir, class_name)
        
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            
            # Check if the file is a valid image before processing
            try:
                Image.open(img_path).verify() 
            except (IOError, SyntaxError) as e:
                print(f"Skipping invalid image file: {img_path} due to error: {e}")
                continue # Skip to the next image

            img = load_img(img_path, target_size=IMG_SIZE)
            x = img_to_array(img)
            x = x.reshape((1,) + x.shape)

            i = 0
            for batch in datagen.flow(x, batch_size=1, save_to_dir=output_class_dir, 
                                      save_prefix='aug', save_format='jpeg'):
                i += 1
                if i >= augmentations:
                    break  # Stop after creating `augmentations` images


# Additional Preprocessing (Normalization)
def preprocess_images(input_dir):
    datagen = ImageDataGenerator(rescale=1.0/255)  # Normalize pixel values to [0, 1]
    for class_name in os.listdir(input_dir):
        class_dir = os.path.join(input_dir, class_name)
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            try:
                # Attempt to open and verify the image
                img = load_img(img_path, target_size=IMG_SIZE)
            except UnidentifiedImageError:
                print(f"Skipping invalid or unreadable image: {img_path}")
                # Optional: Remove the problematic file if you want to clean up the dataset
                os.remove(img_path) 
                continue # Move on to the next image

            img_array = img_to_array(img) / 255.0  # Normalize
            # Save the preprocessed image back (Optional)
            array_to_img(img_array).save(img_path)

# Main workflow
print("Splitting dataset...")
split_dataset(original_dataset_dir, train_dir, test_dir, test_size)

print("Augmenting training data...")
augment_images(train_dir, train_dir, AUGMENTATIONS)

print("Normalizing images...")
preprocess_images(train_dir)
preprocess_images(test_dir)

print("Dataset preparation complete. Training and testing datasets are ready.")


Splitting dataset...
Augmenting training data...
Skipping invalid image file: output_dataset\train\HDPE\image197.avif due to error: cannot identify image file 'output_dataset\\train\\HDPE\\image197.avif'
Skipping invalid image file: output_dataset\train\LDPE\image115.avif due to error: cannot identify image file 'output_dataset\\train\\LDPE\\image115.avif'
Skipping invalid image file: output_dataset\train\PET\image140.avif due to error: cannot identify image file 'output_dataset\\train\\PET\\image140.avif'
Skipping invalid image file: output_dataset\train\PP\mountain-discarded-plastic-garbage-bottles-generative-ai_170984-12661.avif due to error: cannot identify image file 'output_dataset\\train\\PP\\mountain-discarded-plastic-garbage-bottles-generative-ai_170984-12661.avif'
Skipping invalid image file: output_dataset\train\PS\CHAPTER-I INTRODUCTION TO DB CONCEPTS.pdf due to error: cannot identify image file 'output_dataset\\train\\PS\\CHAPTER-I INTRODUCTION TO DB CONCEPTS.pdf'
Skipping

In [1]:
import os
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define paths
base_dir = 'output_dataset'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# Hyperparameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
EPOCHS = 20

# Data Generators
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Number of classes
num_classes = len(train_generator.class_indices)

# VGG16 Model
def create_vgg16_model():
    vgg16_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = Flatten()(vgg16_base.output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=vgg16_base.input, outputs=output)

    # Freeze base model layers
    for layer in vgg16_base.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# ResNet50 Model
def create_resnet50_model():
    resnet50_base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = Flatten()(resnet50_base.output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=resnet50_base.input, outputs=output)

    # Freeze base model layers
    for layer in resnet50_base.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Callbacks
checkpoint = ModelCheckpoint("best_model.keras", monitor='val_accuracy', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

# Train VGG16
print("Training VGG16 model...")
vgg16_model = create_vgg16_model()
vgg16_history = vgg16_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator,
    callbacks=[checkpoint, early_stopping]
)

# Save VGG16 model
vgg16_model.save("vgg16_final_model.h5")

# Train ResNet50
print("Training ResNet50 model...")
resnet50_model = create_resnet50_model()
resnet50_history = resnet50_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator,
    callbacks=[checkpoint, early_stopping]
)

# Save ResNet50 model
resnet50_model.save("resnet50_final_model.h5")

print("Model training complete. Models saved.")


Found 8588 images belonging to 6 classes.
Found 195 images belonging to 6 classes.
Training VGG16 model...
Epoch 1/20


  self._warn_if_super_not_called()


[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.3757 - loss: 1.6207
Epoch 1: val_accuracy improved from -inf to 0.54872, saving model to best_model.keras
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m676s[0m 3s/step - accuracy: 0.3760 - loss: 1.6199 - val_accuracy: 0.5487 - val_loss: 1.2104
Epoch 2/20
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6262 - loss: 1.0059
Epoch 2: val_accuracy improved from 0.54872 to 0.61026, saving model to best_model.keras
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m634s[0m 2s/step - accuracy: 0.6263 - loss: 1.0058 - val_accuracy: 0.6103 - val_loss: 1.1422
Epoch 3/20
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7204 - loss: 0.7989
Epoch 3: val_accuracy improved from 0.61026 to 0.64103, saving model to best_model.keras
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m613s[0m 2s/step - accuracy



Training ResNet50 model...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 0us/step
Epoch 1/20
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2452 - loss: 1.9531
Epoch 1: val_accuracy did not improve from 0.65641
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m388s[0m 1s/step - accuracy: 0.2453 - loss: 1.9525 - val_accuracy: 0.2615 - val_loss: 1.7113
Epoch 2/20
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2692 - loss: 1.7246
Epoch 2: val_accuracy did not improve from 0.65641
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m394s[0m 1s/step - accuracy: 0.2692 - loss: 1.7246 - val_accuracy: 0.3231 - val_loss: 1.6930
Epoch 3/20
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2



Model training complete. Models saved.


In [11]:
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

# Load the best saved model
model = load_model("resnet50_final_model.h5")

# Evaluate on test data
test_generator.reset()
predictions = model.predict(test_generator, verbose=1)
predicted_classes = predictions.argmax(axis=1)
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices.keys())

# Print classification report
print("Classification Report:")
print(classification_report(true_classes, predicted_classes, target_names=class_labels))

# Print accuracy
accuracy = accuracy_score(true_classes, predicted_classes)
print(f"Model Accuracy: {accuracy * 100:.2f}%")




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step
Classification Report:
              precision    recall  f1-score   support

        HDPE       0.26      0.62      0.36        52
        LDPE       0.00      0.00      0.00        26
         PET       0.21      0.20      0.20        46
          PP       0.22      0.21      0.22        28
          PS       0.00      0.00      0.00        22
         PVC       0.00      0.00      0.00        21

    accuracy                           0.24       195
   macro avg       0.11      0.17      0.13       195
weighted avg       0.15      0.24      0.18       195

Model Accuracy: 24.10%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

# Load the best saved model
model = load_model("New Folder/vgg16_final_model.h5")

# Evaluate on test data
test_generator.reset()
predictions = model.predict(test_generator, verbose=1)
predicted_classes = predictions.argmax(axis=1)
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices.keys())

# Print classification report
print("Classification Report:")
print(classification_report(true_classes, predicted_classes, target_names=class_labels))

# Print accuracy
accuracy = accuracy_score(true_classes, predicted_classes)
print(f"Model Accuracy: {accuracy * 100:.2f}%")




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step
Classification Report:
              precision    recall  f1-score   support

        HDPE       0.36      0.38      0.37        52
        LDPE       0.20      0.19      0.20        26
         PET       0.28      0.28      0.28        46
          PP       0.11      0.11      0.11        28
          PS       0.09      0.09      0.09        22
         PVC       0.05      0.05      0.05        21

    accuracy                           0.23       195
   macro avg       0.18      0.18      0.18       195
weighted avg       0.22      0.23      0.22       195

Model Accuracy: 22.56%


In [2]:
import os
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define paths
base_dir = 'output_dataset'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# Hyperparameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
EPOCHS = 20

# Data Generators
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Number of classes
num_classes = len(train_generator.class_indices)

# ResNet50 Model
def create_resnet50_model():
    resnet50_base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = Flatten()(resnet50_base.output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=resnet50_base.input, outputs=output)

    # Freeze base model layers
    for layer in resnet50_base.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Callbacks
checkpoint = ModelCheckpoint("best_model.keras", monitor='val_accuracy', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

# Train ResNet50
print("Training ResNet50 model...")
resnet50_model = create_resnet50_model()
resnet50_history = resnet50_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator,
    callbacks=[checkpoint, early_stopping]
)

# Save the final ResNet50 model
resnet50_model.save("resnet50_final_model_2.h5")

print("ResNet50 training complete. Model saved.")


Found 8588 images belonging to 6 classes.
Found 195 images belonging to 6 classes.
Training ResNet50 model...
Epoch 1/20


  self._warn_if_super_not_called()


[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2349 - loss: 1.9378
Epoch 1: val_accuracy improved from -inf to 0.26154, saving model to best_model.keras
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m413s[0m 2s/step - accuracy: 0.2350 - loss: 1.9373 - val_accuracy: 0.2615 - val_loss: 1.6849
Epoch 2/20
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2641 - loss: 1.7126
Epoch 2: val_accuracy improved from 0.26154 to 0.34359, saving model to best_model.keras
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m379s[0m 1s/step - accuracy: 0.2642 - loss: 1.7126 - val_accuracy: 0.3436 - val_loss: 1.6689
Epoch 3/20
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2875 - loss: 1.6909
Epoch 3: val_accuracy did not improve from 0.34359
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m385s[0m 1s/step - accuracy: 0.2875 - loss: 1.6909 - val_accuracy



ResNet50 training complete. Model saved.


In [3]:
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score

# Load the best saved model
model = load_model("resnet50_final_model_2.h5")

# Evaluate on test data
test_generator.reset()
predictions = model.predict(test_generator, verbose=1)
predicted_classes = predictions.argmax(axis=1)
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices.keys())

# Print classification report
print("Classification Report:")
print(classification_report(true_classes, predicted_classes, target_names=class_labels))

# Print accuracy
accuracy = accuracy_score(true_classes, predicted_classes)
print(f"Model Accuracy: {accuracy * 100:.2f}%")



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step
Classification Report:
              precision    recall  f1-score   support

        HDPE       0.26      0.65      0.37        52
        LDPE       0.00      0.00      0.00        26
         PET       0.23      0.24      0.23        46
          PP       0.07      0.04      0.05        28
          PS       0.00      0.00      0.00        22
         PVC       0.00      0.00      0.00        21

    accuracy                           0.24       195
   macro avg       0.09      0.15      0.11       195
weighted avg       0.13      0.24      0.16       195

Model Accuracy: 23.59%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
