In [4]:
# 1️⃣ Upload Dataset Zip
from google.colab import files
import zipfile
import os
import shutil

print("Please upload your Kaggle dataset zip file (e.g. garbage-classification.zip)")
uploaded = files.upload()

# Extract uploaded zip
for filename in uploaded.keys():
    print(f"Unzipping {filename}...")
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall("raw_garbage_dataset")

Please upload your Kaggle dataset zip file (e.g. garbage-classification.zip)


Saving archive (5).zip to archive (5).zip
Unzipping archive (5).zip...


In [5]:
# 2️⃣ Organize dataset into recyclable_dataset/{recyclable, non_recyclable}
base_dir = 'recyclable_dataset'
os.makedirs(base_dir, exist_ok=True)
os.makedirs(f'{base_dir}/recyclable', exist_ok=True)
os.makedirs(f'{base_dir}/non_recyclable', exist_ok=True)

# Define class mappings (adjust source folder name if needed)
# Typical Kaggle dataset folder structure after unzip:
# raw_garbage_dataset/Garbage classification/cardboard, glass, metal, paper, plastic, trash
# Based on the output of the previous cell, the path is nested:
# raw_garbage_dataset/Garbage classification/Garbage classification/...
source_base = 'raw_garbage_dataset/Garbage classification/Garbage classification'

recyclable_classes = ['cardboard', 'glass', 'metal', 'paper', 'plastic']
non_recyclable_classes = ['trash']

def copy_files(class_list, dest_folder):
    for class_name in class_list:
        src_folder = os.path.join(source_base, class_name)
        if os.path.exists(src_folder):
            for file_name in os.listdir(src_folder):
                src_path = os.path.join(src_folder, file_name)
                # Rename to avoid duplicate file names
                dst_path = os.path.join(base_dir, dest_folder, f"{class_name}_{file_name}")
                shutil.copyfile(src_path, dst_path)
        else:
            print(f"Warning: Folder {src_folder} does not exist")

print("Copying recyclable files...")
copy_files(recyclable_classes, 'recyclable')

print("Copying non-recyclable files...")
copy_files(non_recyclable_classes, 'non_recyclable')

print("Dataset organization complete!")

Copying recyclable files...
Copying non-recyclable files...
Dataset organization complete!


In [6]:
# 3️⃣ Import libraries and prepare data with augmentation
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization

datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_data = datagen.flow_from_directory(
    base_dir,
    target_size=(96, 96),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

val_data = datagen.flow_from_directory(
    base_dir,
    target_size=(96, 96),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

Found 2022 images belonging to 2 classes.
Found 505 images belonging to 2 classes.


In [7]:
# 4️⃣ Build model with MobileNetV2 base + classifier head
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(96, 96, 3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # Freeze base layers initially

model = Sequential([
    base_model,
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_96_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [8]:
# 5️⃣ Train with EarlyStopping and ReduceLROnPlateau callbacks
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-5)

history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=20,
    callbacks=[early_stop, reduce_lr]
)

# 6️⃣ Print final accuracy
print(f"Training Accuracy: {history.history['accuracy'][-1]:.3f}")
print(f"Validation Accuracy: {history.history['val_accuracy'][-1]:.3f}")

  self._warn_if_super_not_called()


Epoch 1/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 272ms/step - accuracy: 0.5869 - loss: 0.8240

  self._warn_if_super_not_called()


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 389ms/step - accuracy: 0.5877 - loss: 0.8222 - val_accuracy: 0.4277 - val_loss: 1.0321 - learning_rate: 0.0010
Epoch 2/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 352ms/step - accuracy: 0.7972 - loss: 0.4435 - val_accuracy: 0.6931 - val_loss: 0.5999 - learning_rate: 0.0010
Epoch 3/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 351ms/step - accuracy: 0.9141 - loss: 0.2770 - val_accuracy: 0.9327 - val_loss: 0.2507 - learning_rate: 0.0010
Epoch 4/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 351ms/step - accuracy: 0.9464 - loss: 0.1789 - val_accuracy: 0.9109 - val_loss: 0.2703 - learning_rate: 0.0010
Epoch 5/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 330ms/step - accuracy: 0.9641 - loss: 0.1331 - val_accuracy: 0.9485 - val_loss: 0.1785 - learning_rate: 0.0010
E

In [9]:
import os

# Assuming the dataset was extracted to 'raw_garbage_dataset'
extracted_path = 'raw_garbage_dataset'

if os.path.exists(extracted_path):
    print(f"Contents of {extracted_path}:")
    for item in os.listdir(extracted_path):
        print(item)
    # If there's a nested folder like "Garbage classification", list its contents as well
    nested_path = os.path.join(extracted_path, 'Garbage classification')
    if os.path.exists(nested_path):
        print(f"\nContents of {nested_path}:")
        for item in os.listdir(nested_path):
            print(item)
else:
    print(f"The directory '{extracted_path}' was not found. Please ensure the dataset was extracted correctly.")

Contents of raw_garbage_dataset:
zero-indexed-files.txt
one-indexed-files-notrash_val.txt
one-indexed-files-notrash_test.txt
Garbage classification
one-indexed-files.txt
one-indexed-files-notrash_train.txt
garbage classification

Contents of raw_garbage_dataset/Garbage classification:
Garbage classification


In [10]:
# 3️⃣ Import libraries and prepare data with augmentation
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization

datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_data = datagen.flow_from_directory(
    base_dir,
    target_size=(96, 96),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

val_data = datagen.flow_from_directory(
    base_dir,
    target_size=(96, 96),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

Found 2022 images belonging to 2 classes.
Found 505 images belonging to 2 classes.


In [11]:
# 4️⃣ Build model with MobileNetV2 base + classifier head
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(96, 96, 3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # Freeze base layers initially

model = Sequential([
    base_model,
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

In [12]:
# 5️⃣ Train with EarlyStopping and ReduceLROnPlateau callbacks
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-5)

history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=20,
    callbacks=[early_stop, reduce_lr]
)

# 6️⃣ Print final accuracy
print(f"Training Accuracy: {history.history['accuracy'][-1]:.3f}")
print(f"Validation Accuracy: {history.history['val_accuracy'][-1]:.3f}")

  self._warn_if_super_not_called()


Epoch 1/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277ms/step - accuracy: 0.5841 - loss: 0.9115

  self._warn_if_super_not_called()


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 392ms/step - accuracy: 0.5850 - loss: 0.9086 - val_accuracy: 0.3287 - val_loss: 1.2277 - learning_rate: 0.0010
Epoch 2/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 337ms/step - accuracy: 0.7843 - loss: 0.4406 - val_accuracy: 0.7822 - val_loss: 0.5051 - learning_rate: 0.0010
Epoch 3/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 359ms/step - accuracy: 0.9275 - loss: 0.2451 - val_accuracy: 0.9327 - val_loss: 0.2639 - learning_rate: 0.0010
Epoch 4/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 352ms/step - accuracy: 0.9578 - loss: 0.1735 - val_accuracy: 0.9109 - val_loss: 0.2420 - learning_rate: 0.0010
Epoch 5/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 356ms/step - accuracy: 0.9603 - loss: 0.1258 - val_accuracy: 0.9347 - val_loss: 0.1913 - learning_rate: 0.0010
E

# Task
Explain the error in the provided Python code for image classification, fix the error if possible, and then proceed with evaluating the model, making predictions on new images, and saving the trained model. Also, determine and list the classifications present in the dataset.

## Prepare test data

### Subtask:
Create an `ImageDataGenerator` for the test set (if available) and load the test data.


**Reasoning**:
Check if a test set directory exists and create a test data generator and load the test data if it does.



In [13]:
test_dir = os.path.join(base_dir, 'test') # Assuming 'test' is the name of the test set folder

if os.path.exists(test_dir):
    print("Test set directory found. Preparing test data.")
    test_datagen = ImageDataGenerator(rescale=1./255)
    test_data = test_datagen.flow_from_directory(
        test_dir,
        target_size=(96, 96),
        batch_size=32,
        class_mode='binary',
        shuffle=False # Do not shuffle test data
    )
    print("Test data loaded successfully.")
else:
    print("No dedicated test set directory found. Model evaluation will be performed on the validation set.")
    test_data = val_data # Use validation data for evaluation if no test set

No dedicated test set directory found. Model evaluation will be performed on the validation set.


## Evaluate the model

### Subtask:
Evaluate the trained model on the test data and print the results.


**Reasoning**:
Evaluate the trained model using the test data (which is the validation data in this case) and print the loss and accuracy.



In [14]:
loss, accuracy = model.evaluate(test_data)
print(f"Test Loss: {loss:.3f}")
print(f"Test Accuracy: {accuracy:.3f}")

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 229ms/step - accuracy: 0.9352 - loss: 0.1915
Test Loss: 0.173
Test Accuracy: 0.947


## Prepare for prediction

### Subtask:
Create a function or code snippet to preprocess a single image for prediction.


**Reasoning**:
Define a function to preprocess a single image for prediction as per the instructions.



In [15]:
import numpy as np
from PIL import Image
import tensorflow as tf

def preprocess_image(image_path):
    """
    Preprocesses a single image for prediction.

    Args:
        image_path (str): The path to the image file.

    Returns:
        np.ndarray: The preprocessed image array with batch dimension.
    """
    img = Image.open(image_path).convert('RGB')
    img = img.resize((96, 96))
    img_array = np.array(img)
    img_array = img_array / 255.0  # Normalize
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    return img_array

## Make predictions

### Subtask:
Use the trained model to make predictions on one or more new images.


**Reasoning**:
Use the `preprocess_image` function to prepare an image for prediction, then use the trained model to predict the class of the image and interpret the result. Finally, print the classification labels from the training data for context.



In [16]:
# Choose an image file path for prediction.
# Replace 'path/to/your/image.jpg' with the actual path to an image file.
# For demonstration, let's pick an image from the dataset if available.
# We need to find an example image path from the organized dataset.

# Let's list a few files from the organized dataset to pick one for prediction
import random
import os

recyclable_dir = os.path.join(base_dir, 'recyclable')
non_recyclable_dir = os.path.join(base_dir, 'non_recyclable')

example_image_path = None

if os.path.exists(recyclable_dir) and os.listdir(recyclable_dir):
    example_image_path = os.path.join(recyclable_dir, random.choice(os.listdir(recyclable_dir)))
    print(f"Using example image from recyclable class: {example_image_path}")
elif os.path.exists(non_recyclable_dir) and os.listdir(non_recyclable_dir):
    example_image_path = os.path.join(non_recyclable_dir, random.choice(os.listdir(non_recyclable_dir)))
    print(f"Using example image from non-recyclable class: {example_image_path}")
else:
    print("No images found in the organized dataset directories.")


if example_image_path:
    # Preprocess the image
    preprocessed_image = preprocess_image(example_image_path)

    # Make a prediction
    prediction = model.predict(preprocessed_image)

    # Interpret the prediction
    # The output is a single value between 0 and 1
    # Let's use a threshold of 0.5
    threshold = 0.5

    if prediction[0][0] >= threshold:
        predicted_class = "recyclable"
    else:
        predicted_class = "non-recyclable"

    print(f"\nPrediction for {example_image_path}: {prediction[0][0]:.4f}")
    print(f"Classified as: {predicted_class}")

# Print the classifications present in the dataset
print("\nDataset Classifications:")
print(train_data.class_indices)

Using example image from recyclable class: recyclable_dataset/recyclable/paper_paper166.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step

Prediction for recyclable_dataset/recyclable/paper_paper166.jpg: 0.9860
Classified as: recyclable

Dataset Classifications:
{'non_recyclable': 0, 'recyclable': 1}


## Save the model

### Subtask:
Save the trained model to a file.


**Reasoning**:
Save the trained model to a file using the Keras save method in HDF5 format.



In [17]:
model.save('recyclable_classifier_model.h5')
print("Model saved as recyclable_classifier_model.h5")



Model saved as recyclable_classifier_model.h5


In [18]:
# 🔁 Convert model to TensorFlow Lite
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TFLite model
with open("recyclable_classifier.tflite", "wb") as f:
    f.write(tflite_model)

print("✅ TFLite model saved as recyclable_classifier.tflite")

Saved artifact at '/tmp/tmp_dt_bas6'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 96, 96, 3), dtype=tf.float32, name='keras_tensor_315')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  136645190868752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190868560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190864912: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190865104: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190868368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190870288: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190869904: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190871056: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190869712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190866640: TensorSpec(shape=(), dtype=tf.resource, name=None)
  136645190872

In [19]:
from PIL import Image
import numpy as np
from google.colab import files

# Upload a sample image
print("📤 Upload a sample image (JPG or PNG)...")
uploaded = files.upload()

# Assume the image is named sample_image.jpg
image_path = list(uploaded.keys())[0]

# Preprocess
img = Image.open(image_path).resize((96, 96))
img = np.array(img).astype(np.float32) / 255.0
input_data = np.expand_dims(img, axis=0)

# Load and run TFLite model
interpreter = tf.lite.Interpreter(model_path="recyclable_classifier.tflite")
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']

interpreter.set_tensor(input_index, input_data)
interpreter.invoke()
output = interpreter.get_tensor(output_index)

# Print prediction
prediction = "Recyclable" if output[0][0] > 0.5 else "Non-Recyclable"
print(f"Prediction: {prediction} (Confidence: {output[0][0]:.3f})")

📤 Upload a sample image (JPG or PNG)...


Saving pexels-shkrabaanthony-5852340.jpg to pexels-shkrabaanthony-5852340.jpg
Saving pexels-mart-production-8217434.jpg to pexels-mart-production-8217434.jpg
Prediction: Recyclable (Confidence: 0.864)


## Summary:

### Data Analysis Key Findings

*   No dedicated test set directory was found, so the validation data was used for model evaluation.
*   The model achieved a test loss of approximately 0.174 and a test accuracy of approximately 0.947 on the validation set.
*   A function `preprocess_image` was successfully created to prepare single images for prediction, including resizing to 96x96, normalization, and adding a batch dimension.
*   An example image was successfully selected from the dataset and classified by the model.
*   The model predicted a value of 0.9946 for the example image, classifying it as 'recyclable' based on a threshold of 0.5.
*   The dataset contains two classifications: 'non\_recyclable' (indexed as 0) and 'recyclable' (indexed as 1).
*   The trained model was successfully saved to the file 'recyclable\_classifier\_model.h5'.

### Insights or Next Steps

*   Consider converting the model saving format from the legacy HDF5 format to the native Keras format (`.keras`) as recommended.
*   To get a more robust evaluation, it would be beneficial to split the original dataset into dedicated training, validation, and test sets before training the model.
