# Proyek Klasifikasi Gambar: [Garbage Dataset](https://www.kaggle.com/datasets/sumn2u/garbage-classification-v2)
- **Nama:** Patuh Rujhan Al Istizhar
- **Email:** patuh41@gmail.com
- **ID Dicoding:** patuh_istizhar

## Import Semua Packages/Library yang Digunakan

In [None]:
import os
import shutil

import kagglehub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import splitfolders
import tensorflow as tf
import tensorflowjs as tfjs
from PIL import Image
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    MaxPooling2D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Data Preparation

In [None]:
# Hyperparameters used by the data generators, the model and the training call
EPOCHS = 30
BATCH_SIZE = 32
IMAGE_SIZE = 224

# Seed NumPy and TensorFlow with one shared value so runs are repeatable
SEED = 42
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

### Data Loading

In [None]:
# Fetch the dataset through kagglehub; the class folders live in "garbage-dataset"
dataset_path = kagglehub.dataset_download("sumn2u/garbage-classification-v2")
data_dir = os.path.join(dataset_path, "garbage-dataset")

# Collect one (image path, class name) record per file found in each class folder.
# Entries of data_dir that are not directories are ignored.
class_folders = [(entry, os.path.join(data_dir, entry)) for entry in os.listdir(data_dir)]
records = [
    (os.path.join(folder, file_name), entry)
    for entry, folder in class_folders
    if os.path.isdir(folder)
    for file_name in os.listdir(folder)
]
image_paths = [path for path, _ in records]
class_labels = [label for _, label in records]

# One row per image: its path and the name of the folder it came from
df = pd.DataFrame({"image_path": image_paths, "label": class_labels})
print(f"Loaded {len(df)} images")

In [None]:
# Bar chart of how many images each class contains
class_counts = df["label"].value_counts()
class_names = class_counts.index

plt.figure(figsize=(10, 4))
# The class name is also passed as hue so each bar gets its own palette colour
bar_options = {
    "x": class_names,
    "y": class_counts.values,
    "hue": class_names,
    "palette": "viridis",
}
sns.barplot(**bar_options)

# Axis decoration
plt.xlabel("Class")
plt.ylabel("Count")
plt.title("Class Distribution")
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

### Data Preprocessing

#### Split Dataset

In [None]:
# Split the class folders into data_split/{train,val,test} using ratio (0.8, 0.1, 0.1)
split_root = "data_split"
splitfolders.ratio(data_dir, output=split_root, seed=42, ratio=(0.8, 0.1, 0.1))

# Directory of each subset, consumed by the data generators
train_dir, val_dir, test_dir = (
    f"{split_root}/{subset}" for subset in ("train", "val", "test")
)

#### Data Generator

In [None]:
# Every generator rescales pixel values by 1/255
pixel_scale = 1.0 / 255

# Training data additionally gets random flips, rotations and zoom
train_datagen = ImageDataGenerator(
    rescale=pixel_scale,
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.2,
)
# Validation and test data are only rescaled
val_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

# Settings common to the three directory iterators
flow_options = {
    "target_size": (IMAGE_SIZE, IMAGE_SIZE),
    "batch_size": BATCH_SIZE,
    "class_mode": "categorical",
}

# Only the training stream is shuffled; validation and test keep a fixed order
train_generator = train_datagen.flow_from_directory(
    train_dir, shuffle=True, **flow_options
)
val_generator = val_datagen.flow_from_directory(
    val_dir, shuffle=False, **flow_options
)
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, **flow_options
)

## Modelling

In [None]:
# One output unit per class discovered by the training generator
num_classes = len(train_generator.class_indices)

# Feature extractor: three Conv -> MaxPool -> BatchNorm stages (32, 64, 128 filters)
feature_layers = []
for stage, filters in enumerate((32, 64, 128)):
    # Only the very first layer declares the input shape
    conv_kwargs = {"input_shape": (IMAGE_SIZE, IMAGE_SIZE, 3)} if stage == 0 else {}
    feature_layers.extend(
        [
            Conv2D(filters, (3, 3), activation="relu", **conv_kwargs),
            MaxPooling2D((2, 2)),
            BatchNormalization(),
        ]
    )

# Classifier head: flatten, one hidden dense layer with dropout, softmax output
classifier_layers = [
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(num_classes, activation="softmax"),
]

model = Sequential(feature_layers + classifier_layers)

# Compile for multi-class classification with one-hot ("categorical") labels
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

# Training callbacks
# Stop after 8 epochs without val_accuracy improvement and restore the best weights
early_stopping = EarlyStopping(
    monitor="val_accuracy", patience=8, restore_best_weights=True, verbose=1
)
# Keep only the best model (by val_accuracy) on disk
checkpoint = ModelCheckpoint(
    "best_model.keras", monitor="val_accuracy", save_best_only=True, verbose=1
)
# Halve the learning rate after 4 stagnant epochs; no monitor is passed, so the
# callback's default monitored quantity is used
lr_reducer = ReduceLROnPlateau(patience=4, factor=0.5, min_lr=0.000001, verbose=1)
callbacks = [early_stopping, checkpoint, lr_reducer]

In [None]:
# Fit on the training generator for up to EPOCHS epochs, validating on the
# validation generator and applying the callbacks configured above
fit_options = {
    "validation_data": val_generator,
    "epochs": EPOCHS,
    "callbacks": callbacks,
}
history = model.fit(train_generator, **fit_options)

## Evaluasi dan Visualisasi

In [None]:
# Loss and accuracy on the training generator
train_metrics = model.evaluate(train_generator)
train_loss, train_acc = train_metrics
print(f"Training Loss: {train_loss:.4f}")
print(f"Training Accuracy: {train_acc:.4f} ({train_acc * 100:.2f}%)")

# Loss and accuracy on the held-out test generator
test_metrics = model.evaluate(test_generator)
test_loss, test_acc = test_metrics
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f} ({test_acc * 100:.2f}%)")

# Learning curves: accuracy in the left panel, loss in the right panel
curves = history.history
plt.figure(figsize=(10, 4))
for column, (metric, metric_name) in enumerate(
    (("accuracy", "Accuracy"), ("loss", "Loss")), start=1
):
    plt.subplot(1, 2, column)
    plt.plot(curves[metric], label=f"Training {metric_name}")
    plt.plot(curves[f"val_{metric}"], label=f"Validation {metric_name}")
    if metric == "accuracy":
        # Reference line marking the 95% accuracy target
        plt.axhline(y=0.95, color="r", linestyle="--", label="Target (95%)")
    plt.title(f"Model {metric_name}")
    plt.ylabel(metric_name)
    plt.xlabel("Epoch")
    plt.legend()

plt.tight_layout()
plt.show()

## Konversi Model

In [None]:
# Output locations for each exported format
saved_model_path = "saved_model"
tfjs_model_path = "tfjs_model"
tflite_model_path = "tflite/model.tflite"
tflite_label_path = "tflite/label.txt"

# Recreate every output directory from scratch so old artifacts are discarded
for export_dir in (tfjs_model_path, "tflite", saved_model_path):
    if os.path.exists(export_dir):
        shutil.rmtree(export_dir)
    os.makedirs(export_dir)

# 1) SavedModel
tf.saved_model.save(model, saved_model_path)
print(f"SavedModel saved at: {saved_model_path}")

# 2) TFLite: convert the Keras model and write the resulting bytes to disk
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open(tflite_model_path, "wb") as tflite_file:
    tflite_file.write(tflite_model)
print(f"TFLite model saved at: {tflite_model_path}")

# Label file shipped next to the TFLite model: one class name per line
labels = list(train_generator.class_indices)
with open(tflite_label_path, "w") as label_file:
    label_file.writelines(f"{name}\n" for name in labels)
print(f"Labels saved at: {tflite_label_path}")

# 3) TensorFlow.js
tfjs.converters.save_keras_model(model, tfjs_model_path)
print(f"TF.js model saved at: {tfjs_model_path}")

## Inference (Optional)

In [None]:
# Load the exported TFLite model and look up its input/output tensor metadata
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Pick a sample image deterministically: the first file (sorted by name) of the
# first class folder (sorted by name) that actually contains a file.
# os.listdir() order is unspecified, and stray files or empty folders directly
# under test_dir would otherwise break the path construction.
sample_image_path = None
for sample_class in sorted(os.listdir(test_dir)):
    sample_class_dir = os.path.join(test_dir, sample_class)
    if not os.path.isdir(sample_class_dir):
        continue
    sample_files = sorted(
        name
        for name in os.listdir(sample_class_dir)
        if os.path.isfile(os.path.join(sample_class_dir, name))
    )
    if sample_files:
        sample_image_path = os.path.join(sample_class_dir, sample_files[0])
        break
if sample_image_path is None:
    raise FileNotFoundError(f"No sample image found under {test_dir!r}")

# Preprocess like the data generators do: RGB, resized to the model input size,
# pixel values divided by 255. flow_from_directory was called without an
# `interpolation` argument, so it resizes with its default ("nearest"); Pillow's
# own default filter differs, hence the explicit Image.NEAREST here.
image = Image.open(sample_image_path).convert("RGB")
image = image.resize((IMAGE_SIZE, IMAGE_SIZE), Image.NEAREST)
image_array = np.array(image) / 255.0
# Add the batch dimension and cast to float32 before feeding the interpreter
input_image = np.expand_dims(image_array, axis=0).astype(np.float32)

# Perform inference
interpreter.set_tensor(input_details[0]["index"], input_image)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]["index"])
predicted_class = int(np.argmax(output_data))

# Map the predicted index back to its class name through the index values
# stored in class_indices rather than relying on dict key order. A dedicated
# name is used so the per-image `class_labels` list built while loading the
# dataset is not overwritten.
index_to_label = {
    index: name for name, index in train_generator.class_indices.items()
}
predicted_label = index_to_label[predicted_class]

# Show image and prediction
plt.figure(figsize=(5, 5))
plt.imshow(image)
plt.title(f"Predicted: {predicted_label}")
plt.axis("off")
plt.show()
print(f"Predicted Class: {predicted_label}")