In [None]:

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CleanTech Waste Classification Model Training\n",
    "\n",
    "This notebook demonstrates the complete process of training a waste classification model using VGG16 transfer learning. The images are generated synthetically in Section 3 as a stand-in for real waste photos."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Import Required Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.applications import VGG16\n",
    "from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout\n",
    "from tensorflow.keras.models import Model\n",
    "from tensorflow.keras.optimizers import Adam\n",
    "from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array\n",
    "from tensorflow.keras.utils import to_categorical\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "import pickle\n",
    "from PIL import Image\n",
    "\n",
    "# Report library versions. Fall back to 'unknown' instead of raising AttributeError\n",
    "# if this TensorFlow build does not expose `tf.keras.__version__`.\n",
    "print(f\"TensorFlow version: {tf.__version__}\")\n",
    "print(f\"Keras version: {getattr(tf.keras, '__version__', 'unknown')}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Define Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: every value a reader might want to tune lives in this cell.\n",
    "WASTE_CATEGORIES = ['organic', 'recyclable', 'hazardous', 'general']\n",
    "IMG_SIZE = (224, 224)  # used as target_size when loading images and as the model input size\n",
    "BATCH_SIZE = 32\n",
    "EPOCHS = 10\n",
    "LEARNING_RATE = 0.0001\n",
    "VALIDATION_SPLIT = 0.2\n",
    "SEED = 42\n",
    "\n",
    "# Seed NumPy and TensorFlow up front so random draws made by later cells start\n",
    "# from a fixed state. NOTE(review): some GPU kernels may still be non-deterministic.\n",
    "np.random.seed(SEED)\n",
    "tf.random.set_seed(SEED)\n",
    "\n",
    "print(f\"Waste Categories: {WASTE_CATEGORIES}\")\n",
    "print(f\"Number of Classes: {len(WASTE_CATEGORIES)}\")\n",
    "print(f\"Image Size: {IMG_SIZE}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Create Synthetic Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_synthetic_dataset(data_dir='waste_dataset', images_per_category=100, seed=42):\n",
    "    \"\"\"\n",
    "    Create synthetic waste dataset for demonstration.\n",
    "    In a real project, you would use actual waste images.\n",
    "\n",
    "    One sub-directory per entry in WASTE_CATEGORIES is created under `data_dir`\n",
    "    and filled with Gaussian-noise JPEGs whose mean intensity and boosted colour\n",
    "    channel depend on the category.\n",
    "\n",
    "    Args:\n",
    "        data_dir: Root directory in which the category folders are created.\n",
    "        images_per_category: Number of images written for each category.\n",
    "        seed: Seed applied to the global NumPy RNG before any pixels are drawn.\n",
    "\n",
    "    Returns:\n",
    "        The dataset root directory (`data_dir`).\n",
    "    \"\"\"\n",
    "    print(\"Creating synthetic waste dataset...\")\n",
    "\n",
    "    # category -> (mean pixel intensity, index of the RGB channel scaled by 1.5, or None)\n",
    "    colour_profiles = {\n",
    "        'organic': (0.3, 1),      # darker noise, more green\n",
    "        'recyclable': (0.7, 2),   # brighter noise, more blue\n",
    "        'hazardous': (0.5, 0),    # mid-tone noise, more red\n",
    "    }\n",
    "    default_profile = (0.5, None)  # 'general' (and any unlisted category): plain gray noise\n",
    "\n",
    "    # Seeding the global NumPy RNG makes the generated pixels repeatable across runs\n",
    "    np.random.seed(seed)\n",
    "\n",
    "    for category in WASTE_CATEGORIES:\n",
    "        category_dir = f'{data_dir}/{category}'\n",
    "        os.makedirs(category_dir, exist_ok=True)\n",
    "        print(f\"Generating {images_per_category} images for {category}...\")\n",
    "\n",
    "        mean, boosted_channel = colour_profiles.get(category, default_profile)\n",
    "\n",
    "        for j in range(images_per_category):\n",
    "            # Image shape follows IMG_SIZE so the files match the configured input size\n",
    "            img = np.random.normal(mean, 0.1, (*IMG_SIZE, 3))\n",
    "            if boosted_channel is not None:\n",
    "                img[:, :, boosted_channel] *= 1.5\n",
    "\n",
    "            # Clamp to [0, 1] before scaling so the uint8 cast cannot wrap around\n",
    "            img = (np.clip(img, 0, 1) * 255).astype(np.uint8)\n",
    "\n",
    "            # Save image\n",
    "            Image.fromarray(img).save(f'{category_dir}/waste_{j}.jpg')\n",
    "\n",
    "    print(\"Synthetic dataset created successfully!\")\n",
    "    return data_dir\n",
    "\n",
    "# Create the dataset\n",
    "dataset_path = create_synthetic_dataset()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Data Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize sample images: one column per category, one row per sample.\n",
    "# The grid is sized from WASTE_CATEGORIES so it keeps working if categories change;\n",
    "# squeeze=False keeps `axes` two-dimensional even for a single category.\n",
    "n_samples = 2\n",
    "fig, axes = plt.subplots(n_samples, len(WASTE_CATEGORIES), figsize=(15, 8), squeeze=False)\n",
    "fig.suptitle('Sample Images from Each Waste Category', fontsize=16)\n",
    "\n",
    "for col, category in enumerate(WASTE_CATEGORIES):\n",
    "    for row in range(n_samples):\n",
    "        # Row r shows file waste_<r>.jpg of the category\n",
    "        img = load_img(f'{dataset_path}/{category}/waste_{row}.jpg')\n",
    "\n",
    "        ax = axes[row, col]\n",
    "        ax.imshow(img)\n",
    "        ax.set_title(f'{category.capitalize()} - Sample {row + 1}')\n",
    "        ax.axis('off')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Display dataset statistics (number of files found in each category folder)\n",
    "print(\"\\nDataset Statistics:\")\n",
    "for category in WASTE_CATEGORIES:\n",
    "    count = len(os.listdir(f'{dataset_path}/{category}'))\n",
    "    print(f\"{category.capitalize()}: {count} images\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Data Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data generators: augmentation for training, rescaling only for validation.\n",
    "# Both are given the same VALIDATION_SPLIT and select their part via `subset`.\n",
    "train_datagen = ImageDataGenerator(\n",
    "    rescale=1./255,\n",
    "    rotation_range=20,\n",
    "    width_shift_range=0.2,\n",
    "    height_shift_range=0.2,\n",
    "    horizontal_flip=True,\n",
    "    zoom_range=0.2,\n",
    "    validation_split=VALIDATION_SPLIT\n",
    ")\n",
    "\n",
    "val_datagen = ImageDataGenerator(\n",
    "    rescale=1./255,\n",
    "    validation_split=VALIDATION_SPLIT\n",
    ")\n",
    "\n",
    "# Options shared by both generators.\n",
    "# classes=WASTE_CATEGORIES pins label index i to WASTE_CATEGORIES[i]. Without it the\n",
    "# indices follow the alphanumeric order of the folder names, which differs from the\n",
    "# order of WASTE_CATEGORIES and would mislabel every cell that indexes\n",
    "# WASTE_CATEGORIES with a class id (confusion matrix, report, predictions).\n",
    "flow_options = dict(\n",
    "    target_size=IMG_SIZE,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    class_mode='categorical',\n",
    "    classes=WASTE_CATEGORIES\n",
    ")\n",
    "\n",
    "# Create data generators\n",
    "train_generator = train_datagen.flow_from_directory(\n",
    "    dataset_path,\n",
    "    subset='training',\n",
    "    shuffle=True,\n",
    "    **flow_options\n",
    ")\n",
    "\n",
    "# shuffle=False keeps validation batches in a fixed order\n",
    "validation_generator = val_datagen.flow_from_directory(\n",
    "    dataset_path,\n",
    "    subset='validation',\n",
    "    shuffle=False,\n",
    "    **flow_options\n",
    ")\n",
    "\n",
    "# Guard against the label mapping drifting away from WASTE_CATEGORIES\n",
    "for generator in (train_generator, validation_generator):\n",
    "    names_by_index = sorted(generator.class_indices, key=generator.class_indices.get)\n",
    "    assert names_by_index == list(WASTE_CATEGORIES), \"class indices do not follow WASTE_CATEGORIES order\"\n",
    "\n",
    "print(f\"Training samples: {train_generator.samples}\")\n",
    "print(f\"Validation samples: {validation_generator.samples}\")\n",
    "print(f\"Class indices: {train_generator.class_indices}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Model Architecture"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_model():\n",
    "    \"\"\"\n",
    "    Build and compile the waste classifier: an ImageNet-pretrained VGG16\n",
    "    convolutional base (frozen) followed by a small dense classification head.\n",
    "\n",
    "    Returns:\n",
    "        A compiled Keras Model with one softmax output per entry in WASTE_CATEGORIES.\n",
    "    \"\"\"\n",
    "    # Pre-trained convolutional base, without VGG16's own classifier layers\n",
    "    backbone = VGG16(\n",
    "        weights='imagenet',\n",
    "        include_top=False,\n",
    "        input_shape=(*IMG_SIZE, 3)\n",
    "    )\n",
    "    backbone.trainable = False  # only the layers added below are trained\n",
    "\n",
    "    # Classification head, listed in the order the layers are applied\n",
    "    head_layers = [\n",
    "        GlobalAveragePooling2D(),\n",
    "        Dense(512, activation='relu'),\n",
    "        Dropout(0.5),\n",
    "        Dense(256, activation='relu'),\n",
    "        Dropout(0.3),\n",
    "        Dense(len(WASTE_CATEGORIES), activation='softmax'),\n",
    "    ]\n",
    "    features = backbone.output\n",
    "    for layer in head_layers:\n",
    "        features = layer(features)\n",
    "\n",
    "    classifier = Model(inputs=backbone.input, outputs=features)\n",
    "    classifier.compile(\n",
    "        optimizer=Adam(learning_rate=LEARNING_RATE),\n",
    "        loss='categorical_crossentropy',\n",
    "        metrics=['accuracy']\n",
    "    )\n",
    "    return classifier\n",
    "\n",
    "# Create and display model\n",
    "model = create_model()\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Model Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define callbacks\n",
    "from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau\n",
    "\n",
    "# Writes 'best_waste_model.h5', keeping only the best epoch by validation accuracy\n",
    "checkpoint_cb = ModelCheckpoint(\n",
    "    'best_waste_model.h5', monitor='val_accuracy', save_best_only=True, verbose=1\n",
    ")\n",
    "# Watches validation loss with a patience of 5 epochs and restores the best weights\n",
    "early_stop_cb = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)\n",
    "# Multiplies the learning rate by 0.5 (down to 1e-7) when validation loss plateaus for 3 epochs\n",
    "reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7)\n",
    "\n",
    "callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]\n",
    "\n",
    "# Train the model\n",
    "print(\"Starting model training...\")\n",
    "fit_options = dict(\n",
    "    epochs=EPOCHS,\n",
    "    validation_data=validation_generator,\n",
    "    callbacks=callbacks,\n",
    "    verbose=1,\n",
    ")\n",
    "history = model.fit(train_generator, **fit_options)\n",
    "\n",
    "print(\"Training completed!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Training Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot training history: accuracy on the left panel, loss on the right\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))\n",
    "\n",
    "# (axis, key in history.history, human-readable label)\n",
    "panels = [\n",
    "    (ax1, 'accuracy', 'Accuracy'),\n",
    "    (ax2, 'loss', 'Loss'),\n",
    "]\n",
    "for axis, metric, label in panels:\n",
    "    axis.plot(history.history[metric], label=f'Training {label}')\n",
    "    axis.plot(history.history[f'val_{metric}'], label=f'Validation {label}')\n",
    "    axis.set_title(f'Model {label}')\n",
    "    axis.set_xlabel('Epoch')\n",
    "    axis.set_ylabel(label)\n",
    "    axis.legend()\n",
    "    axis.grid(True)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Metrics of the last recorded epoch; later cells reuse final_val_acc / final_val_loss\n",
    "final_train_acc, final_val_acc, final_train_loss, final_val_loss = (\n",
    "    history.history[key][-1]\n",
    "    for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')\n",
    ")\n",
    "\n",
    "print(f\"\\nFinal Training Accuracy: {final_train_acc:.4f}\")\n",
    "print(f\"Final Validation Accuracy: {final_val_acc:.4f}\")\n",
    "print(f\"Final Training Loss: {final_train_loss:.4f}\")\n",
    "print(f\"Final Validation Loss: {final_val_loss:.4f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Model Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate model on validation set\n",
    "validation_loss, validation_accuracy = model.evaluate(validation_generator, verbose=1)\n",
    "print(f\"\\nValidation Accuracy: {validation_accuracy:.4f}\")\n",
    "print(f\"Validation Loss: {validation_loss:.4f}\")\n",
    "\n",
    "# Generate predictions for confusion matrix.\n",
    "# The validation generator is created with shuffle=False, so after reset() the\n",
    "# predictions are compared position-by-position with validation_generator.classes.\n",
    "validation_generator.reset()\n",
    "predictions = model.predict(validation_generator, verbose=1)\n",
    "predicted_classes = np.argmax(predictions, axis=1)\n",
    "true_classes = validation_generator.classes\n",
    "\n",
    "# Label names ordered by the integer index the generator assigned to each class.\n",
    "# WASTE_CATEGORIES is not used for labelling because its order is not guaranteed\n",
    "# to match those indices.\n",
    "class_indices = validation_generator.class_indices\n",
    "class_names = sorted(class_indices, key=class_indices.get)\n",
    "# Explicit label ids keep the matrix/report shape fixed even if a class never occurs\n",
    "label_ids = list(range(len(class_names)))\n",
    "\n",
    "# Create confusion matrix\n",
    "from sklearn.metrics import confusion_matrix, classification_report\n",
    "import seaborn as sns\n",
    "\n",
    "cm = confusion_matrix(true_classes, predicted_classes, labels=label_ids)\n",
    "plt.figure(figsize=(8, 6))\n",
    "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', \n",
    "            xticklabels=class_names, \n",
    "            yticklabels=class_names)\n",
    "plt.title('Confusion Matrix')\n",
    "plt.xlabel('Predicted')\n",
    "plt.ylabel('Actual')\n",
    "plt.show()\n",
    "\n",
    "# Classification report\n",
    "print(\"\\nClassification Report:\")\n",
    "print(classification_report(true_classes, predicted_classes, \n",
    "                          labels=label_ids,\n",
    "                          target_names=class_names))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Save Model and Training History"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the trained model\n",
    "model.save('waste_classification_model.h5')\n",
    "print(\"Model saved as 'waste_classification_model.h5'\")\n",
    "\n",
    "# Save training history (per-epoch metric lists recorded by model.fit)\n",
    "with open('training_history.pkl', 'wb') as f:\n",
    "    pickle.dump(history.history, f)\n",
    "print(\"Training history saved as 'training_history.pkl'\")\n",
    "\n",
    "# Category names ordered by the class index the training generator assigned,\n",
    "# so that categories[argmax(prediction)] is the right label for whoever loads\n",
    "# this file. WASTE_CATEGORIES is not saved directly because its order is not\n",
    "# guaranteed to match those indices.\n",
    "class_indices = train_generator.class_indices\n",
    "categories_by_index = sorted(class_indices, key=class_indices.get)\n",
    "\n",
    "# Save model information\n",
    "model_info = {\n",
    "    'categories': categories_by_index,\n",
    "    'class_indices': dict(class_indices),  # explicit name -> index mapping\n",
    "    'img_size': IMG_SIZE,\n",
    "    'num_classes': len(WASTE_CATEGORIES),\n",
    "    'final_accuracy': final_val_acc,\n",
    "    'final_loss': final_val_loss\n",
    "}\n",
    "\n",
    "with open('model_info.pkl', 'wb') as f:\n",
    "    pickle.dump(model_info, f)\n",
    "print(\"Model information saved as 'model_info.pkl'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 11. Test Prediction Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Class names ordered by the label index the training generator assigned to each\n",
    "# folder. WASTE_CATEGORIES is not indexed directly with a predicted class id\n",
    "# because its order is not guaranteed to match those indices.\n",
    "CLASS_NAMES = sorted(train_generator.class_indices, key=train_generator.class_indices.get)\n",
    "\n",
    "def predict_waste_type(model, image_path, class_names=None):\n",
    "    \"\"\"\n",
    "    Predict waste type from image\n",
    "\n",
    "    Args:\n",
    "        model: Trained Keras model used for the prediction.\n",
    "        image_path: Path of the image file to classify.\n",
    "        class_names: Optional sequence mapping output index -> label.\n",
    "            Defaults to CLASS_NAMES (the training generator's index order).\n",
    "\n",
    "    Returns:\n",
    "        Tuple of (predicted label, probability of that label,\n",
    "        array of probabilities for all classes).\n",
    "    \"\"\"\n",
    "    if class_names is None:\n",
    "        class_names = CLASS_NAMES\n",
    "\n",
    "    # Load and preprocess image: resize to IMG_SIZE, add a batch axis, scale to [0, 1]\n",
    "    img = load_img(image_path, target_size=IMG_SIZE)\n",
    "    img_array = np.expand_dims(img_to_array(img), axis=0) / 255.0\n",
    "\n",
    "    # Make prediction (first and only row of the batch)\n",
    "    probabilities = model.predict(img_array)[0]\n",
    "    predicted_index = int(np.argmax(probabilities))\n",
    "\n",
    "    return class_names[predicted_index], probabilities[predicted_index], probabilities\n",
    "\n",
    "# Test prediction on sample images\n",
    "print(\"Testing predictions on sample images:\")\n",
    "for category in WASTE_CATEGORIES:\n",
    "    test_image_path = f'{dataset_path}/{category}/waste_0.jpg'\n",
    "    predicted_class, confidence, all_preds = predict_waste_type(model, test_image_path)\n",
    "\n",
    "    print(f\"\\nActual: {category}, Predicted: {predicted_class}, Confidence: {confidence:.3f}\")\n",
    "\n",
    "    # Show prediction probabilities, labelled in model output-index order\n",
    "    for i, cat in enumerate(CLASS_NAMES):\n",
    "        print(f\"  {cat}: {all_preds[i]:.3f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 12. Model Performance Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Early stopping can end training before EPOCHS is reached, so report the number\n",
    "# of epochs that actually produced a history entry rather than the configured cap.\n",
    "epochs_run = len(history.history['loss'])\n",
    "\n",
    "print(\"=\" * 50)\n",
    "print(\"CLEANTECH WASTE CLASSIFICATION MODEL SUMMARY\")\n",
    "print(\"=\" * 50)\n",
    "print(\"Model Architecture: VGG16 Transfer Learning\")\n",
    "print(f\"Number of Classes: {len(WASTE_CATEGORIES)}\")\n",
    "print(f\"Categories: {', '.join(WASTE_CATEGORIES)}\")\n",
    "print(f\"Input Size: {IMG_SIZE}\")\n",
    "print(f\"Training Samples: {train_generator.samples}\")\n",
    "print(f\"Validation Samples: {validation_generator.samples}\")\n",
    "print(f\"Final Validation Accuracy: {final_val_acc:.4f} ({final_val_acc*100:.2f}%)\")\n",
    "print(f\"Final Validation Loss: {final_val_loss:.4f}\")\n",
    "print(f\"Training Time: {epochs_run} epochs\")\n",
    "print(\"=\" * 50)\n",
    "print(\"\\nFiles created:\")\n",
    "print(\"- waste_classification_model.h5\")\n",
    "print(\"- best_waste_model.h5\")\n",
    "print(\"- training_history.pkl\")\n",
    "print(\"- model_info.pkl\")\n",
    "print(\"\\nReady for deployment in Flask application!\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "nbconvert_version": "6.4.0",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
