# Training Data

### Labeling DataSet

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import load_img
from sklearn.metrics import classification_report 
import numpy as np
import sklearn
import os 
import shutil 
import random

In [2]:
train_gen = ImageDataGenerator(rescale=1./255)

# Turning every folder name into class label
train_data = train_gen.flow_from_directory(
    "../Datasets/combined-cleaned-dataset",
    target_size=(224,224),
    batch_size=32,
    class_mode= "categorical"
)

print(train_data.class_indices)

Found 18042 images belonging to 13 classes.
{'battery': 0, 'biological': 1, 'brown-glass': 2, 'cardboard': 3, 'clothes': 4, 'glass': 5, 'green-glass': 6, 'metal': 7, 'paper': 8, 'plastic': 9, 'shoes': 10, 'trash': 11, 'white-glass': 12}


In [3]:
# Spliting our dataset into training, validation, and testing 
def split_dataset(source_dir, output_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    random.seed(42)
    
    for class_folder in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_folder)
        if not os.path.isdir(class_path):
            continue 
        
        images = [f for f in os.listdir(class_path) if f.lower().endswith(('.jpg','.jpeg','.png'))]
        random.shuffle(images)
    
        total = len(images)
        train_end = int(total * train_ratio)
        val_end = train_end + int(total * val_ratio)
    
        train_images = images[:train_end]
        val_images = images[train_end:val_end]
        test_images = images[val_end:]
    
        train_class_dir = os.path.join(output_dir, 'train', class_folder)
        val_class_dir = os.path.join(output_dir, 'val', class_folder)
        test_class_dir = os.path.join(output_dir,'test',class_folder)
    
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(val_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)
    
        for img in train_images:
            shutil.copy2(os.path.join(class_path, img), os.path.join(train_class_dir, img))

        for img in val_images:
            shutil.copy2(os.path.join(class_path, img), os.path.join(val_class_dir, img))

        for img in test_images:
            shutil.copy2(os.path.join(class_path, img), os.path.join(test_class_dir, img))

        print(f" {class_folder}: {len(train_images)} train / {len(val_images)} val / {len(test_images)} test")
    
if __name__ == "__main__":
    source = "../Datasets/combined-cleaned-dataset"   
    destination = "../Notebooks/the_final_sortdown"          
    split_dataset(source, destination, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)
    

 battery: 661 train / 141 val / 143 test
 biological: 689 train / 147 val / 149 test
 brown-glass: 424 train / 91 val / 92 test
 cardboard: 905 train / 194 val / 195 test
 clothes: 3727 train / 798 val / 800 test
 glass: 350 train / 75 val / 76 test
 green-glass: 440 train / 94 val / 95 test
 metal: 825 train / 176 val / 178 test
 paper: 1150 train / 246 val / 248 test
 plastic: 942 train / 202 val / 203 test
 shoes: 1383 train / 296 val / 298 test
 trash: 583 train / 125 val / 126 test
 white-glass: 542 train / 116 val / 117 test


In [4]:
# Data Augmentation
train_gen = ImageDataGenerator(
    rescale = 1./255,
    rotation_range = 20,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    brightness_range = [0.8, 1.2]
)

print("Training Data:")
train_data = train_gen.flow_from_directory(
    "the_final_sortdown/train",
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
    shuffle=True
)


val_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)

print("\nValidation Data:")
val_data = val_gen.flow_from_directory(
    "the_final_sortdown/val",
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
    shuffle=True
)
print("\nTesting Data:")
test_data = test_gen.flow_from_directory(
    "the_final_sortdown/test",
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
    shuffle=False
)
class_names = list(test_data.class_indices.keys())



Training Data:
Found 16410 images belonging to 13 classes.

Validation Data:
Found 4985 images belonging to 13 classes.

Testing Data:
Found 5047 images belonging to 13 classes.


In [5]:
# Transfer Learning MobileNetV2
base_model = MobileNetV2(
    include_top = False,
    weights ='imagenet',
    input_shape = (224,224,3)
)
base_model.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)

num_classes = len(train_data.class_indices)
output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

model.compile(
    optimizer=Adam(),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

#model.summary()

In [6]:
early_stop = EarlyStopping(
    monitor='val_loss',   # What to monitor (validation loss is typical)
    patience=5,           # How many epochs without improvement before stopping
    restore_best_weights=True  # Optional, but restores the best model, not the last one
)


In [8]:
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs = 7, 
    callbacks = [early_stop]
)

Epoch 1/7
[1m229/513[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m10:03:33[0m 128s/step - accuracy: 0.7068 - loss: 0.9722

KeyboardInterrupt: 

In [7]:
test_loss, test_acc = model.evaluate(test_data)
print(f"Test accuarcy: {test_acc:.2f}")

  self._warn_if_super_not_called()


[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 649ms/step - accuracy: 0.0563 - loss: 2.9223
Test accuarcy: 0.06


In [3]:
# Adding Precision, Recall, and F1
from sklearn.metrics import classification_report

# Predict
y_pred = model.predict(test_data)
y_pred_classes = np.argmax(y_pred, axis=1)

# Get true labels
y_true = test_data.classes

# Print report
print(classification_report(y_true, y_pred_classes, target_names=class_names))


NameError: name 'model' is not defined

In [17]:
import pandas as pd

# Load file
df = pd.read_excel("Idemat_2025RevA6.xlsx")

# Extract material and eco-cost of carbon footprint (Column L)
df = df[["Process", "Carbon Dioxide"]]
df.columns = ["material", "co2_kg"]

# Clean nulls
df = df.dropna()

# Build a lookup dictionary: { "material name" : kgCO₂e }
co2_lookup = dict(zip(df["material"], df["co2_kg"]))


FileNotFoundError: [Errno 2] No such file or directory: 'Idemat_2025RevA6.xlsx'

In [2]:
!pip3 install gym stable-baselines3

Defaulting to user installation because normal site-packages is not writeable


In [6]:
!pip3 install openpyxl -q

In [14]:
import pandas as pd

# Step 1: Create an ExcelFile object
xls = pd.ExcelFile("../Datasets/Idemat_2025RevA6.xlsx")

# Step 2: Print available sheet names
print(xls.sheet_names)

# Step 3: Load a specific sheet by name
df = pd.read_excel(xls, sheet_name="Idemat2025")


BadZipFile: Bad magic number for central directory

In [8]:
import gym
from gym import Env
from gym.spaces import Discrete
import random
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# === STEP 1: Load your real kgCO₂e data from Excel ===
df = pd.read_excel("../Datasets/Idemat_2025RevA6.xlsx")  # Replace with your actual file
df = df[["Process", "Carbon Dioxide kgCO2e"]]  # Columns may vary!
df.columns = ["material", "co2_kg"]
df = df.dropna()

# Lookup: material name → CO₂ kg
co2_lookup = dict(zip(df["material"], df["co2_kg"]))

# === STEP 2: Map CNN class labels to material names in table ===
cnn_class_to_material = {
    "cardboard": "Paper, recycled",
    "clothes": "bio-Cotton USA",
    "electronics": "electrical industry",
    "furniture": "wood industry",
    "glass": "Glass (generic)",
    "hazardous": "oil industry",
    "metal": "metal products industry",
    "organic": "Feed grains",
    "paper": "paper industry",
    "plastic": "Plastic, generic",
    "shoes": "leather industry",
    "textiles": "Jute fibres Bangladesh",
    "wood": "Cotton India"
}

# Build class → kgCO₂e map
cnn_class_to_co2 = {
    label: co2_lookup.get(material, 1.0)  # default = 1.0 kg if not found
    for label, material in cnn_class_to_material.items()
}

# === STEP 3: Define best disposal action for each class ===
correct_actions = {
    "cardboard": 0,  # Recycle
    "clothes": 1,    # Donate
    "electronics": 4,  # Hazardous
    "furniture": 1,  # Donate
    "glass": 0,      # Recycle
    "hazardous": 4,  # Hazardous
    "metal": 0,      # Recycle
    "organic": 3,    # Compost
    "paper": 0,      # Recycle
    "plastic": 0,    # Recycle
    "shoes": 1,      # Donate
    "textiles": 1,   # Donate
    "wood": 0        # Recycle
}

# === STEP 4: Custom Gym environment ===
class WasteDisposalEnv(Env):
    def __init__(self):
        super(WasteDisposalEnv, self).__init__()
        self.classes = list(cnn_class_to_co2.keys())
        self.action_space = Discrete(5)  # [Recycle, Donate, Landfill, Compost, Hazardous]
        self.observation_space = Discrete(len(self.classes))
        self.current_class = None

    def reset(self):
        self.current_class = random.choice(self.classes)
        return self.classes.index(self.current_class)

    def step(self, action):
        class_idx = self.classes.index(self.current_class)
        correct_action = correct_actions[self.current_class]
        co2 = cnn_class_to_co2[self.current_class]

        reward = co2 if action == correct_action else -co2
        done = True
        obs = self.reset()  # immediate reset
        return obs, reward, done, {
            "class": self.current_class,
            "co2": co2,
            "action": action,
            "correct": correct_action
        }

# === STEP 5: Train PPO Agent ===
env = WasteDisposalEnv()
check_env(env)

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)

# Save model
model.save("ppo_waste_disposal")

# Quick test
obs = env.reset()
for _ in range(5):
    action, _ = model.predict(obs)
    obs, reward, done, info = env.step(action)
    print(f"Predicted class: {info['class']}, Action: {action}, Reward: {reward}")

KeyError: "None of [Index(['Process', 'Carbon Dioxide kgCO2e'], dtype='object')] are in the [columns]"