# Training Data

### Labeling DataSet

In [21]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import load_img
from sklearn.metrics import classification_report 
import numpy as np
import sklearn
import os 
import shutil 
import random

In [8]:
!pip install scikit-learn -q


In [19]:
!pip install Pillow


Collecting Pillow
  Downloading pillow-11.2.1-cp312-cp312-win_amd64.whl.metadata (9.1 kB)
Downloading pillow-11.2.1-cp312-cp312-win_amd64.whl (2.7 MB)
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   ---------------------------------------- 2.7/2.7 MB 17.2 MB/s eta 0:00:00
Installing collected packages: Pillow
Successfully installed Pillow-11.2.1


In [26]:
import PIL
print(PIL.__version__)


11.2.1


In [9]:
train_gen = ImageDataGenerator(rescale=1./255)

# Turning every folder name into class label
train_data = train_gen.flow_from_directory(
    "../Datasets/combined-cleaned-dataset",
    target_size=(224,224),
    batch_size=32,
    class_mode= "categorical"
)

print(train_data.class_indices)

Found 18042 images belonging to 13 classes.
{'battery': 0, 'biological': 1, 'brown-glass': 2, 'cardboard': 3, 'clothes': 4, 'glass': 5, 'green-glass': 6, 'metal': 7, 'paper': 8, 'plastic': 9, 'shoes': 10, 'trash': 11, 'white-glass': 12}


In [15]:
# Spliting our dataset into training, validation, and testing 
def split_dataset(source_dir, output_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    random.seed(42)
    
    for class_folder in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_folder)
        if not os.path.isdir(class_path):
            continue 
        
        images = [f for f in os.listdir(class_path) if f.lower().endswith(('.jpg','.jpeg','.png'))]
        random.shuffle(images)
    
        total = len(images)
        train_end = int(total * train_ratio)
        val_end = train_end + int(total * val_ratio)
    
        train_images = images[:train_end]
        val_images = images[train_end:val_end]
        test_images = images[val_end:]
    
        train_class_dir = os.path.join(output_dir, 'train', class_folder)
        val_class_dir = os.path.join(output_dir, 'val', class_folder)
        test_class_dir = os.path.join(output_dir,'test',class_folder)
    
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(val_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)
    
        for img in train_images:
            shutil.copy2(os.path.join(class_path, img), os.path.join(train_class_dir, img))

        for img in val_images:
            shutil.copy2(os.path.join(class_path, img), os.path.join(val_class_dir, img))

        for img in test_images:
            shutil.copy2(os.path.join(class_path, img), os.path.join(test_class_dir, img))

        print(f" {class_folder}: {len(train_images)} train / {len(val_images)} val / {len(test_images)} test")
    
if __name__ == "__main__":
    source = "../Datasets/combined-cleaned-dataset"   
    destination = "../Notebooks/the_final_sortdown"          
    split_dataset(source, destination, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)
    

 battery: 661 train / 141 val / 143 test
 biological: 689 train / 147 val / 149 test
 brown-glass: 424 train / 91 val / 92 test
 cardboard: 905 train / 194 val / 195 test
 clothes: 3727 train / 798 val / 800 test
 glass: 350 train / 75 val / 76 test
 green-glass: 440 train / 94 val / 95 test
 metal: 825 train / 176 val / 178 test
 paper: 1150 train / 246 val / 248 test
 plastic: 942 train / 202 val / 203 test
 shoes: 1383 train / 296 val / 298 test
 trash: 583 train / 125 val / 126 test
 white-glass: 542 train / 116 val / 117 test


In [12]:
# Data Augmentation
train_gen = ImageDataGenerator(
    rescale = 1./255,
    rotation_range = 20,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    brightness_range = [0.8, 1.2]
)

print("Training Data:")
train_data = train_gen.flow_from_directory(
    "the_final_sortdown/train",
    batch_size=32,
    target_size=(224, 224),
    class_mode = "categorical"
)

val_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)

print("\nValidation Data:")
val_data = val_gen.flow_from_directory(
    "the_final_sortdown/val",
    batch_size=32,
    target_size=(224, 224),
    class_mode="categorical"
)

print("\nTesting Data:")
test_data = test_gen.flow_from_directory(
    "the_final_sortdown/test",
    batch_size=32,
    target_size=(224, 224),
    class_mode="categorical",
    shuffle=False
)

Training Data:
Found 16410 images belonging to 13 classes.

Validation Data:
Found 4985 images belonging to 13 classes.

Testing Data:
Found 5047 images belonging to 13 classes.


In [24]:
# Transfer Learning MobileNetV2
base_model = MobileNetV2(
    include_top = False,
    weights ='imagenet',
    input_shape = (224,224,3)
)
base_model.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
output = Dense(train_data.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

model.compile(
    optimizer=Adam(),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

#model.summary()

In [None]:
history = model.fit(
    train_data
    validation_data=val_data,
    epochs = 10
)

ImportError: Could not import PIL.Image. The use of `load_img` requires PIL.

In [None]:
test_loss, test_acc = model.evaluate(test_data)
print(f"Test accuarcy: {test_acc:.2f}")

[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 151ms/step - accuracy: 0.9400 - loss: 0.2037
Test accuarcy: 0.91


In [None]:
# Adding Precision, Recall, and F1
y_pred = model.predict(test_data)
y_pred_classes = np.argmax(y_pred, axis=1)

y_true = test_data.classes
print(classification_report(y_true, y_pred_classes, target_names=list(test_data.class_indices.keys())))


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 153ms/step
              precision    recall  f1-score   support

     battery       0.94      0.94      0.94       143
  biological       0.95      0.98      0.96       149
 brown-glass       0.86      0.83      0.84        92
   cardboard       0.88      0.97      0.92       195
     clothes       0.99      0.98      0.99       800
       glass       0.38      0.39      0.39        76
 green-glass       0.79      0.84      0.82        95
       metal       0.80      0.92      0.85       178
       paper       0.95      0.94      0.94       248
     plastic       0.95      0.81      0.87       203
       shoes       0.94      0.99      0.96       298
       trash       0.95      0.83      0.89       126
 white-glass       0.70      0.61      0.65       117

    accuracy                           0.91      2720
   macro avg       0.85      0.85      0.85      2720
weighted avg       0.91      0.91      0.91      2720



In [5]:
# Adding Reinforcement Learning Sysytem

import gym 
from gym import spaces 
import numpy as np 

In [6]:
!pip3 install gym -q

In [None]:
class WasteDisposalEnv(gym.Env): 
    def __init__(self, classifier_model, test_data, co2_impact_mapping):

        super(WasteDisposalEnv, self).__init__()

        #used for predicting material type 

        self.classfier = classifier_model 

        #data for predict 

        self.data = test_data
        self.images = self.labels = self.__get_all_data()
        self.co2_impact_mapping = co2_impact_mapping 

        #define action space 4 disposal options(Recycle, Compost, Donate, Landfill)
        self.action_space = spaces.Discrete(4)

        self.observation_space = spaces.Box(low=0, high=1, shape=(self.classifier.output_shape[-1],), dtype=np.float32)
        
        self.current_idx = 0


        def get_data(self): 
            images = []
            labels =[]
            
            for batch_x , batch_y in self.data: 
                images.extend (batch_x)
                labels.extend(np.argmax(batch_y, axis=1))
                if len(images) >= self.data.samples: 
                    break 
                return np.array(images), np(labels)
            
        def reset(self): 
            self.current_idx = 0 
            image = self.images[self.current_idx]
            probs = self.classifier.predict(image[np.newaxis, ...])[0]
            return probs 
        
        def step(self,action):
            image = self.images[self.current_idx]
            true_label = self.labels[self.current_idx]

            probs = self.classifier.predict(image[np.newaxis,...])[0]
            predicted_class = np.argmax(probs)

            reward = self._calculate_reward(predicted_class, action)

            self.current_idx += 1
            done = self.current_idx >= len(self.images)

            if not done: 
                next_image = self.images[self.current_idx]
                next_probs = self.classifier.predict(next_image[np.newaxis, ...])[0]
            
            else:
                next_probs = np.zeros_like(probs)

            return next_probs, reward, done, {}
        
        def _calculate_reward(self, predicted_class, action):
            # CO₂ logic
            item_type = list(self.data.class_indices.keys())[predicted_class]
            item_impact = self.co2_impact_mapping.get(item_type, 0)
            
            # Reward/Penalty based on action
            if action == 0:  # Recycle
                return item_impact.get('recycle', -5)
            elif action == 1:  # Compost
                return item_impact.get('compost', -5)
            elif action == 2:  # Donate
                return item_impact.get('donate', -5)
            elif action == 3:  # Landfill
                return item_impact.get('landfill', -10)
            else:
                return -10  # default penalty

In [None]:
co2_impact_mapping = {
    "plastic": {
        "recycle": +10,
        "landfill": -5
    },
    "food_waste": {
        "compost": +8,
        "landfill": -5
    },
    "electronics": {
        "donate": +15,
        "landfill": -20
    },
    "textile": {
        "donate": +10,
        "landfill": -15
    },
    # etc.
}
