In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import os
import shutil

In [2]:
data_dir = "/Users/michelangelozampieri/Desktop/TAMID-Group-New/data/sorted_data"

In [4]:
classes = []
for root, dirs, files in os.walk(data_dir):
    for name in dirs:
        classes.append(name)
print(classes)

['PP', 'Other', 'PE-HD', 'PS', 'PET', 'PVC']


In [5]:
output_dir = "/Users/michelangelozampieri/Desktop/TAMID-group-New/data/sorted_data_output"

train_dir = os.path.join(output_dir, "train")
validation_dir = os.path.join(output_dir, "validation")
test_dir = os.path.join(output_dir, "test")

os.makedirs(train_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

In [7]:
from sklearn.model_selection import train_test_split

for class_name in classes:
    class_dir = os.path.join(data_dir, class_name)
    images = os.listdir(class_dir)

    train_val_images, test_images = train_test_split(images, test_size=0.15, random_state=42)
    train_images, validation_images = train_test_split(train_val_images, test_size=0.1765, random_state=42)

    os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(validation_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)

    for image in train_images:
        shutil.copy(os.path.join(class_dir, image), os.path.join(train_dir, class_name, image))
    for image in validation_images:
        shutil.copy(os.path.join(class_dir, image), os.path.join(validation_dir, class_name, image))
    for image in test_images:
        shutil.copy(os.path.join(class_dir, image), os.path.join(test_dir, class_name, image))

In [8]:
print("Number of images in training directory:")
for class_name in classes:
    print(f"{class_name}: {len(os.listdir(os.path.join(train_dir, class_name)))}")
print("Number of images in validation directory:")
for class_name in classes:
    print(f"{class_name}: {len(os.listdir(os.path.join(test_dir, class_name)))}")
print("Number of images in test directory:")
for class_name in classes:
    print(f"{class_name}: {len(os.listdir(os.path.join(validation_dir, class_name)))}")

Number of images in training directory:
PP: 251
Other: 27
PE-HD: 98
PS: 251
PET: 895
PVC: 84
Number of images in validation directory:
PP: 54
Other: 6
PE-HD: 22
PS: 55
PET: 193
PVC: 19
Number of images in test directory:
PP: 55
Other: 7
PE-HD: 22
PS: 55
PET: 193
PVC: 19


In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical"
)

Found 1606 images belonging to 6 classes.


In [11]:
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical"
)

Found 351 images belonging to 6 classes.


In [13]:
test_dataset = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    labels="inferred",  
    label_mode="categorical",  
    image_size=(224, 224)
)

Found 349 files belonging to 6 classes.


In [14]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(len(classes), activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-4 * 10**(-epoch / 10))

In [16]:
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    callbacks=[lr_schedule]
)

  self._warn_if_super_not_called()


Epoch 1/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 1s/step - accuracy: 0.5203 - loss: 1.4635 - val_accuracy: 0.5869 - val_loss: 1.2095 - learning_rate: 1.0000e-04
Epoch 2/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 1s/step - accuracy: 0.5781 - loss: 1.2448 - val_accuracy: 0.5869 - val_loss: 1.1492 - learning_rate: 7.9433e-05
Epoch 3/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 1s/step - accuracy: 0.5978 - loss: 1.1253 - val_accuracy: 0.5926 - val_loss: 1.0801 - learning_rate: 6.3096e-05
Epoch 4/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 1s/step - accuracy: 0.5862 - loss: 1.1196 - val_accuracy: 0.6040 - val_loss: 1.0294 - learning_rate: 5.0119e-05
Epoch 5/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 1s/step - accuracy: 0.6118 - loss: 1.0282 - val_accuracy: 0.6040 - val_loss: 0.9809 - learning_rate: 3.9811e-05
Epoch 6/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [21]:
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc}")

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 230ms/step - accuracy: 0.4753 - loss: 268.3473
Test accuracy: 0.49570199847221375


In [22]:
model_dir = "/Users/michelangelozampieri/Desktop/TAMID-group-New/models"
os.makedirs(model_dir, exist_ok=True)

In [23]:
model.save(os.path.join(model_dir, "base_model.h5"))



In [28]:
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

for layer in model.layers[:-10]:  # Freeze all layers except the last 10
    layer.trainable = False 

model.compile(optimizer=SGD(learning_rate=1e-4, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, min_lr=1e-7)

In [29]:
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=[lr_schedule]
)

Epoch 1/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 1s/step - accuracy: 0.6895 - loss: 0.8778 - val_accuracy: 0.6182 - val_loss: 1.1714 - learning_rate: 1.0000e-04
Epoch 2/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 1s/step - accuracy: 0.6923 - loss: 0.8280 - val_accuracy: 0.6553 - val_loss: 0.9241 - learning_rate: 1.0000e-04
Epoch 3/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 1s/step - accuracy: 0.6982 - loss: 0.8185 - val_accuracy: 0.6268 - val_loss: 1.1001 - learning_rate: 1.0000e-04
Epoch 4/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 1s/step - accuracy: 0.7022 - loss: 0.7956 - val_accuracy: 0.6524 - val_loss: 0.9446 - learning_rate: 1.0000e-04
Epoch 5/20
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 953ms/step - accuracy: 0.6793 - loss: 0.8428
Epoch 5: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [33]:
# Evaluate the model

test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc}")


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 237ms/step - accuracy: 0.2179 - loss: 440.4608
Test accuracy: 0.22922636568546295


In [35]:
predictions = model.predict(test_dataset)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = classes
class_labels = list(test_dataset.class_names)
print(f"True classes: {true_classes}")
print(f"Predicted classes: {predicted_classes}")

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 226ms/step
True classes: ['PP', 'Other', 'PE-HD', 'PS', 'PET', 'PVC']
Predicted classes: [2 5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 2 5 2 2 5 5 2 5 5
 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5 2 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5
 2 2 5 5 5 5 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5
 2 5 5 2 5 5 5 5 2 2 5 5 5 5 5 5 5 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5
 5 5 5 5 5 2 5 2 5 5 5 2 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 2 5 5 2
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 2 5 2
 2 5 5 2 5 5 5 2 5 2 5 5 5 5 5 5 5 5 5 5 5 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5
 2 5 5 5 5 5 2 2 2 5 2 5 5 5 5 2 5 2 5 5 2 5 5 2 5 2 2 5 5 5 2 5 5 5 2 2 5
 5 5 5 5 5 5 2 5 5 2 5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 2 5 5 2 5 5 5 5 2 5
 5 5 5 5 5 5 5 2 2 5 2 5 5 5 5 5]


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(true_classes, predicted_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.title('Confusion Matrix')
plt.show()

ValueError: Found input variables with inconsistent numbers of samples: [6, 349]

In [37]:
model.save(os.path.join(model_dir, "fine_tuned_model.h5"))

