In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Metadata table for the preprocessed training set; later cells read its
# 'processed_image_path' and 'output_label' columns.
labels_df = pd.read_csv('preprocessed_training_data_task3.csv')
# Folder (relative to the working directory) in which the .npy image arrays are looked up.
npy_folder = 'parallel_preprocessed_images'

In [3]:
# Two independent, initially empty accumulators: one for the loaded image
# arrays and one for their class labels.
image_data, labels = [], []

In [4]:
# Keep only the file name of each recorded path and re-root it under
# npy_folder, so the column always points at <npy_folder>/<file name>
# regardless of the directory stored in the CSV.
labels_df['processed_image_path'] = labels_df['processed_image_path'].map(
    lambda csv_path: os.path.join(npy_folder, os.path.basename(csv_path))
)

In [5]:
# Check every expected .npy file exactly once and reuse that mask for both the
# report and the filter, so the two can never disagree and the disk is only
# scanned a single time. The cast keeps the mask boolean even for an empty frame.
file_exists = labels_df['processed_image_path'].apply(os.path.exists).astype(bool)

# Rows whose array file is absent on disk.
missing_files = labels_df[~file_exists]
if not missing_files.empty:
    print(f"Missing .npy files:\n{missing_files}")
    # Drop those rows so the loading step does not try to open files that are not there.
    labels_df = labels_df[file_exists]

In [6]:
# Rebuild both lists from scratch so this cell is safe to re-run: it never
# appends to whatever `image_data` / `labels` currently hold (which would
# duplicate entries, or fail once they have been converted to arrays).
image_data = [np.load(npy_path) for npy_path in labels_df['processed_image_path']]

# 'output_label' is used as the class of each image. Both lists are read from
# labels_df in row order, so labels[i] belongs to image_data[i].
labels = labels_df['output_label'].tolist()

In [7]:
# Convert the Python lists to NumPy arrays: labels keep their inferred dtype,
# images are stacked into a single float32 array for TensorFlow.
labels = np.array(labels)
image_data = np.array(image_data, dtype=np.float32)

In [21]:
# Report how many samples were loaded; the count is taken from image_data.
print(f"Loaded {len(image_data)} images and labels.")

Loaded 10015 images and labels.


In [9]:
# Fit the encoder on the raw labels, map every label to its integer id, then
# expand the ids into one-hot rows (the target format for the
# categorical cross-entropy loss used at compile time).
label_encoder = LabelEncoder().fit(labels)
encoded_labels = label_encoder.transform(labels)
categorical_labels = to_categorical(encoded_labels)

In [22]:
# Hold out 20% of the samples for validation. Stratifying on the integer class
# ids keeps the class proportions the same in both splits; random_state fixes
# the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    image_data,
    categorical_labels,
    test_size=0.2,
    random_state=42,
    stratify=encoded_labels,
)


In [23]:
import os

# Define the dataset directory
# NOTE(review): this value is only echoed in the summary below; the CSV and the
# .npy folder are opened relative to the working directory elsewhere.
dirpath = 'Project'

# Define constants
IMG_SIZE = 224  # Side length of the square input; assumes the .npy arrays are 224x224 — TODO confirm
BATCH_SIZE = 32  # Mini-batch size passed to model.fit
CHANNELS = 3  # Number of channels in the images (RGB)
EPOCHS = 10  # NOTE(review): the training cell passes epochs=20 directly rather than this constant

# Input shape for the model: (height, width, channels)
INPUT_SHAPE = (IMG_SIZE, IMG_SIZE, CHANNELS)

# Determine the number of classes from the one-hot label matrix the model is
# actually trained on. Counting unique values in a freshly re-read CSV can
# disagree with that matrix when rows were dropped for missing .npy files, and
# re-reading the CSV here would also overwrite the filtered labels_df.
N_CLASSES = categorical_labels.shape[1]

# Print configuration summary
print(f"Directory Path: {dirpath}")
print(f"Image Size: {IMG_SIZE}x{IMG_SIZE}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Number of Channels: {CHANNELS}")
print(f"Number of Epochs: {EPOCHS}")
print(f"Input Shape: {INPUT_SHAPE}")
print(f"Number of Classes: {N_CLASSES}")


Directory Path: Project
Image Size: 224x224
Batch Size: 32
Number of Channels: 3
Number of Epochs: 10
Input Shape: (224, 224, 3)
Number of Classes: 7


In [27]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Flatten, Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

# Define the model: four Conv -> BatchNorm -> MaxPool -> Dropout stages,
# followed by a regularized dense head and a softmax over N_CLASSES.
model = Sequential([
    # Declare the input with an explicit Input object rather than passing
    # input_shape to the first layer, which Keras 3 warns against for
    # Sequential models.
    tf.keras.Input(shape=INPUT_SHAPE),

    # NOTE(review): unlike the later conv layers, this one has no
    # kernel_regularizer — confirm that is intentional.
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),  # Dropout to prevent overfitting

    Conv2D(64, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.001)),  # L2 regularization
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    # The two 128-filter stages use the default padding, so each convolution
    # trims the spatial size slightly before pooling.
    Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),  # Higher dropout rate for the dense layer

    Dense(N_CLASSES, activation='softmax')  # Output layer for multi-class classification
])

# Compile with Adam at a learning rate of 1e-4; categorical cross-entropy
# expects one-hot encoded targets.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Model summary
model.summary()



In [28]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Callbacks — both watch the validation loss.
# Halve the learning rate after 3 epochs without improvement, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Stop after 5 epochs without improvement and roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Fit on the training split, evaluating on the validation split after each epoch.
# The epoch count is given literally (20) rather than through the EPOCHS constant.
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=20,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, reduce_lr],
)


Epoch 1/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 1s/step - accuracy: 0.3614 - loss: 2.8479 - val_accuracy: 0.0589 - val_loss: 5.5323 - learning_rate: 1.0000e-04
Epoch 2/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 1s/step - accuracy: 0.5140 - loss: 2.2032 - val_accuracy: 0.2526 - val_loss: 3.8969 - learning_rate: 1.0000e-04
Epoch 3/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 1s/step - accuracy: 0.5787 - loss: 1.9957 - val_accuracy: 0.5791 - val_loss: 3.6890 - learning_rate: 1.0000e-04
Epoch 4/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m308s[0m 1s/step - accuracy: 0.6457 - loss: 1.7506 - val_accuracy: 0.6355 - val_loss: 2.7484 - learning_rate: 1.0000e-04
Epoch 5/20
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m313s[0m 1s/step - accuracy: 0.6601 - loss: 1.6487 - val_accuracy: 0.5876 - val_loss: 3.1027 - learning_rate: 1.0000e-04
Epoch 6/20
[1m251/251[0m [32m━━━━━━━━━━━━━

In [30]:
# Write the model to 'cnn_multiclass_model.keras' (path relative to the working directory).
model.save('cnn_multiclass_model.keras')