In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import multiprocessing 
from multiprocessing import Pool
from functools import partial
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, classification_report
from image_processing import preprocess_image

In [None]:
# Gesture classes to load; each one lives in a 'train_val_<class>' sub-folder of base_path
folders = ['stop', 'ok', 'peace', 'peace_inverted', 'like', 'dislike']
# NOTE(review): machine-specific absolute path — point this at the local dataset copy
base_path = r'D:\archive(1)\hagrid-sample-30k-384p\hagrid_30k'

# Seed for the row shuffle so that Restart & Run All reproduces the same ordering
shuffle_seed = 42

image_paths = []
labels = []

for folder in folders:
    folder_path = os.path.join(base_path, 'train_val_' + folder)
    # sorted(): os.listdir returns entries in arbitrary order, which would make
    # the seeded shuffle below depend on the filesystem
    for filename in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, filename)
        # Skip sub-directories; only regular files are passed on as images
        if not os.path.isfile(img_path):
            continue
        image_paths.append(img_path)
        labels.append(folder)

# One row per image: its path and the (string) class it belongs to
df = pd.DataFrame({'image_path': image_paths, 'label': labels})

# Class balance before any shuffling/encoding
print(df['label'].value_counts())

# Shuffle the rows reproducibly and renumber the index
df = df.sample(frac=1, random_state=shuffle_seed).reset_index(drop=True)
print(df.head())

# Replace the string labels with integer class ids; label_encoder keeps the mapping
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

print(df.head())



In [None]:
# Preprocess all images in parallel worker processes.
# Leave two cores free for the kernel/OS, but never ask for fewer than one
# worker: os.cpu_count() may return None and Pool rejects processes < 1.
num_processes = max(1, (os.cpu_count() or 1) - 2)

# starmap unpacks each DataFrame row, so every call is
# preprocess_image(image_path, label)
with Pool(processes=num_processes) as pool:
    preprocessed_images = pool.starmap(preprocess_image, df.values)

# Separate preprocessed images and labels
# assumes preprocess_image returns an (image, label) pair — TODO confirm
X, y = zip(*preprocessed_images)

# Hold out 10% of the data as the test set
X_t, X_test, y_t, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Carve the validation set out of the remaining 90%:
# 0.111 * 0.9 ~= 10% of the full dataset
X_train, X_val, y_train, y_val = train_test_split(X_t, y_t, test_size = 0.111, random_state=123)

In [None]:
# Materialise every split as a NumPy array: features first, then targets
X_train, X_val, X_test = (np.array(part) for part in (X_train, X_val, X_test))
y_train, y_val, y_test = (np.array(part) for part in (y_train, y_val, y_test))

# Report the feature and target shapes of each split
for caption, features, targets in (
    ("Training set shape:", X_train, y_train),
    ("Validation set shape:", X_val, y_val),
    ("Testing set shape:", X_test, y_test),
):
    print(caption, features.shape, targets.shape)

In [None]:
# Random augmentation applied to TRAINING images only
datagen = ImageDataGenerator(
    rotation_range=20,      # Rotate images by up to 20 degrees
    width_shift_range=0.2,  # Shift images horizontally by up to 20% of the width
    height_shift_range=0.2, # Shift images vertically by up to 20% of the height
    shear_range=0.2,        # Shear transformations
    zoom_range=0.2,         # Zoom in/out by up to 20%
    horizontal_flip=True,   # Flip images horizontally
    fill_mode='nearest'     # Fill in missing pixels after rotation or shifting
)

# Define batch size
batch_size = 32

# Training batches: shuffled and randomly augmented
train_datagen = datagen.flow(X_train, y_train, batch_size=batch_size)

# Validation batches: no augmentation and a fixed order, so val_loss is measured
# on the real images and stays comparable from epoch to epoch (it drives the
# learning-rate schedule and the checkpointing)
val_datagen = ImageDataGenerator().flow(X_val, y_val, batch_size=batch_size, shuffle=False)

In [None]:
# ImageNet-pretrained MobileNetV2 backbone, without its classifier, for 224x224 RGB input
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# L2 penalty applied to the kernels of the hidden Dense layers
l2_reg = 0.001

# Stack: backbone -> global average pooling -> 3 x (Dense 512 + Dropout 0.5) -> 6-way softmax
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
for _ in range(3):
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(l2_reg)))
    model.add(Dropout(0.5))
model.add(Dense(6, activation='softmax'))

# Integer class ids are used as targets, hence the sparse loss and metric
model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

model.summary()

In [None]:
# Cut the learning rate by 10x when val_loss has not improved for 3 epochs (floor: 1e-6)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6)

# Where the best model is written
# NOTE(review): newer Keras releases may require a '.keras' / '.weights.h5'
# suffix here — confirm against the installed version
filepath = "model3/"

# Keep only the checkpoint with the lowest val_loss. save_weights_only is left
# at its default, so the whole model is saved, not just the weights.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# Train on the generator batches.
# len(iterator) is the number of batches per pass INCLUDING the final partial
# batch; the previous `len(X) // batch_size` dropped those samples and could
# evaluate to 0 steps for a split smaller than one batch.
history = model.fit(
    train_datagen,
    steps_per_epoch=len(train_datagen),
    epochs=30,
    validation_data=val_datagen,
    validation_steps=len(val_datagen),
    callbacks=[reduce_lr, checkpoint]
)

In [None]:


# Score the model on the held-out test split
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_acc)

# Predict labels for the test set (one row of class scores per sample)
preds = model.predict(X_test)

# Index of the highest-scoring class for every sample
predicted_classes = preds.argmax(axis=1)

# Confusion matrix of true vs. predicted class ids
test_confusion = confusion_matrix(y_test, predicted_classes)
print(test_confusion)

# Per-class classification report
test_report = classification_report(y_test, predicted_classes)
print(test_report)