In [None]:
import zipfile
import os

# Define the path to the uploaded zip file and the folder to unpack it into
zip_file_path = '/content/Facial Recognition.zip'
extract_dir = '/content/'


def extract_archive(archive_path, destination):
    """Unpack a zip archive and report problems instead of raising.

    Args:
        archive_path: Path of the zip file to unpack.
        destination: Directory that receives the archive contents.

    Returns:
        True when the archive was extracted, False when the file is missing,
        is not a zip archive, or turns out to be corrupt during extraction.
    """
    if not os.path.exists(archive_path):
        print("File not found.")
        return False

    # A file can exist without being a usable archive: the recorded run of
    # this cell found the file and then died with BadZipFile. Check the
    # format up front so the reader gets an actionable message instead.
    if not zipfile.is_zipfile(archive_path):
        print(f"'{archive_path}' exists but is not a valid zip archive. "
              "Upload the file again and re-run this cell.")
        return False

    print("File found. Extracting...")
    try:
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall(destination)
    except zipfile.BadZipFile as exc:
        # is_zipfile only inspects the archive signature; damaged members are
        # detected while reading, so this case still has to be handled.
        print(f"Extraction failed because the archive is corrupt: {exc}")
        return False

    print("Extraction complete.")
    return True


# Keep the outcome in a variable so later cells can check it if they want to.
extraction_ok = extract_archive(zip_file_path, extract_dir)


File found. Extracting...


BadZipFile: File is not a zip file

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset folders, resolved relative to the notebook's working directory.
train_dir = 'train/'
test_dir = 'test/'
valid_dir = 'valid/'

# Data augmentation and normalization (training images only)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Validation and test images are only rescaled, never augmented, so the
# reported metrics describe unmodified images.
valid_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by every generator below.
flow_options = dict(
    target_size=(150, 150),  # Resize images to 150x150
    batch_size=32,
    class_mode='binary'      # one 0/1 label per image (two classes)
)

# Flow images from the directories; flow_from_directory derives each image's
# class from the sub-directory it is stored in.
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

valid_generator = valid_datagen.flow_from_directory(valid_dir, **flow_options)

# The evaluation step later in the notebook calls model.evaluate(test_generator),
# so the test split needs a generator as well. shuffle=False keeps a fixed image
# order so predictions can be matched against test_generator.classes.
test_generator = valid_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    **flow_options
)


Found 11220 images belonging to 2 classes.
Found 600 images belonging to 2 classes.


In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define data generators (rescaling only, no augmentation)
datagen = ImageDataGenerator(rescale=1./255)

# Load the training images through a dedicated generator. It deliberately gets
# its own name: binding it to `train_generator` would replace the augmented,
# shuffled generator created earlier, and the later model.fit(train_generator)
# call would then train on unshuffled, un-augmented batches.
train_array_generator = datagen.flow_from_directory(
    'train/',
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False  # fixed order, so repeated runs yield the same arrays
)

# Extract data from generator.
# The output arrays are allocated once (sized from the first batch) and filled
# in place, which avoids holding the whole dataset in memory twice.
total_images = train_array_generator.samples
X_train = None
y_train = None
write_pos = 0

for batch_index in range(len(train_array_generator)):
    X_batch, y_batch = train_array_generator[batch_index]
    if X_train is None:
        X_train = np.empty((total_images,) + X_batch.shape[1:], dtype=X_batch.dtype)
        y_train = np.empty((total_images,) + y_batch.shape[1:], dtype=y_batch.dtype)
    batch_end = write_pos + len(X_batch)
    X_train[write_pos:batch_end] = X_batch
    y_train[write_pos:batch_end] = y_batch
    write_pos = batch_end

if X_train is None:
    # No batches at all: fall back to empty arrays, as before.
    X_train = np.array([])
    y_train = np.array([])
else:
    assert write_pos == total_images, "generator returned fewer images than it reported"

# Now, X_train and y_train are defined and can be used with SMOTE



Found 11220 images belonging to 2 classes.


In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
import matplotlib.pyplot as plt

# One seed for the split and for SMOTE so that reruns are comparable.
RANDOM_STATE = 42

# Define data generators (rescaling only, no augmentation)
datagen = ImageDataGenerator(rescale=1./255)

# Load the training images through a dedicated generator. It deliberately gets
# its own name so the augmented `train_generator` built earlier in the notebook
# is not overwritten by this unshuffled, un-augmented one.
train_array_generator = datagen.flow_from_directory(
    'train/',
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Extract data from generator into arrays that are allocated once and filled
# in place (avoids holding the whole dataset in memory twice).
total_images = train_array_generator.samples
X_train = None
y_train = None
write_pos = 0

for batch_index in range(len(train_array_generator)):
    X_batch, y_batch = train_array_generator[batch_index]
    if X_train is None:
        X_train = np.empty((total_images,) + X_batch.shape[1:], dtype=X_batch.dtype)
        y_train = np.empty((total_images,) + y_batch.shape[1:], dtype=y_batch.dtype)
    batch_end = write_pos + len(X_batch)
    X_train[write_pos:batch_end] = X_batch
    y_train[write_pos:batch_end] = y_batch
    write_pos = batch_end

if X_train is None:
    raise ValueError("No training images were loaded from 'train/'.")
assert write_pos == total_images, "generator returned fewer images than it reported"

# Split BEFORE oversampling. SMOTE builds synthetic samples by interpolating
# between neighbouring real samples; running it on the full dataset first puts
# synthetic points in the evaluation set and lets training points be derived
# from evaluation images, which inflates the reported accuracy.
# The hold-out arrays keep their original names for compatibility, although
# they now contain only real, un-resampled images.
X_train_part, X_test_resampled, y_train_part, y_test_resampled = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    stratify=y_train,          # keep the class ratio identical in both parts
    random_state=RANDOM_STATE
)

# Flatten the image data for SMOTE (it works on 2-D feature matrices)
X_train_flattened = X_train_part.reshape(X_train_part.shape[0], -1)

# Apply SMOTE to the training portion only
smote = SMOTE(random_state=RANDOM_STATE)
X_resampled, y_resampled = smote.fit_resample(X_train_flattened, y_train_part)

# Reshape X_resampled back to image dimensions
X_resampled = X_resampled.reshape((-1,) + X_train.shape[1:])
X_train_resampled, y_train_resampled = X_resampled, y_resampled

# Define the model architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # single probability output for the binary task
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Retrain the model with the balanced training data; validate on real images
history = model.fit(X_train_resampled, y_train_resampled, validation_data=(X_test_resampled, y_test_resampled), epochs=10)

# Re-evaluate the model
loss, accuracy = model.evaluate(X_test_resampled, y_test_resampled)
print(f'Test accuracy after bias mitigation: {accuracy}')

# Visualizations (Confusion Matrix and ROC Curve)
# Keep the raw sigmoid outputs for the ROC curve and round a copy for the
# confusion matrix, which needs hard 0/1 labels.
y_score_resampled = model.predict(X_test_resampled).ravel()
y_pred_resampled = y_score_resampled.round()

# Confusion Matrix
cm_after = confusion_matrix(y_test_resampled, y_pred_resampled)
disp_after = ConfusionMatrixDisplay(confusion_matrix=cm_after)
disp_after.plot(cmap='Blues')
plt.title('Confusion Matrix After Bias Mitigation')
plt.show()

# ROC Curve
# roc_curve sweeps a decision threshold over the scores; feeding it rounded
# labels would leave a single operating point instead of a curve.
fpr_after, tpr_after, _ = roc_curve(y_test_resampled, y_score_resampled)
roc_auc_after = auc(fpr_after, tpr_after)

plt.figure(figsize=(8, 6))
plt.plot(fpr_after, tpr_after, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc_after)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve After Bias Mitigation')
plt.legend(loc="lower right")
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'train/'

In [None]:
# Assemble the CNN one layer at a time: three convolution + max-pooling stages,
# then a dense head with dropout that ends in a single sigmoid unit.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # one probability for the binary label

compile_settings = dict(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_settings)

# Train for 10 epochs. `train_generator` and `valid_generator` must already
# exist in the kernel; every epoch walks through each generator once.
batches_per_epoch = len(train_generator)
validation_batches = len(valid_generator)
history = model.fit(
    train_generator,
    steps_per_epoch=batches_per_epoch,
    epochs=10,
    validation_data=valid_generator,
    validation_steps=validation_batches,
)

# Evaluate on the test set (relies on `test_generator` having been created
# earlier in the notebook); evaluate() returns the loss and then the accuracy.
test_metrics = model.evaluate(test_generator)
loss, accuracy = test_metrics
print(f'Test accuracy: {accuracy}')


In [None]:
# Confusion matrix for the model after bias mitigation.
# Expects `y_true_after` (ground-truth labels) and `y_pred_after` (model
# predictions) to be defined before this cell runs.
#
# confusion_matrix only accepts discrete class labels, while the ROC cell
# passes the same `y_pred_after` to roc_curve, which is meant to receive
# continuous scores. Floating-point predictions are therefore thresholded at
# 0.5 here, so `y_pred_after` may hold raw sigmoid outputs. Values that are
# already 0/1 pass through unchanged.
y_pred_after_array = np.asarray(y_pred_after)
if np.issubdtype(y_pred_after_array.dtype, np.floating):
    y_pred_after_labels = (y_pred_after_array.ravel() > 0.5).astype(int)
else:
    y_pred_after_labels = y_pred_after_array

cm_after = confusion_matrix(y_true_after, y_pred_after_labels)
disp_after = ConfusionMatrixDisplay(confusion_matrix=cm_after)
disp_after.plot(cmap='Blues')
plt.title('Confusion Matrix After Bias Mitigation')
plt.show()


In [None]:
# ROC curve for the model after bias mitigation.
# Expects `y_true_after` and `y_pred_after` to be defined before this cell runs.
# NOTE(review): roc_curve treats its second argument as a score; confirm that
# `y_pred_after` holds probabilities rather than rounded 0/1 labels.
roc_points = roc_curve(y_true_after, y_pred_after)
fpr_after, tpr_after = roc_points[0], roc_points[1]  # thresholds are not needed
roc_auc_after = auc(fpr_after, tpr_after)

roc_fig, roc_ax = plt.subplots(figsize=(8, 6))

# Model curve, with the AUC shown in the legend entry.
roc_ax.plot(
    fpr_after,
    tpr_after,
    color='darkorange',
    lw=2,
    label='ROC curve (area = %0.2f)' % roc_auc_after,
)
# Dashed diagonal as a visual baseline.
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve After Bias Mitigation')
roc_ax.legend(loc="lower right")
plt.show()
