### Naive Bayes Classification and logistic regression for Face-Mask Detection



#### Import Libraries

In [1]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install numpy --upgrade




In [3]:
# Import Libraries
import numpy as np 
import pickle
import cv2
import os

#### Load and Preprocess Data


In [4]:
# Define the paths to the image folders
with_mask_folder = './data/with_mask'
without_mask_folder = './data/without_mask'

# Define the reshaped size
reshaped_size = 64

In [5]:
# Function to load and preprocess images
def load_and_preprocess(folder_path, label):
    images = []
    labels = []
    
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg"):  
            img_path = os.path.join(folder_path, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Read image in grayscale
            img_resized = cv2.resize(img, (reshaped_size, reshaped_size))
            images.append(img_resized.flatten())
            labels.append(label)
    
    return np.array(images), np.array(labels)

In [6]:
# Load and preprocess images with masks
X_with_mask, y_with_mask = load_and_preprocess(with_mask_folder, label=1)

# Load and preprocess images without masks
X_without_mask, y_without_mask = load_and_preprocess(without_mask_folder, label=0)

In [7]:
# Combine the datasets
X = np.concatenate((X_with_mask, X_without_mask), axis=0)
y = np.concatenate((y_with_mask, y_without_mask), axis=0)

# Shuffle the dataset 
shuffle_indices = np.random.permutation(len(X))
X = X[shuffle_indices]
y = y[shuffle_indices]

# Print shapes before reshaping and normalizing
print('Before Reshaping size:', X.shape)

# Reshape and normalize 
resized_X = []
for img in X:
    resized_X.append(cv2.resize(img.reshape(reshaped_size, reshaped_size), (reshaped_size, reshaped_size)))

X = np.asarray(resized_X)
del resized_X

print('After Reshaping Size: ', X.shape)
print('Before Reshaping to 1D:', X.shape)
X = X.reshape(-1, reshaped_size * reshaped_size)
print('After Reshaping to 1D: ', X.shape)

# Normalize the data
X = X / 255.0
print('Domain: [', X.min(), ',', X.max(), ']')

Before Reshaping size: (7553, 4096)
After Reshaping Size:  (7553, 64, 64)
Before Reshaping to 1D: (7553, 64, 64)
After Reshaping to 1D:  (7553, 4096)
Domain: [ 0.0 , 1.0 ]


# Split Data 

In [8]:
# Setting the proportion for training data
train_proportion = 0.8
num_samples = len(X)

# Calculate the number of samples for training
num_train = int(train_proportion * num_samples)

# Split the data
X_train, y_train = X[:num_train], y[:num_train]
X_test, y_test = X[num_train:], y[num_train:]

In [9]:
def accuracy(y_true, y_pred):
    correct_predictions = np.sum(y_true == y_pred)
    total_samples = len(y_true)
    accuracy = correct_predictions / total_samples
    return accuracy

# NaiveBayes

In [10]:
class GaussianNaiveBayes:
    def _init_(self):
        self.class_probs = None
        self.mean = None
        self.std = None

    def fit(self, X, y):
        num_samples, num_features = X.shape
        unique_classes = np.unique(y)
        num_classes = len(unique_classes)

        self.class_probs = np.zeros(num_classes)
        self.mean = np.zeros((num_classes, num_features))
        self.std = np.zeros((num_classes, num_features))

        for i, c in enumerate(unique_classes):
            class_mask = (y == c)
            self.class_probs[i] = np.sum(class_mask) / num_samples

            for j in range(num_features):
                feature_values = X[class_mask, j]
                self.mean[i, j] = np.mean(feature_values)
                self.std[i, j] = np.std(feature_values)

    def predict(self, X):
        num_samples, num_features = X.shape
        num_classes = len(self.class_probs)
        predictions = np.zeros(num_samples)

        for i in range(num_samples):
            posteriors = np.zeros(num_classes)
            for j in range(num_classes):
                class_prob = np.log(self.class_probs[j])
                feature_probs = np.sum(
                    -0.5 * ((X[i] - self.mean[j]) / (self.std[j] + 1e-10))**2 -
                    np.log(self.std[j] + 1e-10)
                )
                posteriors[j] = class_prob + feature_probs

            predictions[i] = np.argmax(posteriors)

        return predictions


def calculate_accuracy(y_true, y_pred):
    correct_predictions = np.sum(y_true == y_pred)
    total_samples = len(y_true)
    accuracy = correct_predictions / total_samples
    return accuracy

# Instantiate GaussianNaiveBayes model
gnb_model = GaussianNaiveBayes()

# Train the model
gnb_model.fit(X_train, y_train)

# Make predictions
gnb_predictions = gnb_model.predict(X_test)

# Calculate accuracy
accuracy_gnb = calculate_accuracy(y_test, gnb_predictions)

print(f"Gaussian Naive Bayes Accuracy: {accuracy_gnb * 100:.2f}%")

Gaussian Naive Bayes Accuracy: 69.89%


# Categorical Approach 

In [24]:
class CategoricalNaiveBayes:
    def _init_(self):
        self.class_probs = None
        self.feature_probs = None

    def fit(self, X, y):
        num_samples, num_features = X.shape
        unique_classes = np.unique(y)
        num_classes = len(unique_classes)

        self.class_probs = np.zeros(num_classes)
        self.feature_probs = []

        for i, c in enumerate(unique_classes):
            class_mask = (y == c)
            self.class_probs[i] = np.sum(class_mask) / num_samples

            class_feature_probs = []
            for j in range(num_features):
                feature_values = np.unique(X[class_mask, j])
                total_values = len(feature_values)

                # Laplace smoothing for unseen values
                feature_probs = (np.zeros(total_values) + 1) / (total_values + 1)
                class_feature_probs.append((feature_values, feature_probs))

            self.feature_probs.append(class_feature_probs)

    def predict(self, X):
        num_samples, num_features = X.shape
        num_classes = len(self.class_probs)
        predictions = np.zeros(num_samples)

        epsilon = 1e-10  # Small epsilon value to avoid division by zero in logarithm

        for i in range(num_samples):
            posteriors = np.zeros(num_classes)

            for j in range(num_classes):
                class_prob = np.log(self.class_probs[j])
                feature_probs = 0

                for k in range(num_features):
                    feature_value = X[i, k]
                    class_feature_probs = self.feature_probs[j][k]

                    if feature_value in class_feature_probs[0]:
                        idx = np.where(class_feature_probs[0] == feature_value)[0][0]
                        feature_probs += np.log(class_feature_probs[1][idx] + epsilon)

                posteriors[j] = class_prob + feature_probs

            predictions[i] = np.argmax(posteriors)

        return predictions
    
categorical_nb_model = CategoricalNaiveBayes()

# Train the model
categorical_nb_model.fit(X_train, y_train)

# Make predictions
categorical_nb_predictions = categorical_nb_model.predict(X_test)


accuracy_categorical_nb = accuracy(y_test, categorical_nb_predictions)
print(f"Categorical Naive Bayes Accuracy: {accuracy_categorical_nb * 100:.2f}%") 



Categorical Naive Bayes Accuracy: 47.98%


# Logistic Regression

In [19]:
class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        num_samples, num_features = X.shape
        self.weights = np.zeros(num_features)
        self.bias = 0

        for _ in range(self.num_iterations):
            model = np.dot(X, self.weights) + self.bias
            predictions = self.sigmoid(model)

            dw = (1/num_samples) * np.dot(X.T, (predictions - y))
            db = (1/num_samples) * np.sum(predictions - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        model = np.dot(X, self.weights) + self.bias
        predictions = self.sigmoid(model)
        return np.round(predictions)

In [20]:
# Instantiate LogisticRegression model
lr_model = LogisticRegression()

# Train the model
lr_model.fit(X_train, y_train)

# Make predictions
lr_predictions = lr_model.predict(X_test)

In [21]:

accuracy_lr = accuracy(y_test, lr_predictions)
print(f"Logistic Regression Accuracy: {accuracy_lr * 100:.2f}%")

Logistic Regression Accuracy: 57.97%
