In [4]:
import numpy as np

In [5]:
# Define the MyBayes class
class MyBayes:
    """Bernoulli Naive Bayes classifier with Laplace (add-one) smoothing.

    Intended for binary (0/1) feature vectors: the smoothing term adds one
    pseudo-count for each of the two possible feature values.

    Attributes set by ``fit``:
        priors: dict mapping each class label to its relative frequency in
            the training labels.
        conditional_probs: dict mapping each class label to a 1-D array whose
            j-th entry is the smoothed estimate of P(feature_j = 1 | class).
    """

    def __init__(self):
        self.priors = None  # Store prior probabilities for each class
        self.conditional_probs = None  # Store conditional probabilities for features

    def fit(self, XTrain, YTrain):
        """
        Fit the Naive Bayes model to the training data.

        Args:
            XTrain: array-like of shape (n_samples, n_features), 0/1 entries.
            YTrain: array-like of n_samples class labels.

        Returns:
            None. Results are stored in ``self.priors`` and
            ``self.conditional_probs``.
        """
        # Accept plain Python lists as well as ndarrays.
        XTrain = np.asarray(XTrain)
        YTrain = np.asarray(YTrain)

        # Unique classes and their counts
        classes, counts = np.unique(YTrain, return_counts=True)
        self.priors = {cls: count / len(YTrain) for cls, count in zip(classes, counts)}

        # Calculate conditional probabilities
        self.conditional_probs = {}
        for cls in classes:
            cls_indices = np.where(YTrain == cls)[0]
            cls_features = XTrain[cls_indices]
            # Laplace smoothing: (ones + 1) / (rows + 2) keeps every estimate
            # strictly inside (0, 1) for 0/1 data, so the logs taken at
            # prediction time are always finite.
            self.conditional_probs[cls] = (np.sum(cls_features, axis=0) + 1) / (
                len(cls_features) + 2
            )

    def _log_posteriors(self, x):
        """Return {class: log prior + log likelihood of x} for every fitted class."""
        x = np.asarray(x)
        scores = {}
        for cls, prior in self.priors.items():
            p = self.conditional_probs[cls]
            # Sum of logs instead of a product of probabilities: the product
            # underflows to 0.0 for every class once there are many features,
            # which would make all classes tie.
            log_likelihood = np.sum(x * np.log(p) + (1 - x) * np.log(1 - p))
            scores[cls] = np.log(prior) + log_likelihood
        return scores

    def predict1(self, x):
        """
        Predict the class for a single sample.

        Args:
            x: array-like of n_features 0/1 values.

        Returns:
            The class label with the highest (unnormalised) log posterior.
            Ties resolve to the class that appears first in ``self.priors``.
        """
        scores = self._log_posteriors(x)
        return max(scores, key=scores.get)

    def predict2(self, XTest):
        """
        Predict classes for multiple samples.

        Args:
            XTest: iterable of samples, each accepted by ``predict1``.

        Returns:
            np.ndarray holding one predicted label per sample, in input order.
        """
        return np.array([self.predict1(row) for row in XTest])

    def score(self, XTest, YTest):
        """
        Calculate the accuracy of predictions.

        Args:
            XTest: samples to classify.
            YTest: array-like of true labels, aligned with ``XTest``.

        Returns:
            Fraction of samples whose predicted label equals the true label.
        """
        predictions = self.predict2(XTest)
        return np.mean(predictions == np.asarray(YTest))

In [6]:
# Utility functions
def split_data(X, y, test_ratio=0.2):
    """
    Randomly partition X and y into a training part and a testing part.

    The row order is shuffled with NumPy's global random generator, so call
    ``np.random.seed`` beforehand for a repeatable split. The first
    ``int(len(y) * test_ratio)`` shuffled rows form the test set and the
    remainder form the training set.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    n_samples = len(y)
    order = np.arange(n_samples)
    np.random.shuffle(order)  # in-place permutation of the row indices

    n_test = int(n_samples * test_ratio)
    test_idx, train_idx = order[:n_test], order[n_test:]

    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]


In [7]:
# Main Program
if __name__ == "__main__":
    # Seed NumPy's global RNG so the generated data and the split are repeatable.
    np.random.seed(42)

    def generate_data(samples, features):
        """Build a random binary dataset with a label derived from row sums.

        Returns:
            X: integer array of shape (samples, features) with entries in {0, 1}.
            y: integer array where y[i] is 1 when row i of X contains at
               least ``features // 2`` ones, otherwise 0.
        """
        X = np.random.randint(0, 2, size=(samples, features))
        ones_per_row = X.sum(axis=1)
        y = (ones_per_row >= features // 2).astype(int)
        return X, y

    # Synthetic dataset: 100 samples with 5 binary features each.
    X, y = generate_data(100, 5)
    XTrain, XTest, YTrain, YTest = split_data(X, y)

    # Fit the classifier on the training portion.
    mybay = MyBayes()
    mybay.fit(XTrain, YTrain)

    # Classify the first held-out sample on its own.
    x_single = XTest[0]
    predicted_class = mybay.predict1(x_single)
    print(f"Single Prediction for {x_single}: Class {predicted_class}")

    # Classify the whole held-out set in one call.
    predicted_classes = mybay.predict2(XTest)
    print("Predicted Classes:", predicted_classes)

    # Share of held-out samples predicted correctly, shown as a percentage.
    accuracy = mybay.score(XTest, YTest)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")

Single Prediction for [1 0 0 1 1]: Class 1
Predicted Classes: [1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1]
Model Accuracy: 100.00%


In [8]:
# Main Program
if __name__ == "__main__":
    # Seed NumPy's global RNG so the train/test split is repeatable.
    np.random.seed(42)

    def read_diagnosis_data(filename):
        """
        Load a comma-separated numeric file and separate features from target.

        Columns at 0-based positions 1 through 5 become the feature matrix and
        the column at position 6, cast to int, becomes the label vector.
        Column 0 and any column after position 6 are not used.

        Returns:
            Tuple ``(X, y)``.
        """
        table = np.loadtxt(filename, delimiter=",")
        features = table[:, 1:6]
        target = table[:, 6].astype(int)
        return features, target

    # Load the dataset from diagnosis.csv.
    X, y = read_diagnosis_data("diagnosis.csv")

    # Hold out part of the rows for evaluation.
    XTrain, XTest, YTrain, YTest = split_data(X, y)

    # Fit the classifier on the training portion.
    mybay = MyBayes()
    mybay.fit(XTrain, YTrain)

    # Classify the first held-out sample on its own.
    x_single = XTest[0]
    predicted_class = mybay.predict1(x_single)
    print(f"Single Prediction for {x_single}: Class {predicted_class}")

    # Classify the whole held-out set in one call.
    predicted_classes = mybay.predict2(XTest)
    print("Predicted Classes:", predicted_classes)

    # Share of held-out samples predicted correctly, shown as a percentage.
    accuracy = mybay.score(XTest, YTest)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")

Single Prediction for [0. 0. 1. 0. 0.]: Class 1
Predicted Classes: [1 1 0 1 1 0 0 1 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 1]
Model Accuracy: 100.00%
