# Gerekli Kütüphaneler

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB as SKGaussianNB

seed = 42
np.random.seed(seed)

# Veri Seti

In [2]:
breast_cancer = datasets.load_breast_cancer()

In [3]:
X, y = breast_cancer.data, breast_cancer.target

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Gaussian Naive Bayes

In [5]:
class GaussianNB:
    def __init__(self):
        # Sinif oncelik olasiliklari
        self.class_prior = None
        # Sinifa gore ortalamalar
        self.mean = None
        # Sinifa gore varyanslar
        self.var = None
        # Sinif etiketleri
        self.classes = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        n_classes = len(self.classes)

        # Sinif oncelik olasiliklari (P(C))
        self.class_prior = np.zeros(n_classes)
        # Ozelliklerin sinifa gore ortalamalari
        self.mean = np.zeros((n_classes, n_features))
        # Ozelliklerin sinifa gore varyanslari
        self.var = np.zeros((n_classes, n_features))

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            # P(C): Sinifin frekansi
            self.class_prior[idx] = X_c.shape[0] / float(n_samples)
            # Ozelliklerin sinifa gore ortalamalari
            self.mean[idx, :] = np.mean(X_c, axis=0)
            # Ozelliklerin sinifa gore varyanslari
            self.var[idx, :] = np.var(X_c, axis=0)

    def predict(self, X):
        predictions = []
        for x in X:
            posteriors = []
    
            for idx, c in enumerate(self.classes):
                prior_log = np.log(self.class_prior[idx])
                mean = self.mean[idx]
                var = self.var[idx]
                eps = 1e-9
                coef = 1.0 / np.sqrt(2.0 * np.pi * var + eps)
                exp_term = np.exp(-(x - mean) ** 2 / (2.0 * var + eps))
                likelihood_log = np.sum(np.log(coef * exp_term))
                posterior = prior_log + likelihood_log
                posteriors.append(posterior)
    
            pred = self.classes[np.argmax(posteriors)]
            predictions.append(pred)

        return predictions

# Eğitim

In [6]:
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Tahmin

In [7]:
y_pred = gnb.predict(X_test)

# Sonuçlar

In [8]:
print("Accuracy score:", accuracy_score(y_test, y_pred))

Accuracy score: 0.9649122807017544


# Scikit-Learn

In [9]:
gnb2 = SKGaussianNB()
gnb2.fit(X_train, y_train)

In [10]:
y_pred_2 = gnb2.predict(X_test)

In [11]:
print("Accuracy score:", accuracy_score(y_test, y_pred_2))

Accuracy score: 0.9736842105263158
