<a href="https://colab.research.google.com/github/mohrael/Machine-Learning/blob/main/Na%C3%AFveBayes2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
class GaussianNaiveBayes:
  """Gaussian Naive Bayes classifier for continuous features.

  Each feature is modelled, independently per class, as a normal
  distribution whose mean and (population) variance are estimated from the
  training data. Posteriors are computed in log space so that samples lying
  far from every class mean do not underflow to 0/0 = NaN.
  """

  # Constant added to the variance terms of the density so that a feature
  # with zero variance inside a class does not cause a division by zero.
  _EPS = 1e-9

  def __init__(self):
    # Sorted unique class labels; None until fit() has been called.
    self.classes = None
    # Per-class feature means, array of shape (n_classes, n_features).
    self.mean_ = None
    # Per-class feature variances, array of shape (n_classes, n_features).
    self.var_ = None
    # Per-class prior probabilities, array of shape (n_classes,).
    self.prior_ = None

  def fit(self, x, y):
    """
    Train the Gaussian Naive Bayes model.

    x: array-like of shape (n_samples, n_features)
    y: array-like of shape (n_samples,)
    Raises: ValueError if x is not 2-D or x and y disagree on n_samples.
    """
    # Accept lists / DataFrames as well as arrays.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y)
    if x.ndim != 2:
      raise ValueError("x must have shape (n_samples, n_features)")
    if x.shape[0] != len(y):
      raise ValueError("x and y must contain the same number of samples")

    # np.unique returns the labels sorted; row idx of every statistic below
    # corresponds to self.classes[idx].
    self.classes = np.unique(y)

    n_samples, n_features = x.shape
    n_classes = len(self.classes)

    self.mean_ = np.zeros((n_classes, n_features))
    self.var_ = np.zeros((n_classes, n_features))
    self.prior_ = np.zeros(n_classes)

    for idx, label in enumerate(self.classes):
      # Rows whose label equals the current class.
      members = x[y == label]
      self.mean_[idx, :] = members.mean(axis=0)
      # Population variance (ddof=0), as np.var computes by default.
      self.var_[idx, :] = members.var(axis=0)
      # Prior = fraction of training samples belonging to this class.
      self.prior_[idx] = members.shape[0] / n_samples

  def _log_gaussian_density(self, class_idx, x):
    """
    Log of the per-feature Gaussian density of x for a given class index.

    class_idx: integer index corresponding to self.classes
    x: one sample (1-D array of features) or a batch (n_samples, n_features)
    Returns: array with the same shape as x
    """
    mean = self.mean_[class_idx]
    var = self.var_[class_idx]

    # log of exp(-(x - mean)^2 / (2 var + eps))
    log_numerator = -((x - mean) ** 2) / (2 * var + self._EPS)
    # log of sqrt(2 pi var + eps)
    log_denominator = 0.5 * np.log(2 * np.pi * var + self._EPS)

    return log_numerator - log_denominator

  def _gaussian_density(self, class_idx, x):
    """
    Calculate the Gaussian probability density of x for a given class index.

    class_idx: integer index corresponding to self.classes
    x: one sample (1-D array of features)
    Returns: array of per-feature densities
    """
    return np.exp(self._log_gaussian_density(class_idx, x))

  def predict_proba(self, x):
    """
    Return probability estimates for each class.

    x: array-like of shape (n_samples, n_features)
    Returns: numpy array of shape (n_samples, n_classes); each row sums to 1
    Raises: ValueError if x is not 2-D.
    """
    x = np.asarray(x, dtype=float)
    if x.ndim != 2:
      raise ValueError("x must have shape (n_samples, n_features)")

    n_samples = x.shape[0]
    n_classes = len(self.classes)

    # log(prior * likelihood) for every (sample, class) pair.
    log_joint = np.empty((n_samples, n_classes))
    for idx in range(n_classes):
      # Naive independence assumption: the product of the per-feature
      # densities becomes a sum of their logs.
      log_likelihood = self._log_gaussian_density(idx, x).sum(axis=1)
      log_joint[:, idx] = np.log(self.prior_[idx]) + log_likelihood

    # Shift each row so its largest entry is 0 before exponentiating; the
    # shift cancels in the normalisation but guarantees at least one term
    # equals exp(0) = 1, so the row sum can never underflow to zero.
    log_joint -= log_joint.max(axis=1, keepdims=True)
    probs = np.exp(log_joint)
    probs /= probs.sum(axis=1, keepdims=True)
    return probs

  def predict(self, x):
    """
    Predict class labels for each sample in x.

    x: array-like of shape (n_samples, n_features)
    Returns: numpy array of labels
    """
    probs = self.predict_proba(x)

    # Index of the most probable class for every sample.
    class_idx = np.argmax(probs, axis=1)

    return self.classes[class_idx]




In [None]:
if __name__ == "__main__":
  # Load the data set; names= supplies the labels for the two exam-score
  # columns and the decision (target) column.
  data = pd.read_csv('/content/acceptance_data.txt',names=['exam1','exam2','decision'])

  # Features are every column except the target, rounded to 2 decimals.
  x = data.drop(columns=['decision']).round(2)
  y = data['decision']

  # Hold out 20% of the rows; the fixed seed keeps the split reproducible.
  x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

  # The classifier works on plain NumPy arrays.
  x_train = x_train.to_numpy()
  x_test = x_test.to_numpy()

  y_train = y_train.to_numpy()
  y_test = y_test.to_numpy()

  model = GaussianNaiveBayes()
  model.fit(x_train,y_train)

  # Predicted labels for the training samples.
  predictions = model.predict(x_train)
  print("Predictions:", predictions)

  # Probabilities
  pred_probs = model.predict_proba(x_train)
  print("Probabilities:\n", pred_probs)

  # Score on the held-out split. The labels must be compared against
  # predictions made for x_test (not the training predictions above), so
  # that both arrays describe the same samples.
  test_predictions = model.predict(x_test)
  accuracy = accuracy_score(y_test,test_predictions)
  print("Accuracy: ",accuracy)

Predictions: [0 1 1 1 1 1 1 0 1 1 0 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 0 1 1 1 0 0 1 0 1
 0 0 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 1 1 1 0 1
 0 1 1 0 0 1]
Probabilities:
 [[0.81826893 0.18173107]
 [0.01509075 0.98490925]
 [0.35775465 0.64224535]
 [0.0470322  0.9529678 ]
 [0.38958713 0.61041287]
 [0.23986231 0.76013769]
 [0.16351812 0.83648188]
 [0.60714601 0.39285399]
 [0.30214179 0.69785821]
 [0.02671323 0.97328677]
 [0.67482741 0.32517259]
 [0.00842741 0.99157259]
 [0.08303301 0.91696699]
 [0.68363198 0.31636802]
 [0.05207077 0.94792923]
 [0.9084822  0.0915178 ]
 [0.46280307 0.53719693]
 [0.70636197 0.29363803]
 [0.80632459 0.19367541]
 [0.47084853 0.52915147]
 [0.0107007  0.9892993 ]
 [0.86802459 0.13197541]
 [0.43583756 0.56416244]
 [0.25421988 0.74578012]
 [0.23976313 0.76023687]
 [0.37883544 0.62116456]
 [0.15465422 0.84534578]
 [0.34873751 0.65126249]
 [0.88846996 0.11153004]
 [0.03119251 0.96880749]
 [0.11629642 0.88370358]
 [0.16571421 0.83428579]
 [0.595