<a href="https://colab.research.google.com/github/rimalsaksham07/Genetic-Algorithm/blob/main/NAIVE_BAYES_CODE_IMPLEMENTATION_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
from google.colab import drive
drive.mount('/content/mydrive')

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

# Install openpyxl only if needed to read .xlsx files
!pip install openpyxl

# Load the Excel file
file = "/content/mydrive/MyDrive/ai/nb1 (1).xlsx"
df = pd.read_excel(file, index_col=0)
print(df)

# Label Encoding
label_encoders = {}
print(df.columns)  # Check actual column names in your DataFrame

# Update this list to match the actual column names in your DataFrame
for column in ['Symptoms', 'Severity', 'Duration', 'Location', 'Age Group', 'Treated?']:
    le = LabelEncoder()
    df[column.strip()] = le.fit_transform(df[column.strip()])
    label_encoders[column] = le

print(df)

# Prepare features and target
X, y = df.drop(["Treated?"], axis=1), df["Treated?"]
print(X)
print(y)

# Define Naive Bayes Classifier
class NaiveBayes:
    def __init__(self):
        self.features = None
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}
        self.X_train = None
        self.y_train = None
        self.train_size = 0
        self.num_feats = 0

    def fit(self, X, y):
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        for feature in self.features:
            self.likelihoods[feature] = {}
            self.pred_priors[feature] = {}
            for feat_val in np.unique(self.X_train[feature]):
                self.pred_priors[feature][feat_val] = 0
                for outcome in np.unique(self.y_train):
                    self.likelihoods[feature][f'{feat_val}_{outcome}'] = 0
                    self.class_priors[outcome] = 0

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        for outcome in np.unique(self.y_train):
            outcome_count = sum(self.y_train == outcome)
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        for feature in self.features:
            for outcome in np.unique(self.y_train):
                outcome_count = sum(self.y_train == outcome)
                feat_likelihood = self.X_train[feature][self.y_train[self.y_train == outcome].index.values.tolist()].value_counts().to_dict()
                for feat_val, count in feat_likelihood.items():
                    self.likelihoods[feature][f'{feat_val}_{outcome}'] = (count + 1) / (outcome_count + len(feat_likelihood))  # Laplace smoothing

    def _calc_predictor_prior(self):
        for feature in self.features:
            feat_val_counts = self.X_train[feature].value_counts().to_dict()
            for feat_val, count in feat_val_counts.items():
                self.pred_priors[feature][feat_val] = count / self.train_size

    def predict(self, X):
        results = []
        X = np.array(X)

        for query in X:
            probs_outcome = {}
            for outcome in np.unique(self.y_train):
                prior = self.class_priors[outcome]
                likelihood = 1
                for feature, feat_val in zip(self.features, query):
                    likelihood *= self.likelihoods[feature].get(f'{feat_val}_{outcome}', 1 / (self.train_size + len(self.features)))  # Default to smoothing
                probs_outcome[outcome] = prior * likelihood

            # Choose the outcome with the highest posterior probability
            best_outcome = max(probs_outcome, key=probs_outcome.get)
            results.append(best_outcome)

        return results

# Instantiate and train the classifier
nb_classifier = NaiveBayes()
nb_classifier.fit(X, y)

# Predict with the trained classifier
predictions = nb_classifier.predict(X)
print(predictions)


Drive already mounted at /content/mydrive; to attempt to forcibly remount, call drive.mount("/content/mydrive", force_remount=True).
     Unnamed: 1  Unnamed: 2 Unnamed: 3 Unnamed: 4 Unnamed: 5  Unnamed: 6  \
NaN         NaN         NaN        NaN        NaN        NaN         NaN   
NaN         NaN         NaN        NaN        NaN        NaN         NaN   
NaN         NaN         NaN        NaN        NaN        NaN         NaN   
NaN         NaN         NaN        NaN        NaN        NaN         NaN   
NaN         NaN         NaN         SN   Symptoms   Severity    Duration   
NaN         NaN         NaN          1      fever       high  short-term   
NaN         NaN         NaN          2   headache       mild   long-term   
NaN         NaN         NaN          3      fever       high  short-term   
NaN         NaN         NaN          4      cough   moderate  short-term   
NaN         NaN         NaN          5      fever   moderate   long-term   
NaN         NaN         NaN    

KeyError: 'Symptoms'