In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Priors for the two groups and the conditional probability of event "a"
# within each group.
Phos, Pday = 0.6, 0.4
Pa_hos, Pa_day = 0.3, 0.2

# Bayes' rule: P(hos | a) = P(a | hos) * P(hos) / P(a), where
# P(a) = P(a | hos) * P(hos) + P(a | day) * P(day) (law of total probability).
Phos_a = (Pa_hos * Phos) / (Pa_hos * Phos + Pa_day * Pday)
print("Phos_a: ", Phos_a)



Phos_a:  0.6923076923076923


In [3]:
def bayes_theorem(prior_A, prior_notA, likelihood_given_A, likelihood_given_notA):
    """Return the posterior P(A | evidence) for a two-hypothesis Bayes update.

    Args:
        prior_A: P(A).
        prior_notA: P(not A).
        likelihood_given_A: P(evidence | A).
        likelihood_given_notA: P(evidence | not A).

    Returns:
        The joint term for A divided by the total probability of the evidence.

    Raises:
        ZeroDivisionError: if the total probability of the evidence is zero.
    """
    joint_A = likelihood_given_A * prior_A
    joint_notA = likelihood_given_notA * prior_notA
    # The denominator is the law of total probability over A and not-A.
    return joint_A / (joint_A + joint_notA)

# Keyword arguments make explicit which number is a prior and which a likelihood.
bayes_theorem(
    prior_A=0.6,
    prior_notA=0.4,
    likelihood_given_A=0.3,
    likelihood_given_notA=0.2,
)

0.6923076923076923

In [4]:
# Diagnostic-test example: posterior probability of the disease given a
# positive test result.
TP = 0.99  # P(positive | disease), passed as likelihood_given_A
FP = 0.02  # P(positive | no disease), passed as likelihood_given_notA
P1 = 0.01  # prior P(disease)
P0 = 0.99  # prior P(no disease), i.e. 1 - P1

# Pass the named constants rather than repeating the literals, so that
# editing a value above actually changes the result.
bayes_theorem(P1, P0, TP, FP)

0.3333333333333333

In [7]:
import pandas as pd
from collections import defaultdict

class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features with Laplace smoothing.

    Attributes:
        priors: maps each class label to its relative frequency in the
            training data.
        likelihoods: ``likelihoods[feature][cls][value]`` is the smoothed
            estimate of P(feature == value | cls).
        classes: the distinct labels found in the target column.
        feature_values: maps each feature name to the distinct values seen
            for it during training.
    """

    def __init__(self):
        self.priors = {}
        self.likelihoods = {}
        self.classes = []
        self.feature_values = {}

    def fit(self, data, target_column):
        """Estimate class priors and per-feature likelihoods from ``data``.

        Every column other than ``target_column`` is treated as a categorical
        feature. Calling ``fit`` again replaces the previous model entirely.

        Args:
            data: pandas DataFrame holding the features and the target.
            target_column: name of the column containing the class labels.
        """
        # Start from a clean slate so a refit cannot leak classes or features
        # learned from an earlier dataset.
        self.priors = {}
        self.likelihoods = {}
        self.feature_values = {}

        self.classes = data[target_column].unique()
        total_samples = len(data)

        # Slice the rows of each class once instead of once per feature.
        rows_by_class = {c: data[data[target_column] == c] for c in self.classes}

        # Priors
        for c, subset in rows_by_class.items():
            self.priors[c] = len(subset) / total_samples

        # Feature likelihoods
        features = [col for col in data.columns if col != target_column]
        for feature in features:
            values = data[feature].unique()
            self.feature_values[feature] = values
            self.likelihoods[feature] = {}

            for c, subset in rows_by_class.items():
                # Laplace smoothing: add one to every count and the number of
                # distinct feature values to the class total, so a value never
                # seen with this class still gets a non-zero probability.
                denominator = len(subset) + len(values)
                self.likelihoods[feature][c] = {
                    value: (len(subset[subset[feature] == value]) + 1) / denominator
                    for value in values
                }

    def predict(self, x):
        """Score every class for one sample and return the best one.

        Args:
            x: mapping of feature name to observed value. Features may be
                omitted; a value that was never seen during training is
                skipped and leaves the score unchanged.

        Returns:
            ``(label, posteriors)`` where ``posteriors`` maps each class to
            its unnormalised score (prior times matching likelihoods) and
            ``label`` is the class with the highest score.

        Raises:
            ValueError: if the classifier has no classes (not fitted yet).
            KeyError: if ``x`` names a feature that was not present in the
                training data.
        """
        if len(self.classes) == 0:
            raise ValueError("NaiveBayesClassifier has not been fitted")

        posteriors = {}
        for c in self.classes:
            prob = self.priors[c]
            for feature, value in x.items():
                if feature not in self.likelihoods:
                    raise KeyError(
                        f"unknown feature {feature!r}; "
                        f"trained features: {list(self.likelihoods)}"
                    )
                class_likelihoods = self.likelihoods[feature][c]
                if value in class_likelihoods:
                    prob *= class_likelihoods[value]
            posteriors[c] = prob
        return max(posteriors, key=posteriors.get), posteriors


# Example usage
# Example usage
data = pd.DataFrame([
    ["<=30","high","no","fair","no"],
    ["<=30","high","no","excellent","no"],
    ["31…40","high","no","fair","yes"],
    [">40","medium","no","fair","yes"],
    [">40","low","yes","fair","yes"],
    [">40","low","yes","excellent","no"],
    ["31…40","low","yes","excellent","yes"],
    ["<=30","medium","no","fair","no"],
    ["<=30","low","yes","fair","yes"],
    [">40","medium","yes","fair","yes"],
    ["<=30","medium","yes","excellent","yes"],
    ["31…40","medium","no","excellent","yes"],
    ["31…40","high","yes","fair","yes"],
    [">40","medium","no","excellent","no"]
], columns=["age","income","student","credit_rating","computer"])

# Round-trip through CSV. index=False keeps the row index out of the file;
# without it read_csv brings the index back as an "Unnamed: 0" column, which
# fit() would then treat as an extra feature.
data.to_csv('Computer_NB.csv', index=False)
data=pd.read_csv('Computer_NB.csv')
nb = NaiveBayesClassifier()
nb.fit(data, target_column="computer")

# Predict example: age=<=30, income=medium, student=yes, credit=fair
sample = {"age":"<=30","income":"medium","student":"yes","credit_rating":"fair"}
pred, probs = nb.predict(sample)

print("Prediction:", pred)
print("Probabilities:", probs)


Prediction: yes
Probabilities: {'no': 0.008199708454810493, 'yes': 0.027117768595041326}


In [12]:
import re
from collections import defaultdict

class NaiveBayesTextClassifier:
    """Bag-of-words Naive Bayes text classifier with Laplace smoothing.

    Attributes:
        priors: maps each class label to its share of the training documents.
        word_counts: ``word_counts[cls][word]`` is how many times ``word``
            occurs in the training documents of ``cls``.
        total_words: maps each class to its total number of word occurrences.
        vocab: set of every distinct word seen during training.
        classes: the distinct labels found in the target column.
    """

    def __init__(self):
        self.priors = {}
        self.word_counts = {}
        self.total_words = {}
        self.vocab = set()
        self.classes = []

    def tokenize(self, text):
        """Lowercase ``text`` and return its word tokens as a list."""
        return re.findall(r"\b\w+\b", text.lower())

    def fit(self, df, text_col="Text", target_col="Tag"):
        """Count words per class and compute class priors from ``df``.

        Calling ``fit`` again replaces the previous model entirely.

        Args:
            df: pandas DataFrame with one document per row.
            text_col: name of the column holding the document text.
            target_col: name of the column holding the class label.
        """
        # Reset everything learned so far; otherwise a refit would keep old
        # classes and, worse, an oversized vocabulary that inflates the
        # smoothing denominator in predict().
        self.priors = {}
        self.word_counts = {}
        self.total_words = {}
        self.vocab = set()

        self.classes = df[target_col].unique()
        total_docs = len(df)

        # Priors and likelihoods
        for c in self.classes:
            subset = df[df[target_col] == c]
            self.priors[c] = len(subset) / total_docs

            counts = defaultdict(int)
            for text in subset[text_col]:
                for w in self.tokenize(text):
                    counts[w] += 1

            self.word_counts[c] = counts
            self.total_words[c] = sum(counts.values())
            self.vocab.update(counts)

    def predict(self, text):
        """Score ``text`` against every class and return the best one.

        Args:
            text: the document to classify.

        Returns:
            ``(label, scores)`` where ``scores`` maps each class to its
            unnormalised score (prior times the smoothed probability of each
            token) and ``label`` is the class with the highest score. The
            scores are raw products of probabilities, so they shrink towards
            0.0 as the text gets longer.
        """
        words = self.tokenize(text)
        vocab_size = len(self.vocab)
        scores = {}

        for c in self.classes:
            counts = self.word_counts[c]
            denominator = self.total_words[c] + vocab_size
            # Start with prior probability
            prob = self.priors[c]
            for w in words:
                # .get() instead of indexing: indexing a defaultdict would
                # insert a zero entry for every unseen word and so mutate the
                # trained model on each prediction.
                prob *= (counts.get(w, 0) + 1) / denominator  # Laplace smoothing
            scores[c] = prob

        return max(scores, key=scores.get), scores
    
import pandas as pd

# Tiny labelled corpus: one sentence per row with its category.
train_data = pd.DataFrame({
    "Text": [
        "A great game",
        "The election was over",
        "Very clean match",
        "A clean but forgettable game",
        "It was a close election",
    ],
    "Tag": [
        "Sports",
        "Not sports",
        "Sports",
        "Sports",
        "Not sports",
    ],
})

# Persist the corpus without the row index, then load it back from disk.
train_data.to_csv("Sports_NB.csv", index=False)
train_data = pd.read_csv("Sports_NB.csv")
print(train_data)

# Fit the text classifier; "Text" and "Tag" are fit()'s default column names.
nb = NaiveBayesTextClassifier()
nb.fit(train_data)

# Classify an unseen sentence and show the per-class scores.
test_sentence = "A very close game"
pred, probs = nb.predict(test_sentence)

print("Prediction:", pred)
print("Probabilities:", probs)


                           Text         Tag
0                  A great game      Sports
1         The election was over  Not sports
2              Very clean match      Sports
3  A clean but forgettable game      Sports
4       It was a close election  Not sports
Prediction: Sports
Probabilities: {'Sports': 2.7647999999999997e-05, 'Not sports': 5.7175324559303314e-06}
