<a href="https://colab.research.google.com/github/puspughimire7-art/6005AI_P/blob/main/Week_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Naive Bayes Tutorial – Google Colab Notebook

This notebook implements all tasks from the Naive Bayes tutorial (Accident, Weather, Loan, Disease) and a Naive Bayes classifier from scratch.

In [1]:
import pandas as pd
from collections import Counter

## Dataset 1 – Accident Prediction

In [2]:
data1 = pd.DataFrame({
    "Weather": ["Rain","snow","clear","clear","snow","rain","rain","snow","clear","clear"],
    "Road": ["bad","average","bad","good","good","average","good","bad","good","bad"],
    "Traffic": ["high","normal","light","high","normal","light","normal","high","high","high"],
    "Engine": ["no","yes","no","yes","no","no","no","no","yes","yes"],
    "Accident": ["yes","yes","no","yes","no","no","no","no","yes","yes"]
})
data1

Unnamed: 0,Weather,Road,Traffic,Engine,Accident
0,Rain,bad,high,no,yes
1,snow,average,normal,yes,yes
2,clear,bad,light,no,no
3,clear,good,high,yes,yes
4,snow,good,normal,no,no
5,rain,average,light,no,no
6,rain,good,normal,no,no
7,snow,bad,high,no,no
8,clear,good,high,yes,yes
9,clear,bad,high,yes,yes


## Helper Function – Naive Bayes Probability

In [3]:
def naive_bayes_prob(df, class_col, evidence):
    """Return posterior probabilities P(class | evidence) using Naive Bayes."""
    classes = df[class_col].unique()
    results = {}

    for c in classes:
        subset = df[df[class_col] == c]
        prior = len(subset) / len(df)
        likelihood = 1.0

        for feature, value in evidence.items():
            count = len(subset[subset[feature] == value])
            if len(subset) == 0:
                feature_prob = 0
            else:
                feature_prob = count / len(subset)
            likelihood *= feature_prob

        results[c] = prior * likelihood

    total = sum(results.values())
    if total == 0:
        return results  # all zero, e.g. unseen combination

    posterior = {c: results[c]/total for c in results}
    return posterior

### Task 1 – Compute $P(Accident = yes \mid X=(Rain, Good, Normal, No))$

In [4]:
evidence1 = {
    "Weather": "Rain",
    "Road": "good",
    "Traffic": "normal",
    "Engine": "no"
}

posterior1 = naive_bayes_prob(data1, "Accident", evidence1)
posterior1

{'yes': 1.0, 'no': 0.0}

## Dataset 2 – Weather-Based Game Prediction

In [5]:
data2 = pd.DataFrame({
    "outlook": ["sunny","sunny","overcast","rainy","rainy","rainy","overcast","sunny","sunny","rainy",
                "sunny","overcast","overcast","rainy"],
    "temperature": ["hot","hot","hot","mild","cool","cool","cool","mild","cool","mild",
                    "mild","mild","hot","mild"],
    "humidity": ["high","high","high","high","normal","normal","high","high","normal","normal",
                 "normal","high","normal","high"],
    "windy": ["false","true","false","false","false","true","true","false","false","false",
              "true","true","false","true"],
    "play": ["no","no","yes","yes","yes","no","yes","no","yes","yes","yes","yes","yes","no"]
})
data2

Unnamed: 0,outlook,temperature,humidity,windy,play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,high,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


### Task 2 – Q1: $(sunny, hot, high, false)$

In [6]:
evidence2_q1 = {
    "outlook":"sunny",
    "temperature":"hot",
    "humidity":"high",
    "windy":"false"
}
naive_bayes_prob(data2, "play", evidence2_q1)

{'no': 0.7446373850868233, 'yes': 0.2553626149131767}

### Task 2 – Q3: $(overcast, cool, high, true)$

In [7]:
evidence2_q3 = {
    "outlook":"overcast",
    "temperature":"cool",
    "humidity":"high",
    "windy":"true"
}
naive_bayes_prob(data2, "play", evidence2_q3)

{'no': 0.0, 'yes': 1.0}

## Dataset 3 – Loan Approval Prediction

In [8]:
data3 = pd.DataFrame({
    "Employment": ["Employed","Unemployed","Employed","Employed","Unemployed"],
    "Credit": ["Good","Bad","Good","Bad","Good"],
    "Income": ["High","Low","Medium","Medium","Low"],
    "Loan": ["Yes","No","Yes","No","Yes"]
})
data3

Unnamed: 0,Employment,Credit,Income,Loan
0,Employed,Good,High,Yes
1,Unemployed,Bad,Low,No
2,Employed,Good,Medium,Yes
3,Employed,Bad,Medium,No
4,Unemployed,Good,Low,Yes


### Task 3 – Applicant 1: (Employed, Good, Medium)

In [9]:
evidence3_a1 = {
    "Employment":"Employed",
    "Credit":"Good",
    "Income":"Medium"
}
naive_bayes_prob(data3, "Loan", evidence3_a1)

{'Yes': 1.0, 'No': 0.0}

### Task 3 – Applicant 2: (Unemployed, Bad, Low)

In [10]:
evidence3_a2 = {
    "Employment":"Unemployed",
    "Credit":"Bad",
    "Income":"Low"
}
naive_bayes_prob(data3, "Loan", evidence3_a2)

{'Yes': 0.0, 'No': 1.0}

## Dataset 4 – Disease Diagnosis

In [11]:
data4 = pd.DataFrame({
    "Fever":["Yes","No","Yes","No","Yes"],
    "Cough":["Yes","Yes","No","Yes","No"],
    "Fatigue":["Yes","No","Yes","No","Yes"],
    "Travel":["Yes","No","No","Yes","No"],
    "Disease":["Positive","Negative","Positive","Negative","Positive"]
})
data4

Unnamed: 0,Fever,Cough,Fatigue,Travel,Disease
0,Yes,Yes,Yes,Yes,Positive
1,No,Yes,No,No,Negative
2,Yes,No,Yes,No,Positive
3,No,Yes,No,Yes,Negative
4,Yes,No,Yes,No,Positive


### Task 4 – Patient 1: Fever=Yes, Cough=No, Fatigue=Yes, Travel=No

In [12]:
evidence4_p1 = {"Fever":"Yes","Cough":"No","Fatigue":"Yes","Travel":"No"}
naive_bayes_prob(data4,"Disease", evidence4_p1)

{'Positive': 1.0, 'Negative': 0.0}

### Task 4 – Patient 2: Fever=No, Cough=Yes, Fatigue=No, Travel=No

In [13]:
evidence4_p2 = {"Fever":"No","Cough":"Yes","Fatigue":"No","Travel":"No"}
naive_bayes_prob(data4,"Disease", evidence4_p2)

{'Positive': 0.0, 'Negative': 1.0}

## Task 5 – Naive Bayes Classifier From Scratch

In [14]:
class NaiveBayesClassifier:
    def fit(self, df, target):
        self.target = target
        self.classes = df[target].unique()
        self.model = {}

        for c in self.classes:
            subset = df[df[target] == c]
            self.model[c] = {
                "prior": len(subset)/len(df),
                "likelihoods": {}
            }
            for col in df.columns:
                if col == target:
                    continue
                probs = subset[col].value_counts(normalize=True).to_dict()
                self.model[c]["likelihoods"][col] = probs

    def predict(self, x):
        scores = {}
        for c in self.classes:
            score = self.model[c]["prior"]
            for feature, value in x.items():
                score *= self.model[c]["likelihoods"][feature].get(value, 0)
            scores[c] = score
        return scores

# Example usage on Dataset 4
nb = NaiveBayesClassifier()
nb.fit(data4, "Disease")
nb.predict({"Fever":"Yes","Cough":"No","Fatigue":"Yes","Travel":"No"})

{'Positive': 0.2666666666666666, 'Negative': 0.0}