<a href="https://colab.research.google.com/github/saniyashk1542/ML_25-26/blob/main/BDT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Example dataset
data = pd.DataFrame({
    'Disease': ['Cold', 'Cold', 'Cold', 'Covid', 'Covid', 'Covid'],
    'Fever':   ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'No']
})

# Compute P(fever|Cold)
cold = data[data['Disease'] == 'Cold']
P_fever_given_cold = (cold['Fever'] == 'Yes').mean()

# Compute P(fever|Covid)
covid = data[data['Disease'] == 'Covid']
P_fever_given_covid = (covid['Fever'] == 'Yes').mean()

print("P(fever|Cold) =", round(P_fever_given_cold, 2))
print("P(fever|Covid) =", round(P_fever_given_covid, 2))


P(fever|Cold) = 0.67
P(fever|Covid) = 0.67


In [2]:
# Priors (before seeing fever)
P_cold = 0.7
P_covid = 0.3

# Likelihoods (chance of fever given disease)
P_fever_given_cold = 0.5
P_fever_given_covid = 0.9

# Numerators of Bayes' rule
cold_score = P_fever_given_cold * P_cold
covid_score = P_fever_given_covid * P_covid

# Normalize to get posterior probabilities
total = cold_score + covid_score
P_cold_given_fever = cold_score / total
P_covid_given_fever = covid_score / total

print("P(Cold | Fever) =", round(P_cold_given_fever, 2))
print("P(Covid | Fever) =", round(P_covid_given_fever, 2))

# Decision (pick higher probability)
if P_cold_given_fever > P_covid_given_fever:
    print("👉 Decision: Patient most likely has a Cold")
else:
    print("👉 Decision: Patient most likely has Covid")


P(Cold | Fever) = 0.56
P(Covid | Fever) = 0.44
👉 Decision: Patient most likely has a Cold


In [3]:
import pandas as pd

# -----------------------------
# 1. Example dataset
# -----------------------------
data = pd.DataFrame({
    'Disease': ['Cold', 'Cold', 'Cold', 'Covid', 'Covid', 'Covid',
                'Cold', 'Covid', 'Cold', 'Covid', 'Cold', 'Cold',
                'Covid', 'Covid', 'Cold', 'Cold', 'Covid', 'Cold'],
    'Fever':   ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'No',
                'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No',
                'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No']
})

# -----------------------------
# 2. Compute Priors
# -----------------------------
priors = data['Disease'].value_counts(normalize=True)
print("🔹 Priors (P(Disease)):\n", priors, "\n")

# -----------------------------
# 3. Compute Likelihoods (P(Fever | Disease))
# -----------------------------
likelihoods = data.groupby('Disease')['Fever'].value_counts(normalize=True).unstack().fillna(0)
print("🔹 Likelihoods (P(Fever | Disease)):\n", likelihoods, "\n")

# -----------------------------
# 4. Compute Posteriors for Fever = 'Yes' using Bayes Rule
# -----------------------------
# Numerator for each class = P(Fever|Disease) * P(Disease)
numerators = {}
for disease in priors.index:
    numerators[disease] = likelihoods.loc[disease, 'Yes'] * priors[disease]

# Denominator = total of numerators (to normalize)
denominator = sum(numerators.values())

# Compute normalized posterior probabilities
posteriors = {disease: value / denominator for disease, value in numerators.items()}

# -----------------------------
# 5. Display Results
# -----------------------------
print("🔹 Posterior Probabilities given Fever = Yes:")
for disease, prob in posteriors.items():
    print(f"P({disease} | Fever=Yes) = {round(prob, 3)}")

# -----------------------------
# 6. Decision: choose most likely disease
# -----------------------------
best_disease = max(posteriors, key=posteriors.get)
print(f"\n👉 Decision: Patient most likely has **{best_disease}**")


🔹 Priors (P(Disease)):
 Disease
Cold     0.555556
Covid    0.444444
Name: proportion, dtype: float64 

🔹 Likelihoods (P(Fever | Disease)):
 Fever      No   Yes
Disease            
Cold     0.40  0.60
Covid    0.25  0.75 

🔹 Posterior Probabilities given Fever = Yes:
P(Cold | Fever=Yes) = 0.5
P(Covid | Fever=Yes) = 0.5

👉 Decision: Patient most likely has **Cold**


In [4]:
import pandas as pd

# -----------------------------
# 1. Example dataset
# -----------------------------
data = pd.DataFrame({
    'Email_Type': ['Spam', 'Spam', 'Not_Spam', 'Spam', 'Not_Spam', 'Spam',
                   'Not_Spam', 'Spam', 'Not_Spam', 'Not_Spam', 'Spam', 'Not_Spam',
                   'Spam', 'Spam', 'Not_Spam', 'Not_Spam', 'Spam', 'Not_Spam'],
    'Contains_Links': ['Yes', 'Yes', 'No', 'Yes', 'No', 'No',
                       'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No',
                       'Yes', 'No', 'No', 'Yes', 'Yes', 'No']
})

# -----------------------------
# 2. Compute Priors
# -----------------------------
priors = data['Email_Type'].value_counts(normalize=True)
print("🔹 Priors (P(Email_Type)):\n", priors, "\n")

# -----------------------------
# 3. Compute Likelihoods (P(Contains_Links | Email_Type))
# -----------------------------
likelihoods = data.groupby('Email_Type')['Contains_Links'].value_counts(normalize=True).unstack().fillna(0)
print("🔹 Likelihoods (P(Contains_Links | Email_Type)):\n", likelihoods, "\n")

# -----------------------------
# 4. Compute Posterior for Contains_Links = 'Yes' using Bayes' Rule
# -----------------------------
numerators = {}
for etype in priors.index:
    numerators[etype] = likelihoods.loc[etype, 'Yes'] * priors[etype]

denominator = sum(numerators.values())
posteriors = {etype: value / denominator for etype, value in numerators.items()}

# -----------------------------
# 5. Display Results
# -----------------------------
print("🔹 Posterior Probabilities given Contains_Links = Yes:")
for etype, prob in posteriors.items():
    print(f"P({etype} | Contains_Links=Yes) = {round(prob, 3)}")

# -----------------------------
# 6. Decision: choose most likely class
# -----------------------------
best_type = max(posteriors, key=posteriors.get)
print(f"\n👉 Decision: Email with a link is most likely **{best_type}**")


🔹 Priors (P(Email_Type)):
 Email_Type
Spam        0.5
Not_Spam    0.5
Name: proportion, dtype: float64 

🔹 Likelihoods (P(Contains_Links | Email_Type)):
 Contains_Links        No       Yes
Email_Type                        
Not_Spam        0.666667  0.333333
Spam            0.222222  0.777778 

🔹 Posterior Probabilities given Contains_Links = Yes:
P(Spam | Contains_Links=Yes) = 0.7
P(Not_Spam | Contains_Links=Yes) = 0.3

👉 Decision: Email with a link is most likely **Spam**
