In [1]:
import pandas as pd

# Play-tennis toy dataset, written one observation per row in the order
# (outlook, temperature, humidity, windy, playtennis)
sample_columns = ['outlook', 'temperature', 'humidity', 'windy', 'playtennis']
sample_rows = [
    ('sunny', 'hot', 'high', 'false', 'no'),
    ('sunny', 'hot', 'high', 'true', 'no'),
    ('overcast', 'hot', 'normal', 'false', 'yes'),
    ('rain', 'mild', 'normal', 'false', 'yes'),
    ('rain', 'mild', 'high', 'false', 'no'),
    ('rain', 'cool', 'normal', 'true', 'yes'),
    ('overcast', 'cool', 'normal', 'true', 'yes'),
    ('sunny', 'mild', 'high', 'false', 'no'),
    ('sunny', 'cool', 'normal', 'true', 'yes'),
    ('rain', 'mild', 'normal', 'true', 'yes'),
    ('sunny', 'mild', 'normal', 'true', 'yes'),
    ('overcast', 'mild', 'high', 'false', 'yes'),
    ('overcast', 'cool', 'normal', 'true', 'no'),
]

# Pivot the rows into a {column name: list of values} mapping
data = {name: list(values) for name, values in zip(sample_columns, zip(*sample_rows))}

# Build the DataFrame from the column mapping
df = pd.DataFrame(data)

print("Sample Dataset:")
print(df)

Sample Dataset:
     outlook temperature humidity  windy playtennis
0      sunny         hot     high  false         no
1      sunny         hot     high   true         no
2   overcast         hot   normal  false        yes
3       rain        mild   normal  false        yes
4       rain        mild     high  false         no
5       rain        cool   normal   true        yes
6   overcast        cool   normal   true        yes
7      sunny        mild     high  false         no
8      sunny        cool   normal   true        yes
9       rain        mild   normal   true        yes
10     sunny        mild   normal   true        yes
11  overcast        mild     high  false        yes
12  overcast        cool   normal   true         no


In [9]:
# Query point that the classifier will label
X = dict(outlook='sunny', temperature='mild', humidity='normal', windy='strong')

# The dataset encodes wind as 'true'/'false': 'strong' maps to 'true',
# every other value maps to 'false'
X['windy'] = 'true' if X['windy'] == 'strong' else 'false'

print(X)

{'outlook': 'sunny', 'temperature': 'mild', 'humidity': 'normal', 'windy': 'true'}


In [27]:

# Prior probabilities of the target class 'playtennis', computed with pandas:
# value_counts(normalize=True) gives the relative frequency of each label.
prior_probs = df['playtennis'].value_counts(normalize=True)
print(prior_probs)

# The same priors computed by hand from the raw label list, as a cross-check.
# The counts depend only on the list itself, so they are computed exactly once.
a = data['playtennis']
y = a.count('yes')   # number of 'yes' labels
n = a.count('no')    # number of 'no' labels
pp_y = y / len(a)    # P(playtennis = yes)
pp_n = n / len(a)    # P(playtennis = no)
print(f"yes\t {pp_y}")
print(f"no\t {pp_n}")

playtennis
yes    0.615385
no     0.384615
Name: proportion, dtype: float64
yes	 0.6153846153846154
no	 0.38461538461538464


In [28]:
# Conditional probability tables, one per feature:
#   likelihoods[feature][(value, label)] = P(feature == value | playtennis == label)
likelihoods = {}

for feature in ['outlook', 'temperature', 'humidity', 'windy']:
    column = df[feature]
    # For every (observed value, class label) pair: the fraction of rows of
    # that class whose feature equals the value.
    likelihoods[feature] = {
        (value, label): (column[df['playtennis'] == label] == value).sum() / (df['playtennis'] == label).sum()
        for value in column.unique()
        for label in prior_probs.index
    }

# Define a function to classify a new data point
def classify(X, prior_probs, likelihoods, unseen_prob=1e-6):
    """Return the most probable class label for ``X`` under a naive Bayes model.

    For each class the score is the class prior multiplied by the likelihood
    of every feature value in ``X`` given that class. The scores are
    unnormalized posteriors; only their ranking is used.

    Parameters
    ----------
    X : dict
        Maps feature name -> observed value for the point to classify.
    prior_probs : pandas.Series or similar
        Class priors. Must expose ``.index`` (the class labels) and support
        lookup by label.
    likelihoods : dict
        ``likelihoods[feature][(value, label)]`` is the likelihood of
        ``value`` for ``feature`` given class ``label``.
    unseen_prob : float, optional
        Likelihood used when a ``(value, label)`` pair is missing from the
        table for a feature. Defaults to ``1e-6``.

    Returns
    -------
    The class label with the highest score. On ties, the label that comes
    first in ``prior_probs.index`` wins.

    Raises
    ------
    KeyError
        If ``X`` contains a feature that has no entry in ``likelihoods``.
    ValueError
        If ``prior_probs`` contains no classes.
    """
    post_probs = {}
    for label in prior_probs.index:
        # Start from the prior and fold in one likelihood per feature.
        prob = prior_probs[label]
        for feature, value in X.items():
            prob *= likelihoods[feature].get((value, label), unseen_prob)
        post_probs[label] = prob
    # Pick the label whose score is largest.
    return max(post_probs, key=post_probs.get)

# Run the naive Bayes classifier on the query point X
result = classify(X=X, prior_probs=prior_probs, likelihoods=likelihoods)
# Report the predicted label
print(f"\n\nThe classification result for X is: {result}")



The classification result for X is: yes


In [29]:
# Naïve Bayes Classification:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import CategoricalNB
import numpy as np

# GaussianNB models the continuous income feature; CategoricalNB would treat
# every distinct income as an unrelated category.
from sklearn.naive_bayes import GaussianNB

# Data from the table (loan-default example).
# NOTE: this rebinds `data` and `df` (and, further down, `X`), which the
# play-tennis cells earlier in the notebook also use.
data = {
    'Home Owner': ['Yes', 'No', 'No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'No'],
    'Marital Status': ['Single', 'Married', 'Single', 'Married', 'Divorced', 'Married', 'Divorced', 'Divorced', 'Single', 'Single'],
    'Annual Income': [125, 100, 70, 120, 95, 60, 220, 85, 75, 90],
    'Defaulted Borrower': ['No', 'No', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes']
}

# Create a DataFrame
df = pd.DataFrame(data)

# Encode the categorical columns (and the target) as integer codes
le_home_owner = LabelEncoder()
le_marital_status = LabelEncoder()
le_defaulted = LabelEncoder()

df['Home Owner Encoded'] = le_home_owner.fit_transform(df['Home Owner'])
df['Marital Status Encoded'] = le_marital_status.fit_transform(df['Marital Status'])
df['Defaulted Borrower Encoded'] = le_defaulted.fit_transform(df['Defaulted Borrower'])

# Features (Home Owner, Marital Status, Annual Income) and target (Defaulted Borrower)
X = df[['Home Owner Encoded', 'Marital Status Encoded', 'Annual Income']].values
y = df['Defaulted Borrower Encoded'].values

# Column positions inside X: the first two are categorical codes, the last is income
categorical_cols = [0, 1]
income_cols = [2]

# Train one naive Bayes model per feature type:
#   CategoricalNB on the encoded categorical columns,
#   GaussianNB on the continuous income column.
cat_nb = CategoricalNB()
cat_nb.fit(X[:, categorical_cols], y)
gauss_nb = GaussianNB()
gauss_nb.fit(X[:, income_cols].astype(float), y)

# New test record: Home Owner = Yes, Marital Status = Married, Income = 120K
test_record = [
    le_home_owner.transform(['Yes'])[0],  # Home Owner
    le_marital_status.transform(['Married'])[0],  # Marital Status
    120  # Annual Income
]
test_row = np.asarray([test_record])

# Combine the two models under the naive independence assumption.
# Each predict_log_proba already contains the class prior once, so adding the
# two would count it twice; subtracting the log prior once corrects that.
# Per-sample normalizing constants do not affect the argmax.
combined_log_posterior = (
    cat_nb.predict_log_proba(test_row[:, categorical_cols])
    + gauss_nb.predict_log_proba(test_row[:, income_cols].astype(float))
    - cat_nb.class_log_prior_
)

# Predict class: highest combined score, mapped back to the original label
prediction_encoded = cat_nb.classes_[np.argmax(combined_log_posterior, axis=1)]
prediction = le_defaulted.inverse_transform(prediction_encoded)

print(f"Predicted class for (Home Owner = Yes, Marital Status = Married, Income = 120K): {prediction[0]}")


Predicted class for (Home Owner = Yes, Marital Status = Married, Income = 120K): No
