In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score,precision_score,recall_score
from sklearn.model_selection import train_test_split

Q3

In [None]:
# Toy corpus for Q3: five short sentences, each paired with a "Sports" / "Not sports" tag.
data = dict(
    Text=[
        "A great game",
        "The election was over",
        "Very clean match",
        "A clean but forgettable game",
        "It was a close election",
    ],
    Tag=[
        "Sports",
        "Not sports",
        "Sports",
        "Sports",
        "Not sports",
    ],
)

# Tabular view of the corpus, also saved to disk (to_csv is called with its defaults).
csv_file_path = 'word_dataset.csv'
df = pd.DataFrame(data)
df.to_csv(csv_file_path)

In [None]:
# Bag-of-words encoder: the vocabulary is learned from the corpus and every
# sentence becomes a row of token counts.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['Text'])

# Binary target aligned with the rows of X: 1 for "Sports", 0 for any other tag.
y = df['Tag'].eq('Sports').astype('int64')


In [None]:
class NaiveBayesClassifier:
    """Multinomial Naive Bayes for count features (e.g. bag-of-words vectors).

    Word likelihoods are estimated with additive (Laplace) smoothing:

        P(w | c) = (count(w, c) + alpha) / (total_count(c) + alpha * n_features)

    so every class row of ``feature_prob`` is a proper distribution over the
    vocabulary. ``X`` may be a dense NumPy array or a SciPy sparse matrix, and
    class labels may be arbitrary (they are never used as array indices).

    NOTE: a later cell of this notebook (Q2) defines another class with the
    same name; after that cell runs, ``NaiveBayesClassifier`` refers to the Q2
    version.

    Attributes set by ``fit``:
        classes        sorted unique labels, shape (n_classes,)
        class_prior    empirical P(c), shape (n_classes,)
        feature_count  summed feature counts per class, shape (n_classes, n_features)
        feature_total  total feature count per class, shape (n_classes,)
        feature_prob   smoothed P(w | c), shape (n_classes, n_features)
    """

    def __init__(self, alpha=1.0):
        # alpha=1 is classic Laplace smoothing. alpha=0 disables smoothing and
        # yields log(0) = -inf for words never seen in a class.
        self.alpha = alpha

    def fit(self, X, y):
        """Estimate class priors and smoothed per-class word probabilities.

        Args:
            X: (n_samples, n_features) count matrix, dense or SciPy sparse.
            y: n_samples labels (list, ndarray or pandas Series).

        Raises:
            ValueError: if X and y disagree on the number of samples.
        """
        # A plain ndarray gives a positional boolean mask that works for both
        # dense and sparse X, regardless of any pandas index carried by y.
        y = np.asarray(y)
        if X.shape[0] != len(y):
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {len(y)} labels"
            )

        self.classes, class_counts = np.unique(y, return_counts=True)
        self.class_prior = class_counts / len(y)

        n_features = X.shape[1]
        self.feature_count = np.zeros((len(self.classes), n_features))
        # Index by position in self.classes, not by the label value itself, so
        # labels such as 2/5 or "spam"/"ham" work.
        for class_pos, label in enumerate(self.classes):
            X_c = X[y == label]
            # Sparse matrices return a 1 x n np.matrix from sum(); flatten it.
            self.feature_count[class_pos, :] = np.asarray(X_c.sum(axis=0)).ravel()
        self.feature_total = self.feature_count.sum(axis=1)

        # Additive smoothing over the whole vocabulary (multinomial model).
        self.feature_prob = (self.feature_count + self.alpha) / (
            self.feature_total[:, np.newaxis] + self.alpha * n_features
        )

    def _joint_log_likelihood(self, X):
        """Return log P(c) + sum_w x_w * log P(w | c), shape (n_samples, n_classes)."""
        word_term = np.asarray(X @ np.log(self.feature_prob).T)
        return word_term + np.log(self.class_prior)

    def predict(self, X):
        """Return the most probable class label for each row of X."""
        best = np.argmax(self._joint_log_likelihood(X), axis=1)
        return self.classes[best]

    def predict_proba(self, X):
        """Return posterior class probabilities for each row of X.

        Returns:
            A list with one dict per sample mapping class label -> probability;
            the probabilities in each dict sum to 1.
        """
        jll = self._joint_log_likelihood(X)
        # Normalise in log space to avoid underflow on long documents.
        log_norm = np.logaddexp.reduce(jll, axis=1, keepdims=True)
        posteriors = np.exp(jll - log_norm)
        labels = self.classes.tolist()
        return [
            {label: float(p) for label, p in zip(labels, sample)}
            for sample in posteriors
        ]

# Hold out 40% of the sentences, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    stratify=y,
    random_state=42,
)

# Fit on the training split, then label the held-out sentences.
nb = NaiveBayesClassifier()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Score the predictions against the held-out labels.
accuracy, precision, recall = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score)
)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")


In [None]:
# Unseen sentence to classify with the model fitted above.
new_sentence = ["A very close game"]

# Encode with the already-fitted vectorizer (transform only, no refit).
X_new = vectorizer.transform(new_sentence)

# Hard label first: 1 maps back to "Sports", anything else to "Not sports".
prediction = nb.predict(X_new)
if prediction[0] == 1:
    tag = 'Sports'
else:
    tag = 'Not sports'
print(f"The sentence '{new_sentence[0]}' is classified as: {tag}")

# Then the per-class probabilities for the same sentence.
probabilities = nb.predict_proba(X_new)
print(f"Class probabilities for the sentence '{new_sentence[0]}': {probabilities[0]}")

Q1

In [None]:
def bayes_theorem(p_a_given_b, p_b, p_a):
    """Return the posterior P(B|A) = P(A|B) * P(B) / P(A).

    Args:
        p_a_given_b: likelihood P(A|B).
        p_b: prior P(B).
        p_a: evidence P(A); a value of zero raises ZeroDivisionError.
    """
    joint = p_a_given_b * p_b
    return joint / p_a

def total_probability(p_a_given_b1, p_b1, p_a_given_b2, p_b2):
    """Return P(A) over a two-event partition: P(A|B1)P(B1) + P(A|B2)P(B2)."""
    via_b1 = p_a_given_b1 * p_b1
    via_b2 = p_a_given_b2 * p_b2
    return via_b1 + via_b2

# Problem (a): two student groups H (hostelers) and D with priors 0.60 / 0.40,
# and the chance of an A grade within each group.
P_H, P_D = 0.60, 0.40
P_A_given_H, P_A_given_D = 0.30, 0.20

# Evidence P(A) by total probability, then the posterior P(H|A) by Bayes' theorem.
P_A = total_probability(
    p_a_given_b1=P_A_given_H, p_b1=P_H,
    p_a_given_b2=P_A_given_D, p_b2=P_D,
)
P_H_given_A = bayes_theorem(p_a_given_b=P_A_given_H, p_b=P_H, p_a=P_A)
print(f"The probability that a student who scored an A grade is a hosteler is {P_H_given_A:.2f}")

# Problem (b): disease prior and the positive-test rate with / without the disease.
# Note that P_D is rebound here and now stands for the disease prior.
P_D, P_not_D = 0.01, 0.99
P_T_given_D, P_T_given_not_D = 0.99, 0.02

# Overall positive-test probability P(T), then the posterior P(D|T).
P_T = total_probability(
    p_a_given_b1=P_T_given_D, p_b1=P_D,
    p_a_given_b2=P_T_given_not_D, p_b2=P_not_D,
)
P_D_given_T = bayes_theorem(p_a_given_b=P_T_given_D, p_b=P_D, p_a=P_T)
print(f"The probability of having the disease given a positive test result is {P_D_given_T:.2f}")


Q2

In [None]:
from collections import defaultdict

class NaiveBayesClassifier:
    """Categorical Naive Bayes trained from a pandas DataFrame (no smoothing).

    NOTE: this redefines the ``NaiveBayesClassifier`` name used by the earlier
    Q3 cell; once this cell runs, the name refers to this DataFrame-based class.

    Attributes:
        class_prob     dict: class label -> prior P(class).
        feature_probs  nested mapping: feature -> value -> class -> P(value | class).
        features       the feature columns used for training.
    """

    def __init__(self):
        self.class_prob = {}
        self.feature_probs = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
        self.features = []

    def fit(self, df, target='buys_computer'):
        """Estimate priors and per-feature conditional probabilities.

        Args:
            df: training table; every column except ``target`` is a categorical feature.
            target: name of the label column. It defaults to 'buys_computer' and
                may sit at any position in ``df``.

        Raises:
            KeyError: if ``target`` is not a column of ``df``.
        """
        # Prior probability of each class.
        total_records = len(df)
        class_counts = df[target].value_counts()
        self.class_prob = {cls: count / total_records for cls, count in class_counts.items()}

        # Every column except the target is a feature, wherever the target sits.
        self.features = df.columns.drop(target)

        # Start from a clean table so a refit never keeps probabilities from old data.
        self.feature_probs = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))

        # Conditional probability of each observed feature value given the class.
        for cls in self.class_prob:
            class_df = df[df[target] == cls]
            for feature in self.features:
                value_counts = class_df[feature].value_counts()
                # count() ignores missing values, matching value_counts().
                total_feature_counts = class_df[feature].count()
                for value, count in value_counts.items():
                    self.feature_probs[feature][value][cls] = count / total_feature_counts

    def predict(self, row):
        """Return normalised class probabilities for one observation.

        Args:
            row: mapping (dict or pandas Series) of feature name -> value.

        Returns:
            dict class -> probability (summing to 1), or None when every class
            scores zero, e.g. because ``row`` holds a feature or value that was
            never seen in training.
        """
        probabilities = {}
        for cls, prior in self.class_prob.items():
            prob = prior
            for feature, value in row.items():
                # .get() avoids the defaultdict inserting an empty entry for
                # features that were never trained on.
                per_value = self.feature_probs.get(feature)
                if per_value is None or value not in per_value:
                    prob = 0
                    break  # an unseen feature/value zeroes this class
                prob *= per_value[value].get(cls, 0)
            probabilities[cls] = prob

        total_prob = sum(probabilities.values())
        if total_prob == 0:
            return None
        return {cls: prob / total_prob for cls, prob in probabilities.items()}

# Read the Q2 training table; fit() below looks up its 'buys_computer' label column.
df = pd.read_csv('q2.csv')

# Fit the categorical Naive Bayes model on the whole table.
nb_classifier = NaiveBayesClassifier()
nb_classifier.fit(df)

# Query: a single observation described by four categorical attributes.
test_row = dict(
    zip(
        ('Age', 'Income', 'Student', 'Credit Rating'),
        ('31-40', 'High', 'Yes', 'Fair'),
    )
)

# Normalised class probabilities for the query (None if every class scores zero).
probabilities = nb_classifier.predict(test_row)
print(f"Probabilities for test row: {probabilities}")


In [None]:
from sklearn.metrics import accuracy_score

# NOTE(review): on a fresh top-to-bottom run, `NaiveBayesClassifier` here is the most
# recent definition in the notebook (the Q2 class, whose `fit` expects a DataFrame as
# its first argument and whose `predict` takes a single row mapping). The
# `fit(X_train, y_train)` / `predict(X_test)` calls below do not match that interface.
# Confirm which classifier and which X_train/X_test this cell is meant to use.

# Train the Naive Bayes classifier
nb = NaiveBayesClassifier()
nb.fit(X_train, y_train)

# Predict on the test set
y_pred = nb.predict(X_test)

# Evaluate the classifier: fraction of test labels predicted correctly
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


In [None]:
# Pick one test sample by position and show its predicted class and class probabilities.
# NOTE(review): `.iloc` implies X_test is a pandas object here, whereas the only X_test
# defined in the visible cells comes from train_test_split on the CountVectorizer output
# (presumably a SciPy sparse matrix, which has no `.iloc`) - confirm where this X_test
# is supposed to come from.
example_index = 0 
example_row = X_test.iloc[[example_index]]  # list index keeps a one-row 2-D selection

# Predict class
# NOTE(review): indexing the result with [0] assumes `predict` returns one entry per row.
predicted_class = nb.predict(example_row)
print(f"Predicted class for example row: {predicted_class[0]}")

# Predict probabilities
# NOTE(review): the most recently defined NaiveBayesClassifier (Q2) has no
# `predict_proba` method - verify which model `nb` is expected to be at this point.
predicted_proba = nb.predict_proba(example_row)
print(f"Class probabilities for example row: {predicted_proba[0]}")