<a href="https://colab.research.google.com/github/NP-15/Machine-learning/blob/main/Sessions/Naive-Bayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Naive Bayes

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report



In [3]:
data = pd.read_csv('/content/drive/MyDrive/Machine-learning-main/assets/naive-bayes.csv', encoding="latin1")


In [5]:
data.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [4]:
X_train, X_test, y_train, y_test = train_test_split(data['v2'], data['v1'], test_size=0.2, random_state=42)

vectorizer = CountVectorizer()
X_train_freq = vectorizer.fit_transform(X_train)
X_test_freq = vectorizer.transform(X_test)

In [7]:
frequency_table = data['v1'].value_counts()
print("Frequency Table for output:\n")
print(frequency_table)

Frequency Table:

ham     4825
spam     747
Name: v1, dtype: int64


### 1. Gaussian Naive Bayes

In [9]:
# 1. Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(X_train_freq.toarray(), y_train)
y_pred_gnb = gnb.predict(X_test_freq.toarray())

### 2. Multinomial Naive Bayes

In [10]:
# 2. Multinomial Naive Bayes
mnb = MultinomialNB()
mnb.fit(X_train_freq, y_train)
y_pred_mnb = mnb.predict(X_test_freq)

### 3. Bernoulli Naive Bayes

In [11]:
vectorizer_binary = CountVectorizer(binary=True)
X_train_binary = vectorizer_binary.fit_transform(X_train)
X_test_binary = vectorizer_binary.transform(X_test)

bnb = BernoulliNB()
bnb.fit(X_train_binary, y_train)
y_pred_bnb = bnb.predict(X_test_binary)

## Evaluation

In [12]:
def evaluate_model(y_true, y_pred, model_name):
    print(f"\nConfusion Matrix for {model_name}:\n")
    print(confusion_matrix(y_true, y_pred))
    print(f"\nAccuracy for {model_name}: {accuracy_score(y_true, y_pred)}")
    print(f"\nClassification Report for {model_name}:\n")
    print(classification_report(y_true, y_pred))

# Evaluate Gaussian NB
evaluate_model(y_test, y_pred_gnb, 'Gaussian Naive Bayes')

# Evaluate Multinomial NB
evaluate_model(y_test, y_pred_mnb, 'Multinomial Naive Bayes')

# Evaluate Bernoulli NB
evaluate_model(y_test, y_pred_bnb, 'Bernoulli Naive Bayes')


Confusion Matrix for Gaussian Naive Bayes:

[[866  99]
 [ 12 138]]

Accuracy for Gaussian Naive Bayes: 0.9004484304932735

Classification Report for Gaussian Naive Bayes:

              precision    recall  f1-score   support

         ham       0.99      0.90      0.94       965
        spam       0.58      0.92      0.71       150

    accuracy                           0.90      1115
   macro avg       0.78      0.91      0.83      1115
weighted avg       0.93      0.90      0.91      1115


Confusion Matrix for Multinomial Naive Bayes:

[[963   2]
 [ 16 134]]

Accuracy for Multinomial Naive Bayes: 0.9838565022421525

Classification Report for Multinomial Naive Bayes:

              precision    recall  f1-score   support

         ham       0.98      1.00      0.99       965
        spam       0.99      0.89      0.94       150

    accuracy                           0.98      1115
   macro avg       0.98      0.95      0.96      1115
weighted avg       0.98      0.98      0.98   

## To calculate the likelihood

In [14]:
l_gnb = gnb.theta_
#l_mnb = mnb.theta_
#l_bnb = bnb.theta_

print("Likelihoods:\n")
print(f"Gaussian Naive Bayes: {l_gnb}")
#print(f"Multinomial Naive Bayes: {l_mnb}")
#print(f"Bernoulli Naive Bayes: {l_bnb}")

Likelihoods:

Gaussian Naive Bayes: [[0.         0.         0.00025907 ... 0.         0.00233161 0.00025907]
 [0.01340034 0.03852596 0.         ... 0.00167504 0.         0.        ]]
