<a href="https://colab.research.google.com/github/mehdiabbasidev/darsman-machine-learning/blob/main/NaiveBayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Dataset download links:
https://drive.google.com/file/d/1tRkRzRSHQWDrxgJBLQLazehl2H5jThaM/view?usp=sharing

https://drive.google.com/file/d/1whqAWyr3ue4wU6i5dEbuSHcoIZtRkghv/view?usp=sharing

In [None]:
from google.colab import drive
# Attach Google Drive to the Colab VM; the dataset paths read later live under this mount point.
MOUNT_POINT = '/content/drive/'
drive.mount(MOUNT_POINT)

# Gaussian Naive Bayes

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# Read the ads dataset from the mounted Drive folder and preview its first rows.
ads_csv_path = '/content/drive/MyDrive/datasets/Social_Network_Ads_By_Gender.csv'
df = pd.read_csv(ads_csv_path)
df.head()

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Replace the Gender column with the integer codes assigned by LabelEncoder,
# then preview the frame to confirm the conversion.
le = LabelEncoder()
le.fit(df['Gender'])
df['Gender'] = le.transform(df['Gender'])
df.head()

In [None]:
# Features are the first three columns by position; the label is the last column.
# NOTE(review): presumably Gender/Age/EstimatedSalary -> Purchased; confirm against the CSV column order.
feature_positions = [0, 1, 2]
X = df.iloc[:, feature_positions]
y = df.iloc[:, -1]
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# and the same fitted transformation is then applied to both splits.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Train Gaussian Naive Bayes on the scaled training data, then predict the held-out rows.
classifier = GaussianNB().fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [None]:
# Evaluate the predictions: overall accuracy, per-class metrics, and the confusion matrix.
acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
print(f"Accuracy:{acc}")
print(f"Classification Report:\n{report}")
print(f"Confusion Matrix:\n{conf_mat}")

In [None]:
# 3D scatter of Gender, Age and EstimatedSalary, with each point coloured by its Purchased value.
fig, ax = plt.subplots(figsize=(12, 8), subplot_kw={'projection': '3d'})
scatter = ax.scatter(
    df['Gender'],
    df['Age'],
    df['EstimatedSalary'],
    c=df['Purchased'],
    cmap='bwr',
    alpha=0.7,
)
# Build the legend from the scatter's colour mapping (one entry per distinct class value).
legend1 = ax.legend(*scatter.legend_elements(), title="Classes")
ax.add_artist(legend1)
ax.set_title('3D Plot')
ax.set_xlabel('Gender')
ax.set_ylabel('Age')
ax.set_zlabel('EstimatedSalary')
plt.show()

# Bernoulli Naive Bayes

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Synthetic binary dataset: two 0/1 features and a 'yes'/'no' label per sample.
# The labels are drawn independently of the features, so there is no real signal
# to learn from. A seeded generator makes the draw (and every metric computed
# from it) identical on each Restart & Run All.
SEED = 42
num_samples = 10000
rng = np.random.default_rng(SEED)
X = rng.integers(2, size=(num_samples, 2))
y = rng.choice(['yes', 'no'], size=num_samples)

In [None]:
# Encode the string labels as integers ('yes' -> 1, anything else -> 0) and
# gather features plus target into one frame for inspection.
y = np.where(y == 'yes', 1, 0)
df = pd.DataFrame(X, columns=['feature1', 'feature2']).assign(target=y)
df.head()

Unnamed: 0,feature1,feature2,target
0,0,0,1
1,1,0,0
2,1,1,0
3,1,1,0
4,1,0,0


In [None]:
# Count how many samples fall into each target class.
df['target'].value_counts()

target
1    5042
0    4958
Name: count, dtype: int64

In [None]:
# Hold out 30% of the samples for evaluation (fixed seed for a repeatable split).
# The 0/1-encoded labels are stored in `y`; the previously referenced `y_binary`
# is not assigned anywhere in the visible notebook and raises NameError on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Train Bernoulli Naive Bayes on the binary features, then predict the held-out samples.
model = BernoulliNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Evaluate the predictions: overall accuracy, per-class metrics, and the confusion matrix.
acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
print(f"Accuracy:{acc}")
print(f"Classification Report:\n{report}")
print(f"Confusion Matrix:\n{conf_mat}")

Accuracy:0.49966666666666665
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.51      0.51      1508
           1       0.50      0.49      0.49      1492

    accuracy                           0.50      3000
   macro avg       0.50      0.50      0.50      3000
weighted avg       0.50      0.50      0.50      3000

Confusion Matrix:
[[769 739]
 [762 730]]


# Multinomial Naive Bayes

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the generated word dataset from the mounted Drive folder and preview its first rows.
words_csv_path = '/content/drive/MyDrive/datasets/Generated_Dataset_Words.csv'
df = pd.read_csv(words_csv_path)
df.head()

Unnamed: 0,hello,world,email,free,offer,buy,win,prize,meeting,discount,money,urgent,target
0,0,0,0,0,0,0,1,0,1,0,0,1,1.0
1,1,0,1,0,1,0,0,1,0,0,0,0,1.0
2,1,0,1,0,0,0,1,1,1,1,1,0,1.0
3,0,1,0,0,1,1,1,1,1,1,1,0,0.0
4,0,1,0,1,1,0,1,1,0,1,1,0,1.0


In [None]:
# Structural overview of the loaded frame. A notebook cell only echoes its last
# bare expression, so each summary is printed explicitly to make all of them
# appear; df.info() writes its own report to stdout.
print(df.shape)
print(df.columns)
print(df.dtypes)
df.info()

In [None]:
# Count how many rows fall into each target class.
df['target'].value_counts()

target
1.0    700
0.0    300
Name: count, dtype: int64

In [None]:
# Separate the target column from the remaining feature columns, then hold out
# 20% of the rows for evaluation with a fixed seed.
y = df['target']
X = df.drop(columns=['target'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Train Multinomial Naive Bayes on the training rows, then predict the held-out rows.
MNB = MultinomialNB().fit(X_train, y_train)
y_pred = MNB.predict(X_test)

In [None]:
# Evaluate the predictions: overall accuracy, per-class metrics, and the confusion matrix.
acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
print(f"Accuracy:{acc}")
print(f"Classification Report:\n{report}")
print(f"Confusion Matrix:\n{conf_mat}")

Accuracy:0.68
Classification Report:
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        64
         1.0       0.68      1.00      0.81       136

    accuracy                           0.68       200
   macro avg       0.34      0.50      0.40       200
weighted avg       0.46      0.68      0.55       200

Confusion Matrix:
[[  0  64]
 [  0 136]]


In [None]:
# Classify one hand-written sample: a single row of twelve 0/1 values,
# shaped (1, n_features) as predict() expects a 2D input.
test_sample = np.array([1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0]).reshape(1, -1)
test_sample_pred = MNB.predict(test_sample)
print(f"Classification of test sample : {test_sample_pred[0]}")

Classification of test sample : 1.0
