# Name : Naveen Mathews Renji | CWID : 20016323 

## Import Libraries

In [1]:
import pandas as pd
import numpy as np 
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix

## Load the dataset

In [2]:
# Read the raw CSV and immediately discard the 'Customer' column,
# which is not needed for the analysis
df = pd.read_csv("CS513_targeting_cat_full.csv").drop(columns=['Customer'])

In [3]:
# Preview the first five rows of the loaded data
df.head(n=5)

Unnamed: 0,Age,Gender,Region,Product,Purchase
0,<30,F,North,A,Yes
1,30<50,M,North,B,Yes
2,30<50,F,North,B,No
3,50<,M,North,B,No
4,<30,F,North,A,Yes


## One-hot encode categorical variables

In [4]:
# Replace every categorical column with indicator columns named
# <column>_<category>, so the features are numeric and usable by the models.
# Columns that are not listed keep their position ahead of the new indicators.
categorical_cols = ['Age', 'Gender', 'Region', 'Product']
df = pd.get_dummies(df, columns=categorical_cols)

In [5]:
# Preview the first five rows after one-hot encoding
df.head(n=5)

Unnamed: 0,Purchase,Age_30<50,Age_50<,Age_<30,Gender_F,Gender_M,Region_East,Region_North,Region_South,Region_West,Product_A,Product_B
0,Yes,0,0,1,1,0,0,1,0,0,1,0
1,Yes,1,0,0,0,1,0,1,0,0,0,1
2,No,1,0,0,1,0,0,1,0,0,0,1
3,No,0,1,0,0,1,0,1,0,0,0,1
4,Yes,0,0,1,1,0,0,1,0,0,1,0


## Convert target variable to binary

In [6]:
# Learn the label -> integer mapping for the Purchase column, then apply it
# in place (the preview below shows Yes encoded as 1 and No as 0)
le = LabelEncoder().fit(df['Purchase'])
df['Purchase'] = le.transform(df['Purchase'])
df.head(n=5)

Unnamed: 0,Purchase,Age_30<50,Age_50<,Age_<30,Gender_F,Gender_M,Region_East,Region_North,Region_South,Region_West,Product_A,Product_B
0,1,0,0,1,1,0,0,1,0,0,1,0
1,1,1,0,0,0,1,0,1,0,0,0,1
2,0,1,0,0,1,0,0,1,0,0,0,1
3,0,0,1,0,0,1,0,1,0,0,0,1
4,1,0,0,1,1,0,0,1,0,0,1,0


## Split data into training and testing sets

In [7]:
# Separate the predictors from the Purchase target
features = df.drop(columns=['Purchase'])
target = df['Purchase']

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=123,
)


## Construct Naive Bayes model and fit to training data

## Gaussian Naive Bayes

In [8]:
# Build the Gaussian Naive Bayes classifier and train it in one step
# (fit() returns the fitted estimator)
model = GaussianNB().fit(X_train, y_train)

# Predict the Purchase label for the held-out rows
y_pred = model.predict(X_test)

# Flatten the confusion matrix into its four cells; specificity is the share
# of actual negatives that were predicted negative
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Remaining summary metrics from scikit-learn
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Report every metric on its own line
print(
    f'Accuracy: {accuracy}',
    f'Precision: {precision}',
    f'Recall: {recall}',
    f'F1 Score: {f1}',
    f'Specificity: {specificity}',
    sep='\n',
)

Accuracy: 0.7916666666666666
Precision: 0.875
Recall: 0.717948717948718
F1 Score: 0.7887323943661971
Specificity: 0.8787878787878788


## Bernoulli Naive Bayes

In [9]:
# Build the Bernoulli Naive Bayes classifier and train it in one step
# (fit() returns the fitted estimator)
model = BernoulliNB().fit(X_train, y_train)

# Predict the Purchase label for the held-out rows
y_pred = model.predict(X_test)

# Flatten the confusion matrix into its four cells; specificity is the share
# of actual negatives that were predicted negative
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Remaining summary metrics from scikit-learn
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Report every metric on its own line
print(
    f'Accuracy: {accuracy}',
    f'Precision: {precision}',
    f'Recall: {recall}',
    f'F1 Score: {f1}',
    f'Specificity: {specificity}',
    sep='\n',
)

Accuracy: 0.8472222222222222
Precision: 0.9375
Recall: 0.7692307692307693
F1 Score: 0.8450704225352113
Specificity: 0.9393939393939394


## Multinomial Naive Bayes

In [10]:
# Build the Multinomial Naive Bayes classifier and train it in one step
# (fit() returns the fitted estimator)
model = MultinomialNB().fit(X_train, y_train)

# Predict the Purchase label for the held-out rows
y_pred = model.predict(X_test)

# Flatten the confusion matrix into its four cells; specificity is the share
# of actual negatives that were predicted negative
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

# Remaining summary metrics from scikit-learn
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Report every metric on its own line
print(
    f'Accuracy: {accuracy}',
    f'Precision: {precision}',
    f'Recall: {recall}',
    f'F1 Score: {f1}',
    f'Specificity: {specificity}',
    sep='\n',
)

Accuracy: 0.8194444444444444
Precision: 0.9333333333333333
Recall: 0.717948717948718
F1 Score: 0.8115942028985509
Specificity: 0.9393939393939394
