In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Ad-click classification with Gaussian Naive Bayes: load the advertising
# dataset, clean it, fit the model on a 70/30 train/test split, and print
# accuracy, the confusion matrix and the classification report.

# Load the dataset
DATA_PATH = '/content/advertising_ef.csv'  # adjust if the CSV lives elsewhere
df = pd.read_csv(DATA_PATH)

# Explore the dataset
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" below it.
df.info()
display(df.head())
display(df.describe())

# Drop 'City' and 'Country' columns first: they are not used as features, so a
# missing value in either must not cost us an otherwise complete row.
df_cleaned = df.drop(columns=['City', 'Country'])

# Drop rows with missing values in the remaining (modelled) columns
df_cleaned = df_cleaned.dropna()

# Encode categorical variables (drop_first avoids a redundant dummy column)
df_cleaned = pd.get_dummies(df_cleaned, columns=['Gender'], drop_first=True)

# Split features and target
X = df_cleaned.drop(columns='Clicked on Ad')
y = df_cleaned['Clicked on Ad']

# Split into training and testing sets (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and fit the Naive Bayes model
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Make predictions
y_pred = nb_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Display results
print(f"Accuracy: {accuracy:.2%}")
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1009 entries, 0 to 1008
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Daily Time Spent on Site  1005 non-null   float64
 1   Age                       998 non-null    float64
 2   Area Income               998 non-null    float64
 3   Daily Internet Usage      1005 non-null   float64
 4   City                      998 non-null    object 
 5   Gender                    1009 non-null   object 
 6   Country                   996 non-null    object 
 7   Clicked on Ad             1009 non-null   int64  
dtypes: float64(4), int64(1), object(3)
memory usage: 63.2+ KB


None

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,City,Gender,Country,Clicked on Ad
0,68.95,35.0,61833.9,256.09,Wrightburgh,Female,Tunisia,0
1,,31.0,68441.85,193.77,West Jodi,Male,Nauru,0
2,69.47,26.0,59785.94,236.5,Davidton,Female,San Marino,0
3,74.15,29.0,54806.18,245.89,West Terrifurt,Male,Italy,0
4,68.37,35.0,73889.99,225.58,South Manuel,Female,Iceland,0


Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Clicked on Ad
count,1005.0,998.0,998.0,1005.0,1009.0
mean,65.009463,35.962926,54958.573617,180.04192,0.501487
std,15.879997,8.772142,13381.286752,43.923438,0.500246
min,32.6,19.0,13996.5,104.78,0.0
25%,51.3,29.0,46993.3675,138.87,0.0
50%,68.37,35.0,56998.245,183.42,1.0
75%,78.57,42.0,65267.4025,218.8,1.0
max,91.43,61.0,79484.8,269.96,1.0


Accuracy: 94.77%
Confusion Matrix:
 [[138   5]
 [ 10 134]]
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.97      0.95       143
           1       0.96      0.93      0.95       144

    accuracy                           0.95       287
   macro avg       0.95      0.95      0.95       287
weighted avg       0.95      0.95      0.95       287

