In [3]:
# Lauren Escobedo

import pandas as pd
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Load and preprocess the dataset
lenses_df = pd.read_csv('lenses.data',delimiter='\s+', header=None)
lenses_df.rename(columns={5 : 'class'}, inplace=True)
lenses_df.head()

# Prepare the dataset for training/testing
X = lenses_df.drop(columns=['class'])  # Define the feature variables
y = lenses_df['class']  # Define target variable

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)  # Split data, allocating 70% for training and 30% for testing

# Model training and evaluation using Multinomial Naive Bayes
nb_multi = MultinomialNB()  # Initialize the Multinomial Naive Bayes classifier
nb_multi.fit(X, y)  # Train the classifier using the entire dataset (X, y)

# Model training and evaluation using Gaussian Naive Bayes
nb_gauss = GaussianNB()  # Initialize the Gaussian Naive Bayes classifier
nb_gauss.fit(X, y)  # Train the classifier using the entire dataset (X, y)

# Predictions and performance evaluation for Multinomial Naive Bayes
multi_preds = nb_multi.predict(X_test)  # Predict the target variable for the test set using Multinomial Naive Bayes
print("Results for multinomial distribution assumption:")  # Print results header for Multinomial Naive Bayes
print(accuracy_score(y_test, multi_preds))  # Calculate and print the accuracy of the Multinomial Naive Bayes model
print(confusion_matrix(y_test, multi_preds))  # Calculate and print the confusion matrix to evaluate model performance

# Predictions and performance evaluation for Gaussian Naive Bayes
gauss_preds = nb_gauss.predict(X_test)  # Predict the target variable for the test set using Gaussian Naive Bayes
print("Results for Gaussian distribution assumption:")  # Print results header for Gaussian Naive Bayes
print(accuracy_score(y_test, gauss_preds))  # Calculate and print the accuracy of the Gaussian Naive Bayes model
print(confusion_matrix(y_test, gauss_preds))  # Calculate and print the confusion matrix to evaluate model performance

Results for multinomial distribution assumption:
0.625
[[1 0 2]
 [0 0 1]
 [0 0 4]]
Results for Gaussian distribution assumption:
0.75
[[3 0 0]
 [0 1 0]
 [2 0 2]]
