In [12]:
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report  # Make sure these are imported

# Step 2: Read the Iris data from a CSV file (used here in place of load_iris)
iris = pd.read_csv('/content/IRIS.csv')
# 'species' is assumed to be the target column; every other column is a feature.
y = iris['species'].values
X = iris.drop(columns='species').values

# Step 3: Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 4: Fit Gaussian Naive Bayes (chosen because the Iris features are continuous).
# fit() returns the fitted estimator itself, so construction and training are chained.
model = GaussianNB().fit(X_train, y_train)

# Step 5: Predict the species of the held-out rows
y_pred = model.predict(X_test)

# Step 6: Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Naive Bayes classifier: {accuracy * 100:.2f}%")

# Keep the confusion matrix and the per-class report in variables for later use
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Each heading is printed on its own line, directly followed by its result
print("\nconfusion matrix:", conf_matrix, sep="\n")
print("\nclassification report:", class_report, sep="\n")

Accuracy of Naive Bayes classifier: 97.78%

confusion matrix:
[[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]

classification report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        19
Iris-versicolor       1.00      0.92      0.96        13
 Iris-virginica       0.93      1.00      0.96        13

       accuracy                           0.98        45
      macro avg       0.98      0.97      0.97        45
   weighted avg       0.98      0.98      0.98        45



In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Read the labelled messages from CSV
data = pd.read_csv("/content/spam_ham_india.csv")

# Show the available column names so the two settings below can be checked against the file
print(data.columns)

# Names of the message-text column and the label column.
# Change these two values if the CSV uses different headers.
message_column_name = 'Msg'
label_column_name = 'Label'

# Preprocessing: lowercase every message (the column in `data` is overwritten)
data[message_column_name] = data[message_column_name].str.lower()

# Features are the message texts, targets are the labels
X, y = data[message_column_name], data[label_column_name]

# 80% training / 20% test, with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Turn the text into TF-IDF features, dropping common English stop words.
# Missing messages are replaced by empty strings before vectorising; the
# vocabulary is fitted on the training messages only and reused for the test set.
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train.fillna(''))
X_test_tfidf = vectorizer.transform(X_test.fillna(''))

# Model: Multinomial Naive Bayes; fit() returns the fitted estimator itself
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Predict labels for the held-out messages
y_pred = model.predict(X_test_tfidf)

# Report overall accuracy followed by the per-class metrics
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Index(['Msg', 'Label'], dtype='object')
Accuracy: 0.9890

Classification Report:
              precision    recall  f1-score   support

         ham       0.99      1.00      0.99       297
        spam       0.99      0.97      0.98       157

    accuracy                           0.99       454
   macro avg       0.99      0.99      0.99       454
weighted avg       0.99      0.99      0.99       454

