# Spam Email Classifier using Python and Streamlit

In [None]:

# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import streamlit as st
    

## Loading and Exploring the Dataset

In [None]:

# Load the dataset ('spam.csv' is a relative path, so it is resolved against
# the current working directory)
df = pd.read_csv('spam.csv', encoding='latin-1')

# Keep only the label column ('v1') and the message text column ('v2').
# .copy() makes the subset an independent DataFrame, so assigning to one of
# its columns later does not trigger pandas' SettingWithCopyWarning.
df = df[['v1', 'v2']].copy()
df.columns = ['label', 'message']  # Rename columns for easier access

# Display first few rows to check the data
df.head()
    

## Data Preprocessing

In [None]:

# Convert the label column to binary (spam: 1, ham: 0)
df['label'] = df['label'].map({'spam': 1, 'ham': 0})

# Series.map turns every value that is not a key of the mapping into NaN.
# Fail here with a clear message instead of letting NaN labels reach the
# classifier.
if df['label'].isna().any():
    raise ValueError(
        "The label column contains values other than 'spam' and 'ham' "
        "(or the labels were already converted)."
    )

# Splitting the dataset into training and testing sets
# (20% held out for testing; random_state fixed so the split is reproducible)
X = df['message']
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    

## Feature Extraction using TF-IDF

In [None]:

# TF-IDF vectorizer with max_features capped at 2500
tfidf = TfidfVectorizer(max_features=2500)

# Fit on the training messages only, then reuse the fitted vectorizer to
# encode the test messages
X_train_tfidf = tfidf.fit_transform(raw_documents=X_train)
X_test_tfidf = tfidf.transform(raw_documents=X_test)
    

## Model Building - Naive Bayes Classifier

In [None]:

# Train a multinomial Naive Bayes model on the TF-IDF training features
# (fit() returns the fitted estimator itself)
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Score the held-out test set
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Report both metrics on the Streamlit page
st.write(f"Accuracy: {accuracy:.2f}")
st.write("Confusion Matrix:", cm)
    

## Streamlit Interface

In [None]:

# Streamlit UI for prediction
st.title("Spam Classifier")

# Input message from the user
user_input = st.text_area("Enter the message to classify")

# When the button is clicked, classify the message
if st.button("Classify"):
    if not user_input.strip():
        # An empty or whitespace-only message has no tokens to vectorize, so
        # any predicted label would not reflect the message; ask for input.
        st.warning("Please enter a message to classify.")
    else:
        input_tfidf = tfidf.transform([user_input])  # Transform the input
        prediction = model.predict(input_tfidf)[0]  # Predict using the trained model
        label = 'Spam' if prediction == 1 else 'Ham'  # 1 = spam, anything else = ham
        st.write(f"The message is: {label}")
    