# **Spam Detection Model**
Created By: Thidas Senavirathna

In [None]:
# Install necessary libraries
!pip install scikit-learn pandas numpy flask



In [None]:
# Import libraries
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import pandas as pd  # Import pandas library

# Load the dataset and keep only the two columns this notebook uses:
# v1 (class name) and v2 (message body), renamed to label / text
data = pd.read_csv("spam.csv", encoding="latin-1")
data = data[['v1', 'v2']]
data.columns = ['label', 'text']
# Encode the target as integers: ham -> 0, spam -> 1
data['label'] = data['label'].map({'ham': 0, 'spam': 1})

# Features are the raw messages, the target is the encoded label
X = data['text']
y = data['label']

# Split data *before* vectorizing, so the held-out messages cannot influence
# the vocabulary or the IDF weights learned below (avoids test-set leakage)
text_train, text_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Preprocessing: fit TF-IDF on the training messages only, then apply that
# same fitted transform to the test messages. The results are kept as sparse
# matrices, which MultinomialNB accepts directly, rather than being expanded
# into a dense array with .toarray().
tfidf = TfidfVectorizer(stop_words='english', max_features=3000)
X_train = tfidf.fit_transform(text_train)
X_test = tfidf.transform(text_test)

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate on the held-out 20% of the messages
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.9820627802690582


In [None]:
# Save the Model
# Both fitted objects are persisted: new text has to be passed through the
# fitted vectorizer's transform before it can be given to model.predict, so
# the classifier alone is not enough to make predictions later.
import joblib

# Write the trained classifier to disk
joblib.dump(model, 'spam_model.pkl')
# Write the fitted TF-IDF vectorizer to disk; as the last expression in the
# cell, its return value (the list of written file names) is what gets displayed
joblib.dump(tfidf, 'vectorizer.pkl')

['vectorizer.pkl']

In [None]:
# Restore the classifier and the vectorizer from the files written earlier
model = joblib.load('spam_model.pkl')
vectorizer = joblib.load('vectorizer.pkl')

# A single message to classify
input_text = "Congrtulations! you have won a free iphone."

# The vectorizer expects an iterable of documents, hence the one-element list
transformed_input = vectorizer.transform([input_text])

# predict returns one label per input row
prediction = model.predict(transformed_input)

# Label 1 means spam; anything else is reported as not spam
print("This is spam." if prediction[0] == 1 else "This is not spam.")

This is spam.
