# Email Spam Checker Project
# This code is a Google Colab compatible spam detection project with a dataset of 500 lines.

In [1]:
# 1. Installing Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [2]:
#2. Creating a 500-Line Sample Data Set
# Random spam and spam a list of non-email texts
email_texts = [
    "Congratulations! You've won a $1,000 gift card. Click here to claim now!",
    "Meeting is scheduled at 3 PM tomorrow, please confirm.",
    "Earn cash fast by working from home. Sign up today!",
    "Reminder: Your package has been shipped and will arrive soon.",
    "Last chance to win a trip to Hawaii!",
    "Your bank account has been credited with a refund.",
    "Free membership for life if you join today.",
    "Hello, wanted to check if you're available for a quick call.",
    "Discount on all products for a limited time. Hurry up!",
    "Join our free webinar on Data Science and Machine Learning",
    "New updates are available for your software. Click here to download.",
    "Don't miss out on this exclusive offer, sign up now!",
    "Your subscription will expire soon, please renew to avoid interruption.",
    "Looking forward to catching up soon. Let me know your availability.",
    "You've been selected for a prize. Claim your reward now!",
    "Can we meet up next week for coffee?",
    "Congratulations! You're eligible for a personal loan.",
    "This is your last reminder to complete your profile.",
    "Your monthly statement is ready for viewing.",
    "Special discount on holiday bookings. Book now!",
    "Important: Account security update required.",
    "Win a free iPhone by participating in this survey!",
    "Schedule your free consultation with our expert.",
    "Your recent payment has been processed successfully.",
    "Urgent: Please verify your account information."
]

# Creating labels - Randomly label 50% of them as spam
labels = ['spam' if i % 2 == 0 else 'not spam' for i in range(500)]

# Creating a dataset of 500 rows by randomly selecting email texts
data = {
    'email_text': np.random.choice(email_texts, 500),
    'label': labels
}

# Defining the dataset as a DataFrame
df = pd.DataFrame(data)

In [3]:
# 3. Data Processing
# We use CountVectorizer to transform text data into numeric values
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['email_text'])
y = df['label'].apply(lambda x: 1 if x == 'spam' else 0)  # 'spam' = 1, 'not spam' = 0

# Splitting into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
# 4. Modeling
# We train the model with Naive Bayes algorithm
model = MultinomialNB()
model.fit(X_train, y_train)

In [5]:
# 5. Prediction and Evaluation
# Making predictions on the test dataset
y_pred = model.predict(X_test)

# Calculating the model's accuracy rate and other evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=['Not Spam', 'Spam'])

# Showing the results
print(f"Model Doğruluk Oranı: {accuracy:.2f}")
print("\nSınıflandırma Raporu:\n", report)

Model Doğruluk Oranı: 0.49

Sınıflandırma Raporu:
               precision    recall  f1-score   support

    Not Spam       0.50      0.68      0.58        77
        Spam       0.47      0.30      0.37        73

    accuracy                           0.49       150
   macro avg       0.49      0.49      0.47       150
weighted avg       0.49      0.49      0.48       150



In [None]:
# 6. Notes for Future Steps
# This sample dataset can be expanded with larger real datasets.
# Also, model accuracy can be increased by hyperparameter tuning.