<a href="https://colab.research.google.com/github/kswathi2627/MACHINE-LEARNING-MODEL-IMPLEMENTATION/blob/main/CD4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 📘 Machine Learning Model Implementation
# Predictive Model using Scikit-Learn (Spam Email Detection)

# Step 1: Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 2: Load Dataset
# The SMS Spam Collection is read as a tab-separated file with no header row,
# so column names are supplied here (label: "ham" = not spam, "spam" = spam).
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
data = pd.read_csv(
    url,
    sep="\t",
    header=None,
    names=["label", "message"],
)

# Quick sanity check: show the table dimensions and the first few rows.
print("Dataset Shape:", data.shape)
print(data.head())

# Step 3: Preprocess Data
X = data['message']   # Features (raw text messages)
y = data['label'].map({'ham': 0, 'spam': 1})  # Encode labels (ham=0, spam=1)

# .map() yields NaN for any label that is not 'ham' or 'spam'; stop here with
# a clear message instead of passing NaN targets on to the classifier.
if y.isna().any():
    raise ValueError("Unexpected label values found; expected only 'ham' or 'spam'.")

# Step 4: Train-Test Split
# Split the raw text BEFORE vectorizing. Fitting TF-IDF on the full corpus
# would let the held-out messages influence the vocabulary and IDF weights
# (data leakage), which makes the evaluation below look better than it is.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert text into numerical features using TF-IDF.
# The vectorizer learns its vocabulary/IDF from the training messages only;
# the test messages are merely transformed with what was learned.
vectorizer = TfidfVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Step 5: Train Model (Naive Bayes Classifier)
# fit() returns the estimator itself, so construction and training are chained.
model = MultinomialNB().fit(X_train, y_train)

# Step 6: Make Predictions
# Predict a label for every message in the held-out test set.
y_pred = model.predict(X_test)

# Step 7: Evaluate the Model
# Each metric compares the true test labels with the model's predictions and
# is computed right before it is printed.
print("✅ Model Evaluation Results")

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)

report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

# Step 8: Try a Custom Prediction
# Two hand-written messages to classify with the trained model.
sample = [
    "Congratulations! You've won a free lottery ticket. Call now!",
    "Are we still meeting for lunch tomorrow?"
]

# Reuse the already-fitted vectorizer so the samples are encoded with the
# same vocabulary the model was trained on.
sample_vec = vectorizer.transform(sample)
sample_pred = model.predict(sample_vec)
print("\nSample Predictions:", sample_pred)  # 1=spam, 0=ham


Dataset Shape: (5572, 2)
  label                                            message
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...
✅ Model Evaluation Results
Accuracy: 0.979372197309417

Confusion Matrix:
 [[966   0]
 [ 23 126]]

Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       966
           1       1.00      0.85      0.92       149

    accuracy                           0.98      1115
   macro avg       0.99      0.92      0.95      1115
weighted avg       0.98      0.98      0.98      1115


Sample Predictions: [0 0]
