# In [None]:
# CODTECH Internship – Task 4
# Machine Learning Model Implementation (Spam Detection)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Create dataset
# Every sample is written as a (message text, label) pair so that a message
# and its label sit on the same line; the pairs are then unzipped into the
# column-oriented dict that pandas consumes.
_labelled_messages = [
    ('Win money now', 'spam'),
    ('Hello how are you', 'ham'),
    ('Free gift available', 'spam'),
    ('Let us meet today', 'ham'),
    ('Congratulations you won prize', 'spam'),
    ('Call me when free', 'ham'),
    ('Exclusive offer just for you', 'spam'),
    ('Are you coming tomorrow', 'ham'),
]

data = {
    'message': [text for text, _ in _labelled_messages],
    'label': [tag for _, tag in _labelled_messages],
}

df = pd.DataFrame(data)

# Heading, a blank line, then the table -- emitted with one print call.
print("Dataset:\n", df, sep="\n")

# Step 2: Convert labels to numbers
# Lookup table from the textual class to its numeric id:
# spam is the positive class (1), ham the negative class (0).
label_to_id = {'spam': 1, 'ham': 0}
df['label'] = df['label'].map(label_to_id)

# Step 3: Split data into training and testing
# X is the raw message text, y the numeric label column (1 = spam, 0 = ham).
X = df['message']
y = df['label']

# 30% of the rows are held out for evaluation; random_state fixes the
# shuffle so the split is reproducible.
# stratify=y keeps the spam/ham proportion in both partitions. With only a
# handful of rows, an unstratified shuffle gives no guarantee that both
# classes appear in the training set and in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Step 4: Convert text into numeric features
# The vocabulary is learned from the training messages only; the fitted
# vectorizer is then applied to both partitions, so the test messages are
# encoded against the training vocabulary.
vectorizer = CountVectorizer().fit(X_train)
X_train_vec, X_test_vec = (
    vectorizer.transform(texts) for texts in (X_train, X_test)
)

# Step 5: Train the Machine Learning model
# fit() returns the estimator itself, so construction and training are
# chained into a single statement.
model = MultinomialNB().fit(X_train_vec, y_train)

# Step 6: Make predictions
# Predicted class ids (1 = spam, 0 = ham) for the held-out messages.
y_pred = model.predict(X_test_vec)

# Step 7: Evaluate the model
# The held-out set contains only a few messages, so a class can be missing
# from y_test or y_pred. Pinning the label order keeps the report and the
# confusion matrix at a fixed two-class layout (index 0 = ham, 1 = spam),
# and zero_division=0 reports an undefined precision/recall as 0 instead of
# emitting a warning.
class_ids = [0, 1]
class_names = ['ham', 'spam']

print("\nModel Accuracy:", accuracy_score(y_test, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,
    ),
)
print(
    "\nConfusion Matrix:\n",
    confusion_matrix(y_test, y_pred, labels=class_ids),
)

# Step 8: Test with a custom message
# The message is wrapped in a list because the vectorizer expects an
# iterable of documents; it is encoded with the already-fitted vocabulary
# and passed to the trained model.
test_message = ["Free prize waiting for you"]
test_vector = vectorizer.transform(test_message)
prediction = model.predict(test_vector)

print("\nCustom Message Test:")
# Class id 1 means spam; any other prediction is reported as a normal message.
print("Spam Message" if prediction[0] == 1 else "Normal Message")