# 1. Importing Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# 2. Load and Prepare the Dataset

In [6]:
# Load the dataset: a tab-separated file without a header row, read into
# two columns named 'label' and 'text'.
# NOTE(review): with pandas' default quoting, a '"' character inside a message
# may cause adjacent rows to be merged -- consider quoting=3 (csv.QUOTE_NONE)
# and confirm the resulting row count before changing it.
data = pd.read_csv('SMSSpamCollection', sep='\t', header=None, names=['label', 'text'])

# Map 'ham' to 0 (non-spam) and 'spam' to 1.
# Series.map returns NaN for any value missing from the mapping, so validate
# the result here instead of letting NaN labels fail later inside the model.
label_codes = {'ham': 0, 'spam': 1}
encoded_labels = data['label'].map(label_codes)
unmapped = encoded_labels.isna()
if unmapped.any():
    bad_values = sorted(data.loc[unmapped, 'label'].astype(str).unique())
    raise ValueError(
        f"Unexpected label values (expected one of {sorted(label_codes)}): {bad_values}"
    )
data['label'] = encoded_labels.astype('int64')

# Split the data into features (X: raw message text) and labels (y: 0/1 codes)
X = data['text']
y = data['label']

# 3. Split the Data into Training and Testing Sets

In [7]:
# Hold out 20% of the messages for testing; the remaining 80% are used for training.
# The fixed random_state keeps the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# 4. Feature Engineering: TF-IDF Vectorization

In [8]:
# Transform text data into TF-IDF feature vectors.
# The vectorizer is fitted on the training messages only (fit_transform);
# the test messages are then transformed with that already-fitted vectorizer,
# so nothing is learned from the test set.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


# 5. Train the Text Classification Model (Support Vector Machine - SVM)

In [11]:
# Initialize a Support Vector Machine classifier with a linear kernel
clf = SVC(kernel='linear')

# Train the classifier on the TF-IDF training features and their labels.
# This is the last expression in the cell, so the fitted estimator's repr
# is shown as the cell output.
clf.fit(X_train_tfidf, y_train)


SVC(kernel='linear')

# 6. Evaluate the Model

In [12]:
# Predict labels for the held-out test messages
predictions = clf.predict(X_test_tfidf)

# Compute all evaluation metrics up front, then report them together
accuracy = accuracy_score(y_test, predictions)
conf_matrix = confusion_matrix(y_test, predictions)  # rows: true label, columns: predicted label
classification_rep = classification_report(y_test, predictions)

# Report overall accuracy, then the confusion matrix, then per-class metrics
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")


Accuracy: 0.9937219730941704
Confusion Matrix:
[[966   0]
 [  7 142]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       966
           1       1.00      0.95      0.98       149

    accuracy                           0.99      1115
   macro avg       1.00      0.98      0.99      1115
weighted avg       0.99      0.99      0.99      1115

