In [1]:
import os
print(os.getcwd())  
print(os.listdir())

C:\xampp\htdocs\DetectSpam
['.ipynb_checkpoints', '.vscode', 'app.py', 'readme', 'spam.csv', 'static', 'svm_spam_model.pkl', 'templates', 'test_model.py', 'tfidf_vectorizer.pkl', 'train_model.py', 'Untitled.ipynb']


In [2]:
import pandas as pd

df = pd.read_csv("spam.csv", encoding="utf-8", sep="\t", header=None)
print(df.head()) 

      0                                                  1
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...


In [3]:
df.columns = ["Label", "Message"]

df["Label"] = df["Label"].map({"ham": 0, "spam": 1})

df = df.drop_duplicates()
df["Message"] = df["Message"].str.lower().str.strip()

print(df.head())
print(df["Label"].value_counts())  


   Label                                            Message
0      0  go until jurong point, crazy.. available only ...
1      0                      ok lar... joking wif u oni...
2      1  free entry in 2 a wkly comp to win fa cup fina...
3      0  u dun say so early hor... u c already then say...
4      0  nah i don't think he goes to usf, he lives aro...
Label
0    4516
1     653
Name: count, dtype: int64


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

X = df["Message"]
y = df["Label"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

print("TF-IDF berhasil dibuat!")


TF-IDF berhasil dibuat!


In [5]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

svm_model = SVC(kernel="linear")

svm_model.fit(X_train_tfidf, y_train)

y_pred = svm_model.predict(X_test_tfidf)

accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi Model: {accuracy:.4f}")

print("Laporan Klasifikasi:")
print(classification_report(y_test, y_pred))


Akurasi Model: 0.9845
Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       894
           1       0.97      0.91      0.94       140

    accuracy                           0.98      1034
   macro avg       0.98      0.95      0.97      1034
weighted avg       0.98      0.98      0.98      1034



In [6]:
import joblib

joblib.dump(svm_model, "svm_spam_model.pkl")

joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

print("Model & Vectorizer berhasil disimpan!")


Model & Vectorizer berhasil disimpan!


In [7]:
import joblib

model = joblib.load("svm_spam_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

test_message = ["Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's"]

test_vectorized = vectorizer.transform(test_message)

result = model.predict(test_vectorized)[0]

print("Hasil Prediksi:", "SPAM" if result == 1 else "HAM (Bukan Spam)")


Hasil Prediksi: SPAM
