In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer

In [2]:
# Load the mail dataset from a CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer="mail_data.csv")

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Single seed reused by every stochastic step so that Restart & Run All
# reproduces the same split and the same fitted models.
RANDOM_STATE = 42

# Split the data: 80% for training, 20% held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    df['Message'], df['Category'], test_size=0.2, random_state=RANDOM_STATE
)

# Use a TF-IDF vectorizer to turn the text into feature vectors. It is fitted
# on the training split only and then applied, unchanged, to the test split.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


def fit_and_report(label, model):
    """Fit ``model`` on the training split and print its test-split metrics.

    Reads the module-level ``X_train_tfidf``, ``y_train``, ``X_test_tfidf``
    and ``y_test`` defined above.

    Parameters
    ----------
    label : str
        Model name used as the prefix of the two printed lines.
    model : estimator
        Unfitted classifier exposing ``fit`` and ``predict``; it is fitted
        in place.

    Returns
    -------
    tuple
        ``(predictions, accuracy)`` computed on the test split.
    """
    model.fit(X_train_tfidf, y_train)
    predictions = model.predict(X_test_tfidf)
    accuracy = accuracy_score(y_test, predictions)
    print(f"{label} Accuracy:", accuracy)
    print(f"{label} Classification Report:\n", classification_report(y_test, predictions))
    return predictions, accuracy


# 1. Support Vector Machine (SVM)
svm_model = SVC(kernel='linear')
svm_predictions, svm_accuracy = fit_and_report("SVM", svm_model)

# 2. Decision Tree
# Seeded explicitly: an unseeded tree can pick different splits on each run,
# which makes the reported accuracy drift between executions.
dt_model = DecisionTreeClassifier(random_state=RANDOM_STATE)
print()  # blank separator line before the next model's report
dt_predictions, dt_accuracy = fit_and_report("Decision Tree", dt_model)

# 3. Naive Bayes
nb_model = MultinomialNB()
print()  # blank separator line before the next model's report
nb_predictions, nb_accuracy = fit_and_report("Naive Bayes", nb_model)

SVM Accuracy: 0.9937219730941704
SVM Classification Report:
               precision    recall  f1-score   support

         ham       0.99      1.00      1.00       966
        spam       1.00      0.95      0.98       149

    accuracy                           0.99      1115
   macro avg       1.00      0.98      0.99      1115
weighted avg       0.99      0.99      0.99      1115


Decision Tree Accuracy: 0.9668161434977578
Decision Tree Classification Report:
               precision    recall  f1-score   support

         ham       0.98      0.99      0.98       966
        spam       0.90      0.85      0.87       149

    accuracy                           0.97      1115
   macro avg       0.94      0.92      0.93      1115
weighted avg       0.97      0.97      0.97      1115


Naive Bayes Accuracy: 0.9650224215246637
Naive Bayes Classification Report:
               precision    recall  f1-score   support

         ham       0.96      1.00      0.98       966
        spam    

In [5]:
# Print the plain-text representation of the loaded frame to inspect its
# Category and Message columns.
# NOTE(review): a bare `df.head()` would give a shorter, rich-rendered preview.
print(df)

     Category                                            Message
0         ham  Go until jurong point, crazy.. Available only ...
1         ham                      Ok lar... Joking wif u oni...
2        spam  Free entry in 2 a wkly comp to win FA Cup fina...
3         ham  U dun say so early hor... U c already then say...
4         ham  Nah I don't think he goes to usf, he lives aro...
...       ...                                                ...
5567     spam  This is the 2nd time we have tried 2 contact u...
5568      ham               Will ü b going to esplanade fr home?
5569      ham  Pity, * was in mood for that. So...any other s...
5570      ham  The guy did some bitching but I acted like i'd...
5571      ham                         Rofl. Its true to its name

[5572 rows x 2 columns]


In [None]:
# Helper that classifies a single message with an already-trained model
def predict_message(message, model):
    """Return the label ``model`` predicts for one raw text ``message``.

    The message is wrapped in a one-element list, transformed with the
    module-level ``vectorizer``, and passed to ``model.predict``; the first
    (and only) element of the result is returned.
    """
    features = vectorizer.transform([message])
    return model.predict(features)[0]

# Message to classify, typed in by the user
user_input = input("Masukkan pesan yang ingin diuji: ")

# Run the same message through each trained model: SVM, Decision Tree, Naive Bayes
svm_prediction = predict_message(user_input, svm_model)
dt_prediction = predict_message(user_input, dt_model)
nb_prediction = predict_message(user_input, nb_model)

# Report the three predictions in the same order they were computed
for caption, predicted_label in (
    ("Prediksi SVM:", svm_prediction),
    ("Prediksi Decision Tree:", dt_prediction),
    ("Prediksi Naive Bayes:", nb_prediction),
):
    print(caption, predicted_label)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Function to plot a confusion matrix as an annotated heatmap
def plot_confusion_matrix(y_true, y_pred, title, labels=('ham', 'spam')):
    """Draw the confusion matrix of ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.
    title : str
        Title shown above the heatmap.
    labels : sequence, optional
        Class labels in the order used for the matrix rows and columns.
        Defaults to ``('ham', 'spam')``. The order is passed explicitly to
        ``confusion_matrix`` so that the tick labels always describe the
        row/column they sit next to, and so the matrix keeps one row and
        column per listed class even when a class is missing from the data.

    Notes
    -----
    NOTE(review): ``confusion_matrix`` is expected to raise ``ValueError``
    when none of ``labels`` occurs in ``y_true`` -- confirm against the
    installed scikit-learn version.
    """
    class_order = list(labels)
    cm = confusion_matrix(y_true, y_pred, labels=class_order)

    # Capitalised class names for display: 'ham' -> 'Ham', 'spam' -> 'Spam'.
    tick_labels = [str(label).capitalize() for label in class_order]

    # Draw on an explicit Axes rather than relying on pyplot's current figure.
    _, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(
        cm,
        annot=True,
        fmt='g',
        cmap='Blues',
        xticklabels=tick_labels,
        yticklabels=tick_labels,
        ax=ax,
    )
    ax.set(title=title, xlabel='Predicted', ylabel='Actual')
    plt.show()

# Plot one confusion matrix per model, in order: SVM, Decision Tree, Naive Bayes
for model_predictions, figure_title in (
    (svm_predictions, 'SVM Confusion Matrix'),
    (dt_predictions, 'Decision Tree Confusion Matrix'),
    (nb_predictions, 'Naive Bayes Confusion Matrix'),
):
    plot_confusion_matrix(y_test, model_predictions, figure_title)
