In [1]:
!pip install pandas scikit-learn



In [2]:
import pandas as pd

# Remote SMS dataset: tab-separated, no header row, so the two column names
# are supplied explicitly.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(url, sep="\t", header=None, names=["label", "text"])

# Preview the first five rows
df.head()

Unnamed: 0,label,text
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [3]:
# Encode the string labels as integers: ham -> 0, spam -> 1.
label_map = {"ham": 0, "spam": 1}

# Map only while the column still holds the raw strings. Re-running a bare
# .map() on already-encoded values finds no matching keys and silently turns
# every label into NaN, so this guard keeps the cell safe to re-run.
if not df["label"].isin(list(label_map.values())).all():
    df["label"] = df["label"].map(label_map)

# Any value missing from label_map comes out of .map() as NaN; fail loudly here
# rather than carry missing labels forward.
assert df["label"].notna().all(), "unexpected label value(s) in df['label']"
df.head()

Unnamed: 0,label,text
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable from run to run.
messages, labels = df["text"], df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    messages, labels, test_size=0.2, random_state=42
)

In [9]:
from sklearn.feature_extraction.text import CountVectorizer
# Turn each message into a vector of token counts.
vectorizer = CountVectorizer()
# Fit on the training messages only, then encode them in the same step.
X_train_vec = vectorizer.fit_transform(X_train)
# Encode the test messages with the already-fitted vectorizer (transform only, no refit).
X_test_vec = vectorizer.transform(X_test)

In [6]:
from sklearn.naive_bayes import MultinomialNB

# Train a multinomial Naive Bayes classifier on the training count vectors.
# fit() returns the estimator itself, so construction and training are chained;
# the trailing expression keeps the estimator as this cell's displayed result.
model = MultinomialNB().fit(X_train_vec, y_train)
model

In [7]:
# Accuracy = share of test messages whose predicted label matches the true one.
test_predictions = model.predict(X_test_vec)
accuracy = (y_test == test_predictions).mean()
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 0.99


In [8]:
def classify_message(message):
    """Classify one raw message string with the fitted vectorizer and model.

    Parameters
    ----------
    message : str
        The text to classify.

    Returns
    -------
    str
        "Spam" if the model predicts label 1, otherwise "Not Spam".
    """
    msg_vec = vectorizer.transform([message])
    prediction = model.predict(msg_vec)[0]
    return "Spam" if prediction == 1 else "Not Spam"


# Interactive demo: keep classifying typed messages until the user enters 'exit'.
while True:
    try:
        # Strip surrounding whitespace so that e.g. " exit " still quits.
        message = input("Enter a message (or 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop without a traceback.
        break
    if message.lower() == "exit":
        break
    if not message:
        # Blank input carries nothing to classify; prompt again.
        continue
    print(classify_message(message))

Enter a message (or 'exit' to quit): Free entry! Claim your prize now!
Spam
Enter a message (or 'exit' to quit): Hey, are we meeting today?
Not Spam
Enter a message (or 'exit' to quit): exit
