In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the raw CSV into a DataFrame. The encoding is passed explicitly as
# latin-1 -- presumably because the file is not valid UTF-8 (TODO confirm).
dataset = pd.read_csv('spam.csv', encoding='latin-1') 

In [4]:
# Keep only the two columns used below and give them descriptive names:
# 'v1' becomes the class label and 'v2' becomes the message text.
dataset = dataset[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'text'})

In [5]:
# Encode the string labels as integers: spam -> 1, ham -> 0.
#
# Series.map() silently produces NaN for any value missing from the mapping,
# so a typo in the data -- or simply re-running this cell after the column has
# already been encoded -- would wipe out the labels without any error. Guard
# against both: skip the work when the column is already 0/1, and fail loudly
# when an unexpected label shows up.
label_codes = {'spam': 1, 'ham': 0}
if not dataset['label'].isin(list(label_codes.values())).all():
    encoded_labels = dataset['label'].map(label_codes)
    unknown_labels = dataset.loc[encoded_labels.isna(), 'label'].unique()
    if len(unknown_labels) > 0:
        raise ValueError(
            f"Unexpected values in 'label' column: {list(unknown_labels)!r}; "
            f"expected one of {list(label_codes)!r}"
        )
    dataset['label'] = encoded_labels

In [7]:
# Split the frame into model inputs and targets as NumPy arrays:
# y holds the 0/1 labels (first column), X the raw message strings (second column).
y = dataset['label'].values
X = dataset['text'].values

In [8]:
# Hold out 20% of the messages for evaluation; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [9]:
from sklearn.feature_extraction.text import CountVectorizer

# Learn the vocabulary from the training messages only, then encode both
# splits with that same fitted vectorizer so the test set never influences
# the feature space.
vectorizer = CountVectorizer().fit(X_train)
X_train_vectorized = vectorizer.transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [10]:
# Train a logistic-regression classifier (library default settings) on the
# vectorized training messages. fit() returns the estimator itself, so the
# construction and training can be chained; the trailing `model` keeps the
# fitted estimator as the cell's displayed value.
model = LogisticRegression().fit(X_train_vectorized, y_train)
model

In [16]:
# Predict a class label for every held-out message. The bare `y_pred` on the
# last line makes the notebook display the resulting array as the cell output.
y_pred = model.predict(X_test_vectorized)
y_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [12]:
from sklearn.metrics import accuracy_score

# Compare the predictions against the true held-out labels and report the score.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

Accuracy: 0.9766816143497757


# gradio로 웹사이트 구현

In [17]:
def predict_spam(message):
    """Classify a single message as spam or not spam.

    Uses the module-level ``vectorizer`` to encode the message and the
    module-level ``model`` to predict its class.

    Args:
        message: The text to classify. ``None`` is treated as an empty
            message, and any other non-string value is converted with
            ``str()`` before being vectorized.

    Returns:
        ``"Spam"`` when the model predicts class 1, otherwise ``"Not Spam"``.
    """
    # The vectorizer only accepts strings; normalizing here avoids an
    # AttributeError deep inside it when a caller hands over None or a number.
    text = "" if message is None else str(message)
    # transform() expects an iterable of documents, hence the one-element list.
    features = vectorizer.transform([text])
    predicted_label = model.predict(features)[0]
    return "Spam" if predicted_label == 1 else "Not Spam"

In [19]:
import gradio as gr

# Expose predict_spam through a simple text-in / text-out web form and start
# the local server.
iface = gr.Interface(
    fn=predict_spam,
    inputs="text",
    outputs="text",
    title="Spam Classifier",
    description="Enter a message to see if it's spam or not.",
)
iface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


