In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

# Toy dataset: six short messages, each paired with a binary label
# (1 = spam, 0 = not spam).
data = dict(
    text=[
        "Congratulations, you've won a lottery! Call now!",
        "Meeting scheduled at 10 am tomorrow.",
        "Limited offer! Get a discount on your next purchase.",
        "Please review the attached document and provide feedback.",
        "Your account has been compromised, update your password immediately.",
        "Looking forward to our lunch meeting today.",
    ],
    label=[1, 0, 1, 0, 1, 0],
)

df = pd.DataFrame(data)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.3,
    random_state=42,
)


In [4]:
# Build the pipeline: raw text -> TF-IDF features -> Multinomial Naive Bayes.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),      # turn text into TF-IDF vectors
    ('classifier', MultinomialNB()),   # Naive Bayes classifier
])

# Train on the training portion of the data.
pipeline.fit(X_train, y_train)

# Evaluate on the held-out portion.
y_pred = pipeline.predict(X_test)
print("Классификационный отчет:")
# The test split is tiny, so a class can end up with no predicted samples;
# precision is then undefined and scikit-learn emits a warning per metric.
# zero_division=0 reports the same 0.0 value but without the warnings.
print(classification_report(y_test, y_pred, zero_division=0))


Классификационный отчет:
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [5]:
# Persist the fitted pipeline (vectorizer + classifier) to a single file.
model_filename = "simple_text_classifier.pkl"
joblib.dump(value=pipeline, filename=model_filename)
print(f"Модель сохранена в файл: {model_filename}")


Модель сохранена в файл: simple_text_classifier.pkl


In [6]:
# Restore the pipeline from the file written earlier in this notebook.
loaded_pipeline = joblib.load(model_filename)

# Previously unseen messages to classify.
new_texts = [
    "Congratulations! You've won a free trip to Paris!",
    "Can we reschedule our meeting for tomorrow?",
    "Your bank account has been locked due to suspicious activity."
]

# One predicted label per message, in the same order as new_texts.
predictions = loaded_pipeline.predict(new_texts)

# Print every message with a readable class name (label 1 -> Spam).
for position, text in enumerate(new_texts):
    label = predictions[position]
    print(f"Текст: {text}\nКласс: {'Spam' if label == 1 else 'Not Spam'}\n")


Текст: Congratulations! You've won a free trip to Paris!
Класс: Not Spam

Текст: Can we reschedule our meeting for tomorrow?
Класс: Not Spam

Текст: Your bank account has been locked due to suspicious activity.
Класс: Spam

