In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

In [None]:
# Fetch the VADER lexicon only when it is not already available locally, so
# repeated (and offline) runs do not need network access.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

In [None]:
# Load the labelled requests; the CSV must contain 'text' and 'category' columns.
data = pd.read_csv("user_requests.csv")

# Rows with a missing text or label cannot be scored, vectorized or used as a
# training target, so drop them here instead of failing later in the pipeline.
data = data.dropna(subset=['text', 'category'])

texts = data['text']
labels = data['category']

# Optional sentiment analysis: one shared analyzer scores every request.
sentiment_analyzer = SentimentIntensityAnalyzer()


def add_sentiment_score(text):
    """Return the compound polarity score of ``text``.

    Args:
        text: The request text to score.

    Returns:
        The value stored under the ``'compound'`` key of the result of
        ``sentiment_analyzer.polarity_scores(text)``.
    """
    return sentiment_analyzer.polarity_scores(text)['compound']


# Keep the per-request score as an extra column next to the raw text.
data['sentiment'] = texts.apply(add_sentiment_score)

# Split into training and test sets: 20% of the rows are held out, and the
# fixed seed keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data[['text', 'sentiment']],
    labels,
    test_size=0.2,
    random_state=42,
)

# Convert the texts to TF-IDF features (vocabulary capped at 5000 terms).
# The vectorizer is fitted on the training texts only; the test texts are
# transformed with that already-fitted vocabulary.
vectorizer = TfidfVectorizer(max_features=5000)
train_docs, test_docs = X_train['text'], X_test['text']
X_train_tfidf = vectorizer.fit_transform(train_docs).toarray()
X_test_tfidf = vectorizer.transform(test_docs).toarray()

# Append the sentiment score to the TF-IDF features.
# The TF-IDF columns are given string names ("tfidf_0", "tfidf_1", ...): a frame
# that mixes integer and string column labels is rejected by recent
# scikit-learn releases when it is passed to fit/predict. The prefix also
# keeps these names distinct from the 'sentiment' column.
X_train_combined = pd.DataFrame(X_train_tfidf).add_prefix('tfidf_')
# .values drops the original row index so the scores are assigned by position.
X_train_combined['sentiment'] = X_train['sentiment'].values
X_test_combined = pd.DataFrame(X_test_tfidf).add_prefix('tfidf_')
X_test_combined['sentiment'] = X_test['sentiment'].values

# Train the classifier: a 100-tree random forest with a fixed seed.
# fit() returns the fitted estimator, so construction and training are chained.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_combined, y_train)

# Evaluate the model on the held-out test set: print the overall accuracy,
# then the classification report.
y_pred = model.predict(X_test_combined)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print(report)
