In [None]:
from sklearn.model_selection import train_test_split

# Placeholders: replace the Ellipsis values with the real inputs and labels before
# running this notebook. X is later passed to a text vectorizer in fit_predict, so it
# is presumably a collection of raw text documents — TODO confirm.
X = ...
y = ...

# Hold out 25% of the samples for evaluation; the fixed random_state keeps the split
# the same across Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import MaxAbsScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report

In [None]:
def fit_predict(vectorizer, model, X_train, y_train, X_test, y_test):
    """Vectorize the data, scale the features, fit ``model`` and report test metrics.

    The vectorizer and the scaler are fitted on the training split only and then
    applied to the test split, so no information from the test data reaches the
    fitted transformers. Both ``vectorizer`` and ``model`` are fitted in place.

    Parameters
    ----------
    vectorizer : object providing ``fit_transform`` and ``transform``
        Turns the raw samples into a feature matrix (e.g. ``CountVectorizer``).
    model : object providing ``fit`` and ``predict``
        Classifier trained on the scaled training features.
    X_train, y_train : training samples and their labels.
    X_test, y_test : held-out samples and their labels.

    Returns
    -------
    pred
        Whatever ``model.predict`` returns for the scaled test features, so the
        caller can run further analysis beyond the printed report.

    Side effects
    ------------
    Prints the shape of the training feature matrix and the classification report.
    """
    # Learn the vocabulary and build the training matrix in a single pass over
    # X_train; "bow" stands for bag of words.
    bow = vectorizer.fit_transform(X_train)
    bow_test = vectorizer.transform(X_test)

    # Shows (n_samples, n_features) so the effect of the vectorizer settings is visible.
    print(bow.shape)

    # Scaling statistics come from the training matrix only and are reused for test.
    scaler = MaxAbsScaler()
    bow = scaler.fit_transform(bow)
    bow_test = scaler.transform(bow_test)

    model.fit(bow, y_train)
    pred = model.predict(bow_test)

    print(classification_report(y_test, pred))

    return pred

In [None]:
# Candidate classifiers, keyed by the name printed in the experiment log.
# NOTE(review): learning_rate=3e-4 with 1000 iterations looks very small for CatBoost
# and may leave the model under-trained — confirm this value is intentional.
models = {
    "LogisticRegression": LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42),
    "CatBoostClassifier": CatBoostClassifier(iterations=1000, learning_rate=3e-4, random_state=42),
    "RandomForestClassifier": RandomForestClassifier(n_estimators=300, class_weight='balanced', random_state=42),
}

# Candidate text vectorizers; both share the same n-gram range and document-frequency
# cut-offs so that only the weighting scheme (raw counts vs. TF-IDF) differs.
vectorizers = {
    "CountVectorizer": CountVectorizer(ngram_range=(1, 3), min_df=0.05, max_df=0.8),
    "TfidfVectorizer": TfidfVectorizer(ngram_range=(1, 3), min_df=0.05, max_df=0.8),
}

# Evaluate every (model, vectorizer) combination.
# .items() is required here: iterating a dict directly yields only its keys, and
# unpacking a key string into two names raises ValueError.
for model_name, model in models.items():
    for vectorizer_name, vectorizer in vectorizers.items():
        print(f"Model : {model_name} \nVectorizer : {vectorizer_name}\n")
        fit_predict(vectorizer, model, X_train, y_train, X_test, y_test)
