In [1]:
# Import the libraries used in this notebook
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
import numpy as np

In [2]:
def classification_training(data, threshold=15, test_size=0.25, random_state=27):
    """Train an SVM and a logistic regression on a binarised wellness index.

    The target is built from the ``mental_wellness_index_0_100`` column:
    1 when the value is ``>= threshold`` and 0 otherwise. Every other column
    except ``user_id`` is used as a feature. The columns ``gender``,
    ``occupation`` and ``work_mode`` are label-encoded when present; all
    remaining feature columns are passed to the models unchanged
    (presumably they are already numeric -- verify against the dataset).

    Parameters
    ----------
    data : pandas.DataFrame
        Input table; must contain ``mental_wellness_index_0_100``.
    threshold : int or float, default 15
        Cut-off used to binarise the target.
    test_size : float, default 0.25
        Fraction of rows held out for evaluation.
    random_state : int, default 27
        Seed shared by the train/test split and both models.

    Returns
    -------
    None
        Accuracy and F1 on the held-out set are printed, one line per model,
        in the form ``"<name>: <accuracy>; <f1>"``.

    Raises
    ------
    KeyError
        If ``mental_wellness_index_0_100`` is missing from ``data``.
    """
    target_column = 'mental_wellness_index_0_100'

    # Binarise the target column.
    y = (data[target_column] >= threshold).astype(int)

    # Feature preparation: keep every column except the target and the user id.
    excluded_columns = (target_column, 'user_id')
    feature_columns = [col for col in data.columns if col not in excluded_columns]
    X = data[feature_columns].copy()

    # Encode categorical features as integer codes.
    # The column is replaced as a whole (X[feature] = ...) instead of being
    # written through X.loc[:, feature]: the .loc form sets values in place,
    # which keeps the original object/string dtype and may be rejected for
    # strict string dtypes in recent pandas versions.
    for feature in ('gender', 'occupation', 'work_mode'):
        if feature in X.columns:
            X[feature] = LabelEncoder().fit_transform(X[feature].astype(str))

    # Split the dataset into training and test parts.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        shuffle=True,
        random_state=random_state
    )

    # Train the SVM model.
    # NOTE(review): features are fed to the RBF kernel unscaled; SVC is
    # sensitive to feature scale, so a StandardScaler may improve results.
    # Left as-is here so the reported scores do not change.
    svm_model = SVC(
        C=10,
        kernel='rbf',
        gamma='scale',
        random_state=random_state
    )
    svm_model.fit(X_train, y_train)

    # Train the logistic regression model.
    lr_model = LogisticRegression(
        C=1.0,
        penalty='l2',
        solver='liblinear',
        random_state=random_state,
        max_iter=1000
    )
    lr_model.fit(X_train, y_train)

    # Predictions and evaluation on the held-out set.
    # SVM
    y_pred_svm = svm_model.predict(X_test)
    svm_accuracy = accuracy_score(y_test, y_pred_svm)
    svm_f1 = f1_score(y_test, y_pred_svm)

    # LR
    y_pred_lr = lr_model.predict(X_test)
    lr_accuracy = accuracy_score(y_test, y_pred_lr)
    lr_f1 = f1_score(y_test, y_pred_lr)

    # Print the results following the required template.
    print(f"SVM: {svm_accuracy:.4f}; {svm_f1:.4f}")
    print(f"LR: {lr_accuracy:.4f}; {lr_f1:.4f}")
    

In [3]:
# Example invocation (kept commented out; uncomment to run on the cleaned dataset):
#df = pd.read_csv('DB_3_cleaned.csv')
#df.head()
#classification_training(df)