In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Data: read train.csv (path is relative to the working directory) into the `titanic` DataFrame
titanic = pd.read_csv('train.csv')

# Data preprocessing
def preprocess_data(df):
    """Return a cleaned, model-ready copy of the passenger table.

    Steps, in order:
      1. Drop the 'Name', 'Ticket' and 'Cabin' columns.
      2. Fill missing 'Age' values with the median of 'Age'.
      3. Fill missing 'Embarked' values with the most frequent 'Embarked' value.
      4. One-hot encode 'Sex' and 'Embarked', dropping the first level of each.

    All other columns pass through unchanged. The frame passed in is not
    modified; a new DataFrame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Name', 'Ticket', 'Cabin', 'Age', 'Sex'
        and 'Embarked'.

    Returns
    -------
    pandas.DataFrame
        The transformed frame.

    Notes
    -----
    The fill values are computed from whatever rows are in `df`.
    """
    unused_columns = ['Name', 'Ticket', 'Cabin']
    categorical_columns = ['Sex', 'Embarked']

    trimmed = df.drop(columns=unused_columns)

    # Fill values are derived from the frame itself
    age_fill = trimmed['Age'].median()
    embarked_fill = trimmed['Embarked'].mode()[0]

    imputed = trimmed.assign(
        Age=trimmed['Age'].fillna(age_fill),
        Embarked=trimmed['Embarked'].fillna(embarked_fill),
    )

    return pd.get_dummies(imputed, columns=categorical_columns, drop_first=True)

# NOTE(review): preprocessing runs on the full table before the split below, so the
# Age/Embarked fill values are computed from rows that later land in the test set.
titanic = preprocess_data(titanic)

# Separate the features (every column except the target) from the target column
X = titanic.drop(columns='Survived')
y = titanic.loc[:, 'Survived']

# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [9]:
# Baseline: logistic regression fitted on the training split as-is (no resampling)
model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Predict on the held-out test split and print the per-class metrics
y_pred = model.predict(X_test)
print("Без перевзвешивания:")
print(classification_report(y_true=y_test, y_pred=y_pred))


Без перевзвешивания:
              precision    recall  f1-score   support

           0       0.82      0.85      0.84       105
           1       0.77      0.74      0.76        74

    accuracy                           0.80       179
   macro avg       0.80      0.80      0.80       179
weighted avg       0.80      0.80      0.80       179



In [10]:
# Oversample with SMOTE; only the training split is resampled, the test split is untouched.
# NOTE(review): features are not scaled before resampling (StandardScaler is imported but
# never used in the visible cells), while SMOTE picks neighbours by distance — confirm
# this is intended.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Same logistic regression settings as the baseline, fitted on the SMOTE-resampled data
model_smote = LogisticRegression(max_iter=1000, random_state=42).fit(
    X_train_smote, y_train_smote
)

# Predict on the original test split and print the per-class metrics
y_pred_smote = model_smote.predict(X_test)
print("С oversampling (SMOTE):")
print(classification_report(y_true=y_test, y_pred=y_pred_smote))


С oversampling (SMOTE):
              precision    recall  f1-score   support

           0       0.87      0.79      0.83       105
           1       0.74      0.84      0.78        74

    accuracy                           0.81       179
   macro avg       0.81      0.81      0.81       179
weighted avg       0.82      0.81      0.81       179



In [11]:
# Random undersampling; only the training split is resampled, the test split is untouched
undersample = RandomUnderSampler(random_state=42)
X_train_under, y_train_under = undersample.fit_resample(X_train, y_train)

# Same logistic regression settings as the baseline, fitted on the undersampled data
model_under = LogisticRegression(max_iter=1000, random_state=42).fit(
    X_train_under, y_train_under
)

# Predict on the original test split and print the per-class metrics
y_pred_under = model_under.predict(X_test)
print("С undersampling:")
print(classification_report(y_true=y_test, y_pred=y_pred_under))


С undersampling:
              precision    recall  f1-score   support

           0       0.88      0.78      0.83       105
           1       0.73      0.85      0.79        74

    accuracy                           0.81       179
   macro avg       0.81      0.82      0.81       179
weighted avg       0.82      0.81      0.81       179

