In [2]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


data = pd.read_csv('Titanic.csv')
data_cleaned = data.dropna()

non_numeric_cols = ['Name', 'Ticket', 'Cabin']
data_cleaned = data_cleaned.drop(columns=non_numeric_cols)

data_cleaned['Sex'] = data_cleaned['Sex'].map({'male': 0, 'female': 1})
data_cleaned['Embarked'] = data_cleaned['Embarked'].map({'C': 1, 'Q': 2, 'S': 3})

data_cleaned = data_cleaned.drop(columns=['PassengerId'])

initial_rows = len(data)
cleaned_rows = len(data_cleaned)
lost_percentage = ((initial_rows - cleaned_rows) / initial_rows) * 100
print(f"Процент потерянных данных: {lost_percentage:.2f}%")

# Машинное обучение
X = data_cleaned.drop(columns=['Survived'])
y = data_cleaned['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Точность модели: {accuracy:.4f}")
X_no_embarked = X.drop(columns=['Embarked'])
X_train_ne, X_test_ne, y_train_ne, y_test_ne = train_test_split(X_no_embarked, y, test_size=0.3, random_state=42)
model_ne = LogisticRegression(max_iter=1000)
model_ne.fit(X_train_ne, y_train_ne)
y_pred_ne = model_ne.predict(X_test_ne)
accuracy_ne = accuracy_score(y_test_ne, y_pred_ne)
print(f"Точность модели без признака Embarked: {accuracy_ne:.4f}")
print(f"Изменение точности: {accuracy - accuracy_ne:.4f}")

coefficients = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': model.coef_[0]
})
print("\nКоэффициенты модели:")
print(coefficients.sort_values(by='Coefficient', ascending=False))


Процент потерянных данных: 79.46%
Точность модели: 0.6909
Точность модели без признака Embarked: 0.6909
Изменение точности: 0.0000

Коэффициенты модели:
    Feature  Coefficient
1       Sex     2.457463
5      Fare     0.002858
2       Age    -0.025012
6  Embarked    -0.194425
3     SibSp    -0.222469
4     Parch    -0.397235
0    Pclass    -0.590362
