In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from gmdh import Combi, Mia
from sklearn.metrics import accuracy_score

In [16]:
df = pd.read_csv("train.csv")
df.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"], inplace=True)
df["Age"].fillna(df["Age"].median(), inplace=True)
df["Embarked"].fillna(df["Embarked"].mode()[0], inplace=True)
label_encoders = {}
for col in ["Sex", "Embarked"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Age"].fillna(df["Age"].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Embarked"].fillna(df["Embarked"].mode()[0], inplace=True)


In [17]:
X = df.drop(columns=["Survived"])
y = df["Survived"]

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [19]:
base_learners = [
    ("rf", RandomForestClassifier(n_estimators=100, random_state=42)),
    ("gb", GradientBoostingClassifier(n_estimators=100, random_state=42))
]
stacking_model = StackingClassifier(estimators=base_learners, final_estimator=LogisticRegression())
stacking_model.fit(X_train, y_train)
y_pred_stacking = stacking_model.predict(X_test)
stacking_accuracy = accuracy_score(y_test, y_pred_stacking)
print(f"Stacking Accuracy: {stacking_accuracy:.4f}")

Stacking Accuracy: 0.8101


In [20]:
mlp_model = MLPClassifier(hidden_layer_sizes=(50, 30), max_iter=500, random_state=42)
mlp_model.fit(X_train, y_train)
y_pred_mlp = mlp_model.predict(X_test)
mlp_accuracy = accuracy_score(y_test, y_pred_mlp)
print(f"MLP Accuracy: {mlp_accuracy:.4f}")

MLP Accuracy: 0.7877


In [None]:
X_train_np = X_train.to_numpy(dtype=float)  # Преобразуем в numpy массив
X_test_np = X_test.to_numpy(dtype=float)
y_train_np = y_train.to_numpy(dtype=float).ravel()  # Делаем одномерным
y_test_np = y_test.to_numpy(dtype=float).ravel()

<class 'numpy.ndarray'> (712, 7)
<class 'numpy.ndarray'> (712,)


In [33]:
gmdh_linear = Combi()
gmdh_linear.fit(X_train_np, y_train_np)
y_pred_gmdh_linear = gmdh_linear.predict(X_test).round().astype(int)
gmdh_linear_accuracy = accuracy_score(y_test_np, y_pred_gmdh_linear)
print(f"GMDH Linear (COMBI) Accuracy: {gmdh_linear_accuracy:.4f}")

GMDH Linear (COMBI) Accuracy: 0.7765


In [34]:
gmdh_nonlinear = Mia()
gmdh_nonlinear.fit(X_train_np, y_train_np)
y_pred_gmdh_nonlinear = gmdh_nonlinear.predict(X_test).round().astype(int)
gmdh_nonlinear_accuracy = accuracy_score(y_test_np, y_pred_gmdh_nonlinear)
print(f"GMDH Nonlinear (MIA) Accuracy: {gmdh_nonlinear_accuracy:.4f}")

GMDH Nonlinear (MIA) Accuracy: 0.7821
