In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, precision_score, recall_score, roc_curve, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the semicolon-separated dataset and show the first rows as a sanity check.
data = pd.read_csv('german.csv', sep=';')
print(data.head())

# Column 0 is used as the target; every remaining column is used as a feature.
X = data.iloc[:, 1:].to_numpy()
y = data.iloc[:, 0].to_numpy()

# Hold out 20% of the rows for testing. `stratify=y` keeps the class
# proportions the same in both parts, so precision/recall on the small test
# set are not distorted by an unlucky draw of the minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [3]:
# Count the training samples per class and draw one bar per class.
# A 2-bin histogram of 0/1 labels puts its bars over [0, 0.5] and [0.5, 1],
# so ticks at 0 and 1 end up on the outer bar edges; a bar chart keeps every
# tick centred under the bar it labels.
class_labels, class_counts = np.unique(y_train, return_counts=True)
bar_positions = np.arange(len(class_labels))

plt.bar(bar_positions, class_counts, edgecolor='k')
plt.xticks(bar_positions, class_labels)
# NOTE(review): the axis label assumes the target is encoded as 0/1 — confirm against the data.
plt.xlabel('Class (0: Non-Creditworthy, 1: Creditworthy)')
plt.ylabel('Count')
plt.title('Distribution of Classes in Training Data')
plt.show()

In [4]:
def fit_and_report(model, name, X_train, y_train, X_test, y_test):
    """Fit a classifier, print its test-set metrics and return its predictions.

    Parameters
    ----------
    model : estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        It is fitted in place on the training data.
    name : str
        Human-readable model name used in the printed header.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels used for every metric.

    Returns
    -------
    tuple
        ``(pred, prob, roc_auc)``: hard predictions for ``X_test``, the
        second column of ``predict_proba`` for ``X_test``, and the ROC AUC
        computed from those probabilities.
    """
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    # Column 1 of predict_proba is the score for the second entry of model.classes_.
    prob = model.predict_proba(X_test)[:, 1]
    roc_auc = roc_auc_score(y_test, prob)

    print(f"\n{name} метрики:")
    print(f"ROC AUC: {roc_auc:.2f}")
    print(f"Accuracy: {accuracy_score(y_test, pred):.2f}")
    print(f"Precision: {precision_score(y_test, pred):.2f}")
    print(f"Recall: {recall_score(y_test, pred):.2f}")
    return pred, prob, roc_auc


# Random forest: 200 depth-limited trees, fixed seed for reproducibility.
rf_model = RandomForestClassifier(n_estimators=200, max_depth=8, min_samples_split=5, min_samples_leaf=2, random_state=42)
rf_pred, rf_prob, rf_roc_auc = fit_and_report(
    rf_model, "Random Forest", X_train, y_train, X_test, y_test
)

# Gradient boosting: 200 shallow trees with a reduced learning rate, fixed seed.
gb_model = GradientBoostingClassifier(n_estimators=200, learning_rate=0.05, max_depth=4, min_samples_split=5, random_state=42)
gb_pred, gb_prob, gb_roc_auc = fit_and_report(
    gb_model, "Gradient Boosting", X_train, y_train, X_test, y_test
)

In [None]:
# Optimized MLP (neural network).
# The features are standardized before they reach the network: MLPClassifier
# is sensitive to feature scale, and raw columns on very different ranges slow
# down or destabilize training. Wrapping both steps in a pipeline means the
# scaler is fitted on the training data only and then reused unchanged for
# the test data, so no test-set statistics leak into training.
mlp_model = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(64, 32), alpha=0.0001, max_iter=1000, random_state=42),
)
mlp_model.fit(X_train, y_train)
mlp_pred = mlp_model.predict(X_test)
# Column 1 of predict_proba is the score for the second entry of the classifier's classes_.
mlp_prob = mlp_model.predict_proba(X_test)[:, 1]
mlp_roc_auc = roc_auc_score(y_test, mlp_prob)

print("\nMLP (Neural Network) метрики:")
print(f"ROC AUC: {mlp_roc_auc:.2f}")
print(f"Accuracy: {accuracy_score(y_test, mlp_pred):.2f}")
print(f"Precision: {precision_score(y_test, mlp_pred):.2f}")
print(f"Recall: {recall_score(y_test, mlp_pred):.2f}")