In [5]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Загрузка данных
data = pd.read_csv('german_credit_data.csv')

# Указание нужных признаков
features = [
    'Age', 'Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account',
    'Credit amount', 'Duration', 'Purpose'
]

data['Default'] = (data['Credit amount'] / data['Duration'] > 200).astype(int)  # Пример генерации целевого признака

target = 'Default'

# Предобработка данных
# Кодирование категориальных признаков
categorical_features = ['Sex', 'Housing', 'Saving accounts', 'Checking account', 'Purpose']
label_encoders = {}

for feature in categorical_features:
    le = LabelEncoder()
    data[feature] = le.fit_transform(data[feature].astype(str))
    label_encoders[feature] = le

# Разделение на признаки и целевой признак
X = data[features]
y = data[target]

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Обучение модели RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Оценка модели
predictions = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, predictions))
print("Classification Report:\n", classification_report(y_test, predictions))

# Сохранение модели и метаданных
joblib.dump(model, 'credit_default_model.pkl')
joblib.dump({
    'columns': X.columns.tolist(),
    'label_encoders': label_encoders
}, 'model_metadata.pkl')


Accuracy: 0.95
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.98      0.97       153
           1       0.93      0.85      0.89        47

    accuracy                           0.95       200
   macro avg       0.94      0.92      0.93       200
weighted avg       0.95      0.95      0.95       200



['model_metadata.pkl']