In [6]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
data = pd.read_csv('/srv/german_credit_data.csv')  # Replace with your dataset path

# Data Preprocessing
# Define features and target variable. The target is named once so the
# feature/label split cannot drift apart when it is changed.
target_column = 'Purpose'  # Replace with your target column
X = data.drop(columns=target_column)
y = data[target_column]

# Split the data: 80% train / 20% test, with a fixed seed for reproducibility.
# NOTE(review): the split is not stratified, so infrequent target classes can
# end up with only a handful of test rows; consider passing stratify=y if
# per-class metrics matter.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing pipeline
# Only the columns listed here reach the model: ColumnTransformer is used with
# its default remainder='drop', so every other column of X is discarded.
numeric_features = ['Credit amount', 'Duration']  # Replace with your numeric features
categorical_features = ['Sex', 'Housing']  # Replace with your categorical features

# Create preprocessing pipelines
# Numeric columns are standardized (zero mean, unit variance).
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# Categorical columns are one-hot encoded; categories unseen during fit are
# encoded as all zeros at predict time instead of raising an error.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Create a pipeline that combines preprocessing and model, so the transformers
# are fitted on the training split only and re-applied unchanged at predict time.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))
# zero_division=0 keeps the reported numbers identical to the default but
# avoids UndefinedMetricWarning when a class receives no predictions.
print(classification_report(y_test, y_pred, zero_division=0))
# Rows are true classes and columns are predicted classes, both in sorted
# label order (the same order as the rows of the report above).
print(confusion_matrix(y_test, y_pred))

Accuracy: 30.00%
                     precision    recall  f1-score   support

           business       0.21      0.17      0.19        18
                car       0.35      0.37      0.36        63
domestic appliances       0.00      0.00      0.00         1
          education       0.07      0.07      0.07        15
furniture/equipment       0.29      0.25      0.27        44
           radio/TV       0.35      0.41      0.38        54
            repairs       0.00      0.00      0.00         3
    vacation/others       0.00      0.00      0.00         2

           accuracy                           0.30       200
          macro avg       0.16      0.16      0.16       200
       weighted avg       0.29      0.30      0.29       200

[[ 3  4  0  3  3  5  0  0]
 [ 4 23  1  4 14 17  0  0]
 [ 0  1  0  0  0  0  0  0]
 [ 0  5  1  1  1  6  0  1]
 [ 3 18  0  1 11 11  0  0]
 [ 4 13  0  4  9 22  2  0]
 [ 0  1  0  0  0  2  0  0]
 [ 0  1  0  1  0  0  0  0]]
