In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers

# Load the dataset
data = pd.read_csv('cleaned_german_credit_data_updated.csv')

# Preprocessing: integer-encode the categorical feature columns.
# LabelEncoder numbers the distinct values in sorted order, so the codes carry
# no ordinal meaning. The fitted encoders are kept so codes can be mapped back.
categorical_columns = [
    'Sex', 'Job', 'Saving accounts', 'Checking account',
    'Housing_own', 'Housing_rent', 'Purpose_car',
    'Purpose_domestic appliances', 'Purpose_education',
    'Purpose_furniture/equipment', 'Purpose_radio/TV',
    'Purpose_repairs', 'Purpose_vacation/others',
]
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Feature selection
X = data.drop('Risk', axis=1)
y = data['Risk']

# The model ends in a sigmoid trained with binary cross-entropy, which needs
# numeric labels. If 'Risk' is stored as text, encode it to integers here;
# a numeric 'Risk' column is left untouched.
# NOTE(review): LabelEncoder sorts class names, so confirm which class ends up
# as 1 via label_encoders['Risk'].classes_ before interpreting predictions.
if not pd.api.types.is_numeric_dtype(y):
    risk_encoder = LabelEncoder()
    y = pd.Series(risk_encoder.fit_transform(y), index=y.index, name='Risk')
    label_encoders['Risk'] = risk_encoder

# Splitting the dataset. stratify=y keeps the class proportions of 'Risk' the
# same in the train and test partitions, so test accuracy is not skewed by an
# unlucky split of the minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Feature scaling: fit on the training partition only, then reuse the same
# statistics for the test partition to avoid leaking test information.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape for transformer input: (samples, features) -> (samples, features, 1),
# i.e. every feature becomes one "token" carrying a single value.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Define the transformer model
def create_transformer_model(input_shape, num_heads=4, key_dim=2,
                             conv_filters=32, kernel_size=3,
                             dense_units=32, dropout_rate=0.1):
    """Build an uncompiled attention + convolution binary classifier.

    The network applies one self-attention block (multi-head attention,
    residual connection, layer normalization), a same-padded 1D convolution,
    global average pooling over the sequence axis, one hidden dense layer with
    dropout, and a single sigmoid output unit.

    Args:
        input_shape: Shape of one sample without the batch axis, passed
            straight to ``layers.Input``.
        num_heads: Number of attention heads.
        key_dim: Size of each attention head for query and key.
        conv_filters: Number of filters in the Conv1D layer.
        kernel_size: Width of the Conv1D kernel.
        dense_units: Width of the hidden dense layer.
        dropout_rate: Dropout rate applied after the hidden dense layer.

    Returns:
        A ``keras.Model`` mapping the input to one sigmoid probability per
        sample. The caller is responsible for compiling it.

    The defaults reproduce the original fixed architecture, so existing calls
    that pass only ``input_shape`` are unaffected.
    """
    inputs = layers.Input(shape=input_shape)

    # Transformer block: self-attention (query and value are both `inputs`).
    attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(inputs, inputs)
    # Residual connection followed by layer normalization.
    x = layers.LayerNormalization(epsilon=1e-6)(attention + inputs)
    x = layers.Conv1D(filters=conv_filters, kernel_size=kernel_size,
                      padding='same', activation='relu')(x)
    # Collapse the sequence axis so the dense head sees one vector per sample.
    x = layers.GlobalAveragePooling1D()(x)

    # Fully connected layers
    x = layers.Dense(dense_units, activation='relu')(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)  # For binary classification

    return keras.Model(inputs, outputs)

# Seed the Python, NumPy and backend random generators before any weights are
# created, so initialisation, dropout masks and batch shuffling repeat between
# runs. (Some GPU kernels may still be non-deterministic.)
keras.utils.set_random_seed(42)

# Create the model
model = create_transformer_model((X_train.shape[1], 1))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model. The last 20% of the training data is held out for
# validation; the returned History is kept so the per-epoch curves can be
# inspected or plotted afterwards.
history = model.fit(X_train, y_train, validation_split=0.2, epochs=20, batch_size=32)

# Evaluate the model on the untouched test partition
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.4f}')

ModuleNotFoundError: No module named 'distutils'

In [7]:
! pip install setuptools

Defaulting to user installation because normal site-packages is not writeable


In [6]:
! C:\Users\Samsung\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip

Defaulting to user installation because normal site-packages is not writeable
Collecting pip
  Downloading pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-24.3.1-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 1.8/1.8 MB 14.3 MB/s eta 0:00:00
Installing collected packages: pip
Successfully installed pip-24.3.1
