In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# ---------------------------------------------------------------------------
# Baseline: linear SVM on the cleaned German credit data
# ---------------------------------------------------------------------------

# Columns that are passed through LabelEncoder before modelling.
categorical_columns = [
    'Sex',
    'Job',
    'Saving accounts',
    'Checking account',
    'Housing_own',
    'Housing_rent',
    'Purpose_car',
    'Purpose_domestic appliances',
    'Purpose_education',
    'Purpose_furniture/equipment',
    'Purpose_radio/TV',
    'Purpose_repairs',
    'Purpose_vacation/others',
]

# Fraction of rows held out for testing, and the seed that fixes the split.
TEST_FRACTION = 0.3
SEED = 42

# Read the dataset (path is relative to the notebook's working directory).
data = pd.read_csv('cleaned_german_credit_data_updated.csv')

# Replace each listed column with its integer codes, one encoder per column.
# Every fitted encoder is stored in `label_encoders` under its column name.
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    encoded_values = le.fit_transform(data[column])
    data[column] = encoded_values
    label_encoders[column] = le

# 'Risk' is the target; every other column is used as a feature.
y = data['Risk']
X = data.drop(columns='Risk')

# Hold out part of the rows for testing.
# NOTE(review): the split is not stratified on `y`; consider `stratify=y` if
# the class ratio should be identical in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SEED,
)

# Standardise features using statistics learned from the training rows only,
# then apply the same transformation to the test rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a support-vector classifier with a linear kernel
# (other kernels such as 'rbf' or 'poly' can be tried here as well).
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# Predict on the held-out rows and report accuracy plus per-class metrics.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
print(classification_report(y_test, y_pred))

Accuracy: 0.554140127388535
              precision    recall  f1-score   support

           0       0.51      0.37      0.43        71
           1       0.58      0.71      0.64        86

    accuracy                           0.55       157
   macro avg       0.54      0.54      0.53       157
weighted avg       0.55      0.55      0.54       157



In [2]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search for the SVM.
#
# `gamma` is a kernel coefficient that SVC only uses for the 'rbf', 'poly' and
# 'sigmoid' kernels; it has no effect when kernel='linear'. With a single
# dict grid every linear candidate was fitted once per gamma value, producing
# identical models and duplicated cross-validation work. The linear kernel
# therefore gets its own sub-grid without `gamma`.
C_VALUES = [0.1, 1, 10, 100]
param_grid = [
    {
        'C': C_VALUES,
        'kernel': ['linear'],
    },
    {
        'C': C_VALUES,
        'kernel': ['rbf', 'poly'],
        'gamma': ['scale', 'auto'],  # 'scale' is the SVC default
    },
]

# 5-fold cross-validated grid search, selecting on accuracy.
# NOTE(review): X_train was standardised on the whole training split before
# this search runs, so each validation fold contributed to the scaling
# statistics. Wrapping StandardScaler + SVC in a Pipeline and searching over
# that would keep the scaling inside each fold.
grid_search = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy')

# Fit every candidate on the (already scaled) training data.
grid_search.fit(X_train, y_train)

# Report the winning configuration and its mean cross-validated accuracy.
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Score:", grid_search.best_score_)

# Evaluate the best estimator (refit on all of X_train by GridSearchCV's
# default refit=True) on the held-out test set.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Best Parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Best Cross-Validation Score: 0.6684931506849315
Test Accuracy: 0.554140127388535
              precision    recall  f1-score   support

           0       0.51      0.35      0.42        71
           1       0.57      0.72      0.64        86

    accuracy                           0.55       157
   macro avg       0.54      0.54      0.53       157
weighted avg       0.55      0.55      0.54       157

