In [25]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [26]:
# Load the dataset.
# The CSV location can be overridden with the GERMAN_CREDIT_CSV environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset or empty, the original local path is used unchanged.
file_path = (
    os.environ.get('GERMAN_CREDIT_CSV')
    or 'C:/Users/Samsung/Downloads/cleaned_german_credit_data_updated.csv'
)
df = pd.read_csv(file_path)

# The slicing below treats the last column as the target and every column
# before it as a feature, so fail early (with a clear message) if the file
# cannot provide both.
if df.shape[1] < 2:
    raise ValueError(
        f'Expected at least one feature column and one target column in '
        f'{file_path!r}, got {df.shape[1]} column(s)'
    )

# Assume the last column is the target variable and the rest are features
X = df.iloc[:, :-1]  # Features
y = df.iloc[:, -1]   # Target variable

# Split the data into training and testing sets: 20% of the rows are held out
# for testing, and the fixed random_state makes the split reproducible.
# NOTE(review): the split is not stratified on y; consider stratify=y if the
# class balance of the two splits matters - doing so changes the split and
# therefore every downstream number.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [27]:
# Search over the C hyperparameter of an L2-penalized logistic regression:
# 100 log-spaced candidates between 1e-4 and 1e4, each evaluated with
# 5-fold cross-validation on the training split only.
param_grid = dict(C=np.logspace(start=-4, stop=4, num=100))
grid_search = GridSearchCV(
    estimator=LogisticRegression(solver='liblinear', penalty='l2'),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

# Report the winning candidate
print("Best parameters:", grid_search.best_params_)

Best parameters: {'C': 0.14174741629268062}


In [28]:
# Build the final L2-regularized (ridge) logistic regression, reusing the C
# value selected by the grid search and the same liblinear solver.
ridge_model = LogisticRegression(
    C=grid_search.best_params_['C'],
    penalty='l2',
    solver='liblinear',
)

# Train on the training split
ridge_model.fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = ridge_model.predict(X_test)

# Overall test accuracy, printed with two decimals
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Confusion matrix, then the per-class classification report; each header is
# followed by its table on the next line.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy: 0.57
Confusion Matrix:
[[24 26]
 [19 36]]

Classification Report:
              precision    recall  f1-score   support

           0       0.56      0.48      0.52        50
           1       0.58      0.65      0.62        55

    accuracy                           0.57       105
   macro avg       0.57      0.57      0.57       105
weighted avg       0.57      0.57      0.57       105

