# Predicting Credit Card Approval

## 1. Importing necessary libraries 

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV

In [2]:
import warnings

# Blanket filter: every warning, from every module, is silenced from here on.
# NOTE(review): this also hides scikit-learn diagnostics (e.g. convergence or
# input-shape warnings) raised by the cells below.
warnings.simplefilter("ignore")

## 2. Loading the dataset

In [3]:
# The file has no header row, so columns are numbered 0..N-1.
# A "?" entry marks a missing value and is parsed as NaN.
df = pd.read_csv("crx.data", na_values="?", header=None)

print(df.head())

  0      1      2  3  4  5  6     7  8  9   10 11 12     13   14 15
0  b  30.83  0.000  u  g  w  v  1.25  t  t   1  f  g  202.0    0  +
1  a  58.67  4.460  u  g  q  h  3.04  t  t   6  f  g   43.0  560  +
2  a  24.50  0.500  u  g  q  h  1.50  t  f   0  f  g  280.0  824  +
3  b  27.83  1.540  u  g  w  v  3.75  t  t   5  t  g  100.0    3  +
4  b  20.17  5.625  u  g  w  v  1.71  t  f   0  f  s  120.0    0  +


In [4]:
# Number of missing (NaN) entries in each column.
print(df.isna().sum())

0     12
1     12
2      0
3      6
4      6
5      9
6      9
7      0
8      0
9      0
10     0
11     0
12     0
13    13
14     0
15     0
dtype: int64


In [5]:
# Work on an independent copy so the raw frame `df` stays untouched.
df_imputed = df.copy(deep=True)

## 3. Handling missing values

In [6]:
# Fill missing values column by column:
#   * numeric columns   -> column mean
#   * all other columns -> most frequent value
# The numeric check uses pandas' dtype helper instead of comparing the dtype
# to "object", so text columns stored with a dedicated string dtype still take
# the most-frequent-value branch rather than falling through to .mean().
# NOTE(review): the fill statistics are computed on the full frame, i.e. before
# the train/test split further down — confirm that is acceptable for evaluation.
for col in df_imputed.columns:
    column = df_imputed[col]
    if not column.isna().any():
        continue  # nothing to fill in this column

    if pd.api.types.is_numeric_dtype(column):
        fill_value = column.mean()
    else:
        # value_counts() is sorted by descending frequency and ignores NaN,
        # so its first index label is the most common observed value.
        counts = column.value_counts()
        if counts.empty:
            continue  # column is entirely missing; there is nothing to impute from
        fill_value = counts.index[0]

    df_imputed[col] = column.fillna(fill_value)

## 4. Encoding Categorical Variables

In [7]:
# One-hot encode the non-numeric columns; drop_first=True drops the first
# level of each encoded column so the remaining dummies are not redundant.
df_encoded = pd.get_dummies(data=df_imputed, drop_first=True)

## 5. Defining features and target

In [8]:
# Features: every column except the last, as a single float matrix (the dummy
# columns produced by get_dummies are cast alongside the numeric ones instead
# of ending up in a mixed-dtype array).
X = df_encoded.iloc[:, :-1].to_numpy(dtype=float)

# Target: the last column as a 1-D vector of shape (n_samples,). Selecting it
# with a scalar position (-1) rather than a list ([-1]) avoids the
# (n_samples, 1) column vector that scikit-learn estimators have to flatten.
# NOTE(review): with drop_first=True the retained dummy for the class column is
# presumably the "-" level, so True would mean a *rejected* application —
# confirm before interpreting the reports below.
y = df_encoded.iloc[:, -1].to_numpy()

## 6. Train-Test Split

In [9]:
# Hold out 33% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.33,
)

## 7. Scaling the features

In [10]:
# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
rescaledX_train = scaler.transform(X_train)
rescaledX_test = scaler.transform(X_test)

## 8. Training the model

In [11]:
# Baseline classifier with scikit-learn's default settings, fitted on the
# scaled training features.
logreg = LogisticRegression()
logreg.fit(X=rescaledX_train, y=y_train)

In [12]:
# Predict on both splits with the baseline model.
y_train_pred = logreg.predict(rescaledX_train)
y_test_pred = logreg.predict(rescaledX_test)

# Confusion matrix on the rows the model was fitted on.
print("Training Confusion Matrix:")
print(confusion_matrix(y_train, y_train_pred))

# Confusion matrix on the held-out rows.
print("Test Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))


Training Confusion Matrix:
[[189  17]
 [ 28 228]]
Test Confusion Matrix:
[[ 84  17]
 [ 17 110]]


## 9. Hyperparameter Tuning (GridSearchCV)

In [13]:
# Candidate values for the grid search: 3 tolerances x 3 iteration caps
# = 9 parameter combinations.
tol = [0.01, 0.001, 0.0001]
max_iter = [100, 150, 200]
param_grid = {"tol": tol, "max_iter": max_iter}

In [14]:
# 5-fold cross-validated search over param_grid, starting from the baseline
# logistic-regression estimator.
# NOTE(review): rescaledX_train was scaled with statistics from the whole
# training set, so each validation fold has already influenced the scaler.
grid_model = GridSearchCV(logreg, param_grid, cv=5)
grid_model_result = grid_model.fit(rescaledX_train, y_train)

## 10. The best model

In [15]:
# Take the estimator that the grid search selected as best.
best_model = grid_model.best_estimator_
# Bare expression on the last line so the notebook displays the estimator.
best_model

In [16]:
# Predictions of the tuned model on the scaled held-out features.
y_pred = best_model.predict(X=rescaledX_test)

In [17]:
# Share of held-out rows that the tuned model classified correctly.
test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)

# Confusion matrix of true labels against predicted labels.
test_confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", test_confusion)

# Per-class precision, recall, F1 and support.
test_report = classification_report(y_test, y_pred)
print(test_report)

Test Accuracy: 0.8421052631578947
Confusion Matrix:
 [[ 84  17]
 [ 19 108]]
              precision    recall  f1-score   support

       False       0.82      0.83      0.82       101
        True       0.86      0.85      0.86       127

    accuracy                           0.84       228
   macro avg       0.84      0.84      0.84       228
weighted avg       0.84      0.84      0.84       228

