Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

Data Collection and Processing

In [None]:
# Colab-only step: files.upload() prompts for a file upload (presumably the CSV
# ends up in the working directory — TODO confirm outside Colab). The returned
# value is kept in `uploaded` but is not used by the rest of this cell.
from google.colab import files
uploaded = files.upload()
# The file is read by its fixed relative name, so the uploaded file must be
# called exactly 'Credit_Card_Dataset.csv'.
df = pd.read_csv('Credit_Card_Dataset.csv')
# Preview the first 7 rows
df.head(7)

In [None]:
# Plain-text preview of the first five rows of the data
print(df.head(n=5))

In [None]:
# Show the entire DataFrame (the notebook truncates the display for large frames)
df

In [None]:
# Number of rows and columns, as a (rows, columns) tuple
df.shape

In [None]:
# Count the missing entries per column (isna is the same check as isnull)
df.isna().sum()

In [None]:
# Fill missing values in some columns with the mode (most frequent value).
# Calling fillna(..., inplace=True) on `df[col]` is chained assignment: pandas 2.x
# warns about it and, with Copy-on-Write enabled, it leaves `df` unchanged. The
# fill values are therefore collected per column and applied to the frame itself.
mode_filled_columns = [
    'annual_income_joint',
    'verification_income_joint',
    'debt_to_income_joint',
    'months_since_last_delinq',
    'months_since_90d_late',
    'months_since_last_credit_inquiry',
    'num_accounts_120d_past_due',
]
fill_values = {column: df[column].mode()[0] for column in mode_filled_columns}

# Fill missing values in emp_length and debt_to_income with the mean
fill_values['emp_length'] = df['emp_length'].mean()
fill_values['debt_to_income'] = df['debt_to_income'].mean()

# One fillna call with a {column: value} mapping fills each listed column with
# its own replacement value and returns a new frame.
df = df.fillna(value=fill_values)

# Drop columns that might not be relevant for prediction.
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
df = df.drop(columns=['emp_title', 'state', 'sub_grade', 'loan_purpose'], errors='ignore')

In [None]:
# Re-count the missing entries per column to confirm the imputation above
df.isna().sum()

In [None]:
# Encode the categorical columns as integer codes, in place.
# The outer keys are column names; each inner dict maps an original label to its
# code. Values that do not appear in a column's mapping are left as they are.
df.replace(
    to_replace={
        'homeownership': {'MORTGAGE': 0, 'RENT': 1, 'OWN': 2},
        'verified_income': {'Verified': 0, 'Not Verified': 1, 'Source Verified': 2},
        'verification_income_joint': {'Verified': 0, 'Not Verified': 1, 'Source Verified': 2},
        'application_type': {'individual': 0, 'joint': 1},
        'grade': {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6},
        'issue_month': {'Mar-18': 0, 'Feb-18': 1, 'Jan-18': 2},
        'loan_status': {
            'Current': 0,
            'Fully Paid': 1,
            'In Grace Period': 2,
            'Late (31-120 days)': 3,
            'Charged Off': 4,
            'Late (16-30 days)': 5,
        },
        'initial_listing_status': {'whole': 0, 'fractional': 1},
        'disbursement_method': {'Cash': 0, 'DirectPay': 1},
    },
    inplace=True,
)

In [None]:
# Display the DataFrame again to inspect the encoded columns
df

In [None]:
# Print an overview of the data: column names, non-null counts and dtypes.
# Useful here to check which columns are still non-numeric after the encoding.
df.info()

In [None]:
# Separate the label from the features.
# Y is the encoded loan_status column; X is every other column except
# paid_late_fees, which is removed together with the label.
Y = df['loan_status']
X = df.drop(['loan_status', 'paid_late_fees'], axis=1)

In [None]:
# Text dump of the feature matrix followed by the target column
print(X, Y, sep='\n')

In [None]:
# Print the dimensions of the feature matrix and of the target vector
print(X.shape, Y.shape, sep='\n')

In [None]:
# Saving data to Google Drive
# Colab-only step: mounts Google Drive at /content/drive so later cells can
# write files under that path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Write the cleaned and encoded frame to Drive, without the index column.
# The target folder must already exist on the mounted Drive.
df.to_csv(
    '/content/drive/My Drive/Colab_Notebooks/Predicting_Credit_Card_Approval/Updated_Credit_Card_Data.csv',
    index=False,
)

Train Test Split

In [None]:
# Hold out 10% of the rows for testing. stratify=Y keeps the label proportions
# the same in both splits, and random_state fixes the shuffle so the split is
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    stratify=Y,
    random_state=2,
)

In [None]:
# Shapes of the full feature matrix, the training split and the test split
print(X.shape, X_train.shape, X_test.shape)

Training the model:

Support Vector Machine Model

In [None]:
# Linear-kernel support vector classifier; C=0.1 is a lower regularisation
# parameter than scikit-learn's default of 1.0.
classifier = svm.SVC(C=0.1, kernel='linear')

In [None]:
def models(X_train, Y_train, model=None, threshold=0.5):
  """Fit a classifier on binarized labels and return the fitted estimator.

  Parameters:
    X_train: training features, passed unchanged to the estimator's fit().
    Y_train: numeric training labels (any array-like convertible to float).
    model: estimator exposing fit(X, y). When omitted, the notebook-level
      `classifier` is used, which is what the two-argument call relied on.
    threshold: labels strictly greater than this value become 1, all others
      (including NaN) become 0.

  Returns:
    The fitted estimator. Previously nothing was returned, so the caller's
    `model = models(...)` was always None.

  Raises:
    ValueError: if Y_train contains values that cannot be converted to float.
  """
  estimator = classifier if model is None else model

  # The evaluation cells always score against labels binarized at this
  # threshold, so training must use the same binary target regardless of the
  # label dtype. Binarizing only float64 labels trained a multi-class model on
  # integer labels and then scored it against 0/1 targets.
  binary_labels = np.where(np.asarray(Y_train, dtype=float) > threshold, 1, 0)

  # Fit the classifier with discrete labels
  estimator.fit(X_train, binary_labels)
  return estimator


In [None]:
# Fit the notebook-level `classifier` on the training split
model = models(X_train, Y_train)

Model Evaluation

In [None]:
# Define a threshold to separate binary classes
threshold = 0.5

# Collapse the loan_status codes into two classes: values above the threshold
# become 1, everything else becomes 0. With the encoding applied earlier
# ('Current' -> 0, all other statuses -> 1..5) this means 0 = 'Current' and
# 1 = any other status. Y_train is rebound to a NumPy array here.
Y_train = np.where(Y_train > threshold, 1, 0)

In [None]:
# accuracy score on training data
X_train_prediction = classifier.predict(X_train)
# accuracy_score is declared as (y_true, y_pred); the arguments were passed in
# the opposite order. Keywords make the roles explicit. The variable keeps its
# original spelling because the next cell prints it under that name.
training_data_accuray = accuracy_score(y_true=Y_train, y_pred=X_train_prediction)

In [None]:
# Report the fraction of training rows that were predicted correctly
print('Accuracy on training data : ', training_data_accuray)

In [None]:
# Define a threshold to separate binary classes
threshold = 0.5
# Binarize the held-out labels the same way as the training labels: values
# above the threshold become 1, everything else becomes 0. Y_test is rebound to
# a NumPy array here.
Y_test = np.where(Y_test > threshold, 1, 0)

In [None]:
# accuracy score on testing data
X_test_prediction = classifier.predict(X_test)
# accuracy_score is declared as (y_true, y_pred); the arguments were passed in
# the opposite order. Keywords make the roles explicit. The variable keeps its
# original spelling because the next cell prints it under that name.
test_data_accuray = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)

In [None]:
# Report the fraction of held-out test rows that were predicted correctly
print('Accuracy on test data : ', test_data_accuray)