# Data Dictionary
1. person_age	The age of the loan applicant.

2. person_income	The annual income of the loan applicant.

3. person_home_ownership	Indicates whether the applicant owns their home, rents, or has another living arrangement.

4. person_emp_length	The length of time the applicant has been employed.

5. loan_intent	The purpose for which the loan is being requested (e.g., debt consolidation, home improvement).

6. loan_grade	A letter grade assigned to the loan based on the applicant's credit worthiness.

7. loan_amnt	The amount of the loan requested.

8. loan_int_rate	The interest rate charged on the loan.

9. loan_status	The current status of the loan (e.g., fully paid, charged off, current).

10. loan_percent_income	The percentage of the applicant's income that the loan amount represents.

11. cb_person_default_on_file	Indicates whether the applicant has a history of defaulting on previous loans.

12. cb_person_cred_hist_length	The length of the applicant's credit history.



In [1]:
import pandas as pd


data = pd.read_csv('credit_risk_dataset.csv')

In [2]:
data.head(3)

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,RENT,123.0,PERSONAL,D,35000,16.02,1,0.59,Y,3
1,21,9600,OWN,5.0,EDUCATION,B,1000,11.14,0,0.1,N,2
2,25,9600,MORTGAGE,1.0,MEDICAL,C,5500,12.87,1,0.57,N,3


In [3]:
data.isnull().sum()

person_age                       0
person_income                    0
person_home_ownership            0
person_emp_length              895
loan_intent                      0
loan_grade                       0
loan_amnt                        0
loan_int_rate                 3116
loan_status                      0
loan_percent_income              0
cb_person_default_on_file        0
cb_person_cred_hist_length       0
dtype: int64

In [4]:
data.columns

Index(['person_age', 'person_income', 'person_home_ownership',
       'person_emp_length', 'loan_intent', 'loan_grade', 'loan_amnt',
       'loan_int_rate', 'loan_status', 'loan_percent_income',
       'cb_person_default_on_file', 'cb_person_cred_hist_length'],
      dtype='object')

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32581 entries, 0 to 32580
Data columns (total 12 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   person_age                  32581 non-null  int64  
 1   person_income               32581 non-null  int64  
 2   person_home_ownership       32581 non-null  object 
 3   person_emp_length           31686 non-null  float64
 4   loan_intent                 32581 non-null  object 
 5   loan_grade                  32581 non-null  object 
 6   loan_amnt                   32581 non-null  int64  
 7   loan_int_rate               29465 non-null  float64
 8   loan_status                 32581 non-null  int64  
 9   loan_percent_income         32581 non-null  float64
 10  cb_person_default_on_file   32581 non-null  object 
 11  cb_person_cred_hist_length  32581 non-null  int64  
dtypes: float64(3), int64(5), object(4)
memory usage: 3.0+ MB


In [6]:
data['loan_status'].value_counts()

loan_status
0    25473
1     7108
Name: count, dtype: int64

In [7]:
data.isnull().sum()

person_age                       0
person_income                    0
person_home_ownership            0
person_emp_length              895
loan_intent                      0
loan_grade                       0
loan_amnt                        0
loan_int_rate                 3116
loan_status                      0
loan_percent_income              0
cb_person_default_on_file        0
cb_person_cred_hist_length       0
dtype: int64

In [8]:
data['loan_int_rate'] = data['loan_int_rate'].fillna(data['loan_int_rate'].mean())

In [9]:
data['person_emp_length'] = data['person_emp_length'].fillna(data['person_emp_length'].mean())

In [10]:
data['person_emp_length'] = data['person_emp_length'].astype(int)

In [11]:
data

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,RENT,123,PERSONAL,D,35000,16.02,1,0.59,Y,3
1,21,9600,OWN,5,EDUCATION,B,1000,11.14,0,0.10,N,2
2,25,9600,MORTGAGE,1,MEDICAL,C,5500,12.87,1,0.57,N,3
3,23,65500,RENT,4,MEDICAL,C,35000,15.23,1,0.53,N,2
4,24,54400,RENT,8,MEDICAL,C,35000,14.27,1,0.55,Y,4
...,...,...,...,...,...,...,...,...,...,...,...,...
32576,57,53000,MORTGAGE,1,PERSONAL,C,5800,13.16,0,0.11,N,30
32577,54,120000,MORTGAGE,4,PERSONAL,A,17625,7.49,0,0.15,N,19
32578,65,76000,RENT,3,HOMEIMPROVEMENT,B,35000,10.99,1,0.46,N,28
32579,56,150000,MORTGAGE,5,PERSONAL,B,15000,11.48,0,0.10,N,26


In [12]:
data['loan_intent'].value_counts()

loan_intent
EDUCATION            6453
MEDICAL              6071
VENTURE              5719
PERSONAL             5521
DEBTCONSOLIDATION    5212
HOMEIMPROVEMENT      3605
Name: count, dtype: int64

In [13]:
data['loan_intent'] = data['loan_intent'].replace({
    'EDUCATION': 1,
    'MEDICAL': 2,
    'VENTURE': 3,
    'PERSONAL': 4,
    'DEBTCONSOLIDATION': 5,
    'HOMEIMPROVEMENT': 6
})

  data['loan_intent'] = data['loan_intent'].replace({


In [14]:
data['person_home_ownership'].value_counts()

person_home_ownership
RENT        16446
MORTGAGE    13444
OWN          2584
OTHER         107
Name: count, dtype: int64

In [15]:
data['person_home_ownership'] = data['person_home_ownership'].replace({
    'RENT': 1,
    'MORTGAGE': 2,
    'OWN': 3,
    'OTHER': 4
})

  data['person_home_ownership'] = data['person_home_ownership'].replace({


In [16]:
data

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,1,123,4,D,35000,16.02,1,0.59,Y,3
1,21,9600,3,5,1,B,1000,11.14,0,0.10,N,2
2,25,9600,2,1,2,C,5500,12.87,1,0.57,N,3
3,23,65500,1,4,2,C,35000,15.23,1,0.53,N,2
4,24,54400,1,8,2,C,35000,14.27,1,0.55,Y,4
...,...,...,...,...,...,...,...,...,...,...,...,...
32576,57,53000,2,1,4,C,5800,13.16,0,0.11,N,30
32577,54,120000,2,4,4,A,17625,7.49,0,0.15,N,19
32578,65,76000,1,3,6,B,35000,10.99,1,0.46,N,28
32579,56,150000,2,5,4,B,15000,11.48,0,0.10,N,26


In [17]:
data['cb_person_default_on_file'] = data['cb_person_default_on_file'].replace({
    'Y': 1,
    'N': 0,
    
})

  data['cb_person_default_on_file'] = data['cb_person_default_on_file'].replace({


In [18]:
data.drop('loan_amnt', axis = 1, inplace = True)

In [19]:
data

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,1,123,4,D,16.02,1,0.59,1,3
1,21,9600,3,5,1,B,11.14,0,0.10,0,2
2,25,9600,2,1,2,C,12.87,1,0.57,0,3
3,23,65500,1,4,2,C,15.23,1,0.53,0,2
4,24,54400,1,8,2,C,14.27,1,0.55,1,4
...,...,...,...,...,...,...,...,...,...,...,...
32576,57,53000,2,1,4,C,13.16,0,0.11,0,30
32577,54,120000,2,4,4,A,7.49,0,0.15,0,19
32578,65,76000,1,3,6,B,10.99,1,0.46,0,28
32579,56,150000,2,5,4,B,11.48,0,0.10,0,26


In [20]:
# from sklearn.preprocessing import StandardScaler

# Sc = StandardScaler()

In [21]:
# data['person_income'] = Sc.fit_transform(data[['person_income']])

In [22]:
data

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,1,123,4,D,16.02,1,0.59,1,3
1,21,9600,3,5,1,B,11.14,0,0.10,0,2
2,25,9600,2,1,2,C,12.87,1,0.57,0,3
3,23,65500,1,4,2,C,15.23,1,0.53,0,2
4,24,54400,1,8,2,C,14.27,1,0.55,1,4
...,...,...,...,...,...,...,...,...,...,...,...
32576,57,53000,2,1,4,C,13.16,0,0.11,0,30
32577,54,120000,2,4,4,A,7.49,0,0.15,0,19
32578,65,76000,1,3,6,B,10.99,1,0.46,0,28
32579,56,150000,2,5,4,B,11.48,0,0.10,0,26


In [23]:
data['loan_grade'].value_counts()

loan_grade
A    10777
B    10451
C     6458
D     3626
E      964
F      241
G       64
Name: count, dtype: int64

In [24]:
data['loan_grade'] = data['loan_grade'].replace({'A':1,  'B':2, 'C':3, 'D':4, 'E':5, 'F': 6, 'G':7})

  data['loan_grade'] = data['loan_grade'].replace({'A':1,  'B':2, 'C':3, 'D':4, 'E':5, 'F': 6, 'G':7})


In [25]:
data

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,1,123,4,4,16.02,1,0.59,1,3
1,21,9600,3,5,1,2,11.14,0,0.10,0,2
2,25,9600,2,1,2,3,12.87,1,0.57,0,3
3,23,65500,1,4,2,3,15.23,1,0.53,0,2
4,24,54400,1,8,2,3,14.27,1,0.55,1,4
...,...,...,...,...,...,...,...,...,...,...,...
32576,57,53000,2,1,4,3,13.16,0,0.11,0,30
32577,54,120000,2,4,4,1,7.49,0,0.15,0,19
32578,65,76000,1,3,6,2,10.99,1,0.46,0,28
32579,56,150000,2,5,4,2,11.48,0,0.10,0,26


In [28]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [29]:
# Split the data into training and testing sets
X = data.drop('loan_status', axis=1)
y = data['loan_status']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [31]:
lr = LogisticRegression()

lr.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [32]:
y_pred = lr.predict(X_test)

In [33]:
# Evaluate the model using accuracy score, classification report, and confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.8112628510050637
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.96      0.89      5072
           1       0.69      0.27      0.39      1445

    accuracy                           0.81      6517
   macro avg       0.76      0.62      0.64      6517
weighted avg       0.79      0.81      0.78      6517

Confusion Matrix:
[[4894  178]
 [1052  393]]


In [34]:
import joblib

joblib.dump(lr, 'Credit_risk.model')

['Credit_risk.model']