# Credit Approval

# Database Description

*   CreditScore: Credit score of the individual (300 to 850)
*   AnnualIncome: Annual income of the individual (in dollars)
*   LoanAmount: Requested loan amount (in dollars)
*   LoanDuration: Duration of the loan (in years)
*   Age: Age of the individual (in years)
*   EmploymentStatus: Employment status of the individual (Employed, Unemployed, Self-Employed)
*   MaritalStatus: Marital status of the individual (Single, Married, Divorced, Widowed)
*   NumberOfDependents: Number of dependents
*   EducationLevel: Education level of the individual (High School, Associate, Bachelor, Master, Doctorate)
*   HomeOwnershipStatus: Home ownership status (Own, Rent, Mortgage, Other)
*   MonthlyDebtPayments: Monthly debt payments (in dollars)
*   CreditCardUtilizationRate: Credit card utilization rate (0 to 1)
*   NumberOfOpenCreditLines: Number of open credit lines
*   NumberOfCreditInquiries: Number of credit inquiries in the last 6 months
DebtToIncomeRatio: Debt-to-income ratio (0 to 1)
*   BankruptcyHistory: Whether the individual has a bankruptcy history (0: No, 1: Yes)
*   LoanPurpose: Purpose of the loan (Home, Auto, Education, Debt Consolidation, Other)
*   PreviousLoanDefaults: Whether the individual has defaulted on a previous loan (0: No, 1: Yes)
*   InterestRate: Interest rate for the loan (0.01 to 0.3)
*   PaymentHistory: Years of payment history
*   SavingsAccountBalance: Savings account balance (in dollars)
*   CheckingAccountBalance: Checking account balance (in dollars)
*   InvestmentAccountBalance: Investment account balance (in dollars)
*   RetirementAccountBalance: Retirement account balance (in dollars)
*   EmergencyFundBalance: Emergency fund balance (in dollars)
*   TotalAssets: Total assets (in dollars)
*   TotalLiabilities: Total liabilities (in dollars)
*   NetWorth: Net worth (in dollars)
*   LengthOfCreditHistory: Length of credit history (in years)
*   MortgageBalance: Mortgage balance (in dollars)
*   RentPayments: Monthly rent payments (in dollars)
*   AutoLoanBalance: Auto loan balance (in dollars)
*   PersonalLoanBalance: Personal loan balance (in dollars)
*   StudentLoanBalance: Student loan balance (in dollars)
*   UtilityBillsPaymentHistory: Payment history for utility bills (0 to 1)
*   HealthInsuranceStatus: Health insurance status (Insured, Uninsured)
*   LifeInsuranceStatus: Life insurance status (Insured, Uninsured)
*   CarInsuranceStatus: Car insurance status (Insured, Uninsured)
*   HomeInsuranceStatus: Home insurance status (Insured, Uninsured)
*   OtherInsurancePolicies: Number of other insurance policies
*   EmployerType: Type of employer (Private, Public, Self-Employed, Other)
*   JobTenure: Job tenure (in years)
*   MonthlySavings: Monthly savings (in dollars)
*   AnnualBonuses: Annual bonuses (in dollars)
*   AnnualExpenses: Annual expenses (in dollars)
*   MonthlyHousingCosts: Monthly housing costs (in dollars)
*   MonthlyTransportationCosts: Monthly transportation costs (in dollars)
*   MonthlyFoodCosts: Monthly food costs (in dollars)
*   MonthlyHealthcareCosts: Monthly healthcare costs (in dollars)
*   MonthlyEntertainmentCosts: Monthly entertainment costs (in dollars)
*   LoanApproved: Whether the loan was approved (0: No, 1: Yes)

## Instal Ydata and Altair

In [3]:
!pip install ydata-profiling
!pip install --upgrade altair

Collecting altair
  Downloading altair-5.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting narwhals>=1.5.2 (from altair)
  Downloading narwhals-1.8.2-py3-none-any.whl.metadata (6.8 kB)
Downloading altair-5.4.1-py3-none-any.whl (658 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m658.1/658.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading narwhals-1.8.2-py3-none-any.whl (167 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.7/167.7 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: narwhals, altair
  Attempting uninstall: altair
    Found existing installation: altair 4.2.2
    Uninstalling altair-4.2.2:
      Successfully uninstalled altair-4.2.2
Successfully installed altair-5.4.1 narwhals-1.8.2


# Import Libraries

In [4]:
import ydata_profiling as pp
import pandas as pd
import altair as alt

In [5]:
from sklearn.model_selection import train_test_split

# Read the original database

In [7]:
financial_risk = pd.read_csv('financial_risk_analysis_large.csv')
financial_risk

Unnamed: 0,CreditScore,AnnualIncome,LoanAmount,LoanDuration,Age,EmploymentStatus,MaritalStatus,NumberOfDependents,EducationLevel,HomeOwnershipStatus,...,JobTenure,MonthlySavings,AnnualBonuses,AnnualExpenses,MonthlyHousingCosts,MonthlyTransportationCosts,MonthlyFoodCosts,MonthlyHealthcareCosts,MonthlyEntertainmentCosts,LoanApproved
0,402,63295,18830,13,29,Self-Employed,Widowed,2,Doctorate,Other,...,24,378,3741,40058,977,412,399,136,124,0
1,735,55936,23729,1,42,Self-Employed,Divorced,3,Master,Own,...,10,575,4115,16745,695,206,898,252,131,0
2,570,62547,19660,7,54,Self-Employed,Single,3,Doctorate,Mortgage,...,16,691,4105,23273,627,266,392,73,36,0
3,406,46129,21674,23,25,Self-Employed,Divorced,3,High School,Other,...,6,452,4559,42163,397,307,250,378,-32,0
4,371,57725,12189,26,42,Employed,Widowed,4,Master,Own,...,2,690,7856,30087,723,315,114,88,68,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,414,83679,19053,10,54,Unemployed,Married,2,Bachelor,Other,...,33,523,5740,31540,890,351,599,148,138,0
999996,333,79096,22567,4,21,Self-Employed,Married,2,Bachelor,Rent,...,35,525,5669,26843,2082,80,427,366,100,0
999997,668,55138,13939,28,52,Employed,Divorced,0,Bachelor,Rent,...,20,386,3475,27936,393,-85,534,8,98,0
999998,627,62867,19115,8,23,Employed,Married,4,Associate,Mortgage,...,18,287,6476,17752,1226,311,227,119,167,0


## View column datatype information

In [8]:
financial_risk.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 51 columns):
 #   Column                      Non-Null Count    Dtype  
---  ------                      --------------    -----  
 0   CreditScore                 1000000 non-null  int64  
 1   AnnualIncome                1000000 non-null  int64  
 2   LoanAmount                  1000000 non-null  int64  
 3   LoanDuration                1000000 non-null  int64  
 4   Age                         1000000 non-null  int64  
 5   EmploymentStatus            1000000 non-null  object 
 6   MaritalStatus               1000000 non-null  object 
 7   NumberOfDependents          1000000 non-null  int64  
 8   EducationLevel              1000000 non-null  object 
 9   HomeOwnershipStatus         1000000 non-null  object 
 10  MonthlyDebtPayments         1000000 non-null  int64  
 11  CreditCardUtilizationRate   1000000 non-null  float64
 12  NumberOfOpenCreditLines     1000000 non-null  int64  
 13

## Convert datatype columns

In [9]:
financial_risk["NumberOfDependents",] = pd.to_numeric(financial_risk['NumberOfDependents'])
financial_risk["PreviousLoanDefaults",] = pd.to_numeric(financial_risk['PreviousLoanDefaults'])
financial_risk["BankruptcyHistory"] = pd.to_numeric(financial_risk['BankruptcyHistory'])

# EDA

In [10]:
from ydata_profiling import ProfileReport

In [11]:
profile = ProfileReport(financial_risk, title="Report")

In [12]:
profile.to_file("Report.html")

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## Insights

Data analysis

*    Annual Income: Exclude data in which income is less than 0.
*   Loan amount : Exclude data in which the loan is less than 0
*   Monthly debt payments: If you have negative data, they are considered as arrears.
*   Interest rate : This column does not influence the approval of new loans **EXCLUDE**
*   Savings account balance: Exclude data where the balance is less than 0.
*   Retirement account balance : Exclude data in which the balance is less than 0
*   Emergency fund balance : Exclude data in which the balance is less than 0
*   Credit card utilization rate: Exclude column
*   Total assets : Exclude data less than 0
* Mortgage balance : Exclude data that is less than 0
* Rent payments: Exclude data that is less than 0
* Car loan balance: Exclude data that is less than 0
* Car loan balance : Exclude data that is less than 0
* Personal loan balance: Exclude data that is less than 0
* Student loan balance : Exclude data that is less than 0
* Monthly savings : Exclude data that is less than 0
* Annual bonuses : Exclude data that is less than 0
* Annual expenses : Exclude data that is less than 0
* Monthly housing expenses : Exclude data that is less than 0
* Monthly transportation expenses : Exclude data that is less than 0
* Monthly housing expenses : Exclude data that is less than 0
* Monthly transportation expenses : Exclude data that are less than 0.
* Monthly food expenses : Exclude data less than 0
* Monthly health expenses : Exclude data less than 0
* Monthly health care expenses : Exclude data that are less than 0
* Monthly leisure expenses: Exclude data that is less than 0.
* Approved loan: This is the target column to be predicted.

In [17]:
column_cleaning = ['AnnualIncome','LoanAmount','SavingsAccountBalance', 'RetirementAccountBalance','EmergencyFundBalance', 'TotalAssets',
                     'TotalLiabilities', 'MortgageBalance', 'RentPayments', 'AutoLoanBalance', 'PersonalLoanBalance', 'StudentLoanBalance',
                     'MonthlySavings', 'AnnualBonuses', 'AnnualExpenses', 'MonthlyHousingCosts','MonthlyTransportationCosts','MonthlyFoodCosts',
                     'MonthlyHealthcareCosts', 'MonthlyEntertainmentCosts']

In [31]:
condition = financial_risk[column_cleaning].ge(0).all(axis=1)

In [32]:
financial_risk_filtered = financial_risk[condition]

In [33]:
financial_risk_filtered

Unnamed: 0,CreditScore,AnnualIncome,LoanAmount,LoanDuration,Age,EmploymentStatus,MaritalStatus,NumberOfDependents,EducationLevel,HomeOwnershipStatus,...,AnnualBonuses,AnnualExpenses,MonthlyHousingCosts,MonthlyTransportationCosts,MonthlyFoodCosts,MonthlyHealthcareCosts,MonthlyEntertainmentCosts,LoanApproved,"('NumberOfDependents',)","('PreviousLoanDefaults',)"
0,402,63295,18830,13,29,Self-Employed,Widowed,2,Doctorate,Other,...,3741,40058,977,412,399,136,124,0,2,0
1,735,55936,23729,1,42,Self-Employed,Divorced,3,Master,Own,...,4115,16745,695,206,898,252,131,0,3,0
2,570,62547,19660,7,54,Self-Employed,Single,3,Doctorate,Mortgage,...,4105,23273,627,266,392,73,36,0,3,0
4,371,57725,12189,26,42,Employed,Widowed,4,Master,Own,...,7856,30087,723,315,114,88,68,0,4,0
5,320,67527,18618,20,30,Self-Employed,Widowed,0,Doctorate,Other,...,7574,21769,853,253,303,213,33,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999991,592,36452,24944,22,19,Self-Employed,Widowed,3,Master,Own,...,6876,24875,715,345,416,184,158,1,3,0
999992,613,65353,2035,3,54,Employed,Divorced,1,High School,Mortgage,...,6091,28956,1585,72,338,266,95,1,1,0
999995,414,83679,19053,10,54,Unemployed,Married,2,Bachelor,Other,...,5740,31540,890,351,599,148,138,0,2,0
999996,333,79096,22567,4,21,Self-Employed,Married,2,Bachelor,Rent,...,5669,26843,2082,80,427,366,100,0,2,0


In [34]:
financial_risk_filtered = financial_risk_filtered.drop(['InterestRate','CreditCardUtilizationRate'], axis=1)

# Effective visualization

In [39]:
data_graphic = financial_risk_filtered.sample(n=1000)

In [40]:
alt.Chart(data_graphic).mark_point().encode(
    y='CreditScore:Q',
    x='AnnualIncome:Q'
)

People earning less than 40,000 do not usually have a credit score, suggesting that they are not actively involved in the banking system. In contrast, those earning between 40,000 and 80,000 generally have a credit score. Interestingly, those with incomes of 50,000 to 60,000 tend to have a better score than those with higher incomes. This may be attributed to the fact that people with higher incomes tend to take on a higher level of debt, which in turn leads to a higher risk of default.

In [41]:
alt.Chart(data_graphic).mark_bar().encode(
   x= 'JobTenure',
   y= 'AnnualIncome',
   color = 'EmploymentStatus'
)

In this graph, we observe that there are unemployed people who, despite their situation, have a duration of time in their current job, which generates an inconsistency in the data. To correct this problem in the dataframe, the value of the JobTenure column should be replaced by 0 in those cases where the EmploymentStatus column indicates “Unemployed”. This will allow the data to more accurately reflect the reality of the employment status of these individuals.

In [42]:
financial_risk_update = financial_risk_filtered.copy()

In [43]:
financial_risk_update.loc[financial_risk_update['EmploymentStatus'] == 'Unemployed', 'JobTenure'] = 0

In [54]:
data_graphic_update = financial_risk_update.sample(n=1000)

In [56]:
alt.Chart(data_graphic_update).mark_bar().encode(
   x= 'JobTenure',
   y= 'AnnualIncome',
   color = 'EmploymentStatus'
)

In this graph, we observe that unemployed people effectively have no duration in their current job. However, there is a notable concentration in duration 0, which is due to the fact that these data are summing annual earnings.

In [71]:
alt.Chart(data_graphic_update).mark_point().encode(
    alt.X('AnnualExpenses').axis(tickMinStep=50),
    alt.Y('AnnualIncome').title('Income'),
    color='BankruptcyHistory:O',
    shape='BankruptcyHistory:O'
)

In [76]:

alt.Chart(data_graphic_update).mark_bar().encode(
    alt.X('count(BankruptcyHistory):Q'),
    alt.Y('EducationLevel:N').sort('x'),
    color=alt.Color('EducationLevel').sort(field='BankruptcyHistory', op='max', order='ascending')
)

In this dataframe, we can see that anyone can face bankruptcy, regardless of their level of education or their income and expenses. However, in the sample presented in the graph, we can see that the higher the level of education, the greater the economic turnover, which can lead to taking on more financial risk. As a result, there are also bankrupts with a higher level of education. This suggests that education, although it may be associated with better economic opportunities, does not necessarily guarantee risk-free financial management.

In [77]:
financial_risk_filtered.loc[financial_risk_filtered['EmploymentStatus'] == 'Unemployed', 'JobTenure'] = 0

## Remove target column

In [78]:
Loan_approved = financial_risk_filtered['LoanApproved'].copy()
financial_data = financial_risk_filtered.drop(['LoanApproved'], axis=1)

In [79]:
financial_data

Unnamed: 0,CreditScore,AnnualIncome,LoanAmount,LoanDuration,Age,EmploymentStatus,MaritalStatus,NumberOfDependents,EducationLevel,HomeOwnershipStatus,...,MonthlySavings,AnnualBonuses,AnnualExpenses,MonthlyHousingCosts,MonthlyTransportationCosts,MonthlyFoodCosts,MonthlyHealthcareCosts,MonthlyEntertainmentCosts,"('NumberOfDependents',)","('PreviousLoanDefaults',)"
0,402,63295,18830,13,29,Self-Employed,Widowed,2,Doctorate,Other,...,378,3741,40058,977,412,399,136,124,2,0
1,735,55936,23729,1,42,Self-Employed,Divorced,3,Master,Own,...,575,4115,16745,695,206,898,252,131,3,0
2,570,62547,19660,7,54,Self-Employed,Single,3,Doctorate,Mortgage,...,691,4105,23273,627,266,392,73,36,3,0
4,371,57725,12189,26,42,Employed,Widowed,4,Master,Own,...,690,7856,30087,723,315,114,88,68,4,0
5,320,67527,18618,20,30,Self-Employed,Widowed,0,Doctorate,Other,...,726,7574,21769,853,253,303,213,33,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999991,592,36452,24944,22,19,Self-Employed,Widowed,3,Master,Own,...,285,6876,24875,715,345,416,184,158,3,0
999992,613,65353,2035,3,54,Employed,Divorced,1,High School,Mortgage,...,1013,6091,28956,1585,72,338,266,95,1,0
999995,414,83679,19053,10,54,Unemployed,Married,2,Bachelor,Other,...,523,5740,31540,890,351,599,148,138,2,0
999996,333,79096,22567,4,21,Self-Employed,Married,2,Bachelor,Rent,...,525,5669,26843,2082,80,427,366,100,2,0


# Split Dataset

In [80]:
original_count = len(financial_data)
training_size = 0.60 # 60% of records
test_size = (1-training_size) / 2

training_count = int(original_count * training_size)
test_count = int(original_count * test_size)
validation_count = original_count - training_count - test_count

print(training_count, test_count, validation_count, original_count)

411847 137282 137284 686413


In [81]:
train_x, rest_x, train_y, rest_y = train_test_split(financial_data, Loan_approved, train_size = training_count, random_state=42)

In [82]:
test_x, validate_x, test_y, validate_y = train_test_split(rest_x, rest_y, train_size=test_count, random_state=42)

In [83]:
print(len(train_x), len(test_x), len(validate_x))

411847 137282 137284


In [84]:
financial_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 686413 entries, 0 to 999998
Data columns (total 50 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   CreditScore                 686413 non-null  int64  
 1   AnnualIncome                686413 non-null  int64  
 2   LoanAmount                  686413 non-null  int64  
 3   LoanDuration                686413 non-null  int64  
 4   Age                         686413 non-null  int64  
 5   EmploymentStatus            686413 non-null  object 
 6   MaritalStatus               686413 non-null  object 
 7   NumberOfDependents          686413 non-null  int64  
 8   EducationLevel              686413 non-null  object 
 9   HomeOwnershipStatus         686413 non-null  object 
 10  MonthlyDebtPayments         686413 non-null  int64  
 11  NumberOfOpenCreditLines     686413 non-null  int64  
 12  NumberOfCreditInquiries     686413 non-null  int64  
 13  DebtToIncomeRatio  

# Make transformation Pipeline

In [85]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Binarizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import MaxAbsScaler

In [86]:
one_hot_encoding = ColumnTransformer([
    (
        'one_hot_encode',
        OneHotEncoder (sparse_output = False, handle_unknown="ignore"),
        [
            "EmploymentStatus",
            "MaritalStatus",
            "EducationLevel",
            "HomeOwnershipStatus",
            "LoanPurpose",
            "HealthInsuranceStatus",
            "LifeInsuranceStatus",
            "CarInsuranceStatus",
            "HomeInsuranceStatus",
            "OtherInsurancePolicies",
            "EmployerType"
        ]
    )
])

In [87]:
scaler  = ColumnTransformer([
    ("scaler", MaxAbsScaler(), [
        "CreditScore",
        "AnnualIncome",
        "LoanAmount",
        "LoanDuration",
        "Age",
        "MonthlyDebtPayments",
        "NumberOfOpenCreditLines",
        "NumberOfCreditInquiries",
        "PaymentHistory",
        "SavingsAccountBalance",
        "CheckingAccountBalance",
        "InvestmentAccountBalance",
        "RetirementAccountBalance",
        "EmergencyFundBalance",
        "TotalAssets",
        "TotalLiabilities",
        "NetWorth",
        "LengthOfCreditHistory",
        "MortgageBalance",
        "RentPayments",
        "AutoLoanBalance",
        "PersonalLoanBalance",
        "StudentLoanBalance",
        "UtilityBillsPaymentHistory",
        "JobTenure",
        "MonthlySavings",
        "AnnualBonuses",
        "AnnualExpenses",
        "MonthlyHousingCosts",
        "MonthlyTransportationCosts",
        "MonthlyFoodCosts",
        "MonthlyHealthcareCosts",
        "MonthlyEntertainmentCosts"
    ])
])

In [88]:
passthrough = ColumnTransformer([
    (
        "passthrough",
        "passthrough",
        [
            "NumberOfDependents",
            "PreviousLoanDefaults",
            "BankruptcyHistory",
        ]
    )
])

In [89]:
union_features = FeatureUnion(
    [
        ("categorical", one_hot_encoding),
        ("passthrough", passthrough),
        ("scaled", scaler),
    ]
)

In [90]:
feature_engineering_pipeline = Pipeline(
    [
        (
           "features",
           union_features,
        )
    ]
)

# Pipeline Training

In [91]:
feature_engineering_pipeline

In [92]:
feature_engineering_pipeline.fit(train_x)

In [93]:
transformed_x = feature_engineering_pipeline.transform(train_x)

In [94]:
train_x.shape, transformed_x.shape

((411847, 50), (411847, 74))

In [95]:
transformed_x

array([[1.        , 0.        , 0.        , ..., 0.35143067, 0.26646248,
        0.21165644],
       [0.        , 0.        , 1.        , ..., 0.24651504, 0.3568147 ,
        0.31288344],
       [0.        , 0.        , 1.        , ..., 0.14673514, 0.15007657,
        0.23006135],
       ...,
       [0.        , 0.        , 1.        , ..., 0.23844461, 0.27718224,
        0.12576687],
       [0.        , 1.        , 0.        , ..., 0.38297872, 0.5941807 ,
        0.21472393],
       [0.        , 1.        , 0.        , ..., 0.68085106, 0.40275651,
        0.22392638]])

# Testing Pipeline

In [96]:
feature_engineering_pipeline.transform(test_x)

array([[0.        , 1.        , 0.        , ..., 0.10124725, 0.35375191,
        0.16257669],
       [0.        , 1.        , 0.        , ..., 0.18488628, 0.22970904,
        0.17177914],
       [0.        , 0.        , 1.        , ..., 0.19809244, 0.32159265,
        0.50920245],
       ...,
       [0.        , 0.        , 1.        , ..., 0.21203228, 0.34303216,
        0.40490798],
       [0.        , 0.        , 1.        , ..., 0.09904622, 0.21745789,
        0.41104294],
       [0.        , 0.        , 1.        , ..., 0.39985326, 0.20673813,
        0.39263804]])

# Model Training

In [97]:
from sklearn.base import clone

feature_transformer = clone(feature_engineering_pipeline)

features_train_x = feature_transformer.fit_transform(train_x)
features_validate_x= feature_transformer.transform(validate_x)

In [98]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=100)

model.fit(features_train_x, train_y)

# Model validation

In [99]:
from sklearn.metrics import accuracy_score

pred_y = model.predict(features_validate_x)

In [100]:
print(accuracy_score(validate_y, pred_y))

0.7454255412138341


# Final Pipeline


In [101]:
Credit_approval = Pipeline([
    ("feature_engineering", clone(feature_engineering_pipeline)),
    ("model", RandomForestClassifier(n_estimators=100))
])

In [102]:
Credit_approval

In [103]:
Credit_approval.fit(train_x, train_y)

In [104]:
test_pred_y = Credit_approval.predict(test_x)

In [105]:
print(accuracy_score(test_pred_y, test_y))

0.7413572063344066


# Save Model

In [106]:
from joblib import dump

dump(Credit_approval, 'Credit_approval.joblib')

['Credit_approval.joblib']

['Credit_approval.joblib']

In [107]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
