In [1]:
import math
import pickle
import pprint
from collections import Counter

import numpy as np
import pandas as pd

In [2]:
# Build the loan-application dataset: one row per applicant, every field a string.
data = pd.DataFrame(
    data=[
        ('A1', 'Good', 'Salaried', 'High', 'Yes', 'Approved'),
        ('A2', 'Poor', 'Self-Employed', 'Low', 'No', 'Rejected'),
        ('A3', 'Good', 'Self-Employed', 'High', 'Yes', 'Approved'),
        ('A4', 'Average', 'Salaried', 'Medium', 'No', 'Approved'),
        ('A5', 'Poor', 'Salaried', 'Low', 'No', 'Rejected'),
        ('A6', 'Good', 'Salaried', 'Medium', 'Yes', 'Approved'),
        ('A7', 'Average', 'Self-Employed', 'Medium', 'Yes', 'Approved'),
        ('A8', 'Poor', 'Salaried', 'Low', 'No', 'Rejected'),
        ('A9', 'Good', 'Salaried', 'High', 'Yes', 'Approved'),
        ('A10', 'Average', 'Self-Employed', 'Medium', 'No', 'Rejected'),
    ],
    columns=['ID', 'CreditScore', 'EmploymentType', 'IncomeLevel', 'Collateral', 'LoanStatus'],
)

# Predictor columns: every column except the row identifier and the target label.
features = [column for column in data.columns if column not in ('ID', 'LoanStatus')]


In [3]:
#Entropy Calculation
def entropy(class_column):
    """Return the Shannon entropy, in bits, of a collection of class labels.

    Args:
        class_column: A sized iterable of hashable labels (for example a list
            or a pandas Series).

    Returns:
        float: ``-sum(p * log2(p))`` taken over the relative frequency ``p`` of
        each distinct label. An empty or single-class input yields exactly
        ``0.0``.
    """
    counts = Counter(class_column)

    # Zero or one distinct label means there is no uncertainty. Returning early
    # also keeps the negated sum below from evaluating to -0.0, which would be
    # displayed as "-0.0".
    if len(counts) <= 1:
        return 0.0

    total = len(class_column)
    return -sum((count / total) * math.log2(count / total) for count in counts.values())

# Baseline uncertainty: entropy of the target column over the whole dataset, before any split
target_entropy = entropy(data.loc[:, 'LoanStatus'])
print("Entropy of LoanStatus:", round(target_entropy, 4))


Entropy of LoanStatus: 0.971


In [4]:
#Information Gain
def info_gain(df, attribute, target):
    """Compute the information gain of splitting `df` on `attribute`.

    The gain is the entropy of the `target` column minus the size-weighted
    sum of the `target` entropies inside each group of rows that share the
    same `attribute` value.

    Args:
        df: DataFrame containing both the `attribute` and `target` columns.
        attribute: Name of the column to split on.
        target: Name of the class-label column.

    Returns:
        The entropy reduction achieved by the split.
    """
    n_rows = len(df)
    split_column = df[attribute]

    # One partition per distinct attribute value, in order of first appearance.
    partitions = (df[split_column == level] for level in split_column.unique())

    # Entropy that remains after the split, weighted by partition size.
    remainder = sum(
        (len(part) / n_rows) * entropy(part[target])
        for part in partitions
    )

    return entropy(df[target]) - remainder

# Score every candidate feature by the information gain of splitting the full dataset on it
gains = {name: info_gain(data, name, 'LoanStatus') for name in features}

print("\nInformation Gain for each feature:")
for feature_name, gain in gains.items():
    print(f"- {feature_name}: {round(gain, 4)}")



Information Gain for each feature:
- CreditScore: 0.6955
- EmploymentType: 0.02
- IncomeLevel: 0.6464
- Collateral: 0.61


In [5]:
#Build Decision Tree (ID3 Algorithm)
def id3(df, features, target):
    """Recursively build a decision tree using the ID3 algorithm.

    Args:
        df: DataFrame of training rows.
        features: Column names still available for splitting.
        target: Name of the class-label column.

    Returns:
        A class label when every row in `df` shares one label, or when no
        features remain (in which case the first mode of the labels is used);
        otherwise a nested dict of the form ``{feature: {value: subtree}}``.
    """
    labels = df[target]

    # Pure node: all rows agree, so the shared label becomes the leaf.
    distinct_labels = set(labels)
    if len(distinct_labels) == 1:
        return labels.iloc[0]

    # Nothing left to split on: fall back to the most frequent label.
    if not features:
        return labels.mode()[0]

    # Split on the feature with the highest information gain; on a tie the
    # earliest candidate in `features` wins.
    best_feature = max(features, key=lambda name: info_gain(df, name, target))

    # The chosen feature is not reused further down this branch.
    remaining = [name for name in features if name != best_feature]

    branches = {}
    for level in df[best_feature].unique():
        branches[level] = id3(df[df[best_feature] == level], remaining, target)

    return {best_feature: branches}

# Induce the decision tree from the full dataset.
tree = id3(data, features, 'LoanStatus')

# pprint is imported in the notebook's top import cell.
print("\n🌳 Decision Tree:")
pprint.pprint(tree)



🌳 Decision Tree:
{'CreditScore': {'Average': {'EmploymentType': {'Salaried': 'Approved',
                                                'Self-Employed': {'Collateral': {'No': 'Rejected',
                                                                                 'Yes': 'Approved'}}}},
                 'Good': 'Approved',
                 'Poor': 'Rejected'}}


In [6]:
# Prediction function
def predict(tree, sample, default="Unknown"):
    """Classify `sample` by walking a nested-dict decision tree.

    Args:
        tree: Either a leaf label, or a dict of the form
            ``{feature: {value: subtree}}`` where each subtree is again a leaf
            label or such a dict.
        sample: Mapping from feature name to the sample's value for that
            feature; it must provide ``.get``.
        default: Value returned when the tree has no branch for the sample's
            value of a feature. Defaults to ``"Unknown"``.

    Returns:
        The leaf label reached by following the sample's feature values, or
        `default` when no matching branch exists.
    """
    node = tree
    while isinstance(node, dict):
        # An empty subtree has no branches to follow.
        if not node:
            return default

        # Each internal node holds exactly one feature as its key.
        feature = next(iter(node))
        node = node[feature].get(sample.get(feature))

        # Compare against None rather than relying on truthiness, so that
        # legitimate falsy labels such as 0, False or '' are returned as-is.
        if node is None:
            return default

    return node


In [7]:
# Try the tree on a single hand-written applicant
sample = dict(
    CreditScore='Good',
    EmploymentType='Salaried',
    IncomeLevel='High',
    Collateral='Yes',
)

print("\n Prediction for sample:")
print(sample)
print("Loan Status Prediction:", predict(tree, sample))


 Prediction for sample:
{'CreditScore': 'Good', 'EmploymentType': 'Salaried', 'IncomeLevel': 'High', 'Collateral': 'Yes'}
Loan Status Prediction: Approved


In [8]:
# Serialize the decision tree to loan_status.pkl in the current working directory.
# pickle is imported in the notebook's top import cell.
# NOTE: only unpickle this file from a trusted location; pickle.load can execute arbitrary code.
with open("loan_status.pkl", "wb") as model_file:
    pickle.dump(tree, model_file)