In [None]:
# !pip install experta

In [None]:
import pandas as pd
import pickle as pkl
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree


dataset = "Loan-Approval-Prediction-Dataset/loan_approval_dataset.csv"
df = pd.read_csv('datasets/'+dataset)
print(len(df))

In [2]:
# Remove leading and trailing spaces from column names
df.columns = df.columns.str.strip()

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.isnull().sum().sum()

In [None]:
print(f"loan_status: {df['loan_status'].value_counts()}")
# Encode categorical data
le = LabelEncoder()
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = le.fit_transform(df[col].astype(str))
class_names = ['Approved', 'Rejected']
print(f"loan_status: {df['loan_status'].value_counts()}")

# Step 2: Define label mapping
label_map = {0: "Approved", 1: "Rejected"}  # Based on your LabelEncoder

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X = df.drop("loan_status", axis=1)
y = df["loan_status"]

# 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = DecisionTreeClassifier(
    criterion="entropy",
    max_depth=5,              # Try values: None, 3, 5, 10, etc.
    min_samples_split=10,     # Prevent overfitting
    min_samples_leaf=5,       # Minimum samples in a leaf
    # min_impurity_decrease=0.01,
    random_state=42
)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)


depth = clf.get_depth()
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Tree Depth: {depth} | Number of Leaves: {clf.get_n_leaves()}")

In [None]:
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

plt.figure(figsize=(40, 20), dpi=300)
plot_tree(
    clf,
    feature_names=X.columns,
    class_names=class_names,
    filled=True,
    impurity=False,
    label="root"
)
plt.savefig("decision_tree_simple.png", bbox_inches="tight")
# plt.close()


In [9]:
from sklearn.tree import export_text

rules = export_text(clf, feature_names=list(X.columns), max_depth=depth+5)
# print(rules)
# Write rules to a file
with open("rules.txt", "w") as f:
    f.write(rules)


In [None]:
from sklearn.tree import export_text
import re

# Step 1: Get the raw rules
tree_rules = export_text(clf, feature_names=list(X.columns))

# Step 2: Define label mapping
# Done above when encoding the data

# Step 3: Parse and convert
rules = []
rule_count = 0
stack = []

for line in tree_rules.split("\n"):
    indent_level = line.count("|   ")
    line = line.strip()

    # Remove tree symbols
    line = re.sub(r"\|--- ", "", line)
    line = line.replace("|   ", "")

    if line.startswith("class:"):
        class_val = int(line.split(":")[1].strip())
        class_label = label_map[class_val]
        # Build final rule from stack
        conditions = stack[:indent_level]
        condition_str = ", ".join(conditions)
        rule_code = f"""    @Rule(Fact({condition_str}))
    def rule_{rule_count}(self):
        print("Loan Status: {class_label}")\n"""
        rules.append(rule_code)
        rule_count += 1
    elif "<=" in line or ">" in line:       # hne7tag n8yrha lw fyha 7aga 8eir <= w >, zy >= aw <
        op = "<=" if " <= " in line else ">"
        feature, op_val = line.split(op)
        val = float(op_val.strip())
        stack = stack[:indent_level] + [f"{feature.strip()}=P(lambda x: x {op} {val})"]

# Step 4: Output rules
# print("from experta import *\n\nclass LoanExpert(KnowledgeEngine):")
for r in rules:
    print(r)


In [None]:
from experata import *

class LoanApprovalExpert(KnowledgeEngine):
    @Rule(Fact(cibil_score=P(lambda x: x <= 549.5), loan_term=P(lambda x: x <= 5.0), loan_amount=P(lambda x: x <= 26250000.0), income_annum=P(lambda x: x <= 8050000.0), loan_amount=P(lambda x: x <= 18400000.0)))
    def rule_0(self):
        print("Loan Status: Rejected")

    @Rule(Fact(cibil_score=P(lambda x: x <= 549.5), loan_term=P(lambda x: x <= 5.0), loan_amount=P(lambda x: x <= 26250000.0), income_annum=P(lambda x: x <= 8050000.0), loan_amount=P(lambda x: x > 18400000.0)))
    def rule_1(self):
        print("Loan Status: Approved")

    @Rule(Fact(cibil_score=P(lambda x: x <= 549.5), loan_term=P(lambda x: x <= 5.0), loan_amount=P(lambda x: x <= 26250000.0), income_annum=P(lambda x: x > 8050000.0)))
    def rule_2(self):
        print("Loan Status: Rejected")

    @Rule(Fact(cibil_score=P(lambda x: x <= 549.5), loan_term=P(lambda x: x <= 5.0), loan_amount=P(lambda x: x > 26250000.0), income_annum=P(lambda x: x <= 9650000.0)))
    def rule_3(self):
        print("Loan Status: Approved")

    @Rule(Fact(cibil_score=P(lambda x: x <= 549.5), loan_term=P(lambda x: x <= 5.0), loan_amount=P(lambda x: x > 26250000.0), income_annum=P(lambda x: x > 9650000.0)))
    def rule_4(self):
        print("Loan Status: Approved")

    @Rule(Fact(cibil_score=P(lambda x: x <= 549.5), loan_term=P(lambda x: x > 5.0)))
    def rule_5(self):
        print("Loan Status: Rejected")

    @Rule(Fact(cibil_score=P(lambda x: x > 549.5), residential_assets_value=P(lambda x: x <= 950000.0), commercial_assets_value=P(lambda x: x <= 1850000.0), loan_amount=P(lambda x: x <= 3100000.0), residential_assets_value=P(lambda x: x <= 150000.0)))
    def rule_6(self):
        print("Loan Status: Approved")

    @Rule(Fact(cibil_score=P(lambda x: x > 549.5), residential_assets_value=P(lambda x: x <= 950000.0), commercial_assets_value=P(lambda x: x <= 1850000.0), loan_amount=P(lambda x: x <= 3100000.0), residential_assets_value=P(lambda x: x > 150000.0)))
    def rule_7(self):
        print("Loan Status: Approved")

    @Rule(Fact(cibil_score=P(lambda x: x > 549.5), residential_assets_value=P(lambda x: x <= 950000.0), commercial_assets_value=P(lambda x: x <= 1850000.0), loan_amount=P(lambda x: x > 3100000.0), loan_id=P(lambda x: x <= 402.0)))
    def rule_8(self):
        print("Loan Status: Approved")

    @Rule(Fact(cibil_score=P(lambda x: x > 549.5), residential_assets_value=P(lambda x: x <= 950000.0), commercial_assets_value=P(lambda x: x <= 1850000.0), loan_amount=P(lambda x: x > 3100000.0), loan_id=P(lambda x: x > 402.0)))
    def rule_9(self):
        print("Loan Status: Approved")

    @Rule(Fact(cibil_score=P(lambda x: x > 549.5), residential_assets_value=P(lambda x: x <= 950000.0), commercial_assets_value=P(lambda x: x > 1850000.0)))
    def rule_10(self):
        print("Loan Status: Approved")

    @Rule(Fact(cibil_score=P(lambda x: x > 549.5), residential_assets_value=P(lambda x: x > 950000.0)))
    def rule_11(self):
        print("Loan Status: Approved")
