In [1]:
# Uncomment to install the rule-engine dependency into the kernel's environment:
# %pip install experta

In [None]:
import pandas as pd
import pickle as pkl
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree
from sklearn.preprocessing import MinMaxScaler


# Path of the loan-approval CSV, relative to the local `datasets/` folder.
dataset = "Loan-Approval-Prediction-Dataset/loan_approval_dataset.csv"
df = pd.read_csv(f"datasets/{dataset}")
# Report how many rows were loaded.
print(df.shape[0])

In [None]:
# Strip any surrounding whitespace from the column names so they can be
# referenced by their plain names below.
df.columns = df.columns.str.strip()
# Drop the 'loan_id' column as it is not useful for prediction.
# errors="ignore" keeps this cell re-runnable: a second execution no longer
# raises KeyError because the column is already gone.
df = df.drop(columns=['loan_id'], errors='ignore')
df.head()

In [None]:
# Preview the first five rows of the frame.
df.head(5)

In [None]:
# Print pandas' built-in summary of the frame (DataFrame.info).
df.info()

In [None]:
# Total number of missing cells across every column of the frame.
df.isna().sum().sum()

In [None]:
print(f"loan_status: {df['loan_status'].value_counts()}")
# Encode categorical data.
# NOTE: the encoding is done with pd.factorize, which numbers categories in
# order of first appearance. The LabelEncoder instance is not used for it; it
# is only kept so the name `le` remains defined for any code that expects it.
le = LabelEncoder()
status_labels = None
for col in df.columns:
    # Cover both classic object columns and pandas' dedicated string dtypes.
    if pd.api.types.is_object_dtype(df[col]) or pd.api.types.is_string_dtype(df[col]):
        codes, uniques = pd.factorize(df[col])
        if col == 'loan_status':
            # Remember which text label each integer code stands for.
            status_labels = [str(label).strip() for label in uniques]
        df[col] = codes

if status_labels is None:
    # loan_status was already numeric (for example this cell was re-run after
    # encoding); fall back to the mapping this notebook has always assumed.
    status_labels = ['Approved', 'Rejected']
class_names = status_labels
print(f"loan_status: {df['loan_status'].value_counts()}")

# Step 2: Define label mapping (integer code -> label), derived from the
# factorization above rather than hard-coded.
label_map = dict(enumerate(class_names))

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every column except the target.
is_target = df.columns.isin(["loan_status"])
feature_cols = df.columns[~is_target]

mx = MinMaxScaler()
scaled_features = mx.fit_transform(df[feature_cols])
df[feature_cols] = scaled_features


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Separate the target column from the predictors.
y = df["loan_status"]
X = df.drop(columns=["loan_status"])

# Shuffled, stratified 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y,
)

# Hyperparameters of the tree; the depth and leaf-size limits are there to
# curb overfitting. (max_features="sqrt" was tried and is left disabled.)
tree_params = {
    "criterion": "entropy",
    "max_depth": 4,                  # other values to try: None, 3, 5, 10, ...
    "min_samples_split": 10,         # minimum samples needed to split a node
    "min_samples_leaf": 5,           # minimum samples in a leaf
    "min_impurity_decrease": 0.001,
    "random_state": 42,
}
clf = DecisionTreeClassifier(**tree_params)
clf.fit(X_train, y_train)

# Score the fitted tree on the held-out split.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
depth = clf.get_depth()

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Tree Depth: {depth} | Number of Leaves: {clf.get_n_leaves()}")

In [None]:
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Render the fitted tree on a large, high-resolution canvas and save it as PNG.
plt.figure(figsize=(40, 20), dpi=300)
plot_tree(
    clf,
    # Pass a plain list rather than a pandas Index: this matches the
    # export_text call elsewhere in the notebook, and some scikit-learn
    # releases validate this argument as a list.
    feature_names=list(X.columns),
    class_names=class_names,
    filled=True,
    rounded=True,
    impurity=False,
    label="root"
)
plt.savefig("decision_tree_simple.png", bbox_inches="tight")
# plt.close()


In [11]:
from sklearn.tree import export_text

# Dump the fitted tree as indented text; max_depth is set above the real depth
# so no branch gets truncated, and thresholds are printed with 5 decimals.
tree_rules = export_text(
    clf,
    feature_names=list(X.columns),
    max_depth=depth + 5,
    decimals=5,
)

# Persist the textual rules next to the notebook.
with open("rules.txt", "w") as rules_file:
    rules_file.write(tree_rules)


In [None]:
from sklearn.tree import export_text
import re

# Convert the export_text dump into experta rule source code.
# `stack` holds the split conditions on the path from the root to the node
# currently being read; every "class:" line (a leaf) becomes one rule whose
# conditions are the first `indent_level` entries of that stack.
rules = []
rule_count = 0
stack = []

for line in tree_rules.split("\n"):
    # Each "|   " unit is one level of nesting in the text dump.
    indent_level = line.count("|   ")
    line = line.strip()

    # Remove tree symbols
    line = re.sub(r"\|--- ", "", line)
    line = line.replace("|   ", "")

    if line.startswith("class:"):
        class_val = int(float(line.split(":")[1].strip()))
        # The label is only used in the generated comment; fall back to the
        # raw code if the mapping has no entry for it.
        class_label = label_map.get(class_val, class_val)
        # Build final rule from stack
        conditions = stack[:indent_level]
        condition_str = ", ".join(conditions)
        # Emit the parsed class code itself so the generated prediction always
        # matches the encoding the tree was trained on, whatever the labels are.
        rule_code = (
            f"    @Rule(Fact({condition_str}))\n"
            f"    def rule_{rule_count}(self):\n"
            f"        # print(f\"Rule {rule_count}: Loan Status: {class_label}\")\n"
            f"        self.prediction = {class_val}\n"
        )
        rules.append(rule_code)
        rule_count += 1
    elif "<=" in line or ">" in line:
        # Only "<=" and ">" are handled here; this branch must be extended if
        # the dump ever contains other operators such as ">=" or "<".
        op = "<=" if " <= " in line else ">"
        # Split on the last occurrence so a feature name containing the
        # operator text cannot break the unpacking.
        feature, _, op_val = line.rpartition(op)
        val = float(op_val.strip())
        stack = stack[:indent_level] + [f"{feature.strip()}=P(lambda x: x {op} {val})"]

# Output rules
print("""from experta import *\n\nclass LoanExpert(KnowledgeEngine):
    def __init__(self):
        super().__init__()
        self.prediction = None\n""")
for r in rules:
    print(r)


In [13]:
from experta import *

class LoanExpert(KnowledgeEngine):
    """Rule-based loan classifier over min-max normalised features.

    Declare a single ``Fact`` carrying the feature values and run the engine;
    the rule that fires stores the class code in ``self.prediction``
    (0 = Approved, 1 = Rejected). ``prediction`` stays ``None`` when no rule
    matches. The rules look like the output of the rule-generation cell of
    this notebook -- regenerate them if the tree is retrained.
    """

    def __init__(self):
        super().__init__()
        self.prediction = None

    @Rule(
        Fact(
            cibil_score=P(lambda v: v <= 0.41583),
            loan_term=P(lambda v: v <= 0.16667),
            loan_amount=P(lambda v: v <= 0.66199),
            income_annum=P(lambda v: v <= 0.7268),
        )
    )
    def rule_0(self):
        """Rule 0 -> Approved."""
        self.prediction = 0

    @Rule(
        Fact(
            cibil_score=P(lambda v: v <= 0.41583),
            loan_term=P(lambda v: v <= 0.16667),
            loan_amount=P(lambda v: v <= 0.66199),
            income_annum=P(lambda v: v > 0.7268),
        )
    )
    def rule_1(self):
        """Rule 1 -> Rejected."""
        self.prediction = 1

    @Rule(
        Fact(
            cibil_score=P(lambda v: v <= 0.41583),
            loan_term=P(lambda v: v <= 0.16667),
            loan_amount=P(lambda v: v > 0.66199),
            income_annum=P(lambda v: v <= 0.97423),
        )
    )
    def rule_2(self):
        """Rule 2 -> Approved."""
        self.prediction = 0

    @Rule(
        Fact(
            cibil_score=P(lambda v: v <= 0.41583),
            loan_term=P(lambda v: v <= 0.16667),
            loan_amount=P(lambda v: v > 0.66199),
            income_annum=P(lambda v: v > 0.97423),
        )
    )
    def rule_3(self):
        """Rule 3 -> Approved."""
        self.prediction = 0

    @Rule(
        Fact(
            cibil_score=P(lambda v: v <= 0.41583),
            loan_term=P(lambda v: v > 0.16667),
        )
    )
    def rule_4(self):
        """Rule 4 -> Rejected."""
        self.prediction = 1

    @Rule(
        Fact(
            cibil_score=P(lambda v: v > 0.41583),
            residential_assets_value=P(lambda v: v <= 0.03938),
            commercial_assets_value=P(lambda v: v <= 0.09536),
            loan_amount=P(lambda v: v <= 0.11352),
        )
    )
    def rule_5(self):
        """Rule 5 -> Approved."""
        self.prediction = 0

    @Rule(
        Fact(
            cibil_score=P(lambda v: v > 0.41583),
            residential_assets_value=P(lambda v: v <= 0.03938),
            commercial_assets_value=P(lambda v: v <= 0.09536),
            loan_amount=P(lambda v: v > 0.11352),
        )
    )
    def rule_6(self):
        """Rule 6 -> Approved."""
        self.prediction = 0

    @Rule(
        Fact(
            cibil_score=P(lambda v: v > 0.41583),
            residential_assets_value=P(lambda v: v <= 0.03938),
            commercial_assets_value=P(lambda v: v > 0.09536),
        )
    )
    def rule_7(self):
        """Rule 7 -> Approved."""
        self.prediction = 0

    @Rule(
        Fact(
            cibil_score=P(lambda v: v > 0.41583),
            residential_assets_value=P(lambda v: v > 0.03938),
        )
    )
    def rule_8(self):
        """Rule 8 -> Approved."""
        self.prediction = 0

In [None]:
# Testing on one example

# Raw (un-normalised) feature values for a single applicant.
user_input = {
    "no_of_dependents": 4,
    "education": 0,
    "self_employed": 0,
    "income_annum": 7000000,
    "loan_amount": 10,
    "loan_term": 5,
    "cibil_score": 12,
    "residential_assets_value": 200000,
    "commercial_assets_value": 1500000,
    "luxury_assets_value": 22700000,
    "bank_asset_value": 8000000
}
# Convert to DataFrame to transform. The columns are put in exactly the order
# the scaler was fitted on, so the result does not depend on the key order of
# the dictionary above.
input_df = pd.DataFrame([user_input])[list(feature_cols)]
normalized_input = mx.transform(input_df)[0]  # 1D array
# Pair each scaled value with its feature name. `feature_cols` is used instead
# of `df.columns` because the latter also contains the `loan_status` target
# and only lined up by virtue of that column being last.
normalized_fact = dict(zip(feature_cols, normalized_input))


# Alternative: supply an already-normalised fact directly.
# normalized_fact = {
# "no_of_dependents":            0.600000,
# "education":                   1.000000,
# "self_employed":               1.000000,
# "income_annum":                0.237113,
# "loan_amount":                 0.216837,
# "loan_term":                   0.555556,
# "cibil_score":                 0.396667,
# "residential_assets_value":    0.171233,
# "commercial_assets_value":     0.092784,
# "luxury_assets_value":         0.226221,
# "bank_asset_value":            0.224490,
# }

# Run the rule engine on the single fact and show the predicted class code.
engine = LoanExpert()
engine.reset()
engine.declare(Fact(**normalized_fact))
engine.run()
print(engine.prediction)


In [None]:
from sklearn.metrics import accuracy_score

# Class code recorded when no rule fires. It is not a real class, so such rows
# count as misclassified instead of putting None into the prediction list
# (which makes accuracy_score raise).
UNMATCHED = -1

y_pred_rule = []

for i, row in X_test.iterrows():
    # Run inference using the rule engine; a fresh engine per row guarantees
    # that `prediction` starts out as None.
    engine = LoanExpert()
    engine.reset()
    engine.declare(Fact(**row))
    engine.run()
    if engine.prediction is None:
        print("No rule matched for:", row)
        y_pred_rule.append(UNMATCHED)
    else:
        y_pred_rule.append(engine.prediction)

y_pred_tree = clf.predict(X_test)
# Compute accuracy
print("Accuracy on test set:", accuracy_score(y_test, y_pred_rule))
# Fidelity check: how often the extracted rules reproduce the tree's own output.
print("Agreement with decision tree:", accuracy_score(y_pred_tree, y_pred_rule))
