In [1]:
# !pip install experta

In [2]:
import pandas as pd
import pickle as pkl
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree
import re
from sklearn.tree import export_text

In [None]:
# Raw transactions CSV, addressed relative to the local `datasets/` directory.
dataset = "Fraud-Detection-Transactions-Dataset/synthetic_fraud_dataset.csv"
dataset_path = f"datasets/{dataset}"
df = pd.read_csv(dataset_path)

# Row count as a quick check that the file was read.
print(df.shape[0])

In [None]:
# Name of the label column used by every later cell.
target_col = 'Fraud_Label'
print(df.shape)

# Header names may carry stray whitespace; normalise them before selecting columns.
df.columns = df.columns.str.strip()

# Drop the identifier columns: Transaction_ID, User_ID.
# errors='ignore' keeps this cell re-runnable once the columns are already gone.
df = df.drop(columns=['Transaction_ID', 'User_ID'], errors='ignore')

# Replace the raw timestamp with the hour of day. The guard makes a second
# execution of this cell a no-op instead of a KeyError on the missing column.
if 'Timestamp' in df.columns:
    # Convert to datetime if not already, then keep only the hour component.
    df['Hour'] = pd.to_datetime(df['Timestamp']).dt.hour
    # Drop original Timestamp column
    df = df.drop(columns=['Timestamp'])

df.head()

In [None]:
# Print each column's dtype and non-null count to sanity-check the frame.
df.info()

In [None]:
# Total number of missing cells in the frame (per-column null counts, summed).
df.isnull().sum().sum()

In [None]:
# Class balance of the label column.
print(f"{target_col}: {df[target_col].value_counts()}")

# Integer-encode every text column and remember what each code means.
# `category_maps[col][code]` gives back the original category, which is needed
# to build or interpret hand-written inputs that use the integer codes.
# Mappings from a previous execution are kept: once a column is encoded it is
# no longer text, so re-running this cell would otherwise lose them.
category_maps = globals().get("category_maps", {})
for col in df.columns:
    # Accept both classic object columns and pandas' dedicated string dtype.
    is_text = pd.api.types.is_object_dtype(df[col]) or pd.api.types.is_string_dtype(df[col])
    if is_text:
        # Codes follow order of first appearance; missing values are coded as -1.
        codes, uniques = pd.factorize(df[col])
        category_maps[col] = dict(enumerate(uniques))
        df[col] = codes
        print(f"Column '{col}' has {df[col].nunique()} unique values.")

class_names = ['Not Fraud', 'Fraud']
# Define label mapping
label_map = {0: "Not Fraud", 1: "Fraud"}

In [11]:
# Min-max scale every predictor column; the label column is left untouched.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed later in the notebook.
is_feature = df.columns != target_col
feature_cols = df.columns[is_feature]
mx = MinMaxScaler()
mx.fit(df[feature_cols])
df[feature_cols] = mx.transform(df[feature_cols])


In [None]:
# Separate the label from the predictors.
y = df[target_col]
X = df.drop(columns=[target_col])

# Stratified hold-out split: 80% training, 20% testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y,
)

# Hyper-parameters of the shallow entropy tree that the rules are extracted from.
tree_params = {
    "criterion": "entropy",
    "max_depth": 4,                  # Try values: None, 3, 5, 10, etc.
    "min_samples_split": 10,         # Prevent overfitting
    "min_samples_leaf": 5,           # Minimum samples in a leaf
    "min_impurity_decrease": 0.001,
    "random_state": 42,
}
clf = DecisionTreeClassifier(**tree_params)
clf.fit(X_train, y_train)

# Hold-out accuracy and the size of the fitted tree.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
depth = clf.get_depth()
n_leaves = clf.get_n_leaves()

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Tree Depth: {depth} | Number of Leaves: {n_leaves}")

In [None]:
# Render the fitted tree and save it as a PNG next to the notebook.
plt.figure(figsize=(40, 20), dpi=300)
plot_tree(
    clf,
    # Pass a plain list, as the export_text call in this notebook does:
    # some scikit-learn releases reject a pandas Index for this parameter.
    feature_names=list(X.columns),
    class_names=class_names,
    filled=True,
    rounded=True,
    impurity=False,
    label="root",
)
plt.savefig("decision_tree_simple.png", bbox_inches="tight")


In [17]:
# Text dump of the fitted tree; max_depth is set above the tree's own depth
# so no branch is truncated in the export.
rule_feature_names = X.columns.tolist()
tree_rules = export_text(
    clf,
    feature_names=rule_feature_names,
    max_depth=depth + 5,
    decimals=5,
)

# Keep a copy of the rules on disk.
with open("fraud_rules.txt", "w") as rules_file:
    rules_file.write(tree_rules)

In [None]:
# Parse and convert

# One split line of the export_text output once the tree-drawing prefix is
# removed, e.g. "Risk_Score <= 0.85004" or "Risk_Score >  0.85004".
# The greedy feature group makes the match split on the LAST operator in the line.
_SPLIT_RE = re.compile(r"^(?P<feature>.+)\s+(?P<op><=|>)\s+(?P<threshold>\S+)$")


def _format_conditions(conditions):
    """Render (feature, op, threshold) triples as keyword arguments for Fact().

    All tests on the same feature are merged into a single predicate joined
    with ``and``. A path that splits twice on one feature would otherwise
    produce a duplicate keyword argument, which is a SyntaxError in the
    generated code.
    """
    tests_by_feature = {}
    for feature, op, threshold in conditions:
        tests_by_feature.setdefault(feature, []).append(f"x {op} {threshold}")
    return ", ".join(
        f"{feature}=P(lambda x: {' and '.join(tests)})"
        for feature, tests in tests_by_feature.items()
    )


def tree_rules_to_experta(rules_text, labels):
    """Turn an export_text dump into source code for experta rule methods.

    Args:
        rules_text: string returned by sklearn.tree.export_text.
        labels: mapping from integer class value to a readable label.

    Returns:
        A list of strings, one per leaf of the tree, each holding a complete
        ``@Rule``-decorated method.

    Raises:
        KeyError: if a leaf's class value is not a key of ``labels``.
    """
    generated = []
    path = []  # (feature, op, threshold) of every split above the current line
    for raw_line in rules_text.split("\n"):
        # Each "|   " prefix is one level of nesting in the export_text layout.
        level = raw_line.count("|   ")

        # Remove tree symbols
        text = raw_line.strip().replace("|--- ", "").replace("|   ", "").strip()

        if text.startswith("class:"):
            class_val = int(float(text.split(":", 1)[1].strip()))
            class_label = labels[class_val]
            # A leaf at nesting level N is reached through the first N splits on the path.
            condition_str = _format_conditions(path[:level])
            index = len(generated)
            generated.append(
                f"    @Rule(Fact({condition_str}))\n"
                f"    def rule_{index}(self):\n"
                f"        # print(f\"Rule {index}: Fraud Status: {class_label}\")\n"
                f"        self.prediction = {class_val}\n"
            )
            continue

        split = _SPLIT_RE.match(text)
        if split:
            condition = (split["feature"].strip(), split["op"], float(split["threshold"]))
            # Discard splits from deeper or sibling branches before recording this one.
            path = path[:level] + [condition]
    return generated


rules = tree_rules_to_experta(tree_rules, label_map)

# Output rules
print("""from experta import *\n\nclass FraudExpert(KnowledgeEngine):
    def __init__(self):
        super().__init__()
        self.prediction = None\n""")
for r in rules:
    print(r)


In [22]:
from experta import Fact, KnowledgeEngine, P, Rule


class FraudExpert(KnowledgeEngine):
    """Rule-based copy of the decision tree trained in this notebook.

    Each rule corresponds to one leaf of the tree. The thresholds are the
    tree's split values, so facts must carry feature values on the same scale
    as the data the tree was trained on. After ``run()``, ``prediction`` holds
    0 (Not Fraud) or 1 (Fraud), or None if no rule fired.
    """

    def __init__(self):
        super().__init__()
        self.prediction = None

    def reset(self, **kwargs):
        """Clear the previous prediction together with the engine's working memory.

        Without this, an engine reused for a second fact would still report
        the earlier result whenever no rule fires.
        """
        self.prediction = None
        super().reset(**kwargs)

    @Rule(Fact(Failed_Transaction_Count_7d=P(lambda x: x <= 0.875), Risk_Score=P(lambda x: x <= 0.85004)))
    def rule_0(self):
        """Few failed transactions and a low risk score: Not Fraud."""
        self.prediction = 0

    @Rule(Fact(Failed_Transaction_Count_7d=P(lambda x: x <= 0.875), Risk_Score=P(lambda x: x > 0.85004)))
    def rule_1(self):
        """Few failed transactions but a high risk score: Fraud."""
        self.prediction = 1

    @Rule(Fact(Failed_Transaction_Count_7d=P(lambda x: x > 0.875)))
    def rule_2(self):
        """Many failed transactions: Fraud."""
        self.prediction = 1

In [None]:
# Testing on one example

# Raw (un-scaled) feature values for a single transaction. Text-valued
# features are given as the integer codes produced by the encoding cell.
user_input = {
    "Transaction_Amount":              39.79,
    "Transaction_Type":                0,
    "Account_Balance":                 93213.17,
    "Device_Type":                     2,
    "Location":                        2,
    "Merchant_Category":               1,
    "IP_Address_Flag":                 0,
    "Previous_Fraudulent_Activity":    0,
    "Daily_Transaction_Count":         7,
    "Avg_Transaction_Amount_7d":       437.63,
    "Failed_Transaction_Count_7d":     3,
    "Card_Type":                       0,
    "Card_Age":                        65,
    "Transaction_Distance":            883.17,
    "Authentication_Method":           1,
    "Risk_Score":                      0.8494,
    "Is_Weekend":                      0,
    "Hour":                            5
}

# Convert to a one-row DataFrame whose columns are in exactly the order the
# scaler was fitted on; a missing feature fails here with a clear KeyError.
input_df = pd.DataFrame([user_input])[list(feature_cols)]
normalized_input = mx.transform(input_df)[0]  # 1D array

# Pair the scaled values with the scaler's own feature names. df.columns also
# contains the target column, so zipping with it can shift values onto the
# wrong names and drop the last feature.
normalized_fact = {
    name: float(value) for name, value in zip(feature_cols, normalized_input)
}

engine = FraudExpert()
engine.reset()
engine.declare(Fact(**normalized_fact))
engine.run()
print(engine.prediction)


In [None]:
# Label recorded when no rule fires. It never equals a real class, so such rows
# are scored as errors instead of putting None into the metric computation.
UNMATCHED = -1

y_pred_rule = []

# One engine is enough: reset() empties its working memory between rows, and
# the prediction is cleared by hand so a stale value can never leak across rows.
engine = FraudExpert()
for _, row in X_test.iterrows():
    # Run inference using the rule engine
    engine.reset()
    engine.prediction = None
    engine.declare(Fact(**row.to_dict()))
    engine.run()
    y_pred_rule.append(UNMATCHED if engine.prediction is None else engine.prediction)

n_unmatched = y_pred_rule.count(UNMATCHED)
if n_unmatched:
    print(f"No rule matched for {n_unmatched} of {len(y_pred_rule)} test rows (counted as errors).")

y_pred_tree = clf.predict(X_test)
# Compute accuracy
print(f"Accuracy on test set: {accuracy_score(y_test, y_pred_rule) * 100}%")
# How faithfully the generated rules reproduce the tree they were extracted from.
print(f"Agreement with decision tree: {accuracy_score(y_pred_tree, y_pred_rule) * 100}%")