In [None]:
import pandas as pd

# ---------- Step 1: CA evolution ----------
def evolve_CA(PS, Rule, d, n, left, right):
    """Apply one synchronous update step to a cyclic CA configuration.

    For every cell, the neighbourhood (``left`` cells to the left, the cell
    itself, ``right`` cells to the right, wrapping around the ends) is read as
    a base-``d`` number with the leftmost neighbour as the most significant
    digit. That number indexes ``Rule`` to give the cell's next value.

    Args:
        PS: Present state, a sequence of ``n`` cell values.
        Rule: Lookup table indexed by the neighbourhood number.
        d: Number of states per cell (the numeric base).
        n: Number of cells.
        left: Neighbourhood radius to the left.
        right: Neighbourhood radius to the right.

    Returns:
        A new list of ``n`` cell values; ``PS`` is not modified.
    """
    width = left + right + 1
    next_state = []
    for cell in range(n):
        # offset 0 is the leftmost neighbour and carries the highest power.
        index = sum(
            int((d ** (width - 1 - offset)) * PS[(cell - left + offset) % n])
            for offset in range(width)
        )
        next_state.append(Rule[index])
    return next_state


# ---------- Step 2: Cycle finding ----------
def find_cycle(start_state, Rule, d, n, left, right):
    """Iterate the CA from ``start_state`` until a state repeats.

    Returns:
        The list of states (as tuples) that form the cycle, starting at the
        first state of the trajectory that is later revisited. Transient
        states visited before entering the cycle are not included.
    """
    first_seen_at = {}   # state tuple -> position in `trajectory`
    trajectory = []      # every distinct state visited so far, in order
    current = start_state[:]
    while True:
        key = tuple(current)
        if key in first_seen_at:
            return trajectory[first_seen_at[key]:]
        first_seen_at[key] = len(trajectory)
        trajectory.append(key)
        current = evolve_CA(current, Rule, d, n, left, right)


# ---------- Step 3: Canonical cycle ----------
def canonical_cycle_key(cycle):
    """Build a string key for a cycle that does not depend on its start point.

    Each state becomes the concatenation of its cell values; the states are
    joined with ``-``. Of all rotations of the cycle, the lexicographically
    smallest joined string is returned.
    """
    encoded = []
    for state in cycle:
        encoded.append("".join(str(cell) for cell in state))
    return min(
        "-".join(encoded[shift:] + encoded[:shift])
        for shift in range(len(encoded))
    )


# ---------- Step 4: Rule generation ----------
def generate_rule(params, d):
    """Build a 3-neighbourhood rule table from eight polynomial coefficients.

    For every triple ``(x, y, z)`` of values in ``range(d)`` the entry is

        (p0*x*y*z + p1*x*y + p2*x*z + p3*y*z + p4*x + p5*y + p6*z + p7) % d

    Entries are emitted with ``x`` varying slowest and ``z`` fastest, so the
    entry for ``(x, y, z)`` sits at index ``x*d*d + y*d + z``.

    Args:
        params: Sequence holding at least the eight coefficients ``p0..p7``.
        d: Number of states per cell.

    Returns:
        A list of ``d**3`` integers in ``range(d)``.
    """
    digits = range(d)
    return [
        (params[0]*x*y*z + params[1]*x*y + params[2]*x*z +
         params[3]*y*z + params[4]*x + params[5]*y +
         params[6]*z + params[7]) % d
        for x in digits
        for y in digits
        for z in digits
    ]


def compute_accuracy(df, Rule, d=10, left=1, right=1):
    """Train and evaluate the cycle-based CA classifier on an 80/20 split.

    The column named ``"Class"`` is the label; every other column is an
    attribute. Each attribute value is reduced to its digit characters,
    left-padded with zeros to the longest such digit string found anywhere in
    ``df``, and the padded strings are concatenated in column order to form
    the initial CA configuration.

    Training maps the canonical key of the cycle reached from each training
    row to the majority label of the rows that reach it. A test row whose
    cycle was never seen in training falls back to the label of the known
    cycle whose median state (read as an integer) is closest to its own.

    Accuracy, macro precision, macro recall and macro F1 are printed.

    Args:
        df: DataFrame containing a ``"Class"`` column plus attribute columns.
        Rule: CA rule lookup table.
        d: Number of states per cell.
        left: Neighbourhood radius to the left.
        right: Neighbourhood radius to the right.

    Returns:
        The accuracy on the held-out 20% of rows.
    """
    from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

    # --- Train/test split (fixed seed so every rule sees the same split) ---
    train_df = df.sample(frac=0.8, random_state=42)
    test_df = df.drop(train_df.index)

    class_col = "Class"
    attr_cols = [col for col in df.columns if col != class_col]

    def digits_only(x):
        """Keep only the digit characters of ``x``; return "0" if none remain."""
        filtered = "".join(ch for ch in str(x) if ch.isdigit())
        return filtered if filtered != "" else "0"

    # Global pad width across ALL rows (train and test) so that every row
    # yields a configuration of the same length.
    max_len = max(
        (len(digits_only(val)) for val in df[attr_cols].values.flatten()),
        default=1,
    )

    def cycle_key_for(row):
        """Encode a row as a CA configuration and return its canonical cycle key."""
        concat = "".join(digits_only(row[c]).zfill(max_len) for c in attr_cols)
        PS = [int(ch) for ch in concat]
        cycle = find_cycle(PS, Rule, d, len(PS), left, right)
        return canonical_cycle_key(cycle)

    def key_median(key):
        """Median state of a cycle key, each state read as an integer."""
        values = sorted(int(s) for s in key.split("-"))
        return values[len(values) // 2]

    # --- Training phase: collect the labels of all rows reaching each cycle ---
    cycle_map = {}
    for _, row in train_df.iterrows():
        cycle_map.setdefault(cycle_key_for(row), []).append(row[class_col])

    # Majority label per cycle. On a tie, max() keeps whichever tied label it
    # meets first while iterating the set.
    cycle_labels = {c: max(set(labels), key=labels.count) for c, labels in cycle_map.items()}

    # --- Testing phase ---
    y_true = []
    y_pred = []
    # The medians of the known cycles do not depend on the test row, so they
    # are built once, and only if some test row actually needs the fallback.
    cycle_medians = None

    for _, row in test_df.iterrows():
        test_key = cycle_key_for(row)

        if test_key in cycle_labels:
            pred = cycle_labels[test_key]
        else:
            if cycle_medians is None:
                cycle_medians = {c: key_median(c) for c in cycle_labels}
            test_median = key_median(test_key)
            nearest_cycle = min(
                cycle_medians,
                key=lambda c: abs(cycle_medians[c] - test_median),
            )
            pred = cycle_labels[nearest_cycle]

        y_true.append(row[class_col])
        y_pred.append(pred)

    # --- Compute metrics ---
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

    # The extra metrics are reported here only; callers receive accuracy alone.
    print("Accuracy  :", accuracy)
    print("Precision :", precision)
    print("Recall    :", recall)
    print("F1 Score  :", f1)

    return accuracy



# ---------- Step 6: Main ----------
def main():
    """Evaluate every rule listed in the parameter file on the dataset.

    Reads the dataset CSV and a text file with one whitespace-separated set of
    eight integer rule parameters per non-blank line. For each parameter set
    it builds the rule, times ``compute_accuracy`` and appends the result to
    the results file.
    """
    import time

    # === Load dataset (.csv) ===
    dataset_path = "monks-3-new.csv"   # <-- change this to your file
    df = pd.read_csv(dataset_path)
    print(f"✅ Loaded dataset with {len(df)} rows and columns: {list(df.columns)}")

    # === Load rule parameters from file (blank lines are skipped) ===
    param_file = "M3.txt"
    with open(param_file, "r") as f:
        param_sets = [[int(x) for x in line.split()] for line in f if line.strip()]
    print(f"✅ Loaded {len(param_sets)} rules from {param_file}")

    # === Evaluate each rule ===
    results_file = "Accuracy.txt"
    for params in param_sets:
        Rule = generate_rule(params, d=10)

        # perf_counter is monotonic, so the elapsed time cannot be distorted
        # by system clock adjustments.
        start = time.perf_counter()
        acc = compute_accuracy(df, Rule, d=10)
        end = time.perf_counter()

        print("Execution Time (seconds):", end - start)
        print(f"Rule {params} → Accuracy = {acc:.4f}")

        # Append after every rule so partial results survive an interrupted run.
        with open(results_file, "a") as f:
            f.write(" ".join(map(str, params)) + f"  Accuracy={acc:.4f}\n")

    # Report the file that was actually written.
    print(f"\n✅ Results written to {results_file}")


# Run the rule evaluation only when this code is executed directly
# (not when it is imported from another module).
if __name__ == "__main__":
    main()


✅ Loaded dataset with 432 rows and columns: ['Class', 'A1', 'A2', 'A3', 'A4', 'A5', 'A6']
✅ Loaded 1 rules from M3.txt
Accuracy  : 0.8953488372093024
Precision : 0.8953488372093024
Recall    : 0.8955627705627706
F1 Score  : 0.8953346855983773
Execution Time (seconds): 0.18248796463012695
Rule [0, 0, 0, 5, 0, 1, 0, 2] → Accuracy = 0.8953

✅ Results written to rule_accuracy.txt


Performance Comparison with Existing ML Models

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    precision_score, recall_score, f1_score, accuracy_score,
    classification_report, confusion_matrix
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


def print_scores(y_true, y_pred):
    """Print accuracy, precision, recall and F1 using sklearn's default binary averaging."""
    print('Accuracy: %.3f' % accuracy_score(y_true, y_pred))
    print('Precision: %.3f' % precision_score(y_true, y_pred))
    print('Recall: %.3f' % recall_score(y_true, y_pred))
    print('F1 Score: %.3f' % f1_score(y_true, y_pred))


# Load dataset
dataset_path = "Haber-man.csv"  # <-- change this to your file
df = pd.read_csv(dataset_path)

# Separate features and target
target = df['Class']
df = df.drop(columns=['Class'])

# Split data
X_train, X_test, y_train, y_test = train_test_split(df, target, test_size=0.2, random_state=42)

# Standardize data (scaler is fitted on the training split only)
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# ************************* SVM *************************
print("*************************SVM (Linear)************************************")
svc = SVC(kernel='linear', C=10.0, random_state=1)
svc.fit(X_train_std, y_train)
y_pred_svm = svc.predict(X_test_std)
print(classification_report(y_test, y_pred_svm))
print_scores(y_test, y_pred_svm)


# ************************* GaussianNB *************************
# The model fitted here is Gaussian naive Bayes, so the banner names it as such.
print("*************************GaussianNB************************************")
nb = GaussianNB()
nb.fit(X_train_std, y_train)
y_pred_nb = nb.predict(X_test_std)
print_scores(y_test, y_pred_nb)


# ************************* Decision Tree *************************
print("***************************DecisionTreeClassifier*********************")
# Seeded like the other stochastic models so the reported scores are reproducible.
clf = DecisionTreeClassifier(random_state=1)
clf.fit(X_train_std, y_train)
y_predict_DT = clf.predict(X_test_std)
print(confusion_matrix(y_test, y_predict_DT))
print(classification_report(y_test, y_predict_DT))
print_scores(y_test, y_predict_DT)


# ************************* Linear Regression *************************
print("**********************LinearRegression*********************")
reg = LinearRegression()
reg.fit(X_train_std, y_train)
y_score_LR = reg.predict(X_test_std)

# Convert the continuous regression output to class labels. The cut-off is the
# midpoint between the two training labels and the result is mapped back onto
# those labels; a fixed 0.5 cut-off producing 0/1 is only correct when the
# labels themselves are 0 and 1.
low_label, high_label = y_train.min(), y_train.max()
threshold = (low_label + high_label) / 2
y_predict_LR = [high_label if score >= threshold else low_label for score in y_score_LR]

print(confusion_matrix(y_test, y_predict_LR))
print(classification_report(y_test, y_predict_LR))
print_scores(y_test, y_predict_LR)


# ************************* KNeighborsClassifier *************************
print("*************************KNeighborsClassifier***************")
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train_std, y_train)
y_predict_KNN = neigh.predict(X_test_std)
print(confusion_matrix(y_test, y_predict_KNN))
print(classification_report(y_test, y_predict_KNN))
print_scores(y_test, y_predict_KNN)

# ************************* MLPClassifier *************************
print("*********************MLPClassifier*******************")
clf1 = MLPClassifier(random_state=1, max_iter=300)
clf1.fit(X_train_std, y_train)
y_predict_MLP = clf1.predict(X_test_std)
print(confusion_matrix(y_test, y_predict_MLP))
print(classification_report(y_test, y_predict_MLP))
print_scores(y_test, y_predict_MLP)



*************************SVM (Linear)************************************
              precision    recall  f1-score   support

           1       0.69      0.93      0.80        44
           2       0.00      0.00      0.00        18

    accuracy                           0.66        62
   macro avg       0.35      0.47      0.40        62
weighted avg       0.49      0.66      0.56        62

Accuracy: 0.661
Precision: 0.695
Recall: 0.932
F1 Score: 0.796
*************************MultinomialNB************************************
Accuracy: 0.710
Precision: 0.741
Recall: 0.909
F1 Score: 0.816
***************************DecisionTreeClassifier*********************
[[34 10]
 [12  6]]
              precision    recall  f1-score   support

           1       0.74      0.77      0.76        44
           2       0.38      0.33      0.35        18

    accuracy                           0.65        62
   macro avg       0.56      0.55      0.55        62
weighted avg       0.63      0.65   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[[39  5]
 [15  3]]
              precision    recall  f1-score   support

           1       0.72      0.89      0.80        44
           2       0.38      0.17      0.23        18

    accuracy                           0.68        62
   macro avg       0.55      0.53      0.51        62
weighted avg       0.62      0.68      0.63        62

Accuracy: 0.677
Precision: 0.722
Recall: 0.886
F1 Score: 0.796




Execution time of KNN

In [None]:
import time

import pandas as pd
from sklearn.metrics import (
    precision_score, recall_score, f1_score, accuracy_score,
    classification_report, confusion_matrix
)
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load dataset
dataset_path = "Haber-man (1).csv"  # <-- change this to your file
df = pd.read_csv(dataset_path)

# Separate features and target
target = df['Class']
df = df.drop(columns=['Class'])

# Split data
X_train, X_test, y_train, y_test = train_test_split(df, target, test_size=0.2, random_state=42)

# Standardize data (scaler is fitted on the training split only)
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)


# ************************* KNeighborsClassifier *************************
print("*************************KNeighborsClassifier***************")

# The timed region covers fitting, prediction and metric reporting. The
# module import is done above so that one-off import cost is not counted.
# perf_counter is used because it is monotonic.
start = time.perf_counter()
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train_std, y_train)
y_predict_KNN = neigh.predict(X_test_std)
print(confusion_matrix(y_test, y_predict_KNN))
print(classification_report(y_test, y_predict_KNN))
print('Accuracy: %.3f' % accuracy_score(y_test, y_predict_KNN))
print('Precision: %.3f' % precision_score(y_test, y_predict_KNN))
print('Recall: %.3f' % recall_score(y_test, y_predict_KNN))
print('F1 Score: %.3f' % f1_score(y_test, y_predict_KNN))
end = time.perf_counter()

print("Execution Time (seconds):", end - start)

*************************KNeighborsClassifier***************
[[36  8]
 [12  6]]
              precision    recall  f1-score   support

           1       0.75      0.82      0.78        44
           2       0.43      0.33      0.38        18

    accuracy                           0.68        62
   macro avg       0.59      0.58      0.58        62
weighted avg       0.66      0.68      0.66        62

Accuracy: 0.677
Precision: 0.750
Recall: 0.818
F1 Score: 0.783
Execution Time (seconds): 0.035925865173339844
