# importing libraries

In [2]:
# 🛠 Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import joblib  # for saving models


## Step 2: Load the Dataset




In [4]:
# Load dataset (update the filename to your CSV)
df = pd.read_csv("alu_dataset.csv")

# Show first 5 rows
df.head()


Unnamed: 0,a,b,opcode,golden_result,faulty_result,label
0,303379748,3230228097,2,70656,70656,OK
1,2223298057,2985317987,0,913648748,913648748,OK
2,112818957,1189058957,2,110696717,110696717,OK
3,2999092325,2302104082,0,1006229111,1006163575,FAULT
4,15983361,114806029,2,13877505,13877505,OK


## 🔧 Step 3: Preprocess Data


In [5]:
# Encode labels
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])  # OK=0, FAULT=1

# Features and target
X = df[['a', 'b', 'opcode', 'golden_result', 'faulty_result']]
y = df['label']

# Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## 🤖 Step 4: Train Machine Learning Models
We will try three models:
1. Logistic Regression
2. Random Forest
3. Support Vector Machine (SVM)


In [6]:
# Logistic Regression
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Support Vector Machine
svm = SVC(kernel='rbf', probability=True)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)


## 📊 Step 5: Evaluate Models

In [7]:
def evaluate_model(name, y_test, y_pred):
    print(f"\n🔹 {name} Results:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

evaluate_model("Logistic Regression", y_test, y_pred_lr)
evaluate_model("Random Forest", y_test, y_pred_rf)
evaluate_model("SVM", y_test, y_pred_svm)



🔹 Logistic Regression Results:
Accuracy: 0.9029029029029029
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        97
           1       0.90      1.00      0.95       902

    accuracy                           0.90       999
   macro avg       0.45      0.50      0.47       999
weighted avg       0.82      0.90      0.86       999

Confusion Matrix:
 [[  0  97]
 [  0 902]]

🔹 Random Forest Results:
Accuracy: 0.9029029029029029
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.04      0.08        97
           1       0.91      1.00      0.95       902

    accuracy                           0.90       999
   macro avg       0.70      0.52      0.51       999
weighted avg       0.87      0.90      0.86       999

Confusion Matrix:
 [[  4  93]
 [  4 898]]

🔹 SVM Results:
Accuracy: 0.9029029029029029
Classification Report:
               precision    re

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


## 💾 Step 6: Save the Best Model
We can save the trained model for later use in Python programs.


In [8]:
# Save Random Forest as example (if it performs best)
joblib.dump(rf, "alu_fault_detector.pkl")
print("✅ Model saved as alu_fault_detector.pkl")


✅ Model saved as alu_fault_detector.pkl


# 🎯 Conclusion
- We trained multiple ML models to detect ALU faults.
- Random Forest often performs best for this kind of classification problem.
- The trained model can now be used to predict on **new ALU outputs** 
  and detect if a fault has occurred.


In [4]:
import pandas as pd
import joblib

# Load your saved model (pick the best one you saved, e.g., Random Forest)
model = joblib.load("alu_fault_detector.pkl")

# Create test cases (same format as training data)
test_cases = pd.DataFrame([
    # ✅ OK cases
    {"a": 10, "b": 20, "opcode": 0, "golden_result": 30, "faulty_result": 30},  # ADD
    {"a": 15, "b": 5,  "opcode": 1, "golden_result": 10, "faulty_result": 10},  # SUB
    {"a": 7,  "b": 3,  "opcode": 2, "golden_result": 3,  "faulty_result": 3},   # AND
    {"a": 6,  "b": 2,  "opcode": 3, "golden_result": 6,  "faulty_result": 6},   # OR
    {"a": 12, "b": 5,  "opcode": 4, "golden_result": 9,  "faulty_result": 9},   # XOR

    # ❌ Faulty cases
    {"a": 10, "b": 20, "opcode": 0, "golden_result": 30, "faulty_result": 31},  # bit-flip
    {"a": 15, "b": 5,  "opcode": 1, "golden_result": 10, "faulty_result": 8},   # wrong SUB
    {"a": 7,  "b": 3,  "opcode": 2, "golden_result": 3,  "faulty_result": 0},   # AND fault
    {"a": 6,  "b": 2,  "opcode": 3, "golden_result": 6,  "faulty_result": 7},   # OR fault
    {"a": 12, "b": 5,  "opcode": 4, "golden_result": 9,  "faulty_result": 11},  # XOR fault
])

# Predict
predictions = model.predict(test_cases)
print(list(zip(test_cases.to_dict(orient="records"), predictions)))


[({'a': 10, 'b': 20, 'opcode': 0, 'golden_result': 30, 'faulty_result': 30}, np.int64(1)), ({'a': 15, 'b': 5, 'opcode': 1, 'golden_result': 10, 'faulty_result': 10}, np.int64(1)), ({'a': 7, 'b': 3, 'opcode': 2, 'golden_result': 3, 'faulty_result': 3}, np.int64(1)), ({'a': 6, 'b': 2, 'opcode': 3, 'golden_result': 6, 'faulty_result': 6}, np.int64(1)), ({'a': 12, 'b': 5, 'opcode': 4, 'golden_result': 9, 'faulty_result': 9}, np.int64(1)), ({'a': 10, 'b': 20, 'opcode': 0, 'golden_result': 30, 'faulty_result': 31}, np.int64(1)), ({'a': 15, 'b': 5, 'opcode': 1, 'golden_result': 10, 'faulty_result': 8}, np.int64(1)), ({'a': 7, 'b': 3, 'opcode': 2, 'golden_result': 3, 'faulty_result': 0}, np.int64(1)), ({'a': 6, 'b': 2, 'opcode': 3, 'golden_result': 6, 'faulty_result': 7}, np.int64(1)), ({'a': 12, 'b': 5, 'opcode': 4, 'golden_result': 9, 'faulty_result': 11}, np.int64(1))]
