In [56]:
import pandas as pd
import numpy as np
import joblib
import os

In [58]:
# Load your test set
X_test_binary = pd.read_csv(r"C:\Users\ghaza\Desktop\FYP\Application\AI\data\dropped_final_training\X_test_dropped.csv")
X_test_multi = pd.read_csv(r"C:\Users\ghaza\Desktop\FYP\Application\AI\data\dropped_final_training\X_test_multi_dropped.csv")

In [60]:
X_test_binary.head()

Unnamed: 0,Water_Flow_Rate,Temperature_Fluctuation_Index,Pressure_Stability_Index,Detergent_Level,Hydraulic_Pressure,Coolant_Temperature
0,-0.109566,-0.912109,-0.354486,-0.317797,-0.19873,1.252999
1,-0.460948,-0.123897,-0.511168,-0.842531,-0.19873,-0.102126
2,-0.210826,-0.247584,-0.250963,-0.387861,-0.741503,0.779682
3,-1.12097,1.359175,0.749614,0.270374,0.494807,0.955652
4,1.107329,-0.418114,0.056019,-0.992408,1.667591,-0.196436


In [62]:
X_test_multi.head()

Unnamed: 0,Water_Flow_Rate,Temperature_Fluctuation_Index,Pressure_Stability_Index,Detergent_Level,Hydraulic_Pressure,Hydraulic_Oil_Temperature
0,-0.109566,-0.912109,-0.354486,-0.317797,-0.19873,0.247529
1,-0.460948,-0.123897,-0.511168,-0.842531,-0.19873,-1.119986
2,-0.210826,-0.247584,-0.250963,-0.387861,-0.741503,-0.373477
3,-1.12097,1.359175,0.749614,0.270374,0.494807,-0.395548
4,1.107329,-0.418114,0.056019,-0.992408,1.667591,-0.055879


In [64]:
model_binary = joblib.load(r"C:\Users\ghaza\Desktop\FYP\Application\AI\models\final_binary_soft_voting_ensemble.pkl")
model_binary

In [66]:
joblib.dump(model_binary, r"C:\Users\ghaza\Desktop\FYP\Application\AI\models\final_binary_soft_voting_ensemble_compat.pkl", protocol=4)

['C:\\Users\\ghaza\\Desktop\\FYP\\Application\\AI\\models\\final_binary_soft_voting_ensemble_compat.pkl']

In [68]:
model_multiclass = joblib.load(r"C:\Users\ghaza\Desktop\FYP\Application\AI\models\final_multi_ensemble.pkl")
model_multiclass

In [70]:
joblib.dump(model_binary, r"C:\Users\ghaza\Desktop\FYP\Application\AI\models\final_multi_soft_voting_ensemble_compat.pkl", protocol=4)

['C:\\Users\\ghaza\\Desktop\\FYP\\Application\\AI\\models\\final_multi_soft_voting_ensemble_compat.pkl']

In [72]:
# ====== Step 1: Predict using binary model (e.g. machine_failure yes/no) ======
y_pred_binary = model_binary.predict(X_test_binary)

# ====== Step 2: Predict using multiclass model (e.g. failure_type) ======
y_pred_multiclass = model_multiclass.predict(X_test_multi)

In [73]:
# ====== Step 3: Track inconsistency ======
# Logic: If binary model says "No Failure" (0), then multiclass should say "No Failure" (1)
inconsistent = ((y_pred_binary == 0) & (y_pred_multiclass != 1)).sum()

# Total comparisons for context
total = len(y_pred_binary)
consistency_rate = 100 * (1 - inconsistent / total)

print(f"Inconsistent predictions (binary=0, multiclass≠1): {inconsistent}")
print(f"Consistency rate: {consistency_rate:.2f}%")

Inconsistent predictions (binary=0, multiclass≠1): 3752
Consistency rate: 92.85%


In [77]:
def load_multitask_data(base_path):
    X_train = pd.read_csv(f"{base_path}/X_train_dropped.csv")
    y_train_binary = pd.read_csv(f"{base_path}/y_train_balanced_binary.csv").squeeze()
    y_train_multi = pd.read_csv(r"C:\Users\ghaza\Desktop\FYP\Application\AI\data\processed\balanced_y_train_multi.csv").squeeze()
    
    X_test = pd.read_csv(f"{base_path}/X_test_dropped.csv")
    y_test_binary = pd.read_csv(f"{base_path}/y_test_binary.csv").squeeze()
    y_test_multi = pd.read_csv(f"{base_path}/y_test_multi.csv").squeeze()

    return X_train, y_train_binary, y_train_multi, X_test, y_test_binary, y_test_multi

# Example usage
base_path = r"C:\Users\ghaza\Desktop\FYP\Application\AI\data\dropped_final_training"
X_train, y_train_binary, y_train_multi, X_test, y_test_binary, y_test_multi = load_multitask_data(base_path)

In [78]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiTaskNet(nn.Module):
    def __init__(self, input_dim, hidden_dim=64):
        super(MultiTaskNet, self).__init__()
        self.shared = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )
        self.binary_head = nn.Linear(hidden_dim, 1)
        self.multi_head = nn.Linear(hidden_dim, 5)

    def forward(self, x):
        shared = self.shared(x)
        binary_out = torch.sigmoid(self.binary_head(shared)).squeeze()
        multi_out = self.multi_head(shared)
        return binary_out, multi_out


In [76]:
pip install torch torchvision torchaudio

Note: you may need to restart the kernel to use updated packages.


In [86]:
from torch.utils.data import TensorDataset, DataLoader

# Convert to torch tensors
X_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_bin_tensor = torch.tensor(y_train_binary.values, dtype=torch.float32)
y_multi_tensor = torch.tensor(y_train_multi.values, dtype=torch.long)

# Dataset and DataLoader
dataset = TensorDataset(X_tensor, y_bin_tensor, y_multi_tensor)
loader = DataLoader(dataset, batch_size=64, shuffle=True)

# Initialize model
model = MultiTaskNet(input_dim=X_train.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn_bin = nn.BCELoss()
loss_fn_multi = nn.CrossEntropyLoss()

# Train
for epoch in range(20):
    model.train()
    total_loss = 0
    for xb, yb, ym in loader:
        optimizer.zero_grad()
        out_bin, out_multi = model(xb)
        loss = loss_fn_bin(out_bin, yb) + loss_fn_multi(out_multi, ym)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

Epoch 1, Loss: 1585.7788
Epoch 2, Loss: 1458.8634
Epoch 3, Loss: 1439.4937
Epoch 4, Loss: 1429.8364
Epoch 5, Loss: 1421.2516
Epoch 6, Loss: 1413.4760
Epoch 7, Loss: 1408.1865
Epoch 8, Loss: 1403.1947
Epoch 9, Loss: 1398.5991
Epoch 10, Loss: 1393.7692
Epoch 11, Loss: 1389.3118
Epoch 12, Loss: 1386.0908
Epoch 13, Loss: 1382.6946
Epoch 14, Loss: 1381.1773
Epoch 15, Loss: 1379.7039
Epoch 16, Loss: 1375.8796
Epoch 17, Loss: 1372.8463
Epoch 18, Loss: 1372.1484
Epoch 19, Loss: 1370.4596
Epoch 20, Loss: 1370.4475


In [87]:
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
    pred_bin, pred_multi = model(X_test_tensor)
    pred_bin_labels = (pred_bin > 0.5).int()
    pred_multi_labels = torch.argmax(pred_multi, dim=1)

In [88]:
import torch
import numpy as np
from sklearn.metrics import classification_report

def evaluate_multitask_model_consistency(model, X_test, y_test_binary, y_test_multi, threshold=0.5):
    """
    Evaluates a multitask model on test data and checks logical consistency:
    If binary predicts 0 (No Failure), multiclass must predict 1 (No Failure).
    """
    model.eval()
    with torch.no_grad():
        # Convert to tensor
        X_tensor = torch.tensor(X_test.values, dtype=torch.float32)
        y_bin_true = y_test_binary.values
        y_multi_true = y_test_multi.values

        # Predictions
        pred_bin_prob, pred_multi_logits = model(X_tensor)
        pred_bin_labels = (pred_bin_prob > threshold).int().numpy()
        pred_multi_labels = torch.argmax(pred_multi_logits, dim=1).numpy()

        # === Classification Reports ===
        print("🔍 Classification Report - Binary (machine_failure):\n")
        print(classification_report(y_bin_true, pred_bin_labels))

        print("🔍 Classification Report - Multiclass (failure_type):\n")
        print(classification_report(y_multi_true, pred_multi_labels))

        # === Consistency Check ===
        inconsistent = ((pred_bin_labels == 0) & (pred_multi_labels != 1)).sum()
        total = len(pred_bin_labels)
        consistency_rate = 100 * (1 - inconsistent / total)

        print(f"❌ Inconsistent predictions (binary=0, multiclass≠1): {inconsistent}")
        print(f"✅ Consistency rate: {consistency_rate:.2f}%")

        return {
            "binary_predictions": pred_bin_labels,
            "multiclass_predictions": pred_multi_labels,
            "inconsistency_count": inconsistent,
            "consistency_rate": consistency_rate
        }


In [89]:
results = evaluate_multitask_model_consistency(
    model=trained_multitask_model,
    X_test=X_test_dropped,
    y_test_binary=y_test_binary,
    y_test_multi=y_test_multi
)

NameError: name 'trained_multitask_model' is not defined