# Importing Libraries

In [7]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib  # For saving non-PyTorch models

# Code

In [8]:
# Function to load dataset from correct path
def load_dataset(dataset_name, cnn_model):
    base_path = os.path.join("dataset_features", dataset_name, cnn_model)  
    X_path = os.path.join(base_path, "features.npy")
    y_path = os.path.join(base_path, "labels.npy")

    if not os.path.exists(X_path) or not os.path.exists(y_path):
        raise FileNotFoundError(f"❌ Dataset files not found in {base_path}")

    X = np.load(X_path)
    y = np.load(y_path)
    
    print(f"✅ Loaded dataset: {dataset_name} | Model: {cnn_model} | Features: {X.shape} | Labels: {y.shape}")
    return X, y

In [9]:
# Define the Neural Network Classifier
class HFFClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(HFFClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.softmax(self.fc3(x))
        return x

In [10]:
# Training function for Neural Network
def train_neural_network(dataset_name, cnn_model, num_epochs=30, batch_size=64, optimizer_type="adam", split=True):
    print(f"🔄 Loading dataset: {dataset_name} | Model: {cnn_model}...")
    X, y = load_dataset(dataset_name, cnn_model)
    
    # Convert to PyTorch tensors
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)
    dataset = TensorDataset(X_tensor, y_tensor)

    if split:
        train_size = int(0.8 * len(dataset))
        test_size = len(dataset) - train_size
        train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    else:
        train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    input_size = X.shape[1]
    num_classes = len(set(y))
    model = HFFClassifier(input_size, num_classes)

    optimizer = optim.Adam(model.parameters(), lr=0.001) if optimizer_type.lower() == "adam" else optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # Training Loop
    for epoch in range(num_epochs):
        model.train()
        total_loss, correct_train, total_train = 0, 0, 0

        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == targets).sum().item()
            total_train += targets.size(0)

        train_accuracy = correct_train / total_train * 100

        if split:
            model.eval()
            correct_test, total_test = 0, 0
            with torch.no_grad():
                for inputs, targets in test_loader:
                    outputs = model(inputs)
                    _, predicted = torch.max(outputs, 1)
                    correct_test += (predicted == targets).sum().item()
                    total_test += targets.size(0)

            test_accuracy = correct_test / total_test * 100
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}, Train Acc: {train_accuracy:.2f}%, Test Acc: {test_accuracy:.2f}%")
        else:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}, Train Acc: {train_accuracy:.2f}%")

    print("✅ Training Completed!")
    
    model_path = f"dataset_models/{dataset_name}_{cnn_model}_nn.pth"
    torch.save(model.state_dict(), model_path)
    print(f"✅ Model Saved: {model_path}")

    return model

In [11]:
# Training Function for Other Models
def train_other_models(dataset_name, cnn_model, model_type="random_forest", split=True):
    print(f"🔄 Loading dataset: {dataset_name} | Model: {cnn_model}...")
    X, y = load_dataset(dataset_name, cnn_model)

    if split:
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    else:
        X_train, y_train = X, y
        X_test, y_test = None, None  # No test set

    model_map = {
        "random_forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "svm": SVC(kernel="rbf", probability=True),
        "logistic_regression": LogisticRegression(max_iter=1000),
        "gradient_boosting": GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
    }
    
    if model_type.lower() not in model_map:
        raise ValueError("Unsupported model. Choose 'random_forest', 'svm', 'logistic_regression', or 'gradient_boosting'.")
    
    model = model_map[model_type.lower()]
    print(f"🔄 Training {model_type} model on {dataset_name} | {cnn_model} features...")

    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    train_accuracy = accuracy_score(y_train, y_train_pred)

    if split:
        y_test_pred = model.predict(X_test)
        test_accuracy = accuracy_score(y_test, y_test_pred)
        print(f"✅ {model_type} Model Accuracy on {dataset_name} | {cnn_model}: Train: {train_accuracy * 100:.2f}% | Test: {test_accuracy * 100:.2f}%")
    else:
        print(f"✅ {model_type} Model Accuracy on {dataset_name} | {cnn_model}: Train: {train_accuracy * 100:.2f}%")

    model_path = f"dataset_models/{dataset_name}_{cnn_model}_{model_type}.pkl"
    joblib.dump(model, model_path)
    print(f"✅ Model Saved: {model_path}")

In [13]:

# List of all datasets (directories inside 'dataset_features')
DATASETS = ["1_UCI_Dataset", "2_Rice_Leaf_Disease_Images", "3_Rice_Disease_Image_Dataset"]

# List of all CNN models (directories inside each dataset)
CNN_MODELS = ["ResNet-50", "ResNeSt-50", "ResNeXt-50", "SE-ResNet-50"]  

# Training configuration
NUM_EPOCHS = 30
BATCH_SIZE = 64
OPTIMIZER_TYPE = "adam"  # Options: 'adam', 'sgd'
SPLIT_DATA = True  # Set to False if you don't want to split into train/test

# Store results
results = []

# Loop through each dataset
for dataset_name in DATASETS:
    print(f"\n🔷 Processing dataset: {dataset_name}\n" + "=" * 50)
    
    # Loop through each CNN model
    for cnn_model in CNN_MODELS:
        dataset_path = os.path.join("dataset_features", dataset_name, cnn_model)

        # Check if features exist for this model
        features_path = os.path.join(dataset_path, "features.npy")
        labels_path = os.path.join(dataset_path, "labels.npy")

        if not os.path.exists(features_path) or not os.path.exists(labels_path):
            print(f"❌ Skipping {dataset_name} - {cnn_model}: Features not found.")
            continue  # Move to the next CNN model

        print(f"\n🟢 Training on {dataset_name} using {cnn_model}...\n" + "-" * 40)

        # Train Neural Network
        nn_accuracy = train_neural_network(
            dataset_name=dataset_name,
            cnn_model=cnn_model,
            num_epochs=NUM_EPOCHS,
            batch_size=BATCH_SIZE,
            optimizer_type=OPTIMIZER_TYPE,
            split=SPLIT_DATA
        )

        # Train Classical ML Models and store accuracy
        model_accuracies = {"Random Forest": 0, "SVM": 0, "Logistic Regression": 0}

        for model_type in ["random_forest", "svm", "logistic_regression"]:
            accuracy = train_other_models(
                dataset_name=dataset_name,
                cnn_model=cnn_model,
                model_type=model_type,
                split=SPLIT_DATA
            )
            model_accuracies[model_type.replace("_", " ").title()] = accuracy

        # Store results in a dictionary
        results.append({
            "Dataset": dataset_name,
            "CNN Model": cnn_model,
            "Neural Network Accuracy": nn_accuracy,
            "Random Forest Accuracy": model_accuracies["Random Forest"],
            "SVM Accuracy": model_accuracies["SVM"],
            "Logistic Regression Accuracy": model_accuracies["Logistic Regression"]
        })

# Convert results to a DataFrame for tabular display
df_results = pd.DataFrame(results)

# Display results as a table
print("\n📊 **Final Model Accuracies**")
print(df_results.to_string(index=False))

# Optionally save results to a CSV file
df_results.to_csv("model_accuracies.csv", index=False)
print("\n✅ Accuracy results saved to 'model_accuracies.csv'")


🔷 Processing dataset: 1_UCI_Dataset

🟢 Training on 1_UCI_Dataset using ResNet-50...
----------------------------------------
🔄 Loading dataset: 1_UCI_Dataset | Model: ResNet-50...
✅ Loaded dataset: 1_UCI_Dataset | Model: ResNet-50 | Features: (120, 4606) | Labels: (120,)
Epoch [1/30], Loss: 1.0951, Train Acc: 34.38%, Test Acc: 33.33%
Epoch [2/30], Loss: 1.0655, Train Acc: 29.17%, Test Acc: 54.17%
Epoch [3/30], Loss: 0.9673, Train Acc: 71.88%, Test Acc: 41.67%
Epoch [4/30], Loss: 0.9351, Train Acc: 48.96%, Test Acc: 70.83%
Epoch [5/30], Loss: 0.8515, Train Acc: 82.29%, Test Acc: 62.50%
Epoch [6/30], Loss: 0.8092, Train Acc: 77.08%, Test Acc: 75.00%
Epoch [7/30], Loss: 0.7600, Train Acc: 87.50%, Test Acc: 75.00%
Epoch [8/30], Loss: 0.7098, Train Acc: 86.46%, Test Acc: 79.17%
Epoch [9/30], Loss: 0.6526, Train Acc: 88.54%, Test Acc: 75.00%
Epoch [10/30], Loss: 0.6451, Train Acc: 90.62%, Test Acc: 87.50%
Epoch [11/30], Loss: 0.6289, Train Acc: 94.79%, Test Acc: 87.50%
Epoch [12/30], Loss: 