<a href="https://colab.research.google.com/github/kanicaanand/HCV-MARL-Feature-Selection/blob/main/HCV-MARL-Feature%20Selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque, Counter
import time
import pickle
import warnings
from copy import deepcopy

warnings.filterwarnings('ignore')
device = torch.device('cpu') # Explicitly set to CPU
print(f"üñ•Ô∏è Using device: {device}")
print("Explicitly using CPU")

# Set random seeds
np.random.seed(42)
torch.manual_seed(42)

# Create directories
os.makedirs('saved_models', exist_ok=True)
os.makedirs('plots_multi_agent_final', exist_ok=True)

print("‚úÖ CPU-optimized environment ready!")

file_path= "dataset bakshi nagar 1.xlsx"
def preprocess_data():

  # Preprocess HCV dataset

    print(f"üìÅ Loading data from: {file_path}")
    df = pd.read_excel(file_path)
    print("Available columns in dataset:", df.columns.tolist())

    # Map target variable
    df['Status'] = df['Status'].map({
        'HCV RNA Detected': 1,
        'Negative': 0,
        'Target Not Detected': 0,
        '<Titer Min': 0
    })
    # Define expected features (clinical biomarkers only - no Age)
    expected_features = ['Total Protein', 'Albumin', 'Globulin',
                         'ALP', 'SGOT', 'SGPT', 'GGT', 'Bilrubin']
    features = [f for f in expected_features if f in df.columns]

    if len(features) < len(expected_features):
        print("‚ö†Ô∏è Warning: Some features not found. Using:", features)
    if not features:
        raise ValueError("‚ùå No valid features found in dataset.")

    print("NaN counts before imputation:\n", df[features].isna().sum())

    # Impute missing values with median
    for feature in features:
        df[feature] = df[feature].fillna(df[feature].median())

    if df[features].isna().any().any():
        raise ValueError("‚ùå NaN values still present after imputation.")

    # Drop rows with NaN in 'Status'
    df = df.dropna(subset=['Status'])

    # Prepare features and target
    X = df[features].values
    y = df['Status'].values


    # Feature scaling
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)

    print(f"‚úÖ Data preprocessing completed!")
    print(f"üìä Final dataset: {X.shape[0]} samples, {X.shape[1]} features")
    print(f"üéØ Class distribution: {np.unique(y, return_counts=True)}")

    return X, y, features, df

üñ•Ô∏è Using device: cpu
Explicitly using CPU
‚úÖ CPU-optimized environment ready!


In [3]:
X, y, features, df = preprocess_data()

print("\nFirst 5 rows of processed features (X):")
print(X[:5])
print("\nShape of X:", X.shape)

print("\nFirst 5 rows of processed target (y):")
print(y[:5])
print("\nShape of y:", y.shape)

üìÅ Loading data from: dataset bakshi nagar 1.xlsx
Available columns in dataset: ['S.no', 'Name', 'Gender', 'Age', 'Unnamed: 4', 'Status', 'Viral Load', 'Total Protein', 'Albumin', 'Globulin', 'ALP', 'SGOT', 'SGPT', 'GGT', 'Bilrubin']
NaN counts before imputation:
 Total Protein    7
Albumin          7
Globulin         7
ALP              7
SGOT             7
SGPT             7
GGT              7
Bilrubin         8
dtype: int64
‚úÖ Data preprocessing completed!
üìä Final dataset: 938 samples, 8 features
üéØ Class distribution: (array([0., 1.]), array([500, 438]))

First 5 rows of processed features (X):
[[8.12500000e-01 2.04081633e-02 2.32558140e-02 7.45573159e-03
  1.02661597e-01 1.10192837e-02 3.75000000e-02 1.47783251e-04]
 [9.37500000e-01 8.16326531e-02 1.86046512e-01 2.70270270e-02
  4.18250951e-02 4.82093664e-02 6.25000000e-02 1.97044335e-04]
 [8.62500000e-01 2.65306122e-01 1.16279070e-01 4.65983225e-03
  3.04182510e-01 6.06060606e-02 2.18750000e-01 7.38916256e-03]
 [9.12500000

In [2]:
def average_results_across_runs(all_runs_results):
    """Average results across multiple runs"""
    if not all_runs_results:
        return None

    agent_names = list(all_runs_results[0].keys())
    averaged_results = {}

    for agent in agent_names:
        averaged_results[agent] = {}

        # Get classifier names
        classifier_names = []
        for key in all_runs_results[0][agent].keys():
            if key not in ['features', 'n_features'] and isinstance(all_runs_results[0][agent][key], dict):
                classifier_names.append(key)

        # Average performance metrics
        for clf in classifier_names:
            metrics = ['accuracy', 'precision', 'recall', 'f1', 'f1_cv']
            averaged_results[agent][clf] = {}

            for metric in metrics:
                values = []
                for run_result in all_runs_results:
                    if clf in run_result[agent] and metric in run_result[agent][clf]:
                        values.append(run_result[agent][clf][metric])

                if values:
                    averaged_results[agent][clf][metric] = {
                        'mean': np.mean(values),
                        'std': np.std(values),
                        'min': np.min(values),
                        'max': np.max(values)
                    }

                    # Most frequent feature selection
        feature_selections = []
        for run_result in all_runs_results:
            if 'features' in run_result[agent] and run_result[agent]['features']:
                feature_selections.append(tuple(sorted(run_result[agent]['features'])))

        # Find most common feature combination
        if feature_selections:
            most_common = Counter(feature_selections).most_common(1)[0][0]
            averaged_results[agent]['features'] = list(most_common)
            averaged_results[agent]['n_features'] = len(most_common)
        else:
            averaged_results[agent]['features'] = []
            averaged_results[agent]['n_features'] = 0

    return averaged_results

print("‚úÖ Preprocessing and averaging functions defined!")

‚úÖ Preprocessing and averaging functions defined!


In [12]:
import torch.nn as nn
import torch.optim as optim
from collections import deque
import numpy as np
import torch


class FeatureSelectionEnv:

    #RL Environment for Feature Selection

    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.n_features = X.shape[1]
        self.state = np.zeros(self.n_features, dtype=int)
        self.model = LogisticRegression(solver='liblinear', random_state=42, class_weight='balanced')

    def reset(self):
        self.state = np.zeros(self.n_features, dtype=int)
        return self.state.copy()

    def step(self, action, agent_name):
        feature_idx = action // 2
        include = action % 2
        self.state[feature_idx] = include
        selected_features = np.where(self.state == 1)[0]

        if len(selected_features) == 0:
            return self.state.copy(), -1, False, 0

        X_subset = self.X[:, selected_features]
        self.model.fit(X_subset, self.y)
        y_pred = self.model.predict(X_subset)
        f1 = f1_score(self.y, y_pred, zero_division=0)

        penalty = 0.04 if agent_name == 'Parsimonious' else 0.02
        reward = f1 - penalty * len(selected_features)

        if len(selected_features) > 4:
            reward -= 0.1 * (len(selected_features) - 4)

        done = False
        return self.state.copy(), reward, done, f1


class QLearningAgent:
    def __init__(self, epsilon, alpha, gamma, n_actions):
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.n_actions = n_actions
        self.q_table = {}
        self.epsilon_decay = 0.995
        self.min_epsilon = 0.1
        self.max_q_table_size = 5000

    def get_action(self, state):
        state_tuple = tuple(state)
        if state_tuple not in self.q_table:
            self.q_table[state_tuple] = np.zeros(self.n_actions)

        if len(self.q_table) > self.max_q_table_size:
            q_sums = {s: np.sum(np.abs(qs)) for s, qs in self.q_table.items()}
            sorted_states = sorted(q_sums.items(), key=lambda x: x[1])
            for s, _ in sorted_states[:len(self.q_table) - self.max_q_table_size]:
                del self.q_table[s]

        if np.random.random() < self.epsilon:
            return np.random.randint(self.n_actions)
        return np.argmax(self.q_table[state_tuple])

    def update(self, state, action, reward, next_state):
        state_tuple = tuple(state)
        next_state_tuple = tuple(next_state)

        if state_tuple not in self.q_table:
            self.q_table[state_tuple] = np.zeros(self.n_actions)
        if next_state_tuple not in self.q_table:
            self.q_table[next_state_tuple] = np.zeros(self.n_actions)

        current_q = self.q_table[state_tuple][action]
        next_max_q = np.max(self.q_table[next_state_tuple])
        self.q_table[state_tuple][action] += self.alpha * (reward + self.gamma * next_max_q - current_q)

    def decay_epsilon(self):
        self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

class DQN(nn.Module):
    def __init__(self, n_features, n_actions):
        super(DQN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(n_features, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, n_actions)
        )

    def forward(self, x):
        return self.net(x)

class DQNAgent:
    def __init__(self, n_features, n_actions, epsilon, lr, gamma):
        self.n_features = n_features
        self.n_actions = n_actions
        self.epsilon = epsilon
        self.gamma = gamma
        self.device = device

        self.model = DQN(n_features, n_actions).to(self.device)
        self.target_model = DQN(n_features, n_actions).to(self.device)
        self.target_model.load_state_dict(self.model.state_dict())
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.memory = deque(maxlen=2000)
        self.epsilon_decay = 0.995
        self.min_epsilon = 0.1

        # Fixed batch size for CPU
        self.batch_size = 64 # Was: 128 if torch.cuda.is_available() else 64
        self.target_update_freq = 100
        self.steps = 0
        self.loss_history = []

    def get_action(self, state):
        if np.random.random() < self.epsilon:
            return np.random.randint(self.n_actions)
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            q_values = self.model(state_tensor)
        return q_values.argmax().item()

    def store_transition(self, state, action, reward, next_state, done):
        self.memory.append((state.copy(), action, reward, next_state.copy(), done))

    def train(self):
        if len(self.memory) < self.batch_size:
            return
        batch_indices = np.random.choice(len(self.memory), self.batch_size, replace=False)
        batch = [self.memory[i] for i in batch_indices]
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.FloatTensor(states).to(self.device)
        actions = torch.LongTensor(actions).to(self.device)
        rewards = torch.FloatTensor(rewards).to(self.device)
        next_states = torch.FloatTensor(next_states).to(self.device)
        dones = torch.FloatTensor(dones).to(self.device)

        current_q_values = self.model(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        with torch.no_grad():
            next_q_values = self.target_model(next_states).max(1)[0]
            target_q_values = rewards + (1 - dones) * self.gamma * next_q_values

        loss = nn.SmoothL1Loss()(current_q_values, target_q_values)
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
        self.optimizer.step()

        self.steps += 1
        self.loss_history.append(loss.item())

        if self.steps % self.target_update_freq == 0:
            self.target_model.load_state_dict(self.model.state_dict())


    def decay_epsilon(self):
        self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

class RandomAgent:
    def __init__(self, n_actions):
        self.n_actions = n_actions

    def get_action(self, state):
        return np.random.randint(self.n_actions)

    def update(self, state, action, reward, next_state):
        pass

    def store_transition(self, state, action, reward, next_state, done):
        pass

    def train(self):
        pass

    def decay_epsilon(self):
        pass

print("üß† All agent classes defined!")


def train_agents(X, y, features, n_episodes=200, max_steps=15, run_seed=42):
  # CPU-accelerated agent training
    np.random.seed(run_seed)
    torch.manual_seed(run_seed)
    env = FeatureSelectionEnv(X, y)
    n_actions = 2 * len(features)

    agents = {
        'Greedy': QLearningAgent(epsilon=0.2, alpha=0.15, gamma=0.9, n_actions=n_actions),
        'Exploratory': QLearningAgent(epsilon=0.6, alpha=0.05, gamma=0.9, n_actions=n_actions),
        'Parsimonious': DQNAgent(n_features=len(features), n_actions=n_actions, epsilon=0.4, lr=0.001, gamma=0.9),
        'Random': RandomAgent(n_actions=n_actions)
    }

    rewards = {name: [] for name in agents}
    accuracies = {name: [] for name in agents}
    feature_selections = {name: [] for name in agents}
    feature_evolution = {name: [] for name in agents}

    for ep in range(n_episodes):
        for name, agent in agents.items():
            state = env.reset()
            ep_reward = 0
            ep_f1 = 0
            ep_features = []

            for step in range(max_steps):
                action = agent.get_action(state)
                next_state, reward, done, f1 = env.step(action, name)

                if name != 'Random':
                    if name == 'Parsimonious':
                        agent.store_transition(state, action, reward, next_state, done)
                        agent.train()
                    else:
                        agent.update(state, action, reward, next_state)

                state = next_state
                ep_reward += reward
                ep_f1 = f1
                ep_features.append(state.copy())

            rewards[name].append(ep_reward)
            accuracies[name].append(ep_f1)
            feature_selections[name].append(state.copy())
            feature_evolution[name].append(ep_features)
            agent.decay_epsilon()

        if (ep + 1) % 40 == 0:
            print(f"Episode {ep + 1}/{n_episodes} completed.")

    return agents, rewards, accuracies, feature_selections, feature_evolution

def evaluate_agents_multi_classifier(agents, X_train, y_train, X_test, y_test, feature_selections, features, verbose=True):
    # CPU-optimized multi-classifier evaluation
    classifiers = {
        'Logistic Regression': LogisticRegression(
            solver='liblinear', random_state=42, class_weight='balanced',
            max_iter=2000, n_jobs=-1
        ),
        'Decision Tree': DecisionTreeClassifier(
            random_state=42, class_weight='balanced', max_depth=12
        ),
        'Random Forest': RandomForestClassifier(
            n_estimators=100, random_state=42, class_weight='balanced',
            max_depth=12, n_jobs=-1
        ),
        'XGBoost': xgb.XGBClassifier(
            random_state=42, eval_metric='logloss', max_depth=6,
            n_estimators=100, learning_rate=0.1,
            # Fixed to CPU-only tree method
            tree_method='hist',

            n_jobs=-1 # Always use all available CPU cores
        )
    }

    results = {}

    for agent_name in agents:
        if verbose:
            print(f"\n{'='*60}\nü§ñ Evaluating Agent: {agent_name}\n{'='*60}")

        final_state = feature_selections[agent_name][-1]
        selected_features = np.where(final_state == 1)[0]

        if len(selected_features) == 0:
            if verbose:
                print(f"‚ö†Ô∏è No features selected for {agent_name}")
            results[agent_name] = {clf_name: {'accuracy':0,'precision':0,'recall':0,'f1':0,'f1_cv':0} for clf_name in classifiers}
            results[agent_name]['features'] = []
            results[agent_name]['n_features'] = 0
            continue

        feature_names = [features[i] for i in selected_features]
        if verbose:
            print(f"üìä Features ({len(selected_features)}): {feature_names}")

        X_train_sub = X_train[:, selected_features]
        X_test_sub = X_test[:, selected_features]
        results[agent_name] = {'features': feature_names, 'n_features': len(selected_features)}

        if verbose:
            print(f"{'Classifier':<20}{'Accuracy':<10}{'Precision':<10}{'Recall':<10}{'F1':<10}{'CV F1':<10}")
            print("-" * 80)

        for clf_name, clf in classifiers.items():
            try:
                f1_cv_scores = cross_val_score(clf, X_train_sub, y_train, cv=5, scoring='f1', n_jobs=-1)
                f1_cv = f1_cv_scores.mean()

                clf.fit(X_train_sub, y_train)
                y_pred = clf.predict(X_test_sub)

                acc = accuracy_score(y_test, y_pred)
                prec = precision_score(y_test, y_pred, zero_division=0)
                rec = recall_score(y_test, y_pred, zero_division=0)
                f1 = f1_score(y_test, y_pred, zero_division=0)

                results[agent_name][clf_name] = {'accuracy':acc, 'precision':prec, 'recall':rec, 'f1':f1, 'f1_cv':f1_cv}

                if verbose:
                    print(f"{clf_name:<20}{acc:<10.4f}{prec:<10.4f}{rec:<10.4f}{f1:<10.4f}{f1_cv:<10.4f}")

            except Exception as e:
                if verbose:
                    print(f"‚ùå Error with {clf_name}: {e}")
                results[agent_name][clf_name] = {'accuracy':0,'precision':0,'recall':0,'f1':0,'f1_cv':0}

    return results

print("üèÉ‚Äç‚ôÇÔ∏è Training and evaluation functions defined!")

üß† All agent classes defined!
üèÉ‚Äç‚ôÇÔ∏è Training and evaluation functions defined!


In [13]:
if __name__ == "__main__":

  def evaluate_agents_multi_classifier(agents, X_train, y_train, X_test, y_test, feature_selections, features, verbose=True):
    # CPU-optimized multi-classifier evaluation
    classifiers = {
        'Logistic Regression': LogisticRegression(
            solver='liblinear', random_state=42, class_weight='balanced',
            max_iter=2000, n_jobs=-1
        ),
        'Decision Tree': DecisionTreeClassifier(
            random_state=42, class_weight='balanced', max_depth=12
        ),
        'Random Forest': RandomForestClassifier(
            n_estimators=100, random_state=42, class_weight='balanced',
            max_depth=12, n_jobs=-1
        ),
        'XGBoost': xgb.XGBClassifier(
            random_state=42, eval_metric='logloss', max_depth=6,
            n_estimators=100, learning_rate=0.1,
            # Fixed to CPU-only tree method
            tree_method='hist',

            n_jobs=-1 # Always use all available CPU cores
        )
    }

    results = {}

    for agent_name in agents:
        if verbose:
            print(f"\n{'='*60}\nü§ñ Evaluating Agent: {agent_name}\n{'='*60}")

        final_state = feature_selections[agent_name][-1]
        selected_features = np.where(final_state == 1)[0]

        if len(selected_features) == 0:
            if verbose:
                print(f"‚ö†Ô∏è No features selected for {agent_name}")
            results[agent_name] = {clf_name: {'accuracy':0,'precision':0,'recall':0,'f1':0,'f1_cv':0} for clf_name in classifiers}
            results[agent_name]['features'] = []
            results[agent_name]['n_features'] = 0
            continue

        feature_names = [features[i] for i in selected_features]
        if verbose:
            print(f"üìä Features ({len(selected_features)}): {feature_names}")

        X_train_sub = X_train[:, selected_features]
        X_test_sub = X_test[:, selected_features]
        results[agent_name] = {'features': feature_names, 'n_features': len(selected_features)}

        if verbose:
            print(f"{'Classifier':<20}{'Accuracy':<10}{'Precision':<10}{'Recall':<10}{'F1':<10}{'CV F1':<10}")
            print("-" * 80)

        for clf_name, clf in classifiers.items():
            try:
                f1_cv_scores = cross_val_score(clf, X_train_sub, y_train, cv=5, scoring='f1', n_jobs=-1)
                f1_cv = f1_cv_scores.mean()

                clf.fit(X_train_sub, y_train)
                y_pred = clf.predict(X_test_sub)

                acc = accuracy_score(y_test, y_pred)
                prec = precision_score(y_test, y_pred, zero_division=0)
                rec = recall_score(y_test, y_pred, zero_division=0)
                f1 = f1_score(y_test, y_pred, zero_division=0)

                results[agent_name][clf_name] = {'accuracy':acc, 'precision':prec, 'recall':rec, 'f1':f1, 'f1_cv':f1_cv}

                if verbose:
                    print(f"{clf_name:<20}{acc:<10.4f}{prec:<10.4f}{rec:<10.4f}{f1:<10.4f}{f1_cv:<10.4f}")

            except Exception as e:
                if verbose:
                    print(f"‚ùå Error with {clf_name}: {e}")
                results[agent_name][clf_name] = {'accuracy':0,'precision':0,'recall':0,'f1':0,'f1_cv':0}

    return results

print("üèÉ‚Äç‚ôÇÔ∏è Training and evaluation functions defined!")

üèÉ‚Äç‚ôÇÔ∏è Training and evaluation functions defined!
