Step-by-Step Guide

Step 1: Prepare Your Code Structure

1) Open Google Colab: Go to Google Colab and create a new notebook.

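2) Create Directory Structure: In your Colab notebook, create the package layout with shell commands (lines prefixed with `!`). The structure consists of a `multi_agent_marl/` package directory containing `__init__.py`, `hyperparameter_agent.py` and `multi_agent_hyperparameter_optimization.py`, plus a top-level `setup.py`: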


In [None]:
# Create the package layout: the multi_agent_marl/ directory plus empty
# placeholder files. `touch` only creates empty files; their contents are
# written in the steps that follow.
!mkdir -p multi_agent_marl
# __init__.py makes the directory an importable package.
!touch multi_agent_marl/__init__.py
!touch multi_agent_marl/hyperparameter_agent.py
!touch multi_agent_marl/multi_agent_hyperparameter_optimization.py
# setup.py lives next to the package directory, not inside it.
!touch setup.py

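3) Write hyperparameter_agent.py: Define the HyperparameterAgent class in hyperparameter_agent.py. To save the cell's contents into the file rather than just executing them, start the cell with `%%writefile multi_agent_marl/hyperparameter_agent.py`: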

In [None]:
import numpy as np

class HyperparameterAgent:
    """Tabular epsilon-greedy Q-learning agent over a list of parameter settings.

    An action is an index into ``param_grid``. Q-values are stored per state
    in ``q_values``, a dict mapping each state to a NumPy array with one entry
    per action. The agent also remembers the setting that produced the highest
    single reward it has been shown.

    Args:
        param_grid: Sequence of candidate parameter settings; actions index it.
        learning_rate: Step size applied to each Q-value update.
        discount_factor: Weight given to the best Q-value of the next state.
        epsilon: Probability of choosing a uniformly random action.
    """

    def __init__(self, param_grid, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        self.param_grid = param_grid
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_values = {}  # state -> np.ndarray of per-action Q-values
        self.best_params = None
        self.best_score = -np.inf

    def choose_action(self, state):
        """Pick an action index for *state* using an epsilon-greedy policy.

        With probability ``epsilon``, or when *state* has no Q-values yet, the
        action is drawn uniformly at random. Otherwise one of the actions with
        the highest Q-value is returned; ties are broken at random.

        Returns:
            An integer index into ``param_grid``.
        """
        num_actions = len(self.param_grid)
        if np.random.rand() < self.epsilon:
            return np.random.choice(num_actions)

        q_row = self.q_values.get(state)
        if q_row is None:
            return np.random.choice(num_actions)

        # update_q_value() creates an all-zero row for every next_state, so a
        # state can be "known" without ever having been trained. A plain
        # argmax would then always return index 0; choosing at random among
        # the maximal entries keeps such states unbiased.
        best_actions = np.flatnonzero(q_row == np.max(q_row))
        if best_actions.size == 0:
            # Only reachable when the row contains NaN; fall back to argmax.
            return np.argmax(q_row)
        return np.random.choice(best_actions)

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update and track the best reward seen so far.

        Rows for *state* and *next_state* are created (zero-filled) on first
        use. If *reward* exceeds ``best_score``, ``best_score`` and
        ``best_params`` are replaced by *reward* and ``param_grid[action]``.

        Args:
            state: Hashable key of the state in which *action* was taken.
            action: Index into ``param_grid`` of the action taken.
            reward: Numeric reward observed for the action.
            next_state: Hashable key of the state reached afterwards.
        """
        for seen_state in (state, next_state):
            if seen_state not in self.q_values:
                self.q_values[seen_state] = np.zeros(len(self.param_grid))

        td_target = reward + self.discount_factor * np.max(self.q_values[next_state])
        self.q_values[state][action] += self.learning_rate * (
            td_target - self.q_values[state][action])

        if reward > self.best_score:
            self.best_score = reward
            self.best_params = self.param_grid[action]


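4) Write multi_agent_hyperparameter_optimization.py: Define the MultiAgentHyperparameterOptimization class in multi_agent_hyperparameter_optimization.py. Start the cell with `%%writefile multi_agent_marl/multi_agent_hyperparameter_optimization.py` so that the code is saved into the file; the relative import below only works from inside the package: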

In [None]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.base import clone
from sklearn.metrics import accuracy_score
from .hyperparameter_agent import HyperparameterAgent

class MultiAgentHyperparameterOptimization:
    """Drive several independent HyperparameterAgent instances over CV folds.

    Every agent shares the same ``param_grid`` but may use its own exploration
    rate. On each fold every agent picks one parameter setting, a fresh clone
    of ``model`` is trained with it, and the accuracy on the held-out part of
    the fold is fed back to the agent as its reward.

    Args:
        model: Estimator to clone for each training run; it must support
            ``set_params``, ``fit`` and ``predict``.
        param_grid: Sequence of parameter dicts passed to ``set_params``.
        num_agents: Number of agents to create.
        epsilon_values: Optional per-agent exploration rates, indexed by agent
            position. When omitted (or empty) every agent uses 0.1.
    """

    def __init__(self, model, param_grid, num_agents, epsilon_values=None):
        self.model = model
        self.param_grid = param_grid
        self.num_agents = num_agents
        self.agents = [
            HyperparameterAgent(param_grid, epsilon=epsilon_values[i] if epsilon_values else 0.1)
            for i in range(num_agents)
        ]

    @staticmethod
    def _take_rows(data, indices):
        """Return the rows of *data* at the given integer positions.

        pandas objects are indexed with ``.iloc`` so that the positions
        produced by the splitter are never interpreted as index labels or
        column names; anything else is indexed directly.
        """
        if hasattr(data, "iloc"):
            return data.iloc[indices]
        return data[indices]

    def train_agents(self, X, y, kf):
        """Run one pass over the folds of *kf*, training every agent on each fold.

        Progress and the per-agent best result are printed; nothing is
        returned.

        Args:
            X: Feature matrix (NumPy array or pandas DataFrame).
            y: Target values aligned with the rows of *X*.
            kf: Splitter whose ``split(X)`` yields ``(train_index, test_index)``
                pairs of integer positions.
        """
        for fold, (train_index, test_index) in enumerate(kf.split(X)):
            X_train = self._take_rows(X, train_index)
            X_test = self._take_rows(X, test_index)
            y_train = self._take_rows(y, train_index)
            y_test = self._take_rows(y, test_index)

            for agent_id, agent in enumerate(self.agents):
                state = fold  # Example state, can be more sophisticated
                action = agent.choose_action(state)
                params = agent.param_grid[action]

                # Clone the model so every run starts from an unfitted estimator
                model = clone(self.model)
                model.set_params(**params)
                model.fit(X_train, y_train)

                # Accuracy on the held-out part of the fold is the reward
                y_test_pred = model.predict(X_test)
                test_accuracy = accuracy_score(y_test, y_test_pred)
                reward = test_accuracy

                # Update agent's Q-values
                next_state = fold + 1  # Example next state
                agent.update_q_value(state, action, reward, next_state)

                # Print accuracy for each agent
                print(f"Agent {agent_id + 1} - Fold {fold + 1} - Params: {params} - Test Accuracy: {test_accuracy:.4f}")

        # Print best hyperparameters for each agent
        for agent_id, agent in enumerate(self.agents):
            print(f"Agent {agent_id + 1} - Best Params: {agent.best_params} - Best Score: {agent.best_score:.4f}")


5) Write setup.py: Create setup.py for packaging your module (start the cell with `%%writefile setup.py` to save it to disk):

In [None]:
# setup.py
#
# Packaging metadata for the multi_agent_marl package.

from setuptools import setup, find_packages

setup(
    name='multi_agent_marl',
    version='0.1',
    # Picks up every directory that contains an __init__.py.
    packages=find_packages(),
    install_requires=[
        'numpy',
        'pandas',
        'scikit-learn',
        'imbalanced-learn'
    ],
    # No console_scripts entry point is declared: the package is used as a
    # library, and the previously listed target
    # 'multi_agent_marl.multi_agent_lr_marl:main' referred to a module that
    # is not part of this package, so the generated command failed on import.
)


Step 2: Install Required Packages

In your Colab notebook, install the required packages:

In [None]:
# Install the libraries used by the package and by the usage example.
!pip install numpy pandas scikit-learn imbalanced-learn


Step 3: Install Module

Run the following command in your Colab notebook to install your custom module:

In [None]:
 # Install the package described by the setup.py in the current directory.
 !pip install .


Step 4: Use Module
Now you can use the multi_agent_marl module in your Colab notebook, as in the following example:

In [None]:
from multi_agent_marl.multi_agent_hyperparameter_optimization import MultiAgentHyperparameterOptimization
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
import pandas as pd
import numpy as np

if __name__ == "__main__":
    # CSV files to merge into a single dataset
    file_paths = [
        '/content/Syn.csv', '/content/Syn_2019_1-12.csv'
    ]

    # Read every CSV file and concatenate the frames into one
    combined_data = pd.concat(
        [pd.read_csv(file_path) for file_path in file_paths],
        ignore_index=True,
    )

    # Keep only the 'Syn' and 'BENIGN' rows. The explicit copy makes the
    # filtered frame independent of the original, so the assignment below
    # modifies it directly instead of a slice (no SettingWithCopyWarning).
    is_known_label = combined_data[' Label'].isin(['Syn', 'BENIGN'])
    combined_data = combined_data[is_known_label].copy()

    # Encode the label: 'Syn' -> 1, 'BENIGN' -> 0
    combined_data[' Label'] = (combined_data[' Label'] == 'Syn').astype(int)

    # Check for duplicates and drop them
    combined_data = combined_data.drop_duplicates()

    # Split data into features and target variable
    X = combined_data.drop(' Label', axis=1)
    y = combined_data[' Label']

    # Convert non-numeric columns to numeric; unparseable values become NaN
    X_numeric = X.apply(pd.to_numeric, errors='coerce')

    # Replace infinite values with NaNs so the imputer can handle them
    X_numeric = X_numeric.replace([np.inf, -np.inf], np.nan)

    # Handle missing values by imputing with the mean
    imputer = SimpleImputer(strategy='mean')
    X_imputed = imputer.fit_transform(X_numeric)

    # Feature Scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_imputed)

    # Transform label column
    y_transformed = y.astype(int)

    # Use SMOTE to handle class imbalance
    # NOTE(review): imputation, scaling and SMOTE are all fitted on the full
    # dataset before the K-fold split, so the held-out folds contain
    # synthetic samples and statistics derived from the training rows. The
    # reported accuracies are therefore likely optimistic.
    smote = SMOTE(random_state=42, k_neighbors=1)  # Adjust k_neighbors parameter
    X_resampled, y_resampled = smote.fit_resample(X_scaled, y_transformed)

    # K-Fold Cross-Validation with 5 folds
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    # Parameter grid for RandomForestClassifier
    param_grid = [{'n_estimators': n} for n in [10, 50, 100]]

    # Initialize Multi-Agent Hyperparameter Optimization
    num_agents = 3
    epsilon_values = [0.1, 0.2, 0.3]
    optimizer = MultiAgentHyperparameterOptimization(RandomForestClassifier(random_state=42), param_grid, num_agents, epsilon_values)

    # Train agents to find the best hyperparameters
    optimizer.train_agents(X_resampled, y_resampled, kf)
