<a href="https://colab.research.google.com/github/lalalalalala-hi/AI-and-Machine-Learning/blob/main/Prediction_using_MLP_and_PSO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Necessary Libraries

*   pygad - Genetic Algorithm (GA) Library
*   pyswarm - Particle Swarm Optimization (PSO) Library

*   tensorflow - Deep Learning Library
*   scikit-learn - Machine Learning Library





In [None]:
# Install the packages listed above into the notebook runtime.
# NOTE(review): pygad and tensorflow are installed here but are not imported
# by any cell visible in this notebook — confirm whether they are still needed.
!pip install pygad
!pip install pyswarm
!pip install tensorflow
!pip install scikit-learn

Collecting pygad
  Downloading pygad-3.3.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pygad
Successfully installed pygad-3.3.1
Collecting pyswarm
  Downloading pyswarm-0.6.tar.gz (4.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyswarm
  Building wheel for pyswarm (setup.py) ... [?25l[?25hdone
  Created wheel for pyswarm: filename=pyswarm-0.6-py3-none-any.whl size=4464 sha256=571256074098f30291b5d9bd1dbc1dd316b8450ae2274e20ec47f67c2ee0c59f
  Stored in directory: /root/.cache/pip/wheels/71/67/40/62fa158f497f942277cbab8199b05cb61c571ab324e67ad0d6
Successfully built pyswarm
Installing collected packages: pyswarm
Successfully installed pyswarm-0.6


# Import Libraries


*   pandas - data analysis
*   sklearn - machine learning tasks
*   pyswarm - Particle Swarm Optimization
*   numpy - numerical operations








In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from pyswarm import pso
from sklearn.neural_network import MLPClassifier
import numpy as np
from google.colab import drive

# Load Data

In [None]:
# Attach Google Drive to the Colab filesystem so the dataset path below resolves
drive.mount('/content/drive')

# The CSV is semicolon-delimited, so the separator is passed explicitly
file_path = '/content/drive/MyDrive/Testing Data/prediction/bank-additional.csv'
data = pd.read_csv(
    filepath_or_buffer=file_path,
    sep=';',
)

Mounted at /content/drive


# Data Preprocessing

In [None]:
# Forward-fill missing values: each NaN takes the last valid value above it.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling and
# returns a new frame instead of mutating the loaded one in place.
data = data.ffill()

# Integer-encode every string (object-dtype) column. Each fitted encoder is
# kept in label_encoders, keyed by column name, so the integer codes can be
# mapped back to the original labels later via inverse_transform.
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Split data into features (X) and target variable (y)
X = data.drop(columns=['y'])
y = data['y']

**Standardization** rescales each feature so that it has a mean of 0 and a standard deviation of 1.

In [None]:
# Learn each column's mean and standard deviation, then rescale every feature
# to zero mean and unit variance.
# NOTE(review): the scaler is fitted on all rows of X, i.e. before the
# train/test split performed later in the notebook.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Multi-Layer Perceptron(MLP) Model
**MLP** is a feedforward artificial neural network (ANN). It consists of at least three layers of nodes: an input layer, one or more hidden layers, and an output layer.

In [None]:
# Create MLP model
def create_mlp_model(hidden_layer_sizes=(100,), max_iter=300, solver='adam', random_state=42):
    """Build an unfitted MLPClassifier.

    Called with no arguments, this returns the same configuration the notebook
    has always used: one hidden layer of 100 units, up to 300 training
    iterations, the 'adam' solver and a fixed seed.

    Parameters
    ----------
    hidden_layer_sizes : tuple of int
        Number of units in each hidden layer. The default is the one-element
        tuple ``(100,)``; the earlier spelling ``(100)`` was just the int 100.
    max_iter : int
        Maximum number of training iterations.
    solver : str
        Weight-optimisation solver name passed through to MLPClassifier.
    random_state : int
        Seed passed through to MLPClassifier so repeated fits are repeatable.

    Returns
    -------
    MLPClassifier
        A new, unfitted classifier.
    """
    return MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        max_iter=max_iter,
        solver=solver,
        random_state=random_state,
    )

# Particle Swarm Optimization(PSO)
**PSO** optimizes a problem by iteratively improving a candidate solution with regard to a given measure of quality, inspired by the social behavior of birds flocking or fish schooling.

In [None]:
# Define the fitness function for PSO
def evaluate_features(selected_features):
    """Fitness function for PSO: score one candidate feature subset.

    Each position of ``selected_features`` is rounded to 0 or 1; positions
    that round to 1 are the columns kept. An MLP is trained on those columns
    and scored by accuracy on a validation split carved out of the TRAINING
    data. The held-out test set is deliberately not used here: scoring
    candidates on it would let the search tune the feature subset to the test
    rows and make the final test accuracy optimistic.

    Parameters
    ----------
    selected_features : sequence of float
        One value per feature column of ``X_train``.

    Returns
    -------
    float
        Negative validation accuracy (the optimiser minimises, so lower is
        better), or 0 when no feature is selected, which is worse than any
        subset with non-zero accuracy.
    """
    # Indices of the columns whose value rounds to 1
    selected_idx = [i for i, x in enumerate(selected_features) if int(round(x)) == 1]

    # An empty subset cannot be trained on; give it the worst possible score
    if not selected_idx:
        return 0

    # Validation split taken from the training rows only. The fixed seed means
    # every candidate is scored on the same rows; stratifying keeps the class
    # ratio equal in both parts.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train[:, selected_idx],
        y_train,
        test_size=0.2,
        random_state=42,
        stratify=y_train,
    )

    # Create and train the MLP model on the fitting part
    model = create_mlp_model()
    model.fit(X_fit, y_fit)

    # Return negative accuracy for minimization
    return -accuracy_score(y_val, model.predict(X_val))

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Seed NumPy's global random state before the search so repeated runs pick the
# same features.
# NOTE(review): assumes pyswarm draws its random numbers from NumPy's global
# RNG (pso() takes no seed argument here) — confirm against the installed version.
np.random.seed(42)

# One search dimension per feature column, each bounded to [0, 1]; a
# coordinate is later rounded to 0 (drop the feature) or 1 (keep it)
lb = [0] * X_train.shape[1]
ub = [1] * X_train.shape[1]

# Run PSO to find the best features. The second return value is the best
# fitness found; it is bound to a real name rather than `_`, which IPython
# uses for the previous cell output.
best_features, best_fitness = pso(evaluate_features, lb, ub, swarmsize=20, maxiter=10)

# Convert the PSO position into the indices of the columns that round to 1
selected_features = [i for i, x in enumerate(best_features) if int(round(x)) == 1]

# Fail here with a clear message instead of letting a zero-column matrix reach
# the model fit further down
if not selected_features:
    raise ValueError("PSO did not select any features; re-run with a larger swarmsize or maxiter.")

# Select the best features for training and testing
X_train_selected = X_train[:, selected_features]
X_test_selected = X_test[:, selected_features]



Stopping search: maximum iterations reached --> 10




# Train MLP

In [None]:
# Build a fresh classifier via create_mlp_model() and fit it on the training
# rows restricted to the feature columns chosen by the PSO search above.
# The fit call is the last expression, so the notebook shows the fitted
# estimator as the cell output.
model = create_mlp_model()
model.fit(X_train_selected, y_train)



# Prediction

In [None]:
# Score the fitted model on the held-out test rows (selected columns only)
predictions = model.predict(X_test_selected)
test_accuracy = accuracy_score(y_test, predictions)

# Overall accuracy first, then per-class precision / recall / F1
print("Accuracy:", test_accuracy)
print(classification_report(y_test, predictions))

Accuracy: 0.9150485436893204
              precision    recall  f1-score   support

           0       0.94      0.97      0.95       732
           1       0.67      0.48      0.56        92

    accuracy                           0.92       824
   macro avg       0.80      0.72      0.75       824
weighted avg       0.91      0.92      0.91       824



# Hyperparameter Tuning
**Hyperparameter Tuning** is the process of selecting an optimal set of hyperparameters for a model to improve its performance.

**GridSearchCV** works by iterating over all possible combinations of the provided hyperparameter values and helps find the most effective configuration for the model.

In [None]:
from sklearn.model_selection import GridSearchCV

# Define the model. The fixed seed matches the other MLPs in this notebook, so
# weight initialisation (and therefore the winning parameters) is repeatable.
mlp = MLPClassifier(max_iter=100, random_state=42)

# Define the hyperparameter search space (3 * 2 * 2 * 2 * 2 = 48 combinations)
parameter_space = {
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

# Initialize GridSearchCV: 3-fold cross-validation, all CPU cores
grid_search = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3)

# Fit on ALL training features (not the PSO-selected subset)
grid_search.fit(X_train, y_train)

# Print the best parameters
print(f'Best parameters found: {grid_search.best_params_}')

# Evaluate the best configuration on the held-out test set
y_pred = grid_search.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(classification_report(y_test, y_pred))


Best parameters found: {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'adaptive', 'solver': 'sgd'}
Accuracy: 0.904126213592233
              precision    recall  f1-score   support

           0       0.93      0.96      0.95       732
           1       0.59      0.46      0.52        92

    accuracy                           0.90       824
   macro avg       0.76      0.71      0.73       824
weighted avg       0.90      0.90      0.90       824





In [None]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for hyperparameter tuning
# (5 * 2 * 4 * 3 * 3 * 2 = 720 combinations, each cross-validated 3 times)
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,), (50,50,50), (100,100)],
    'solver': ['adam', 'sgd'],
    'max_iter': [200, 300, 400, 500],
    'activation': ['relu', 'tanh', 'logistic'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive']
}

# Perform Grid Search on the PSO-selected features. n_jobs=-1 spreads the
# 2160 fits over all CPU cores, as the other grid search in this notebook does.
grid_search = GridSearchCV(MLPClassifier(random_state=42), param_grid, n_jobs=-1, cv=3)
grid_search.fit(X_train_selected, y_train)
print("Best Parameters:", grid_search.best_params_)

# GridSearchCV refits the winning configuration on the whole training set by
# default (refit=True), so best_estimator_ is already trained; fitting it
# again would only repeat that work.
best_model = grid_search.best_estimator_

# Evaluate the model with the best hyperparameters
predictions = best_model.predict(X_test_selected)
print("Accuracy after tuning:", accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))

