In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold

def logistic_map(x, r=4):
    """Return one iteration of the logistic map, ``r * x * (1 - x)``.

    Args:
        x: Current value of the sequence.
        r: Growth parameter of the map (defaults to 4).

    Returns:
        The next value of the sequence.
    """
    scaled = r * x
    complement = 1 - x
    return scaled * complement

# Load and preprocess the CWRU bearing dataset
def load_and_preprocess_data(file_path):
    """Read a CSV file and return standardized train/test splits.

    The last column of the file is treated as the target and every other
    column as a feature. String/object labels are integer-encoded with
    ``LabelEncoder``. The data is split 80/20 with ``random_state=42`` and the
    features are standardized using statistics fitted on the training part
    only.

    Args:
        file_path: Path of the CSV file passed to ``pandas.read_csv``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    frame = pd.read_csv(file_path)

    # Everything but the last column is a feature; the last column is the label.
    features = frame.iloc[:, :-1].values
    labels = frame.iloc[:, -1].values

    # Non-numeric labels are mapped to integer codes.
    needs_encoding = labels.dtype == 'object' or isinstance(labels[0], str)
    if needs_encoding:
        labels = LabelEncoder().fit_transform(labels)

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

def objective_function(params, X_train, y_train, X_test, y_test, is_class_report_needed=False):
    """Score one ``(C, gamma)`` candidate by training and evaluating an SVC.

    Args:
        params: Two-element sequence unpacked as ``(C, gamma)``.
        X_train, y_train: Data the SVC is fitted on.
        X_test, y_test: Data the fitted SVC is scored on.
        is_class_report_needed: When true, also build a classification report.

    Returns:
        ``-accuracy`` (negated so that a minimizer maximizes accuracy), or the
        tuple ``(-accuracy, report)`` when ``is_class_report_needed`` is true.
        The report only lists labels that occur in the predictions.
    """
    C, gamma = params
    classifier = SVC(C=C, gamma=gamma)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
    negated_accuracy = -accuracy_score(y_test, predicted)

    if not is_class_report_needed:
        return negated_accuracy

    report = classification_report(y_test, predicted, labels=np.unique(predicted))
    return negated_accuracy, report

def chaos_pso(file_path, num_particles=30, num_dimensions=2, num_iterations=20):
    """Tune the SVM ``C`` and ``gamma`` with Chaos Particle Swarm Optimization.

    Each particle is a ``(C, gamma)`` pair constrained to ``C`` in [0.1, 10]
    and ``gamma`` in [0.0001, 1]. The random coefficients of the velocity
    update come from a per-particle logistic-map sequence instead of a uniform
    generator. Progress and the final metrics are printed; nothing is returned.

    NOTE: candidates are scored by ``objective_function`` on the test split,
    so the printed accuracy is the accuracy the search itself optimized.

    Args:
        file_path: CSV file handed to ``load_and_preprocess_data``.
        num_particles: Number of particles in the swarm.
        num_dimensions: Number of search dimensions. The bounds below are
            defined for two dimensions (``C`` and ``gamma``).
        num_iterations: Number of swarm update rounds.
    """
    X_train, X_test, y_train, y_test = load_and_preprocess_data(file_path)

    # Particle swarm optimization parameters
    w = 0.7  # Inertia weight
    c1 = 1.5  # Cognitive (particle) weight
    c2 = 1.5  # Social (swarm) weight

    # Search-space bounds for (C, gamma)
    lower_bounds = [0.1, 0.0001]
    upper_bounds = [10, 1]

    # Initialize the positions and velocities of the particles
    positions = np.random.uniform(lower_bounds, upper_bounds, (num_particles, num_dimensions))
    velocities = np.random.uniform(-1, 1, (num_particles, num_dimensions))
    personal_best_positions = np.copy(positions)
    personal_best_scores = np.array([
        objective_function(pos, X_train, y_train, X_test, y_test)
        for pos in positions
    ])

    # Find the global best. The position is copied: indexing a row returns a
    # view, which would silently change whenever that row is overwritten.
    best_idx = np.argmin(personal_best_scores)
    global_best_position = personal_best_positions[best_idx].copy()
    global_best_score = personal_best_scores[best_idx]

    # Build the report of the initial best up front so the loop below never has
    # to accept a worse candidate just to obtain a report.
    _, global_class_report = objective_function(
        global_best_position, X_train, y_train, X_test, y_test, True
    )

    # Initialize the chaotic sequence
    chaotic_sequence = np.random.rand(num_particles)

    # Start the optimization process
    for t in range(num_iterations):
        for i in range(num_particles):
            # Update chaotic sequence
            chaotic_sequence[i] = logistic_map(chaotic_sequence[i])

            # Update velocities
            r1 = chaotic_sequence[i]
            r2 = 1 - chaotic_sequence[i]  # Use the complement for the second random number
            velocities[i] = (w * velocities[i] +
                             c1 * r1 * (personal_best_positions[i] - positions[i]) +
                             c2 * r2 * (global_best_position - positions[i]))

            # Update positions and keep them inside the search bounds
            positions[i] = np.clip(positions[i] + velocities[i], lower_bounds, upper_bounds)

            # Evaluate new position
            score, class_report = objective_function(positions[i], X_train, y_train, X_test, y_test, True)

            # Update personal best
            if score < personal_best_scores[i]:
                personal_best_positions[i] = positions[i]
                personal_best_scores[i] = score

                # Update global best only on a strict improvement
                if score < global_best_score:
                    global_best_position = positions[i].copy()
                    global_best_score = score
                    global_class_report = class_report

        print(f"Iteration {t+1}/{num_iterations}, Best Score: {-global_best_score}")
    accuracy = abs(global_best_score)

    print(f'Accuracy: {accuracy * 100:.2f}%')
    print(f"Objective function value: {-global_best_score}")
    print("Classification report:")
    print(global_class_report)

def normalize_data(X_train, X_test):
    """Standardize both splits with a ``StandardScaler`` fitted on the training split.

    Args:
        X_train: Training features; used to fit the scaler.
        X_test: Test features; transformed with the fitted scaler.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled)``.
    """
    standardizer = StandardScaler()
    scaled_train = standardizer.fit_transform(X_train)
    return scaled_train, standardizer.transform(X_test)

def apply_pca(X_train, X_test, n_components=0.999):
    """Project both splits onto principal components fitted on the training split.

    Args:
        X_train: Training features; used to fit the PCA.
        X_test: Test features; projected with the fitted PCA.
        n_components: Forwarded to ``PCA(n_components=...)``.

    Returns:
        A tuple ``(X_train_pca, X_test_pca)``.
    """
    reducer = PCA(n_components=n_components)
    projected_train = reducer.fit_transform(X_train)
    projected_test = reducer.transform(X_test)
    return projected_train, projected_test

def initialize_population(pop_size, num_features):
    """Create a random binary population for the genetic algorithm.

    Args:
        pop_size: Number of individuals (rows).
        num_features: Number of genes per individual (columns).

    Returns:
        An integer array of shape ``(pop_size, num_features)`` filled with
        random 0/1 values.
    """
    shape = (pop_size, num_features)
    return np.random.randint(2, size=shape)

def fitness(individual, X, y):
    """Score a feature mask by the validation accuracy of Gaussian Naive Bayes.

    Args:
        individual: 1-D array of 0/1 flags; a 1 selects the matching column of ``X``.
        X: Feature matrix.
        y: Labels aligned with the rows of ``X``.

    Returns:
        ``0`` when no feature is selected; otherwise the accuracy of a
        ``GaussianNB`` trained on 70% of the selected columns and evaluated on
        the remaining 30% (split with ``random_state=42``).
    """
    chosen_columns = np.where(individual == 1)[0]
    if chosen_columns.size == 0:
        return 0

    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        X[:, chosen_columns], y, test_size=0.3, random_state=42
    )
    classifier = GaussianNB()
    classifier.fit(X_fit, y_fit)
    return accuracy_score(y_holdout, classifier.predict(X_holdout))

def select_parents(population, fitnesses, num_parents):
    """Select the fittest individuals as parents for the next generation.

    Individuals are picked in descending fitness order; ties are resolved in
    favour of the lower index. The caller's ``fitnesses`` array is left
    untouched: selection works on a private copy, so the scores remain valid
    for reporting after this call.

    Args:
        population: 2-D array with one individual per row.
        fitnesses: 1-D sequence with the fitness of each individual.
        num_parents: Number of parents to select.

    Returns:
        An integer array of shape ``(num_parents, population.shape[1])``.
    """
    parents = np.empty((num_parents, population.shape[1]), dtype=int)
    # Work on a float copy so already-chosen entries can be masked with -inf
    # without modifying the array owned by the caller.
    remaining = np.array(fitnesses, dtype=float)
    for parent_num in range(num_parents):
        best_idx = np.argmax(remaining)  # first index of the current maximum
        parents[parent_num, :] = population[best_idx, :]
        remaining[best_idx] = -np.inf
    return parents

def crossover(parents, offspring_size):
    """Create offspring by single-point crossover at the middle gene.

    Offspring ``k`` takes the first half of its genes from parent
    ``k % num_parents`` and the second half from parent
    ``(k + 1) % num_parents``.

    Args:
        parents: 2-D array with one parent per row.
        offspring_size: Tuple ``(num_offspring, num_genes)``.

    Returns:
        An integer array of shape ``offspring_size``.
    """
    offspring = np.empty(offspring_size, dtype=int)
    # Plain integer division: an 8-bit cast would wrap around for individuals
    # with more than 511 genes and put the cut at the wrong position.
    crossover_point = offspring_size[1] // 2
    num_parents = parents.shape[0]
    for k in range(offspring_size[0]):
        parent1_idx = k % num_parents
        parent2_idx = (k + 1) % num_parents
        offspring[k, :crossover_point] = parents[parent1_idx, :crossover_point]
        offspring[k, crossover_point:] = parents[parent2_idx, crossover_point:]
    return offspring

def mutation(offspring_crossover, mutation_rate):
    """Flip randomly chosen bits of every offspring, in place.

    Args:
        offspring_crossover: 2-D 0/1 array with one offspring per row; it is
            modified in place.
        mutation_rate: Number of random flips applied to each row. The same
            gene may be drawn more than once.

    Returns:
        The same array object that was passed in.
    """
    row_count, gene_count = offspring_crossover.shape[:2]
    for row in range(row_count):
        flips_done = 0
        while flips_done < mutation_rate:
            gene = np.random.randint(0, gene_count)
            current = offspring_crossover[row, gene]
            offspring_crossover[row, gene] = 1 - current
            flips_done += 1
    return offspring_crossover

def genetic_algorithm(X_train_scaled, y_train, num_generations=20, pop_size=20, num_parents_mating=10, mutation_rate=1):
    """Run the genetic algorithm for feature selection.

    Every generation scores the population with ``fitness``, keeps the best
    ``num_parents_mating`` individuals unchanged and fills the rest of the
    population with mutated crossover offspring. After the last generation the
    final population is scored once more so the returned mask really is the
    best individual of that population.

    Args:
        X_train_scaled: Feature matrix used to score individuals.
        y_train: Labels aligned with the rows of ``X_train_scaled``.
        num_generations: Number of generations to evolve.
        pop_size: Number of individuals in the population.
        num_parents_mating: Number of individuals kept as parents.
        mutation_rate: Number of bit flips applied to each offspring.

    Returns:
        Array with the column indices selected by the best individual.
    """
    num_features = X_train_scaled.shape[1]
    population = initialize_population(pop_size, num_features)

    for generation in range(num_generations):
        fitnesses = np.array([fitness(ind, X_train_scaled, y_train) for ind in population])
        # Record the best score before selection and hand selection a copy, so
        # the reported value cannot be affected by what selection does with it.
        best_fitness = np.max(fitnesses)
        parents = select_parents(population, fitnesses.copy(), num_parents_mating)
        offspring_crossover = crossover(parents, (pop_size - num_parents_mating, num_features))
        offspring_mutation = mutation(offspring_crossover, mutation_rate)
        population[0:num_parents_mating, :] = parents
        population[num_parents_mating:, :] = offspring_mutation
        print(f'Generation {generation}, Best Fitness: {best_fitness}')

    # The population was replaced after the last scoring pass, so score it
    # again; indices of the old scores do not refer to the current rows.
    final_fitnesses = np.array([fitness(ind, X_train_scaled, y_train) for ind in population])
    best_solution = population[np.argmax(final_fitnesses), :]
    selected_features = np.where(best_solution == 1)[0]

    return selected_features

def gaussian_naive_bayes(X_train, X_test, y_train, y_test, selected_features):
    """Fit Gaussian Naive Bayes on the selected columns and print test metrics.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.
        selected_features: Column indices used from both feature matrices.

    Prints the test accuracy and the classification report; returns nothing.
    """
    train_subset = X_train[:, selected_features]
    test_subset = X_test[:, selected_features]

    classifier = GaussianNB()
    classifier.fit(train_subset, y_train)
    predicted = classifier.predict(test_subset)

    accuracy = accuracy_score(y_test, predicted)
    print(f'Accuracy: {accuracy * 100:.2f}%')
    print('Classification Report:')
    print(classification_report(y_test, predicted))

def main():
    """Run both fault-detection pipelines on the bearing feature CSV.

    First prints the Chaos-PSO-tuned SVM metrics, then the metrics of Gaussian
    Naive Bayes trained on the features picked by the genetic algorithm.
    """
    csv_path = "/content/feature_time_48k_2048_load_1.csv"

    # Splits used by the GA / Naive Bayes pipeline (chaos_pso loads its own).
    train_X, test_X, train_y, test_y = load_and_preprocess_data(csv_path)

    print("Chaos PSO SVM Based Fault Detection Metrics")
    chaos_pso(csv_path)

    print("Genetic Algorithm - Gaussian Naive Bayes Based Fault Detection Metrics")
    chosen_columns = genetic_algorithm(train_X, train_y)
    gaussian_naive_bayes(train_X, test_X, train_y, test_y, chosen_columns)


if __name__ == "__main__":
    main()


Chaos PSO SVM Based Fault Detection Metrics
Iteration 1/20, Best Score: 0.9652173913043478
Iteration 2/20, Best Score: 0.9652173913043478
Iteration 3/20, Best Score: 0.9652173913043478
Iteration 4/20, Best Score: 0.967391304347826
Iteration 5/20, Best Score: 0.967391304347826
Iteration 6/20, Best Score: 0.967391304347826
Iteration 7/20, Best Score: 0.967391304347826
Iteration 8/20, Best Score: 0.967391304347826
Iteration 9/20, Best Score: 0.967391304347826
Iteration 10/20, Best Score: 0.967391304347826
Iteration 11/20, Best Score: 0.967391304347826
Iteration 12/20, Best Score: 0.967391304347826
Iteration 13/20, Best Score: 0.967391304347826
Iteration 14/20, Best Score: 0.967391304347826
Iteration 15/20, Best Score: 0.967391304347826
Iteration 16/20, Best Score: 0.967391304347826
Iteration 17/20, Best Score: 0.967391304347826
Iteration 18/20, Best Score: 0.967391304347826
Iteration 19/20, Best Score: 0.967391304347826
Iteration 20/20, Best Score: 0.967391304347826
Accuracy: 96.74%
Objec