# In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

def objective_function(model, X, y):
    """Return the mean squared error of the thresholded linear model on (X, y).

    ``model`` holds the weights in every slot but the last and the bias in
    the last slot. A sample is labelled 1 when its linear score is
    non-negative and 0 otherwise.
    """
    weights = model[:-1]
    bias = model[-1]
    scores = np.dot(X, weights) + bias
    predicted = (scores >= 0).astype(int)
    residual = np.subtract(y, predicted)
    return np.mean(residual ** 2)

def bat_alg(X, y, *, num_bats=70, num_gens=100, lower_bound=-5, upper_bound=5,
            pulse_rate=0.1, loudness=0.9):
    """Search for linear-classifier parameters with a simplified bat algorithm.

    Each bat is a vector of ``X.shape[1] + 1`` values that is scored with
    ``objective_function`` (lower is better). Pulse rate and loudness stay
    constant for the whole run; they are not adapted per generation.

    Args:
        X: 2-D feature array; only ``X.shape[1]`` is read here, the rest is
            passed through to ``objective_function``.
        y: Target labels, passed through to ``objective_function``.
        num_bats: Population size.
        num_gens: Number of generations to run.
        lower_bound: Smallest value any coordinate of a position may take.
        upper_bound: Largest value any coordinate of a position may take.
        pulse_rate: A candidate is replaced by a small random step around
            the global best whenever a uniform draw exceeds this value.
        loudness: Probability of accepting a candidate that improves the
            bat's own fitness.

    Returns:
        The best position found, as a 1-D array of length ``X.shape[1] + 1``.

    Raises:
        ValueError: If ``num_bats`` is below 1 or the bounds are not ordered.
    """
    if num_bats < 1:
        raise ValueError("num_bats must be at least 1")
    if lower_bound >= upper_bound:
        raise ValueError("lower_bound must be smaller than upper_bound")

    #set params
    dim = X.shape[1] + 1
    Qmin = 0
    Qmax = (upper_bound - lower_bound) / num_bats

    positions = np.random.uniform(lower_bound, upper_bound, (num_bats, dim))
    velocities = np.zeros((num_bats, dim))
    fitness = np.apply_along_axis(objective_function, 1, positions, X, y)

    # Copy the best row: indexing alone returns a view that would silently
    # change whenever that row of `positions` is overwritten.
    best_index = np.argmin(fitness)
    gbest_position = positions[best_index].copy()
    gbest_fitness = fitness[best_index]

    for _ in range(num_gens):
        for i in range(num_bats):
            # Random frequency scales the velocity update for this bat.
            freq = Qmin + (Qmax - Qmin) * np.random.rand()
            velocities[i] += (positions[i] - gbest_position) * freq
            new_position = np.clip(positions[i] + velocities[i],
                                   lower_bound, upper_bound)

            # Local search: small Gaussian step around the current best.
            if np.random.rand() > pulse_rate:
                new_position = gbest_position + 0.001 * np.random.randn(dim)
                new_position = np.clip(new_position, lower_bound, upper_bound)

            new_fitness = objective_function(new_position, X, y)

            # Accept only improvements, and only with probability `loudness`.
            if new_fitness < fitness[i] and np.random.rand() < loudness:
                positions[i] = new_position
                fitness[i] = new_fitness
                if new_fitness < gbest_fitness:
                    gbest_position = new_position.copy()
                    gbest_fitness = new_fitness

    return gbest_position

def predict(model, X):
    """Classify each row of X as 1 (score >= 0) or 0 (score < 0).

    The score is the dot product of the row with ``model[:-1]`` plus the
    bias stored in ``model[-1]``.
    """
    weights, bias = model[:-1], model[-1]
    scores = np.dot(X, weights) + bias
    return (scores >= 0).astype(int)

def run(file_name, num_runs=20):
    """Train the bat-algorithm classifier on a CSV file and print test accuracy.

    The last column of the CSV is used as the label and all other columns as
    features. The data is split 80/20 with a fixed seed, and the optimizer is
    run ``num_runs`` times on the same split, printing one accuracy per run.

    Args:
        file_name: Path of the CSV file to load.
        num_runs: How many independent optimizer runs to perform.
    """
    df = pd.read_csv(file_name)
    X = df.iloc[:, :-1].values
    # Predictions are 0/1, so the encoded labels are expected to be binary.
    y = LabelEncoder().fit_transform(df.iloc[:, -1].values)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training rows only; fitting it on the whole
    # dataset would leak test-set statistics into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    for _ in range(num_runs):
        best_solution = bat_alg(X_train, y_train)

        # model performance on the test set
        y_pred = predict(best_solution, X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f'Accuracy: {accuracy * 100:.2f}%')

# Entry point: run the experiment on Behavior.csv when executed as a script.
if __name__ == "__main__":
    run('Behavior.csv')


# Output of the cell above (one line per run):
# Accuracy: 99.10%
# Accuracy: 93.69%
# Accuracy: 95.50%
# Accuracy: 97.30%
# Accuracy: 98.20%
# Accuracy: 95.50%
# Accuracy: 95.50%
# Accuracy: 94.59%
# Accuracy: 95.50%
# Accuracy: 95.50%
# Accuracy: 99.10%
# Accuracy: 96.40%
# Accuracy: 98.20%
# Accuracy: 95.50%
# Accuracy: 97.30%
# Accuracy: 97.30%
# Accuracy: 98.20%
# Accuracy: 97.30%
# Accuracy: 96.40%
# Accuracy: 96.40%


import numpy as np
import pandas as pd
from pyspark.sql import SparkSession
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt
import matplotlib.cm as cm

#objective function: mean squared error of the thresholded linear model
def objective_function(firefly, X,y):
    """Score a candidate vector on (X, y); lower is better.

    ``firefly[:-1]`` are the weights and ``firefly[-1]`` is the bias. Rows
    whose linear score is non-negative are labelled 1, the rest 0.
    """
    weights = firefly[:-1]
    bias = firefly[-1]
    scores = np.dot(X, weights) + bias
    predicted = (scores >= 0).astype(int)
    residual = np.subtract(y, predicted)
    return np.mean(residual ** 2)

#define bat algorithm
def bat(X, y, n_bats=70, n_gens=100):
    """Search for linear-classifier parameters with a simplified bat algorithm.

    Each bat is a vector of ``X.shape[1] + 1`` values scored with
    ``objective_function`` (lower is better). Pulse rate and loudness are
    kept constant for the whole run.

    Args:
        X: 2-D feature array; only ``X.shape[1]`` is read here, the rest is
            passed through to ``objective_function``.
        y: Target labels, passed through to ``objective_function``.
        n_bats: Population size.
        n_gens: Number of generations to run.

    Returns:
        The best position found, as a 1-D array of length ``X.shape[1] + 1``.

    Raises:
        ValueError: If ``n_bats`` is below 1.
    """
    if n_bats < 1:
        raise ValueError("n_bats must be at least 1")

    #define params
    dim = X.shape[1]+1
    lb = -5
    ub = 5
    f_min = 0
    f_max = (ub - lb) / n_bats
    pulse_rate = 0.1
    loudness = 0.9

    #initialize population
    bats = np.random.uniform(lb, ub, (n_bats, dim))
    velocities = np.zeros((n_bats, dim))
    fitness = np.array([objective_function(b, X, y) for b in bats])

    #track the best bat (copied so later row updates cannot alter it)
    best_idx = np.argmin(fitness)
    best_bat = bats[best_idx].copy()
    best_fitness = fitness[best_idx]

    for _ in range(n_gens):
        for i in range(n_bats):
            #move the bat using a random frequency
            freq = f_min + (f_max - f_min) * np.random.rand()
            velocities[i] += (bats[i] - best_bat) * freq
            candidate = np.clip(bats[i] + velocities[i], lb, ub)

            #local search: small Gaussian step around the best bat
            if np.random.rand() > pulse_rate:
                candidate = np.clip(best_bat + 0.001 * np.random.randn(dim), lb, ub)

            candidate_fitness = objective_function(candidate, X, y)

            #accept only improvements, and only with probability loudness
            if candidate_fitness < fitness[i] and np.random.rand() < loudness:
                bats[i] = candidate
                fitness[i] = candidate_fitness
                if candidate_fitness < best_fitness:
                    best_bat = candidate.copy()
                    best_fitness = candidate_fitness

    return best_bat


#classifies each row of X as 0 or 1
def predict(model, X):
    """Label a row 1 when its linear score is non-negative, else 0.

    The score is the dot product of the row with ``model[:-1]`` plus the
    bias stored in ``model[-1]``.
    """
    weights, bias = model[:-1], model[-1]
    scores = np.dot(X, weights) + bias
    return (scores >= 0).astype(int)


def run(file_name):
    """Load a CSV through Spark, train the bat classifier and print accuracy.

    The last column of the CSV is used as the label and all other columns as
    features. Rows are collected to the driver, split 80/20 with a fixed
    seed, and accuracy is reported on the held-out part.

    Args:
        file_name: Path of the CSV file to load (read with a header row).
    """
    spark = SparkSession.builder \
            .appName("Firefly Algorithm with Spark") \
            .getOrCreate()

    try:
        #read data
        df = spark.read.csv(file_name, header=True, inferSchema=True)
        X = np.array(df.select(df.columns[:-1]).collect())
        y = np.array(df.select(df.columns[-1]).collect()).flatten()
    finally:
        #all rows are on the driver now, so release the Spark resources
        spark.stop()

    #transform y values to ints
    y = LabelEncoder().fit_transform(y)

    #hold out a test set so accuracy is not measured on the training rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    #scale X values using statistics from the training rows only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model = bat(X_train, y_train)

    y_pred = predict(model, X_test)
    accuracy2 = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy2 * 100:.2f}%')
        
    
# Entry point: run the experiment on Behavior.csv when executed as a script.
if __name__ == "__main__":
    run("Behavior.csv")