In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np
import random

# Load dataset
data = pd.read_csv(r"D:\Sutding\4_th Comuter\2-Network Security\Phishing_Email.csv")
data = data.rename(columns={data.columns[0]: 'Index', 'Email Text': 'Text', 'Email Type': 'Label'})

data = data[['Text', 'Label']]

data = data.dropna(subset=['Text'])

data['Label'] = LabelEncoder().fit_transform(data['Label'])

X_train, X_test, y_train, y_test = train_test_split(
    data['Text'], data['Label'], test_size=0.3, random_state=42
)

# Vectorize email text using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train).toarray()
X_test_vec = vectorizer.transform(X_test).toarray()

# PSO Parameters
num_particles = 30
num_iterations = 50
inertia_weight = 0.7
cognitive_coeff = 1.5
social_coeff = 1.5

# Fitness function: Logistic Regression
def fitness_function(weights, X, y):
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression()
    model.fit(X * weights, y)
    predictions = model.predict(X * weights)
    return accuracy_score(y, predictions)

# Initialize particles
num_features = X_train_vec.shape[1]
particles = [
    {
        "position": np.random.uniform(0, 1, num_features),
        "velocity": np.random.uniform(-0.1, 0.1, num_features),
        "best_position": None,
        "best_score": -np.inf
    }
    for _ in range(num_particles)
]

global_best_position = None
global_best_score = -np.inf

# PSO Algorithm
for iteration in range(num_iterations):
    for particle in particles:
        # Evaluate fitness
        fitness = fitness_function(particle['position'], X_train_vec, y_train)

        # Update personal best
        if fitness > particle['best_score']:
            particle['best_score'] = fitness
            particle['best_position'] = particle['position'].copy()

        # Update global best
        if fitness > global_best_score:
            global_best_score = fitness
            global_best_position = particle['position'].copy()

    # Update particles
    for particle in particles:
        inertia = inertia_weight * particle['velocity']
        cognitive = cognitive_coeff * random.random() * (particle['best_position'] - particle['position'])
        social = social_coeff * random.random() * (global_best_position - particle['position'])

        particle['velocity'] = inertia + cognitive + social
        particle['position'] += particle['velocity']

# Evaluate on test set
final_weights = global_best_position
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train_vec * final_weights, y_train)
predictions = model.predict(X_test_vec * final_weights)
accuracy = accuracy_score(y_test, predictions)

print(f"Test Accuracy with PSO-optimized weights: {accuracy:.4f}")


Test Accuracy with PSO-optimized weights: 0.9261
