### Uploading the dataset:

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
df1 = pd.read_csv('/content/drive/MyDrive/Depression detection /Datasets /Twitter dataset/clean_d_tweets.csv')
df2 = pd.read_csv('/content/drive/MyDrive/Depression detection /Datasets /Twitter dataset/clean_non_d_tweets.csv')

In [3]:
df1['Labels']  = 1;
df2['Labels'] = 0;

In [4]:
df1 = df1[['tweet', 'Labels']]
df2 = df2[['tweet', 'Labels']]

In [5]:
merge = pd.concat([df1,df2])

In [6]:
merge = merge.sample(frac=1).reset_index(drop=True)

In [7]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

nltk.download('stopwords')
nltk.download('punkt')
def remove_stop_words(text):
  stop_words = set(stopwords.words('english'))
  tokens = word_tokenize(text)
  filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
  return ' '.join(filtered_tokens)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [8]:
merge['tweet'] = merge['tweet'].astype(str)
merge['tweet'] = merge['tweet'].apply(remove_stop_words)

### here we are implementing Bert, tfidf, word2vec with different ML algorithms like : logistic regression, random forest, Support vector machine, Naives Bayes and RNN


### 1.1 Bert + ML algorithm

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(merge['tweet'], merge['Labels'], test_size=0.2, random_state=42)


In [None]:
from transformers import BertTokenizer, BertModel
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


### Bert Implementation


In [None]:
import numpy as np
def encode_texts_in_batches(texts, batch_size=32):
    embeddings = []
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i:i+batch_size]
        encoded_inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors='pt', max_length=512)
        with torch.no_grad():
            outputs = model(**encoded_inputs)
        batch_embeddings = outputs.last_hidden_state[:, 0, :].numpy()
        embeddings.extend(batch_embeddings)
    return np.array(embeddings)

X_train_embeddings = encode_texts_in_batches(X_train.tolist(), batch_size=16)
X_test_embeddings = encode_texts_in_batches(X_test.tolist(), batch_size=16)


### Bert + Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_embeddings, y_train)

y_pred = lr_model.predict(X_test_embeddings)
print('Logistic Regression Accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Logistic Regression Accuracy: 0.8468468468468469
              precision    recall  f1-score   support

           0       0.87      0.87      0.87       936
           1       0.81      0.81      0.81       618

    accuracy                           0.85      1554
   macro avg       0.84      0.84      0.84      1554
weighted avg       0.85      0.85      0.85      1554



### Bert + Random Forest


In [None]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(X_train_embeddings, y_train)
y_pred = rf_model.predict(X_test_embeddings)

print('Random Forest Accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Random Forest Accuracy: 0.8326898326898327
              precision    recall  f1-score   support

           0       0.84      0.89      0.87       936
           1       0.82      0.74      0.78       618

    accuracy                           0.83      1554
   macro avg       0.83      0.82      0.82      1554
weighted avg       0.83      0.83      0.83      1554



### Bert + Gaussian naives bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
nb_classifier = GaussianNB()
nb_classifier.fit(X_train_embeddings, y_train)

y_pred = nb_classifier.predict(X_test_embeddings)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

print(classification_report(y_test, y_pred))


Accuracy: 0.7477
              precision    recall  f1-score   support

           0       0.82      0.74      0.78       936
           1       0.66      0.76      0.71       618

    accuracy                           0.75      1554
   macro avg       0.74      0.75      0.74      1554
weighted avg       0.76      0.75      0.75      1554



### Bert + support vector

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

svm_classifier = SVC(kernel='linear')

svm_classifier.fit(X_train_embeddings, y_train)

y_pred = svm_classifier.predict(X_test_embeddings)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')

print(classification_report(y_test, y_pred))


Accuracy: 0.8449
              precision    recall  f1-score   support

           0       0.88      0.86      0.87       936
           1       0.79      0.83      0.81       618

    accuracy                           0.84      1554
   macro avg       0.84      0.84      0.84      1554
weighted avg       0.85      0.84      0.85      1554



### Bert + RNN

In [None]:
from transformers import BertTokenizer, BertModel
import torch
import numpy as np

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_bert_embeddings(texts, max_length=32):
    encoded_inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt', max_length=max_length)
    with torch.no_grad():
        outputs = model(**encoded_inputs)
    embeddings = outputs.last_hidden_state
    return embeddings

texts = merge['tweet'].tolist()

embeddings = get_bert_embeddings(texts)


In [None]:

import torch.nn as nn

class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNNClassifier, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.rnn.num_layers, x.size(0), self.rnn.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :]
        out = self.fc(out)
        return out

input_size = 768
hidden_size = 128
num_layers = 2
num_classes = 2

rnn_model = RNNClassifier(input_size, hidden_size, num_layers, num_classes)

from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

labels =  merge['Labels']

embeddings = embeddings.squeeze(0)
labels = torch.tensor(labels)

X_train, X_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.2, random_state=42)

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_data, batch_size=2, shuffle=True)
test_loader = DataLoader(test_data, batch_size=2, shuffle=False)



In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn_model.parameters(), lr=0.001)

num_epochs = 5
rnn_model.train()

for epoch in range(num_epochs):
    for i, (texts, labels) in enumerate(train_loader):

        outputs = rnn_model(texts)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/5], Loss: 0.0516
Epoch [2/5], Loss: 0.0437
Epoch [3/5], Loss: 0.0075
Epoch [4/5], Loss: 0.0417
Epoch [5/5], Loss: 0.1289


In [None]:
from sklearn.metrics import accuracy_score

rnn_model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for texts, labels in test_loader:
        outputs = rnn_model(texts)
        _, predicted = torch.max(outputs.data, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(all_labels, all_preds)
print(f'Test Accuracy: {accuracy:.4f}')


Test Accuracy: 0.8378


## TF-IDF and ML algorithm

In [None]:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

tfidf = TfidfVectorizer(stop_words = 'english')
x = tfidf.fit_transform(merge['tweet'])



#1.1 TF-IDF + Logistic Regression

x_train, x_test, y_train, y_test = train_test_split(x, merge['Labels'], test_size = 0.2, random_state = 42)

model = LogisticRegression()
model.fit(x_train,y_train)

y_pred = model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(f"classification report:\n{report}")

Accuracy: 0.8365508365508365
classification report:
              precision    recall  f1-score   support

           0       0.81      0.95      0.87       930
           1       0.89      0.67      0.77       624

    accuracy                           0.84      1554
   macro avg       0.85      0.81      0.82      1554
weighted avg       0.84      0.84      0.83      1554



### Tf-idf + Random forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

model_rf = RandomForestClassifier(n_estimators = 100, random_state=42)
model_rf.fit(x_train,y_train)

y_pred = model_rf.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)


print(f"Accuracy: {accuracy}")
print(f"Classification Report:\n{report}")

Accuracy: 0.8223938223938224
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.81      0.84       918
           1       0.75      0.84      0.79       636

    accuracy                           0.82      1554
   macro avg       0.82      0.83      0.82      1554
weighted avg       0.83      0.82      0.82      1554



### Tf-idf + Support vector machine

In [None]:
from sklearn.svm import SVC

model_sv = SVC(kernel='linear', random_state=42)
model_sv.fit(x_train, y_train)

y_pred = model_sv.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Classification Report:\n{report}")

Accuracy: 0.8449163449163449
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.90      0.87       918
           1       0.84      0.76      0.80       636

    accuracy                           0.84      1554
   macro avg       0.84      0.83      0.84      1554
weighted avg       0.84      0.84      0.84      1554



### Tf-idf + Multinomial naives bayes


In [None]:
from sklearn.naive_bayes import MultinomialNB
model_nb = MultinomialNB()
model_nb.fit(x_train, y_train)

y_pred = model_nb.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Classification Report:\n{report}")


Accuracy: 0.8584298584298584
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.92      0.89       935
           1       0.86      0.77      0.81       619

    accuracy                           0.86      1554
   macro avg       0.86      0.84      0.85      1554
weighted avg       0.86      0.86      0.86      1554



### Tf-idf + RNN

In [None]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout
from tensorflow.keras.optimizers import Adam

label_encoder = LabelEncoder()

merge['Labels'] = label_encoder.fit_transform(merge['Labels'])

X = tfidf.fit_transform(merge['tweet']).toarray()
y = merge['Labels'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)

model = Sequential()
model.add(SimpleRNN(50, input_shape=(X_train.shape[1], 1), activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)


y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype("int32")

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Classification Report:\n{report}")



  super().__init__(**kwargs)


Epoch 1/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 1s/step - accuracy: 0.6027 - loss: 0.6780 - val_accuracy: 0.6299 - val_loss: 0.6650
Epoch 2/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m275s[0m 2s/step - accuracy: 0.5935 - loss: 0.6769 - val_accuracy: 0.6299 - val_loss: 0.6649
Epoch 3/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 2s/step - accuracy: 0.6050 - loss: 0.6732 - val_accuracy: 0.6299 - val_loss: 0.6634
Epoch 4/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 2s/step - accuracy: 0.5950 - loss: 0.6765 - val_accuracy: 0.6299 - val_loss: 0.6633
Epoch 5/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 2s/step - accuracy: 0.5884 - loss: 0.6794 - val_accuracy: 0.6299 - val_loss: 0.6611
Epoch 6/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 2s/step - accuracy: 0.6061 - loss: 0.6710 - val_accuracy: 0.6299 - val_loss: 0.6622
Epoch 7/10
[1m156/156

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## WORD2VEC + ML algorithms




In [9]:

from gensim.models import Word2Vec
import numpy as np


tokenized_text = merge['tweet'].apply(word_tokenize)

model = Word2Vec(sentences=tokenized_text, vector_size=100, window=5, min_count=1, workers=4)

def average_word_vectors(words, model, vocabulary, num_features):
    feature_vector = np.zeros((num_features,), dtype="float64")
    nwords = 0.

    for word in words:
        if word in vocabulary:
            nwords = nwords + 1.
            feature_vector = np.add(feature_vector, model.wv[word])

    if nwords:
        feature_vector = np.divide(feature_vector, nwords)

    return feature_vector

# Function to generate averaged word vector features for the training and test data
def averaged_word_vectorizer(corpus, model, num_features):
    vocabulary = set(model.wv.index_to_key)
    features = [average_word_vectors(tokenized_sentence, model, vocabulary, num_features)
                    for tokenized_sentence in corpus]
    return np.array(features)

# Get averaged word vectors for the text data
wordvec_arrays = averaged_word_vectorizer(tokenized_text, model=model, num_features=100)
X = pd.DataFrame(wordvec_arrays)
y = np.array(merge['Labels'])  # Replace with your actual labels


### word2vec + Logistic regression

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

logreg = LogisticRegression(max_iter=1000)

logreg.fit(X_train, y_train)

y_pred = logreg.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test,y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(f'Classification report:\n{report}')

Accuracy: 0.71
Classification report:
              precision    recall  f1-score   support

           0       0.71      0.89      0.79       948
           1       0.71      0.42      0.53       606

    accuracy                           0.71      1554
   macro avg       0.71      0.65      0.66      1554
weighted avg       0.71      0.71      0.69      1554



### Word2vec + random forest




In [None]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Random Forest Accuracy: {accuracy}")
print(f"Random Forest Classification Report:\n{report}")


Random Forest Accuracy: 0.7638352638352638
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.86      0.81       935
           1       0.74      0.62      0.68       619

    accuracy                           0.76      1554
   macro avg       0.76      0.74      0.75      1554
weighted avg       0.76      0.76      0.76      1554



### WORD2Vec + Support vector machine

In [None]:
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)


print(f"SVM classification accuracy: {accuracy}")
print(f"SVM classification report:\n{report}")


SVM classification accuracy: 0.7123552123552124
SVM classification report:
              precision    recall  f1-score   support

           0       0.69      0.94      0.80       935
           1       0.80      0.37      0.51       619

    accuracy                           0.71      1554
   macro avg       0.74      0.66      0.65      1554
weighted avg       0.73      0.71      0.68      1554



### Word2vec + Gaussian Naive bayes

In [13]:
from sklearn.naive_bayes import GaussianNB

gnb_model = GaussianNB()
gnb_model.fit(X_train, y_train)

y_pred = gnb_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Gaussian Naive Bayes Accuracy: {accuracy}")
print(f"Gaussian Naive Bayes Classification Report:\n{report}")

Gaussian Naive Bayes Accuracy: 0.6512226512226512
Gaussian Naive Bayes Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.74      0.72       948
           1       0.56      0.52      0.54       606

    accuracy                           0.65      1554
   macro avg       0.63      0.63      0.63      1554
weighted avg       0.65      0.65      0.65      1554



### Word2Vedc + RNN

In [14]:
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
merge['Labels'] = label_encoder.fit_transform(merge['Labels'])

w2v_model = Word2Vec(sentences=merge['tweet'], vector_size=100, window=5, min_count=1, workers=4)

word_index = {word: index + 1 for index, word in enumerate(w2v_model.wv.index_to_key)}
sequences = [[word_index[word] for word in text if word in word_index] for text in merge['tweet']]

max_sequence_length = max(len(seq) for seq in sequences)
X = pad_sequences(sequences, maxlen=max_sequence_length)

y = to_categorical(merge['Labels'])

embedding_dim = w2v_model.vector_size
embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim))
for word, i in word_index.items():
    if word in w2v_model.wv:
        embedding_matrix[i] = w2v_model.wv[word]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [15]:

model = Sequential()
model.add(Embedding(input_dim=len(word_index) + 1, output_dim=embedding_dim, weights=[embedding_matrix], input_length=max_sequence_length, trainable=False))
model.add(SimpleRNN(100, activation='relu'))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

loss, accuracy = model.evaluate(X_test, y_test)
print(f"RNN Accuracy: {accuracy}")

Epoch 1/10




[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 82ms/step - accuracy: 0.5709 - loss: 0.6710 - val_accuracy: 0.6315 - val_loss: 0.6471
Epoch 2/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 86ms/step - accuracy: 0.6253 - loss: 17.9255 - val_accuracy: 0.6211 - val_loss: 0.6450
Epoch 3/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 92ms/step - accuracy: 0.6134 - loss: 0.6442 - val_accuracy: 0.6098 - val_loss: 0.6366
Epoch 4/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 88ms/step - accuracy: 0.6587 - loss: 0.6117 - val_accuracy: 0.6758 - val_loss: 0.5967
Epoch 5/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 86ms/step - accuracy: 0.7074 - loss: 0.5750 - val_accuracy: 0.7530 - val_loss: 0.5127
Epoch 6/10
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 86ms/step - accuracy: 0.7499 - loss: 0.5201 - val_accuracy: 0.6983 - val_loss: 0.5506
Epoch 7/10
[1m156/156[0m

## from here we are starting on the Hypertuning SVM parameters.




## BEE COLONY

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(merge['tweet'], merge['Labels'], test_size=0.2, random_state=42)

# Text vectorization
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)



In [None]:
class ArtificialBeeColony:
    def __init__(self, obj_func, bounds, colony_size, max_iter, limit):
        self.obj_func = obj_func
        self.bounds = bounds
        self.colony_size = colony_size
        self.max_iter = max_iter
        self.limit = limit
        self.dim = len(bounds)

        self.food_sources = np.random.rand(colony_size, self.dim)
        self.food_sources = self.bounds[:, 0] + self.food_sources * (self.bounds[:, 1] - self.bounds[:, 0])
        self.fitness = np.array([self.obj_func(ind) for ind in self.food_sources])
        self.trial = np.zeros(colony_size)

    def optimize(self):
        for iter in range(self.max_iter):
            # Employed bee phase
            for i in range(self.colony_size):
                k = np.random.randint(0, self.colony_size)
                while k == i:
                    k = np.random.randint(0, self.colony_size)

                phi = np.random.uniform(-1, 1, self.dim)
                new_solution = self.food_sources[i] + phi * (self.food_sources[i] - self.food_sources[k])
                new_solution = np.clip(new_solution, self.bounds[:, 0], self.bounds[:, 1])

                new_fitness = self.obj_func(new_solution)

                if new_fitness < self.fitness[i]:
                    self.food_sources[i] = new_solution
                    self.fitness[i] = new_fitness
                    self.trial[i] = 0
                else:
                    self.trial[i] += 1

            # Onlooker bee phase
            fitness_prob = self.fitness / np.sum(self.fitness)
            for i in range(self.colony_size):
                r = np.random.rand()
                if r < fitness_prob[i]:
                    k = np.random.randint(0, self.colony_size)
                    while k == i:
                        k = np.random.randint(0, self.colony_size)

                    phi = np.random.uniform(-1, 1, self.dim)
                    new_solution = self.food_sources[i] + phi * (self.food_sources[i] - self.food_sources[k])
                    new_solution = np.clip(new_solution, self.bounds[:, 0], self.bounds[:, 1])

                    new_fitness = self.obj_func(new_solution)

                    if new_fitness < self.fitness[i]:
                        self.food_sources[i] = new_solution
                        self.fitness[i] = new_fitness
                        self.trial[i] = 0
                    else:
                        self.trial[i] += 1

            # Scout bee phase
            for i in range(self.colony_size):
                if self.trial[i] > self.limit:
                    self.food_sources[i] = self.bounds[:, 0] + np.random.rand(self.dim) * (self.bounds[:, 1] - self.bounds[:, 0])
                    self.fitness[i] = self.obj_func(self.food_sources[i])
                    self.trial[i] = 0

            best_index = np.argmin(self.fitness)
            print(f"Iteration {iter+1}/{self.max_iter}, Best Fitness: {self.fitness[best_index]}")

        best_index = np.argmin(self.fitness)
        return self.food_sources[best_index], self.fitness[best_index]


In [None]:
def svm_objective(params):
    C, gamma = params
    C = max(C, 0.1)  # Ensure C is within the valid range
    gamma = max(gamma, 0.0001)  # Ensure gamma is within the valid range
    svm = SVC(C=C, gamma=gamma)
    svm.fit(X_train_tfidf, y_train)
    y_pred = svm.predict(X_test_tfidf)
    accuracy = accuracy_score(y_test, y_pred)
    return -accuracy  # Minimize negative accuracy to maximize accuracy


In [None]:
# Define parameter bounds (C and gamma)
bounds = np.array([[0.1, 100], [0.0001, 1]])

# Artificial Bee Colony parameters
colony_size = 20
max_iter = 50
limit = 10

# Run Artificial Bee Colony algorithm
abc = ArtificialBeeColony(svm_objective, bounds, colony_size, max_iter, limit)
best_params, best_fitness = abc.optimize()

# Output best parameters
print(f"Best Parameters: C={best_params[0]}, gamma={best_params[1]}")
print(f"Best Accuracy: {-best_fitness}")


Iteration 1/50, Best Fitness: -0.8674388674388674
Iteration 2/50, Best Fitness: -0.8674388674388674
Iteration 3/50, Best Fitness: -0.8674388674388674
Iteration 4/50, Best Fitness: -0.8674388674388674
Iteration 5/50, Best Fitness: -0.8674388674388674
Iteration 6/50, Best Fitness: -0.8674388674388674
Iteration 7/50, Best Fitness: -0.8674388674388674
Iteration 8/50, Best Fitness: -0.8674388674388674
Iteration 9/50, Best Fitness: -0.8687258687258688
Iteration 10/50, Best Fitness: -0.8687258687258688
Iteration 11/50, Best Fitness: -0.8687258687258688
Iteration 12/50, Best Fitness: -0.8687258687258688
Iteration 13/50, Best Fitness: -0.8693693693693694
Iteration 14/50, Best Fitness: -0.8693693693693694
Iteration 15/50, Best Fitness: -0.8693693693693694
Iteration 16/50, Best Fitness: -0.87001287001287
Iteration 17/50, Best Fitness: -0.87001287001287
Iteration 18/50, Best Fitness: -0.87001287001287
Iteration 19/50, Best Fitness: -0.87001287001287
Iteration 20/50, Best Fitness: -0.87001287001287

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_20newsgroups

# Sample data
data = fetch_20newsgroups(subset='train')
df = pd.DataFrame({'text': data.data, 'label': data.target})

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)

# Text vectorization
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
def levy_flight(Lambda):
    sigma1 = np.power((np.math.gamma(1 + Lambda) * np.sin(np.pi * Lambda / 2)) / np.math.gamma((1 + Lambda) / 2) * Lambda * np.power(2, (Lambda - 1) / 2), 1 / Lambda)
    sigma2 = 1
    u = np.random.normal(0, sigma1, 1)
    v = np.random.normal(0, sigma2, 1)
    step = u / np.power(np.abs(v), 1 / Lambda)
    return step

def objective_function(params):
    C, gamma = params
    svm = SVC(C=C, gamma=gamma)
    svm.fit(X_train_tfidf, y_train)
    y_pred = svm.predict(X_test_tfidf)
    accuracy = accuracy_score(y_test, y_pred)
    return -accuracy  # Negative because we need to minimize the objective function

def simple_bounds(s, lb, ub):
    ns_temp = s
    ns_temp[s < lb] = lb[s < lb]
    ns_temp[s > ub] = ub[s > ub]
    return ns_temp

def cuckoo_search(n=25, pa=0.25, Lambda=1.5, num_iter=50):
    dim = 2
    lb = [0.01, 0.0001]
    ub = [100, 1]
    nest = np.random.rand(n, dim) * (ub - lb) + lb
    fitness = np.zeros(n)

    for i in range(n):
        fitness[i] = objective_function(nest[i, :])

    best_nest = nest[np.argmin(fitness), :]
    best_fitness = np.min(fitness)

    for iter in range(num_iter):
        new_nest = np.zeros((n, dim))
        for i in range(n):
            step_size = levy_flight(Lambda)
            step = step_size * (nest[i, :] - best_nest)
            new_nest[i, :] = nest[i, :] + step * np.random.randn(dim)
            new_nest[i, :] = simple_bounds(new_nest[i, :], lb, ub)

        for i in range(n):
            fnew = objective_function(new_nest[i, :])
            if fnew < fitness[i]:
                fitness[i] = fnew
                nest[i, :] = new_nest[i, :]

        indices = np.argsort(fitness)
        n_best = int(n * pa)
        nest[indices[:n_best], :] = best_nest

        best_nest = nest[np.argmin(fitness), :]
        best_fitness = np.min(fitness)

        print(f'Iteration {iter+1}/{num_iter}, Best Fitness: {-best_fitness}')

    return best_nest, -best_fitness


In [None]:
best_params, best_accuracy = cuckoo_search(n=25, pa=0.25, Lambda=1.5, num_iter=50)
print(f'Best Parameters: C={best_params[0]}, gamma={best_params[1]}')
print(f'Best Accuracy: {best_accuracy}')


## PSO

In [None]:
!pip install pandas scikit-learn pyswarm


Collecting pyswarm
  Downloading pyswarm-0.6.tar.gz (4.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyswarm
  Building wheel for pyswarm (setup.py) ... [?25l[?25hdone
  Created wheel for pyswarm: filename=pyswarm-0.6-py3-none-any.whl size=4464 sha256=6494a009f250490cdb4b4dbddaa46002c79c533913ee9ef021b75c620dfebb98
  Stored in directory: /root/.cache/pip/wheels/71/67/40/62fa158f497f942277cbab8199b05cb61c571ab324e67ad0d6
Successfully built pyswarm
Installing collected packages: pyswarm
Successfully installed pyswarm-0.6


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer


# Extract the text and label columns
texts = merge['tweet'].astype(str).tolist()
labels = merge['Labels'].values

# Split the data into training and testing sets
texts_train, texts_test, labels_train, labels_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Convert text data to TF-IDF features
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(texts_train)
X_test = vectorizer.transform(texts_test)


In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from pyswarm import pso

def evaluate_individual(params):
    C, gamma = params

    model = SVC(C=C, gamma=gamma, kernel='rbf')

    # Train and evaluate the model
    model.fit(X_train, labels_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(labels_test, y_pred)

    return -accuracy


In [None]:
lb = [1e-6, 1e-6]
ub = [1e+6, 1e+1]

best_params, _ = pso(evaluate_individual, lb, ub, swarmsize=10, maxiter=20)

best_C, best_gamma = best_params

best_model = SVC(C=best_C, gamma=best_gamma, kernel='rbf')
best_model.fit(X_train, labels_train)



Stopping search: maximum iterations reached --> 20


In [None]:
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(labels_test, y_pred)
print(f"Best Parameters: C = {970705.2288605786}, gamma = {1.5876338750856334}")
print(f"Test Set Accuracy: {accuracy * 100:.2f}%")

Best Parameters: C = 970705.2288605786, gamma = 1.5876338750856334
Test Set Accuracy: 87.64%


## FLOWER POLLINATION

In [None]:
!pip install pandas scikit-learn




In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer


# Extract the text and label columns
texts = merge['tweet'].astype(str).tolist()
labels = merge['Labels'].values

# Split the data into training and testing sets
texts_train, texts_test, labels_train, labels_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Convert text data to TF-IDF features
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(texts_train)
X_test = vectorizer.transform(texts_test)


In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Define the Flower Pollination Algorithm (FPA) function
def flower_pollination_algorithm(fitness_func, lb, ub, num_flowers, max_gen):
    """
    FPA implementation for optimizing SVM hyperparameters.
    Parameters:
    - fitness_func: Function to evaluate fitness (accuracy) of SVM with given hyperparameters
    - lb: Lower bounds for hyperparameters (C, gamma)
    - ub: Upper bounds for hyperparameters (C, gamma)
    - num_flowers: Number of flowers (population size)
    - max_gen: Maximum number of generations (iterations)

    Returns:
    - best_params: Best hyperparameters found by FPA
    """
    flowers = np.random.uniform(lb, ub, size=(num_flowers, len(lb)))
    best_flower = None
    best_fitness = -np.inf

    for gen in range(max_gen):

        fitness = np.array([fitness_func(flower) for flower in flowers])

        max_index = np.argmax(fitness)
        if fitness[max_index] > best_fitness:
            best_flower = flowers[max_index]
            best_fitness = fitness[max_index]

        for i in range(num_flowers):
            beta = np.random.uniform(-1, 1, size=len(lb))
            step_size = np.random.uniform(0, 1)
            new_flower = flowers[i] + step_size * beta * (flowers[max_index] - flowers[i])

            new_flower = np.clip(new_flower, lb, ub)
            if fitness_func(new_flower) > fitness[i]:
                flowers[i] = new_flower

    return best_flower


In [None]:

def evaluate_individual(params):
    C, gamma = params
    model = SVC(C=C, gamma=gamma, kernel='rbf')

    model.fit(X_train, labels_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(labels_test, y_pred)

    return accuracy

lb = [1e-6, 1e-6]
ub = [1e+6, 1e+1]

best_params = flower_pollination_algorithm(evaluate_individual, lb, ub, num_flowers=20, max_gen=50)
best_C, best_gamma = best_params

best_model = SVC(C=best_C, gamma=best_gamma, kernel='rbf')
best_model.fit(X_train, labels_train)

y_pred = best_model.predict(X_test)
accuracy = accuracy_score(labels_test, y_pred)
print(f"Best Parameters: C = {best_C}, gamma = {best_gamma}")
print(f"Test Set Accuracy: {accuracy * 100:.2f}%")


Best Parameters: C = 958859.9790305122, gamma = 1.3730403661081159
Test Set Accuracy: 86.23%


## SMRO

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_20newsgroups

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(merge['tweet'], merge['Labels'], test_size=0.2, random_state=42)

# Text vectorization
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
def smro_algorithm(obj_func, bounds, population_size, max_iter):
    # Initialize the population
    population = np.random.rand(population_size, len(bounds))
    population = bounds[:, 0] + population * (bounds[:, 1] - bounds[:, 0])
    fitness = np.zeros(population_size)

    # Evaluate initial population
    for i in range(population_size):
        fitness[i] = obj_func(population[i])

    # Iterate to evolve solutions
    for iteration in range(max_iter):
        for i in range(population_size):

            candidate = population[i] + np.random.uniform(-1, 1, len(bounds)) * (population[np.random.randint(population_size)] - population[i])
            candidate = np.clip(candidate, bounds[:, 0], bounds[:, 1])
            candidate_fitness = obj_func(candidate)

            if candidate_fitness < fitness[i]:
                population[i] = candidate
                fitness[i] = candidate_fitness

        print(f"Iteration {iteration+1}/{max_iter}, Best Fitness: {min(fitness)}")

    best_idx = np.argmin(fitness)
    return population[best_idx], fitness[best_idx]


In [None]:
def svm_objective(params):
    C, gamma = params
    svm = SVC(C=C, gamma=gamma)
    svm.fit(X_train_tfidf, y_train)
    y_pred = svm.predict(X_test_tfidf)
    accuracy = accuracy_score(y_test, y_pred)
    return -accuracy


In [None]:

bounds = np.array([[0.1, 100], [0.0001, 1]])

population_size = 10
max_iter = 50
best_params, best_fitness = smro_algorithm(svm_objective, bounds, population_size, max_iter)

print(f"Best Parameters: C={best_params[0]}, gamma={best_params[1]}")
print(f"Best Accuracy: {-best_fitness}")


Iteration 1/50, Best Fitness: -0.868082368082368
Iteration 2/50, Best Fitness: -0.868082368082368
Iteration 3/50, Best Fitness: -0.868082368082368
Iteration 4/50, Best Fitness: -0.868082368082368
Iteration 5/50, Best Fitness: -0.868082368082368
Iteration 6/50, Best Fitness: -0.868082368082368
Iteration 7/50, Best Fitness: -0.8687258687258688
Iteration 8/50, Best Fitness: -0.8693693693693694
Iteration 9/50, Best Fitness: -0.8693693693693694
Iteration 10/50, Best Fitness: -0.8693693693693694
Iteration 11/50, Best Fitness: -0.8693693693693694
Iteration 12/50, Best Fitness: -0.8693693693693694
Iteration 13/50, Best Fitness: -0.8693693693693694
Iteration 14/50, Best Fitness: -0.8693693693693694
Iteration 15/50, Best Fitness: -0.8693693693693694
Iteration 16/50, Best Fitness: -0.8693693693693694
Iteration 17/50, Best Fitness: -0.8693693693693694
Iteration 18/50, Best Fitness: -0.8693693693693694
Iteration 19/50, Best Fitness: -0.8693693693693694
Iteration 20/50, Best Fitness: -0.869369369369

## GREY WOLF OPTIMIZER

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_20newsgroups

X_train, X_test, y_train, y_test = train_test_split(merge['tweet'], merge['Labels'], test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
class GreyWolfOptimizer:
    def __init__(self, obj_func, bounds, population_size, max_iter):
        self.obj_func = obj_func
        self.bounds = bounds
        self.population_size = population_size
        self.max_iter = max_iter
        self.dim = len(bounds)
        self.alpha_pos = np.zeros(self.dim)
        self.alpha_score = float("inf")
        self.beta_pos = np.zeros(self.dim)
        self.beta_score = float("inf")
        self.delta_pos = np.zeros(self.dim)
        self.delta_score = float("inf")

    def optimize(self):
        # Initialize the positions of wolves
        population = np.random.rand(self.population_size, self.dim)
        population = self.bounds[:, 0] + population * (self.bounds[:, 1] - self.bounds[:, 0])

        for iter in range(self.max_iter):
            for i in range(self.population_size):
                fitness = self.obj_func(population[i])

                # Update alpha, beta, and delta wolves
                if fitness < self.alpha_score:
                    self.delta_score = self.beta_score
                    self.delta_pos = self.beta_pos.copy()
                    self.beta_score = self.alpha_score
                    self.beta_pos = self.alpha_pos.copy()
                    self.alpha_score = fitness
                    self.alpha_pos = population[i].copy()
                elif fitness < self.beta_score:
                    self.delta_score = self.beta_score
                    self.delta_pos = self.beta_pos.copy()
                    self.beta_score = fitness
                    self.beta_pos = population[i].copy()
                elif fitness < self.delta_score:
                    self.delta_score = fitness
                    self.delta_pos = population[i].copy()

            a = 2 - iter * (2 / self.max_iter)  # Decrease linearly from 2 to 0

            for i in range(self.population_size):
                for j in range(self.dim):
                    r1 = np.random.rand()
                    r2 = np.random.rand()

                    A1 = 2 * a * r1 - a
                    C1 = 2 * r2
                    D_alpha = abs(C1 * self.alpha_pos[j] - population[i, j])
                    X1 = self.alpha_pos[j] - A1 * D_alpha

                    r1 = np.random.rand()
                    r2 = np.random.rand()

                    A2 = 2 * a * r1 - a
                    C2 = 2 * r2
                    D_beta = abs(C2 * self.beta_pos[j] - population[i, j])
                    X2 = self.beta_pos[j] - A2 * D_beta

                    r1 = np.random.rand()
                    r2 = np.random.rand()

                    A3 = 2 * a * r1 - a
                    C3 = 2 * r2
                    D_delta = abs(C3 * self.delta_pos[j] - population[i, j])
                    X3 = self.delta_pos[j] - A3 * D_delta

                    population[i, j] = (X1 + X2 + X3) / 3

            print(f"Iteration {iter+1}/{self.max_iter}, Best Fitness: {self.alpha_score}")

        return self.alpha_pos, self.alpha_score


In [None]:
def svm_objective(params):
    C, gamma = params
    C = max(C, 0.1)  # Ensure C is within the valid range
    gamma = max(gamma, 0.0001)  # Ensure gamma is within the valid range
    svm = SVC(C=C, gamma=gamma)
    svm.fit(X_train_tfidf, y_train)
    y_pred = svm.predict(X_test_tfidf)
    accuracy = accuracy_score(y_test, y_pred)
    return -accuracy  # Minimize negative accuracy to maximize accuracy


In [None]:
# Define parameter bounds (C and gamma)
bounds = np.array([[0.1, 100], [0.0001, 1]])

population_size = 10
max_iter = 50

gwo = GreyWolfOptimizer(svm_objective, bounds, population_size, max_iter)
best_params, best_fitness = gwo.optimize()

print(f"Best Parameters: C={best_params[0]}, gamma={best_params[1]}")
print(f"Best Accuracy: {-best_fitness}")


Iteration 1/50, Best Fitness: -0.8777348777348777
Iteration 2/50, Best Fitness: -0.8777348777348777
Iteration 3/50, Best Fitness: -0.8777348777348777
Iteration 4/50, Best Fitness: -0.8777348777348777
Iteration 5/50, Best Fitness: -0.8777348777348777
Iteration 6/50, Best Fitness: -0.8777348777348777
Iteration 7/50, Best Fitness: -0.8777348777348777
Iteration 8/50, Best Fitness: -0.8777348777348777
Iteration 9/50, Best Fitness: -0.8777348777348777
Iteration 10/50, Best Fitness: -0.8777348777348777
Iteration 11/50, Best Fitness: -0.8777348777348777
Iteration 12/50, Best Fitness: -0.8777348777348777
Iteration 13/50, Best Fitness: -0.8777348777348777
Iteration 14/50, Best Fitness: -0.8777348777348777
Iteration 15/50, Best Fitness: -0.8777348777348777
Iteration 16/50, Best Fitness: -0.8777348777348777
Iteration 17/50, Best Fitness: -0.8783783783783784
Iteration 18/50, Best Fitness: -0.879021879021879
Iteration 19/50, Best Fitness: -0.879021879021879
Iteration 20/50, Best Fitness: -0.87902187

## ANT COLONY OPTIMIZATION

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(merge['tweet'], merge['Labels'], test_size=0.2, random_state=42)

# TF-IDF vectorization
tfidf_vectorizer = TfidfVectorizer(max_features=1000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Scale data (optional but recommended)
scaler = StandardScaler(with_mean=False)
X_train_scaled = scaler.fit_transform(X_train_tfidf)
X_test_scaled = scaler.transform(X_test_tfidf)



In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# ACO parameters
num_ants = 10  # Number of ants (solutions)
num_iterations = 20  # Number of iterations
pheromone_decay = 0.5  # Pheromone decay rate
alpha = 1.0  # Pheromone weight
beta = 2.0  # Heuristic weight

# SVM parameter search space
C_values = [0.1, 1.0, 10.0]  # List of C values to search
gamma_values = ['auto', 'scale']  # List of gamma values to search


In [None]:
# Initialize pheromone trails
num_params = len(C_values) * len(gamma_values)
pheromones = np.ones((len(C_values), len(gamma_values)))


In [None]:
# ACO loop
for iteration in range(num_iterations):
    ant_solutions = []

    # Construct solutions (parameter sets) for each ant
    for ant in range(num_ants):
        # Initialize solution set (C index, gamma index)
        solution_set = set()

        # Build solution for current ant
        while len(solution_set) < num_params:
            # Calculate probabilities for each parameter combination
            probabilities = np.zeros((len(C_values), len(gamma_values)))
            total = np.sum(pheromones)

            for i in range(len(C_values)):
                for j in range(len(gamma_values)):
                    if (i, j) not in solution_set:
                        probabilities[i, j] = (pheromones[i, j] ** alpha) * (1.0 / total) ** beta

            # Select next parameter combination based on probabilities
            prob_flat = probabilities.flatten()
            chosen_index = np.random.choice(np.arange(len(prob_flat)), p=prob_flat / np.sum(prob_flat))
            chosen_params = np.unravel_index(chosen_index, probabilities.shape)

            # Add chosen parameter combination to solution set
            solution_set.add(chosen_params)

        ant_solutions.append(list(solution_set))

    # Evaluate solutions and update pheromones
    for ant_solution in ant_solutions:
        best_accuracy = 0.0
        best_params = None

        # Evaluate SVM for each parameter combination
        for C_idx, gamma_idx in ant_solution:
            C = C_values[C_idx]
            gamma = gamma_values[gamma_idx]

            # Train SVM on current parameters
            svm_model = SVC(kernel='rbf', C=C, gamma=gamma, random_state=42)
            svm_model.fit(X_train_scaled, y_train)

            # Evaluate SVM model
            y_pred = svm_model.predict(X_test_scaled)
            accuracy = accuracy_score(y_test, y_pred)

            # Update best parameters if accuracy improves
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = (C_idx, gamma_idx)

        # Update pheromones based on solution quality
        pheromones[best_params] += best_accuracy

    # Decay pheromones
    pheromones *= pheromone_decay

# Select best parameters based on highest pheromone levels
best_params_idx = np.unravel_index(np.argmax(pheromones), pheromones.shape)
best_C = C_values[best_params_idx[0]]
best_gamma = gamma_values[best_params_idx[1]]

print(f"Best SVM Parameters: C = {best_C}, gamma = {best_gamma}")


Best SVM Parameters: C = 10.0, gamma = scale


In [None]:
# Train SVM with best parameters
svm_best = SVC(kernel='rbf', C=best_C, gamma=best_gamma, random_state=42)
svm_best.fit(X_train_scaled, y_train)

# Evaluate SVM model
y_pred_best = svm_best.predict(X_test_scaled)
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f"Final SVM Model Accuracy with Best Parameters: {accuracy_best:.4f}")


Final SVM Model Accuracy with Best Parameters: 0.8436
