<a href="https://colab.research.google.com/github/khamesi1985/2025/blob/main/Decision_Tree_with_P_S_O_FS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pyswarms
import pandas as pd
import numpy as np
import pyswarms as pso
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# بارگذاری داده‌ها
url = "https://raw.githubusercontent.com/khamesi1985/2025/main/wdbc.data"
data = pd.read_csv(url, header=None)
data = data.dropna()
X_full = data.iloc[:, 2:32]
Pre_Y = data.iloc[:, 1]
make_bin = LabelEncoder()
make_bin.fit(Pre_Y)
Y_full = make_bin.transform(Pre_Y)

# تقسیم داده‌ها به آموزش و آزمون
X_train_full, X_test_full, Y_train, Y_test = train_test_split(X_full, Y_full, test_size=0.2, random_state=42)

# استاندارد سازی داده ها
scaler = MinMaxScaler()
scaler.fit(X_train_full)
X_train_scaled_full = scaler.transform(X_train_full)
X_test_scaled_full = scaler.transform(X_test_full)

# پیاده سازی مدل درخت تصمیم
DT_model = DecisionTreeClassifier(random_state=42, class_weight='balanced')
DT_scores = cross_val_score(DT_model, X_train_scaled_full, Y_train, cv=4, scoring='accuracy')
DT_model.fit(X_train_scaled_full, Y_train)
Y_pred_cross_val_DT = cross_val_predict(DT_model, X_train_scaled_full, Y_train, cv=4)
Y_pred_test_DT = DT_model.predict(X_test_scaled_full)

# محاسبه دقت مدل درخت تصمیم
accuracy_test_DT = accuracy_score(Y_test, Y_pred_test_DT)
precision_test_DT = precision_score(Y_test, Y_pred_test_DT)
recall_test_DT = recall_score(Y_test, Y_pred_test_DT)
f1_score_test_DT = f1_score(Y_test, Y_pred_test_DT)
print("\n*** DECISION TREE (DT) CLASSIFIER ***")
print("--- Train Data Evaluate via Cross Validation by DT ---")
print(f"Accuracy Scores for Each Fold By DT : {DT_scores}")
print(f"Mean Accuracy score by DT : {np.mean(DT_scores)}")
print(classification_report(Y_train, Y_pred_cross_val_DT, target_names=make_bin.classes_))
print("--- Test Data Evaluate by DT ---")
print("Accuracy On Test Data by DT = ", accuracy_test_DT)
print("Precision On Test Data by DT = ", precision_test_DT)
print("Recall On Test Data by DT = ", recall_test_DT)
print("F1-Score On Test Data by DT = ", f1_score_test_DT)
len_test = len(Y_test)
TN = TP = FN = FP = 0
for i in range (len_test):
  if Y_test[i] == 0 and Y_pred_test_DT[i] == 1:
    FP = FP + 1
  elif Y_test[i] == 1 and Y_pred_test_DT[i] == 0:
    FN = FN + 1
  elif Y_test[i] == 1 and Y_pred_test_DT[i] == 1:
    TP = TP + 1
  elif Y_test[i] == 0 and Y_pred_test_DT[i] == 0:
    TN = TN + 1
print("TP On Test Data by DT = ", TP)
print("TN On Test Data by DT = ", TN)
print("FP On Test Data by DT = ", FP)
print("FN On Test Data by DT = ", FN)
print(classification_report(Y_test, Y_pred_test_DT, target_names=make_bin.classes_))
print("-"*55)

# پیاده سازی الگوریتم بهینه سازی گروه ذرات و انتخاب بهترین ویژگی ها
def f_fitness(subset_features, X_data, Y_data, classifier, cv_folds):
  subset_features = (subset_features > 0.5).astype(int)
  if np.sum(subset_features) == 0:
    return 1.0
  selected_indices = np.where(subset_features == 1)[0]
  X_selected = X_data[:, selected_indices]
  scores = cross_val_score(classifier, X_selected, Y_data, cv=cv_folds, scoring='accuracy')
  cost = 1-np.mean(scores)
  return cost
np.random.seed(42)
dimensions = X_train_scaled_full.shape[1]   # با توجه به اینکه خروجی این دستور یک تاپل دوتاییست که عدد اول تعداد سطرها و عدد دوم تعداد ستونها را نشان میدهد در صورتی که داخل کروشه به جای عدد یک، عدد صفر بنویسیم تعداد سطرها یا همان نمونه ها را بدست خواهد آورد
options = {'c1': 1, 'c2': 2, 'w': 0.9, 'k': 20, 'p': 1}
n_particles = 20
iters = 100
DT_model_for_pso = DecisionTreeClassifier(random_state=42, class_weight='balanced')
PSO_alg = pso.discrete.BinaryPSO(n_particles=n_particles, dimensions=dimensions, options=options)
cost, pos = PSO_alg.optimize(f_fitness, iters=iters, X_data=X_train_scaled_full, Y_data=Y_train, classifier=DT_model_for_pso, cv_folds=4)
print(f"\n*** P.S.O Feature Selection Results ***")
print(f"Best fitness (Mean CV Accuracy) found by P.S.O : {1-cost}")
selected_features_mask = (pos > 0.5).astype(int)
selected_feature_indices = np.where(selected_features_mask == 1)[0]
num_selected_features = len(selected_feature_indices)
print(f"Number of selected features : {num_selected_features}")
print(f"Selected feature indices : {selected_feature_indices}")
X_train_selected = X_train_scaled_full[:, selected_feature_indices]
X_test_selected = X_test_scaled_full[:, selected_feature_indices]

# پیاده سازی مدل درخت تصمیم بعد از انتخاب ویژگی های بهینه توسط الگوریتم بهینه سازی گروه ذرات P.S.O
DT_model_with_PSO = DecisionTreeClassifier(random_state=42, class_weight='balanced')
DT_scores_with_PSO = cross_val_score(DT_model_with_PSO, X_train_selected, Y_train, cv=4, scoring='accuracy')
DT_model_with_PSO.fit(X_train_selected, Y_train)
Y_pred_cross_val_DT_with_PSO = cross_val_predict(DT_model_with_PSO, X_train_selected, Y_train, cv=4)
Y_pred_test_DT_with_PSO = DT_model_with_PSO.predict(X_test_selected)

# محاسبه دقت مدل درخت تصمیم بعد از انتخاب ویژگی های بهینه توسط الگوریتم بهینه سازی گروه ذرات P.S.O
accuracy_test_DT_with_PSO = accuracy_score(Y_test, Y_pred_test_DT_with_PSO)
precision_test_DT_with_PSO = precision_score(Y_test, Y_pred_test_DT_with_PSO)
recall_test_DT_with_PSO = recall_score(Y_test, Y_pred_test_DT_with_PSO)
f1_score_test_DT_with_PSO = f1_score(Y_test, Y_pred_test_DT_with_PSO)
print("\n*** DECISION TREE (DT) CLASSIFIER WITH PSO FEATURE SELECTION ***")
print("--- Train Data Evaluate via Cross Validation by DT with P.S.O ---")
print(f"Accuracy Scores for Each Fold By DT with P.S.O : {DT_scores_with_PSO}")
print(f"Mean Accuracy score by DT with P.S.O : {np.mean(DT_scores_with_PSO)}")
print(classification_report(Y_train, Y_pred_cross_val_DT_with_PSO, target_names=make_bin.classes_))
print("--- Test Data Evaluate by DT with P.S.O ---")
print("Accuracy On Test Data by DT with P.S.O = ", accuracy_test_DT_with_PSO)
print("Precision On Test Data by DT with P.S.O = ", precision_test_DT_with_PSO)
print("Recall On Test Data by DT with P.S.O = ", recall_test_DT_with_PSO)
print("F1-Score On Test Data by DT with P.S.O = ", f1_score_test_DT_with_PSO)
len_test = len(Y_test)
TN = TP = FN = FP = 0
for i in range (len_test):
  if Y_test[i] == 0 and Y_pred_test_DT_with_PSO[i] == 1:
    FP = FP + 1
  elif Y_test[i] == 1 and Y_pred_test_DT_with_PSO[i] == 0:
    FN = FN + 1
  elif Y_test[i] == 1 and Y_pred_test_DT_with_PSO[i] == 1:
    TP = TP + 1
  elif Y_test[i] == 0 and Y_pred_test_DT_with_PSO[i] == 0:
    TN = TN + 1
print("TP On Test Data by DT with P.S.O = ", TP)
print("TN On Test Data by DT with P.S.O = ", TN)
print("FP On Test Data by DT with P.S.O = ", FP)
print("FN On Test Data by DT with P.S.O = ", FN)
print(classification_report(Y_test, Y_pred_test_DT_with_PSO, target_names=make_bin.classes_))
print("-"*55)

Collecting pyswarms
  Downloading pyswarms-1.3.0-py2.py3-none-any.whl.metadata (33 kB)
Downloading pyswarms-1.3.0-py2.py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyswarms
Successfully installed pyswarms-1.3.0


2025-07-21 17:45:11,044 - pyswarms.discrete.binary - INFO - Optimize for 100 iters with {'c1': 1, 'c2': 2, 'w': 0.9, 'k': 20, 'p': 1}



*** DECISION TREE (DT) CLASSIFIER ***
--- Train Data Evaluate via Cross Validation by DT ---
Accuracy Scores for Each Fold By DT : [0.92982456 0.92982456 0.87719298 0.91150442]
Mean Accuracy score by DT : 0.9120866325104797
              precision    recall  f1-score   support

           B       0.92      0.94      0.93       286
           M       0.89      0.87      0.88       169

    accuracy                           0.91       455
   macro avg       0.91      0.90      0.91       455
weighted avg       0.91      0.91      0.91       455

--- Test Data Evaluate by DT ---
Accuracy On Test Data by DT =  0.956140350877193
Precision On Test Data by DT =  0.9523809523809523
Recall On Test Data by DT =  0.9302325581395349
F1-Score On Test Data by DT =  0.9411764705882353
TP On Test Data by DT =  40
TN On Test Data by DT =  69
FP On Test Data by DT =  2
FN On Test Data by DT =  3
              precision    recall  f1-score   support

           B       0.96      0.97      0.97        7

pyswarms.discrete.binary: 100%|██████████|100/100, best_cost=0.0704
2025-07-21 17:45:48,212 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.07036950784039753, best pos: [1 0 0 0 1 0 0 1 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1]



*** P.S.O Feature Selection Results ***
Best fitness (Mean CV Accuracy) found by P.S.O : 0.9296304921596025
Number of selected features : 10
Selected feature indices : [ 0  4  7  8  9 12 13 25 27 29]

*** DECISION TREE (DT) CLASSIFIER WITH PSO FEATURE SELECTION ***
--- Train Data Evaluate via Cross Validation by DT with P.S.O ---
Accuracy Scores for Each Fold By DT with P.S.O : [0.92105263 0.9122807  0.86842105 0.92920354]
Mean Accuracy score by DT with P.S.O : 0.9077394814469802
              precision    recall  f1-score   support

           B       0.94      0.91      0.93       286
           M       0.86      0.90      0.88       169

    accuracy                           0.91       455
   macro avg       0.90      0.91      0.90       455
weighted avg       0.91      0.91      0.91       455

--- Test Data Evaluate by DT with P.S.O ---
Accuracy On Test Data by DT with P.S.O =  0.9385964912280702
Precision On Test Data by DT with P.S.O =  0.9090909090909091
Recall On Test Data 

In [None]:
pip install pyswarm

