<a href="https://colab.research.google.com/github/khamesi1985/2025/blob/main/Decision_Tree_with_Maunal_PSO_FS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Load the data file (it has no header row) and discard rows with missing values.
url = "https://raw.githubusercontent.com/khamesi1985/2025/main/wdbc.data"
data = pd.read_csv(url, header=None).dropna()

# Columns 2..31 are used as the feature matrix; column 1 holds the class label.
X_full = data.iloc[:, 2:32]
Pre_Y = data.iloc[:, 1]

# Encode the class labels as integer codes; the original label values stay
# available through make_bin.classes_ for the reports printed later.
make_bin = LabelEncoder()
Y_full = make_bin.fit_transform(Pre_Y)

# Hold out 20% of the samples as a test split (fixed seed for repeatability).
X_train_full, X_test_full, Y_train, Y_test = train_test_split(
    X_full,
    Y_full,
    test_size=0.2,
    random_state=42,
)

# Min-max scale the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train_full)
X_train_scaled_full, X_test_scaled_full = (
    scaler.transform(split) for split in (X_train_full, X_test_full)
)

# Baseline: decision tree trained on all features.
DT_model = DecisionTreeClassifier(class_weight='balanced', random_state=42)

# Per-fold accuracy from 4-fold cross-validation on the training split.
DT_scores = cross_val_score(
    DT_model,
    X_train_scaled_full,
    Y_train,
    cv=4,
    scoring='accuracy',
)

# Fit on the whole training split; this fitted model is the one used on the
# test split below.
DT_model.fit(X_train_scaled_full, Y_train)

# Cross-validated predictions for the training split (used for the training
# classification report) and predictions for the held-out test split.
Y_pred_cross_val_DT = cross_val_predict(
    DT_model,
    X_train_scaled_full,
    Y_train,
    cv=4,
)
Y_pred_test_DT = DT_model.predict(X_test_scaled_full)

# Test-split metrics for the baseline decision tree.
accuracy_test_DT = accuracy_score(Y_test, Y_pred_test_DT)
precision_test_DT = precision_score(Y_test, Y_pred_test_DT)
recall_test_DT = recall_score(Y_test, Y_pred_test_DT)
f1_score_test_DT = f1_score(Y_test, Y_pred_test_DT)

# Report the cross-validation results obtained on the training split.
print("\n*** DECISION TREE (DT) CLASSIFIER ***")
print("--- Train Data Evaluate via Cross Validation by DT ---")
print(f"Accuracy Scores for Each Fold By DT : {DT_scores}")
print(f"Mean Accuracy score by DT : {np.mean(DT_scores)}")
print(classification_report(Y_train, Y_pred_cross_val_DT, target_names=make_bin.classes_))

# Report the metrics measured on the held-out test split.
print("--- Test Data Evaluate by DT ---")
print("Accuracy On Test Data by DT = ", accuracy_test_DT)
print("Precision On Test Data by DT = ", precision_test_DT)
print("Recall On Test Data by DT = ", recall_test_DT)
print("F1-Score On Test Data by DT = ", f1_score_test_DT)

# Confusion-matrix counts on the test split, with encoded label 1 treated as
# the positive class and 0 as the negative class. Each count is the number of
# samples where both element-wise conditions hold.
len_test = len(Y_test)
TP = int(np.sum((Y_test == 1) & (Y_pred_test_DT == 1)))
TN = int(np.sum((Y_test == 0) & (Y_pred_test_DT == 0)))
FP = int(np.sum((Y_test == 0) & (Y_pred_test_DT == 1)))
FN = int(np.sum((Y_test == 1) & (Y_pred_test_DT == 0)))
print("TP On Test Data by DT = ", TP)
print("TN On Test Data by DT = ", TN)
print("FP On Test Data by DT = ", FP)
print("FN On Test Data by DT = ", FN)
print(classification_report(Y_test, Y_pred_test_DT, target_names=make_bin.classes_))
print("-"*55)

# Particle swarm optimisation (PSO) used to pick the best feature subset.
#
# Each particle is a real-valued vector with one entry per feature; a feature
# counts as selected when its entry is greater than 0.5. The fitness (lower is
# better) is the 4-fold cross-validated error of a decision tree trained on the
# selected columns plus a small penalty proportional to the share of features
# kept. Positions are not clamped: only the 0.5 threshold is used to decode them.
particles = 20
iterations = 100
dimensions = X_train_scaled_full.shape[1]
c1 = 1        # weight of the pull towards the particle's own best position
c2 = 2        # weight of the pull towards the swarm's best position
w = 0.9       # inertia weight applied to the previous velocity
alfa = 0.01   # weight of the feature-count penalty in the fitness
np.random.seed(44)

# Fitness values already computed, keyed by the boolean feature mask. The tree
# and the CV call use fixed settings, so a mask always maps to the same score
# and re-running cross-validation for a repeated mask would be wasted work.
_fitness_cache = {}


def _pso_fitness(position):
    """Return the fitness (lower is better) of one particle position.

    The position is decoded into a boolean mask with the 0.5 threshold. An
    empty mask gets the worst score of 1.0; otherwise the score is
    ``1 - mean CV accuracy + alfa * (selected / dimensions)``.
    """
    subset_features = position > 0.5
    key = subset_features.tobytes()
    if key not in _fitness_cache:
        n_selected = np.sum(subset_features)
        if n_selected == 0:
            _fitness_cache[key] = 1.0
        else:
            X_selected = X_train_scaled_full[:, subset_features]
            model = DecisionTreeClassifier(random_state=42, class_weight='balanced')
            scores = cross_val_score(model, X_selected, Y_train, cv=4, scoring='accuracy')
            acc = np.mean(scores)
            penalty = alfa * (n_selected / dimensions)
            _fitness_cache[key] = 1 - acc + penalty
    return _fitness_cache[key]


positions = np.random.uniform(0, 1, (particles, dimensions))
velocities = np.zeros((particles, dimensions))

# Score the initial swarm before any particle moves, so that the very first
# velocity update is attracted to evaluated personal/global bests instead of
# to an arbitrary, never-evaluated position.
personal_best_positions = positions.copy()
personal_best_scores = np.array([_pso_fitness(p) for p in positions])
best_index = np.argmin(personal_best_scores)
global_best_score = personal_best_scores[best_index]
global_best_position = personal_best_positions[best_index].copy()

for iteration in range(iterations):
    for i in range(particles):
        r1 = np.random.rand(dimensions)
        r2 = np.random.rand(dimensions)
        velocities[i] = (
            w * velocities[i]
            + c1 * r1 * (personal_best_positions[i] - positions[i])
            + c2 * r2 * (global_best_position - positions[i])
        )
        positions[i] = positions[i] + velocities[i]
        # Fitness of the particle at its new position.
        score = _pso_fitness(positions[i])
        # Update the particle's personal best.
        if score < personal_best_scores[i]:
            personal_best_positions[i] = positions[i].copy()
            personal_best_scores[i] = score
    # Update the global best once per iteration, after every particle has moved.
    best_index = np.argmin(personal_best_scores)
    if personal_best_scores[best_index] < global_best_score:
        global_best_position = personal_best_positions[best_index].copy()
        global_best_score = personal_best_scores[best_index]
    print(f"Iteration {iteration+1} | Best Score: {global_best_score:.4f}")

# Final feature selection, decoded from the best position found by the swarm.
best_features_mask = (global_best_position > 0.5)
num_selected_features = np.sum(best_features_mask)
print(f"\nTotal Selected Features by PSO: {num_selected_features}/{dimensions}")
selected_features = np.where(best_features_mask)[0]
print(f"\nSelected Features by PSO: {selected_features}")
X_train_selected = X_train_scaled_full[:, best_features_mask]
X_test_selected = X_test_scaled_full[:, best_features_mask]

# Decision tree trained only on the features selected by PSO.
DT_model_with_PSO = DecisionTreeClassifier(class_weight='balanced', random_state=42)

# Per-fold accuracy from 4-fold cross-validation on the reduced training split.
DT_scores_with_PSO = cross_val_score(
    DT_model_with_PSO,
    X_train_selected,
    Y_train,
    cv=4,
    scoring='accuracy',
)

# Fit on the whole reduced training split; this fitted model is the one used
# on the test split below.
DT_model_with_PSO.fit(X_train_selected, Y_train)

# Cross-validated predictions for the training split (used for the training
# classification report) and predictions for the held-out test split.
Y_pred_cross_val_DT_with_PSO = cross_val_predict(
    DT_model_with_PSO,
    X_train_selected,
    Y_train,
    cv=4,
)
Y_pred_test_DT_with_PSO = DT_model_with_PSO.predict(X_test_selected)

# Test-split metrics for the decision tree trained on the PSO-selected features.
accuracy_test_DT_with_PSO = accuracy_score(Y_test, Y_pred_test_DT_with_PSO)
precision_test_DT_with_PSO = precision_score(Y_test, Y_pred_test_DT_with_PSO)
recall_test_DT_with_PSO = recall_score(Y_test, Y_pred_test_DT_with_PSO)
f1_score_test_DT_with_PSO = f1_score(Y_test, Y_pred_test_DT_with_PSO)

# Report the cross-validation results obtained on the training split.
print("\n*** DECISION TREE (DT) CLASSIFIER WITH PSO FEATURE SELECTION ***")
print("--- Train Data Evaluate via Cross Validation by DT with P.S.O ---")
print(f"Accuracy Scores for Each Fold By DT with P.S.O : {DT_scores_with_PSO}")
print(f"Mean Accuracy score by DT with P.S.O : {np.mean(DT_scores_with_PSO)}")
print(classification_report(Y_train, Y_pred_cross_val_DT_with_PSO, target_names=make_bin.classes_))

# Report the metrics measured on the held-out test split.
print("--- Test Data Evaluate by DT with P.S.O ---")
print("Accuracy On Test Data by DT with P.S.O = ", accuracy_test_DT_with_PSO)
print("Precision On Test Data by DT with P.S.O = ", precision_test_DT_with_PSO)
print("Recall On Test Data by DT with P.S.O = ", recall_test_DT_with_PSO)
print("F1-Score On Test Data by DT with P.S.O = ", f1_score_test_DT_with_PSO)

# Confusion-matrix counts on the test split, with encoded label 1 treated as
# the positive class and 0 as the negative class. Each count is the number of
# samples where both element-wise conditions hold.
len_test = len(Y_test)
TP = int(np.sum((Y_test == 1) & (Y_pred_test_DT_with_PSO == 1)))
TN = int(np.sum((Y_test == 0) & (Y_pred_test_DT_with_PSO == 0)))
FP = int(np.sum((Y_test == 0) & (Y_pred_test_DT_with_PSO == 1)))
FN = int(np.sum((Y_test == 1) & (Y_pred_test_DT_with_PSO == 0)))
print("TP On Test Data by DT with P.S.O = ", TP)
print("TN On Test Data by DT with P.S.O = ", TN)
print("FP On Test Data by DT with P.S.O = ", FP)
print("FN On Test Data by DT with P.S.O = ", FN)
print(classification_report(Y_test, Y_pred_test_DT_with_PSO, target_names=make_bin.classes_))
print("-"*55)


*** DECISION TREE (DT) CLASSIFIER ***
--- Train Data Evaluate via Cross Validation by DT ---
Accuracy Scores for Each Fold By DT : [0.92982456 0.92982456 0.87719298 0.91150442]
Mean Accuracy score by DT : 0.9120866325104797
              precision    recall  f1-score   support

           B       0.92      0.94      0.93       286
           M       0.89      0.87      0.88       169

    accuracy                           0.91       455
   macro avg       0.91      0.90      0.91       455
weighted avg       0.91      0.91      0.91       455

--- Test Data Evaluate by DT ---
Accuracy On Test Data by DT =  0.956140350877193
Precision On Test Data by DT =  0.9523809523809523
Recall On Test Data by DT =  0.9302325581395349
F1-Score On Test Data by DT =  0.9411764705882353
TP On Test Data by DT =  40
TN On Test Data by DT =  69
FP On Test Data by DT =  2
FN On Test Data by DT =  3
              precision    recall  f1-score   support

           B       0.96      0.97      0.97        7