In [1]:
# 1. Load scikit-learn's bundled breast cancer dataset.
from sklearn.datasets import load_breast_cancer

# Called with default arguments; the returned dataset object is kept as `my_data`.
my_data = load_breast_cancer()


In [2]:
#2
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [3]:
#3
from sklearn.model_selection import train_test_split
import numpy as np


# Synthetic demo data: 100 samples x 5 uniform features with binary labels.
# A seeded Generator is used so that Restart & Run All reproduces the exact
# same arrays; the unseeded np.random.* calls gave different data on every
# run, which also changed the stratified split below.
rng = np.random.default_rng(42)
X = rng.random((100, 5))
y = rng.integers(0, 2, size=100)  # upper bound is exclusive -> labels in {0, 1}

# 80/20 split. `stratify=y` keeps the class proportions of `y` in both the
# training and the test part; `random_state` fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y,
)

# Sanity check of the resulting shapes (expected: 80 train rows, 20 test rows).
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


X_train shape: (80, 5)
X_test shape: (20, 5)
y_train shape: (80,)
y_test shape: (20,)


In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the breast cancer dataset and unpack features / labels in one step.
data = load_breast_cancer()
X, y = data.data, data.target

# Stratified 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Hyperparameters of the decision tree, gathered in one place for readability.
tree_params = {
    "criterion": 'entropy',
    "splitter": 'best',
    "max_depth": 5,
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "min_weight_fraction_leaf": 0.0,
    "max_features": None,
    "random_state": 42,
    "max_leaf_nodes": 15,
    "min_impurity_decrease": 0.0,
    "class_weight": {0: 1, 1: 2},  # class 1 is weighted twice as much as class 0
    "ccp_alpha": 0.01,
}
model = DecisionTreeClassifier(**tree_params)

# Fit on the training part, then predict the held-out test part.
y_test_pred = model.fit(X_train, y_train).predict(X_test)

# Compute every metric first; reporting happens afterwards in the same order
# as before (accuracy, precision, recall, F1, confusion matrix).
accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred, average='binary')
recall = recall_score(y_test, y_test_pred, average='binary')
f1 = f1_score(y_test, y_test_pred, average='binary')
conf_matrix = confusion_matrix(y_test, y_test_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

print("\nConfusion Matrix:")
print(conf_matrix)


Accuracy: 0.93
Precision: 0.94
Recall: 0.94
F1-Score: 0.94

Confusion Matrix:
[[38  4]
 [ 4 68]]


In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Reload the breast cancer dataset so this cell does not rely on earlier state.
data = load_breast_cancer()
X, y = data.data, data.target

# Stratified 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Random forest configuration, spelled out as a dict and unpacked below.
forest_params = {
    "n_estimators": 300,
    "criterion": 'entropy',
    "max_depth": 20,
    "min_samples_split": 4,
    "min_samples_leaf": 2,
    "min_weight_fraction_leaf": 0.0,
    "max_features": 'sqrt',
    "max_leaf_nodes": None,
    "min_impurity_decrease": 0.001,
    "bootstrap": True,
    "oob_score": True,
    "n_jobs": -1,
    "random_state": 42,
    "verbose": 0,
    "warm_start": False,
    "class_weight": 'balanced',
    "ccp_alpha": 0.0005,
    "max_samples": 0.95,
}
model = RandomForestClassifier(**forest_params)

# Train, predict the test part, and score the predictions.
y_test_pred = model.fit(X_train, y_train).predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"Test Accuracy: {test_accuracy:.2f}")
print("\nClassification Report:")
# target_names labels the report rows with the dataset's class names.
print(classification_report(y_test, y_test_pred, target_names=data.target_names))


Test Accuracy: 0.96

Classification Report:
              precision    recall  f1-score   support

   malignant       0.95      0.93      0.94        42
      benign       0.96      0.97      0.97        72

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



In [12]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Build a synthetic classification problem: 1000 samples, 20 features
# (15 informative + 5 redundant), generated with a fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42
)

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Weak learner that AdaBoost boosts: a shallow tree (maximum depth 3).
base_estimator = DecisionTreeClassifier(
    max_depth=3,
    random_state=42
)

# AdaBoost configuration.
# NOTE: `algorithm` is deliberately not passed. The previously hard-coded
# 'SAMME.R' was only restating the library default, was deprecated in
# scikit-learn 1.4 and is rejected by newer releases. Leaving the argument out
# keeps the same behaviour on versions where 'SAMME.R' was the default and
# lets the cell run on versions where it is no longer accepted.
model_adaboost = AdaBoostClassifier(
    estimator=base_estimator,      # weak learner fitted at every boosting round
    n_estimators=200,              # maximum number of boosting rounds (library default: 50)
    learning_rate=0.3,             # learning rate (library default: 1.0)
    random_state=42                # fixed seed for reproducible results
)

# Train the ensemble.
model_adaboost.fit(X_train, y_train)

# Predict the held-out test part.
y_pred = model_adaboost.predict(X_test)

# Evaluate: overall accuracy plus the per-class report.
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))




Test Accuracy: 0.89

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.90      0.90       106
           1       0.88      0.88      0.88        94

    accuracy                           0.89       200
   macro avg       0.89      0.89      0.89       200
weighted avg       0.89      0.89      0.89       200



In [13]:
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Compare a single decision tree, a random forest and AdaBoost by F1-score.
# NOTE: X_train / X_test / y_train / y_test are not created in this cell; they
# must already exist in the kernel (the preceding cell defines them).
# NOTE(review): the "best" model is picked using the test set itself, so the
# winning score is an optimistic estimate — consider a validation split.

# Baseline: one shallow, regularised decision tree.
dt_model = DecisionTreeClassifier(
    max_depth=3,
    min_samples_split=10,
    min_samples_leaf=5,
    random_state=42
)
dt_model.fit(X_train, y_train)
y_test_pred_dt = dt_model.predict(X_test)
f1_dt = f1_score(y_test, y_test_pred_dt)

# Random forest with 100 trees of depth at most 5.
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=42
)
rf_model.fit(X_train, y_train)
y_test_pred_rf = rf_model.predict(X_test)
f1_rf = f1_score(y_test, y_test_pred_rf)

# AdaBoost on top of a tree configured like the baseline above.
base_estimator = DecisionTreeClassifier(
    max_depth=3,
    min_samples_split=10,
    min_samples_leaf=5,
    random_state=42
)

# `algorithm` is deliberately not passed: the previously hard-coded 'SAMME.R'
# was deprecated in scikit-learn 1.4 and is rejected by newer releases, while
# on older releases it was the default anyway.
model_ab = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=200,
    learning_rate=0.1,
    random_state=42
)
model_ab.fit(X_train, y_train)
y_test_pred_ab = model_ab.predict(X_test)
f1_ab = f1_score(y_test, y_test_pred_ab)

# Collect scores and predictions under the same display names so the winner
# can be looked up directly instead of through an if/elif chain.
f1_scores = {"Decision Tree": f1_dt, "Random Forest": f1_rf, "AdaBoost": f1_ab}
test_predictions = {
    "Decision Tree": y_test_pred_dt,
    "Random Forest": y_test_pred_rf,
    "AdaBoost": y_test_pred_ab,
}
# On a tie, max() returns the first maximal key in insertion order.
winner = max(f1_scores, key=f1_scores.get)

# Results, printed in insertion order (Decision Tree, Random Forest, AdaBoost).
for model_name, score in f1_scores.items():
    print(f"{model_name} F1-Score: {score:.2f}")
print(f"\nThe best performing model is: {winner} with F1-Score: {f1_scores[winner]:.2f}")

# Detailed per-class report for the best model only.
print(f"\nClassification Report for {winner}:")
print(classification_report(y_test, test_predictions[winner]))




Decision Tree F1-Score: 0.77
Random Forest F1-Score: 0.87
AdaBoost F1-Score: 0.89

The best performing model is: AdaBoost with F1-Score: 0.89

Classification Report for AdaBoost:
              precision    recall  f1-score   support

           0       0.89      0.92      0.91       106
           1       0.91      0.87      0.89        94

    accuracy                           0.90       200
   macro avg       0.90      0.90      0.90       200
weighted avg       0.90      0.90      0.90       200

