In [6]:
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the scikit-learn breast cancer dataset: `X` holds the feature columns
# (named after data.feature_names) and `y` holds the class labels.
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Rank every feature by the absolute value of its correlation with the target.
# The label is attached to a temporary copy (`assign` returns a new frame)
# instead of being written into X, so X stays a pure feature matrix and can
# never leak the label into a model, even if this cell is re-run or X is used
# before it is narrowed down to the selected columns.
correlations = X.assign(target=y).corr()["target"].drop("target").abs()

# Keep the names of the 10 features with the strongest absolute correlation.
top_10_features = correlations.sort_values(ascending=False).head(10)
selected_features = top_10_features.index.tolist()


In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Narrow the feature matrix down to the columns chosen during feature
# selection, and re-read the labels from the loaded dataset.
X = X.loc[:, selected_features]
y = data.target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
# NOTE(review): selected_features was ranked on the full dataset before this
# split, so the held-out rows influenced feature selection — confirm whether
# that is acceptable for the reported scores.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit a decision tree on the training rows (seeded so repeated runs match),
# then predict the held-out rows.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_preds = dt.predict(X_test)

# Report overall accuracy followed by the per-class precision/recall table.
print("Decision Tree (Top 10 Features) Accuracy:", accuracy_score(y_test, dt_preds))
print("Classification Report:\n", classification_report(y_test, dt_preds))



Decision Tree (Top 10 Features) Accuracy: 0.9385964912280702
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.94      0.93        47
           1       0.95      0.94      0.95        67

    accuracy                           0.94       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.94      0.94      0.94       114



In [7]:
import joblib

# Persist the fitted Decision Tree so it can be reloaded without retraining.
joblib.dump(value=dt, filename='breast_cancer_model.pkl')

# Persist the list of selected feature names next to the model; the web app's
# input form needs it to know which inputs to collect.
joblib.dump(value=selected_features, filename='selected_features.pkl')


['selected_features.pkl']