In [None]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

# --- Step 1: fetch the breast-cancer dataset in pandas form ---
# as_frame=True asks scikit-learn to hand the data back as pandas objects.
data = load_breast_cancer(as_frame=True)
df = data.frame  # the table exposed by the bunch's `frame` attribute

# --- Step 2: separate the predictors (X) from the label (y) ---
X, y = data.data, data.target

# --- Step 3: hold out 20% of the rows for testing ---
# random_state is pinned so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Step 4: fit a logistic-regression classifier on the training rows ---
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=10000).fit(X_train, y_train)

# --- Step 5: score the classifier on the held-out rows ---
predictions = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, predictions):.2%}")
print("\nDetailed Report:\n", classification_report(y_test, predictions))

# 6. Visualise how strongly each feature drives the prediction
#
# The model above was fit on the features as loaded (no scaling step), so each
# raw coefficient is "change in log-odds per one raw unit" of its feature.
# Those units differ from feature to feature, which makes the raw magnitudes
# incomparable: a feature with a small numeric range needs a large coefficient
# to have any effect, and vice versa. Multiplying each coefficient by the
# feature's training-set standard deviation re-expresses it as "change in
# log-odds per one standard deviation", a common scale on which bar lengths
# can be compared. The fitted model itself is left untouched.
#
# Indexing by X_train.columns (rather than a separately stored name list)
# guarantees the labels line up with the columns the model was trained on.
coefficients = pd.Series(model.coef_[0], index=X_train.columns)
feature_importance = coefficients * X_train.std()

fig, ax = plt.subplots(figsize=(10, 8))
feature_importance.sort_values().plot(kind='barh', ax=ax)
# Zero line: bars to the right raise the log-odds of model.classes_[1],
# bars to the left lower them.
ax.axvline(0, color='black', linewidth=0.8)
ax.set_xlabel("Change in log-odds per one standard deviation of the feature")
ax.set_title("Which features the 'Math' thinks are most important")
plt.show()

Accuracy: 95.61%

Detailed Report:
               precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

