# Logistic Regression on the Breast Cancer Dataset
This notebook demonstrates a basic binary classification task using logistic regression from **scikit-learn**. The dataset contains numeric measurements of breast tumors, each labeled as malignant or benign.


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, RocCurveDisplay
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


In [None]:
# Load the breast cancer dataset bundled with scikit-learn.
# `data` is kept as a whole because a later cell reads `data.target_names`.
data = load_breast_cancer()
X = data.data    # feature matrix, one row per sample
y = data.target  # class label for each sample

# Hold out 20% of the samples for testing.
# `stratify=y` keeps the class proportions the same in the train and test
# splits, so the reported metrics do not depend on how balanced a particular
# random draw happens to be. The fixed `random_state` makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Train a logistic regression model on standardized features.
# Logistic regression's default solver and its L2 penalty are both sensitive
# to feature scale: unscaled inputs slow convergence and make the penalty
# weigh features unevenly. Wrapping the scaler and the classifier in a single
# pipeline means the scaling statistics are learned from the training data
# only and the identical transform is applied automatically at predict time.
clf = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
clf.fit(X_train, y_train)

# Predict on the held-out test set and report the fraction of correct labels.
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.3f}")


In [None]:
# Per-class precision / recall / F1, labelled with the dataset's class names.
print(classification_report(y_test, y_pred, target_names=data.target_names))

# Confusion matrix of true vs. predicted labels.
# The class names are passed as tick labels so the axes show them rather than
# the raw integer codes, matching the labels used in the printed report.
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, display_labels=data.target_names
)
plt.show()


In [None]:
# ROC curve of the fitted classifier on the held-out test set,
# drawn on an explicitly created figure/axes pair.
roc_fig, roc_ax = plt.subplots()
RocCurveDisplay.from_estimator(clf, X_test, y_test, ax=roc_ax)
plt.show()
