# Logistic Regression - Breast Cancer Classification
This notebook performs binary classification using logistic regression on the Breast Cancer Wisconsin dataset.

## Step 1: Import Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

## Step 2: Load Dataset

In [None]:
# Read the worksheet named 'in' from the Excel workbook into a DataFrame.
df = pd.read_excel(
    io='cancer dataset.xlsx',
    sheet_name='in',
)
# Trailing expression: the notebook renders the first rows as a quick check.
df.head()

## Step 3: Prepare Features and Target

In [None]:
# Features: every column except the row identifier and the label itself.
X = df.drop(columns=['id', 'diagnosis'])

# Target: encode the diagnosis label as Malignant = 1, Benign = 0.
y = df['diagnosis'].map({'M': 1, 'B': 0})

# Series.map turns any label that is missing from the dict into NaN. Fail here
# with a clear message instead of handing NaN targets to the later cells.
if y.isna().any():
    unexpected = sorted(df.loc[y.isna(), 'diagnosis'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'diagnosis' column: {unexpected}")

## Step 4: Train-Test Split and Standardize

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Step 5: Fit Logistic Regression Model

In [None]:
# Train a logistic regression classifier (default settings) on the scaled
# training data; fit() returns the fitted estimator itself.
model = LogisticRegression().fit(X_train_scaled, y_train)

# Hard class predictions for the test rows.
y_preds = model.predict(X_test_scaled)
# Second column of predict_proba: predicted probability of class 1.
y_probs = model.predict_proba(X_test_scaled)[:, 1]

## Step 6: Evaluate Model

In [None]:
# ROC AUC is computed from the predicted probabilities rather than from the
# hard class labels.
roc_auc = roc_auc_score(y_test, y_probs)

# The remaining metrics compare the true labels with the hard predictions.
recall = recall_score(y_test, y_preds)
precision = precision_score(y_test, y_preds)
conf_matrix = confusion_matrix(y_test, y_preds)

# Trailing expression so the notebook displays all four results together.
(conf_matrix, precision, recall, roc_auc)

## Step 7: Plot ROC Curve

In [None]:
# Compute the ROC curve points from the true labels and predicted probabilities.
fpr, tpr, thresholds = roc_curve(y_test, y_probs)

plt.figure(figsize=(8, 6))
# Legend label reports the AUC to two decimals. The closing parenthesis after
# the value was missing, which left the legend text unbalanced.
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})')
# Dashed black diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - Logistic Regression')
plt.legend()
plt.grid(True)
plt.show()

## Step 8: Threshold Tuning Example

In [None]:
# Custom decision cut-off applied to the predicted probabilities.
threshold = 0.4

# Rows whose probability is strictly above the cut-off are labelled 1, the
# rest 0.
above_threshold = y_probs > threshold
y_pred_tuned = above_threshold.astype(int)

# Confusion matrix for the re-thresholded predictions; the trailing expression
# displays it in the notebook.
conf_matrix_tuned = confusion_matrix(y_test, y_pred_tuned)
conf_matrix_tuned

## Step 9: Save Outputs

In [None]:
# Write the loaded dataset to CSV without the index column.
df.to_csv('cleaned_cancer_dataset.csv', index=False)

# Collect the test-set labels, hard predictions and predicted probabilities
# side by side, then write them to CSV without the index column.
predictions = pd.DataFrame(
    {
        'Actual': y_test.values,
        'Predicted': y_preds,
        'Probabilities': y_probs,
    }
)
predictions.to_csv('predictions.csv', index=False)