In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve
import seaborn as sns


In [None]:
# Load the CSV file
df = pd.read_csv('/content/data.csv')  # adjust path if different

# Drop unnecessary columns; errors='ignore' skips any that are absent.
# Reassigning (rather than inplace=True) keeps the step free of in-place mutation.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

# Convert 'diagnosis' to binary (Malignant=1, Benign=0).
# Series.map turns every value that is not a key of the dict into NaN, so check
# for that here and fail with a clear message instead of passing NaN targets on.
diagnosis_codes = df['diagnosis'].map({'M': 1, 'B': 0})
unmapped_rows = diagnosis_codes.isna()
if unmapped_rows.any():
    bad_labels = df.loc[unmapped_rows, 'diagnosis'].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'diagnosis' (expected 'M' or 'B'): {bad_labels}"
    )
df['diagnosis'] = diagnosis_codes

# Rename column to 'target' for clarity
df = df.rename(columns={'diagnosis': 'target'})

# View first 5 rows
df.head()


In [None]:
# Label vector is the 'target' column; the feature matrix is every other column
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both the training and the test partition
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Train a logistic-regression classifier with the library's default settings.
# fit() returns the estimator itself, so `model` is bound to the fitted object.
model = LogisticRegression().fit(X_train, y_train)
model  # bare last expression: the cell still displays the fitted estimator


In [None]:
# Second column of predict_proba (used as the positive-class score below),
# plus the model's hard class predictions
y_prob = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

# Confusion Matrix, rendered as an annotated heatmap on an explicit Axes
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

# Precision, Recall, ROC-AUC
# Precision/recall use the hard predictions; ROC-AUC uses the probability scores.
precision = precision_score(y_test, y_pred)
print("Precision:", precision)
recall = recall_score(y_test, y_pred)
print("Recall:", recall)
roc_auc = roc_auc_score(y_test, y_prob)
print("ROC-AUC Score:", roc_auc)


In [None]:
# False/true positive rates (and the matching thresholds) from the test-set scores
fpr, tpr, thresholds = roc_curve(y_test, y_prob)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='ROC Curve')
ax.plot([0, 1], [0, 1], 'k--')  # dashed black diagonal as a visual reference
ax.set(
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='ROC Curve',
)
ax.legend()
plt.show()


In [None]:
# Custom decision threshold, defined once so the predictions and the printed
# labels cannot drift apart when the value is tuned.
THRESHOLD = 0.6

# Label a sample 1 when its predicted probability is at least THRESHOLD, else 0
y_pred_thresh = (y_prob >= THRESHOLD).astype(int)

print(f"Custom Precision ({THRESHOLD}):", precision_score(y_test, y_pred_thresh))
print(f"Custom Recall ({THRESHOLD}):", recall_score(y_test, y_pred_thresh))


##  Sigmoid Function Explained

The sigmoid function maps any real-valued input to the open interval (0, 1), which makes it a natural choice for modelling probabilities in binary classification.

### Formula:
$$\text{sigmoid}(x) = \frac{1}{1 + e^{-x}}$$

In logistic regression, it's used to turn the model's linear output into a probability score for classification.
