In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

# Load the breast-cancer dataset bundled with scikit-learn and split it into
# a feature matrix X and a label vector y.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing. `stratify=y` keeps the class
# proportions the same in the train and test parts; the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit a logistic-regression baseline on the raw (unscaled) features.
# NOTE(review): max_iter is set far above the library default, presumably
# because the solver converges slowly on unscaled inputs -- confirm.
model = LogisticRegression(max_iter=5000)
model.fit(X_train, y_train)

# Hard class predictions for the held-out samples.
y_pred = model.predict(X_test)

# Evaluation. Each multi-line result starts on its own line so that the
# matrix rows and the report's column header stay aligned.
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


: 

In [None]:
# --- Threshold experiments: precision / recall / confusion matrices ---
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (precision_score, recall_score, f1_score,
                             confusion_matrix, precision_recall_curve,
                             classification_report)

# Predicted probability of the class labelled 1.
# In scikit-learn's breast-cancer dataset label 1 is *benign* and label 0 is
# malignant, so every precision / recall / F1 value below is computed with
# "benign" as the positive class.
# NOTE(review): if the goal is to study recall on malignant cases, the metrics
# need pos_label=0 (and the column-0 probability) -- confirm the intent.
probs = model.predict_proba(X_test)[:, 1]

# Decision thresholds to inspect (0.5 is the default used by `predict`).
thresholds = [0.3, 0.5, 0.7]

# One (threshold, precision, recall, f1, confusion_matrix) tuple per threshold.
results = []
for t in thresholds:
    # A sample is predicted as class 1 when its probability reaches the threshold.
    preds_t = (probs >= t).astype(int)
    prec = precision_score(y_test, preds_t)
    rec = recall_score(y_test, preds_t)
    f1 = f1_score(y_test, preds_t)
    cm = confusion_matrix(y_test, preds_t)
    results.append((t, prec, rec, f1, cm))
    print(f"\n--- Threshold = {t:.2f} ---")
    print(f"Precision: {prec:.3f}   Recall: {rec:.3f}   F1: {f1:.3f}")
    print("Confusion matrix (rows=true, cols=pred):\n", cm)
    print("Classification report:\n", classification_report(y_test, preds_t, digits=3))

# --- Precision-Recall curve and threshold plot ---
precisions, recalls, pr_thresholds = precision_recall_curve(y_test, probs)

# pr_thresholds has one entry fewer than precisions/recalls, so the last
# precision/recall point is dropped when plotting against the threshold.
fig, (ax_thr, ax_pr) = plt.subplots(1, 2, figsize=(8, 4))

# Left panel: both metrics as a function of the decision threshold.
ax_thr.plot(pr_thresholds, precisions[:-1], label="Precision")
ax_thr.plot(pr_thresholds, recalls[:-1], label="Recall")
ax_thr.set_xlabel("Threshold")
ax_thr.set_ylabel("Score")
ax_thr.set_title("Precision and Recall vs Threshold")
ax_thr.legend()
ax_thr.grid(True)

# Right panel: the precision-recall trade-off itself.
ax_pr.plot(recalls, precisions)
ax_pr.set_xlabel("Recall")
ax_pr.set_ylabel("Precision")
ax_pr.set_title("Precision-Recall curve")
ax_pr.grid(True)

fig.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import recall_score, classification_report, confusion_matrix

# Load the dataset and carve out a stratified 20% hold-out set.
data = load_breast_cancer()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)


def make_knn_pipeline(n_neighbors):
    """Return a StandardScaler -> KNeighborsClassifier pipeline for the given k."""
    return Pipeline([
        ("scaler", StandardScaler()),
        ("knn", KNeighborsClassifier(n_neighbors=n_neighbors)),
    ])


# Shuffled, stratified 5-fold cross-validation with a fixed seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Candidate neighbourhood sizes: the odd numbers 1, 3, ..., 29.
k_values = list(range(1, 30, 2))

# Per-k mean and standard deviation of the fold recalls.
mean_recalls, std_recalls = [], []

for k in k_values:
    # Scaling lives inside the pipeline, so it is fitted per training fold.
    pipe = make_knn_pipeline(k)
    scores = cross_val_score(
        pipe, X_train, y_train, cv=cv, scoring="recall", n_jobs=-1
    )
    fold_mean, fold_std = scores.mean(), scores.std()
    mean_recalls.append(fold_mean)
    std_recalls.append(fold_std)
    print(f"{k:2d}  mean recall = {fold_mean:.3f}     std recall = {fold_std:.3f}")

# Recall as a function of k, with one-standard-deviation error bars.
fig, ax = plt.subplots(figsize=(8, 4))
ax.errorbar(k_values, mean_recalls, yerr=std_recalls, marker='o', capsize=4)
ax.set_xlabel("k (n_neighbor)")
ax.set_ylabel("Mean recall (5-fold CV)")
ax.set_title("recall vs k (with error bars)")
ax.grid(True)
plt.show()

# Pick the k with the highest mean recall (argmax returns the first maximum,
# so ties resolve to the smallest k).
best_idx = int(np.argmax(mean_recalls))
best_k = k_values[best_idx]

print(f"\nBest k by mean recall = {best_k} (mean recall = {mean_recalls[best_idx]:.3f})")

# Refit on the full training split with the chosen k and score the hold-out set.
final_pipe = make_knn_pipeline(best_k)
final_pipe.fit(X_train, y_train)
y_pred_final = final_pipe.predict(X_test)

# Hold-out metrics
print("\nTest set results with best k:")
print("Recall on test set:", recall_score(y_test, y_pred_final))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_final))
print("\nClassification report:\n", classification_report(y_test, y_pred_final, digits=3))



In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, recall_score

# Load the dataset and create a stratified 80/20 train/test split.
data = load_breast_cancer()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)

# Standardise the features, then fit a linear-kernel SVM with C=1.
svm_steps = [
    ("scaler", StandardScaler()),
    ("svm", SVC(kernel="linear", C=1, random_state=42)),
]
pipe = Pipeline(steps=svm_steps)

# 5-fold stratified cross-validation on the training split, scored by recall.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(pipe, X_train, y_train, cv=cv, scoring="recall")
cv_mean, cv_std = scores.mean(), scores.std()
print(f"mean recall (CV) : {cv_mean:.2f} +- {cv_std:.3f}")

# Fit on the whole training split, then predict the hold-out samples.
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Hold-out metrics
print("Recall on test set:", recall_score(y_test, y_pred))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification report:\n", classification_report(y_test, y_pred, digits=3))



In [None]:
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import roc_curve, auc

# Imported here because this cell's own import list does not bring in the
# scaler and pipeline helpers used below.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Stratified 80/20 train-test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# SVMs are not scale-invariant, so the features are standardised before the
# (default-kernel) SVC. `probability=True` enables `predict_proba`.
# The estimator gets its own name so that it does not rebind `model`, which
# the logistic-regression threshold cell reads.
svm_model = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", SVC(probability=True, random_state=42)),
])
svm_model.fit(X_train, y_train)

# Predicted probability of label 1 (benign in this dataset), used as the
# ranking score for the ROC curve.
y_prob = svm_model.predict_proba(X_test)[:, 1]

# Compute ROC curve and AUC. The thresholds are stored under a distinct name
# so the `thresholds` list from the threshold-experiment cell is not replaced.
fpr, tpr, roc_thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

# Plot the ROC curve against the chance diagonal.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(fpr, tpr, label=f"SVM (AUC = {roc_auc:.2f})")
ax.plot([0, 1], [0, 1], "k--", label="Random Guess")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve - SVM on Breast Cancer Data")
ax.legend(loc="lower right")
plt.show()
