This notebook compares the performance of two classifiers:
1. **Logistic Regression** (Baseline)
2. **Custom Neural Network** (Advanced)

## 1. Import Libraries and Load Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_curve, auc,
    precision_recall_curve, average_precision_score
)
import warnings
# Silence every warning raised from here on.
# NOTE(review): this is a blanket filter, so library warnings emitted
# later in the notebook are hidden as well.
warnings.filterwarnings('ignore')

# Plot appearance shared by all figures that do not set their own size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (10, 6)})

# Seed NumPy's global RNG; SimpleNeuralNetwork draws its initial weights
# from it via np.random.randn.
np.random.seed(42)

In [None]:
# Load and prepare data (same as in neural_network.ipynb).
# The file has no header row, so column names are assigned by hand below.
data = pd.read_csv("wdbc.data", header=None)

# Two leading columns, then the same ten measurements repeated in three
# groups (suffixes 1, 2 and 3).
data.columns = [
    "id", "diagnosis",
    # group 1
    "radius1", "texture1", "perimeter1", "area1", "smoothness1",
    "compactness1", "concavity1", "concave_points1", "symmetry1",
    "fractal_dimension1",
    # group 2
    "radius2", "texture2", "perimeter2", "area2", "smoothness2",
    "compactness2", "concavity2", "concave_points2", "symmetry2",
    "fractal_dimension2",
    # group 3
    "radius3", "texture3", "perimeter3", "area3", "smoothness3",
    "compactness3", "concavity3", "concave_points3", "symmetry3",
    "fractal_dimension3",
]

# Features are every column except the identifier and the label;
# the target is 1 where diagnosis == 'M' and 0 otherwise.
X = data.drop(columns=['id', 'diagnosis'])
y = (data['diagnosis'] == 'M').astype(int)

# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# The labels are used as plain numpy arrays from here on.
y_train, y_test = y_train.values, y_test.values

# Fit the scaler on the training rows only, then apply it to both splits
# so no test-set statistics leak into the transformation.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")
print(f"Features: {X.shape[1]}")

## 2. Define the Neural Network Class

The `SimpleNeuralNetwork` class below is copied from `neural_network.ipynb`.

In [None]:
class SimpleNeuralNetwork:
    """One-hidden-layer network trained with full-batch gradient descent.

    Layout: ``X -> (W1, b1) -> ReLU -> (W2, b2) -> sigmoid``.  The loss is
    binary cross-entropy, and ``backward`` uses the matching sigmoid +
    cross-entropy gradient ``A2 - y``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create small random weights and zero biases.

        Weights are drawn from NumPy's global RNG (``np.random.randn``), so
        call ``np.random.seed`` beforehand for reproducible initialisation.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units.
            output_size: Number of output units (columns of ``W2``).
        """
        # The 0.01 factor keeps the initial pre-activations close to zero.
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def relu(self, Z):
        """Return the element-wise ``max(0, Z)``."""
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        """Return 1.0 where ``Z > 0`` and 0.0 elsewhere."""
        return (Z > 0).astype(float)

    def sigmoid(self, Z):
        """Return the logistic function of ``Z``.

        ``Z`` is clipped to [-500, 500] first so ``np.exp`` cannot overflow.
        """
        return 1 / (1 + np.exp(-np.clip(Z, -500, 500)))

    def forward(self, X):
        """Run a forward pass and return the output activations ``A2``.

        The intermediate values ``Z1``, ``A1``, ``Z2`` and ``A2`` are stored
        on the instance because ``backward`` reads them.
        """
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)
        return self.A2

    def backward(self, X, y, learning_rate):
        """Apply one gradient-descent update using the cached forward pass.

        ``forward(X)`` must have been called on the same ``X`` immediately
        before, since the cached activations are reused here.

        Args:
            X: Input batch used in the preceding ``forward`` call.
            y: Labels for the batch; any array-like, reshaped to a column.
            learning_rate: Step size applied to every parameter.
        """
        X = np.asarray(X)
        # np.asarray lets plain lists / pandas Series be used as labels.
        targets = np.asarray(y).reshape(-1, 1)
        m = X.shape[0]

        # Output layer: gradient of the loss w.r.t. the pre-activation Z2.
        dZ2 = self.A2 - targets
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        # Hidden layer: propagate through W2, then gate by the ReLU slope.
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def compute_loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy between labels and outputs.

        Args:
            y_true: Labels; any array-like, reshaped to a column.
            y_pred: Predicted probabilities as returned by ``forward``.
        """
        y_true = np.asarray(y_true).reshape(-1, 1)
        # The 1e-8 offset keeps log() finite when a prediction is exactly 0 or 1.
        loss = -np.mean(
            y_true * np.log(y_pred + 1e-8)
            + (1 - y_true) * np.log(1 - y_pred + 1e-8)
        )
        return loss

    def train(self, X, y, epochs=1000, learning_rate=0.1, verbose=True):
        """Train on the full batch and return the per-epoch loss history.

        Args:
            X: Training inputs.
            y: Training labels (array-like).
            epochs: Number of full-batch updates to perform.
            learning_rate: Step size passed to ``backward``.
            verbose: When true (the default), print the loss every 100 epochs.

        Returns:
            A list with one loss value per epoch, measured before that
            epoch's parameter update.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        losses = []
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = self.compute_loss(y, y_pred)
            losses.append(loss)
            self.backward(X, y, learning_rate)

            if verbose and epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss:.4f}')

        return losses

    def predict(self, X):
        """Return hard 0/1 predictions using a 0.5 probability threshold.

        Calls ``forward``, so the cached activations are overwritten.
        """
        y_pred = self.forward(X)
        return (y_pred > 0.5).astype(int)

    def predict_proba(self, X):
        """Return the sigmoid outputs of ``forward`` for ``X``.

        Calls ``forward``, so the cached activations are overwritten.
        """
        return self.forward(X)

print("✓ Neural Network class defined")

## 3. Train Both Models

In [None]:
# Baseline model: logistic regression fitted on the standardised features.
print("Training Logistic Regression...")
logreg = LogisticRegression(max_iter=10000, random_state=42).fit(
    X_train_scaled, y_train
)

# Column 1 of predict_proba is the probability of the positive class (label 1).
y_pred_proba_logreg = logreg.predict_proba(X_test_scaled)[:, 1]
y_pred_logreg = logreg.predict(X_test_scaled)

print(f" Logistic Regression - Test Accuracy: {accuracy_score(y_test, y_pred_logreg):.4f}")

In [None]:
# Train the hand-written network on the same standardised features.
print("\nTraining Neural Network...")
input_size = X_train_scaled.shape[1]
hidden_size = 16
output_size = 1

# SimpleNeuralNetwork draws its initial weights from NumPy's global RNG.
# Re-seed right before construction so that re-running this cell on its own
# (or after other cells have consumed random numbers) yields the same
# initial weights every time.
np.random.seed(42)
nn = SimpleNeuralNetwork(input_size, hidden_size, output_size)
losses = nn.train(X_train_scaled, y_train, epochs=1000, learning_rate=0.1)

# The network returns column vectors; flatten to 1-D to match y_test.
y_pred_nn = nn.predict(X_test_scaled).flatten()
y_pred_proba_nn = nn.predict_proba(X_test_scaled).flatten()

print(f"\n✓ Neural Network - Test Accuracy: {accuracy_score(y_test, y_pred_nn):.4f}")

## 4. Calculate All Evaluation Metrics

In [None]:
def calculate_metrics(y_true, y_pred, y_pred_proba, model_name):
    """Collect the evaluation scores for one model into a dict.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard class predictions.
        y_pred_proba: Scores for the positive class.
        model_name: Stored under the 'Model' key.

    Returns:
        A dict holding the model name plus accuracy, precision, recall,
        F1, ROC-AUC and average precision.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_pred_proba)

    # Scores computed from the hard predictions.
    label_scorers = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1-Score', f1_score),
    )
    results = {'Model': model_name}
    for title, scorer in label_scorers:
        results[title] = scorer(y_true, y_pred)

    # Scores computed from the predicted probabilities.
    results['ROC-AUC'] = auc(false_pos_rate, true_pos_rate)
    results['Avg Precision'] = average_precision_score(y_true, y_pred_proba)
    return results

# One metrics dict per model.
metrics_logreg = calculate_metrics(
    y_test, y_pred_logreg, y_pred_proba_logreg, 'Logistic Regression'
)
metrics_nn = calculate_metrics(
    y_test, y_pred_nn, y_pred_proba_nn, 'Neural Network'
)

# Rows are models, columns are metrics.
metrics_df = pd.DataFrame([metrics_logreg, metrics_nn]).set_index('Model')

# Header banner, the table rounded to four decimals, then a closing rule.
print("\n" + "="*70, "MODEL PERFORMANCE COMPARISON", "="*70, sep="\n")
print(metrics_df.round(4))
print("="*70)

## 5. Visualize Metric Comparisons

In [None]:
# Grouped bar chart: one group per metric, one bar per model.
fig, ax = plt.subplots(figsize=(12, 6))
metrics_df.T.plot(kind='bar', ax=ax, color=['#3498db', '#e74c3c'], width=0.8)
plt.title('Model Performance Comparison', fontsize=16, fontweight='bold', pad=20)
plt.xlabel('Metrics', fontsize=12, fontweight='bold')
plt.ylabel('Score', fontsize=12, fontweight='bold')
plt.xticks(rotation=45, ha='right')
plt.legend(title='Model', frameon=True, shadow=True)

# Zoom in on the top of the scale, but never start the axis above the lowest
# score: with a fixed 0.85 floor any weaker metric would be cut off and its
# bar would vanish from the chart.
lowest_score = float(metrics_df.min().min())
y_floor = max(0.0, min(0.85, lowest_score - 0.05))
plt.ylim([y_floor, 1.0])

plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

# For each metric, report which model scores highest.
print("\n Best Performance by Metric:")
print("-" * 40)
for col in metrics_df.columns:
    best_model = metrics_df[col].idxmax()
    best_score = metrics_df[col].max()
    print(f"{col:15s}: {best_model:20s} ({best_score:.4f})")

## 6. Confusion Matrices Side-by-Side

In [None]:
# Side-by-side confusion-matrix heatmaps, one panel per model.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

cm_logreg = confusion_matrix(y_test, y_pred_logreg)
cm_nn = confusion_matrix(y_test, y_pred_nn)

tick_labels = ['Benign', 'Malignant']
panels = [
    ('Logistic Regression', cm_logreg, 'Blues'),
    ('Neural Network', cm_nn, 'Reds'),
]

for axis, (panel_title, matrix, palette) in zip(axes, panels):
    sns.heatmap(matrix, annot=True, fmt='d', cmap=palette, ax=axis,
                xticklabels=tick_labels,
                yticklabels=tick_labels)
    axis.set_title(panel_title, fontsize=14, fontweight='bold')
    axis.set_ylabel('Actual', fontsize=12, fontweight='bold')
    axis.set_xlabel('Predicted', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

# Print detailed analysis: raw counts plus the share of positives missed.
for name, cm, _ in panels:
    # ravel() flattens the 2x2 matrix row by row: TN, FP, FN, TP.
    tn, fp, fn, tp = cm.ravel()
    fn_rate = fn / (fn + tp) * 100
    print(f"\n{name}:")
    print(f"  True Negatives:  {tn}  |  False Positives: {fp}")
    print(f"  False Negatives: {fn}  |  True Positives:  {tp}")
    print(f"    False Negative Rate: {fn_rate:.2f}% (missed cancer cases)")

## 7. ROC Curves Comparison

In [None]:
# ROC curve points and area under the curve for each model.
fpr_logreg, tpr_logreg, _ = roc_curve(y_test, y_pred_proba_logreg)
fpr_nn, tpr_nn, _ = roc_curve(y_test, y_pred_proba_nn)
roc_auc_logreg = auc(fpr_logreg, tpr_logreg)
roc_auc_nn = auc(fpr_nn, tpr_nn)

# (x values, y values, colour, legend label) for each model curve.
curves = [
    (fpr_logreg, tpr_logreg, '#3498db',
     f'Logistic Regression (AUC = {roc_auc_logreg:.4f})'),
    (fpr_nn, tpr_nn, '#e74c3c',
     f'Neural Network (AUC = {roc_auc_nn:.4f})'),
]

plt.figure(figsize=(10, 8))
for curve_x, curve_y, curve_colour, curve_label in curves:
    plt.plot(curve_x, curve_y, color=curve_colour, lw=2.5, label=curve_label)

# Diagonal reference line, labelled 'Random' in the legend.
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--', label='Random')

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate', fontsize=12, fontweight='bold')
plt.ylabel('True Positive Rate', fontsize=12, fontweight='bold')
plt.title('ROC Curves - Model Comparison', fontsize=16, fontweight='bold', pad=15)
plt.legend(loc='lower right', fontsize=11, frameon=True, shadow=True)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

## 8. Classification Reports

In [None]:
# Per-class precision / recall / F1 tables for both models.
class_names = ['Benign (0)', 'Malignant (1)']

# (text printed before the top rule, heading, predictions) per model; the
# second report is preceded by a blank line.
reports = (
    ("", "LOGISTIC REGRESSION - Classification Report", y_pred_logreg),
    ("\n", "NEURAL NETWORK - Classification Report", y_pred_nn),
)

for lead, heading, predictions in reports:
    print(lead + "="*70)
    print(heading)
    print("="*70)
    print(classification_report(y_test, predictions,
                                target_names=class_names))

## 9. Training Loss Visualization (Neural Network)

In [None]:
# Loss recorded at every epoch of the neural network's training run.
loss_fig, loss_ax = plt.subplots(figsize=(10, 6))
loss_ax.plot(losses, color='#e74c3c', linewidth=2)
loss_ax.set_title('Neural Network Training Loss', fontsize=14, fontweight='bold')
loss_ax.set_xlabel('Epoch', fontsize=12, fontweight='bold')
loss_ax.set_ylabel('Loss', fontsize=12, fontweight='bold')
loss_ax.grid(True, alpha=0.3)
loss_fig.tight_layout()
plt.show()

# First and last recorded losses, and the relative drop between them.
print(f"Initial Loss: {losses[0]:.4f}")
print(f"Final Loss: {losses[-1]:.4f}")
print(f"Loss Reduction: {(1 - losses[-1]/losses[0])*100:.1f}%")

## 10. Final Summary and Conclusions

In [None]:
# Text summary of the comparison, derived from metrics_df and the test-set
# predictions computed in the earlier cells.
print("\n" + "="*70)
print("FINAL SUMMARY & CONCLUSIONS")
print("="*70)

# idxmax returns the first model listed when scores are tied.
best_f1_model = metrics_df['F1-Score'].idxmax()
best_f1_score = metrics_df['F1-Score'].max()
best_recall_model = metrics_df['Recall'].idxmax()
best_recall_score = metrics_df['Recall'].max()

print(f"\n Best Overall Model (F1-Score): {best_f1_model}")
print(f"   F1-Score: {best_f1_score:.4f}")
print(f"\n Best at Detecting Cancer (Recall): {best_recall_model}")
print(f"   Recall: {best_recall_score:.4f}")

nn_accuracy = metrics_df.loc['Neural Network', 'Accuracy']
logreg_accuracy = metrics_df.loc['Logistic Regression', 'Accuracy']
# Absolute accuracy gap as a fraction (0.02 == two percentage points).
acc_diff = abs(nn_accuracy - logreg_accuracy)

print("\n Key Findings:")
print("-" * 70)
if acc_diff < 0.02:
    print("• Models perform very similarly (< 2% difference)")
    print("• Both models are effective for this classification task")
elif nn_accuracy > logreg_accuracy:
    print(f"• Neural Network outperforms by {acc_diff*100:.2f}% in accuracy")
    print("• Neural Network captures additional non-linear patterns")
else:
    print(f"• Logistic Regression outperforms by {acc_diff*100:.2f}%")
    print("• Data relationships are primarily linear")

# Recomputed here so this cell does not depend on the plotting cell.
# Row 1 of each matrix holds the actual positives: [FN, TP].
cm_logreg = confusion_matrix(y_test, y_pred_logreg)
cm_nn = confusion_matrix(y_test, y_pred_nn)
fn_rate_logreg = cm_logreg[1, 0] / (cm_logreg[1, 0] + cm_logreg[1, 1]) * 100
fn_rate_nn = cm_nn[1, 0] / (cm_nn[1, 0] + cm_nn[1, 1]) * 100

print(f"\n Clinical Considerations:")
print("-" * 70)
print("• RECALL is critical - missing cancer is worse than false alarms")
print(f"\n  False Negative Rates:")
print(f"  • Logistic Regression: {fn_rate_logreg:.2f}%")
print(f"  • Neural Network: {fn_rate_nn:.2f}%")

print("\n Recommendations:")
print("-" * 70)
# Only claim ">95% accuracy" when the measured results actually support it;
# previously this line was printed regardless of the scores.
lowest_accuracy = metrics_df['Accuracy'].min()
if lowest_accuracy > 0.95:
    print("1. Both models achieve excellent performance (>95% accuracy)")
else:
    print(f"1. Not every model exceeds 95% accuracy (lowest: {lowest_accuracy*100:.2f}%) - review before relying on it")
print("2. Consider ensemble methods for even better results")
print("3. Adjust decision threshold to minimize false negatives")
print("4. Next step: Implement image-based YOLO model on CBIS-DDSM dataset")

print("\n" + "="*70)
print("✓ Analysis Complete - Results Ready")
print("="*70)

## 11. Save Results

In [None]:
# Write the metrics table to CSV (the model name is the index column).
metrics_df.to_csv('model_comparison_results.csv')
print("✓ Saved: model_comparison_results.csv")

# Bundle the table, the winning model names and both confusion matrices
# into one dict and pickle it.
confusion_by_model = {
    'logistic_regression': cm_logreg,
    'neural_network': cm_nn,
}
summary = {
    'metrics': metrics_df,
    'best_f1_model': best_f1_model,
    'best_recall_model': best_recall_model,
    'confusion_matrices': confusion_by_model,
}

with open('evaluation_summary.pkl', 'wb') as f:
    pickle.dump(summary, f)
print("✓ Saved: evaluation_summary.pkl")