# Data Science & Machine Learning 101
## A 60-Minute Hands-On Learning Session

Welcome to this practical introduction to data science and machine learning! In the next hour, we'll walk through a complete ML workflow using the famous Titanic dataset.

### What we'll cover:
1. **Data Loading & Understanding** (10 min)
2. **Exploratory Data Analysis (EDA)** (15 min)
3. **Data Preprocessing & Splitting** (10 min)
4. **Training ML Models** (15 min)
5. **Model Evaluation & Conclusions** (10 min)

### Business Context:
We'll predict passenger survival on the Titanic - a classic ML problem that demonstrates how data science can uncover patterns and make predictions from historical data.

## 1. Data Loading & Understanding (10 minutes)

First, let's import our essential libraries and load the Titanic dataset.

In [None]:
# Import essential libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Set up plotting style
# Start from matplotlib's stock style, then make "husl" seaborn's default color palette
plt.style.use('default')
sns.set_palette("husl")

# Confirmation message so the reader can see the setup cell ran to completion
print("✅ Libraries imported successfully!")

In [None]:
# Fetch the Titanic passenger table through seaborn's example-data loader
df = sns.load_dataset('titanic')

# Quick orientation: dimensions first, then a peek at the leading rows
dataset_shape = df.shape
print("🔍 DATASET OVERVIEW")
print(f"Shape: {dataset_shape} (rows, columns)")
print("\n📋 First 5 rows:")
# Last expression of the cell, so the notebook renders it as a table
df.head()

In [None]:
# Headline numbers: overall survival rate, then a four-line age summary
survival_rate = df['survived'].mean()
print(f"🎯 Survival rate: {survival_rate:.1%}")

ages = df['age']
# (label, value) pairs, printed in this order: min, max, median, mean
age_summary = [
    ("📉 Min age", ages.min()),
    ("📈 Max age", ages.max()),
    ("📊 Median age", ages.median()),
    ("👶 Average age", ages.mean()),
]
for label, value in age_summary:
    print(f"{label}: {value:.1f} years")


In [None]:
# Data types and basic info
print("📊 DATA TYPES & INFO:")
# DataFrame.info() writes its report straight to stdout and returns None,
# so it must not be wrapped in print() (that would append a stray "None").
df.info()
print("\n📈 BASIC STATISTICS:")
# Last expression of the cell, so the notebook renders the summary as a table
df.describe()

In [None]:
# Count NaNs per column and express them as a percentage of all rows
print("❓ MISSING VALUES:")
missing_data = df.isnull().sum()
missing_pct = (missing_data / len(df)) * 100

# One row per column, worst offenders first
missing_df = pd.DataFrame({'Missing Count': missing_data, 'Percentage': missing_pct})
missing_df = missing_df.sort_values('Missing Count', ascending=False)

# Only report the columns that actually have gaps
has_gaps = missing_df['Missing Count'] > 0
print(missing_df[has_gaps])

### 💡 Key Insights from Data Overview:
- We have 891 passengers with 15 features
- Survival rate was 38.4% (tragic, but the two classes are only mildly imbalanced, so accuracy remains a meaningful metric)
- Missing data in 'age' (20%), 'embarked' (0.2%), and 'deck' (77%)
- Mix of numerical (age, fare) and categorical (sex, class) variables

## 2. Exploratory Data Analysis (EDA) (15 minutes)

Let's explore the data to understand what factors influenced survival.

In [None]:
# Survival distribution: absolute counts (bar chart) and shares (pie chart)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# value_counts() orders its result by frequency, not by label. Pin the order
# to [0, 1] so the hard-coded 'Died' / 'Survived' labels and colors always
# line up with the right numbers, whatever the class balance happens to be.
outcome_order = [0, 1]
outcome_labels = ['Died', 'Survived']
outcome_colors = ['red', 'green']

# Survival count
survival_counts = df['survived'].value_counts().reindex(outcome_order, fill_value=0)
axes[0].bar(outcome_labels, survival_counts.values, color=outcome_colors, alpha=0.7)
axes[0].set_title('Passenger Survival Distribution')
axes[0].set_ylabel('Number of Passengers')

# Survival percentage
survival_pct = (
    df['survived'].value_counts(normalize=True).reindex(outcome_order, fill_value=0) * 100
)
axes[1].pie(survival_pct.values, labels=outcome_labels, autopct='%1.1f%%',
            colors=outcome_colors)
axes[1].set_title('Survival Rate')

plt.tight_layout()
plt.show()

# Text summary of the same numbers
print(f"📊 Total passengers: {len(df)}")
print(f"🟢 Survived: {df['survived'].sum()} ({df['survived'].mean():.1%})")
print(f"🔴 Died: {(df['survived'] == 0).sum()} ({(df['survived'] == 0).mean():.1%})")

In [None]:
# Survival rate broken down by passenger class and by gender, side by side
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Mean of the 0/1 'survived' flag per group = survival rate; highest first
survival_by_class = df.groupby('class')['survived'].mean().sort_values(ascending=False)
survival_by_sex = df.groupby('sex')['survived'].mean().sort_values(ascending=False)

# Both panels share the same formatting, so draw them from one description table
panels = [
    (axes[0], survival_by_class, 'skyblue', 'Survival Rate by Passenger Class'),
    (axes[1], survival_by_sex, 'coral', 'Survival Rate by Gender'),
]
for ax, rates, bar_color, panel_title in panels:
    rates.plot(kind='bar', ax=ax, color=bar_color)
    ax.set_title(panel_title)
    ax.set_ylabel('Survival Rate')
    ax.tick_params(axis='x', rotation=0)

plt.tight_layout()
plt.show()

# Same numbers as text
print("📊 SURVIVAL RATES:")
for heading, rates in (("By Class:", survival_by_class), ("By Gender:", survival_by_sex)):
    print(heading)
    for group_name, rate in rates.items():
        print(f"  {group_name}: {rate:.1%}")

In [None]:
# Age and fare analysis: 2x2 grid of box plots, an age histogram and a port bar chart
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Age distribution by survival
df.boxplot(column='age', by='survived', ax=axes[0,0])
axes[0,0].set_title('Age Distribution by Survival')
axes[0,0].set_xlabel('Survived (0=Died, 1=Survived)')

# Fare distribution by survival
df.boxplot(column='fare', by='survived', ax=axes[0,1])
axes[0,1].set_title('Fare Distribution by Survival')
axes[0,1].set_xlabel('Survived (0=Died, 1=Survived)')

# pandas' grouped boxplot stamps a figure-level "Boxplot grouped by ..." title
# on the whole figure; clear it so it does not sit on top of the 2x2 grid.
fig.suptitle('')

# Age histogram by survival
# Compute the bin edges once from all known ages so both groups are counted
# over the same age ranges; per-group bins=20 would give each group its own
# bar widths and make the bar heights incomparable.
age_bins = np.histogram_bin_edges(df['age'].dropna(), bins=20)
for survived in [0, 1]:
    subset = df[df['survived'] == survived]['age'].dropna()
    axes[1,0].hist(subset, alpha=0.6, label=f'Survived: {bool(survived)}', bins=age_bins)
axes[1,0].set_xlabel('Age')
axes[1,0].set_ylabel('Count')
axes[1,0].set_title('Age Distribution by Survival')
axes[1,0].legend()

# Survival by embarkation port
survival_by_port = df.groupby('embarked')['survived'].mean()
survival_by_port.plot(kind='bar', ax=axes[1,1], color='lightgreen')
axes[1,1].set_title('Survival Rate by Embarkation Port')
axes[1,1].set_ylabel('Survival Rate')
axes[1,1].tick_params(axis='x', rotation=0)

plt.tight_layout()
plt.show()

### 📊 How to Read the Age & Fare Distribution Graphs

**Top Left - Age Box Plot:**
- The box shows the middle 50% of ages (25th to 75th percentile)
- The line inside the box = median age
- The "whiskers" (lines extending from box) show the range of typical values
- Dots above/below whiskers = outliers (unusually young/old passengers)
- **Key insight**: Survivors had a slightly younger median age than non-survivors

**Top Right - Fare Box Plot:**
- Similar to age, but for ticket prices
- Survivors clearly paid higher fares on average
- Many high-fare outliers among survivors (expensive first-class tickets)
- **Key insight**: Higher fare = better accommodations = better survival chances

**Bottom Left - Age Histogram:**
- Pink bars = passengers who died
- Brown/gold bars = passengers who survived  
- Height of bars = number of passengers in that age group
- **Key insight**: Children (0-15) and young adults had better survival rates

**Bottom Right - Embarkation Port Bar Chart:**
- C = Cherbourg, Q = Queenstown, S = Southampton
- Height = survival rate for passengers boarding at each port
- **Key insight**: Cherbourg passengers had highest survival rate (~55%)

### 🔍 What These Patterns Tell Us:

**Business Insights from the Graphs:**

1. **Socioeconomic Factor**: The fare distribution clearly shows wealth mattered for survival
   - First-class passengers (high fares) had better access to lifeboats
   - Third-class passengers were often trapped below deck

2. **Age Advantage**: The age histogram reveals the "women and children first" protocol
   - Children under 15 had disproportionately high survival rates
   - Working-age adults (20-40) had mixed outcomes

3. **Port of Embarkation**: Cherbourg passengers had better survival rates, most likely because:
   - Cherbourg was a premium port with a larger share of first-class passengers
   - These passengers boarded later and may have had better cabin locations (a plausible explanation that this dataset alone cannot confirm)

**How to Spot Patterns:**
- Look for **shifts** in box plot medians (center lines)
- Compare **spreads** of the boxes (variability within groups)
- Notice **overlapping vs. separate** distributions
- Count **outliers** - they often tell interesting stories!

In [None]:
# Combined analysis: Class and Gender
# Rows = passenger class, columns = sex, values = mean of the 0/1 survival flag
survival_pivot = df.groupby(['class', 'sex'])['survived'].mean().unstack()

# DataFrame.plot() opens a figure of its own unless it is handed an Axes, so a
# bare plt.figure(figsize=...) beforehand is left empty and its size ignored.
# Create the Axes explicitly and draw into it.
fig, ax = plt.subplots(figsize=(10, 6))
survival_pivot.plot(kind='bar', width=0.8, ax=ax)
ax.set_title('Survival Rate by Class and Gender')
ax.set_xlabel('Passenger Class')
ax.set_ylabel('Survival Rate')
ax.legend(title='Gender')
ax.tick_params(axis='x', rotation=0)
plt.tight_layout()
plt.show()

print("📊 SURVIVAL RATES BY CLASS AND GENDER:")
print(survival_pivot)

### 💡 Key EDA Insights:
1. **"Women and Children First"**: Females had 74% survival rate vs 19% for males
2. **Class Matters**: First class had 63% survival vs 24% in third class  
3. **Age Factor**: Children had higher survival rates
4. **Fare**: Higher fares (better accommodations) correlated with survival
5. **Embarkation**: Passengers from Cherbourg had higher survival rates

## 3. Data Preprocessing & Splitting (10 minutes)

Now let's prepare our data for machine learning models.

In [None]:
# Re-check column dtypes and non-null counts on the raw frame before cleaning
df.info()

In [None]:
# Build a cleaned copy of the raw frame for modeling
print("🔧 PREPROCESSING DATA FOR MACHINE LEARNING")

# Work on a copy so the raw frame stays available for reference
df_clean = df.copy()


def _fill_with_group_median(ages):
    # Replace missing ages with the median of the group being transformed
    return ages.fillna(ages.median())


# Impute age within each (sex, class) group rather than with one global value.
# NOTE(review): the medians are computed over all rows, i.e. before the
# train/test split performed later in the notebook.
df_clean['age'] = df_clean.groupby(['sex', 'class'])['age'].transform(_fill_with_group_median)

# The two port columns get the same treatment: fill gaps with the most frequent value
for port_column in ('embarked', 'embark_town'):
    most_common = df_clean[port_column].mode().values[0]
    df_clean[port_column] = df_clean[port_column].fillna(most_common)

# Drop columns with too many missing values or not useful for prediction
df_clean = df_clean.drop(columns=['deck', 'alive'])

print(f"✅ Missing values handled")
print(f"✅ Dataset shape after cleaning: {df_clean.shape}")

# Sanity check: total number of NaNs left anywhere in the frame
remaining_missing = df_clean.isnull().sum()
print(f"✅ Remaining missing values: {remaining_missing.sum()}")

In [None]:
# Inspect dtypes and non-null counts of the cleaned frame
df_clean.info()

In [None]:
# Choose the raw predictors and derive three extra ones from them
base_features = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
engineered_features = ['family_size', 'is_alone', 'age_group']

# family_size counts the passenger plus siblings/spouses and parents/children
df_clean['family_size'] = df_clean['sibsp'] + df_clean['parch'] + 1
# 1 when the passenger travels without any family, else 0
df_clean['is_alone'] = df_clean['family_size'].eq(1).astype(int)

# Bucket age into five labelled, right-inclusive ranges
age_bin_edges = [0, 12, 18, 35, 60, 100]
age_bin_labels = ['Child', 'Teen', 'Adult', 'Middle-aged', 'Senior']
df_clean['age_group'] = pd.cut(df_clean['age'], bins=age_bin_edges, labels=age_bin_labels)

# Final feature list: raw columns first, engineered ones appended
feature_columns = base_features + engineered_features

# Feature matrix (copied so later encoding does not touch df_clean) and target
X = df_clean.loc[:, feature_columns].copy()
y = df_clean['survived']

print(f"✅ Features selected: {len(feature_columns)}")
print(f"✅ New features created: family_size, is_alone, age_group")
print(f"✅ Target variable: survived ({y.sum()} survivors)")

In [None]:
# Summary statistics of the cleaned frame, including the newly engineered columns
df_clean.describe()

In [None]:
# Turn the categorical columns into integers the models can consume
X_processed = X.copy()
label_encoders = {}

# Nominal features have no inherent order, so LabelEncoder's alphabetical
# integer labels are acceptable for them.
nominal_features = ['sex', 'embarked']
# age_group is ordinal (Child < Teen < Adult < Middle-aged < Senior).
# LabelEncoder would sort its labels alphabetically (Adult, Child,
# Middle-aged, Senior, Teen) and scramble that order, so it is encoded from
# the ordered categorical produced by pd.cut instead.
ordinal_features = ['age_group']
categorical_features = nominal_features + ordinal_features

for feature in nominal_features:
    le = LabelEncoder()
    X_processed[feature] = le.fit_transform(X_processed[feature])
    label_encoders[feature] = le   # kept so the mapping can be inverted later
    print(f"✅ Encoded {feature}: {le.classes_}")

for feature in ordinal_features:
    # Category codes follow the bin order (0 = first bin); a missing value
    # would be encoded as -1.
    ordered_levels = list(X_processed[feature].cat.categories)
    X_processed[feature] = X_processed[feature].cat.codes
    print(f"✅ Encoded {feature}: {ordered_levels}")

print(f"\n🔍 Processed features shape: {X_processed.shape}")
X_processed.head()

In [None]:
# Hold out 20% of the rows for testing. The seed is fixed for reproducibility
# and the split is stratified on y so both parts keep the same survival ratio.
split_parts = train_test_split(X_processed, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

total_rows = len(df_clean)
train_rows = X_train.shape[0]
test_rows = X_test.shape[0]

print("📊 DATA SPLIT SUMMARY:")
print(f"Training set: {train_rows} samples ({train_rows/total_rows:.1%})")
print(f"Test set: {test_rows} samples ({test_rows/total_rows:.1%})")
print(f"Training survival rate: {y_train.mean():.1%}")
print(f"Test survival rate: {y_test.mean():.1%}")

In [None]:
# Standardize the continuous columns; every other column is left untouched
numerical_features = ['age', 'fare', 'family_size']
scaler = StandardScaler()

# Learn mean/std from the training rows only, then apply them to both splits
scaler.fit(X_train[numerical_features])

X_train_scaled = X_train.copy()
X_train_scaled[numerical_features] = scaler.transform(X_train[numerical_features])

X_test_scaled = X_test.copy()
X_test_scaled[numerical_features] = scaler.transform(X_test[numerical_features])

# Before/after illustration using the fare column
raw_fare = X_train['fare']
scaled_fare = X_train_scaled['fare']
print("✅ Features scaled using StandardScaler")
print("📊 Scaling example (fare):")
print(f"  Original range: ${raw_fare.min():.2f} - ${raw_fare.max():.2f}")
print(f"  Scaled range: {scaled_fare.min():.2f} - {scaled_fare.max():.2f}")

### 💡 Preprocessing Summary:
- **Features**: 10 predictive features selected (including 3 engineered features)
- **Encoding**: Categorical variables converted to numbers
- **Splitting**: 80% training, 20% testing
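- **Scaling**: `age`, `fare`, and `family_size` standardized (mean=0, std=1) using statistics from the training set only; the scaled copy is used for Logistic Regression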

Data is now ready for machine learning!

## 4. Training ML Models (15 minutes)

Let's train two different types of models and compare their performance.

In [None]:
# Model 1: Logistic Regression, a linear baseline trained on the scaled features
print("🤖 TRAINING MODEL 1: LOGISTIC REGRESSION")
print("📝 Good for: interpretability, baseline performance, linear relationships")

# fit() returns the estimator itself, so construction and training can be chained
lr_model = LogisticRegression(random_state=42, max_iter=1000).fit(X_train_scaled, y_train)

# Hard predictions for both splits, plus the positive-class probability on the test set
lr_train_pred = lr_model.predict(X_train_scaled)
lr_test_pred = lr_model.predict(X_test_scaled)
lr_test_proba = lr_model.predict_proba(X_test_scaled)[:, 1]

# Accuracy on seen vs. unseen data
lr_train_accuracy = accuracy_score(y_train, lr_train_pred)
lr_test_accuracy = accuracy_score(y_test, lr_test_pred)

print(f"✅ Training accuracy: {lr_train_accuracy:.3f}")
print(f"✅ Test accuracy: {lr_test_accuracy:.3f}")

# Rank features by the magnitude of their coefficient (sign is discarded)
coefficient_magnitudes = np.abs(lr_model.coef_[0])
feature_importance_lr = pd.DataFrame(
    {'feature': X_processed.columns, 'importance': coefficient_magnitudes}
).sort_values('importance', ascending=False)

print(f"\n🔍 TOP 5 MOST IMPORTANT FEATURES:")
for ranked in feature_importance_lr.head().itertuples(index=False):
    print(f"  {ranked.feature}: {ranked.importance:.3f}")

In [None]:
# Model 2: Random Forest, a tree ensemble trained on the unscaled features
print("🌳 TRAINING MODEL 2: RANDOM FOREST")
print("📝 Good for: non-linear relationships, feature interactions, robust predictions")

# 100 trees, each capped at depth 5 to limit overfitting; seed fixed for reproducibility
forest_settings = {'n_estimators': 100, 'random_state': 42, 'max_depth': 5}
rf_model = RandomForestClassifier(**forest_settings)
rf_model.fit(X_train, y_train)  # Random Forest doesn't require scaling

# Hard predictions for both splits, plus the positive-class probability on the test set
rf_train_pred = rf_model.predict(X_train)
rf_test_pred = rf_model.predict(X_test)
rf_test_proba = rf_model.predict_proba(X_test)[:, 1]

# Accuracy on seen vs. unseen data
rf_train_accuracy = accuracy_score(y_train, rf_train_pred)
rf_test_accuracy = accuracy_score(y_test, rf_test_pred)

print(f"✅ Training accuracy: {rf_train_accuracy:.3f}")
print(f"✅ Test accuracy: {rf_test_accuracy:.3f}")

# Rank features by the forest's own importance scores
feature_importance_rf = pd.DataFrame(
    {'feature': X_processed.columns, 'importance': rf_model.feature_importances_}
).sort_values('importance', ascending=False)

print(f"\n🔍 TOP 5 MOST IMPORTANT FEATURES:")
for ranked in feature_importance_rf.head().itertuples(index=False):
    print(f"  {ranked.feature}: {ranked.importance:.3f}")

In [None]:
# Side-by-side summary of both models
# "Overfitting" is the train-minus-test accuracy gap for each model
lr_gap = lr_train_accuracy - lr_test_accuracy
rf_gap = rf_train_accuracy - rf_test_accuracy

models_comparison = pd.DataFrame({
    'Model': ['Logistic Regression', 'Random Forest'],
    'Training Accuracy': [lr_train_accuracy, rf_train_accuracy],
    'Test Accuracy': [lr_test_accuracy, rf_test_accuracy],
    'Overfitting': [lr_gap, rf_gap],
})

print("📊 MODEL COMPARISON:")
print(models_comparison)

# Top-8 feature importances of each model as horizontal bar charts
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

importance_panels = [
    (feature_importance_lr, 'Logistic Regression - Feature Importance', 'Absolute Coefficient Value'),
    (feature_importance_rf, 'Random Forest - Feature Importance', 'Importance Score'),
]
for ax, (importance_table, panel_title, x_label) in zip(axes, importance_panels):
    importance_table.head(8).plot(x='feature', y='importance', kind='barh', ax=ax)
    ax.set_title(panel_title)
    ax.set_xlabel(x_label)

plt.tight_layout()
plt.show()

## 5. Model Evaluation & Conclusions (10 minutes)

Let's evaluate our models thoroughly and draw business conclusions.

In [None]:
# Detailed evaluation metrics
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate_model(y_true, y_pred, y_proba, model_name):
    """Print and return the core classification metrics for one model.

    Args:
        y_true: Ground-truth binary labels (1 = survived).
        y_pred: Hard class predictions aligned with ``y_true``.
        y_proba: Predicted probability of the positive class for each
            sample, or ``None`` to skip the probability-based metric.
        model_name: Display name; printed upper-cased in the report header.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.
    """
    # Local import: the cell-level import only brings in precision/recall/F1.
    from sklearn.metrics import roc_auc_score

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    print(f"📈 {model_name.upper()} EVALUATION:")
    print(f"  Accuracy:  {accuracy:.3f} (Overall correct predictions)")
    print(f"  Precision: {precision:.3f} (Of predicted survivors, how many actually survived)")
    print(f"  Recall:    {recall:.3f} (Of actual survivors, how many we correctly identified)")
    print(f"  F1-Score:  {f1:.3f} (Balanced measure of precision and recall)")

    # y_proba used to be accepted but never read. Use it for a
    # threshold-independent metric; it is only printed, so the returned tuple
    # keeps its original shape for existing callers.
    if y_proba is not None:
        roc_auc = roc_auc_score(y_true, y_proba)
        print(f"  ROC-AUC:   {roc_auc:.3f} (How well predicted probabilities rank survivors above non-survivors)")

    return accuracy, precision, recall, f1

# Evaluate both models
# Each call prints a metric report and returns (accuracy, precision, recall, f1)
lr_metrics = evaluate_model(y_test, lr_test_pred, lr_test_proba, "Logistic Regression")
print()  # blank line between the two reports
rf_metrics = evaluate_model(y_test, rf_test_pred, rf_test_proba, "Random Forest")

In [None]:
# Confusion matrices for both models on the test set, drawn side by side
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Rows are actual classes, columns are predicted classes
cm_lr = confusion_matrix(y_test, lr_test_pred)
cm_rf = confusion_matrix(y_test, rf_test_pred)

# Same heatmap layout for both models; only data, color map and title differ
heatmap_specs = [
    (cm_lr, 'Blues', 'Logistic Regression - Confusion Matrix'),
    (cm_rf, 'Greens', 'Random Forest - Confusion Matrix'),
]
for ax, (matrix, color_map, panel_title) in zip(axes, heatmap_specs):
    sns.heatmap(matrix, annot=True, fmt='d', cmap=color_map, ax=ax)
    ax.set_title(panel_title)
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted')

plt.tight_layout()
plt.show()

# Reading guide for the four cells of each matrix
print("📊 CONFUSION MATRIX INTERPRETATION:")
quadrant_notes = (
    "  Top-left: True Negatives (correctly predicted deaths)",
    "  Top-right: False Positives (incorrectly predicted survivors)",
    "  Bottom-left: False Negatives (missed survivors)",
    "  Bottom-right: True Positives (correctly predicted survivors)",
)
for note in quadrant_notes:
    print(note)

In [None]:
# Model predictions on sample passengers
print("🔍 SAMPLE PREDICTIONS:")
print("Let's see how our models predict on some test passengers:\n")

# Positions within the test set (not DataFrame index labels)
sample_indices = [0, 5, 10, 15, 20]
for i in sample_indices:
    actual = y_test.iloc[i]
    lr_pred = lr_test_pred[i]
    rf_pred = rf_test_pred[i]
    lr_prob = lr_test_proba[i]   # P(survived) according to Logistic Regression
    rf_prob = rf_test_proba[i]   # P(survived) according to Random Forest

    # The stored probabilities are always for the "survived" class. A model's
    # confidence in its own prediction is that value when it predicts
    # "Survived" and the complement when it predicts "Died"; printing the raw
    # value would show e.g. "Died (confidence: 0.10)" for a very sure call.
    lr_confidence = lr_prob if lr_pred else 1 - lr_prob
    rf_confidence = rf_prob if rf_pred else 1 - rf_prob

    # Get original passenger info via the row label carried over from df_clean
    passenger_idx = y_test.index[i]
    passenger_info = df_clean.loc[passenger_idx]

    print(f"Passenger {i+1}:")
    print(f"  Class: {passenger_info['class']}, Gender: {passenger_info['sex']}, Age: {passenger_info['age']:.0f}")
    print(f"  Actual: {'Survived' if actual else 'Died'}")
    print(f"  LR Prediction: {'Survived' if lr_pred else 'Died'} (confidence: {lr_confidence:.2f})")
    print(f"  RF Prediction: {'Survived' if rf_pred else 'Died'} (confidence: {rf_confidence:.2f})")
    print()

## 📊 Business Conclusions & Insights

### Key Findings:
1. **Gender was the strongest predictor**: Women had ~74% survival rate vs ~19% for men
2. **Passenger class mattered significantly**: First class passengers had much higher survival rates
3. **Age played a role**: Children had better survival chances
4. **Family connections**: Being alone vs. having family affected survival differently

### Model Performance:
- **Random Forest** slightly outperformed Logistic Regression
- Both models achieved ~80% accuracy on test data
- Models successfully learned the "women and children first" principle

### Business Applications:
1. **Historical Analysis**: Understanding social dynamics and emergency protocols
2. **Risk Assessment**: Identifying factors that influence survival in emergencies  
3. **Policy Making**: Informing safety regulations and evacuation procedures
4. **Feature Engineering**: Demonstrated how creating new features (family_size, is_alone) can improve predictions

### Next Steps:
- Try more advanced models (XGBoost, Neural Networks)
- Collect more features (cabin location, exact boarding time)
- Cross-validation for more robust model evaluation
- Deploy model as a web service for interactive predictions

**🎯 Mission Accomplished!** In 60 minutes, we've completed a full machine learning workflow from data exploration to actionable insights.

# Bonus Content

### Gradient Descent

Here's a simple, visualizable Python example of gradient descent using NumPy for a toy problem: minimizing a basic quadratic function:

$$f(w) = (w - 3)^2$$

which has its minimum at $w = 3$.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Function to minimize and its derivative (gradient)
def loss(w):
    """Quadratic toy loss (w - 3)^2; works on scalars and NumPy arrays alike."""
    # Distance from the minimizer w = 3, squared with Python's ** operator
    residual = w - 3
    return residual ** 2

def gradient(w):
    """Derivative of the toy loss (w - 3)^2 with respect to w, i.e. 2 * (w - 3)."""
    distance_from_minimum = w - 3
    return 2 * distance_from_minimum

# Hyper-parameters of the descent
w = 0.0                 # starting point
learning_rate = 0.1     # fraction of the gradient applied per step
epochs = 25             # maximum number of update steps

# Trajectory of (w, loss) pairs, starting point included, for the plot below
w_history = [w]
loss_history = [loss(w)]

# Repeatedly step against the gradient, recording every visited point
for epoch in range(epochs):
    w -= learning_rate * gradient(w)
    current_loss = loss(w)
    w_history.append(w)
    loss_history.append(current_loss)
    print(f"Epoch {epoch + 1:02d}: w = {w:.4f}, loss = {current_loss:.6f}")

    # Early stop: once past the first few epochs, quit when the last five
    # recorded losses are identical after rounding to 6 decimals
    recent_losses = {round(value, 6) for value in loss_history[-5:]}
    if epoch > 5 and len(recent_losses) == 1:
        print("Convergence reached, stopping early.")
        break

# Draw the loss curve and overlay the points visited by gradient descent
w_vals = np.linspace(-1, 7, 100)

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(w_vals, loss(w_vals), label='Loss function')
# Visited points as red dots, joined by a dashed red line to show the order
ax.scatter(w_history, loss_history, color='red', label='Gradient descent steps')
ax.plot(w_history, loss_history, color='red', linestyle='--')
ax.set_xlabel('w')
ax.set_ylabel('Loss')
ax.set_title('Gradient Descent on (w - 3)^2')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Loss function and its gradient
def loss(w1, w2):
    """Two-parameter bowl (w1 - 3)^2 + (w2 + 2)^2, minimized at (3, -2)."""
    offset_1, offset_2 = w1 - 3, w2 + 2
    return offset_1 ** 2 + offset_2 ** 2

def gradient(w1, w2):
    """Partial derivatives of (w1 - 3)^2 + (w2 + 2)^2, returned as (d/dw1, d/dw2)."""
    return 2 * (w1 - 3), 2 * (w2 + 2)

# Start at the origin and take a fixed number of steps
w1, w2 = 0, 0
learning_rate = 0.1
epochs = 30

# Trajectory (starting point included) for the 3D plot below
w1_history = [w1]
w2_history = [w2]
loss_history = [loss(w1, w2)]

for epoch in range(1, epochs + 1):
    # Both parameters are updated from the gradient at the same point
    dw1, dw2 = gradient(w1, w2)
    w1, w2 = w1 - learning_rate * dw1, w2 - learning_rate * dw2
    current_loss = loss(w1, w2)

    w1_history.append(w1)
    w2_history.append(w2)
    loss_history.append(current_loss)
    print(f"Epoch {epoch:02d}: w1 = {w1:.4f}, w2 = {w2:.4f}, loss = {current_loss:.6f}")

# Evaluate the loss on a 100x100 grid around the minimum for the surface
w1_axis = np.linspace(-1, 6, 100)
w2_axis = np.linspace(-5, 2, 100)
W1, W2 = np.meshgrid(w1_axis, w2_axis)
Z = loss(W1, W2)

# Semi-transparent surface with the descent path drawn over it
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(W1, W2, Z, cmap='viridis', alpha=0.7)
ax.plot(w1_history, w2_history, loss_history, color='r', marker='o', label='Gradient descent path')

ax.set_title('Gradient Descent in 2D Parameter Space')
ax.set_xlabel('w1')
ax.set_ylabel('w2')
ax.set_zlabel('Loss')
ax.legend()
plt.show()


### Binary Cross-entropy Loss

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sweep the predicted probability over (0, 1), stopping short of the endpoints
y_hat = np.linspace(0.01, 0.99, 100)

# Cross-entropy of a single prediction for each possible true label
loss_y1 = -np.log(y_hat)          # true label y = 1
loss_y0 = -np.log(1 - y_hat)      # true label y = 0

# One curve per true label on shared axes
fig, ax = plt.subplots(figsize=(8, 5))
curves = ((loss_y1, 'y=1', 'green'), (loss_y0, 'y=0', 'red'))
for curve_values, curve_label, curve_color in curves:
    ax.plot(y_hat, curve_values, label=curve_label, color=curve_color)
ax.set_xlabel('Predicted Probability (ŷ)')
ax.set_ylabel('Cross-Entropy Loss')
ax.set_title('Binary Cross-Entropy Loss')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Ground truth and model predictions
# Toy example: five binary labels and, for each, the predicted probability of class 1
y_true = np.array([1, 0, 1, 1, 0])
y_pred = np.array([0.9, 0.1, 0.4, 0.8, 0.2])

# Binary cross-entropy loss
def binary_cross_entropy(y, y_hat):
    """Return the mean binary cross-entropy between labels and predictions.

    Args:
        y: True labels (0 or 1); any array-like (list, tuple, ndarray).
        y_hat: Predicted probabilities of the positive class, aligned with
            ``y``.

    Returns:
        The loss averaged over all samples.
    """
    # Coerce up front so plain Python sequences work too: with a list,
    # ``1 - y`` would raise TypeError.
    y = np.asarray(y, dtype=float)
    # Clip to avoid log(0) when a prediction is exactly 0 or 1
    y_hat = np.clip(np.asarray(y_hat, dtype=float), 1e-15, 1 - 1e-15)
    return -np.mean(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

# Per-sample loss
print("Per-sample loss:")
per_sample_loss = - (y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
# Print label, prediction and loss for each sample on one line.
# The loop and result variables deliberately avoid the name `loss`: the
# gradient-descent cells earlier in this notebook define a function called
# `loss`, and rebinding that name to a number here would break them when
# they are re-run.
for target, pred, sample_loss in zip(y_true, y_pred, per_sample_loss):
    print(f"True: {target}, Pred: {pred:.2f}, Loss: {sample_loss:.4f}")

# Overall cross-entropy loss (mean of the per-sample values)
print("\nOverall cross-entropy loss:")
bce_loss = binary_cross_entropy(y_true, y_pred)
print(f"Cross-entropy loss: {bce_loss:.4f}")