# 🎓 SVM+ORB Face Detector Training
## Classical Computer Vision Face Detection Model

Train your own face detector using:
- **ORB** (Oriented FAST + Rotated BRIEF) for feature extraction
- **Bag of Visual Words (BoVW)** with K-means clustering
- **Linear SVM** for classification

### 📦 What You'll Get:
1. `svm_model.pkl` - Trained SVM classifier
2. `bovw_encoder.pkl` - K-means codebook (BoVW encoder)
3. `scaler.pkl` - Feature scaler

### 📂 Dataset Structure Required:
```
dataset.zip
├── faces/          (positive samples - face images)
└── non_faces/      (negative samples - non-face images)
```

## 📥 Step 1: Setup & Upload Dataset

In [None]:
# Install required packages
!pip install opencv-python scikit-learn scikit-image joblib numpy matplotlib seaborn tqdm -q

print("‚úì Packages installed successfully")

In [None]:
# Import libraries
import cv2
import numpy as np
import os
import pickle
import joblib
from pathlib import Path
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')

print("‚úì Libraries imported")
print(f"OpenCV version: {cv2.__version__}")
print(f"NumPy version: {np.__version__}")

In [None]:
# Upload your dataset.zip file
# Runs only inside Google Colab: `files` comes from the `google.colab` package.
from google.colab import files

print("üì§ Please upload your dataset.zip file (contains 'faces/' and 'non_faces/' folders)")
print("‚è≥ Waiting for upload...\n")

# `uploaded` is kept as a global: the extraction step reads the archive name
# from its keys, and `files` is reused later to download the trained models.
uploaded = files.upload()

print("\n‚úì Upload complete!")

In [None]:
# Extract dataset
import zipfile

# Same extension filter the visualisation and feature-extraction steps use,
# so the counts reported here match the images that are actually processed.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

if not uploaded:
    # Fail with a clear message instead of an IndexError on an empty upload.
    raise RuntimeError("No file was uploaded; re-run the upload cell first.")

zip_filename = next(iter(uploaded))
print(f"üì¶ Extracting {zip_filename}...")

with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall('data')

print("‚úì Extraction complete")

# Verify structure
faces_dir = 'data/faces'
non_faces_dir = 'data/non_faces'


def _count_images(directory):
    """Return the number of image files in *directory*, or None if it is missing.

    Only names ending in one of ``_IMAGE_EXTENSIONS`` (case-insensitive) are
    counted.
    """
    if not os.path.isdir(directory):
        return None
    return sum(
        1 for name in os.listdir(directory)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    )


# Both counters are always defined (0 when the folder is missing) so later
# cells that print the dataset summary do not fail with a NameError.
num_faces = _count_images(faces_dir)
if num_faces is None:
    print("‚úó 'faces/' folder not found!")
    num_faces = 0
else:
    print(f"‚úì Found {num_faces} face images")

num_non_faces = _count_images(non_faces_dir)
if num_non_faces is None:
    print("‚úó 'non_faces/' folder not found!")
    num_non_faces = 0
else:
    print(f"‚úì Found {num_non_faces} non-face images")

## 🖼️ Step 2: Visualize Sample Images

In [None]:
# Visualize sample images
fig, axes = plt.subplots(2, 5, figsize=(15, 6))


def _show_samples(row_axes, directory, title_prefix):
    """Draw up to ``len(row_axes)`` images from *directory* on *row_axes*.

    Files are selected by extension (.jpg/.jpeg/.png, case-insensitive).
    A file that ``cv2.imread`` cannot decode is labelled as unreadable
    instead of crashing the colour conversion. Returns the list of file
    names that were selected.
    """
    names = [
        f for f in os.listdir(directory)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    ][:len(row_axes)]
    for i, (ax, fname) in enumerate(zip(row_axes, names)):
        img = cv2.imread(os.path.join(directory, fname))
        if img is None:
            ax.set_title(f'{title_prefix} {i+1} (unreadable)')
            continue
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title(f'{title_prefix} {i+1}')
    return names


# Show face samples (top row) and non-face samples (bottom row)
face_files = _show_samples(axes[0], faces_dir, 'Face')
non_face_files = _show_samples(axes[1], non_faces_dir, 'Non-Face')

# Hide the frame of every slot, including ones left empty when a folder
# holds fewer than five images.
for ax in axes.ravel():
    ax.axis('off')

plt.tight_layout()
plt.show()

total_images = num_faces + num_non_faces
print(f"Dataset Summary:")
print(f"  Faces: {num_faces}")
print(f"  Non-faces: {num_non_faces}")
print(f"  Total: {total_images}")
if total_images > 0:
    print(f"  Balance: {num_faces/total_images*100:.1f}% faces")
else:
    # Avoid a ZeroDivisionError when both folders are empty.
    print("  Balance: n/a (no images found)")

## ⚙️ Step 3: Configure Training Parameters

In [None]:
# Training configuration shared by every later step of the notebook.
CONFIG = dict(
    # --- ORB detector settings ---
    orb_max_keypoints=500,
    orb_scale_factor=1.2,
    orb_n_levels=8,
    orb_patch_size=31,

    # --- Image preprocessing: every image is resized to this size ---
    target_size=(128, 128),

    # --- Bag of Visual Words ---
    bovw_k=256,                   # number of visual words (k-means clusters)
    bovw_max_descriptors=200000,  # cap on descriptors fed to k-means

    # --- Linear SVM ---
    svm_C=[0.1, 1.0, 10.0],       # candidate regularisation strengths (all are tried)
    svm_max_iter=10000,

    # --- Train / validation / test proportions ---
    train_ratio=0.70,
    val_ratio=0.15,
    test_ratio=0.15,

    # --- Seed used wherever randomness is involved ---
    random_seed=42,
)

_config_rule = "=" * 50
print("üìã Training Configuration:")
print(_config_rule)
for setting_name, setting_value in CONFIG.items():
    print(f"  {setting_name}: {setting_value}")
print(_config_rule)

## 🔍 Step 4: Extract ORB Features

In [None]:
# Build the ORB detector from the values in CONFIG.
# The border margin (edgeThreshold) is deliberately set to the same value as
# the descriptor patch size.
_orb_settings = {
    'nfeatures': CONFIG['orb_max_keypoints'],
    'scaleFactor': CONFIG['orb_scale_factor'],
    'nlevels': CONFIG['orb_n_levels'],
    'edgeThreshold': CONFIG['orb_patch_size'],
    'patchSize': CONFIG['orb_patch_size'],
    'WTA_K': 2,
    'scoreType': cv2.ORB_HARRIS_SCORE,
}
orb = cv2.ORB_create(**_orb_settings)

print(f"‚úì ORB detector initialized")
for _caption, _config_key in (
    ("Max keypoints", 'orb_max_keypoints'),
    ("Scale factor", 'orb_scale_factor'),
    ("Pyramid levels", 'orb_n_levels'),
):
    print(f"  {_caption}: {CONFIG[_config_key]}")

In [None]:
def load_and_preprocess_image(img_path, target_size):
    """Read an image from disk and prepare it for ORB feature extraction.

    The image is converted to grayscale, resized to ``target_size`` (passed to
    ``cv2.resize`` as the output size, using area interpolation) and then
    histogram-equalized.

    Returns the preprocessed single-channel image, or ``None`` when
    ``cv2.imread`` could not load ``img_path``.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:
        return None

    resized = cv2.resize(
        cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY),
        target_size,
        interpolation=cv2.INTER_AREA,
    )
    return cv2.equalizeHist(resized)


def extract_orb_descriptors(img, orb_detector):
    """Run ``orb_detector`` on ``img`` (no mask) and return only the descriptors.

    The keypoints produced by ``detectAndCompute`` are discarded. Whatever the
    detector returns as descriptors is passed through unchanged.
    """
    _, descriptors = orb_detector.detectAndCompute(img, None)
    return descriptors


print("‚úì Helper functions defined")

In [None]:
# Extract ORB descriptors from all images
print("üîç Extracting ORB features from all images...\n")

all_descriptors = []  # For k-means training
descriptors_per_image = []  # Store descriptors for each image
labels = []  # Labels: 1=face, 0=non-face
filepaths = []  # Keep track of file paths
skipped_unreadable = []  # Images cv2 could not load
skipped_no_keypoints = []  # Images in which ORB found no descriptors


def _list_images(directory):
    """Return the image file names in *directory*, sorted.

    ``os.listdir`` returns entries in arbitrary order; sorting keeps the
    sample order (and therefore the seeded train/val/test split) the same
    from run to run.
    """
    return sorted(
        f for f in os.listdir(directory)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )


def _collect_descriptors(directory, filenames, label, progress_label):
    """Extract ORB descriptors for *filenames* and append them to the pools.

    Each image that yields at least one descriptor contributes one entry to
    ``descriptors_per_image``, ``labels``, ``filepaths`` and
    ``all_descriptors``. Images that cannot be loaded or have no descriptors
    are recorded in the ``skipped_*`` lists instead of being dropped silently.
    """
    for fname in tqdm(filenames, desc=progress_label):
        img_path = os.path.join(directory, fname)
        img = load_and_preprocess_image(img_path, CONFIG['target_size'])
        if img is None:
            skipped_unreadable.append(img_path)
            continue

        descriptors = extract_orb_descriptors(img, orb)
        if descriptors is None or len(descriptors) == 0:
            skipped_no_keypoints.append(img_path)
            continue

        descriptors_per_image.append(descriptors)
        labels.append(label)
        filepaths.append(img_path)

        # Add to global descriptor pool
        all_descriptors.append(descriptors)


# Process face images (label = 1)
print("Processing face images...")
face_files = _list_images(faces_dir)
_collect_descriptors(faces_dir, face_files, 1, "Faces")

# Process non-face images (label = 0)
print("\nProcessing non-face images...")
non_face_files = _list_images(non_faces_dir)
_collect_descriptors(non_faces_dir, non_face_files, 0, "Non-faces")

print(f"\n‚úì Feature extraction complete")
print(f"  Total images processed: {len(descriptors_per_image)}")
print(f"  Faces: {sum(labels)}")
print(f"  Non-faces: {len(labels) - sum(labels)}")
if skipped_unreadable or skipped_no_keypoints:
    print(f"  Skipped (unreadable): {len(skipped_unreadable)}")
    print(f"  Skipped (no keypoints): {len(skipped_no_keypoints)}")

## 🎯 Step 5: Build BoVW Codebook (K-means)

In [None]:
# Build the descriptor matrix that k-means will be trained on
print("üìä Preparing descriptors for k-means clustering...\n")

# One row per descriptor, pooled over every image
all_descriptors_stacked = np.vstack(all_descriptors)
print(f"Total descriptors: {len(all_descriptors_stacked):,}")

# Cap the training set size; below the cap everything is used as-is
max_desc = CONFIG['bovw_max_descriptors']
if len(all_descriptors_stacked) <= max_desc:
    descriptors_for_kmeans = all_descriptors_stacked
else:
    print(f"Sampling {max_desc:,} descriptors for k-means...")
    # Seed NumPy's global generator so the same subset is drawn on every run
    np.random.seed(CONFIG['random_seed'])
    indices = np.random.choice(
        len(all_descriptors_stacked), max_desc, replace=False
    )
    descriptors_for_kmeans = all_descriptors_stacked[indices]

print(f"‚úì Using {len(descriptors_for_kmeans):,} descriptors for k-means")
print(f"  Descriptor shape: {descriptors_for_kmeans.shape}")

In [None]:
# Fit the k-means codebook: each cluster centre is one visual word
print(f"\nüéØ Training K-means with k={CONFIG['bovw_k']}...")
print("‚è≥ This may take a few minutes...\n")

_kmeans_settings = {
    'n_clusters': CONFIG['bovw_k'],
    'random_state': CONFIG['random_seed'],
    'n_init': 10,
    'max_iter': 300,
    'verbose': 1,
}
kmeans = KMeans(**_kmeans_settings)
kmeans.fit(descriptors_for_kmeans)

print(f"\n‚úì K-means training complete")
print(f"  Number of clusters (visual words): {kmeans.n_clusters}")
print(f"  Inertia: {kmeans.inertia_:.2f}")
print(f"  Iterations: {kmeans.n_iter_}")

## 📊 Step 6: Encode Images to BoVW Histograms

In [None]:
def encode_to_bovw(descriptors, kmeans_model, k, normalize='l2'):
    """Encode a set of ORB descriptors as a Bag-of-Visual-Words histogram.

    Args:
        descriptors: Descriptors of one image, or ``None`` / an empty
            sequence when no keypoints were found.
        kmeans_model: Fitted codebook; only its ``predict`` method is used,
            and it must return non-negative integer word indices.
        k: Minimum histogram length (number of visual words).
        normalize: ``'l1'`` divides by the sum of the counts, ``'l2'`` by the
            Euclidean norm. Any other value returns the raw counts.

    Returns:
        A ``float32`` histogram. For an image without descriptors a flat
        histogram is returned, normalized the same way as regular ones:
        unit L2 norm for ``'l2'``, and ``1/k`` per bin otherwise.
    """
    no_keypoints = descriptors is None or len(descriptors) == 0

    if no_keypoints:
        # No evidence for any word: start from a flat histogram.
        histogram = np.ones(k, dtype=np.float32)
    else:
        visual_words = kmeans_model.predict(descriptors)
        histogram = np.bincount(visual_words, minlength=k).astype(np.float32)

    if normalize == 'l2':
        norm = np.linalg.norm(histogram)
    elif normalize == 'l1' or no_keypoints:
        # The flat fallback is always scaled to sum to 1 unless L2 was
        # requested, so it never comes back as raw ones.
        norm = np.sum(histogram)
    else:
        return histogram

    # An all-zero histogram is returned untouched rather than divided by zero.
    if norm > 0:
        histogram = histogram / norm

    return histogram


print("‚úì BoVW encoding function defined")

In [None]:
# Turn every image's descriptor set into one fixed-length BoVW feature vector
print("üìä Encoding images to BoVW histograms...\n")

y = np.array(labels)  # Labels, in the same order as descriptors_per_image

# Features: one L2-normalized histogram per image
X = np.array([
    encode_to_bovw(image_descriptors, kmeans, CONFIG['bovw_k'], normalize='l2')
    for image_descriptors in tqdm(descriptors_per_image, desc="Encoding")
])

print(f"\n‚úì Encoding complete")
print(f"  Feature matrix shape: {X.shape}")
print(f"  Labels shape: {y.shape}")
print(f"  Features per image: {X.shape[1]}")

## ✂️ Step 7: Split Dataset (70% / 15% / 15%)

In [None]:
# Split dataset into train / validation / test using the ratios in CONFIG
# (0.70 / 0.15 / 0.15 by default) instead of hard-coded fractions.
print("‚úÇÔ∏è Splitting dataset...\n")

train_ratio = CONFIG['train_ratio']
val_ratio = CONFIG['val_ratio']
test_ratio = CONFIG['test_ratio']

if min(train_ratio, val_ratio, test_ratio) <= 0:
    raise ValueError("train_ratio, val_ratio and test_ratio must all be positive")
if not np.isclose(train_ratio + val_ratio + test_ratio, 1.0):
    raise ValueError(
        "train_ratio + val_ratio + test_ratio must equal 1.0, got "
        f"{train_ratio + val_ratio + test_ratio}"
    )

# First split: training set vs. a temporary hold-out (validation + test)
holdout_ratio = val_ratio + test_ratio
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y,
    test_size=holdout_ratio,
    random_state=CONFIG['random_seed'],
    stratify=y
)

# Second split: divide the hold-out into validation and test. The test share
# is expressed relative to the hold-out, not to the full dataset.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=test_ratio / holdout_ratio,
    random_state=CONFIG['random_seed'],
    stratify=y_temp
)

print(f"‚úì Dataset split complete")
for split_name, X_part, y_part in (
    ("Training set", X_train, y_train),
    ("Validation set", X_val, y_val),
    ("Test set", X_test, y_test),
):
    n_faces = int(np.sum(y_part))  # labels are 1 for faces, 0 for non-faces
    print(f"\n{split_name}:")
    print(f"  Total: {len(X_part)} ({len(X_part)/len(X)*100:.1f}%)")
    print(f"  Faces: {n_faces}")
    print(f"  Non-faces: {len(y_part) - n_faces}")

## 🔧 Step 8: Feature Scaling

In [None]:
# Standardize the features. The scaler's statistics come from the training
# set only; validation and test data are transformed with those statistics.
print("üîß Scaling features...\n")

scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

print(f"‚úì Feature scaling complete")
print(f"  Scaler mean: {scaler.mean_[:5]}...")
print(f"  Scaler std: {scaler.scale_[:5]}...")

## 🤖 Step 9: Train Linear SVM

In [None]:
# Tune the SVM's C with 5-fold cross-validation on the training set,
# selecting the value with the best F1 score.
print("ü§ñ Training Linear SVM with hyperparameter tuning...\n")

# Base estimator; dual=False selects the primal formulation for faster training
svm = LinearSVC(
    dual=False,
    random_state=CONFIG['random_seed'],
    max_iter=CONFIG['svm_max_iter'],
)

# Every value listed in CONFIG['svm_C'] is evaluated
param_grid = dict(C=CONFIG['svm_C'])

grid_search = GridSearchCV(svm, param_grid, scoring='f1', cv=5, verbose=2)
grid_search.fit(X_train_scaled, y_train)

# Keep the best estimator found by the search for evaluation and export
best_svm = grid_search.best_estimator_

print(f"\n‚úì Training complete")
print(f"\nBest parameters: {grid_search.best_params_}")
print(f"Best cross-validation F1 score: {grid_search.best_score_:.4f}")

## 📈 Step 10: Evaluate Model

In [None]:
# Score the selected model on the validation split
print("üìà Evaluating on validation set...\n")

y_val_pred = best_svm.predict(X_val_scaled)

# All four metrics take (true labels, predicted labels)
val_accuracy, val_precision, val_recall, val_f1 = (
    score_fn(y_val, y_val_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Validation Set Performance:")
print("="*50)
for metric_label, metric_value in (
    ("Accuracy:", val_accuracy),
    ("Precision:", val_precision),
    ("Recall:", val_recall),
    ("F1 Score:", val_f1),
):
    # Labels are padded to a common width so the values line up
    print(f"  {metric_label:<10} {metric_value:.4f}")
print("="*50)

In [None]:
# Score the selected model on the held-out test split
print("\nüìà Evaluating on test set...\n")

y_test_pred = best_svm.predict(X_test_scaled)

# All four metrics take (true labels, predicted labels)
test_accuracy, test_precision, test_recall, test_f1 = (
    score_fn(y_test, y_test_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Test Set Performance:")
print("="*50)
for metric_label, metric_value in (
    ("Accuracy:", test_accuracy),
    ("Precision:", test_precision),
    ("Recall:", test_recall),
    ("F1 Score:", test_f1),
):
    # Labels are padded to a common width so the values line up
    print(f"  {metric_label:<10} {metric_value:.4f}")
print("="*50)

# Per-class breakdown; label 0 is reported as 'Non-Face', label 1 as 'Face'
print("\n" + classification_report(y_test, y_test_pred, target_names=['Non-Face', 'Face']))

In [None]:
# Confusion Matrix
# labels=[0, 1] pins the matrix to 2x2 (row/column 0 = non-face, 1 = face)
# even if one class is absent from both y_test and the predictions. Without
# it the matrix shrinks and the cell lookups below raise an IndexError.
cm = confusion_matrix(y_test, y_test_pred, labels=[0, 1])

class_names = ['Non-Face', 'Face']

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix - Test Set')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Rows are true labels and columns are predictions, so the flattened
# 2x2 matrix reads TN, FP, FN, TP.
tn, fp, fn, tp = cm.ravel()
print(f"\nTrue Negatives:  {tn}")
print(f"False Positives: {fp}")
print(f"False Negatives: {fn}")
print(f"True Positives:  {tp}")

## 💾 Step 11: Save Models

In [None]:
# Write the trained artifacts and a plain-text training report to disk
output_dir = 'trained_models'
os.makedirs(output_dir, exist_ok=True)

print(f"üíæ Saving models to '{output_dir}/'...\n")

# One pickle per artifact: the SVM, the k-means codebook and the scaler
svm_path, bovw_path, scaler_path = (
    os.path.join(output_dir, pkl_name)
    for pkl_name in ('svm_model.pkl', 'bovw_encoder.pkl', 'scaler.pkl')
)

for artifact_label, artifact_path, artifact in (
    ("SVM model", svm_path, best_svm),
    ("BoVW encoder", bovw_path, kmeans),
    ("Scaler", scaler_path, scaler),
):
    joblib.dump(artifact, artifact_path)
    print(f"‚úì {artifact_label} saved: {artifact_path}")
    print(f"  File size: {os.path.getsize(artifact_path) / 1024:.1f} KB")

# Training report: configuration, split sizes and test-set metrics
config_path = os.path.join(output_dir, 'training_info.txt')
heavy_rule = "=" * 60
light_rule = "-" * 60

# Each entry is one line of the report; "" produces a blank line
report_lines = [
    heavy_rule,
    "SVM+ORB Face Detector Training Results",
    heavy_rule,
    "",
    "Configuration:",
    light_rule,
]
report_lines.extend(f"{key}: {value}" for key, value in CONFIG.items())
report_lines += [
    "",
    heavy_rule,
    "Dataset Split:",
    light_rule,
    f"Training: {len(X_train)} samples",
    f"Validation: {len(X_val)} samples",
    f"Test: {len(X_test)} samples",
    "",
    heavy_rule,
    "Test Set Performance:",
    light_rule,
    f"Accuracy:  {test_accuracy:.4f}",
    f"Precision: {test_precision:.4f}",
    f"Recall:    {test_recall:.4f}",
    f"F1 Score:  {test_f1:.4f}",
    heavy_rule,
]

with open(config_path, 'w') as f:
    f.write("\n".join(report_lines) + "\n")

print(f"‚úì Training info saved: {config_path}")

print("\n" + heavy_rule)
print("‚úÖ ALL MODELS SAVED SUCCESSFULLY!")
print(heavy_rule)

## 📥 Step 12: Download Models

In [None]:
# Bundle the contents of the output directory into one archive and download it
import shutil

print("üì¶ Creating models.zip for download...\n")

# make_archive appends the '.zip' suffix to this base name itself
zip_filename = 'trained_models'
shutil.make_archive(zip_filename, 'zip', output_dir)
archive_name = zip_filename + '.zip'

print(f"‚úì Archive created: {zip_filename}.zip")
print(f"  Size: {os.path.getsize(archive_name) / 1024 / 1024:.2f} MB")

# Hand the archive to Colab's download helper
print("\nüì• Downloading models.zip...")
files.download(archive_name)

print("\n‚úÖ Download complete!")

# Follow-up instructions for using the exported files
_banner = "=" * 60
for _line in (
    "\n" + _banner,
    "Next Steps:",
    _banner,
    "1. Extract trained_models.zip",
    "2. Copy these files to your backend/models/ folder:",
    "   - svm_model.pkl",
    "   - bovw_encoder.pkl",
    "   - scaler.pkl",
    "3. Run: python face_detector_cli.py webcam --camera 0 --show",
    _banner,
):
    print(_line)

## 📊 Summary Statistics

In [None]:
# Print final summary
# Split percentages are computed from the actual set sizes rather than
# printed as fixed 70/15/15 figures, so the report stays correct when the
# split proportions change or rounding shifts a few samples.
print("\n" + "="*60)
print("üéâ TRAINING COMPLETE!")
print("="*60)
print("\nModel Performance Summary:")
print("-"*60)
print(f"Dataset Size: {len(X)} images")
for split_label, split_data in (
    ("Training Set", X_train),
    ("Validation Set", X_val),
    ("Test Set", X_test),
):
    print(f"{split_label}: {len(split_data)} ({len(split_data)/len(X)*100:.1f}%)")
print("\nTest Set Metrics:")
print(f"  Accuracy:  {test_accuracy*100:.2f}%")
print(f"  Precision: {test_precision*100:.2f}%")
print(f"  Recall:    {test_recall*100:.2f}%")
print(f"  F1 Score:  {test_f1*100:.2f}%")
print("\nModel Details:")
print(f"  ORB Keypoints: {CONFIG['orb_max_keypoints']}")
print(f"  BoVW Clusters (k): {CONFIG['bovw_k']}")
print(f"  SVM C parameter: {best_svm.C}")
print(f"  Feature dimension: {X.shape[1]}")
print("="*60)
print("\n‚úÖ Models saved and ready for deployment!")
print("üì• Download the trained_models.zip file above")