In [None]:
! pip install facenet_pytorch



In [15]:
import numpy as np
import sqlite3
import pickle
import os
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.decomposition import PCA

def train_face_recognition_model(augment_samples=True, num_augmented_samples=5):
    """
    Train a custom SVM classifier using existing face embeddings from the database

    Parameters:
    augment_samples (bool): Whether to generate additional training samples through augmentation
    num_augmented_samples (int): Number of augmented samples to create per user
    """
    db_path = '/content/face_database.db'
    print(f"Using database at: {db_path}")

    try:
        # Connect to the database
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Fetch data from the users table
        cursor.execute('SELECT id, facenet_embedding FROM users')

        X = []  # embeddings
        y = []  # labels

        # Original embeddings
        original_embeddings = {}

        for row in cursor.fetchall():
            user_id = row[0]
            embedding = np.frombuffer(row[1], dtype=np.float32)

            # Store the original embedding for each user
            original_embeddings[user_id] = embedding

            # Add original embedding to training data
            X.append(embedding)
            y.append(user_id)

        conn.close()

        # Perform data augmentation if enabled
        if augment_samples:
            print(f"Augmenting dataset with {num_augmented_samples} synthetic samples per user...")
            for user_id, orig_embedding in original_embeddings.items():
                # Create synthetic variations of each embedding
                for i in range(num_augmented_samples):
                    # Add small random noise to create variations (small % of feature magnitude)
                    noise_scale = 0.02  # 2% noise
                    noise = np.random.normal(0, noise_scale, size=orig_embedding.shape) * np.mean(np.abs(orig_embedding))
                    augmented_embedding = orig_embedding + noise

                    # Normalize the embedding to maintain unit length
                    augmented_embedding = augmented_embedding / np.linalg.norm(augmented_embedding)

                    # Add to training data
                    X.append(augmented_embedding)
                    y.append(user_id)

        # Convert to numpy arrays
        X = np.array(X)

        print(f"Training dataset: {len(X)} face embeddings from {len(original_embeddings)} unique users")
        if augment_samples:
            print(f"Each user now has {1 + num_augmented_samples} samples (1 original + {num_augmented_samples} augmented)")

        # Optional: Apply PCA for dimensionality reduction
        apply_pca = input("Apply PCA for dimensionality reduction? (y/n): ").lower() == 'y'
        if apply_pca:
            # Keep 95% of variance
            pca = PCA(n_components=0.95)
            X_reduced = pca.fit_transform(X)
            print(f"Reduced dimensions from {X.shape[1]} to {X_reduced.shape[1]} features")

            # Save the PCA model
            with open('face_pca.pkl', 'wb') as f:
                pickle.dump(pca, f)
            print("PCA model saved as 'face_pca.pkl'")

            # Use reduced features for training
            X = X_reduced

        # Encode user labels
        label_encoder = LabelEncoder()
        y_encoded = label_encoder.fit_transform(y)

        print(f"Training SVM model...")

        # Check how many samples per class we have
        unique_users, counts = np.unique(y, return_counts=True)
        min_samples = np.min(counts)

        print(f"Minimum samples per user: {min_samples}")

        # Parameter grid for SVM
        param_grid = {
            'C': [0.1, 1, 10, 100],
            'kernel': ['linear', 'rbf'],
            'gamma': ['scale', 'auto', 0.01]
        }

        # Choose appropriate cross-validation strategy
        if min_samples < 3:
            print("Using leave-one-out cross-validation")
            cv = LeaveOneOut()
        else:
            cv_folds = min(3, min_samples)
            print(f"Using {cv_folds}-fold cross-validation")
            cv = cv_folds

        try:
            grid_search = GridSearchCV(
                SVC(probability=True),
                param_grid,
                cv=cv,
                n_jobs=-1,
                verbose=1
            )

            grid_search.fit(X, y_encoded)
            best_model = grid_search.best_estimator_

            print(f"Best parameters found: {grid_search.best_params_}")
            print(f"Model accuracy: {grid_search.best_score_:.4f}")
        except ValueError as e:
            print(f"Cross-validation failed: {e}")
            print("Training model without cross-validation...")

            # Fallback to a simple model without cross-validation
            best_model = SVC(probability=True, C=1, kernel='linear', gamma='scale')
            best_model.fit(X, y_encoded)
            print("Model trained without validation")

        # Save the model and label encoder
        with open('face_model.pkl', 'wb') as f:
            pickle.dump((best_model, label_encoder), f)

        print("Model successfully trained and saved as 'face_model.pkl'")

        # Print registered users for reference
        print("\nRegistered Users:")
        for i, user_id in enumerate(label_encoder.classes_):
            print(f"{i+1}. {user_id}")

        return True

    except Exception as e:
        print(f"An error occurred: {e}")
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    # Script / notebook-cell entry point: train with augmentation switched on,
    # creating five synthetic samples for every user.
    train_face_recognition_model(
        num_augmented_samples=5,
        augment_samples=True,
    )

Using database at: /content/face_database.db
Augmenting dataset with 5 synthetic samples per user...
Training dataset: 72 face embeddings from 12 unique users
Each user now has 6 samples (1 original + 5 augmented)
Apply PCA for dimensionality reduction? (y/n): n
Training SVM model...
Minimum samples per user: 6
Using 3-fold cross-validation
Fitting 3 folds for each of 24 candidates, totalling 72 fits
Best parameters found: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Model accuracy: 1.0000
Model successfully trained and saved as 'face_model.pkl'

Registered Users:
1. Adeesha
2. Avishka
3. Chandima
4. Maheesh
5. Menuka
6. Mubarak
7. Tharindu
8. isuru
9. nadun
10. nuwanthika
11. ravindu
12. shamal
