# Multimodal Fusion Development
## Combining Fingerprint, Face, Iris, and Voice Recognition

**Objectives:**
1. Test individual biometric modalities
2. Implement score-level fusion
3. Implement decision-level fusion
4. Train ML-based fusion (Random Forest & SVM)
5. Compare fusion strategies
6. Calculate FAR, FRR, EER for multimodal system

## 1. Setup and Imports

In [2]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import pickle
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Import biometric modules
from modules.fingerprint_recognition import FingerprintRecognition
from modules.face_recognition import FaceRecognition
from modules.iris_recognition import IrisRecognition
from modules.voice_recognition import VoiceRecognition
from modules.fusion import MultimodalFusion

# Sklearn for ML fusion
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

print('✓ All imports successful!')

✓ All imports successful!


## 2. Initialize Biometric Systems

In [None]:
# Bring up one recognizer per modality, each pointed at its own database
# directory, then hand all four recognizers to the fusion layer.
print('Initializing biometric systems...')

fingerprint_system = FingerprintRecognition(database_path='../data/database/fingerprints')
print('✓ Fingerprint system initialized')

face_system = FaceRecognition(database_path='../data/database/faces')
print('✓ Face system initialized')

iris_system = IrisRecognition(database_path='../data/database/iris')
print('✓ Iris system initialized')

voice_system = VoiceRecognition(database_path='../data/database/voices')
print('✓ Voice system initialized')

# The fusion system receives the recognizers by keyword so that each one is
# bound to the modality it serves.
_recognizers = {
    'fingerprint_system': fingerprint_system,
    'face_system': face_system,
    'iris_system': iris_system,
    'voice_system': voice_system,
}
fusion_system = MultimodalFusion(**_recognizers)
print('✓ Fusion system initialized')

## 3. Enroll Test Users

In [None]:
# Define data paths
data_root = Path('../data/raw')

# Example enrollment for user1
user_id = 'user1'

# First sample of each modality is used as the enrollment template.
fingerprint_path = data_root / 'fingerprints' / f'{user_id}_1.png'
face_path = data_root / 'faces' / f'{user_id}_1.jpg'
iris_path = data_root / 'iris' / f'{user_id}_1.png'
voice_path = data_root / 'voices' / f'{user_id}_1.wav'

# (display label, recognizer, enrollment sample) for every modality
enrollment_plan = [
    ('Fingerprint', fingerprint_system, fingerprint_path),
    ('Face', face_system, face_path),
    ('Iris', iris_system, iris_path),
    ('Voice', voice_system, voice_path),
]

enrolled = []      # modalities whose enroll() call reported success
not_enrolled = []  # modalities that were skipped (no file) or failed

for label, system, sample_path in enrollment_plan:
    if not sample_path.exists():
        # Report the skip instead of silently ignoring the modality, so the
        # final summary cannot claim more than what actually happened.
        print(f'{label} enrollment: skipped (file not found: {sample_path})')
        not_enrolled.append(label)
        continue

    success = system.enroll(user_id, str(sample_path))
    print(f'Fingerprint enrollment: {"✓" if success else "✗"}'.replace('Fingerprint', label, 1))
    (enrolled if success else not_enrolled).append(label)

# Only claim full enrollment when every modality really succeeded.
if not not_enrolled:
    print(f'\n✓ User {user_id} enrolled in all modalities')
else:
    print(
        f'\n✗ User {user_id} enrolled in {len(enrolled)}/{len(enrollment_plan)} '
        f'modalities (missing or failed: {", ".join(not_enrolled)})'
    )

## 4. Test Individual Modalities

In [None]:
# Verify the second sample of each modality against the enrolled user and
# print the accept/reject decision together with the match score.
user_id = 'user1'

test_fingerprint = data_root / 'fingerprints' / f'{user_id}_2.png'
test_face = data_root / 'faces' / f'{user_id}_2.jpg'
test_iris = data_root / 'iris' / f'{user_id}_2.png'
test_voice = data_root / 'voices' / f'{user_id}_2.wav'

for _label, _system, _probe in (
    ('Fingerprint', fingerprint_system, test_fingerprint),
    ('Face', face_system, test_face),
    ('Iris', iris_system, test_iris),
    ('Voice', voice_system, test_voice),
):
    if not _probe.exists():
        continue  # no probe sample on disk for this modality; nothing to report

    verified, score = _system.verify(user_id, str(_probe))
    _mark = '✓' if verified else '✗'
    print(f'{_label}: {_mark} (Score: {score:.3f})')

## 5. Score-Level Fusion

In [None]:
# Collect the genuine-match scores of `user_id` for every modality and plot
# their distributions.
scores = {
    'fingerprint': [],
    'face': [],
    'iris': [],
    'voice': []
}

# modality -> (recognizer, sub-directory under data_root, file extension)
modality_sources = {
    'fingerprint': (fingerprint_system, 'fingerprints', 'png'),
    'face': (face_system, 'faces', 'jpg'),
    'iris': (iris_system, 'iris', 'png'),
    'voice': (voice_system, 'voices', 'wav'),
}

# Test with multiple samples
# NOTE(review): sample 1 is the file enrolled in section 3, so its score is a
# self-match; generate_fusion_dataset() starts at sample 2. Confirm whether
# sample 1 should be excluded here as well.
for i in range(1, 6):  # Test with 5 samples
    test_data = {
        modality: data_root / folder / f'{user_id}_{i}.{ext}'
        for modality, (_, folder, ext) in modality_sources.items()
    }

    for modality, path in test_data.items():
        if not path.exists():
            continue  # missing samples simply contribute no score
        system = modality_sources[modality][0]
        _, score = system.verify(user_id, str(path))
        scores[modality].append(score)

# Visualize score distribution (one panel per modality)
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

for ax, (modality, score_list) in zip(axes, scores.items()):
    ax.set_title(f'{modality.capitalize()} Scores')
    if not score_list:
        # Make an empty panel self-explanatory instead of leaving blank axes.
        ax.text(0.5, 0.5, 'No samples found', ha='center', va='center',
                transform=ax.transAxes)
        continue

    mean_score = np.mean(score_list)
    ax.hist(score_list, bins=20, edgecolor='black', alpha=0.7)
    ax.set_xlabel('Score')
    ax.set_ylabel('Frequency')
    ax.axvline(mean_score, color='red', linestyle='--',
               label=f'Mean: {mean_score:.3f}')
    ax.legend()

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
plot_path = Path('../results/plots/score_distributions.png')
plot_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.show()

## 6. Weighted Sum Fusion

In [None]:
# Weighted-sum fusion on the second sample of every modality.
# Per-modality weights handed to the fusion system (they add up to 1.0).
weights = dict(fingerprint=0.3, face=0.3, iris=0.2, voice=0.2)

# Probe samples, passed to the fusion system as plain string paths.
test_data = dict(
    fingerprint=str(data_root / 'fingerprints' / f'{user_id}_2.png'),
    face=str(data_root / 'faces' / f'{user_id}_2.jpg'),
    iris=str(data_root / 'iris' / f'{user_id}_2.png'),
    voice=str(data_root / 'voices' / f'{user_id}_2.wav'),
)

# Run the multimodal verification with the weighted-sum strategy.
result = fusion_system.verify_multimodal(
    user_id=user_id, biometric_data=test_data, strategy='weighted_sum', weights=weights
)

# Report the fused outcome followed by the per-modality details.
print('\n=== Weighted Sum Fusion Results ===')
print('Verified: {}'.format(result['verified']))
print('Fused Score: {:.3f}'.format(result['fused_score']))
print('Individual Scores: {}'.format(result['individual_scores']))
print('Available Modalities: {}'.format(result['available_modalities']))

## 7. Decision-Level Fusion (Voting)

In [None]:
# Decision-level fusion: each modality casts a vote and the claim is accepted
# when enough of them agree. Reuses `test_data` from the weighted-sum cell.
result = fusion_system.verify_multimodal(
    user_id=user_id, biometric_data=test_data, strategy='voting',
    min_votes=2,  # At least 2 out of 4 modalities must accept
)

print('\n=== Decision-Level Fusion (Voting) Results ===')
print('Verified: {}'.format(result['verified']))
# 'votes' may be absent from the result, hence the fallback text.
print('Votes: {}'.format(result.get('votes', 'N/A')))
# NOTE(review): the label says "Decisions" but the value printed is
# result['individual_scores'] — confirm which key holds per-modality decisions.
print('Individual Decisions: {}'.format(result['individual_scores']))
print('Available Modalities: {}'.format(result['available_modalities']))

## 8. Generate Training Data for ML Fusion

In [None]:
# Generate genuine and impostor pairs
def generate_fusion_dataset(users, samples_per_user=5, impostor_samples=2):
    """
    Generate the score dataset used to train ML-based fusion.

    Every row holds four match scores in the fixed order
    [fingerprint, face, iris, voice]. A modality whose sample file does not
    exist contributes 0.0 to the row.

    Genuine rows: for each user, samples 2..samples_per_user are verified
    against that same user's identity.
    Impostor rows: for each pair (user1, user2) with user1 listed before
    user2 in `users`, samples 1..impostor_samples of user2 are verified
    against user1's identity. Each pair is used in that direction only.

    Args:
        users: sequence of user IDs (must support slicing).
        samples_per_user: highest sample index used for genuine rows.
        impostor_samples: number of samples per impostor pair (default 2,
            which matches the previous hard-coded behaviour).

    Returns:
        (X, y): X is a float array of shape (n_rows, 4); y is an int array of
        shape (n_rows,) with 1 = genuine and 0 = impostor. With no rows, X has
        shape (0, 4) rather than (0,), so downstream code can rely on 2-D X.
    """
    # (recognizer, sub-directory under data_root, file extension); the order
    # here defines the column order of X.
    modalities = (
        (fingerprint_system, 'fingerprints', 'png'),
        (face_system, 'faces', 'jpg'),
        (iris_system, 'iris', 'png'),
        (voice_system, 'voices', 'wav'),
    )

    def _score_vector(claimed_id, sample_owner, sample_idx):
        """Score one sample set of `sample_owner` against `claimed_id`."""
        vector = []
        for system, folder, ext in modalities:
            sample_path = data_root / folder / f'{sample_owner}_{sample_idx}.{ext}'
            if sample_path.exists():
                _, score = system.verify(claimed_id, str(sample_path))
            else:
                score = 0.0  # missing sample: neutral placeholder score
            vector.append(score)
        return vector

    X = []  # Feature vectors (scores from 4 modalities)
    y = []  # Labels

    # Genuine pairs: sample 1 is skipped (the notebook enrolls with it).
    print('Generating genuine pairs...')
    for user_id in tqdm(users):
        for i in range(2, samples_per_user + 1):
            X.append(_score_vector(user_id, user_id, i))
            y.append(1)  # Genuine

    # Impostor pairs: user2's data tested against user1's enrollment.
    print('Generating impostor pairs...')
    for i, user1 in enumerate(tqdm(users)):
        for user2 in users[i + 1:]:
            for sample in range(1, impostor_samples + 1):
                X.append(_score_vector(user1, user2, sample))
                y.append(0)  # Impostor

    # reshape keeps X two-dimensional even when no rows were produced.
    return np.array(X, dtype=float).reshape(-1, 4), np.array(y, dtype=int)

# Example: Generate dataset for available users
# NOTE(review): section 3 of this notebook enrolls only 'user1'. Presumably
# every ID listed here needs an enrolled template in all four systems before
# generate_fusion_dataset() can score it — confirm, and enroll the others first.
available_users = ['user1', 'user2', 'user3']  # Add your enrolled users here

# Uncomment to generate dataset
# X, y = generate_fusion_dataset(available_users)
# print(f'\nDataset shape: X={X.shape}, y={y.shape}')
# print(f'Genuine samples: {np.sum(y == 1)}')
# print(f'Impostor samples: {np.sum(y == 0)}')

## 9. Train ML-based Fusion (Random Forest)

In [None]:
# Train Random Forest fusion classifier
# Uncomment after generating dataset above

# # Split data
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.3, random_state=42, stratify=y
# )

# # Train Random Forest
# fusion_system.train_ml_fusion(X_train, y_train, method='random_forest')

# # Evaluate
# y_pred = fusion_system.rf_classifier.predict(X_test)
# y_proba = fusion_system.rf_classifier.predict_proba(X_test)[:, 1]

# print('\n=== Random Forest Fusion Results ===')
# print(f'Accuracy: {accuracy_score(y_test, y_pred):.4f}')
# print(f'Precision: {precision_score(y_test, y_pred):.4f}')
# print(f'Recall: {recall_score(y_test, y_pred):.4f}')
# print(f'F1-Score: {f1_score(y_test, y_pred):.4f}')

# print('\nConfusion Matrix:')
# print(confusion_matrix(y_test, y_pred))

# print('\nClassification Report:')
# print(classification_report(y_test, y_pred, target_names=['Impostor', 'Genuine']))

## 10. Train SVM Fusion

In [None]:
# Train SVM fusion classifier
# Uncomment after generating dataset

# fusion_system.train_ml_fusion(X_train, y_train, method='svm')

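# # Evaluate
# # NOTE(review): section 14 saves fusion_system.scaler; if train_ml_fusion fits that
# # scaler on X_train, X_test must be transformed with it before calling the raw
# # classifier below — confirm against modules/fusion.py.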
# y_pred_svm = fusion_system.svm_classifier.predict(X_test)
# y_proba_svm = fusion_system.svm_classifier.decision_function(X_test)

# print('\n=== SVM Fusion Results ===')
# print(f'Accuracy: {accuracy_score(y_test, y_pred_svm):.4f}')
# print(f'Precision: {precision_score(y_test, y_pred_svm):.4f}')
# print(f'Recall: {recall_score(y_test, y_pred_svm):.4f}')
# print(f'F1-Score: {f1_score(y_test, y_pred_svm):.4f}')

## 11. Compare Fusion Strategies

In [None]:
# Compare different fusion strategies
# Uncomment after training models

# strategies = {
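#     'Weighted Sum': y_proba,  # PLACEHOLDER: these are Random Forest probabilities, not weighted-sum scores — replace with the real fused scores before comparing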
#     'Random Forest': y_proba,
#     'SVM': (y_proba_svm - y_proba_svm.min()) / (y_proba_svm.max() - y_proba_svm.min())
# }

# # Plot ROC curves
# plt.figure(figsize=(10, 8))

# for name, scores in strategies.items():
#     fpr, tpr, _ = roc_curve(y_test, scores)
#     roc_auc = auc(fpr, tpr)
#     plt.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.4f})')

# plt.plot([0, 1], [0, 1], 'k--', label='Random')
# plt.xlabel('False Positive Rate (FAR)')
# plt.ylabel('True Positive Rate (1 - FRR)')
# plt.title('ROC Curves - Fusion Strategy Comparison')
# plt.legend(loc='lower right')
# plt.grid(True, alpha=0.3)
# plt.savefig('../results/plots/fusion_roc_comparison.png', dpi=300, bbox_inches='tight')
# plt.show()

## 12. Calculate FAR, FRR, EER

In [None]:
# Calculate FAR, FRR, and EER for fusion system
# Uncomment after training

# def calculate_far_frr_eer(y_true, y_scores):
#     """
#     Calculate FAR, FRR, and EER
#     """
#     # Get genuine and impostor scores
#     genuine_scores = y_scores[y_true == 1]
#     impostor_scores = y_scores[y_true == 0]
#     
#     # Calculate FAR and FRR at different thresholds
#     thresholds = np.linspace(0, 1, 1000)
#     far_list = []
#     frr_list = []
#     
#     for threshold in thresholds:
#         # FAR: impostor accepted
#         far = np.sum(impostor_scores >= threshold) / len(impostor_scores)
#         # FRR: genuine rejected
#         frr = np.sum(genuine_scores < threshold) / len(genuine_scores)
#         
#         far_list.append(far)
#         frr_list.append(frr)
#     
#     far_array = np.array(far_list)
#     frr_array = np.array(frr_list)
#     
#     # Find EER (where FAR = FRR)
#     eer_idx = np.argmin(np.abs(far_array - frr_array))
#     eer = (far_array[eer_idx] + frr_array[eer_idx]) / 2
#     eer_threshold = thresholds[eer_idx]
#     
#     return far_array, frr_array, eer, eer_threshold, thresholds

# # Calculate for Random Forest fusion
# far, frr, eer, eer_threshold, thresholds = calculate_far_frr_eer(y_test, y_proba)

# print(f'\n=== Fusion System Performance ===')
# print(f'EER: {eer*100:.2f}%')
# print(f'EER Threshold: {eer_threshold:.4f}')

# # Plot FAR vs FRR
# plt.figure(figsize=(10, 6))
# plt.plot(thresholds, far, label='FAR', linewidth=2)
# plt.plot(thresholds, frr, label='FRR', linewidth=2)
# plt.axvline(eer_threshold, color='red', linestyle='--', 
#             label=f'EER = {eer*100:.2f}% @ {eer_threshold:.4f}')
# plt.xlabel('Threshold')
# plt.ylabel('Error Rate')
# plt.title('FAR vs FRR - Multimodal Fusion System')
# plt.legend()
# plt.grid(True, alpha=0.3)
# plt.savefig('../results/plots/fusion_far_frr.png', dpi=300, bbox_inches='tight')
# plt.show()

## 13. Feature Importance (Random Forest)

In [None]:
# Analyze feature importance
# Uncomment after training Random Forest

# feature_names = ['Fingerprint', 'Face', 'Iris', 'Voice']
# importances = fusion_system.rf_classifier.feature_importances_

# # Plot feature importance
# plt.figure(figsize=(10, 6))
# plt.bar(feature_names, importances, edgecolor='black', alpha=0.7)
# plt.xlabel('Biometric Modality')
# plt.ylabel('Importance')
# plt.title('Feature Importance in Random Forest Fusion')
# plt.grid(True, alpha=0.3, axis='y')

# # Add values on bars
# for i, v in enumerate(importances):
#     plt.text(i, v + 0.01, f'{v:.3f}', ha='center', fontweight='bold')

# plt.savefig('../results/plots/fusion_feature_importance.png', dpi=300, bbox_inches='tight')
# plt.show()

# print('\nFeature Importance:')
# for name, imp in zip(feature_names, importances):
#     print(f'{name}: {imp:.4f}')

## 14. Save Fusion Models

In [None]:
# Save trained fusion models
# Uncomment after training

# model_dir = Path('../models')
# model_dir.mkdir(exist_ok=True)

# # Save Random Forest
# if fusion_system.rf_classifier is not None:
#     with open(model_dir / 'fusion_rf.pkl', 'wb') as f:
#         pickle.dump(fusion_system.rf_classifier, f)
#     print('✓ Random Forest model saved')

# # Save SVM
# if fusion_system.svm_classifier is not None:
#     with open(model_dir / 'fusion_svm.pkl', 'wb') as f:
#         pickle.dump(fusion_system.svm_classifier, f)
#     print('✓ SVM model saved')

# # Save scaler
# if fusion_system.scaler is not None:
#     with open(model_dir / 'fusion_scaler.pkl', 'wb') as f:
#         pickle.dump(fusion_system.scaler, f)
#     print('✓ Scaler saved')

## 15. Performance Summary

In [None]:
# Create performance summary table
# Uncomment after training all models

# summary_data = {
#     'Fusion Strategy': ['Weighted Sum', 'Voting (2/4)', 'Random Forest', 'SVM'],  # voting threshold matches min_votes=2 in section 7
#     'Accuracy': [0.95, 0.93, 0.97, 0.96],  # Replace with actual values
#     'Precision': [0.94, 0.92, 0.98, 0.95],
#     'Recall': [0.96, 0.94, 0.96, 0.97],
#     'F1-Score': [0.95, 0.93, 0.97, 0.96],
#     'EER (%)': [4.5, 6.8, 2.8, 3.2]
# }

# df_summary = pd.DataFrame(summary_data)
# print('\n=== Fusion Performance Summary ===')
# print(df_summary.to_string(index=False))

# # Save to CSV
# df_summary.to_csv('../results/reports/fusion_performance_summary.csv', index=False)
# print('\n✓ Performance summary saved to CSV')

## 16. Conclusions

**Key Findings (expected — to be confirmed once the evaluation cells above have been run):**
1. **Score-level fusion** provides continuous scores for better decision making
2. **Decision-level fusion** is simpler but less accurate
3. **ML-based fusion** (RF/SVM) adapts to data patterns and typically achieves best performance
4. **Feature importance** shows which modalities contribute most to fusion decision
5. **Multimodal fusion** significantly reduces FAR and FRR compared to unimodal systems

**Next Steps:**
- Test with more users and samples
- Implement rank-level fusion
- Optimize fusion weights
- Deploy in production system