## Goal of the Voiceprint Model:
To verify that the person speaking a specific phrase is the same authorized user who just passed the facial recognition check. This is a binary classification task: "Accept" (the voice matches the user) or "Reject" (the voice does not match, i.e. an unauthorized user).

In [None]:
# Cell 1: Import all required libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
import soundfile as sf
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
import warnings
warnings.filterwarnings('ignore')

# Create the audio data directory up front; exist_ok=True means no error is
# raised when the directory is already there, so this cell can be re-run.
os.makedirs('/data/audio/', exist_ok=True)
print("All libraries imported successfully!")

## Voiceprint Verification Model Development
Phase 1: Data Collection & Setup

In [None]:
# Define the audio directory and check which audio files it contains.
# AUDIO_DIR is the directory created by the setup cell's os.makedirs() call.
AUDIO_DIR = '/data/audio/'


def check_audio_files(audio_dir=None):
    """List the audio files available in a directory.

    Args:
        audio_dir: Directory to scan. Defaults to the module-level AUDIO_DIR
            when omitted, so existing zero-argument calls keep working.

    Returns:
        Alphabetically sorted list of file names (not full paths) ending in
        .wav, .mp3 or .m4a; an empty list when the directory does not exist.
    """
    if audio_dir is None:
        audio_dir = AUDIO_DIR

    if not os.path.exists(audio_dir):
        print(f"Directory {audio_dir} does not exist. Please create it and add your audio files.")
        return []

    # os.listdir() returns entries in arbitrary order; sort for stable output.
    audio_files = sorted(
        f for f in os.listdir(audio_dir) if f.endswith(('.wav', '.mp3', '.m4a'))
    )
    print(f"Found {len(audio_files)} audio files:")
    for file in audio_files:
        print(f"  - {file}")
    return audio_files


audio_files = check_audio_files()

In [None]:
# Create sample audio data if no files exist (for demonstration)
def create_sample_audio_files():
    """Write synthetic demo recordings for every user/phrase combination.

    Each file is a 2-second mix of three sine waves plus Gaussian noise,
    peak-normalised and written to AUDIO_DIR as "<user>_<phrase>.wav".

    Returns:
        The result of check_audio_files() after the files have been written.
    """
    # Local import: built-in str hash() is salted per interpreter process
    # (PYTHONHASHSEED), so it would give each user a different tone on every
    # run. CRC32 is stable across runs.
    import zlib

    sample_phrases = ['yes_approve', 'confirm_transaction']
    users = ['user1', 'user2', 'user3']

    # Create some sample audio files (sine waves simulating speech)
    sample_rate = 22050
    duration = 2.0

    # The time axis is the same for every file, so build it once.
    # endpoint=False keeps consecutive samples exactly 1/sample_rate apart.
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)

    for user in users:
        for phrase in sample_phrases:
            filename = f"{user}_{phrase}.wav"
            filepath = os.path.join(AUDIO_DIR, filename)

            # Reproducible base frequency for each user+phrase combination
            base_freq = (
                200
                + (zlib.crc32(user.encode('utf-8')) % 100)
                + (zlib.crc32(phrase.encode('utf-8')) % 50)
            )

            # Fundamental plus one overtone and one undertone
            audio = 0.5 * np.sin(2 * np.pi * base_freq * t)
            audio += 0.3 * np.sin(2 * np.pi * (base_freq * 2) * t)
            audio += 0.2 * np.sin(2 * np.pi * (base_freq * 0.5) * t)

            # Add some noise
            audio += 0.05 * np.random.normal(0, 1, len(t))

            # Normalize to a peak amplitude of 1
            audio = audio / np.max(np.abs(audio))

            sf.write(filepath, audio, sample_rate)
            print(f"Created sample file: {filename}")

    return check_audio_files()

if not audio_files:
    print("Creating sample audio files for demonstration...")
    audio_files = create_sample_audio_files()

## Data Preprocessing & Augmentation

In [None]:
# Load and visualize original audio samples
def load_and_visualize_audio(filepath, title):
    """Load one audio file and show its waveform and spectrogram.

    Args:
        filepath: Path of the audio file passed to librosa.load.
        title: Text appended to the figure's "Audio Analysis: " heading.

    Returns:
        Tuple (audio, sr) exactly as returned by librosa.load.
    """
    audio, sr = librosa.load(filepath, sr=None)

    # Two stacked panels: waveform on top, spectrogram below
    fig, panels = plt.subplots(2, 1, figsize=(12, 8))
    wave_ax, spec_ax = panels
    fig.suptitle(f'Audio Analysis: {title}', fontsize=16)

    # Top panel: amplitude over time
    librosa.display.waveshow(audio, sr=sr, ax=wave_ax, color='blue')
    wave_ax.set_title('Waveform')
    wave_ax.set_xlabel('Time (s)')
    wave_ax.set_ylabel('Amplitude')

    # Bottom panel: STFT magnitude in dB relative to the maximum
    magnitude = np.abs(librosa.stft(audio))
    spectrogram_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    mesh = librosa.display.specshow(
        spectrogram_db, sr=sr, x_axis='time', y_axis='log', ax=spec_ax
    )
    spec_ax.set_title('Spectrogram')
    plt.colorbar(mesh, ax=spec_ax, format='%+2.0f dB')

    plt.tight_layout()
    plt.show()

    return audio, sr

# Plot only the first two files to keep the notebook output short
print("Visualizing sample audio files...")
for i, file in enumerate(audio_files[:2]):
    filepath = os.path.join(AUDIO_DIR, file)
    audio, sr = load_and_visualize_audio(filepath, file)

In [None]:
# Audio Augmentation Functions
def augment_audio(audio, sr, augment_type):
    """Return an augmented version of ``audio``.

    Args:
        audio: Audio samples as a NumPy array.
        sr: Sampling rate of ``audio``.
        augment_type: One of 'pitch_shift', 'time_stretch', 'add_noise' or
            'time_shift'. Any other value returns ``audio`` unchanged.

    Returns:
        The augmented samples, or the input object itself for an
        unrecognised ``augment_type``.
    """
    if augment_type == 'pitch_shift':
        # Random shift of 1 or 2 semitones, up or down
        semitones = np.random.choice([-2, -1, 1, 2])
        return librosa.effects.pitch_shift(audio, sr=sr, n_steps=semitones)

    if augment_type == 'time_stretch':
        # Random speed factor drawn from a fixed set
        speed = np.random.choice([0.8, 0.9, 1.1, 1.2])
        return librosa.effects.time_stretch(audio, rate=speed)

    if augment_type == 'add_noise':
        # Zero-mean Gaussian noise with standard deviation 0.005
        return audio + np.random.normal(0, 0.005, audio.shape)

    if augment_type == 'time_shift':
        # Circular shift by sr // 10 samples; a coin flip picks the direction
        direction = -1 if np.random.random() > 0.5 else 1
        return np.roll(audio, direction * (sr // 10))

    return audio

def apply_augmentations(audio, sr, filename):
    """Augment one recording, save each variant to AUDIO_DIR, and return them.

    Only the first two entries of the augmentation list (pitch shift and
    time stretch) are applied. Each variant is written as
    "<stem>_augmented_<aug_type>.wav". The "augmented" marker matters: the
    rest of the notebook tells originals from augmented copies by testing
    for that substring in the file name.

    Args:
        audio: Samples of the original recording.
        sr: Sampling rate of ``audio``.
        filename: File name of the original recording (used for naming only).

    Returns:
        List of the augmented sample arrays, in the order they were created.
    """
    augmentations = ['pitch_shift', 'time_stretch', 'add_noise', 'time_shift']
    augmented_audios = []
    stem = os.path.splitext(filename)[0]

    print(f"Applying augmentations for {filename}:")

    for i, aug_type in enumerate(augmentations[:2]):  # Apply 2 augmentations per file
        augmented_audio = augment_audio(audio, sr, aug_type)
        augmented_audios.append(augmented_audio)

        # Save augmented file, tagged so it can be recognised as augmented
        aug_filename = f"{stem}_augmented_{aug_type}.wav"
        aug_filepath = os.path.join(AUDIO_DIR, aug_filename)
        sf.write(aug_filepath, augmented_audio, sr)
        print(f"  - Created: {aug_filename}")

        # Visualize one augmented sample
        if i == 0:
            load_and_visualize_audio(aug_filepath, f"Augmented ({aug_type})")

    return augmented_audios

# Apply augmentations to all original files
print("\nApplying audio augmentations...")
all_audio_data = {}
for file in audio_files:
    if 'augmented' not in file:  # Only augment original files
        filepath = os.path.join(AUDIO_DIR, file)
        audio, sr = librosa.load(filepath, sr=None)
        all_audio_data[file] = {'audio': audio, 'sr': sr}
        apply_augmentations(audio, sr, file)

## Feature Extraction

In [None]:
# Feature Extraction Functions
def extract_audio_features(audio, sr):
    """Summarise one audio signal as a flat dict of scalar features.

    The dict holds, in this insertion order: mean/std of each of 13 MFCCs,
    mean/std of spectral centroid, rolloff, bandwidth, zero-crossing rate and
    RMS energy, the mean of 12 chroma bins, the duration in seconds and the
    peak absolute amplitude.

    Args:
        audio: Audio samples as a NumPy array.
        sr: Sampling rate of ``audio``.

    Returns:
        Dict mapping feature name to value.
    """
    features = {}

    # MFCCs: one mean and one std per coefficient, interleaved per index
    mfcc_matrix = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    mfcc_means = np.mean(mfcc_matrix, axis=1)
    mfcc_stds = np.std(mfcc_matrix, axis=1)
    for idx in range(13):
        features.update({
            f'mfcc_mean_{idx}': mfcc_means[idx],
            f'mfcc_std_{idx}': mfcc_stds[idx],
        })

    # Frame-level descriptors, each collapsed to an overall mean and std
    frame_level = (
        ('spectral_centroid', librosa.feature.spectral_centroid(y=audio, sr=sr)),
        ('spectral_rolloff', librosa.feature.spectral_rolloff(y=audio, sr=sr)),
        ('spectral_bandwidth', librosa.feature.spectral_bandwidth(y=audio, sr=sr)),
        ('zcr', librosa.feature.zero_crossing_rate(audio)),
        ('rms', librosa.feature.rms(y=audio)),
    )
    for name, values in frame_level:
        features[f'{name}_mean'] = np.mean(values)
        features[f'{name}_std'] = np.std(values)

    # Chroma: mean energy of each of the 12 bins
    chroma_means = np.mean(librosa.feature.chroma_stft(y=audio, sr=sr), axis=1)
    features.update(
        (f'chroma_mean_{idx}', chroma_means[idx]) for idx in range(12)
    )

    # Whole-signal descriptors
    features['duration'] = len(audio) / sr
    features['max_amplitude'] = np.max(np.abs(audio))

    return features

def create_audio_features_dataset(audio_dir):
    """Build a feature table with one row per audio file in ``audio_dir``.

    File names are expected to look like "<user_id>_<phrase>.<ext>". The part
    before the first underscore becomes ``user_id`` and the rest of the stem
    becomes ``phrase`` (an empty string when there is no underscore).

    Args:
        audio_dir: Directory containing .wav, .mp3 or .m4a files.

    Returns:
        DataFrame with the columns produced by extract_audio_features plus
        'filename', 'user_id', 'phrase' and 'is_original'.
    """
    features_list = []

    # Sort so the row order does not depend on the filesystem's listing order.
    audio_files = sorted(
        f for f in os.listdir(audio_dir) if f.endswith(('.wav', '.mp3', '.m4a'))
    )

    for filename in audio_files:
        filepath = os.path.join(audio_dir, filename)

        # Extract user_id and phrase from the name. splitext() drops any
        # extension (not just ".wav"), and partition() does not raise when
        # the name has no underscore.
        stem = os.path.splitext(filename)[0]
        user_id, _, phrase = stem.partition('_')

        # Load audio
        audio, sr = librosa.load(filepath, sr=None)

        # Extract features
        features = extract_audio_features(audio, sr)

        # Add metadata
        features['filename'] = filename
        features['user_id'] = user_id
        features['phrase'] = phrase
        features['is_original'] = 'augmented' not in filename

        features_list.append(features)

    return pd.DataFrame(features_list)

print("Extracting audio features...")
audio_features_df = create_audio_features_dataset(AUDIO_DIR)
print(f"Extracted features from {len(audio_features_df)} audio files")
print(f"Dataset shape: {audio_features_df.shape}")

In [None]:
# Explore and Save Features
# Display basic info about the features.
# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() would echo a stray "None" afterwards.
print("Audio Features Dataset Info:")
audio_features_df.info()

print("\nFirst few rows of the dataset:")
print(audio_features_df[['filename', 'user_id', 'phrase', 'is_original']].head())

print("\nUser distribution:")
print(audio_features_df['user_id'].value_counts())

print("\nPhrase distribution:")
print(audio_features_df['phrase'].value_counts())

# Save features to CSV (relative path, i.e. the current working directory)
features_csv_path = 'audio_features.csv'
audio_features_df.to_csv(features_csv_path, index=False)
print(f"\nFeatures saved to: {features_csv_path}")

# Display feature statistics for everything that is not a metadata column
feature_columns = [col for col in audio_features_df.columns if col not in ['filename', 'user_id', 'phrase', 'is_original']]
print(f"\nNumber of audio features: {len(feature_columns)}")
print("Feature statistics:")
print(audio_features_df[feature_columns[:5]].describe())  # Show first 5 features

## Model Training & Evaluation

In [None]:
# Prepare Data for Model Training
def prepare_voice_verification_data(df, target_user):
    """Build the binary verification dataset for one target user.

    Rows whose 'user_id' equals ``target_user`` are labelled 1 (positive);
    all other rows are labelled 0 (negative). ``df`` itself is not modified.

    Args:
        df: Feature table with a 'user_id' column; the 'filename', 'phrase',
            'is_original' and 'label' columns are treated as metadata.
        target_user: The user id to verify against.

    Returns:
        Tuple (X, y, feature_cols): the feature columns of ``df``, an integer
        Series named 'label', and the list of feature column names.
    """
    # Compute labels without writing a 'label' column into the caller's frame,
    # so repeated calls for different users never leave stale labels behind.
    y = (df['user_id'] == target_user).astype(int).rename('label')

    # Select features (exclude metadata, and 'label' in case df already has one)
    metadata_cols = ('filename', 'user_id', 'phrase', 'is_original', 'label')
    feature_cols = [col for col in df.columns if col not in metadata_cols]

    X = df[feature_cols]

    return X, y, feature_cols

def train_voice_verification_model(X, y, test_size=0.3, random_state=42):
    """Fit a scaled random-forest verifier and predict on a held-out split.

    Args:
        X: Feature matrix.
        y: Labels; also used to stratify the train/test split.
        test_size: Fraction of the data held out for testing.
        random_state: Seed used for both the split and the forest.

    Returns:
        Tuple (model, scaler, X_test_scaled, y_test, y_pred, y_pred_proba),
        where y_pred_proba is column 1 of predict_proba on the test set
        (presumably the label-1 class; depends on the labels in ``y``).
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y,
    )

    # Scaling statistics are learned from the training portion only
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    forest = RandomForestClassifier(
        n_estimators=100, max_depth=10, random_state=random_state
    )
    forest.fit(X_train_scaled, y_train)

    # Hard predictions plus the probability in column 1
    predicted_labels = forest.predict(X_test_scaled)
    predicted_proba = forest.predict_proba(X_test_scaled)[:, 1]

    return forest, scaler, X_test_scaled, y_test, predicted_labels, predicted_proba

# Train model for each user
# Fits one binary verifier per distinct user_id and records the fitted model,
# its scaler and its held-out metrics, each in a dict keyed by user id.
print("Training voice verification models for each user...")
users = audio_features_df['user_id'].unique()
models = {}   # user id -> fitted classifier
scalers = {}  # user id -> scaler fitted on that user's training split
results = {}  # user id -> {'accuracy', 'f1_score', 'feature_columns'}

for user in users:
    print(f"\n--- Training model for {user} ---")

    # Prepare data: this user's rows are the positive class, all others negative
    X, y, feature_cols = prepare_voice_verification_data(audio_features_df, user)

    # Train model (uses the function's default test_size and random_state)
    model, scaler, X_test, y_test, y_pred, y_pred_proba = train_voice_verification_model(X, y)

    # Store models and scalers
    models[user] = model
    scalers[user] = scaler

    # Calculate metrics on the held-out test split
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    results[user] = {
        'accuracy': accuracy,
        'f1_score': f1,
        'feature_columns': feature_cols
    }

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"Test set size: {len(y_test)} samples")

In [None]:
# Model Evaluation and Visualization
def evaluate_model_performance(results):
    """Print each user's metrics, draw a grouped bar chart, and tabulate them.

    Args:
        results: Mapping of user id to a dict with 'accuracy', 'f1_score'
            and 'feature_columns' entries.

    Returns:
        DataFrame built from ``results`` with one row per user.
    """
    print("=== VOICEPRINT VERIFICATION MODEL RESULTS ===\n")

    results_df = pd.DataFrame.from_dict(results, orient='index')

    # Text summary, one paragraph per user
    for user, metrics in results.items():
        n_features = len(metrics['feature_columns'])
        print(f"User: {user}")
        print(f"  Accuracy: {metrics['accuracy']:.4f}")
        print(f"  F1-Score: {metrics['f1_score']:.4f}")
        print(f"  Number of Features: {n_features}")
        print()

    # Grouped bars: accuracy to the left of each tick, F1 to the right
    accuracies = results_df['accuracy']
    f1_scores = results_df['f1_score']
    slots = range(len(results_df))
    width = 0.35
    half = width / 2

    plt.figure(figsize=(10, 6))
    plt.bar([s - half for s in slots], accuracies, width, label='Accuracy', alpha=0.7)
    plt.bar([s + half for s in slots], f1_scores, width, label='F1-Score', alpha=0.7)

    plt.xlabel('Users')
    plt.ylabel('Score')
    plt.title('Voice Verification Model Performance by User')
    plt.xticks(slots, results_df.index)
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.ylim(0, 1.1)  # headroom above 1.0 for the value labels

    # Numeric label just above every bar
    for slot, acc, f1 in zip(slots, accuracies, f1_scores):
        plt.text(slot - half, acc + 0.02, f'{acc:.3f}', ha='center', va='bottom')
        plt.text(slot + half, f1 + 0.02, f'{f1:.3f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

    return results_df

# Evaluate all models
results_df = evaluate_model_performance(results)

In [None]:
# Feature Importance Analysis
def plot_feature_importance(models, feature_cols, top_n=15):
    """Average feature importances over all models and plot the largest ones.

    Args:
        models: Mapping of user id to a fitted model exposing
            ``feature_importances_``.
        feature_cols: Feature names, aligned with each model's importances.
        top_n: Number of features to show in the chart.

    Returns:
        DataFrame with 'feature' and 'importance' columns covering every
        feature, sorted by descending importance.
    """
    # Sum the per-model importances starting from zeros, then divide by count
    summed = sum(
        (fitted.feature_importances_ for fitted in models.values()),
        np.zeros(len(feature_cols)),
    )
    mean_importance = summed / len(models)

    ranking = pd.DataFrame({
        'feature': feature_cols,
        'importance': mean_importance
    })
    feature_importance_df = ranking.sort_values('importance', ascending=False)

    # Horizontal bars, reversed so the most important feature is drawn on top
    leaders = feature_importance_df.head(top_n)
    rows = range(len(leaders))

    plt.figure(figsize=(12, 8))
    plt.barh(rows, leaders['importance'][::-1])
    plt.yticks(rows, leaders['feature'][::-1])
    plt.xlabel('Average Feature Importance')
    plt.title(f'Top {top_n} Most Important Audio Features Across All Models')
    plt.grid(True, alpha=0.3, axis='x')
    plt.tight_layout()
    plt.show()

    return feature_importance_df

# Plot feature importance
feature_importance_df = plot_feature_importance(models, feature_cols)
print("Top 10 most important features:")
print(feature_importance_df.head(10)[['feature', 'importance']])

## Integration & System Demonstration

In [None]:
# Voice Verification Function
class VoiceVerificationSystem:
    """Verify a recording against the model trained for a claimed user.

    Attributes are stored as given:
        models: Mapping of user id to a fitted classifier.
        scalers: Mapping of user id to the scaler fitted for that classifier.
        feature_columns: Feature names, in the order used for training.
    """

    def __init__(self, models, scalers, feature_columns):
        self.models = models
        self.scalers = scalers
        self.feature_columns = feature_columns

    def verify_voice(self, audio_filepath, claimed_user_id):
        """
        Verify if the voice in audio_file matches the claimed_user_id.

        Args:
            audio_filepath: Path of the recording to check.
            claimed_user_id: User id the speaker claims to be.

        Returns: (is_verified, confidence_score). An unknown user, or any
        error while loading or scoring the audio, yields (False, 0.0), so
        the check fails closed.
        """
        # Reject users without a model and scaler before any audio work.
        if claimed_user_id not in self.scalers or claimed_user_id not in self.models:
            return False, 0.0

        try:
            # Load and extract features from the new audio
            audio, sr = librosa.load(audio_filepath, sr=None)
            features = extract_audio_features(audio, sr)

            # Build one row in training order; a missing feature defaults to 0.
            row = [features.get(col, 0) for col in self.feature_columns]

            # Pass a named DataFrame rather than a bare array so the scaler
            # can check the columns against the names it was fitted with.
            feature_frame = pd.DataFrame([row], columns=list(self.feature_columns))
            feature_vector_scaled = self.scalers[claimed_user_id].transform(feature_frame)

            # Predict
            model = self.models[claimed_user_id]
            prediction = model.predict(feature_vector_scaled)[0]
            confidence = model.predict_proba(feature_vector_scaled)[0, 1]

            # Return plain Python types rather than NumPy scalars
            return bool(prediction), float(confidence)

        except Exception as e:
            # Deliberately broad: any failure must deny access, not crash.
            print(f"Error in voice verification: {e}")
            return False, 0.0

# Build the verification system from the per-user models and scalers
voice_system = VoiceVerificationSystem(
    models=models,
    scalers=scalers,
    feature_columns=feature_cols,
)
print("Voice Verification System initialized successfully!")

In [None]:
# System Demonstration
def simulate_authentication_flow():
    """Walk through the face -> voice -> recommendation flow for fixed cases.

    For each test case whose audio file exists in AUDIO_DIR, prints the
    simulated facial-recognition step, runs voice verification for the
    claimed user, and prints whether access is granted. Missing files are
    reported and skipped. Returns nothing.
    """
    print("=== AUTHENTICATION SYSTEM DEMONSTRATION ===\n")

    # Test cases
    # NOTE(review): these file names do not match the "<user>_yes_approve.wav"
    # style names written by create_sample_audio_files() - confirm which
    # recordings are expected to be present.
    test_cases = [
        # (audio_file, claimed_user, expected_result, description)
        ('user1_yes.wav', 'user1', True, "Valid user with correct phrase"),
        ('user2_confirm.wav', 'user2', True, "Valid user with correct phrase"),
        ('user1_yes.wav', 'user2', False, "Wrong user claim"),
        ('user2_confirm.wav', 'user1', False, "Wrong user claim"),
    ]

    print("Testing authentication flow:\n")

    for audio_file, claimed_user, expected, description in test_cases:
        audio_path = os.path.join(AUDIO_DIR, audio_file)

        if not os.path.exists(audio_path):
            print(f"❌ Test file not found: {audio_file}")
            print("-" * 60)
            continue

        # Simulate facial recognition (would normally come from face model)
        print("🔹 Step 1: Facial Recognition")
        print(f"   - User presents face → Recognized as '{claimed_user}'")

        # Voice verification
        print("🔹 Step 2: Voice Verification")
        print(f"   - User says phrase from: {audio_file}")

        is_verified, confidence = voice_system.verify_voice(audio_path, claimed_user)

        print(f"   - Voice Match: {'✅ ACCEPT' if is_verified else '❌ REJECT'}")
        print(f"   - Confidence: {confidence:.4f}")
        # Check mark when the outcome equals the test case's expected result
        print(f"   - Expected: {'✅' if is_verified == expected else '❌'}")

        # Product recommendation (if both steps pass)
        if is_verified:
            print("🔹 Step 3: Product Recommendation")
            print("   - ✅ ACCESS GRANTED")
            print(f"   - Displaying personalized products for {claimed_user}...")
            # Here you would integrate with the product recommendation model
            products = ["Smart Watch Pro", "Wireless Earbuds", "Fitness Tracker"]
            print(f"   - Recommended: {np.random.choice(products)}")
        else:
            print("🔹 Step 3: Access Denied")
            print("   - ❌ ACCESS DENIED - Voice verification failed")

        print(f"   Description: {description}")
        print("-" * 60)

# Run demonstration
simulate_authentication_flow()

In [None]:
# Simulate Unauthorized Attempt
def simulate_unauthorized_attempt():
    """Replay other users' recordings against user1's voice model.

    Prints a scripted impersonation scenario, then runs each listed recording
    found in AUDIO_DIR through voice verification with 'user1' as the claimed
    identity and reports whether it was rejected. Returns nothing.
    """
    print("\n=== SIMULATING UNAUTHORIZED ACCESS ATTEMPT ===\n")

    # Simulate someone trying to impersonate a user
    print("🚨 SECURITY SCENARIO: Impersonation Attempt")
    print("An unauthorized person tries to access user1's account...\n")

    # They somehow pass facial recognition (stolen photo?)
    print("🔹 Step 1: Facial Recognition (bypassed with photo)")
    print("   - System recognizes: 'user1'")

    # But their voice doesn't match
    print("🔹 Step 2: Voice Verification")

    # Try different unauthorized scenarios
    unauthorized_scenarios = [
        ('user2_yes.wav', 'user2 trying to impersonate user1'),
        ('user3_confirm.wav', 'user3 trying to impersonate user1'),
    ]

    for audio_file, scenario in unauthorized_scenarios:
        audio_path = os.path.join(AUDIO_DIR, audio_file)

        if not os.path.exists(audio_path):
            print(f"   - Test file not found: {audio_file}")
            continue

        is_verified, confidence = voice_system.verify_voice(audio_path, 'user1')

        print(f"   - Scenario: {scenario}")
        print(f"   - Voice Match: {'❌ REJECT' if not is_verified else '⚠️  FALSE ACCEPT (Security Breach!)'}")
        print(f"   - Confidence: {confidence:.4f}")

        if not is_verified:
            print("   - ✅ SECURITY SYSTEM WORKING: Unauthorized access prevented!")
        else:
            print("   - 🚨 SECURITY BREACH: System incorrectly accepted unauthorized user!")

        print("   " + "-" * 40)

# Run unauthorized attempt simulation
simulate_unauthorized_attempt()

In [None]:
# Save the Complete Model System
import joblib

def save_complete_system():
    """Persist the trained system and the feature-importance table.

    Writes 'voice_verification_system.pkl' (models, scalers, feature columns,
    the VoiceVerificationSystem instance, the feature table and the results)
    and 'feature_importance.csv' to the current working directory.
    """
    # SECURITY NOTE: joblib files are pickle-based. Loading one can execute
    # arbitrary code, so only load this file from a trusted source.
    system_components = {
        'voice_models': models,
        'scalers': scalers,
        'feature_columns': feature_cols,
        'voice_system': voice_system,
        'audio_features_df': audio_features_df,
        'results': results
    }

    joblib.dump(system_components, 'voice_verification_system.pkl')
    print("✅ Complete voice verification system saved to 'voice_verification_system.pkl'")

    # Also save feature importance
    feature_importance_df.to_csv('feature_importance.csv', index=False)
    print("✅ Feature importance saved to 'feature_importance.csv'")

save_complete_system()

# Final summary of the run
print("\n" + "="*70)
print("VOICEPRINT VERIFICATION SYSTEM DEVELOPMENT COMPLETE!")
print("="*70)
print("\nSummary:")
print(f"• Users in system: {list(users)}")
print(f"• Total audio samples processed: {len(audio_features_df)}")
print(f"• Number of audio features extracted: {len(feature_cols)}")
print(f"• Average accuracy across users: {results_df['accuracy'].mean():.4f}")
print(f"• Average F1-score across users: {results_df['f1_score'].mean():.4f}")
print("• Models saved and ready for integration with facial recognition system")