In [None]:
# Install every dependency of this notebook in a single resolver pass.
#
# `%pip` is used instead of `!pip` so the packages land in the environment of
# the running kernel; `!pip` runs whichever `pip` is first on the shell PATH,
# which may belong to a different interpreter.
#
#   - tensorflow, tensorflowjs, scikit-learn, joblib: training, export, persistence
#   - librosa, soundfile: audio loading and MFCC extraction
#   - pandas, numpy, matplotlib, seaborn: data handling and plotting
#   - kaggle: command-line client used to download the dataset
#
# NOTE(review): versions are not pinned, so a fresh install may resolve to
# different releases than the ones this notebook was written against. Pin them
# (pkg==x.y.z) once a known-good combination has been confirmed.
%pip install tensorflow tensorflowjs librosa scikit-learn pandas numpy matplotlib seaborn joblib kaggle soundfile


Collecting tensorflowjs
  Downloading tensorflowjs-4.22.0-py3-none-any.whl.metadata (3.2 kB)
Collecting flax>=0.7.2 (from tensorflowjs)
  Downloading flax-0.10.7-py3-none-any.whl.metadata (11 kB)
Collecting importlib_resources>=5.9.0 (from tensorflowjs)
  Downloading importlib_resources-6.5.2-py3-none-any.whl.metadata (3.9 kB)
Collecting jax>=0.4.13 (from tensorflowjs)
  Downloading jax-0.6.2-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib>=0.4.13 (from tensorflowjs)
  Downloading jaxlib-0.6.2-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.3 kB)
Collecting tensorflow-decision-forests>=1.5.0 (from tensorflowjs)
  Downloading tensorflow_decision_forests-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)
Collecting packaging (from tensorflow)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting optax (from flax>=0.7.2->tensorflowjs)
  Downloading optax-0.2.6-py3-none-any.whl.metadata (7.6 kB)
Collecting orbax-checkpoint (from fl

In [None]:
# wake_word_trainer.py - Complete Wake Word Detection Training Script
import os
import numpy as np
import pandas as pd
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
import joblib

class WakeWordDetector:
    """Train a two-class (wake word / not wake word) classifier on MFCC features.

    Each audio clip is reduced to a single vector of ``n_mfcc`` time-averaged
    MFCC coefficients, which is fed to a small dense network.

    Attributes:
        sample_rate: Rate (Hz) every clip is resampled to when loaded.
        duration: Number of seconds read from each clip.
        n_mfcc: Number of MFCC coefficients per clip.
        max_len: Fixed clip length in samples, ``int(sample_rate * duration)``.
    """

    # File extensions recognised as single-file Keras model formats.
    _MODEL_EXTENSIONS = ('.keras', '.h5')

    def __init__(self, sample_rate=16000, duration=1.0, n_mfcc=40):
        self.sample_rate = sample_rate
        self.duration = duration
        self.n_mfcc = n_mfcc
        self.max_len = int(sample_rate * duration)

    def download_dataset(self, data_dir="speech_commands_data"):
        """Download and unzip the `yashdogra/speech-commands` Kaggle dataset.

        Runs the `kaggle` command-line client without a shell, so `data_dir`
        is passed as a single argument and is never interpreted by a shell.

        Args:
            data_dir: Directory the archive is downloaded and unzipped into.

        Returns:
            True only if the `kaggle` command exited with status 0 and
            `data_dir` exists afterwards; False otherwise (a hint for manual
            download is printed).
        """
        # Local imports: these modules are only needed for the download step.
        import shutil
        import subprocess

        manual_hint = "Please manually download from: https://www.kaggle.com/datasets/yashdogra/speech-commands"

        if shutil.which("kaggle") is None:
            print("❌ Error downloading: the 'kaggle' command was not found on PATH")
            print(manual_hint)
            return False

        command = [
            "kaggle", "datasets", "download",
            "-d", "yashdogra/speech-commands",
            "-p", data_dir,
            "--unzip",
        ]
        try:
            completed = subprocess.run(command, check=False)
        except OSError as e:
            # The executable vanished or could not be started.
            print(f"❌ Error downloading: {e}")
            print(manual_hint)
            return False

        # A non-zero status (missing credentials, no network, ...) must be
        # reported as failure; otherwise the caller goes on to list a
        # directory that does not exist.
        if completed.returncode != 0:
            print(f"❌ Error downloading: kaggle exited with status {completed.returncode}")
            print(manual_hint)
            return False
        if not os.path.isdir(data_dir):
            print(f"❌ Error downloading: {data_dir} was not created")
            print(manual_hint)
            return False

        print(f"✅ Dataset downloaded to {data_dir}")
        return True

    def extract_features(self, audio_path):
        """Extract a time-averaged MFCC vector from one audio file.

        The clip is loaded at `self.sample_rate` (at most `self.duration`
        seconds), zero-padded or truncated to exactly `self.max_len` samples,
        converted to MFCCs and averaged over the frame axis.

        Args:
            audio_path: Path of the audio file to load.

        Returns:
            1-D array with `self.n_mfcc` values, or None if loading or
            processing failed (the error is printed).
        """
        try:
            # Load audio
            audio, sr = librosa.load(audio_path, sr=self.sample_rate, duration=self.duration)

            # Pad or truncate to fixed length
            if len(audio) > self.max_len:
                audio = audio[:self.max_len]
            else:
                audio = np.pad(audio, (0, self.max_len - len(audio)), 'constant')

            # Extract MFCC features; averaging over frames discards timing
            # information but yields a fixed-size vector per clip.
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=self.n_mfcc, n_fft=2048, hop_length=512)
            mfccs_processed = np.mean(mfccs.T, axis=0)

            return mfccs_processed
        except Exception as e:
            # Best effort: a single unreadable file must not abort the run.
            print(f"Error processing {audio_path}: {e}")
            return None

    def create_dataset(self, data_dir, wake_words=['yes', 'no'], max_negative_samples=800):
        """Build feature and label arrays from a directory-per-word dataset.

        Every sub-directory of `data_dir` except `_background_noise_` is
        treated as one spoken word. Directories whose lower-cased name is in
        `wake_words` are labelled 'wake_word'; all others 'not_wake_word'.

        Args:
            data_dir: Root directory containing one sub-directory per word.
            wake_words: Words treated as the positive class (case-insensitive).
            max_negative_samples: Maximum number of files taken from EACH
                non-wake-word directory; None means no limit.

        Returns:
            Tuple `(features, labels)` of numpy arrays with one entry per
            successfully processed file.
        """
        features = []
        labels = []
        # Lower-case once instead of on every directory.
        wake_set = {w.lower() for w in wake_words}

        print(f"🔄 Processing audio files from {data_dir}...")

        # Sorted listings make the truncated negative subset reproducible;
        # os.listdir returns entries in arbitrary order.
        for word_dir in sorted(os.listdir(data_dir)):
            word_path = os.path.join(data_dir, word_dir)
            if not os.path.isdir(word_path) or word_dir == '_background_noise_':
                continue

            word = word_dir.lower()

            # Label as wake word or not
            if word in wake_set:
                label = 'wake_word'
                max_samples = None  # Use all wake word samples
            else:
                label = 'not_wake_word'
                max_samples = max_negative_samples  # Limit negatives per word

            print(f"  Processing '{word}' -> {label}")

            audio_files = sorted(f for f in os.listdir(word_path) if f.lower().endswith('.wav'))
            if max_samples is not None:
                audio_files = audio_files[:max_samples]

            for i, audio_file in enumerate(audio_files):
                audio_path = os.path.join(word_path, audio_file)
                feature = self.extract_features(audio_path)

                if feature is not None:
                    features.append(feature)
                    labels.append(label)

                if i % 100 == 0 and i > 0:
                    print(f"    Processed {i}/{len(audio_files)} files...")

        print(f"✅ Total samples processed: {len(features)}")
        return np.array(features), np.array(labels)

    def create_model(self, input_shape, positive_class_id=None):
        """Create and compile the dense classifier.

        Args:
            input_shape: Shape of one feature vector, e.g. `(n_mfcc,)`.
            positive_class_id: Index of the output unit precision and recall
                are computed for. None (the default) evaluates them over both
                output units.

        Returns:
            A compiled `tf.keras.Sequential` model with a 2-unit softmax
            output, reporting loss, accuracy, precision and recall.
        """
        model = tf.keras.Sequential([
            layers.Input(shape=input_shape),

            layers.Dense(128, activation='relu'),
            layers.Dropout(0.3),
            layers.BatchNormalization(),

            layers.Dense(256, activation='relu'),
            layers.Dropout(0.4),
            layers.BatchNormalization(),

            layers.Dense(128, activation='relu'),
            layers.Dropout(0.3),

            layers.Dense(64, activation='relu'),
            layers.Dropout(0.2),

            layers.Dense(2, activation='softmax')  # Binary classification
        ])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss='categorical_crossentropy',
            # Metric objects instead of the strings 'precision'/'recall':
            # the string aliases are not resolvable on every Keras version.
            metrics=[
                'accuracy',
                tf.keras.metrics.Precision(name='precision', class_id=positive_class_id),
                tf.keras.metrics.Recall(name='recall', class_id=positive_class_id),
            ]
        )

        return model

    def train_model(self, data_dir="speech_commands_data", wake_words=['yes', 'no'], model_save_path="hey_jaan_model"):
        """Run the full pipeline: download, featurise, train, evaluate, save.

        The data is split into training, validation and test parts. Early
        stopping, learning-rate reduction and checkpointing monitor the
        validation part only, so the test part is used solely for the final
        reported metrics.

        Args:
            data_dir: Dataset directory; downloaded if it does not exist.
            wake_words: Words forming the positive class.
            model_save_path: Path prefix for all saved artifacts. If it does
                not already end in '.keras' or '.h5', the model is written to
                `model_save_path + '.keras'`.

        Returns:
            `(model, history)` on success, `(None, None)` if the dataset is
            unavailable, empty, or contains only one class.
        """

        # Step 1: Download dataset
        if not os.path.exists(data_dir):
            print("📥 Dataset not found. Downloading...")
            if not self.download_dataset(data_dir):
                return None, None

        # Step 2: Create dataset
        features, labels = self.create_dataset(data_dir, wake_words)

        if len(features) == 0:
            print("❌ No features extracted!")
            return None, None

        unique, counts = np.unique(labels, return_counts=True)
        if len(unique) < 2:
            # E.g. none of `wake_words` exists as a directory in the dataset.
            print(f"❌ Need samples of both classes, found only: {list(unique)}")
            return None, None

        # Step 3: Encode labels
        label_encoder = LabelEncoder()
        y_encoded = label_encoder.fit_transform(labels)
        y_categorical = tf.keras.utils.to_categorical(y_encoded, num_classes=2)
        wake_class_id = int(label_encoder.transform(['wake_word'])[0])

        # Show label distribution
        print(f"\n📊 Label Distribution:")
        for label, count in zip(unique, counts):
            print(f"   {label}: {count} samples")

        # Step 4: Split data (test first, then validation out of the rest)
        X_trainval, X_test, y_trainval, y_test, labels_trainval, _ = train_test_split(
            features, y_categorical, labels, test_size=0.2, random_state=42, stratify=labels
        )
        X_train, X_val, y_train, y_val = train_test_split(
            X_trainval, y_trainval, test_size=0.2, random_state=42, stratify=labels_trainval
        )

        print(f"\n📈 Data Split:")
        print(f"   Training: {len(X_train)} samples")
        print(f"   Validation: {len(X_val)} samples")
        print(f"   Testing: {len(X_test)} samples")

        # Step 5: Create and train model
        model = self.create_model((features.shape[1],), positive_class_id=wake_class_id)

        print(f"\n🧠 Model Architecture:")
        model.summary()

        # Resolve file names. Sidecar files keep the historical
        # `<model_save_path>_...` naming when no extension was supplied.
        stem, ext = os.path.splitext(model_save_path)
        if ext.lower() in self._MODEL_EXTENSIONS:
            model_file = model_save_path
            artifact_prefix = stem
        else:
            model_file = f"{model_save_path}.keras"
            artifact_prefix = model_save_path

        # Callbacks for better training
        callbacks = [
            tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, monitor='val_accuracy'),
            tf.keras.callbacks.ReduceLROnPlateau(patience=5, factor=0.5, monitor='val_loss'),
            tf.keras.callbacks.ModelCheckpoint(f"{artifact_prefix}_best.h5", save_best_only=True, monitor='val_accuracy')
        ]

        print(f"\n🚀 Starting training...")
        history = model.fit(
            X_train, y_train,
            epochs=50,
            batch_size=32,
            validation_data=(X_val, y_val),
            callbacks=callbacks,
            verbose=1
        )

        # Step 6: Evaluate on data never seen during training or model selection
        test_loss, test_acc, test_precision, test_recall = model.evaluate(X_test, y_test, verbose=0)
        # Epsilon avoids division by zero when precision and recall are both 0.
        f1_score = 2 * (test_precision * test_recall) / (test_precision + test_recall + 1e-8)

        print(f"\n🎯 Final Results:")
        print(f"   Accuracy: {test_acc:.4f}")
        print(f"   Precision: {test_precision:.4f}")
        print(f"   Recall: {test_recall:.4f}")
        print(f"   F1-Score: {f1_score:.4f}")

        # Step 7: Save everything
        model.save(model_file)
        joblib.dump(label_encoder, f"{artifact_prefix}_label_encoder.pkl")

        # Save model info
        model_info = {
            'wake_words': wake_words,
            'sample_rate': self.sample_rate,
            'duration': self.duration,
            'n_mfcc': self.n_mfcc,
            'test_accuracy': float(test_acc),
            'test_precision': float(test_precision),
            'test_recall': float(test_recall),
            'f1_score': float(f1_score),
            'model_file': model_file
        }
        joblib.dump(model_info, f"{artifact_prefix}_info.pkl")

        print(f"\n💾 Model saved:")
        print(f"   Model: {model_file}")
        print(f"   Label encoder: {artifact_prefix}_label_encoder.pkl")
        print(f"   Info: {artifact_prefix}_info.pkl")

        return model, history

    @staticmethod
    def _resolve_model_path(model_path):
        """Return the existing file for `model_path`, trying known extensions.

        `model_path` itself is returned if it exists, or if no candidate with
        a '.keras' / '.h5' suffix exists either.
        """
        if os.path.exists(model_path):
            return model_path
        for ext in WakeWordDetector._MODEL_EXTENSIONS:
            candidate = f"{model_path}{ext}"
            if os.path.exists(candidate):
                return candidate
        return model_path

    def convert_to_tfjs(self, model_path, output_path):
        """Convert a saved Keras model to TensorFlow.js format.

        Args:
            model_path: Path of the saved model. The same extension-less
                prefix that was passed to `train_model` is accepted.
            output_path: Directory the TensorFlow.js artifacts are written to.

        Returns:
            True on success, False if an ImportError occurred (typically
            because `tensorflowjs` is not installed). Other errors propagate.
        """
        try:
            import tensorflowjs as tfjs
            model = tf.keras.models.load_model(self._resolve_model_path(model_path))
            tfjs.converters.save_keras_model(model, output_path)
            print(f"✅ TensorFlow.js model saved to: {output_path}")
            return True
        except ImportError:
            print("❌ Install tensorflowjs: pip install tensorflowjs")
            return False

# Main training script
if __name__ == "__main__":
    # Entry point: configure, train, then attempt the TensorFlow.js export.
    print("🎤 Wake Word Detection Model Trainer")
    print("=" * 50)

    # Initialize
    detector = WakeWordDetector(sample_rate=16000, duration=1.0, n_mfcc=40)

    # Configuration
    DATA_DIR = "speech_commands_data"
    WAKE_WORDS = ['yes', 'no']  # Change to any words from dataset
    MODEL_NAME = "hey_jaan_wake_word_model"

    print(f"⚙️  Configuration:")
    print(f"   Wake words: {WAKE_WORDS}")
    print(f"   Data directory: {DATA_DIR}")
    print(f"   Model name: {MODEL_NAME}")

    # Train model; train_model returns (None, None) when it cannot proceed.
    model, history = detector.train_model(
        data_dir=DATA_DIR,
        wake_words=WAKE_WORDS,
        model_save_path=MODEL_NAME
    )

    # Compare against None explicitly rather than relying on the truthiness
    # of a model object.
    if model is not None:
        print(f"\n🎉 Training completed!")

        # Convert to TensorFlow.js; the call returns False when it could not
        # produce the web model, so only advertise it when it exists.
        tfjs_path = f"{MODEL_NAME}_tfjs"
        web_model_ready = detector.convert_to_tfjs(MODEL_NAME, tfjs_path)

        print(f"\n🚀 Ready to use:")
        print(f"   Python model: {MODEL_NAME}")
        if web_model_ready:
            print(f"   Web model: {tfjs_path}")
        else:
            print("   Web model: not created (TensorFlow.js conversion failed)")
        print(f"\n📝 Available words in dataset:")
        print("   yes, no, up, down, left, right, on, off, stop, go,")
        print("   zero, one, two, three, four, five, six, seven, eight, nine,")
        print("   bed, bird, cat, dog, happy, house, marvin, sheila, tree, wow")
    else:
        print("❌ Training failed!")


FileNotFoundError: [Errno 2] No such file or directory: 'wake_word_data/'

In [4]:
import os

# Show the directory the kernel is running in; relative paths used by the
# notebook are resolved against it.
working_dir = os.getcwd()
print(working_dir)


/home/prajwal/vscode/neonexus-web
