# 🚀 Wakeword Training – Live Control & GPU Setup

Bu defter, Gradio uygulamasındaki eğitimle aynı paket sürümleri ve CUDA ayarlarını kullanır. Aşağıdaki hücreler ile eğitim oturumunu başlatabilir, duraklatabilir, devam ettirebilir, son kontrol noktasından (checkpoint) yeniden başlatabilir ve canlı metrikleri görüntüleyebilirsiniz.

In [None]:
# GPU and package check (same environment as the Gradio app)
import torch
import numpy as np

# Report library versions and CUDA state in one dict. The device name is
# looked up only when CUDA is available; otherwise the entry is None.
print(dict(
    numpy=np.__version__,
    torch=torch.__version__,
    cuda_available=torch.cuda.is_available(),
    cuda_device=(
        torch.cuda.get_device_name(0) if torch.cuda.is_available() else None
    ),
))

## Configuration

In [None]:
# Training control API – shared with gradio_app.py
# `os` is used by later cells (os.path.exists checks); `importlib` is not
# referenced in the cells visible here.
import importlib, os
import gradio_app as appmod

# Singleton application instance, taken from the gradio_app module
app = appmod.app  # WakewordTrainingApp instance
# Handle to the app's trainer attribute, kept as a notebook-level name
trainer = app.trainer

# Show which device the app reports
print('Device:', app.device)


## Start Training

In [None]:
# Start training (auto-extend on: enabled on the gradio_app.py side, per the original note)
# Note: if load_data was already run in Gradio, it does not need to be repeated; start_training can be called directly.

# Hyperparameters are passed by keyword; whatever start_training returns is
# kept in `status` and printed.
status = app.start_training(
    epochs=10,
    lr=1e-4,
    batch_size=32,
    dropout=0.6,
)
print(status)

## Live Training Metrics

In [None]:
# Display live metrics
# get_training_status() is unpacked into three values: status, fig, metrics.
status, fig, metrics = app.get_training_status()
print(status)
print(metrics)
# Bare expression kept as the last line so the notebook displays it
# (presumably a plot figure — confirm against gradio_app).
fig

## Pause / Resume Training

In [None]:
# Pause / Resume
# NOTE: run as a single cell, the two calls below execute back to back;
# split them into separate cells to actually wait between pause and resume.
print(app.pause_training())
# ... to resume after waiting for a while:
print(app.resume_training())

## Continue from Checkpoint

In [None]:
# Resume from checkpoint
# Prints whatever continue_from_checkpoint() returns.
print(app.continue_from_checkpoint())

## Data Loading and Preparation

In [None]:
# Modified Data Loading Section
def prepare_enhanced_datasets(wakeword_dir='positive_dataset',
                             negative_dir='negative_dataset',
                             background_dir='background_noise',
                             hard_negative_dir='hard_negatives'):
    """Placeholder for collecting and categorising the dataset files.

    Intended directory layout, as described by the original note:

    - ``wakeword_dir``: positive (wakeword) samples.
    - ``negative_dir``: general negative samples.
    - ``hard_negative_dir``: phonetically similar negatives, if they are
      kept separately.
    - ``background_dir``: background recordings (the original note mentions
      66 hours of material).

    Returns:
        None. This definition contains no implementation, so none of the
        arguments are read.

    NOTE(review): create_enhanced_dataloaders indexes its ``data_splits``
    argument with 'train', 'val' and 'test'; passing this function's current
    return value there raises TypeError. The file discovery and splitting
    logic still has to be supplied.
    """
    # Explicit no-op return so the stub nature is visible at a glance.
    return None

## Model Training

In [None]:
# Update the training cell to use enhanced dataset
def create_enhanced_dataloaders(data_splits, processor, batch_size=16, num_workers=2):
    """Build the train/val/test DataLoaders over EnhancedWakewordDataset.

    Args:
        data_splits: Mapping indexed with 'train', 'val' and 'test'. Each
            value must unpack into four file collections, in this order:
            wakeword, hard-negative, random-negative, background.
        processor: Passed through unchanged to every EnhancedWakewordDataset.
        batch_size: Batch size shared by all three loaders.
        num_workers: Worker count shared by all three loaders. Defaults to 2,
            the value that was previously hard-coded.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.

    Raises:
        KeyError: If ``data_splits`` is a dict missing one of the split keys.
        ValueError: If a split does not unpack into exactly four items.
    """
    # Per-split dataset settings. Only the training split augments; it also
    # mixes background noise more often and over a wider SNR range than the
    # validation and test splits.
    split_settings = {
        'train': dict(augment=True, background_mix_prob=0.7, snr_range=(0, 20)),
        'val': dict(augment=False, background_mix_prob=0.5, snr_range=(5, 15)),
        'test': dict(augment=False, background_mix_prob=0.5, snr_range=(5, 15)),
    }

    # Unpack every split before building anything, so a malformed
    # data_splits fails before any dataset is constructed.
    split_files = {}
    for split_name in split_settings:
        wake_files, hard_files, rand_files, bg_files = data_splits[split_name]
        split_files[split_name] = (wake_files, hard_files, rand_files, bg_files)

    # Create datasets (train, val, test order)
    datasets = {}
    for split_name, settings in split_settings.items():
        wake_files, hard_files, rand_files, bg_files = split_files[split_name]
        datasets[split_name] = EnhancedWakewordDataset(
            wakeword_files=wake_files,
            hard_negative_files=hard_files,
            random_negative_files=rand_files,
            background_files=bg_files,
            processor=processor,
            **settings,
        )

    # Pinned host memory is requested only when CUDA is available.
    pin_memory = torch.cuda.is_available()

    # Create DataLoaders; only the training loader shuffles.
    train_loader, val_loader, test_loader = (
        DataLoader(
            datasets[split_name],
            batch_size=batch_size,
            shuffle=(split_name == 'train'),
            num_workers=num_workers,
            pin_memory=pin_memory,
        )
        for split_name in split_settings
    )

    return train_loader, val_loader, test_loader


# Usage in your notebook:
if __name__ == "__main__":
    # Collect the per-split file lists
    data_splits = prepare_enhanced_datasets()

    # AudioProcessor is expected to be defined by an earlier cell
    processor = AudioProcessor()

    # Build the three loaders with the configured batch size
    train_loader, val_loader, test_loader = create_enhanced_dataloaders(
        data_splits, processor, batch_size=TrainingConfig.BATCH_SIZE
    )

    # Static summary lines, emitted one per line
    print(
        "\n🚀 Ready for training with background noise integration!",
        "   Background noise mixing probability: 70% for training",
        "   SNR range: 0-20 dB for training, 5-15 dB for validation",
        sep="\n",
    )
    # Batch counts come from the loader lengths
    print(f"   Total training batches: {len(train_loader)}")
    print(f"   Total validation batches: {len(val_loader)}")

## Model Evaluation

In [None]:
# Load best model for evaluation
if os.path.exists('best_wakeword_model.pth'):
    # All metric helpers used in this cell are imported together. The
    # previous import list omitted classification_report, which is called at
    # the end of the cell.
    from sklearn.metrics import (
        accuracy_score,
        classification_report,
        confusion_matrix,
        f1_score,
        precision_score,
        recall_score,
    )

    # NOTE: torch.load unpickles the file; only load checkpoints from a
    # trusted source.
    checkpoint = torch.load('best_wakeword_model.pth', map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f"✅ Best model loaded (epoch {checkpoint['epoch'] + 1}, val_acc: {checkpoint['val_acc']:.2f}%)")

    # Evaluate on test set
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for data, target in tqdm(test_loader, desc="Evaluating"):
            # reshape(-1) rather than squeeze(): squeeze() also drops the
            # batch axis when a batch holds a single sample, leaving a 0-d
            # tensor whose numpy array extend() cannot iterate.
            # Assumes one label per sample — TODO confirm against the dataset.
            data, target = data.to(device), target.to(device).reshape(-1)
            output = model(data)
            _, predicted = torch.max(output, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(target.cpu().numpy())

    # Calculate metrics (weighted averages across the classes)
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print("\n📊 Test Set Performance:")
    print(f"   Accuracy: {accuracy:.4f}")
    print(f"   Precision: {precision:.4f}")
    print(f"   Recall: {recall:.4f}")
    print(f"   F1-Score: {f1:.4f}")

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Negative', 'Wakeword'],
                yticklabels=['Negative', 'Wakeword'])
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

    # Classification report
    print("\n📋 Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=['Negative', 'Wakeword']))

else:
    print("❌ No trained model found. Please run the training cells first.")

## Model Inference

In [None]:
def predict_wakeword(audio_file_path, model, processor, device, threshold=0.8):
    """Classify a single audio file as wakeword / not wakeword.

    Args:
        audio_file_path: Path handed to ``processor.process_audio_file``.
        model: Callable network; switched to eval mode before inference.
        processor: Object exposing ``process_audio_file(path, augment=...)``.
        device: Device the input tensor is moved to.
        threshold: Minimum class-1 probability counted as a detection.

    Returns:
        ``(is_wakeword, wakeword_prob)``. When the processor returns None an
        error line is printed and ``(False, 0.0)`` is returned.
    """
    model.eval()

    features = processor.process_audio_file(audio_file_path, augment=False)
    if features is None:
        print(f"Error processing audio file: {audio_file_path}")
        return False, 0.0

    # Prepend two singleton axes before moving the input to the device.
    batch = torch.FloatTensor(features)[None, None].to(device)

    # Inference only: no gradients are tracked.
    with torch.no_grad():
        logits = model(batch)
        # Probability of class index 1 for the single item in the batch.
        wakeword_prob = torch.softmax(logits, dim=1)[0, 1].item()

    return wakeword_prob >= threshold, wakeword_prob

# Test prediction function
if os.path.exists('best_wakeword_model.pth'):
    print("\n🎯 Wakeword Detection System Ready!")
    print("You can now use the predict_wakeword function for real-time detection.")
    print("\nExample usage:")
    print("is_wakeword, confidence = predict_wakeword('./test_files/3.wav', model, processor, device)")
    # Plain literal: this line is illustrative text, not a formatted value
    # (the former f-string only contained escaped braces).
    print("Result: {'wakeword_detected': is_wakeword, 'confidence': confidence:.2f}")

    # Create a simple test function
    def test_audio_file(file_path, threshold=0.8):
        """Run predict_wakeword on one file and print the outcome.

        Args:
            file_path: Audio file to test; a "not found" line is printed if
                the path does not exist.
            threshold: Detection threshold forwarded to predict_wakeword and
                echoed in the report, so the printed value always matches the
                one actually applied (it was previously a hard-coded "0.80").

        Uses the notebook-level ``model``, ``processor`` and ``device``.
        """
        if not os.path.exists(file_path):
            print(f"\n❌ File not found: {file_path}")
            return

        is_wakeword, confidence = predict_wakeword(
            file_path, model, processor, device, threshold=threshold
        )
        print(f"\n📁 Testing: {file_path}")
        print(f"   Wakeword detected: {is_wakeword}")
        print(f"   Confidence: {confidence:.2f}")
        print(f"   Threshold: {threshold:.2f}")

    # Direct test with ./test_files/3.wav
    test_audio_file("./test_files/3.wav")
else:
    print("❌ Model not trained yet. Please run training cells first.")


## Save Final Model

In [None]:
# Save the complete model for deployment
if os.path.exists('best_wakeword_model.pth'):
    # Bundle the current model weights with the configuration values and
    # checkpoint metadata into a single dictionary.
    deployment_package = dict(
        model_state_dict=model.state_dict(),
        model_config={
            key: getattr(ModelConfig, key)
            for key in ('HIDDEN_SIZE', 'NUM_LAYERS', 'DROPOUT', 'NUM_CLASSES')
        },
        audio_config={
            key: getattr(AudioConfig, key)
            for key in (
                'SAMPLE_RATE', 'DURATION', 'N_MELS', 'N_FFT',
                'HOP_LENGTH', 'FMIN', 'FMAX',
            )
        },
        training_info={
            # `checkpoint` comes from the evaluation cell; missing keys fall back to 0.
            'best_val_accuracy': checkpoint.get('val_acc', 0),
            'epoch': checkpoint.get('epoch', 0) + 1,
            'device': str(device),
        },
        classes=['negative', 'wakeword'],
    )

    # Save deployment package
    torch.save(deployment_package, 'wakeword_deployment_model.pth')
    print("✅ Deployment model saved as 'wakeword_deployment_model.pth'")

    # Write a human-readable summary of the architecture next to the weights
    with open('model_architecture.txt', 'w') as f:
        f.writelines([
            "Wakeword Detection Model Architecture\n",
            "================================\n\n",
            "Model Type: CNN + LSTM\n",
            f"Input Shape: (1, {AudioConfig.N_MELS}, 31)\n",
            f"Hidden Size: {ModelConfig.HIDDEN_SIZE}\n",
            f"Number of Layers: {ModelConfig.NUM_LAYERS}\n",
            f"Dropout: {ModelConfig.DROPOUT}\n",
            f"Number of Classes: {ModelConfig.NUM_CLASSES}\n",
            f"Parameters: {sum(p.numel() for p in model.parameters()):,}\n",
            f"Device: {device}\n",
        ])

    print("✅ Model architecture saved as 'model_architecture.txt'")

    # Final listing of the files involved
    print(
        "\n🎉 Model deployment package ready!",
        "Files created:",
        "   - wakeword_deployment_model.pth (complete model)",
        "   - model_architecture.txt (model specs)",
        "   - best_wakeword_model.pth (training checkpoint)",
        sep="\n",
    )

else:
    print("❌ No trained model found to save.")

## System Summary

In [None]:
# Closing checklist, one item per output line.
print(
    "\n🚀 Next Steps:",
    "   1. Add your wakeword recordings to positive_dataset/",
    "   2. Add negative samples to negative_dataset/",
    "   3. Add background noise to background_noise/",
    "   4. Run the training cells above",
    "   5. Use the trained model for wakeword detection",
    sep="\n",
)