# VoiceAccess Basic Usage Tutorial

This notebook demonstrates basic usage of VoiceAccess for speech recognition in low-resource languages.

In [None]:
# Import required libraries
import sys
from pathlib import Path

# Add the parent of the current working directory to the import path so the
# `src` package below can be imported (assumes the kernel was started from a
# directory one level below the one containing `src` -- TODO confirm).
# The membership check keeps this cell idempotent: re-running it does not
# pile up duplicate sys.path entries.
PROJECT_ROOT = str(Path.cwd().parent)
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.core.asr_engine import ASREngine
from src.core.config import Config
import IPython.display as ipd

## 1. Initialize ASR Engine

In [None]:
# Load default configuration (Config() is called with no arguments).
# The resulting `config` object is reused by later cells in this notebook.
config = Config()

# Or load from file (alternative to the line above; uncomment to use)
# config = Config.from_file("../configs/default.yaml")

# Initialize ASR engine from the configuration; `engine` is the object the
# following cells call for loading, transcription, adaptation and evaluation.
engine = ASREngine(config)
# Report which device the configuration specifies (read from config.device).
print(f"ASR Engine initialized with device: {config.device}")

## 2. Load Pre-trained Model

In [None]:
# Load a pre-trained model
# Note: You need to download or train a model first
model_path = "../models/pretrained/wav2vec2-base.pt"
model_type = "wav2vec2"  # also used later to name the adapted checkpoint

# Only the load itself is guarded: a FileNotFoundError raised while reporting
# model statistics must not be misreported as "model file not found".
try:
    engine.load_model(model_path, model_type=model_type)
except FileNotFoundError:
    print(f"Model file not found at {model_path}")
    print("Please download a pre-trained model or train your own.")
else:
    # Runs only when load_model completed without raising.
    print("Model loaded successfully!")
    print(f"Model size: {engine.model.get_model_size_mb():.2f} MB")
    print(f"Parameters: {engine.model.get_num_params():,}")

## 3. Transcribe Audio

In [None]:
# Example: Transcribe a single audio file
audio_path = "../data/raw/example.wav"

try:
    # Display an audio player for the file.
    # Pass the path via `filename=`: when a non-existent path is passed
    # positionally, IPython treats the string as raw audio data and raises
    # ValueError ("rate must be specified ..."), which would bypass the
    # FileNotFoundError handler below. With `filename=` a missing file
    # raises FileNotFoundError as intended.
    ipd.display(ipd.Audio(filename=audio_path))

    # Transcribe
    transcription = engine.transcribe(audio_path)
    print(f"\nTranscription: {transcription}")

    # With confidence score: return_confidence=True yields a (text, confidence) pair
    text, confidence = engine.transcribe(audio_path, return_confidence=True)
    print(f"\nTranscription: {text}")
    print(f"Confidence: {confidence:.2%}")

except FileNotFoundError:
    print(f"Audio file not found at {audio_path}")
    print("Please add audio files to the data directory.")

## 4. Batch Transcription

In [None]:
# Batch demo: transcribe every candidate recording that is present on disk.
audio_files = [
    "../data/raw/audio1.wav",
    "../data/raw/audio2.wav",
    "../data/raw/audio3.wav"
]

# Keep only the candidates that actually exist, preserving their order.
existing_files = list(filter(lambda candidate: Path(candidate).exists(), audio_files))

if not existing_files:
    print("No audio files found for batch transcription.")
else:
    transcriptions = engine.transcribe_batch(existing_files)

    # Pair each input file with the transcription at the same position.
    for audio_file, transcription in zip(existing_files, transcriptions):
        file_name = Path(audio_file).name
        print(f"\nFile: {file_name}")
        print(f"Transcription: {transcription}")

## 5. Language Adaptation

In [None]:
# Adapt model to a new language
# This requires adaptation data in the specified format

language_code = "xyz"  # Replace with your language code
adaptation_data_path = f"../data/{language_code}/"

if Path(adaptation_data_path).exists():
    print(f"Adapting model to language: {language_code}")

    # Perform adaptation
    engine.adapt_to_language(language_code, adaptation_data_path)

    # Save adapted model. `model_type` comes from the model-loading cell.
    adapted_model_path = f"../models/finetuned/{model_type}-{language_code}.pt"
    # Make sure the output directory exists before saving; nothing earlier in
    # this notebook creates it, and save_checkpoint is not known to do so
    # (NOTE(review): confirm against the model implementation).
    Path(adapted_model_path).parent.mkdir(parents=True, exist_ok=True)
    engine.model.save_checkpoint(adapted_model_path)
    print(f"Adapted model saved to: {adapted_model_path}")
else:
    print(f"No adaptation data found at {adaptation_data_path}")
    print("Please prepare your language data first.")

## 6. Model Evaluation

In [None]:
# Score the loaded model against the held-out test directory.
test_data_path = "../data/test/"

if not Path(test_data_path).exists():
    # Nothing to evaluate on; report and stop.
    print(f"No test data found at {test_data_path}")
else:
    print("Evaluating model performance...")

    # Request the "wer" and "cer" metrics in a single evaluation call.
    requested_metrics = ["wer", "cer"]
    results = engine.evaluate(test_data_path, metrics=requested_metrics)

    print("\nEvaluation Results:")
    # Each value is rendered as a percentage with two decimals.
    for metric, value in results.items():
        print(f"{metric.upper()}: {value:.2%}")

## 7. Audio Preprocessing

In [None]:
# Demonstrate audio preprocessing on a synthetic, reproducible test signal
from src.preprocessing.audio_processor import AudioProcessor
import matplotlib.pyplot as plt
import numpy as np

# Create audio processor
processor = AudioProcessor(config)

# Generate example audio: a sine tone sampled at the configured rate
duration = 3  # seconds
sample_rate = config.sample_rate
frequency = 440  # A4 note

# int() guards against a non-integer sample rate producing a float count.
num_samples = int(duration * sample_rate)
# Sample k is placed at time k / sample_rate. (np.linspace with its default
# endpoint=True would space samples duration / (num_samples - 1) apart,
# slightly off the true sampling period.)
t = np.arange(num_samples) / sample_rate
waveform = 0.5 * np.sin(2 * np.pi * frequency * t)

# Add some noise from a seeded generator so a full re-run of the notebook
# yields the same signal and the same figure.
rng = np.random.default_rng(42)
noise = 0.05 * rng.standard_normal(len(waveform))
waveform = waveform + noise

# Process audio
features = processor.process(waveform)

# Visualize: raw waveform on top, processor output below
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6))

# Plot waveform
ax1.plot(t[:1000], waveform[:1000])
ax1.set_xlabel('Time (s)')
ax1.set_ylabel('Amplitude')
ax1.set_title('Audio Waveform (first 1000 samples)')

# Plot features: multi-dimensional output is shown as an image,
# one-dimensional output as a line plot.
if features.ndim > 1:
    # Transposed for display; assumes the first axis is time frames, as the
    # axis labels below state -- TODO confirm against AudioProcessor.
    ax2.imshow(features.T, aspect='auto', origin='lower')
    ax2.set_xlabel('Time frames')
    ax2.set_ylabel('Feature dimension')
    ax2.set_title('Processed Features')
else:
    ax2.plot(features[:1000])
    ax2.set_xlabel('Sample')
    ax2.set_ylabel('Amplitude')
    ax2.set_title('Processed Waveform (first 1000 samples)')

plt.tight_layout()
plt.show()

print(f"Original waveform shape: {waveform.shape}")
print(f"Processed features shape: {features.shape}")

## 8. Configuration Management

In [None]:
# Show current configuration
print("Current Configuration:")
print(f"Model Type: {config.model_type}")
print(f"Sample Rate: {config.sample_rate} Hz")
print(f"Device: {config.device}")
print(f"Batch Size: {config.batch_size}")
print(f"Learning Rate: {config.learning_rate}")

# Update configuration.
# Note: this modifies the shared `config` object, so re-running this cell
# will show the updated values under "Current Configuration" as well.
config.update(
    batch_size=16,
    learning_rate=5e-5
)

print("\nUpdated Configuration:")
print(f"Batch Size: {config.batch_size}")
print(f"Learning Rate: {config.learning_rate}")

# Save configuration. The path is held in one variable so the message always
# reports the location that was actually written.
custom_config_path = "../configs/custom_config.yaml"
config.to_file(custom_config_path)
print(f"\nConfiguration saved to: {custom_config_path}")

## Next Steps

1. **Prepare Your Data**: Organize your audio files and transcriptions
2. **Train Models**: Use the training scripts to train on your language
3. **Fine-tune**: Adapt existing models to your specific use case
4. **Deploy**: Use the API server for production deployment
5. **Contribute**: Share your models and improvements with the community

For more examples, check the `examples/` directory and our documentation.