## 1. Install Required Libraries

In [None]:
!pip install -q librosa soundfile awscli boto3 tensorflow-hub

print("‚úÖ All libraries installed successfully!")
print("üì¶ TensorFlow Hub installed for YAMNet model")

## 2. Configure AWS S3 Access

**Add secrets in Kaggle:**
1. Settings → Add-ons → Secrets
2. Add: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`

In [None]:
import os
from kaggle_secrets import UserSecretsClient

# Load AWS credentials from Kaggle Secrets
user_secrets = UserSecretsClient()

try:
    os.environ['AWS_ACCESS_KEY_ID'] = user_secrets.get_secret('AWS_ACCESS_KEY_ID')
    os.environ['AWS_SECRET_ACCESS_KEY'] = user_secrets.get_secret('AWS_SECRET_ACCESS_KEY')
    os.environ['AWS_DEFAULT_REGION'] = user_secrets.get_secret('AWS_REGION')
    print("‚úÖ AWS credentials loaded from Kaggle secrets")
except:
    print("‚ö†Ô∏è  Kaggle secrets not found. Add them in Settings ‚Üí Secrets")
    raise

# Verify AWS access
!aws s3 ls s3://alertreck/

## 3. Download Preprocessed Data from S3

In [None]:
# Create working directory
# (one directory for validation data + config, one for the training chunks)
!mkdir -p /kaggle/working/preprocessed_data
!mkdir -p /kaggle/working/train_chunks

# Bucket name and local data directory; both are reused by later cells.
S3_BUCKET = "alertreck"
DATA_DIR = "/kaggle/working/preprocessed_data"

print("üì• Downloading preprocessed data from S3...")
print("Files: train_chunks (10x ~2GB), val_data.pkl (960MB)")
print("‚è∞ This may take 10-15 minutes depending on connection speed.\n")

# Download chunked training data
# `aws s3 sync` mirrors the remote prefix into the local train_chunks directory.
print("Downloading training chunks...")
!aws s3 sync s3://{S3_BUCKET}/preprocessed_data/train_chunks/ /kaggle/working/train_chunks/

# Download validation data and config
print("\nDownloading validation data...")
!aws s3 cp s3://{S3_BUCKET}/preprocessed_data/val_data.pkl {DATA_DIR}/val_data.pkl
!aws s3 cp s3://{S3_BUCKET}/preprocessed_data/preprocessing_config.json {DATA_DIR}/preprocessing_config.json

# NOTE(review): the exit status of the shell commands above is not checked, so
# this message is printed even if a download failed.
print("\n‚úÖ All data downloaded!")

# Load configuration
# `config` is a module-level dict read by later cells (target_sr, n_fft,
# hop_length, dataset_stats, duration).
import json
with open(f'{DATA_DIR}/preprocessing_config.json', 'r') as f:
    config = json.load(f)

print(f"\nüìä Dataset Summary:")
print(f"  Training samples: {config['dataset_stats']['train_size']:,}")
print(f"  Validation samples: {config['dataset_stats']['val_size']:,}")
print(f"  Sample rate: {config['target_sr']} Hz")
print(f"  Duration: {config['duration']} seconds")

## 4. Load YAMNet Pretrained Model

YAMNet is a deep neural network trained on AudioSet to predict audio events. We'll use it as a feature extractor.

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np

print("üîß Loading YAMNet model from TensorFlow Hub...")
print("This may take a few minutes on first run...\n")

# Load YAMNet model
YAMNET_MODEL_URL = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(YAMNET_MODEL_URL)

print("‚úÖ YAMNet model loaded successfully!")
print("\nüìã YAMNet Details:")
print("  - Pre-trained on AudioSet (2M+ audio clips, 521 classes)")
print("  - Input: 16 kHz mono audio waveform")
print("  - Output: 1024-dimensional embedding per 0.96s frame")
print("  - Architecture: MobileNetV1 (efficient for audio)")

## 5. Load Preprocessed Data and Reconstruct Audio

We need to convert mel-spectrograms back to audio waveforms for YAMNet.

In [None]:
import pickle
import librosa
import gc
from tqdm import tqdm

print("üìÇ Loading preprocessed data...\n")

# Load training data from chunks
print("Loading training chunks...")
import glob
train_data = []
chunk_files = sorted(glob.glob('/kaggle/working/train_chunks/train_chunk_*.pkl'))
for chunk_file in chunk_files:
    with open(chunk_file, 'rb') as f:
        chunk = pickle.load(f)
        train_data.extend(chunk)
        del chunk
        gc.collect()

print(f"‚úÖ Loaded {len(train_data):,} training samples")

# Load validation data
with open(f'{DATA_DIR}/val_data.pkl', 'rb') as f:
    val_data = pickle.load(f)
print(f"‚úÖ Loaded {len(val_data):,} validation samples")

print(f"\nüìä Total samples: {len(train_data) + len(val_data):,}")

## 6. Extract YAMNet Embeddings from Audio

Convert mel-spectrograms to audio and extract YAMNet features.

In [None]:
def mel_to_audio(mel_spec_db, sr=22050, n_fft=2048, hop_length=512):
    """
    Reconstruct a time-domain waveform from a dB-scaled mel-spectrogram.

    The phase is not stored in a mel-spectrogram, so it is estimated with
    the Griffin-Lim algorithm (32 iterations).

    Args:
        mel_spec_db: Mel-spectrogram in dB, e.g. shape (128, 431) as
            described by this notebook's pipeline.
        sr: Sample rate used for the mel filter bank.
        n_fft: FFT window size used for the mel -> linear inversion.
        hop_length: Hop length passed to Griffin-Lim.

    Returns:
        Reconstructed audio waveform.
    """
    # dB -> power, then mel bins -> linear-frequency STFT magnitudes.
    power_mel = librosa.db_to_power(mel_spec_db)
    linear_magnitude = librosa.feature.inverse.mel_to_stft(power_mel, sr=sr, n_fft=n_fft)

    # Estimate phase iteratively and invert to a waveform.
    return librosa.griffinlim(linear_magnitude, hop_length=hop_length, n_iter=32)


def resample_audio(audio, orig_sr, target_sr=16000):
    """
    Bring a waveform to the requested sample rate.

    Args:
        audio: Input audio waveform.
        orig_sr: Sample rate the waveform currently has.
        target_sr: Desired sample rate (defaults to 16 kHz, the rate this
            notebook feeds to YAMNet).

    Returns:
        The input object itself when the rates already match, otherwise the
        result of librosa.resample.
    """
    # Nothing to do when the rates agree: hand back the caller's object.
    if orig_sr == target_sr:
        return audio
    return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)


def extract_yamnet_embedding(audio_waveform):
    """
    Compute one clip-level YAMNet embedding for a waveform.

    Args:
        audio_waveform: Audio waveform at 16 kHz.

    Returns:
        NumPy vector obtained by averaging the per-frame YAMNet embeddings
        over the frame axis (1024-dimensional according to this notebook).
    """
    # The model is fed a float32 tensor.
    waveform = tf.convert_to_tensor(audio_waveform, dtype=tf.float32)

    # The model returns (scores, embeddings, spectrogram); only the
    # per-frame embeddings are needed here.
    _, frame_embeddings, _ = yamnet_model(waveform)

    # Collapse the frame axis so every clip yields a single vector.
    return tf.reduce_mean(frame_embeddings, axis=0).numpy()


# Informational summary of the steps implemented by the helper functions in
# this cell; these prints have no effect on the computation.
print("‚úÖ YAMNet feature extraction functions ready")
print("\nüìã Processing pipeline:")
print("  1. Mel-spectrogram (128, 431) ‚Üí Audio waveform (22050 Hz)")
print("  2. Resample audio (22050 Hz ‚Üí 16000 Hz for YAMNet)")
print("  3. Extract YAMNet embeddings (1024 features)")
print("  4. Average embeddings across time frames")

## 7. Process All Audio Files with YAMNet

Extract YAMNet embeddings for all training and validation samples.

In [None]:
def process_dataset_with_yamnet(data, split_name="train"):
    """
    Process all samples in a dataset and extract YAMNet embeddings.

    Each sample is converted mel-spectrogram -> waveform -> 16 kHz waveform
    -> averaged YAMNet embedding. Samples that raise at any step are skipped
    (best effort); the number of skipped samples is reported at the end.

    Reads the module-level `config` dict for `target_sr`, `n_fft` and
    `hop_length`.

    Args:
        data: List of preprocessed samples; each must provide
            sample['features']['mel_spectrogram'] and
            sample['label']['threat_level'].
        split_name: Name of split (for progress display)

    Returns:
        X: float32 array of embeddings, one row per successfully processed
           sample
        y: int32 array of labels aligned row-for-row with X
    """
    embeddings = []
    labels = []
    skipped = 0

    print(f"\nüîÑ Processing {split_name} data with YAMNet...")
    print(f"Total samples: {len(data):,}")
    print("This will take 5-15 minutes depending on dataset size...\n")

    for sample in tqdm(data, desc=f"Extracting {split_name} embeddings"):
        try:
            # Read both fields up front: a sample with a missing label is
            # rejected before the expensive audio reconstruction.
            mel_spec = sample['features']['mel_spectrogram']
            label = sample['label']['threat_level']

            # Convert mel-spectrogram to audio
            audio = mel_to_audio(
                mel_spec,
                sr=config['target_sr'],
                n_fft=config['n_fft'],
                hop_length=config['hop_length']
            )

            # Resample to 16 kHz (YAMNet requirement)
            audio_16k = resample_audio(audio, orig_sr=config['target_sr'], target_sr=16000)

            # Extract YAMNet embedding
            embedding = extract_yamnet_embedding(audio_16k)

        except Exception as e:
            skipped += 1
            print(f"\nError processing sample: {e}")
            continue

        # Append embedding and label together, only after every step has
        # succeeded, so X and y can never get out of alignment.
        embeddings.append(embedding)
        labels.append(label)

    # Convert to numpy arrays
    X = np.array(embeddings, dtype=np.float32)
    y = np.array(labels, dtype=np.int32)

    print(f"\n‚úÖ {split_name} processing complete!")
    print(f"  Embeddings shape: {X.shape}")
    print(f"  Labels shape: {y.shape}")
    if skipped:
        # Make partial failures visible instead of leaving them buried in
        # the per-sample error lines above.
        print(f"  Skipped samples (errors): {skipped:,} of {len(data):,}")

    return X, y


# Process training data
X_train, y_train = process_dataset_with_yamnet(train_data, "train")

# Free memory
# The raw training samples are no longer needed once the embeddings exist;
# release them before the validation split is processed.
del train_data
gc.collect()

# Process validation data
X_val, y_val = process_dataset_with_yamnet(val_data, "validation")

# Free memory
del val_data
gc.collect()

# NOTE(review): after this cell train_data / val_data are deleted, so it cannot
# be re-run without first re-running the data loading cell.
print(f"\nüìä Final Dataset Shapes:")
print(f"  X_train: {X_train.shape}")
print(f"  y_train: {y_train.shape}")
print(f"  X_val: {X_val.shape}")
print(f"  y_val: {y_val.shape}")

## 8. Prepare Data for Training

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Display names indexed by integer label id (0, 1, 2).
class_names = ['BACKGROUND', 'THREAT_CONTEXT', 'THREAT']

print("Computing class weights from training data...")

# Labels actually present in the training split.
present_classes = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=y_train
)
# Key every weight by its real label id. The returned weights are ordered like
# `present_classes`, so pairing them by position with enumerate() would assign
# weights to the wrong class whenever a label is absent from y_train
# (e.g. labels {0, 2} would otherwise produce keys {0, 1}).
class_weight_dict = {
    int(cls): weight for cls, weight in zip(present_classes, class_weights)
}

print("\nClass weights (for balanced training):")
for cls, weight in class_weight_dict.items():
    count = np.sum(y_train == cls)
    print(f"  {class_names[cls]}: {weight:.3f} (n={count:,})")

print(f"\n‚úÖ Data ready for training!")
print(f"  Input: YAMNet embeddings (1024 features)")
print(f"  Output: 3 threat classes")

## 9. Build Dense Model for YAMNet Embeddings

Since YAMNet embeddings are 1D vectors (not 2D images), we use a Dense Neural Network.

In [None]:
from tensorflow import keras
from tensorflow.keras import layers, models

# Configure GPU
# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("‚úÖ GPU memory growth enabled")
    except RuntimeError as e:
        # Best effort: a failure here is reported but does not stop the notebook.
        # NOTE(review): presumably raised when the GPUs were already
        # initialised by an earlier cell - confirm.
        print(f"‚ö†Ô∏è  Could not set memory growth: {e}")

# Enable mixed precision
# NOTE(review): the global policy is set whether or not a GPU was found above.
tf.keras.mixed_precision.set_global_policy('mixed_float16')
print("‚úÖ Mixed precision enabled\n")

# Set random seeds
# Seeds NumPy and TensorFlow so weight initialisation and shuffling are repeatable.
np.random.seed(42)
tf.random.set_seed(42)

print("üöÄ Building Dense Neural Network for YAMNet Embeddings...\n")

def build_yamnet_classifier(input_dim=1024, num_classes=3):
    """
    Assemble the dense classification head that sits on top of YAMNet
    embeddings.

    The network is three identical blocks (Dense + ReLU with L2 penalty,
    BatchNormalization, Dropout) of decreasing width, followed by a softmax
    output layer that is forced to float32.

    Args:
        input_dim: Length of the input embedding vector (1024 by default).
        num_classes: Number of output classes (3 by default).

    Returns:
        Uncompiled Keras Sequential model.
    """
    # (units, dropout rate) for each hidden block, in order.
    hidden_blocks = ((512, 0.5), (256, 0.4), (128, 0.3))

    stack = [layers.Input(shape=(input_dim,))]
    for units, drop_rate in hidden_blocks:
        stack.append(
            layers.Dense(units, activation='relu',
                         kernel_regularizer=keras.regularizers.l2(0.001))
        )
        stack.append(layers.BatchNormalization())
        stack.append(layers.Dropout(drop_rate))

    # Explicit float32 output dtype, independent of the global dtype policy.
    stack.append(layers.Dense(num_classes, activation='softmax', dtype='float32'))

    return models.Sequential(stack)


# Build model
# `model` is the module-level classifier used by the training, evaluation and
# export cells.
model = build_yamnet_classifier(input_dim=1024, num_classes=3)
model.summary()

print(f"\nüìä Model parameters: {model.count_params():,}")
print("üí° Simple dense network on top of YAMNet features")

# Compile model
# The sparse categorical loss is used because the labels are integer class ids
# (y arrays are built as int32), not one-hot vectors.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("\n‚úÖ Model compiled!")
print("   Architecture: 3-layer dense network (512‚Üí256‚Üí128‚Üí3)")
print("   Regularization: L2 + BatchNorm + Dropout")

## 10. Setup Training Callbacks

In [None]:
from tensorflow.keras import callbacks

# Create model directory
# (the checkpoint callback and the export cell both write into it)
!mkdir -p /kaggle/working/models

# Define callbacks
# All three callbacks monitor val_loss.

# Stop after 20 epochs without improvement and restore the best weights seen.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
    verbose=1
)

# Write weights only (not the full model) whenever val_loss improves.
model_checkpoint = callbacks.ModelCheckpoint(
    filepath='/kaggle/working/models/best_yamnet_classifier.weights.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1
)

# Halve the learning rate after 5 stagnant epochs, down to a floor of 1e-7.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1
)

# Passed to model.fit() in the training cell.
callback_list = [early_stopping, model_checkpoint, reduce_lr]

print("‚úÖ Callbacks configured")

## 11. Train Model

In [None]:
print("üöÄ Starting YAMNet Classifier Training...\n")
print("üí° Training dense network on YAMNet embeddings")
print("üéØ Class weighting enabled for balanced training")
print("‚ö° Mixed precision + GPU acceleration\n")

print(f"üìä Dataset info:")
print(f"  Training samples: {len(X_train):,}")
print(f"  Validation samples: {len(X_val):,}")
print(f"  Feature dimension: {X_train.shape[1]}\n")

print("‚è≥ Expected training time: 2-5 minutes with GPU...\n")

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=64,
    class_weight=class_weight_dict,
    callbacks=callback_list,
    verbose=1
)

print("\n‚úÖ Training complete!")
print(f"Best validation loss: {min(history.history['val_loss']):.4f}")
print(f"Final training accuracy: {history.history['accuracy'][-1]:.4f}")
print(f"Final validation accuracy: {history.history['val_accuracy'][-1]:.4f}")

## 12. Plot Training History

In [None]:
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One entry per panel, left to right:
# (training key, validation key, panel title, y-axis label).
panels = [
    ('loss', 'val_loss', 'Model Loss (YAMNet Classifier)', 'Loss'),
    ('accuracy', 'val_accuracy', 'Model Accuracy (YAMNet Classifier)', 'Accuracy'),
]

# Both panels share the same layout, so draw them with a single loop.
for ax, (train_key, val_key, title, ylabel) in zip(axes, panels):
    ax.plot(history.history[train_key], label='Train')
    ax.plot(history.history[val_key], label='Validation')
    ax.set_title(title, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 13. Evaluate Model

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

print("üìä Evaluating on validation set...\n")

# Evaluate on validation set
val_results = model.evaluate(X_val, y_val, verbose=1)

print("\nValidation Results:")
print(f"  Loss: {val_results[0]:.4f}")
print(f"  Accuracy: {val_results[1]:.4f}")

# Get predictions
print("\nGenerating predictions...")
y_pred_proba = model.predict(X_val, verbose=1)
y_pred = np.argmax(y_pred_proba, axis=1)

# Classification report
print("\nClassification Report:")
print(classification_report(y_val, y_pred, target_names=class_names))

# Confusion matrix
cm = confusion_matrix(y_val, y_pred)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted', fontweight='bold')
plt.ylabel('True', fontweight='bold')
plt.title('Confusion Matrix - YAMNet Classifier', fontweight='bold', fontsize=14)
plt.tight_layout()
plt.show()

print("\n‚úÖ Evaluation complete!")
test_results = val_results

## 14. Save Model and Configuration

In [None]:
# Save full model
model.save('/kaggle/working/models/yamnet_classifier.keras')
print("‚úÖ Full model saved")

# Load best weights
model.load_weights('/kaggle/working/models/best_yamnet_classifier.weights.h5')
print("‚úÖ Loaded best weights from checkpoint")

# Export to TensorFlow Lite
print("\nExporting to TensorFlow Lite...")
print("Converting mixed precision model to float32...")

# Create float32 model
tf.keras.mixed_precision.set_global_policy('float32')
model_f32 = build_yamnet_classifier(input_dim=1024, num_classes=3)
model_f32.set_weights(model.get_weights())
print("‚úÖ Created float32 model")

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model_f32)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_model = converter.convert()

with open('/kaggle/working/models/yamnet_classifier.tflite', 'wb') as f:
    f.write(tflite_model)

print(f"‚úÖ TensorFlow Lite model: {len(tflite_model) / 1024:.1f} KB")

# Save model configuration
model_config = {
    'model_type': 'YAMNet_Classifier',
    'feature_extractor': 'YAMNet (TensorFlow Hub)',
    'embedding_dim': 1024,
    'val_accuracy': float(test_results[1]) if test_results else None,
    'val_loss': float(test_results[0]) if test_results else None,
    'class_names': class_names,
    'preprocessing': config,
    'total_parameters': int(model.count_params())
}

import json
with open('/kaggle/working/models/yamnet_classifier_config.json', 'w') as f:
    json.dump(model_config, f, indent=2)

print("‚úÖ Model configuration saved")

# Upload to S3
print("\nUploading models to S3...")
!aws s3 cp /kaggle/working/models/yamnet_classifier.keras s3://{S3_BUCKET}/models/yamnet/
!aws s3 cp /kaggle/working/models/best_yamnet_classifier.weights.h5 s3://{S3_BUCKET}/models/yamnet/
!aws s3 cp /kaggle/working/models/yamnet_classifier.tflite s3://{S3_BUCKET}/models/yamnet/
!aws s3 cp /kaggle/working/models/yamnet_classifier_config.json s3://{S3_BUCKET}/models/yamnet/

print("\n‚úÖ Models uploaded to S3!")
print(f"   Location: s3://{S3_BUCKET}/models/yamnet/")
print("\nüì¶ Files uploaded:")
print("  - yamnet_classifier.keras (full model)")
print("  - best_yamnet_classifier.weights.h5 (best weights)")
print("  - yamnet_classifier.tflite (edge deployment)")
print("  - yamnet_classifier_config.json (configuration)")

## Summary

### YAMNet Transfer Learning Complete! 🎉

**Approach:**
- ✅ Used YAMNet (pre-trained on AudioSet) as feature extractor
- ✅ Extracted 1024-dimensional embeddings from audio
- ✅ Trained simple dense classifier on embeddings
- ✅ Much faster training than training CNN from scratch

**Model Architecture:**
1. **Feature Extraction**: YAMNet (frozen, pre-trained)
2. **Classifier**: 3-layer dense network (512→256→128→3)
3. **Regularization**: L2, BatchNorm, Dropout

**Advantages:**
- 🚀 Fast training (2-5 minutes)
- 💡 Leverages AudioSet knowledge (521 audio classes)
- 📦 Small model size (classifier only)
- 🎯 Strong generalization from pre-trained features

**Deployment:**
- For Raspberry Pi: Use YAMNet TFLite + classifier TFLite
- Two-stage inference: YAMNet embeddings → classifier

**Next Steps:**
1. Compare with Custom CNN results
2. Deploy best model to Raspberry Pi
3. Integrate with ranger alert system