In [23]:
# Install necessary libraries
!pip install librosa soundfile datasets codecarbon



In [24]:
# Import necessary libraries
import numpy as np
import librosa
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, IterableDataset
import torch.optim.lr_scheduler as lr_scheduler
from datasets import load_dataset
from huggingface_hub import login
from pathlib import Path
from codecarbon import EmissionsTracker  # For energy monitoring

In [25]:
# Sign in to Hugging Face for datasets
token = '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'
login(token)

In [26]:
# Load dataset in streaming mode
dataset = load_dataset("rfcx/frugalai", streaming=True)

# Limit the number of samples for training
max_samples = 1000  # Adjust based on your needs
dataset['train'] = dataset['train'].take(max_samples)

In [27]:
# Cell 5: Define the SpectrogramIterableDataset class with advanced preprocessing
class SpectrogramIterableDataset(IterableDataset):
    def __init__(self, iterable_dataset, n_fft=1024, hop_length=256, n_mels=64, target_size=(64, 64)):
        self.dataset = iterable_dataset
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.n_mels = n_mels
        self.target_size = target_size

    def add_noise(self, audio, noise_level=0.005):
        """Add random noise to the audio signal."""
        noise = np.random.randn(len(audio)) * noise_level
        return audio + noise

    def time_stretch(self, audio, rate=1.0):
        """Apply time-stretching to the audio signal."""
        return librosa.effects.time_stretch(audio, rate=rate)

    def pitch_shift(self, audio, sampling_rate, n_steps=2):
        """Apply pitch-shifting to the audio signal."""
        return librosa.effects.pitch_shift(audio, sr=sampling_rate, n_steps=n_steps)

    def frequency_masking(self, spectrogram, max_mask_freq=16):
        """Randomly mask frequency bands in the spectrogram."""
        if spectrogram.shape[0] <= 1:  # Skip if spectrogram is too small
            return spectrogram
        freq_mask = np.random.randint(1, min(max_mask_freq, spectrogram.shape[0]) + 1)  # Ensure valid range
        start = np.random.randint(0, spectrogram.shape[0] - freq_mask)
        spectrogram[start:start + freq_mask, :] = 0
        return spectrogram

    def time_masking(self, spectrogram, max_mask_time=16):
        """Randomly mask time segments in the spectrogram."""
        if spectrogram.shape[1] <= 1:  # Skip if spectrogram is too small
            return spectrogram
        time_mask = np.random.randint(1, min(max_mask_time, spectrogram.shape[1]) + 1)  # Ensure valid range
        start = np.random.randint(0, spectrogram.shape[1] - time_mask)
        spectrogram[:, start:start + time_mask] = 0
        return spectrogram

    def compute_delta_features(self, spectrogram):
        """Compute delta and delta-delta features for temporal dynamics."""
        delta = librosa.feature.delta(spectrogram)
        delta_delta = librosa.feature.delta(spectrogram, order=2)
        return np.stack([spectrogram, delta, delta_delta], axis=0)

    def process_audio(self, audio_array, sampling_rate):
        # Pad the audio signal if it's too short
        if len(audio_array) < self.n_fft:
            audio_array = np.pad(audio_array, (0, self.n_fft - len(audio_array)), mode='constant')

        # Data augmentation: Add noise, time-stretch, and pitch-shift
        audio_array = self.add_noise(audio_array)
        audio_array = self.time_stretch(audio_array, rate=np.random.uniform(0.9, 1.1))
        audio_array = self.pitch_shift(audio_array, sampling_rate, n_steps=np.random.randint(-2, 2))

        # Generate Mel spectrogram
        mel_spectrogram = librosa.feature.melspectrogram(
            y=audio_array, sr=sampling_rate, n_fft=self.n_fft,
            hop_length=self.hop_length, n_mels=self.n_mels
        )
        # Convert to log scale (dB)
        log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

        # Normalize to zero mean and unit variance
        if np.std(log_mel_spectrogram) > 0:  # Avoid division by zero
            log_mel_spectrogram = (log_mel_spectrogram - np.mean(log_mel_spectrogram)) / np.std(log_mel_spectrogram)
        else:
            log_mel_spectrogram = np.zeros_like(log_mel_spectrogram)  # Handle case where std is zero

        # Apply frequency and time masking
        log_mel_spectrogram = self.frequency_masking(log_mel_spectrogram)
        log_mel_spectrogram = self.time_masking(log_mel_spectrogram)

        # Compute delta features
        log_mel_spectrogram = self.compute_delta_features(log_mel_spectrogram)

        # Resize to target size
        log_mel_spectrogram = librosa.util.fix_length(log_mel_spectrogram, size=self.target_size[1], axis=2)
        log_mel_spectrogram = librosa.util.fix_length(log_mel_spectrogram, size=self.target_size[0], axis=1)

        # Return the spectrogram in its original shape (3D: [channels, height, width])
        return torch.tensor(log_mel_spectrogram, dtype=torch.float32)  # Shape: (3, height, width)
    def __iter__(self):
        for sample in iter(self.dataset):
            audio_array = sample['audio']['array']
            sampling_rate = sample['audio']['sampling_rate']
            label = sample['label']
            
            # Process audio to spectrogram
            spectrogram = self.process_audio(audio_array, sampling_rate)
            
            yield spectrogram, label
    def __len__(self):
        # Counts items manually
        return sum(1 for _ in iter(self.dataset)) # Counts the number of items

In [28]:
# Wrap the train IterableDataset
wrapped_train_dataset = SpectrogramIterableDataset(dataset['train'])

# Create DataLoader with smaller batch size
batch_size = 16  # Reduce from 32
train_loader = DataLoader(
    wrapped_train_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0  # Set to 0 if CPU resources are limited
)

# Wrap the test IterableDataset
wrapped_test_dataset = SpectrogramIterableDataset(dataset['test'])

# Create DataLoader with smaller batch size
batch_size = 16  # Reduce from 32
test_loader = DataLoader(
    wrapped_test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0  # Set to 0 if CPU resources are limited
)

In [29]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)  # Input channels = 3 (delta features)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)  # Adjusted for target_size=(64, 64)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten the output for the fully connected layer
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.sigmoid(x)

In [30]:
# Initialize model, loss function, and optimizer
model = CNNModel()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [31]:
import torch.optim.lr_scheduler as lr_scheduler
from codecarbon import EmissionsTracker

def train_model(model, train_loader, criterion, optimizer, num_epochs=2, max_batches=100):
    # Initialize CodeCarbon tracker
    tracker = EmissionsTracker()
    tracker.start()

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for batch_idx, (spectrograms, labels) in enumerate(train_loader):
            if batch_idx >= max_batches:  # Stop after max_batches
                break
            
            labels = labels.unsqueeze(1).float()  # Reshape labels to (batch_size, 1)

            # Forward pass
            outputs = model(spectrograms)
            loss = criterion(outputs, labels)
            
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # Log loss
            running_loss += loss.item()
            if (batch_idx + 1) % 10 == 0:  # Log every 10 batches
                print(f"Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}")
        
        print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {running_loss/len(train_loader):.4f}")

    # Stop the tracker and print emissions
    emissions = tracker.stop()
    print(f"Training completed. Total CO2 emissions: {emissions} kg")

# Train the model
train_model(model, train_loader, criterion, optimizer)

[codecarbon INFO @ 11:18:27] [setup] RAM Tracking...
[codecarbon INFO @ 11:18:27] [setup] CPU Tracking...
 Mac OS detected: Please install Intel Power Gadget or enable PowerMetrics sudo to measure CPU

[codecarbon INFO @ 11:18:34] CPU Model on constant consumption mode: Intel(R) Core(TM) i5-8257U CPU @ 1.40GHz
[codecarbon INFO @ 11:18:34] [setup] GPU Tracking...
[codecarbon INFO @ 11:18:34] No GPU found.
[codecarbon INFO @ 11:18:34] >>> Tracker's metadata:
[codecarbon INFO @ 11:18:34]   Platform system: macOS-10.16-x86_64-i386-64bit
[codecarbon INFO @ 11:18:34]   Python version: 3.11.7
[codecarbon INFO @ 11:18:34]   CodeCarbon version: 2.8.3
[codecarbon INFO @ 11:18:34]   Available RAM : 8.000 GB
[codecarbon INFO @ 11:18:34]   CPU count: 8
[codecarbon INFO @ 11:18:34]   CPU model: Intel(R) Core(TM) i5-8257U CPU @ 1.40GHz
[codecarbon INFO @ 11:18:34]   GPU count: None
[codecarbon INFO @ 11:18:34]   GPU model: None
[codecarbon INFO @ 11:18:35] Saving emissions data to file /Users/sarahle

Epoch [1/2], Step [10/63], Loss: 0.6874
Epoch [1/2], Step [20/63], Loss: 0.5844


[codecarbon INFO @ 11:19:05] Energy consumed for RAM : 0.000025 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:19:05] Energy consumed for all CPUs : 0.000063 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:19:05] 0.000088 kWh of electricity used since the beginning.


Epoch [1/2], Step [30/63], Loss: 0.5883


[codecarbon INFO @ 11:19:20] Energy consumed for RAM : 0.000037 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:19:20] Energy consumed for all CPUs : 0.000094 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:19:20] 0.000131 kWh of electricity used since the beginning.


Epoch [1/2], Step [40/63], Loss: 0.6217


[codecarbon INFO @ 11:19:35] Energy consumed for RAM : 0.000050 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:19:35] Energy consumed for all CPUs : 0.000125 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:19:35] 0.000175 kWh of electricity used since the beginning.


Epoch [1/2], Step [50/63], Loss: 0.5839


[codecarbon INFO @ 11:19:50] Energy consumed for RAM : 0.000062 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:19:50] Energy consumed for all CPUs : 0.000156 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:19:50] 0.000219 kWh of electricity used since the beginning.


Epoch [1/2], Step [60/63], Loss: 0.6780
Epoch [1/2], Average Loss: 0.6246


[codecarbon INFO @ 11:20:05] Energy consumed for RAM : 0.000075 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:20:05] Energy consumed for all CPUs : 0.000188 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:20:05] 0.000262 kWh of electricity used since the beginning.


Epoch [2/2], Step [10/63], Loss: 0.5801


[codecarbon INFO @ 11:20:20] Energy consumed for RAM : 0.000087 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:20:20] Energy consumed for all CPUs : 0.000219 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:20:20] 0.000306 kWh of electricity used since the beginning.


Epoch [2/2], Step [20/63], Loss: 0.5409


[codecarbon INFO @ 11:20:35] Energy consumed for RAM : 0.000100 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:20:35] Energy consumed for all CPUs : 0.000250 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:20:35] 0.000350 kWh of electricity used since the beginning.
[codecarbon INFO @ 11:20:35] 0.000163 g.CO2eq/s mean an estimation of 5.153690782634176 kg.CO2eq/year


Epoch [2/2], Step [30/63], Loss: 0.5709
Epoch [2/2], Step [40/63], Loss: 0.6908


[codecarbon INFO @ 11:20:50] Energy consumed for RAM : 0.000112 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:20:50] Energy consumed for all CPUs : 0.000281 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:20:50] 0.000394 kWh of electricity used since the beginning.


Epoch [2/2], Step [50/63], Loss: 0.5604


[codecarbon INFO @ 11:21:05] Energy consumed for RAM : 0.000125 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:21:05] Energy consumed for all CPUs : 0.000313 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:21:05] 0.000437 kWh of electricity used since the beginning.


Epoch [2/2], Step [60/63], Loss: 0.6555


[codecarbon INFO @ 11:21:15] Energy consumed for RAM : 0.000133 kWh. RAM Power : 3.0 W
[codecarbon INFO @ 11:21:15] Energy consumed for all CPUs : 0.000334 kWh. Total CPU Power : 7.5 W
[codecarbon INFO @ 11:21:15] 0.000467 kWh of electricity used since the beginning.


Epoch [2/2], Average Loss: 0.5949
Training completed. Total CO2 emissions: 2.6173939494704938e-05 kg


In [34]:
# Save the model (optional)
torch.save(model.state_dict(), "./models/cnn_model.pth")
print("Model saved to cnn_model.pth")

Model saved to cnn_model.pth


In [35]:
def evaluate_model(model, test_loader):
    """
    Evaluate the trained model on the test dataset.
    
    Args:
        model (nn.Module): The trained CNN model.
        test_loader (DataLoader): DataLoader for the test dataset.
    """
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient computation for inference
        for spectrograms, labels in test_loader:
            try:
                outputs = model(spectrograms)
                predicted = (outputs > 0.5).float()  # Convert outputs to binary predictions
                total += labels.size(0)
                correct += (predicted.squeeze() == labels).sum().item()
            except Exception as e:
                print(f"Skipping batch due to error: {e}")

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

In [36]:
# Evaluate the trained model
evaluate_model(model, test_loader)



ValueError: high <= 0