### Part 1

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

import torch 
import torchaudio


torch.random.manual_seed(42)

SAMPLE_SPEECH = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")

In [3]:
signal, sr = torchaudio.load(SAMPLE_SPEECH)

In [None]:
sr = 16000
hop_length = 160
n_mels = 80

melspec = torchaudio.transforms.MelSpectrogram(
    sample_rate=sr,
    hop_length=hop_length,
    n_mels=n_mels
)(signal)

from melbanks import LogMelFilterBanks
logmelbanks = LogMelFilterBanks(
    samplerate=sr,
    hop_length=hop_length,
    n_mels=n_mels,
    return_complex=True
)(signal)

assert torch.log(melspec + 1e-6).shape == logmelbanks.shape
assert torch.allclose(torch.log(melspec + 1e-6), logmelbanks)
fig = make_subplots(
    rows=2, 
    cols=1,
    subplot_titles=('torchaudio.transforms.MelSpectrogram (log scale)', 
                   'Custom LogMelFilterBanks Implementation'),
    vertical_spacing=0.1
)

melspec_np = torch.log(melspec + 1e-6)[0].numpy()
logmelbanks_np = logmelbanks[0].numpy()

fig.add_trace(
    go.Heatmap(
        z=melspec_np,
        colorscale='Viridis',
        colorbar=dict(title='dB', x=1.02, y=0.8, len=0.4),
        zmin=np.min(melspec_np),
        zmax=np.max(melspec_np)
    ),
    row=1, col=1
)

fig.add_trace(
    go.Heatmap(
        z=logmelbanks_np,
        colorscale='Viridis',
        colorbar=dict(title='dB', x=1.02, y=0.3, len=0.4),
        zmin=np.min(logmelbanks_np),
        zmax=np.max(logmelbanks_np)
    ),
    row=2, col=1
)

fig.update_layout(
    height=800,
    width=1000,
    title_text="Mel Spectrogram Comparison",
)

fig.update_yaxes(title_text="Mel Frequency Bin", row=1, col=1)
fig.update_yaxes(title_text="Mel Frequency Bin", row=2, col=1)

fig.update_xaxes(title_text="", row=1, col=1)
fig.update_xaxes(title_text="Time Frame", row=2, col=1)

fig.show()

diff = torch.log(melspec + 1e-6) - logmelbanks
diff_np = diff[0].numpy()

fig_diff = go.Figure(data=go.Heatmap(
    z=diff_np,
    colorscale='RdBu_r',  
    zmid=0,
    colorbar=dict(title='Difference (dB)')
))

fig_diff.update_layout(
    title='Difference between implementations',
    xaxis_title='Time Frame',
    yaxis_title='Mel Frequency Bin',
    height=400,
    width=1000
)

fig_diff.show()

### Part 2

In [5]:
import tqdm
import time
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score

from ptflops import get_model_complexity_info

import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

In [6]:
train = torchaudio.datasets.SPEECHCOMMANDS(
        root='./',
        download=False,
        subset='training'
)

valid = torchaudio.datasets.SPEECHCOMMANDS(
        root='./',
        download=False,
        subset='validation'
)

test = torchaudio.datasets.SPEECHCOMMANDS(
        root='./',
        download=False,
        subset='testing'
)

In [7]:
train_filtered = [sample for sample in tqdm.tqdm(train) if sample[2] in ['yes', 'no']]
valid_filtered = [sample for sample in tqdm.tqdm(valid) if sample[2] in ['yes', 'no']]
test_filtered = [sample for sample in tqdm.tqdm(test) if sample[2] in ['yes', 'no']]

  0%|          | 0/84843 [00:00<?, ?it/s]

100%|██████████| 84843/84843 [04:46<00:00, 296.03it/s]
100%|██████████| 9981/9981 [00:33<00:00, 297.74it/s]
100%|██████████| 11005/11005 [00:36<00:00, 300.10it/s]


In [8]:
class SpeechCommandsBinary(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        self.label_to_index = {"yes": 1, "no": 0}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        waveform, sample_rate, label, *_ = self.data[idx]
        waveform = waveform.squeeze(0)
        if self.transform:
            waveform = self.transform(waveform)
        label_index = self.label_to_index[label]
        return waveform, label_index

In [18]:
def collate_fn(batch):
    inputs = [item[0] for item in batch]
    targets = torch.tensor([item[1] for item in batch])
    return inputs, targets

In [30]:
train_dataset = SpeechCommandsBinary(train_filtered)
valid_dataset = SpeechCommandsBinary(valid_filtered)
test_dataset = SpeechCommandsBinary(test_filtered)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4, collate_fn=collate_fn)
val_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False, num_workers=4, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4, collate_fn=collate_fn)

In [31]:
class CNNClassifier(nn.Module):
    def __init__(self, n_mels=80, groups=1):
        super(CNNClassifier, self).__init__()
        self.logmel = LogMelFilterBanks(n_mels=n_mels)
        self.conv1 = nn.Conv1d(n_mels, 32, kernel_size=3, stride=1, padding=1, groups=groups)
        self.bn1 = nn.BatchNorm1d(32)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(32, 2)

    def forward(self, x):
        if x.dim() > 2:
            x = x.squeeze(1)
        x = self.logmel(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.squeeze(-1)
        x = self.fc(x)
        return x

In [32]:
def pad_inputs(inputs):
    max_len = max(x.shape[-1] for x in inputs)
    padded = torch.zeros(len(inputs), max_len)
    for i, x in enumerate(inputs):
        padded[i, :x.shape[-1]] = x
    return padded

def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    history = {'train_loss': [], 'val_accuracy': [], 'epoch_time': []}
    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        start_time = time.time()
        pbar = tqdm.tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
        for inputs, targets in pbar:
            inputs = pad_inputs(inputs).to(device)
            targets = targets.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            pbar.set_postfix(loss=f"{loss.item():.4f}")
            
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_time = time.time() - start_time
        val_acc = evaluate_model(model, val_loader, device)
        
        history['train_loss'].append(epoch_loss)
        history['val_accuracy'].append(val_acc)
        history['epoch_time'].append(epoch_time)
        
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Val Acc: {val_acc:.4f} - Time: {epoch_time:.2f}s")
    return history

def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for inputs, targets in tqdm.tqdm(data_loader, desc="Evaluating", leave=False):
            inputs = pad_inputs(inputs).to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())
    return accuracy_score(all_targets, all_preds)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def calculate_flops(model, input_size=(1, 16000)):
    macs, params = get_model_complexity_info(
        model, input_size, as_strings=False, print_per_layer_stat=False
    )
    flops = macs * 2 if macs is not None else 0
    return {'flops': flops, 'params': params}

In [33]:
!nvidia-smi

Fri Mar 28 16:54:06 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.154.05             Driver Version: 535.154.05   CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          On  | 00000000:2A:00.0 Off |                    0 |
| N/A   27C    P0              76W / 400W |  13320MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [34]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 5
learning_rate = 0.001

results = {} 

n_mels_list = [20, 40, 80]
groups_list = [1, 2, 4, 8, 16]

for n_mels in n_mels_list:
    for groups in groups_list:
        if n_mels % groups != 0:
            print(f"Skipping n_mels={n_mels} with groups={groups} (invalid combination)")
            continue
        print(f"\nTraining model with n_mels={n_mels} and groups={groups}")
        model = CNNClassifier(n_mels=n_mels, groups=groups)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        
        num_params = count_parameters(model)
        comp_info = calculate_flops(model)
        print(f"Parameters: {num_params:,}, FLOPs: {comp_info['flops']:,}")
        
        history = train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs)
        test_acc = evaluate_model(model, test_loader, device)
        print(f"Test Accuracy: {test_acc:.4f}")
        
        results[(n_mels, groups)] = {
            'history': history,
            'test_accuracy': test_acc,
            'params': num_params,
            'flops': comp_info['flops']
        }


Training model with n_mels=20 and groups=1
Parameters: 2,082, FLOPs: 1,245,260


Epoch 1/5:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 17.89it/s, loss=0.2158]
                                                           

Epoch 1/5 - Loss: 0.4202 - Val Acc: 0.8879 - Time: 5.59s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 16.90it/s, loss=0.5160]
                                                           

Epoch 2/5 - Loss: 0.2647 - Val Acc: 0.9091 - Time: 5.92s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.38it/s, loss=0.1939]
                                                           

Epoch 3/5 - Loss: 0.2282 - Val Acc: 0.9278 - Time: 5.75s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 18.28it/s, loss=0.4613]
                                                           

Epoch 4/5 - Loss: 0.2091 - Val Acc: 0.9390 - Time: 5.47s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.95it/s, loss=0.3944]
                                                           

Epoch 5/5 - Loss: 0.1928 - Val Acc: 0.9427 - Time: 5.57s


                                                           

Test Accuracy: 0.9466

Training model with n_mels=20 and groups=2
Parameters: 1,122, FLOPs: 1,051,340


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 18.27it/s, loss=0.3256]
                                                           

Epoch 1/5 - Loss: 0.4880 - Val Acc: 0.8755 - Time: 5.48s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 18.63it/s, loss=0.2758]
                                                           

Epoch 2/5 - Loss: 0.3095 - Val Acc: 0.9166 - Time: 5.37s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 18.44it/s, loss=0.2688]
                                                           

Epoch 3/5 - Loss: 0.2495 - Val Acc: 0.9166 - Time: 5.43s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 17.93it/s, loss=0.2640]
                                                           

Epoch 4/5 - Loss: 0.2330 - Val Acc: 0.9191 - Time: 5.58s


Epoch 5/5: 100%|██████████| 100/100 [00:06<00:00, 16.08it/s, loss=0.3646]
                                                           

Epoch 5/5 - Loss: 0.2165 - Val Acc: 0.9240 - Time: 6.22s


                                                           

Test Accuracy: 0.9260

Training model with n_mels=20 and groups=4
Parameters: 642, FLOPs: 954,380


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 18.26it/s, loss=0.5526]
                                                           

Epoch 1/5 - Loss: 0.5946 - Val Acc: 0.8132 - Time: 5.48s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 16.72it/s, loss=0.4515]
                                                           

Epoch 2/5 - Loss: 0.4147 - Val Acc: 0.8742 - Time: 5.98s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.98it/s, loss=0.2689]
                                                           

Epoch 3/5 - Loss: 0.3295 - Val Acc: 0.8917 - Time: 5.56s


Epoch 4/5: 100%|██████████| 100/100 [00:06<00:00, 15.99it/s, loss=0.1510]
                                                           

Epoch 4/5 - Loss: 0.2954 - Val Acc: 0.8941 - Time: 6.26s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.07it/s, loss=0.2407]
                                                           

Epoch 5/5 - Loss: 0.2699 - Val Acc: 0.9041 - Time: 5.86s


                                                           

Test Accuracy: 0.8920
Skipping n_mels=20 with groups=8 (invalid combination)
Skipping n_mels=20 with groups=16 (invalid combination)

Training model with n_mels=40 and groups=1
Parameters: 4,002, FLOPs: 2,445,140


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 18.39it/s, loss=0.3020]
                                                           

Epoch 1/5 - Loss: 0.4374 - Val Acc: 0.8917 - Time: 5.44s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 17.35it/s, loss=0.2376]
                                                           

Epoch 2/5 - Loss: 0.2690 - Val Acc: 0.9290 - Time: 5.77s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.16it/s, loss=0.1698]
                                                           

Epoch 3/5 - Loss: 0.2265 - Val Acc: 0.9091 - Time: 5.83s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 17.75it/s, loss=0.2202]
                                                           

Epoch 4/5 - Loss: 0.2169 - Val Acc: 0.9352 - Time: 5.64s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.05it/s, loss=0.2292]
                                                           

Epoch 5/5 - Loss: 0.1971 - Val Acc: 0.9278 - Time: 5.87s


                                                           

Test Accuracy: 0.9442

Training model with n_mels=40 and groups=2
Parameters: 2,082, FLOPs: 2,057,300


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 17.03it/s, loss=0.4407]
                                                           

Epoch 1/5 - Loss: 0.4713 - Val Acc: 0.8854 - Time: 5.88s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 17.46it/s, loss=0.2274]
                                                           

Epoch 2/5 - Loss: 0.3044 - Val Acc: 0.8804 - Time: 5.73s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.30it/s, loss=0.2280]
                                                           

Epoch 3/5 - Loss: 0.2525 - Val Acc: 0.9215 - Time: 5.78s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 17.01it/s, loss=0.3602]
                                                           

Epoch 4/5 - Loss: 0.2296 - Val Acc: 0.9128 - Time: 5.88s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.62it/s, loss=0.2468]
                                                           

Epoch 5/5 - Loss: 0.2175 - Val Acc: 0.9078 - Time: 5.68s


                                                           

Test Accuracy: 0.9284

Training model with n_mels=40 and groups=4
Parameters: 1,122, FLOPs: 1,863,380


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 16.91it/s, loss=0.5062]
                                                           

Epoch 1/5 - Loss: 0.5253 - Val Acc: 0.8730 - Time: 5.91s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 17.34it/s, loss=0.2403]
                                                           

Epoch 2/5 - Loss: 0.3486 - Val Acc: 0.8867 - Time: 5.77s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.94it/s, loss=0.2509]
                                                           

Epoch 3/5 - Loss: 0.2825 - Val Acc: 0.8804 - Time: 5.58s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 18.15it/s, loss=0.4490]
                                                           

Epoch 4/5 - Loss: 0.2557 - Val Acc: 0.8991 - Time: 5.51s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.64it/s, loss=0.2392]
                                                           

Epoch 5/5 - Loss: 0.2449 - Val Acc: 0.9029 - Time: 5.67s


                                                           

Test Accuracy: 0.9320

Training model with n_mels=40 and groups=8
Parameters: 642, FLOPs: 1,766,420


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 17.14it/s, loss=0.6257]
                                                           

Epoch 1/5 - Loss: 0.5986 - Val Acc: 0.8244 - Time: 5.84s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 17.76it/s, loss=0.3689]
                                                           

Epoch 2/5 - Loss: 0.4431 - Val Acc: 0.8705 - Time: 5.63s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.34it/s, loss=0.3693]
                                                           

Epoch 3/5 - Loss: 0.3594 - Val Acc: 0.8717 - Time: 5.77s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 17.62it/s, loss=0.2550]
                                                           

Epoch 4/5 - Loss: 0.3119 - Val Acc: 0.8804 - Time: 5.68s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.67it/s, loss=0.2760]
                                                           

Epoch 5/5 - Loss: 0.2879 - Val Acc: 0.8804 - Time: 5.66s


                                                           

Test Accuracy: 0.8920
Skipping n_mels=40 with groups=16 (invalid combination)

Training model with n_mels=80 and groups=1
Parameters: 7,842, FLOPs: 4,844,900


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 17.49it/s, loss=0.1829]
                                                           

Epoch 1/5 - Loss: 0.3526 - Val Acc: 0.9116 - Time: 5.72s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 17.94it/s, loss=0.1731]
                                                           

Epoch 2/5 - Loss: 0.2352 - Val Acc: 0.9290 - Time: 5.58s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 18.09it/s, loss=0.0621]
                                                           

Epoch 3/5 - Loss: 0.2115 - Val Acc: 0.9166 - Time: 5.53s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 18.09it/s, loss=0.1951]
                                                           

Epoch 4/5 - Loss: 0.1879 - Val Acc: 0.8991 - Time: 5.53s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.49it/s, loss=0.2930]
                                                           

Epoch 5/5 - Loss: 0.1732 - Val Acc: 0.9427 - Time: 5.72s


                                                           

Test Accuracy: 0.9417

Training model with n_mels=80 and groups=2
Parameters: 4,002, FLOPs: 4,069,220


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 17.49it/s, loss=0.3462]
                                                           

Epoch 1/5 - Loss: 0.4250 - Val Acc: 0.9004 - Time: 5.72s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 17.44it/s, loss=0.2953]
                                                           

Epoch 2/5 - Loss: 0.2902 - Val Acc: 0.8917 - Time: 5.74s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.46it/s, loss=0.3630]
                                                           

Epoch 3/5 - Loss: 0.2480 - Val Acc: 0.8979 - Time: 5.73s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 17.40it/s, loss=0.4357]
                                                           

Epoch 4/5 - Loss: 0.2266 - Val Acc: 0.9116 - Time: 5.75s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.92it/s, loss=0.3169]
                                                           

Epoch 5/5 - Loss: 0.2178 - Val Acc: 0.7933 - Time: 5.58s


                                                           

Test Accuracy: 0.7961

Training model with n_mels=80 and groups=4
Parameters: 2,082, FLOPs: 3,681,380


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 17.32it/s, loss=0.3673]
                                                           

Epoch 1/5 - Loss: 0.5256 - Val Acc: 0.8406 - Time: 5.78s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 17.17it/s, loss=0.4457]
                                                           

Epoch 2/5 - Loss: 0.3652 - Val Acc: 0.8580 - Time: 5.83s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.94it/s, loss=0.6581]
                                                           

Epoch 3/5 - Loss: 0.2992 - Val Acc: 0.7024 - Time: 5.58s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 17.75it/s, loss=0.1921]
                                                           

Epoch 4/5 - Loss: 0.2760 - Val Acc: 0.8867 - Time: 5.64s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.32it/s, loss=0.3352]
                                                           

Epoch 5/5 - Loss: 0.2545 - Val Acc: 0.8979 - Time: 5.78s


                                                           

Test Accuracy: 0.9175

Training model with n_mels=80 and groups=8
Parameters: 1,122, FLOPs: 3,487,460


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 17.72it/s, loss=0.4049]
                                                           

Epoch 1/5 - Loss: 0.5236 - Val Acc: 0.8755 - Time: 5.65s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 17.46it/s, loss=0.3756]
                                                           

Epoch 2/5 - Loss: 0.3774 - Val Acc: 0.8879 - Time: 5.73s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 17.80it/s, loss=0.3079]
                                                           

Epoch 3/5 - Loss: 0.3154 - Val Acc: 0.8667 - Time: 5.62s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 17.67it/s, loss=0.2143]
                                                           

Epoch 4/5 - Loss: 0.2850 - Val Acc: 0.8892 - Time: 5.66s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 17.80it/s, loss=0.2324]
                                                           

Epoch 5/5 - Loss: 0.2669 - Val Acc: 0.8941 - Time: 5.62s


                                                           

Test Accuracy: 0.9284

Training model with n_mels=80 and groups=16
Parameters: 642, FLOPs: 3,390,500


Epoch 1/5: 100%|██████████| 100/100 [00:05<00:00, 17.66it/s, loss=0.4684]
                                                           

Epoch 1/5 - Loss: 0.5576 - Val Acc: 0.8568 - Time: 5.70s


Epoch 2/5: 100%|██████████| 100/100 [00:05<00:00, 18.12it/s, loss=0.3840]
                                                           

Epoch 2/5 - Loss: 0.4362 - Val Acc: 0.8792 - Time: 5.52s


Epoch 3/5: 100%|██████████| 100/100 [00:05<00:00, 18.07it/s, loss=0.4219]
                                                           

Epoch 3/5 - Loss: 0.3699 - Val Acc: 0.8630 - Time: 5.54s


Epoch 4/5: 100%|██████████| 100/100 [00:05<00:00, 17.57it/s, loss=0.3924]
                                                           

Epoch 4/5 - Loss: 0.3319 - Val Acc: 0.8780 - Time: 5.69s


Epoch 5/5: 100%|██████████| 100/100 [00:05<00:00, 18.17it/s, loss=0.3233]
                                                           

Epoch 5/5 - Loss: 0.3095 - Val Acc: 0.8692 - Time: 5.51s


                                                           

Test Accuracy: 0.8653




In [None]:
fig = go.Figure()
for group in sorted(set(g for _, g in results.keys())):
    data = []
    for (n_mels, groups), res in sorted(results.items()):
        if groups == group:
            data.append({"n_mels": n_mels, "accuracy": res['test_accuracy']})
    
    if data:
        df = pd.DataFrame(data)
        fig.add_trace(go.Scatter(
            x=df["n_mels"],
            y=df["accuracy"],
            mode='lines+markers',
            name=f"groups={group}"
        ))

fig.update_layout(
    title="Test Accuracy vs n_mels (for different groups)",
    xaxis_title="Number of Mel Filterbanks (n_mels)",
    yaxis_title="Test Accuracy",
    legend_title="Groups",
    xaxis=dict(tickmode='array', tickvals=n_mels_list),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
)
fig.show()

fig = go.Figure()
for (n_mels, groups), res in results.items():
    if groups == 1:  # fix groups
        fig.add_trace(go.Scatter(
            x=list(range(len(res['history']['train_loss']))),
            y=res['history']['train_loss'],
            mode='lines',
            name=f"n_mels={n_mels}"
        ))
fig.update_layout(
    title="Training Loss Comparison (groups=1)",
    xaxis_title="Epoch",
    yaxis_title="Train Loss",
    legend_title="n_mels"
)
fig.show()

accs = []
for (n_mels, groups), res in sorted(results.items()):
    if groups == 1:
        accs.append({"n_mels": n_mels, "accuracy": res['test_accuracy']})
if accs:
    df = pd.DataFrame(accs)
    fig = px.line(df, x="n_mels", y="accuracy", markers=True)
    fig.update_layout(
        title="Test Accuracy vs n_mels (groups=1)",
        xaxis_title="n_mels",
        yaxis_title="Test Accuracy"
    )
    fig.show()

accs_by_groups = []
for (n_mels, groups), res in sorted(results.items()):
    if n_mels == 80:
        accs_by_groups.append({"groups": groups, "accuracy": res['test_accuracy']})
if accs_by_groups:
    df = pd.DataFrame(accs_by_groups)
    fig = px.line(df, x="groups", y="accuracy", markers=True)
    fig.update_layout(
        title="Test Accuracy vs Groups (n_mels=80)",
        xaxis_title="groups",
        yaxis_title="Test Accuracy"
    )
    fig.show()

data_heatmap = []
for (n_mels, groups), res in results.items():
    data_heatmap.append({
        "n_mels": n_mels,
        "groups": groups,
        "accuracy": res['test_accuracy']
    })

if data_heatmap:
    df = pd.DataFrame(data_heatmap)
    pivot_df = df.pivot(index="groups", columns="n_mels", values="accuracy")
    fig = px.imshow(
        pivot_df,
        labels=dict(x="n_mels", y="groups", color="Test Accuracy"),
        x=pivot_df.columns,
        y=pivot_df.index,
        color_continuous_scale="viridis"
    )
    fig.update_layout(
        title="Test Accuracy Heatmap (n_mels vs groups)",
        xaxis_title="n_mels",
        yaxis_title="groups"
    )
    fig.show()

times = []
for (n_mels, groups), res in sorted(results.items()):
    if n_mels == 80:
        avg_time = sum(res['history']['epoch_time']) / len(res['history']['epoch_time'])
        times.append({"groups": groups, "avg_time": avg_time})
if times:
    df = pd.DataFrame(times)
    fig = px.line(df, x="groups", y="avg_time", markers=True)
    fig.update_layout(
        title="Epoch Training Time vs Groups (n_mels=80)",
        xaxis_title="groups",
        yaxis_title="Average Epoch Time (s)"
    )
    fig.show()

params_data = []
for (n_mels, groups), res in sorted(results.items()):
    if n_mels == 80:
        params_data.append({"groups": groups, "params": res['params']})
if params_data:
    df = pd.DataFrame(params_data)
    fig = px.line(df, x="groups", y="params", markers=True)
    fig.update_layout(
        title="Model Parameters vs Groups (n_mels=80)",
        xaxis_title="groups",
        yaxis_title="Number of Parameters"
    )
    fig.show()

flops_data = []
for (n_mels, groups), res in sorted(results.items()):
    if n_mels == 80:
        flops_data.append({"groups": groups, "flops": res['flops']})
if flops_data:
    df = pd.DataFrame(flops_data)
    fig = px.line(df, x="groups", y="flops", markers=True)
    fig.update_layout(
        title="Model FLOPs vs Groups (n_mels=80)",
        xaxis_title="groups",
        yaxis_title="FLOPs"
    )
    fig.show()

params_heatmap = []
for (n_mels, groups), res in results.items():
    params_heatmap.append({
        "n_mels": n_mels,
        "groups": groups,
        "params": res['params']
    })

if params_heatmap:
    df = pd.DataFrame(params_heatmap)
    pivot_df = df.pivot(index="groups", columns="n_mels", values="params")
    fig = px.imshow(
        pivot_df,
        labels=dict(x="n_mels", y="groups", color="Parameters"),
        x=pivot_df.columns,
        y=pivot_df.index,
        color_continuous_scale="viridis"
    )
    fig.update_layout(
        title="Model Parameters Heatmap (n_mels vs groups)",
        xaxis_title="n_mels",
        yaxis_title="groups"
    )
    fig.show()

flops_heatmap = []
for (n_mels, groups), res in results.items():
    flops_heatmap.append({
        "n_mels": n_mels,
        "groups": groups,
        "flops": res['flops']
    })

if flops_heatmap:
    df = pd.DataFrame(flops_heatmap)
    pivot_df = df.pivot(index="groups", columns="n_mels", values="flops")
    fig = px.imshow(
        pivot_df,
        labels=dict(x="n_mels", y="groups", color="FLOPs"),
        x=pivot_df.columns,
        y=pivot_df.index,
        color_continuous_scale="viridis"
    )
    fig.update_layout(
        title="Model FLOPs Heatmap (n_mels vs groups)",
        xaxis_title="n_mels",
        yaxis_title="groups"
    )
    fig.show()