In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import librosa
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report





In [None]:
# 📁 Step 3: Load ESC-50 CSV
# Both paths are derived from a single dataset root. Set the ESC50_ROOT
# environment variable to point at your own copy of ESC-50-master; when it is
# not set, the original local default below is used unchanged.
ESC50_ROOT = os.environ.get(
    'ESC50_ROOT',
    r'C:\Users\nasrr\Desktop\CNN_Projects\AudioClassifier\ESC-50-master\ESC-50-master',
)
CSV_PATH = os.path.join(ESC50_ROOT, 'meta', 'esc50.csv')
AUDIO_PATH = os.path.join(ESC50_ROOT, 'audio')

# Fail fast: a wrong audio folder would otherwise only show up later as one
# "Failed: ..." line per clip and an empty feature matrix.
if not os.path.isdir(AUDIO_PATH):
    raise FileNotFoundError(
        f"ESC-50 audio folder not found: {AUDIO_PATH} (set ESC50_ROOT to the dataset root)"
    )

# Metadata table: one row per clip (filename, fold, target, category, ...).
df = pd.read_csv(CSV_PATH)
print(df.head())

            filename  fold  target        category  esc10  src_file take
0   1-100032-A-0.wav     1       0             dog   True    100032    A
1  1-100038-A-14.wav     1      14  chirping_birds  False    100038    A
2  1-100210-A-36.wav     1      36  vacuum_cleaner  False    100210    A
3  1-100210-B-36.wav     1      36  vacuum_cleaner  False    100210    B
4  1-101296-A-19.wav     1      19    thunderstorm  False    101296    A


In [None]:
# 🔉 Step 4: Fetch the pretrained YAMNet model from TensorFlow Hub
# The handle is kept in a named constant so the model version is easy to spot.
YAMNET_HANDLE = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(YAMNET_HANDLE)














In [None]:
# 🎧 Step 5: Turn every clip into one fixed-size feature vector (X) plus its label (y)
# Each clip is resampled to 16 kHz mono, passed through YAMNet, and its
# per-frame embeddings are averaged over the frame axis.
X, y = [], []

for _, record in tqdm(df.iterrows(), total=len(df)):
    file_path = os.path.join(AUDIO_PATH, record['filename'])
    label = record['target']

    try:
        samples, _sr = librosa.load(file_path, sr=16000, mono=True)
        samples = tf.convert_to_tensor(samples, dtype=tf.float32)

        # Only the second of the model's three outputs (the embeddings) is used.
        _, frame_embeddings, _ = yamnet_model(samples)
        clip_vector = tf.reduce_mean(frame_embeddings, axis=0).numpy()

        X.append(clip_vector)
        y.append(label)
    except Exception as e:
        # Best effort: report the clip that could not be processed and move on.
        print(f"Failed: {file_path} → {e}")

# Stack into arrays for scikit-learn.
X = np.array(X)
y = np.array(y)

print("✅ Done: ", X.shape, y.shape)


100%|██████████| 2000/2000 [00:41<00:00, 48.07it/s]

✅ Done:  (2000, 1024) (2000,)





In [None]:
# 🧠 Step 6: Hold out 20% of the clips for testing
# Stratifying on y keeps the class proportions the same in both parts, and the
# fixed random_state makes the split repeatable.
split = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split

print("Train:", X_train.shape, "Test:", X_test.shape)

Train: (1600, 1024) Test: (400, 1024)


In [None]:
# 🧪 Step 7: Fit a logistic-regression baseline on the averaged embeddings
# The iteration cap is raised to 2000 via max_iter.
LOGREG_MAX_ITER = 2000
clf = LogisticRegression(max_iter=LOGREG_MAX_ITER)
clf.fit(X_train, y_train)

In [None]:
# 📊 Step 8: Score the baseline on the held-out split
# Note: the printed label says "Validation", but the numbers are computed on
# X_test / y_test from the train/test split above.
y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"✅ Validation Accuracy: {acc * 100:.2f}%")

# Per-class precision / recall / F1.
print("\nDetailed Report:", classification_report(y_test, y_pred), sep="\n")

✅ Validation Accuracy: 87.50%

Detailed Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00         8
           2       0.89      1.00      0.94         8
           3       1.00      1.00      1.00         8
           4       0.88      0.88      0.88         8
           5       1.00      1.00      1.00         8
           6       1.00      0.88      0.93         8
           7       0.80      1.00      0.89         8
           8       0.88      0.88      0.88         8
           9       1.00      1.00      1.00         8
          10       1.00      1.00      1.00         8
          11       1.00      1.00      1.00         8
          12       0.89      1.00      0.94         8
          13       0.67      0.50      0.57         8
          14       0.88      0.88      0.88         8
          15       0.73      1.00      0.84         8
          16       0.80      0.50

In [None]:
import torchaudio.transforms as T
import torch

# Masking pipeline: a frequency mask (parameter 10) followed by a time mask
# (parameter 20), applied in that order.
# NOTE(review): `augment` is not referenced by any other cell visible here —
# confirm whether it is still needed.
mask_stages = [
    T.FrequencyMasking(freq_mask_param=10),
    T.TimeMasking(time_mask_param=20),
]
augment = torch.nn.Sequential(*mask_stages)


In [None]:
MAX_FRAMES = 100  # every clip is forced to exactly this many embedding frames
# NOTE(review): presumably YAMNet emits only a handful of frames for a short
# clip, in which case most of each 100-frame sequence is zero padding — confirm.

X_seq, y_seq = [], []

for _, record in tqdm(df.iterrows(), total=len(df)):
    file_path = os.path.join(AUDIO_PATH, record['filename'])
    label = record['target']

    try:
        samples, _sr = librosa.load(file_path, sr=16000, mono=True)
        samples = tf.convert_to_tensor(samples, dtype=tf.float32)

        _, frame_embeddings, _ = yamnet_model(samples)
        frames = frame_embeddings.numpy()  # shape: (frames, 1024)

        # Fix the length: copy up to MAX_FRAMES frames into a zero-filled
        # buffer. Short clips end up zero-padded at the end, long clips are cut.
        fixed = np.zeros((MAX_FRAMES, frames.shape[1]), dtype=frames.dtype)
        keep = min(frames.shape[0], MAX_FRAMES)
        fixed[:keep] = frames[:keep]

        X_seq.append(fixed)
        y_seq.append(label)

    except Exception as e:
        # Best effort: report the clip that could not be processed and move on.
        print(f"Failed: {file_path} → {e}")

X_seq = np.array(X_seq)  # shape: (N, 100, 1024)
y_seq = np.array(y_seq)

100%|██████████| 2000/2000 [01:01<00:00, 32.68it/s]


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Train/Test split (stratified on the labels, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq, test_size=0.2, stratify=y_seq, random_state=42)

# Convert to PyTorch tensors (float features, integer class ids)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# Datasets
train_ds = TensorDataset(X_train, y_train)
test_ds = TensorDataset(X_test, y_test)

# Carve a validation subset (10%) out of the training data.
val_pct = 0.1
val_size = int(val_pct * len(train_ds))
train_size = len(train_ds) - val_size

# Seed the split explicitly: without a generator, random_split draws from the
# global RNG, so the validation set (and every validation accuracy derived
# from it) would change on each run even though the train/test split is fixed.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set = random_split(
    train_ds, [train_size, val_size], generator=split_generator)

# DataLoaders: only the training batches are shuffled.
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32)

test_loader = DataLoader(test_ds, batch_size=32)


In [None]:
class YAMNet1DCNN(nn.Module):
    """1-D CNN classifier over a sequence of 1024-dimensional embedding frames.

    Input:  tensor of shape (batch, frames, 1024).
    Output: tensor of shape (batch, num_classes) with unnormalised class scores.

    Three Conv1d -> BatchNorm1d -> ReLU stages slide along the frame axis. The
    first two are each followed by a max-pool of size 2, the third by an
    adaptive max-pool that collapses the remaining frames to one value per
    channel. Dropout (p=0.4) sits between stage two and stage three.
    """

    def __init__(self, num_classes=50):
        super().__init__()
        # Stage 1: 1024 embedding features in, 512 channels out.
        self.conv1 = nn.Conv1d(in_channels=1024, out_channels=512, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(num_features=512)
        self.pool1 = nn.MaxPool1d(kernel_size=2)

        # Stage 2: 512 -> 256 channels, then dropout.
        self.conv2 = nn.Conv1d(in_channels=512, out_channels=256, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm1d(num_features=256)
        self.pool2 = nn.MaxPool1d(kernel_size=2)
        self.dropout = nn.Dropout(p=0.4)

        # Stage 3: 256 -> 128 channels, pooled down to a single time step.
        self.conv3 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(num_features=128)
        self.pool3 = nn.AdaptiveMaxPool1d(output_size=1)

        # Linear classification head.
        self.fc = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        # Conv1d wants channels before time: (B, frames, 1024) -> (B, 1024, frames)
        feats = x.transpose(1, 2)

        early_stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
        )
        for conv, norm, pool in early_stages:
            feats = pool(torch.relu(norm(conv(feats))))

        feats = self.dropout(feats)
        feats = self.pool3(torch.relu(self.bn3(self.conv3(feats))))

        # (B, 128, 1) -> (B, 128) -> (B, num_classes)
        return self.fc(feats.squeeze(-1))


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class YAMNet1DCNN_Improved(nn.Module):
    """1-D CNN classifier over a sequence of embedding frames.

    Three Conv1d -> BatchNorm1d -> GELU stages run along the frame axis (the
    first two followed by max-pooling of size 2). The result is averaged over
    the remaining frames, passed through dropout, and fed to a linear head.

    Args:
        num_classes: number of output classes (size of the final layer).
        in_channels: feature size of each input frame. Defaults to 1024, the
            embedding width used throughout this notebook.
        dropout: dropout probability applied to the pooled 128-d vector just
            before the classifier. The default of 0.85 is a very heavy setting
            (most pooled features are dropped during training); it is exposed
            as a parameter so it can be tuned without editing the class.

    Input:  tensor of shape (batch, frames, in_channels).
    Output: tensor of shape (batch, num_classes) with unnormalised class scores.

    The layer attribute names are part of the checkpoint format (state_dict
    keys) and must not be renamed.
    """

    def __init__(self, num_classes=50, in_channels=1024, dropout=0.85):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, 512, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(512)
        self.pool1 = nn.MaxPool1d(2)

        self.conv2 = nn.Conv1d(512, 256, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm1d(256)
        self.pool2 = nn.MaxPool1d(2)

        self.conv3 = nn.Conv1d(256, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(128)

        # Average over whatever number of frames is left after the two pools.
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(128, num_classes)

        self._init_weights()

    def _init_weights(self):
        """Kaiming-normal init for conv layers, Xavier-uniform for linear layers; biases zeroed."""
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (B, frames, C) → (B, C, frames) as Conv1d expects
        x = self.pool1(F.gelu(self.bn1(self.conv1(x))))
        x = self.pool2(F.gelu(self.bn2(self.conv2(x))))
        x = F.gelu(self.bn3(self.conv3(x)))
        x = self.global_pool(x).squeeze(-1)  # (B, 128, 1) → (B, 128)
        x = self.dropout(x)
        return self.fc(x)


In [None]:
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score  # ✅ Needed for final test acc
from tqdm import tqdm  # ✅ Correct import
import os

# ✅ Ensure this is defined earlier in the notebook
# from your_model_file import YAMNet1DCNN_Improved

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = YAMNet1DCNN_Improved().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Halve the learning rate when validation accuracy has not improved for
# 15 epochs (mode='max' because a higher accuracy is better).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=15,
    min_lr=1e-6
)

NUM_EPOCHS = 200
SAVE_THRESHOLD = 85  # val accuracy (%) at or above which a checkpoint file is written

best_val_acc = 0
best_state = None  # in-memory copy of the weights with the best val accuracy
# Early-stopping patience must be smaller than NUM_EPOCHS to have any effect,
# and larger than the scheduler patience so the LR can be reduced first.
patience = 40
no_improve_epochs = 0
saved_model_name = None

for epoch in range(1, NUM_EPOCHS + 1):
    # ---- Training pass ----
    model.train()
    total_loss = 0  # sum of per-batch losses over the epoch
    correct = 0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        correct += (preds.argmax(1) == yb).sum().item()

    train_acc = 100. * correct / len(train_loader.dataset)

    # 🔍 Validation
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            preds = model(xb)
            val_correct += (preds.argmax(1) == yb).sum().item()
            val_total += yb.size(0)
    # max(..., 1) avoids a ZeroDivisionError if the validation loader is empty.
    val_acc = 100. * val_correct / max(val_total, 1)

    scheduler.step(val_acc)

    print(f"Epoch {epoch}: Loss={total_loss:.2f}, Train Acc={train_acc:.2f}%, Val Acc={val_acc:.2f}%")

    # ✅ Save a checkpoint file whenever val_acc reaches the threshold
    # (the epoch number in the name keeps equal-accuracy checkpoints apart).
    if val_acc >= SAVE_THRESHOLD:
        val_str = f"{val_acc:.2f}".replace('.', '_')
        saved_model_name = f"Model_{val_str}_Epoch{epoch}.pt"
        torch.save(model.state_dict(), saved_model_name)
        print(f"✅ Saved model: {saved_model_name} (Val Acc: {val_acc:.2f}%)")

    # Early-stopping bookkeeping is independent of the save threshold: every
    # epoch either sets a new best (counter reset) or counts as "no improvement".
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        no_improve_epochs = 0
        # Clone the tensors: state_dict() returns references that keep changing
        # as training continues.
        best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
    else:
        no_improve_epochs += 1
        if no_improve_epochs >= patience:
            print("⛔ Early stopping triggered.")
            break

# Evaluate the weights that achieved the best validation accuracy rather than
# whatever the last epoch happened to produce.
if best_state is not None:
    model.load_state_dict(best_state)

model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        preds = model(xb)
        all_preds.append(preds.cpu())
        all_labels.append(yb)

# Labels are taken from the loader so predictions and targets are guaranteed
# to be aligned batch by batch.
all_preds = torch.cat(all_preds).argmax(1).numpy()
all_labels = torch.cat(all_labels).numpy()
acc = accuracy_score(all_labels, all_preds)

print(f"\n✅ Final Test Accuracy: {acc * 100:.2f}%")

Epoch 1: Loss=175.96, Train Acc=4.72%, Val Acc=22.50%
Epoch 2: Loss=156.49, Train Acc=14.72%, Val Acc=38.12%
Epoch 3: Loss=137.34, Train Acc=29.93%, Val Acc=51.88%
Epoch 4: Loss=117.24, Train Acc=42.71%, Val Acc=52.50%
Epoch 5: Loss=105.16, Train Acc=49.51%, Val Acc=62.50%
Epoch 6: Loss=95.39, Train Acc=54.03%, Val Acc=65.62%
Epoch 7: Loss=85.45, Train Acc=59.17%, Val Acc=66.88%
Epoch 8: Loss=78.09, Train Acc=63.40%, Val Acc=71.25%
Epoch 9: Loss=72.35, Train Acc=67.92%, Val Acc=71.88%
Epoch 10: Loss=68.59, Train Acc=67.99%, Val Acc=72.50%
Epoch 11: Loss=63.12, Train Acc=70.00%, Val Acc=71.88%
Epoch 12: Loss=60.53, Train Acc=70.00%, Val Acc=78.12%
Epoch 13: Loss=56.07, Train Acc=73.82%, Val Acc=77.50%
Epoch 14: Loss=50.26, Train Acc=75.35%, Val Acc=77.50%
Epoch 15: Loss=49.39, Train Acc=75.49%, Val Acc=79.38%
Epoch 16: Loss=46.57, Train Acc=77.01%, Val Acc=80.00%
Epoch 17: Loss=45.33, Train Acc=77.36%, Val Acc=81.25%
Epoch 18: Loss=41.15, Train Acc=81.67%, Val Acc=80.00%
Epoch 19: Loss=

In [None]:
import os
import re
import torch
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# Architecture the checkpoints belong to, and the device used for evaluation.
model_class = YAMNet1DCNN_Improved
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Collect every checkpoint in the current working directory whose name looks
# like Model_91_25_Epoch58.pt (prefix "Model_", suffix ".pt").
model_files = []
for entry in os.listdir():
    if entry.startswith("Model_") and entry.endswith(".pt"):
        model_files.append(entry)

# Sort key: recover the validation accuracy encoded in a checkpoint filename.
def extract_val_acc(file):
    """Return the validation accuracy encoded in a checkpoint filename.

    Two naming schemes are understood:
      * ``Model_91_25_Epoch58.pt`` -> 91.25 (integer part, underscore, decimals)
      * ``Model_92.pt``            -> 92.0  (integer part only)

    Args:
        file: checkpoint filename (or path) as a string.

    Returns:
        The accuracy as a float, or 0 when the name does not contain a
        ``Model_<digits>`` pattern.
    """
    # The decimal group is optional so that names without it still parse.
    match = re.search(r'Model_(\d+)(?:_(\d+))?', file)
    if match is None:
        return 0
    major, minor = match.groups()
    if minor is None:
        return float(major)
    return float(f"{major}.{minor}")

# Evaluate checkpoints in order of the validation accuracy in their names.
model_files.sort(key=extract_val_acc, reverse=True)

results = []  # (checkpoint filename, test accuracy in %)

for model_file in model_files:
    model = model_class().to(device)
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
    # machine (and vice versa); without it torch.load tries to restore the
    # tensors onto the device they were saved from.
    # NOTE: torch.load unpickles the file — only load checkpoints you trust.
    state_dict = torch.load(model_file, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for xb, yb in tqdm(test_loader, desc=f"Testing {model_file}"):
            xb = xb.to(device)
            preds = model(xb)
            all_preds.extend(preds.argmax(1).cpu().numpy())
            all_labels.extend(yb.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds) * 100
    results.append((model_file, acc))

# Print sorted results (best test accuracy first).
# NOTE: this ranking is for comparison only — picking a checkpoint because it
# scores best here makes its test accuracy an optimistic estimate.
results.sort(key=lambda x: x[1], reverse=True)

print("\n📊 Model Comparison Results:")
if not results:
    print("No Model_*.pt checkpoints found in the current directory.")
for name, acc in results:
    print(f"{name:<35} → Test Accuracy: {acc:.2f}%")


Testing Model_90_00_Epoch105.pt: 100%|██████████| 13/13 [00:00<00:00, 26.96it/s]
Testing Model_90_00_Epoch114.pt: 100%|██████████| 13/13 [00:00<00:00, 26.60it/s]
Testing Model_90_00_Epoch93.pt: 100%|██████████| 13/13 [00:00<00:00, 26.71it/s]
Testing Model_90_00_Epoch99.pt: 100%|██████████| 13/13 [00:00<00:00, 26.84it/s]
Testing Model_90.pt: 100%|██████████| 13/13 [00:00<00:00, 26.74it/s]
Testing Model_91.pt: 100%|██████████| 13/13 [00:00<00:00, 26.30it/s]
Testing Model_92.pt: 100%|██████████| 13/13 [00:00<00:00, 26.46it/s]


📊 Model Comparison Results:
Model_90_00_Epoch114.pt             → Test Accuracy: 89.00%
Model_90_00_Epoch99.pt              → Test Accuracy: 89.00%
Model_92.pt                         → Test Accuracy: 88.75%
Model_90_00_Epoch93.pt              → Test Accuracy: 88.50%
Model_90_00_Epoch105.pt             → Test Accuracy: 88.25%
Model_90.pt                         → Test Accuracy: 85.50%
Model_91.pt                         → Test Accuracy: 85.25%



