In [225]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score

### Get Data

In [226]:
# Read the feature table from a CSV file; the relative path is resolved
# against the kernel's current working directory.
DATA_PATH = 'audio_data.csv'
df = pd.read_csv(DATA_PATH)

In [227]:
# Extract MFCC feature columns.
# NOTE(review): assumes the first four columns are non-feature metadata and
# that every column from index 4 onward is an MFCC value -- confirm against
# the CSV schema.
X = df.iloc[:, 4:].values

# Convert labels to integer class ids (0 for cat, 1 for dog barking).
LABEL_TO_ID = {"cat": 0, "dog_barking": 1}
label_ids = df["label"].map(LABEL_TO_ID)

# Series.map yields NaN for any value that is not a key of the mapping.
# Fail loudly here instead of letting NaN reach the later conversion of the
# labels to an integer tensor.
unmapped = label_ids.isna()
if unmapped.any():
    unknown_labels = sorted(df.loc[unmapped, "label"].astype(str).unique())
    raise ValueError(f"Unmapped values in 'label' column: {unknown_labels}")

y = label_ids.astype("int64").values

# Train-test split: 20% held out, fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [228]:
def reshape_mfcc(mfcc_vector, height=40, width=33):
    """Zero-pad MFCC values to ``height * width`` entries and shape them as 2-D.

    The input is flattened first, so an array that already has shape
    ``(height, width)`` passes through unchanged. This keeps the function
    safe to apply again to its own output.

    Args:
        mfcc_vector: Array-like of feature values holding at most
            ``height * width`` entries in total.
        height: Number of rows in the returned array.
        width: Number of columns in the returned array.

    Returns:
        numpy.ndarray of shape ``(height, width)``. The input values come
        first in row-major order, followed by zeros.

    Raises:
        ValueError: If the input holds more than ``height * width`` values.
    """
    flat = np.asarray(mfcc_vector).ravel()
    capacity = height * width

    # np.pad rejects a negative pad width with an unrelated-looking message,
    # so report the real problem (too many values) explicitly.
    if flat.size > capacity:
        raise ValueError(
            f"MFCC vector has {flat.size} values but only "
            f"{height} * {width} = {capacity} fit in the target shape"
        )

    padded = np.pad(flat, (0, capacity - flat.size), mode='constant')
    return padded.reshape(height, width)

# Lay every flat MFCC row out as a 2-D grid. X_train / X_test are rebound
# here, so after this cell they hold the reshaped arrays rather than the
# flat rows produced by the split.
X_train = np.array(list(map(reshape_mfcc, X_train)))
X_test = np.array(list(map(reshape_mfcc, X_test)))

# Features: float32 tensors with a singleton axis inserted at position 1.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).unsqueeze(dim=1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(dim=1)

# Labels: int64 (torch.long) tensors.
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


In [229]:
class MFCCDataset(Dataset):
    """Map-style dataset that pairs each feature entry with its label by position."""

    def __init__(self, X, y):
        """Store the features and labels.

        Args:
            X: Indexable collection of features.
            y: Indexable collection of labels, one per entry of ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length.
        """
        # __len__ is driven by X alone, so a length mismatch would otherwise
        # surface as an IndexError mid-iteration (y shorter) or go unnoticed
        # (y longer).
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(feature, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap the train / test tensors as datasets.
train_dataset = MFCCDataset(X_train_tensor, y_train_tensor)
test_dataset = MFCCDataset(X_test_tensor, y_test_tensor)

# Batch both splits identically; only the training split is shuffled.
BATCH_SIZE = 32
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


### Create Model

In [230]:
class CNN_Audio(nn.Module):
    """Two-block convolutional classifier for single-channel 2-D feature maps.

    Each block is a 3x3 same-padding convolution, ReLU, and a 2x2 max-pool.
    The pooled maps are flattened and passed through a 128-unit hidden layer
    to ``num_classes`` output logits.

    Args:
        input_height: Height of the input feature map.
        input_width: Width of the input feature map.
        num_classes: Number of output logits.

    The defaults (40, 33, 2) reproduce the original fixed architecture,
    whose first linear layer takes ``32 * 10 * 8`` inputs.

    Raises:
        ValueError: If the input is too small to survive two 2x2 poolings.
    """

    def __init__(self, input_height=40, input_width=33, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # The convolutions keep the spatial size (3x3 kernel, padding 1) and
        # each 2x2 / stride-2 pool halves it with floor rounding, so the
        # flattened size is derived from the input size rather than
        # hard-coded.
        pooled_height = (input_height // 2) // 2
        pooled_width = (input_width // 2) // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"Input of size {input_height}x{input_width} is too small "
                "for two 2x2 pooling stages"
            )

        self.fc1 = nn.Linear(32 * pooled_height * pooled_width, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits for a batch.

        Args:
            x: Tensor of shape ``(batch, 1, input_height, input_width)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``. No softmax is applied.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CNN_Audio()


### Train Model

In [231]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 100


def _run_training_epoch(net, loader, loss_fn, opt):
    """Run one optimisation pass over ``loader`` and return the summed batch losses."""
    net.train()
    loss_sum = 0.0
    for batch_x, batch_y in loader:
        opt.zero_grad()
        batch_loss = loss_fn(net(batch_x), batch_y)
        batch_loss.backward()
        opt.step()
        loss_sum += batch_loss.item()
    return loss_sum


# Training loop: the reported value is the mean of the per-batch losses.
for epoch in range(num_epochs):
    running_loss = _run_training_epoch(model, train_loader, criterion, optimizer)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

print("Training complete!")


Epoch 1/100, Loss: 0.5141658229487283
Epoch 2/100, Loss: 0.34555984820638386
Epoch 3/100, Loss: 0.251598732812064
Epoch 4/100, Loss: 0.2062331001673426
Epoch 5/100, Loss: 0.21343709102698735
Epoch 6/100, Loss: 0.1787304271544729
Epoch 7/100, Loss: 0.13520250150135585
Epoch 8/100, Loss: 0.10796044447592326
Epoch 9/100, Loss: 0.07323920247810227
Epoch 10/100, Loss: 0.05864212316061769
Epoch 11/100, Loss: 0.04954956618270704
Epoch 12/100, Loss: 0.03787895504917417
Epoch 13/100, Loss: 0.025203340952949866
Epoch 14/100, Loss: 0.018207392123128687
Epoch 15/100, Loss: 0.012534309444682938
Epoch 16/100, Loss: 0.009812285059264727
Epoch 17/100, Loss: 0.007876844644280416
Epoch 18/100, Loss: 0.005780015140771866
Epoch 19/100, Loss: 0.0048735115643856785
Epoch 20/100, Loss: 0.0039923881413415074
Epoch 21/100, Loss: 0.0035388953651168515
Epoch 22/100, Loss: 0.0028887545257540686
Epoch 23/100, Loss: 0.002541477617342025
Epoch 24/100, Loss: 0.0022489774440016064
Epoch 25/100, Loss: 0.001944632485641

In [233]:
# Switch to inference mode and score the held-out split.
model.eval()
all_preds = []
all_labels = []

# Gradients are not needed for scoring.
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        logits = model(batch_x)
        # The predicted class is the index of the largest logit in each row.
        predicted = torch.max(logits, dim=1).indices
        all_preds.extend(predicted.numpy())
        all_labels.extend(batch_y.numpy())

accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy}")


Test Accuracy: 0.9285714285714286
