In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score

### Get Data

In [5]:
# Read audio_data.csv (resolved relative to the current working directory)
df1 = pd.read_csv(filepath_or_buffer="audio_data.csv")

In [8]:
# Feature matrix: every column from position 4 onward.
# NOTE(review): assumes the first four columns are metadata (including
# "label") and the remaining ones are MFCC values — confirm against the CSV.
X = df1.iloc[:, 4:].to_numpy()

# Encode the string labels as class indices (0 for cat, 1 for dog barking)
label_map1 = {"cat": 0, "dog_barking": 1}
y_encoded1 = df1["label"].map(label_map1)

# Series.map yields NaN for any label that is missing from the mapping.
# Fail here, naming the offending values, instead of letting NaN targets
# flow silently into the split.
unmapped1 = df1.loc[y_encoded1.isna(), "label"].unique()
if len(unmapped1) > 0:
    raise ValueError(
        f"Labels without a class index in audio_data.csv: {sorted(map(str, unmapped1))}"
    )
y = y_encoded1.to_numpy(dtype=np.int64)

# Train-test split: hold out 20% of the rows, fixed seed for repeatability
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Read audio_data2.csv (resolved relative to the current working directory)
df2 = pd.read_csv('audio_data2.csv')

# Feature matrix: every column from position 4 onward.
# NOTE(review): assumes the first four columns are metadata (including
# "label") and the remaining ones are MFCC values — confirm against the CSV.
X = df2.iloc[:, 4:].to_numpy()

# Encode the ten animal labels as class indices 0-9
label_map2 = {
    "frog": 0,
    "donkey": 1,
    "cat": 2,
    "monkey": 3,
    "baby sheep": 4,
    "bird": 5,
    "lion": 6,
    "cow": 7,
    "dog": 8,
    "chicken": 9,
}
y_encoded2 = df2["label"].map(label_map2)

# Series.map yields NaN for any label that is missing from the mapping.
# Those NaNs would later be cast to integer targets, so stop here and name
# the offending values instead.
unmapped2 = df2.loc[y_encoded2.isna(), "label"].unique()
if len(unmapped2) > 0:
    raise ValueError(
        f"Labels without a class index in audio_data2.csv: {sorted(map(str, unmapped2))}"
    )
y = y_encoded2.to_numpy(dtype=np.int64)

# Train-test split: hold out 20% of the rows, fixed seed for repeatability
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
def reshape_mfcc(mfcc_vector, height=40, width=33):
    """Zero-pad a flat MFCC vector and reshape it to ``(height, width)``.

    Parameters
    ----------
    mfcc_vector : array-like, 1-D
        Flattened feature values; at most ``height * width`` entries.
    height, width : int
        Shape of the returned grid.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(height, width)``. The input fills the grid in
        row-major order and any remaining positions are zeros.

    Raises
    ------
    ValueError
        If the input is not one-dimensional, or if it has more than
        ``height * width`` entries (it would not fit without dropping data).
    """
    vec = np.asarray(mfcc_vector)
    if vec.ndim != 1:
        raise ValueError(
            f"mfcc_vector must be 1-D, got an array with shape {vec.shape}"
        )

    capacity = height * width
    if vec.size > capacity:
        # Without this check np.pad receives a negative pad width and fails
        # with a message that does not mention the real cause.
        raise ValueError(
            f"mfcc_vector has {vec.size} values but a {height}x{width} grid "
            f"holds only {capacity}"
        )

    padded = np.pad(vec, (0, capacity - vec.size), mode='constant')
    return padded.reshape(height, width)

# Turn each flat feature row of the second dataset into a 2-D grid
# (reshape_mfcc defaults: 40 x 33)
X_train = np.array(list(map(reshape_mfcc, X_train2)))
X_test = np.array(list(map(reshape_mfcc, X_test2)))

# Features become float32 tensors with a singleton axis inserted at dim 1
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)[:, None]
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)[:, None]

# Labels become int64 (torch.long) tensors
y_train_tensor = torch.tensor(y_train2, dtype=torch.long)
y_test_tensor = torch.tensor(y_test2, dtype=torch.long)


In [11]:
class MFCCDataset(Dataset):
    """Map-style dataset that pairs each feature sample with its label by index."""

    def __init__(self, X, y):
        """Store the features and labels.

        Parameters
        ----------
        X : indexable with ``len``
            Feature samples; ``X[i]`` is returned as the first item of sample ``i``.
        y : indexable with ``len``
            Labels; ``y[i]`` is returned as the second item of sample ``i``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` differ in length. ``__len__`` follows ``X``, so
            a mismatch would otherwise only surface as an IndexError mid-epoch
            (or silently ignore trailing labels).
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap the train / test tensors so they can be indexed as (sample, label) pairs
train_dataset, test_dataset = (
    MFCCDataset(X_train_tensor, y_train_tensor),
    MFCCDataset(X_test_tensor, y_test_tensor),
)

# Batches of 32; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


### Create Model

In [None]:
# Model for audio_data (2 output classes by default)
# NOTE: a later cell in this notebook defines CNN_Audio again and rebinds
# `model`; whichever of the two cells runs last wins.
class CNN_Audio(nn.Module):
    """Small CNN classifier for single-channel 2-D feature grids.

    Architecture: two blocks of conv(3x3, padding 1) -> ReLU -> 2x2 max-pool,
    followed by a 128-unit hidden layer and a linear output layer.

    Parameters
    ----------
    num_classes : int, default 2
        Number of output scores produced per sample.
    input_height, input_width : int, default 40 and 33
        Spatial size of the input grid; used to size the first linear layer.
        The defaults give the 32 * 10 * 8 flattened features of the original
        hard-coded layer.
    """

    def __init__(self, num_classes=2, input_height=40, input_width=33):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # The padded 3x3 convs keep the spatial size; each of the two 2x2
        # pools floor-halves it, so every dimension ends up at dim // 4.
        flat_features = 32 * (input_height // 4) * (input_width // 4)
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``.

        ``x`` must have shape ``(batch, 1, input_height, input_width)``.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CNN_Audio()


In [14]:
# Model for audio_data2 (10 output classes by default)
# NOTE: this definition replaces any CNN_Audio defined by an earlier cell and
# rebinds `model`.
class CNN_Audio(nn.Module):
    """Small CNN classifier for single-channel 2-D feature grids.

    Architecture: two blocks of conv(3x3, padding 1) -> ReLU -> 2x2 max-pool,
    followed by a 128-unit hidden layer and a linear output layer.

    Parameters
    ----------
    num_classes : int, default 10
        Number of output scores produced per sample.
    input_height, input_width : int, default 40 and 33
        Spatial size of the input grid; used to size the first linear layer.
        The defaults give the 32 * 10 * 8 flattened features of the original
        hard-coded layer.
    """

    def __init__(self, num_classes=10, input_height=40, input_width=33):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # The padded 3x3 convs keep the spatial size; each of the two 2x2
        # pools floor-halves it, so every dimension ends up at dim // 4.
        flat_features = 32 * (input_height // 4) * (input_width // 4)
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``.

        ``x`` must have shape ``(batch, 1, input_height, input_width)``.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CNN_Audio()


### Train Model

In [18]:
# Cross-entropy objective, optimised with Adam over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100


def _train_one_epoch(net, loader, loss_fn, opt):
    """Run one optimisation pass over ``loader`` and return the mean batch loss."""
    net.train()
    loss_total = 0.0
    for batch_x, batch_y in loader:
        opt.zero_grad()
        batch_loss = loss_fn(net(batch_x), batch_y)
        batch_loss.backward()
        opt.step()
        loss_total += batch_loss.item()
    return loss_total / len(loader)


# Training loop: one line of output per epoch
for epoch in range(num_epochs):
    mean_loss = _train_one_epoch(model, train_loader, criterion, optimizer)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}")

print("Training complete!")


Epoch 1/100, Loss: 0.038259437173390046
Epoch 2/100, Loss: 0.017985119161984094
Epoch 3/100, Loss: 0.03317824426966791
Epoch 4/100, Loss: 0.009852709735591466
Epoch 5/100, Loss: 0.005274671623108309
Epoch 6/100, Loss: 0.0024986936321231322
Epoch 7/100, Loss: 0.0011335747258272022
Epoch 8/100, Loss: 0.0006688549284053107
Epoch 9/100, Loss: 0.0005238059563383174
Epoch 10/100, Loss: 0.00044023484336391377
Epoch 11/100, Loss: 0.0003751760300824222
Epoch 12/100, Loss: 0.00033753744397612394
Epoch 13/100, Loss: 0.0003042651696887333
Epoch 14/100, Loss: 0.00027788082325759414
Epoch 15/100, Loss: 0.00025344269334499794
Epoch 16/100, Loss: 0.00023568619396358443
Epoch 17/100, Loss: 0.00021633745088295726
Epoch 18/100, Loss: 0.00019925290941430902
Epoch 19/100, Loss: 0.00018322311370866373
Epoch 20/100, Loss: 0.00017876076568567302
Epoch 21/100, Loss: 0.00016772969327296148
Epoch 22/100, Loss: 0.00015411332396279948
Epoch 23/100, Loss: 0.00014583982248530866
Epoch 24/100, Loss: 0.000133615364225

In [19]:
# Put the model in eval mode and collect predictions for the whole test set
model.eval()
all_preds = []
all_labels = []

# No gradients are needed while scoring
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        scores = model(batch_x)
        # Predicted class = index of the largest score in each row
        batch_preds = torch.max(scores, dim=1).indices
        all_preds += list(batch_preds.numpy())
        all_labels += list(batch_y.numpy())

accuracy = accuracy_score(y_true=all_labels, y_pred=all_preds)
print(f"Test Accuracy: {accuracy}")


Test Accuracy: 0.8669950738916257
