In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score

### Get Data

In [None]:
# Load the audio feature table from CSV. Later cells read its "label" column
# and take every column from positional index 4 onward as the feature values.
df = pd.read_csv('audio_data.csv')

In [None]:
# Extract MFCC feature columns (every column from positional index 4 onward).
X = df.iloc[:, 4:].values

# Convert string labels to class indices (0 for cat, 1 for dog).
label_to_index = {"cat": 0, "dog_barking": 1}
encoded_labels = df["label"].map(label_to_index)

# Series.map leaves NaN for any value missing from the mapping. Fail loudly
# here rather than letting NaN targets flow into the label tensors and the
# accuracy metric further down.
unmapped_mask = encoded_labels.isna()
if unmapped_mask.any():
    unknown_labels = sorted(df.loc[unmapped_mask, "label"].astype(str).unique())
    raise ValueError(f"Unmapped values in 'label' column: {unknown_labels}")
y = encoded_labels.values

# Train-test split: 20% held out, fixed random_state so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
def reshape_mfcc(mfcc_vector, height=40, width=33):
    """Zero-pad a flat MFCC vector on the right and fold it into a 2-D grid.

    Args:
        mfcc_vector: 1-D sequence of coefficients, expected to hold at most
            ``height * width`` values.
        height: Number of rows in the result.
        width: Number of columns in the result.

    Returns:
        Array of shape ``(height, width)`` containing the input values in
        row-major order, followed by zeros.
    """
    target_len = height * width
    missing = target_len - len(mfcc_vector)
    # Pad only at the end so the original coefficients keep their positions.
    flat = np.pad(mfcc_vector, (0, missing), mode='constant')
    return np.reshape(flat, (height, width))

# Fold every flat MFCC row into a 2-D grid via reshape_mfcc.
# Note: this rebinds X_train / X_test to the reshaped arrays.
X_train = np.array(list(map(reshape_mfcc, X_train)))
X_test = np.array(list(map(reshape_mfcc, X_test)))

# Features become float32 tensors with an extra axis inserted at position 1;
# labels become int64 tensors.
X_train_tensor = torch.unsqueeze(torch.tensor(X_train, dtype=torch.float32), 1)
X_test_tensor = torch.unsqueeze(torch.tensor(X_test, dtype=torch.float32), 1)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


In [None]:
class MFCCDataset(Dataset):
    """Dataset pairing each feature sample with the label at the same index."""

    def __init__(self, X, y):
        # Stored as given; indexing is delegated to these containers.
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from the feature container."""
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# Wrap the train / test tensors in datasets.
train_dataset = MFCCDataset(X=X_train_tensor, y=y_train_tensor)
test_dataset = MFCCDataset(X=X_test_tensor, y=y_test_tensor)

# Batch loaders (32 samples per batch): the training data is reshuffled,
# the test data keeps its order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


### Create Model

In [None]:
class SimpleRNN(nn.Module):
    """Recurrent classifier: an ``nn.RNN`` stack followed by a linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of features in the recurrent hidden state.
        output_size: Number of values produced per sample.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()

        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

        # Linear head applied to the hidden features of one time step.
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run the sequence through the RNN and project its last time step.

        A 4-D input has dimension 1 squeezed away first (a no-op unless that
        dimension has size 1). Returns a ``(batch, output_size)`` tensor.
        """
        if x.dim() == 4:
            x = x.squeeze(1)

        # Zero initial hidden state, created on the same device as the input.
        batch_size = x.size(0)
        initial_hidden = torch.zeros(
            self.rnn.num_layers, batch_size, self.rnn.hidden_size, device=x.device
        )

        sequence_out, _ = self.rnn(x, initial_hidden)

        # Only the output at the final time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Hyperparameters
input_size = 33   # Size of the input features (values per time step)
hidden_size = 20  # Number of features in the hidden state
output_size = 2   # Size of the output (ADJUST FOR NUMBER OF CLASSES)
num_layers = 2    # Number of RNN layers

# Seed PyTorch's global RNG so that weight initialisation, and the batch
# shuffling drawn from it afterwards, repeat on a fresh Restart & Run All.
# This mirrors the fixed random_state already used for the train/test split.
torch.manual_seed(42)

# Instantiate the RNN model
model = SimpleRNN(input_size, hidden_size, output_size, num_layers)


### Train Model

In [None]:
# Loss and optimiser: cross-entropy on the model outputs, Adam with lr=0.001.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Number of full passes over train_loader.
num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch_inputs, batch_labels in train_loader:
        # Standard step: reset gradients, forward, backward, parameter update.
        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Accumulate the scalar loss so the epoch average can be reported.
        running_loss += batch_loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

print("Training complete!")


In [None]:
# Score the trained model on the test loader: eval mode, no gradient tracking.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        logits = model(batch_inputs)
        # Predicted class is the index of the largest output along dim 1.
        batch_preds = torch.max(logits, 1).indices
        all_preds.extend(batch_preds.numpy())
        all_labels.extend(batch_labels.numpy())

accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy}")


# Iterating Hyperparameters #

In [None]:
def full_model_evaluation(input_size, hidden_size, output_size, num_layers, training_loops):
    """Train a fresh ``SimpleRNN`` and return its accuracy on the test data.

    Batches are read from the notebook-level ``train_loader`` (training) and
    ``test_loader`` (evaluation).

    Args:
        input_size: Size of the input features.
        hidden_size: Number of features in the hidden state.
        output_size: Size of the output (number of classes).
        num_layers: Number of RNN layers.
        training_loops: Number of epochs, i.e. full passes over ``train_loader``.

    Returns:
        The value of ``accuracy_score`` for the predictions on ``test_loader``.
    """
    # Instantiate the RNN model together with its loss and optimiser.
    model = SimpleRNN(input_size, hidden_size, output_size, num_layers)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Honour the caller's epoch budget. This was previously hard-coded to 100,
    # which silently ignored the `training_loops` argument.
    num_epochs = training_loops

    # Training loop
    model.train()
    for _ in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    print("Training complete!")

    # Evaluation: eval mode, no gradient tracking.
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            # Predicted class is the index of the largest output along dim 1.
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.numpy())
            all_labels.extend(labels.numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    print(f"{(hidden_size, num_layers)}Test Accuracy: {accuracy}")
    return accuracy


In [None]:
# Baseline hyperparameters for the search cells below
input_size, output_size = 33, 2  # input feature size; output size (ADJUST FOR NUMBER OF CLASSES)
hidden_size, num_layers = 33, 2  # hidden-state features; number of RNN layers
training_loops = 100             # number of training loops

In [None]:
# Grid search: train one model per (hidden_size, num_layers) pair and record
# its test accuracy under that pair. The loop variables have their own names
# so the search does not overwrite the hidden_size / num_layers config values.
test_accuracy = {}
for candidate_hidden in range(15, 46, 3):  # ADJUST AS NEEDED
    for candidate_layers in range(1, 11):  # ADJUST AS NEEDED
        test_accuracy[(candidate_hidden, candidate_layers)] = full_model_evaluation(
            input_size, candidate_hidden, output_size, candidate_layers, training_loops
        )

# The best configuration is the one with the HIGHEST accuracy. The previous
# `min(d, key=d.get)` referenced an undefined name and would have selected
# the worst entry.
best = max(test_accuracy, key=test_accuracy.get)
print("Best result is", best)