In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!ls gdrive/MyDrive

'1582813891324_gyroscope (1).gdoc'		 ip_project.py.gdoc
 1582813891324_gyroscope.gdoc			'Mailing List for Thesis.gsheet'
 1582822735425_1582813891324_gyroscope.gdoc	'Memes by Nilay Girgaonkar'
 20210316-AnswerKey.gdoc			'MS Unis .gdoc'
'2021AAPS0566H Nilay Girgaonkar Movie Posters'	 Nilay_Girgaonkar_Resume_Google.pdf
 2C.zip						 Open_Source_Robotic_Arm__Lite_Arm_i2__480446
'3-2 Expenditure.gsheet'			 Portfolio.gsite
'4-1 Expenses.gsheet'				'Resume_Nilay_Girgaonkar (1).pdf'
'BIT_PERFMNC1 (1) (1).pdf'			 Resume_Nilay_Girgaonkar.pdf
'BIT_PERFMNC1 (1).pdf'				'Structure-From_Motion Project Cope.gdoc'
 code_for_hw8					'Summer Internship Expenses.gsheet'
'Colab Notebooks'				'Super Meat Boy Speedrun'
'DOOM Stuff'					 test-sounds
'Getting started.pdf'				'WhatsApp Chat with Aditya Das (1).gdoc'
'Google Earth'					'WhatsApp Chat with Aditya Das (2).gdoc'
 gyroscope.gdoc					'WhatsApp Chat with Aditya Das.gdoc'
'I hate tea.gdoc'


In [3]:
!unzip gdrive/MyDrive/2C.zip

Archive:  gdrive/MyDrive/2C.zip
   creating: 2C/
   creating: 2C/fire/
  inflating: 2C/fire/Fire.just (1).wav  
  inflating: 2C/fire/Fire.just (10).wav  
  inflating: 2C/fire/Fire.just (100).wav  
  inflating: 2C/fire/Fire.just (101).wav  
  inflating: 2C/fire/Fire.just (102).wav  
  inflating: 2C/fire/Fire.just (103).wav  
  inflating: 2C/fire/Fire.just (104).wav  
  inflating: 2C/fire/Fire.just (105).wav  
  inflating: 2C/fire/Fire.just (106).wav  
  inflating: 2C/fire/Fire.just (107).wav  
  inflating: 2C/fire/Fire.just (108).wav  
  inflating: 2C/fire/Fire.just (109).wav  
  inflating: 2C/fire/Fire.just (11).wav  
  inflating: 2C/fire/Fire.just (110).wav  
  inflating: 2C/fire/Fire.just (111).wav  
  inflating: 2C/fire/Fire.just (112).wav  
  inflating: 2C/fire/Fire.just (113).wav  
  inflating: 2C/fire/Fire.just (114).wav  
  inflating: 2C/fire/Fire.just (115).wav  
  inflating: 2C/fire/Fire.just (116).wav  
  inflating: 2C/fire/Fire.just (117).wav  
  inflating: 2C/fire/Fire.just

In [None]:
!ls /content/2C/fire

'Fire.just (100).wav'  'Fire.mine (174).wav'   'Fire.noise (253).wav'  'Fire.noise (82).wav'
'Fire.just (101).wav'  'Fire.mine (175).wav'   'Fire.noise (254).wav'  'Fire.noise (83).wav'
'Fire.just (102).wav'  'Fire.mine (176).wav'   'Fire.noise (255).wav'  'Fire.noise (84).wav'
'Fire.just (103).wav'  'Fire.mine (177).wav'   'Fire.noise (256).wav'  'Fire.noise (85).wav'
'Fire.just (104).wav'  'Fire.mine (178).wav'   'Fire.noise (257).wav'  'Fire.noise (86).wav'
'Fire.just (105).wav'  'Fire.mine (179).wav'   'Fire.noise (258).wav'  'Fire.noise (87).wav'
'Fire.just (106).wav'  'Fire.mine (17).wav'    'Fire.noise (259).wav'  'Fire.noise (88).wav'
'Fire.just (107).wav'  'Fire.mine (180).wav'   'Fire.noise (25).wav'   'Fire.noise (89).wav'
'Fire.just (108).wav'  'Fire.mine (181).wav'   'Fire.noise (260).wav'  'Fire.noise (8).wav'
'Fire.just (109).wav'  'Fire.mine (182).wav'   'Fire.noise (261).wav'  'Fire.noise (90).wav'
'Fire.just (10).wav'   'Fire.mine (183).wav'   'Fire.noise (262).wav'  

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio
from torchaudio.transforms import MelSpectrogram
from torch.utils.data import DataLoader, random_split, Dataset
import numpy as np
import os

In [5]:
LEARNING_RATE = 0.0005
BATCH_SIZE = 64
EPOCHS = 20
DROPOUT_RATE = 0.2
WINDOW_SIZE = 4
SLIDING_STEP = 2
FRAME_SIZE = 0.05
OVERLAP = 0.025
SAMPLE_RATE = 16000
FFT_POINTS = 1024
NUM_FILTERBANKS = 40
MIN_FREQ = 300
THRESHOLD = -52

In [6]:
mel_spectrogram = MelSpectrogram(
    sample_rate = SAMPLE_RATE,
    n_fft = FFT_POINTS,
    win_length = int(SAMPLE_RATE * FRAME_SIZE),
    hop_length = int(SAMPLE_RATE * (FRAME_SIZE - OVERLAP)),
    n_mels = NUM_FILTERBANKS,
    f_min = MIN_FREQ
)

In [7]:
class AudioDataset(Dataset):
  def __init__(self, root_dir, transform=None):
    self.root_dir = root_dir
    self.transform = transform
    self.data = []
    self.labels = []

    for label_name, label in zip(['fire', 'no_fire'], [1,0]):
      folder_path = os.path.join(root_dir, label_name)
      for filename in os.listdir(folder_path):
        if filename.endswith(".wav"):
          file_path = os.path.join(folder_path, filename)
          self.data.append(file_path)
          self.labels.append(label)

  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):

    file_path = self.data[idx]
    waveform, sample_rate = torchaudio.load(file_path)

    if sample_rate != SAMPLE_RATE:
      waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=SAMPLE_RATE)(waveform)

    if self.transform:
      mel_spec = self.transform(waveform)
    else:
      mel_spec = mel_spectrogram(waveform)

    mel_spec_db = 10 * torch.log10(mel_spec + 1e-6)
    mel_spec_db = torch.clip(mel_spec_db, min=THRESHOLD)

    mel_spec_db = mel_spec_db.squeeze(0)

    label = self.labels[idx]
    return mel_spec_db, label

class AudioCNN(nn.Module):
  def __init__(self):
    super(AudioCNN, self).__init__()
    self.conv1 = nn.Conv1d(in_channels=NUM_FILTERBANKS, out_channels=8, kernel_size=3, padding = 1)
    self.pool1 = nn.MaxPool1d(kernel_size=2)
    self.conv2 = nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
    self.pool2  = nn.MaxPool1d(kernel_size=2)
    self.flatten = nn.Flatten()

    conv_output_size = self._get_conv_output_shape()
    self.fc1 = nn.Linear(conv_output_size, 64)

    self.dropout1 = nn.Dropout(DROPOUT_RATE)
    self.fc2 = nn.Linear(64, 2)
    self.dropout2 = nn.Dropout(DROPOUT_RATE)

  def forward(self, x):
    x = self.conv1(x)
    x = torch.relu(x)
    x = self.pool1(x)
    x = self.conv2(x)
    x = torch.relu(x)
    x = self.pool2(x)

    # Flatten dynamically based on current shape
    x = x.view(x.size(0), -1)
    x = self.fc1(x)
    x = self.dropout1(x)
    x = self.fc2(x)
    x = self.dropout2(x)
    x = torch.softmax(x, dim=1)
    return x

  def _get_conv_output_shape(self):
    dummy_input = torch.zeros(1, NUM_FILTERBANKS, 201)
    x = self.conv1(dummy_input)
    x = self.pool1(x)
    x = self.conv2(x)
    x = self.pool2(x)
    return x.numel()

def train_model(model, train_loader, val_loader, epochs, learning_rate):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr = learning_rate)
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5, verbose=True)

  for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
      inputs = inputs.squeeze(1)
      optimizer.zero_grad()

      outputs = model(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss / len(train_loader)}")

    # Validate
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
      for inputs, labels in val_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    validation_accuracy = 100 * correct / total
    print(f"Validation Accuracy: {validation_accuracy:.2f}%")

    scheduler.step(validation_accuracy)

In [8]:
# Define the path to the dataset folder
dataset_path = '/content/2C'

# Initialize the dataset and split it
dataset = AudioDataset(root_dir=dataset_path, transform=mel_spectrogram)
train_size = int(0.6 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Get a single batch from the data loader
data_iter = iter(train_loader)  # or val_loader for validation set
inputs, labels = next(data_iter)

model = AudioCNN()
# Pass the batch through the model
outputs = model(inputs)

# Print the output shape
print("Final output shape:", outputs.shape)

Final output shape: torch.Size([64, 2])


In [9]:
model = AudioCNN()
train_model(model, train_loader, val_loader, EPOCHS, LEARNING_RATE)



Epoch [1/20], Loss: 0.7017052306069268
Validation Accuracy: 69.06%
Epoch [2/20], Loss: 0.6222120876665469
Validation Accuracy: 61.19%
Epoch [3/20], Loss: 0.5380660174069581
Validation Accuracy: 85.49%
Epoch [4/20], Loss: 0.4982305908644641
Validation Accuracy: 71.85%
Epoch [5/20], Loss: 0.508966119201095
Validation Accuracy: 86.19%
Epoch [6/20], Loss: 0.46932700938648647
Validation Accuracy: 85.66%
Epoch [7/20], Loss: 0.45072073075506425
Validation Accuracy: 89.16%
Epoch [8/20], Loss: 0.4404770588433301
Validation Accuracy: 89.51%
Epoch [9/20], Loss: 0.42310405329421713
Validation Accuracy: 90.21%
Epoch [10/20], Loss: 0.4063851910608786
Validation Accuracy: 89.51%
Epoch [11/20], Loss: 0.4020348279564469
Validation Accuracy: 88.99%
Epoch [12/20], Loss: 0.39709389209747314
Validation Accuracy: 89.51%
Epoch [13/20], Loss: 0.39420809348424274
Validation Accuracy: 90.21%
Epoch [14/20], Loss: 0.39855712762585394
Validation Accuracy: 91.61%
Epoch [15/20], Loss: 0.38987013035350376
Validation 

In [10]:
model.eval()

correct = 0
total = 0

with torch.no_grad():
    for data, labels in test_loader:

        #data, labels = data.to(device), labels.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 92.66%


In [11]:
waveform, sample_rate = torchaudio.load('/content/gdrive/My Drive/test-sounds/randomnoise.wav')
if sample_rate != SAMPLE_RATE:
  resample_transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=SAMPLE_RATE)
  waveform = resample_transform(waveform)

mel_spectr = mel_spectrogram(waveform).squeeze(0)
mel_spectr -= mel_spectr.mean()
mel_spectr /= mel_spectr.std()

input_tensor = mel_spectr.unsqueeze(0)

model.eval()
with torch.no_grad():
    output = model(input_tensor)
    probabilities = output.squeeze()

print("Class probabilities:", probabilities.numpy())

Class probabilities: [0.56959224 0.4304077 ]


In [12]:
# Saving the model for use later

PATH = '/content/gdrive/MyDrive/fire-detection-audio-model.pth'
torch.save(model.state_dict(), PATH)

In [16]:
saved_model = AudioCNN()
saved_model.load_state_dict(torch.load(PATH))

  saved_model.load_state_dict(torch.load(PATH))


<All keys matched successfully>