In [18]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import librosa
from python_speech_features import mfcc
from playsound import playsound
import sounddevice as sd

In [3]:
class VoiceClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(VoiceClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out[:, -1, :])
        return out

In [4]:
input_size = 13  
hidden_size = 64  
output_size = 3  
learning_rate = 0.001
num_epochs = 100
batch_size = 16

In [5]:
model = VoiceClassifier(input_size, hidden_size, output_size)

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [19]:
import os
import librosa
import scipy.io.wavfile as wav

train_dir = 'sound'  
class_dirs = ['Noice', 'Silence', 'BabyCry'] 
train_data = []

for label, class_dir in enumerate(class_dirs):
    class_data = os.listdir(os.path.join(train_dir, class_dir))
    for audio_file in class_data:
        audio_path = os.path.join(train_dir, class_dir, audio_file)

        # Load audio file and convert to WAV format
        audio, sr = librosa.load(audio_path, sr=None)
        wav_path = os.path.splitext(audio_path)[0] + '.wav'
        wav.write(wav_path, sr, audio)

        # Read the WAV file and extract MFCC features
        sample_rate, audio = wav.read(wav_path)
        features = mfcc(audio, samplerate=sample_rate)

        # Append features and label to the train_data list
        train_data.append((features, label))




In [20]:
for epoch in range(num_epochs):
    running_loss = 0.0
    for features, labels in train_data:
        features = torch.tensor(features, dtype=torch.float32)
        labels = torch.tensor(labels)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(features.unsqueeze(0))

        #  loss
        loss = criterion(outputs, labels.unsqueeze(0))

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    if epoch%10==0 :
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_data):.4f}")

Epoch [1/100], Loss: 1.1857
Epoch [11/100], Loss: 0.1443
Epoch [21/100], Loss: 0.0476
Epoch [31/100], Loss: 0.0231
Epoch [41/100], Loss: 0.0135
Epoch [51/100], Loss: 0.0089
Epoch [61/100], Loss: 0.0065
Epoch [71/100], Loss: 0.0049
Epoch [81/100], Loss: 0.0038
Epoch [91/100], Loss: 0.0031


In [21]:
torch.save(model.state_dict(),"BabyCry.pth")

In [7]:
loaded_model=VoiceClassifier(input_size, hidden_size, output_size)
loaded_model.load_state_dict(torch.load('BabyCry.pth'))

<All keys matched successfully>

In [13]:
def process_audio(audio):
    features = mfcc(audio, samplerate=sample_rate)
    return features

def record_audio(duration, sample_rate):
    print("Recording audio...")
    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
    sd.wait() 
    print("Finished recording.")
    return audio.flatten()

In [None]:
sample_rate = 16000
duration = 8  
input_audio = record_audio(duration, sample_rate)
input_features = process_audio(input_audio)
input_features = torch.tensor(input_features, dtype=torch.float32).unsqueeze(0)

Recording audio...


In [51]:
with torch.no_grad():
    model.eval()
    output = model(input_features)
    probabilities = nn.functional.softmax(output, dim=1)
    predicted_label = torch.argmax(probabilities, dim=1).item()
    class_names = ['Noice', 'Silence', 'BabyCry']
    if probabilities[0,predicted_label]<0.5:
        predicted_class="Silence"
    else:
        predicted_class=class_names[predicted_label]
print(f"Predicted voice: {predicted_class}")

Predicted voice: Silence


In [52]:
if predicted_class=='BabyCry':
    playsound('Twinkle.mp3')

In [53]:
probabilities

tensor([[0.2263, 0.4127, 0.3610]])

tensor(0.5404)

2