In [1]:
import tensorflow as tf
import numpy as np
import librosa
import os
from sklearn.preprocessing import LabelEncoder

In [2]:
# Function to extract features from audio clips
def extract_features(file_path, mfcc=True, chroma=True, mel=True):
    with open(file_path, "rb") as f:
        signal, sr = librosa.load(f, sr=None)
    features = []
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40).T, axis=0)
        features.extend(mfccs)
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(y=signal, sr=sr).T,axis=0)
        features.extend(chroma)
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=signal, sr=sr).T,axis=0)
        features.extend(mel)
    return features

In [3]:
# Function to load dataset
def load_dataset(data_dir):
    X = []
    y = []
    for label in os.listdir(data_dir):
        label_dir = os.path.join(data_dir, label)
        for filename in os.listdir(label_dir):
            file_path = os.path.join(label_dir, filename)
            features = extract_features(file_path)
            X.append(features)
            y.append(label)
    return np.array(X), np.array(y)

In [4]:
# Prepare dataset
data_dir = "/Users/rakesh.rai/code/whistles/dataset"
X, y = load_dataset(data_dir)



In [5]:
# Split dataset into training and validation sets
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Build the model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [7]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [8]:
# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

# Train the model
model.fit(X_train, y_train_encoded, epochs=50, batch_size=32, validation_data=(X_val, y_val_encoded))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x176eb04d0>

In [9]:
# Evaluate the model
loss, accuracy = model.evaluate(X_val, y_val_encoded)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 100.00%


In [10]:
# Function to extract features from audio segments
def extract_features(segment, sr, mfcc=True, chroma=True, mel=True):
    features = []
    if mfcc:
        # Compute MFCCs (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=40)
        features.append(np.mean(mfccs, axis=1))  # Take mean along the time axis
    
    if chroma:
        # Compute chroma features
        chroma = librosa.feature.chroma_stft(y=segment, sr=sr)
        features.append(np.mean(chroma, axis=1))  # Take mean along the time axis
    
    if mel:
        # Compute Mel-scaled spectrogram
        mel_spectrogram = librosa.feature.melspectrogram(y=segment, sr=sr)
        features.append(np.mean(mel_spectrogram, axis=1))  # Take mean along the time axis
    
    return np.concatenate(features)  # Concatenate the feature arrays


# Function to detect pattern occurrences in a sound clip
def detect_pattern(file_path, threshold=0.5, window_size=1.0, hop_length=512):
    with open(file_path, "rb") as f:
        signal, sr = librosa.load(f, sr=None)
    
    occurrences = 0
    for i in range(0, len(signal) - hop_length, hop_length):
        segment = signal[i:i+hop_length]
        if len(segment) == hop_length:
            segment_features = extract_features(segment,sr)
            segment_features = np.expand_dims(segment_features, axis=0)  # Add batch dimension
            prediction = model.predict(segment_features)
            if prediction >= threshold:
                occurrences += 1
    return occurrences



In [11]:
# Example usage
sound_clip_path = "/Users/rakesh.rai/code/whistles/full_tracks/pressure-cooker-5431124yes.wav"
pattern_count = detect_pattern(sound_clip_path)
print(f"Number of pattern occurrences: {pattern_count}")



  return pitch_tuning(






Number of pattern occurrences: 89


In [12]:
def detect_continuous_class(predictions, threshold, min_duration):
    continuous_segments = []
    current_segment = None
    for timestamp, prediction in enumerate(predictions):
        if prediction > threshold:
            if current_segment is None:
                current_segment = {'start': timestamp, 'end': timestamp}
            else:
                current_segment['end'] = timestamp
        else:
            if current_segment is not None:
                segment_duration = current_segment['end'] - current_segment['start']
                if segment_duration >= min_duration:
                    continuous_segments.append(current_segment)
                current_segment = None
    
    # Check if the last segment satisfies the minimum duration
    if current_segment is not None:
        segment_duration = current_segment['end'] - current_segment['start']
        if segment_duration >= min_duration:
            continuous_segments.append(current_segment)
    
    return continuous_segments

In [15]:
# Usage
threshold = 0.7
min_duration = 5  # in seconds

audio_slices = tf.keras.utils.timeseries_dataset_from_array(wav, wav, sequence_length=48000, sequence_stride=48000, batch_size=1)
segments = detect_continuous_class(model.predict(audio_slices), threshold, min_duration)
print(segments)

NameError: name 'wav' is not defined

In [14]:
# Example usage
sound_clip_path = "/Users/rakesh.rai/code/whistles/full_tracks/pressure-cooker-5431124yes.wav"
pattern_count = detect_pattern(sound_clip_path)
print(f"Number of pattern occurrences: {pattern_count}")





Number of pattern occurrences: 89
