In [109]:
# Library
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras.models as models
import tensorflow.keras.layers as layers
import IPython
import sklearn
import seaborn as sns
from sklearn.utils import shuffle

# %load_ext tensorboard

In [110]:
# Restore the previously saved Keras model from the 'export' path (resolved
# relative to the current working directory). The detection cell further down
# calls `new_model.predict(...)` on it.
new_model = models.load_model('export')





In [111]:
# Notebook-wide settings, kept as class attributes so helpers can receive them as `conf`.
class conf:
    """Audio-loading and mel-spectrogram parameters shared by the helper functions."""

    # --- audio ---
    sr = 16000                    # sample rate handed to librosa
    duration = 3                  # clip length in seconds
    samples = duration * sr       # clip length in samples

    # --- mel spectrogram ---
    n_mels = 128                  # number of mel bands
    n_fft = 20 * n_mels           # FFT window size
    hop_length = duration * 340   # hop between successive frames, in samples
    fmin = 20                     # lower edge of the mel filter bank
    fmax = sr // 2                # upper edge of the mel filter bank (half the sample rate)

    # --- training ---
    epochs = 30                   # presumably the training epoch count; unused in the cells shown here

def read_audio(conf, pathname, trim_long_data):
    """Load an audio file, strip surrounding silence and normalise its length.

    Parameters
    ----------
    conf : object
        Settings holder; `conf.sr` is the load sample rate and `conf.samples`
        the target length in samples.
    pathname : str
        Path of the audio file to read.
    trim_long_data : bool
        When true, a signal longer than `conf.samples` is cut down to its
        first `conf.samples` samples; when false it is returned at full length.

    Returns
    -------
    numpy.ndarray
        The waveform. Signals no longer than `conf.samples` are zero-padded on
        both sides (any odd sample goes to the right) to exactly that length.
    """
    signal, _ = librosa.load(pathname, sr=conf.sr)

    # librosa.effects.trim fails on an empty signal, so only trim when there is data.
    if len(signal) > 0:
        signal, _ = librosa.effects.trim(signal)  # default top_db (60)

    target = conf.samples
    if len(signal) <= target:
        # Too short (or exactly right): centre the signal inside a zero buffer.
        missing = target - len(signal)
        left = missing // 2
        return np.pad(signal, (left, missing - left), 'constant')

    if trim_long_data:
        signal = signal[:target]
    return signal

def audio_to_melspectrogram(conf, audio):
    """Turn a waveform into a dB-scaled mel spectrogram.

    All spectrogram parameters (sample rate, band count, hop, FFT size and the
    frequency range) are read from `conf`. The power spectrogram produced by
    librosa is converted with `librosa.power_to_db` before being returned.
    """
    mel_options = dict(
        sr=conf.sr,
        n_mels=conf.n_mels,
        hop_length=conf.hop_length,
        n_fft=conf.n_fft,
        fmin=conf.fmin,
        fmax=conf.fmax,
    )
    power_mels = librosa.feature.melspectrogram(y=audio, **mel_options)
    return librosa.power_to_db(power_mels)

def show_melspectrogram(conf, mels, title='Log-frequency power spectrogram'):
    """Plot a mel spectrogram with a dB colour bar and the given title.

    The time and mel axes are scaled using the sample rate, hop length and
    frequency range stored on `conf`. The figure is shown immediately.
    """
    axis_options = {'x_axis': 'time', 'y_axis': 'mel'}
    scale_options = {
        'sr': conf.sr,
        'hop_length': conf.hop_length,
        'fmin': conf.fmin,
        'fmax': conf.fmax,
    }
    librosa.display.specshow(mels, **axis_options, **scale_options)
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.show()

In [112]:
import librosa

# Path of a WAV recording. Nothing is loaded on this line.
# NOTE(review): `preprocess` below takes its own `file_path` argument, and the
# detection cell passes "download1.wav", so this module-level value is not read
# by any cell shown here.
file_path = "download.wav"
def preprocess(file_path, clip_duration=2, num_clips=3, seed=None):
    """Cut random fixed-length clips from an audio file and return their mel spectrograms.

    Parameters
    ----------
    file_path : str
        Audio file to load; it is resampled to `conf.sr`.
    clip_duration : int or float, default 2
        Length of every clip, in seconds.
    num_clips : int, default 3
        How many clips to draw.
    seed : int or None, default None
        When given, clip positions are drawn from a dedicated
        `np.random.RandomState(seed)` so the result is reproducible. When
        None, the global `np.random` state is used, as before.

    Returns
    -------
    numpy.ndarray
        The outputs of `audio_to_melspectrogram`, stacked along a new first
        axis of length `num_clips`.
    """
    sig, sr = librosa.load(file_path, sr=conf.sr)
    clip_len = int(sr * clip_duration)

    # A recording no longer than one clip used to crash in randint
    # (low >= high). Zero-pad it on both sides, the same way read_audio does,
    # so that exactly one full clip exists.
    if len(sig) < clip_len:
        missing = clip_len - len(sig)
        left = missing // 2
        sig = np.pad(sig, (left, missing - left), 'constant')

    last_start = len(sig) - clip_len
    rng = np.random if seed is None else np.random.RandomState(seed)

    x_data = []
    for _ in range(num_clips):
        # randint's upper bound is exclusive; +1 keeps the final valid start reachable.
        start_sample = rng.randint(0, last_start + 1)
        clip = sig[start_sample:start_sample + clip_len]

        # Feature extraction: one mel spectrogram per clip.
        x_data.append(audio_to_melspectrogram(conf, clip))

    return np.array(x_data)


In [113]:
def split_audio(audio_data, w, h, threshold_level, tolerence=10):
    """Locate loud segments in a waveform with a sliding-window amplitude gate.

    The mean absolute amplitude of each window is compared with a threshold
    derived from the start of the signal. A segment opens at the first window
    above the threshold and closes at a quiet window once more than
    `tolerence` quiet windows have been counted inside it.

    Parameters
    ----------
    audio_data : array-like
        The waveform samples.
    w : int
        Window length in samples.
    h : int
        Step between successive windows in samples.
    threshold_level : float
        Multiplier applied to the mean absolute amplitude of the first 25000
        samples to obtain the loudness threshold.
    tolerence : int, default 10
        Number of quiet windows tolerated inside a segment before it is closed.

    Returns
    -------
    list of [int, int]
        `[start, end]` sample indices of each detected segment. A segment that
        is still open when the signal ends is closed at `len(audio_data)`
        instead of being discarded.
    """
    split_map = []
    start = 0
    data = np.abs(audio_data)
    # Baseline loudness is estimated from the first 25000 samples only.
    threshold = threshold_level * np.mean(data[:25000])
    inside_sound = False
    near = 0  # quiet windows seen inside the current segment

    for i in range(0, len(data) - w, h):
        win_mean = np.mean(data[i:i + w])
        if win_mean > threshold:
            if not inside_sound:
                inside_sound = True
                start = i
        elif inside_sound and win_mean <= threshold:
            # NOTE(review): `near` accumulates over the whole segment and is not
            # reset when the signal becomes loud again - confirm this is intended.
            if near > tolerence:
                inside_sound = False
                near = 0
                split_map.append([start, i])
            else:
                near += 1

    # Previously a sound that lasted until the end of the signal (or was followed
    # by too few quiet windows) was silently lost; emit it up to the final sample.
    if inside_sound:
        split_map.append([start, len(data)])
    return split_map

In [114]:
# Class labels, one per line. Index 1 is 'chainsaw', the entry the detection
# cell reads from the model output (assumes the model's output order matches
# this list - TODO confirm).
classes = np.array(
    [
        'dog',
        'chainsaw',
        'crackling_fire',
        'helicopter',
        'rain',
        'crying_baby',
        'clock_tick',
        'sneezing',
        'rooster',
        'sea_waves',
    ],
    dtype="object",
)
classes

array(['dog', 'chainsaw', 'crackling_fire', 'helicopter', 'rain',
       'crying_baby', 'clock_tick', 'sneezing', 'rooster', 'sea_waves'],
      dtype=object)

In [120]:
# Detect chainsaw sounds in a recording.
# `preprocess` cuts a few random fixed-length clips out of the file and returns
# one mel spectrogram per clip; each spectrogram is then scored by the loaded model.

CHAINSAW_INDEX = 1         # position of 'chainsaw' in `classes`; assumed to match the model's output order - TODO confirm
# NOTE(review): if the model ends in a 10-way softmax, 0.09 is below the 0.1 a
# uniform prediction would give - confirm this threshold was tuned deliberately.
CHAINSAW_THRESHOLD = 0.09  # minimum score for reporting a chainsaw

sound_clips = preprocess("download1.wav")
for clip in sound_clips:
    # Add a leading batch axis and a trailing channel axis around the 2-D spectrogram.
    testing = np.expand_dims(clip, axis=(0, -1))

    pred = new_model.predict(testing)

    if pred[0][CHAINSAW_INDEX] > CHAINSAW_THRESHOLD:
        print("chainsaw")
    else:
        print("no chainsaw")
    
    

chainsaw
chainsaw
chainsaw
