In [1]:
import librosa
import pandas as pd
import numpy as np
import soundfile as sf

def exclude(audio_file, labels_file, output_file="avila_filtered.wav"):
    """
    Remove every labelled interval from an audio file, keeping only the rest.

    The audio is cut into consecutive one-second frames (``sr`` samples each;
    a trailing partial frame is discarded). Every frame that overlaps a
    labelled interval is dropped; the surviving frames are concatenated,
    written to ``output_file`` and returned.

    :param audio_file: decimated audio file to process
    :param labels_file: decimated labels file to process (tab-separated, with
        'Begin Time (s)' and 'End Time (s)' columns)
    :param output_file: path the filtered audio is written to
    :return: tuple ``(sr, y_filtered)`` - the sample rate and the samples left
        after the labelled intervals were removed
    """
    # Load audio file; y is (n_samples,) for mono or (n_samples, channels)
    y, sr = sf.read(audio_file, dtype="float32")

    # Load labels file as pandas dataframe; rows without both a begin and an
    # end time cannot be placed on the timeline, so they are ignored
    df = pd.read_csv(labels_file, delimiter='\t')
    intervals = df.dropna(subset=['Begin Time (s)', 'End Time (s)'])

    # Convert start and end times to sample indices
    start_idx = librosa.time_to_samples(intervals['Begin Time (s)'].to_numpy(), sr=sr)
    end_idx = librosa.time_to_samples(intervals['End Time (s)'].to_numpy(), sr=sr)

    # Split into non-overlapping one-second frames along the sample axis.
    # A plain reshape (rather than librosa.util.frame) keeps any channel axis
    # intact and yields zero frames for audio shorter than one second.
    n_frames = len(y) // sr
    channel_shape = y.shape[1:]
    frames = y[:n_frames * sr].reshape((n_frames, sr) + channel_shape)
    mask = np.ones(n_frames, dtype=bool)

    # Frame k covers samples [k*sr, (k+1)*sr). An interval therefore touches
    # frames floor(start/sr) .. ceil(end/sr)-1; a zero-length interval still
    # removes the frame that contains it.
    first_frames = start_idx // sr
    stop_frames = np.maximum(-(-end_idx // sr), first_frames + 1)
    # Clamp at zero so a negative time cannot wrap around to the end of the mask
    first_frames = np.maximum(first_frames, 0)
    stop_frames = np.maximum(stop_frames, 0)
    for first, stop in zip(first_frames, stop_frames):
        mask[first:stop] = False

    # Keep only the unlabelled frames and flatten them back into a signal
    y_filtered = frames[mask].reshape((-1,) + channel_shape)

    sf.write(output_file, y_filtered, sr)
    return sr, y_filtered

In [2]:
# Strip the labelled intervals from one recording; the cell displays (sr, samples)
exclude(
    audio_file="6805.230201090825_processed.wav",
    labels_file="6805.230201090825-SS.txt",
)

(8000,
 array([-0.05908203, -0.06390381, -0.06500244, ..., -0.07589722,
        -0.07809448, -0.08074951], dtype=float32))

In [None]:
# import os
# import librosa
# import numpy as np
# import matplotlib.pyplot as plt
# from scipy import stats

# import pandas as pd
# import librosa
# import numpy as np
# import soundfile as sf
# from scipy.io import wavfile
# import pdb
# import glob
# import io





# import numpy as np




# import numpy as np

# import os
# import librosa
# import numpy as np

# import matplotlib.pyplot as plt
# import pandas as pd
# import librosa
# import numpy as np
# import soundfile as sf
# from scipy import stats
# from scipy.stats import multivariate_normal
# # import tensorflow_addons as tfa




# import pdb
# import glob





# import librosa
# import pandas as pd
# import numpy as np






# def instantiate_dataset(location, spec = 'stft', verbose=False, batch_size=90, running=False, labels=None):
#     """
#     Takes in a decimated WAV file and implements PCEN (and grabs its bounding box annotations)
    
#     PARAMETERS
#     ----------
#         location: string
#             Path of the decimated WAV file to read and stream
#         spec: string
#             Computes either the STFT or Mel Spectrogram in the stream before running PCEN
#         verbose: boolean
#             Indicates whether or not to make output excessively detailed
#     ----------
    
#     RETURNS
#     ----------
#         (pcen_blocks, sr) when running is True, otherwise (dataset, sr)
#     ----------
#     """

#     data,sr = sf.read(location,dtype='float32')
   
    
    
    
    
    
#     WINDOW_SIZE_SEC = 0.1498
#     HOP_LEN_SEC = 0.05
#     # Reads-in WAV file information (and annotation information)

#     # Parameters needed for the stream
#     n_fft = int(WINDOW_SIZE_SEC * sr)
#     hop_length = int(HOP_LEN_SEC * sr)

#     # Streaming processes larger audio files faster (block-wise processing)
#     # note: when hop_length < frame_length, neighboring frames overlap
#     stream = librosa.stream(location, 
#                             block_length=200, # num of frames per block (400 => 1 min)
#                             frame_length=n_fft, # num of samples per frame
#                             hop_length=hop_length, # num of samples between start of each frame
#                             mono=True,
#                             fill_value=0)

#     # Make an array to store the frequency-averaged PCEN values
#     pcen_blocks = []

#     # Initialize the PCEN filter delays to steady state
#     zi = None

#     # Create a handle for storing the block STFT outputs
#     D = None
#     counter=0
#     for y_block in stream:
#         counter+=1
#         # Makes sure to compute the Mel Spectrogram or STFT before implementing PCEN
#         if spec == 'mel' or spec == 'stft':
#             if spec == 'mel':
#                 # Compute Mel Spectrogram
#                 D = librosa.feature.melspectrogram(y=y_block, sr=sr, 
#                                                    n_fft=n_fft, hop_length=hop_length, 
#                                                    n_mels=300, fmax=FREQUENCY_MAX,
#                                                    center=False)
#             else:
#                 # Compute the STFT (without padding, so center=False)
#                 D = librosa.stft(y_block, n_fft=n_fft, hop_length=hop_length,
#                                  center=False, out=D)
#         else:
#             print(f'{spec} is not a valid spec parameter.')
#             break

#         # Compute PCEN on the magnitude spectrum, using initial delays
#         # returned from our previous call (if any)
#         # store the final delays for use as zi in the next iteration
#         P, zi = librosa.pcen(np.abs(D), sr=sr, hop_length=hop_length,
#                              zi=zi, return_zf=True)
#         test_cv(P)
#         pcen_blocks.append(P)
        
# #         print(P.shape)
#     pcen_blocks=np.array(pcen_blocks)
#     pcen_blocks=np.expand_dims(pcen_blocks, axis=-1)
#     if running:
#         return pcen_blocks,sr
#     dataset = tf.data.Dataset.from_tensor_slices(pcen_blocks)
#     dataset = dataset.shuffle(buffer_size=len(pcen_blocks))
#     dataset = dataset.batch(batch_size)
    
#     return dataset,sr

# def test_cv(reshaped_spectrogram):
#     import numpy as np
#     import cv2
#     import matplotlib.pyplot as plt

#     reshaped_spectrogram=np.uint8(np.hstack(reshaped_spectrogram)* 255)


#     # Convert the single-channel spectrogram to a grayscale image
# #     gray_image = cv2.cvtColor(reshaped_spectrogram, cv2.COLOR_GRAY2BGR)

#     # Apply QuickShift algorithm
# #     result = cv2.pyrMeanShiftFiltering(gray_image, 20, 30)  # Adjust the parameters (20 and 30) as needed

#     # Reshape the result back to the original spectrogram shape
# #     segmented_spectrogram = result.reshape(reshaped_spectrogram.shape)

#     # Overlay the segmented regions on top of the spectrogram
# #     print(reshaped_spectrogram.shape)
#     plt.imshow(reshaped_spectrogram, aspect='auto', cmap='hot')  # Display the original spectrogram
# #     plt.imshow(segmented_spectrogram, alpha=0.5, aspect='auto', cmap='cool')  # Overlay the segmented regions
#     plt.colorbar()

#     plt.show()


# def main():
#     reshaped_spectrogram,sr=instantiate_dataset("../decimated-wavs/671658014.180928183606_processed.wav",running=True)
    
# if __name__ == '__main__':
#     main()