# 02_Split Wav Duration
#### Code to split audio wav files to 10s duration for consistency

In [18]:
#Import packages
import re
import matplotlib.pyplot as plt
import warnings
import numpy as np
import pandas as pd
import os
import pickle
import librosa
import librosa.display
from scipy.ndimage import zoom
from tqdm import tqdm
import soundfile as sf
import plotly.express as px

# Suppress the warning
warnings.filterwarnings('ignore', message='Trying to estimate tuning from empty frequency set.')

In [19]:
#Iteratively go through files
def walk_through_files(directory):
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".wav"):
                yield os.path.join(root, file)

In [45]:
#Splitting the WAVS

in_dir = fr'N:\Mark\Audio Diss\OCC_SVM_Data\yavari_2018\audios_part_2\GY3\Data\wav_to_split'
out_dir = fr'N:\Mark\Audio Diss\OCC_SVM_Data\yavari_2018\audios_part_2\GY3\Data\wav_split'

segment_duration = 10 #segment duration in seconds based on extracted gunshot duration
for fname in tqdm(walk_through_files(in_dir), desc='splitting files into wav'):
    audio, sample_rate = librosa.load(fname)
    duration = len(audio) / float(sample_rate)
    
    split = []
    segment_length = sample_rate * segment_duration
    num_sections = int(np.ceil(len(audio)/segment_length))
    
    for i in range(num_sections): #splitting the file
        t = audio[i * segment_length: (i+1) * segment_length]
        split.append(t)
        
    for i in range(num_sections): #saving the file
        strip_filetype = os.path.basename(fname[:-4])
        output = f'{strip_filetype}_{str(i)}.wav'
        sf.write(os.path.join(out_dir, output), split[i], sample_rate)

splitting files into wav: 20it [06:25, 19.28s/it]


In [3]:
#Processing Audio into dataframe - getting audio features
def process_audio_file_splits(fname):
    #stripping filetypes and dir from filename and split it to parts
    filename = os.path.basename(fname[:-4]) #strip dir
    f_parts = filename.split('_') #split at underscore
    rec_info, year_date, file_id, segment = f_parts
    
    #_Separating f_parts further
    #Only works for files following 'S4A03839_20180813_054429_99' convention
    r_type = rec_info[:2]
    r_name = rec_info[2:]
    year = year_date[:4]
    date = year_date[4:]
    
    #_loading audio for processing
    audio, sample_rate = librosa.load(fname)
    duration = len(audio)/float(sample_rate)
    
    #_creating Spectrograms and MFCC
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=128)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate)
    
    #_Creating Self-similarity matrix
    hop_length = 1024
    chroma = librosa.feature.melspectrogram(y=audio,sr=sample_rate, hop_length=hop_length)
    chroma_stack = librosa.feature.stack_memory(chroma, n_steps=10, delay=3)
    R = librosa.segment.recurrence_matrix(chroma_stack)
    R_aff = librosa.segment.recurrence_matrix(chroma_stack, metric='cosine', mode='affinity')
    ssm = R_aff
    
    #_Create shape (Modify if needed)
    desired_shape = (128,128)
    
    #_Normalise Spectrogram, MFCC, SSM for image creation
    log_mel_spectrogram_norm = (log_mel_spectrogram - np.min(log_mel_spectrogram)/(np.max(log_mel_spectrogram) - np.min(log_mel_spectrogram))) #log-min/max-min
    mfcc_norm = (mfcc - np.min(mfcc)) / (np.max(mfcc) - np.min(mfcc))
    ssm_norm = (ssm - np.min(ssm)) / (np.max(ssm) - np.min(ssm))
    
    #_Resize to desired_shape
    lms_r = zoom(log_mel_spectrogram_norm,
                (desired_shape[0] / log_mel_spectrogram_norm.shape[0],
                desired_shape[1] / log_mel_spectrogram_norm.shape[1]))
    mfcc_r = zoom(mfcc_norm,
                 (desired_shape[0] / mfcc_norm.shape[0],
                 desired_shape[1] / mfcc_norm.shape[1]))
    ssm_r = zoom(ssm_norm,
                (desired_shape[0] / ssm_norm.shape[0],
                desired_shape[1] / ssm_norm.shape[1]))
    
    #_Get the values
    return {
        'file_name': filename,
        'file_id': file_id,
        'r_type': r_type,
        'r_name': r_name,
        'date': date,
        'year': year,
        'signal': audio,
        'sample_rate': sample_rate,
        'duration': duration,
        'mel_spectrogram': mel_spectrogram,
        'log_mel_spectrogram': log_mel_spectrogram,
        'mfcc': mfcc,
        'ssm': ssm,
        'shape': desired_shape,
        'lms_r': lms_r,#for image gen
        'mfcc_r': mfcc_r, #for image gen
        'ssm_r': ssm_r #for image gen
    }

In [4]:
%%time

directory = fr'N:\Mark\Audio Diss\OCC_SVM_Data\yavari_2018\audios_part_2\GY3\Data\wav_split'
pickle_dir = fr'N:\Mark\Audio Diss\pickle_data'
pickle_file_path = os.path.join(pickle_dir, 'split_processed_audio.pickle')
batch_size = 10

is_processing_skipped = False
is_processing_complete = False

if os.path.exists(pickle_file_path):
    existing_data = pd.read_pickle(pickle_file_path)
    if np.logical_xor(len(existing_data), len(list(walk_through_files(directory)))):
        is_processing_skipped = True
        split_audio_data = pd.read_pickle(pickle_file_path)
        print('split_audio_data loaded')
        
if not is_processing_skipped:
    processed_data = []
    batch_counter = 0
        

    for fname in tqdm(walk_through_files(directory), desc='pickling'):
        try:
            processed_data.append(process_audio_file_splits(fname)) #Processes the files
            batch_counter += 1 #up the counter by 1
            
            #_Saves intermediate results and clear memory after processing a batch
            if batch_counter >= batch_size:
                split_audio_data = pd.DataFrame(processed_data)
                split_audio_data.to_pickle(pickle_file_path, protocol=pickle.HIGHEST_PROTOCOL) #_Appends dataframe to pickle file
                processed_data.clear()
                batch_counter = 0

        except Exception as e:
            print(f'Error processing file {fname}: {e}')

            
    if len(list(walk_through_files(directory))) == 0 and batch_counter == 0:
        is_processing_complete = True

    if is_processing_complete and len(processed_data) > 0:
        split_audio_data = pd.DataFrame(processed_data)
        split_audio_data.to_pickle(pickle_file_path, protocol=pickle.HIGHEST_PROTOCOL)
    elif not is_processing_complete:
        print('No files were processed') #Pickling the data

pickling: 7200it [36:41,  3.27it/s]

No files were processed
CPU times: total: 44min 18s
Wall time: 36min 41s





In [35]:
%%time #loading pickle file as split_audio_data
pickle_dir = fr'N:\Mark\Audio Diss\pickle_data'
pickle_file_path = os.path.join(pickle_dir, 'split_processed_audio.pickle')

if os.path.exists(pickle_file_path):
        split_audio_data = pd.read_pickle(pickle_file_path)
        print('split_audio_data loaded')
else:
    for fname in tqdm(walk_through_files(directory), desc='pickling'):
        if fname.endswith('.wav'):
            processed_data.append(process_audio_file_splits(fname))
    split_audio_data = pd.DataFrame(processed_data)
    split_audio_data.to_pickle(pickle_file_path, protocol=pickle.HIGHEST_PROTOCOL)

pickling: 7200it [58:12,  2.06it/s]


CPU times: total: 1h 10min 44s
Wall time: 1h 1min 50s


In [36]:
#Checking length of data
print(len(split_audio_data))

7200
