In [1]:
import csv
from pathlib import Path

import IPython.display as ipd
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# BASE_DIR = Path('../input/birdsong-recognition')
# train_df = pd.read_csv(BASE_DIR / 'train.csv')
# random_row = train_df.sample().squeeze()


class SNRSegmenter(object):
    """Cut a 1-D signal into fixed-length windows and keep the loud ones.

    The noise floor is estimated as the smallest peak amplitude found over
    consecutive, non-overlapping blocks of ``noise_len_ms``.  The signal is
    then scanned with a window of ``segment_len_ms`` advanced by
    ``hop_len_ms``; a window is kept when its peak amplitude divided by the
    noise floor is strictly greater than ``call_snr``.

    Parameters
    ----------
    sample_rate : number
        Samples per second, used to convert the millisecond lengths.
    segment_len_ms, hop_len_ms, noise_len_ms : number
        Window length, window hop and noise-block length in milliseconds.
    call_snr : number
        Peak-to-noise ratio a window has to exceed to be kept.

    Raises
    ------
    ValueError
        If any of the three lengths converts to less than one sample
        (a zero hop would never advance, a zero block has no peak).
    """

    def __init__(self, sample_rate, segment_len_ms, hop_len_ms, noise_len_ms, call_snr):
        self.segment_len_samples = int(sample_rate * segment_len_ms / 1000)
        self.hop_len_samples = int(sample_rate * hop_len_ms / 1000)
        self.noise_len_samples = int(sample_rate * noise_len_ms / 1000)

        shortest = min(self.segment_len_samples,
                       self.hop_len_samples,
                       self.noise_len_samples)
        if shortest < 1:
            raise ValueError(
                "segment, hop and noise lengths must each span at least one sample"
            )

        self.call_snr = call_snr

    def _get_noise_level(self, sample):
        """Return the smallest per-block peak amplitude of ``sample``.

        Inputs no longer than one noise block are treated as a single block.
        A trailing partial block is ignored; a block that ends exactly on the
        last sample is included.
        """
        block_len = self.noise_len_samples
        if len(sample) <= block_len:
            return np.max(np.abs(sample))

        # "+ 1" keeps the block whose end coincides with the end of the signal.
        block_starts = range(0, len(sample) - block_len + 1, block_len)
        return min(np.max(np.abs(sample[start:start + block_len]))
                   for start in block_starts)

    @staticmethod
    def _snr(peak, noise_level):
        """Peak-to-noise ratio that stays well defined for a silent noise floor."""
        if noise_level == 0:
            # Any non-silent window is infinitely above a zero floor; a silent
            # window is reported as 0 so it is only kept for negative thresholds.
            return float("inf") if peak > 0 else 0.0
        return peak / noise_level

    def __call__(self, sample):
        """Return ``(call_segments, call_snrs)`` for ``sample``.

        ``call_segments`` holds the kept windows (slices of ``sample``) and
        ``call_snrs`` the matching peak-to-noise ratios, in scan order.
        Both lists are empty for an empty input or when the input is shorter
        than one window.
        """
        call_segments = []
        call_snrs = []

        if len(sample) == 0:
            return call_segments, call_snrs

        noise_level = self._get_noise_level(sample)
        window = self.segment_len_samples

        # "+ 1" lets the last full window (ending on the final sample) be scanned.
        for start in range(0, len(sample) - window + 1, self.hop_len_samples):
            segment = sample[start:start + window]
            snr = self._snr(np.max(np.abs(segment)), noise_level)
            if snr > self.call_snr:
                call_segments.append(segment)
                call_snrs.append(snr)

        return call_segments, call_snrs




In [2]:
import soundfile as sf


In [5]:
!pip install tensorflow



In [6]:
!pip install tensorflow_io



In [3]:
import tensorflow as tf
import tensorflow_io as tfio




In [None]:
from tensorflow.keras.utils import to_categorical

In [9]:
def time_masking(audio, mask_duration):
    """Return a copy of ``audio`` with one random run of samples set to zero.

    Parameters
    ----------
    audio : array-like
        Signal to mask; the mask is applied along the first axis.
        The input itself is left untouched.
    mask_duration : int
        Number of consecutive entries to zero.  Values larger than the
        signal length are clamped, which masks the whole signal.

    Returns
    -------
    numpy.ndarray
        Masked copy with the same shape as ``audio``.

    Raises
    ------
    ValueError
        If ``mask_duration`` is negative.
    """
    if mask_duration < 0:
        raise ValueError("mask_duration must be non-negative")

    # Work on a copy: callers keep using the original signal for other
    # augmentations, so it must not be modified here.
    masked = np.array(audio, copy=True)

    width = min(mask_duration, len(masked))
    # Upper bound is exclusive, so "+ 1" makes the last valid start reachable
    # and keeps the call valid when the mask covers the whole signal.
    start_point = np.random.randint(0, len(masked) - width + 1)
    masked[start_point:start_point + width] = 0
    return masked

def frequency_masking(audio, mask_range):
    """Return a copy of ``audio`` with a random band along axis 0 set to zero.

    The function only indexes the first axis; it does not transform the
    input.  NOTE(review): it masks frequency bins only if the caller passes
    an array whose first axis is frequency (e.g. a spectrogram) - for a 1-D
    waveform it zeroes a run of samples.  Confirm the intended input.

    Parameters
    ----------
    audio : array-like
        Array to mask; left untouched.
    mask_range : int
        Width of the zeroed band, clamped to the size of axis 0.

    Returns
    -------
    numpy.ndarray
        Masked copy with the same shape as ``audio``.

    Raises
    ------
    ValueError
        If ``mask_range`` is negative.
    """
    if mask_range < 0:
        raise ValueError("mask_range must be non-negative")

    # Copy so the caller's array can be reused for other augmentations.
    masked = np.array(audio, copy=True)

    n_bins = masked.shape[0]
    width = min(mask_range, n_bins)
    # Draw the start from every position where the band still fits, instead
    # of only from the first ``mask_range`` entries.
    start_bin = np.random.randint(0, n_bins - width + 1)
    masked[start_bin:start_bin + width] = 0
    return masked

# Example usage





In [None]:
import os

In [None]:
# Segment every recording found one level below `directory_path` and write
# each kept segment twice: into a folder named after the source sub-directory
# and into an UPDRS0/UPDRS1 folder chosen from the file name.
directory_path = "C:/Users/Vishal/Desktop/btps8e02/Dataset/MDVRKCL"

path2 = "C:/Users/Vishal/Desktop/btps8e02/Codes/VGGPD/dataset/hl4"

# Segmentation settings are identical for every file, so they are set once.
segment_len_ms = 4000
hop_len_ms = 4000
noise_len_ms = 500
call_snr_thresh = 5

root = os.listdir(directory_path)

for entry in root:
    subdir_path = os.path.join(directory_path, entry)  # source sub-directory
    subdir_path2 = os.path.join(path2, entry)          # mirrored output directory
    if not os.path.isdir(subdir_path):
        continue

    # sf.write does not create missing folders.
    os.makedirs(subdir_path2, exist_ok=True)

    for subentry in os.listdir(subdir_path):
        wav_file = os.path.join(subdir_path, subentry)
        # Skip nested folders and anything that is not a wav recording.
        if not (os.path.isfile(wav_file) and subentry.lower().endswith(".wav")):
            continue

        y, sample_rate = librosa.load(wav_file)

        # Remove the DC offset (mean of the signal), then peak-normalise.
        dc_offset = np.mean(y)
        y_shifted = y - dc_offset
        audio = librosa.util.normalize(y_shifted)

        segmenter = SNRSegmenter(sample_rate, segment_len_ms, hop_len_ms,
                                 noise_len_ms, call_snr_thresh)
        calls, call_snrs = segmenter(audio)
        if not calls:
            continue

        # Everything below depends on the recording only, not on the segment.
        # Names longer than 18 characters get the "_2" tag, others "_1".
        temp = subentry[:13] + ("_2" if len(subentry) > 18 else "_1")
        # Label suffix: 1 for the 'disease' folder, 0 for any other folder.
        temp += '_1_' if entry == 'disease' else '_0_'

        # Character 12 of the file name selects the UPDRS0 / UPDRS1 folder.
        # NOTE(review): presumably a score digit in the name - confirm.
        temp_path = os.path.join(path2, 'UPDRS0' if subentry[12] == '0' else 'UPDRS1')
        os.makedirs(temp_path, exist_ok=True)

        file = os.path.join(subdir_path2, temp)
        file2 = os.path.join(temp_path, temp)

        # Segments are numbered from 1 within each recording.
        for h, segment in enumerate(calls, start=1):
            sf.write(file + str(h) + ".wav", segment, sample_rate)
            sf.write(file2 + str(h) + ".wav", segment, sample_rate)

            # here you can check everything you want for example if the subentry has a specific name etc
            #print(subentry_path)

            

In [64]:
# Write augmented copies of every recording in `directory_path`.  How many
# copies a file gets depends on the digit at position 8 of its name:
#   2     -> 5 files, 3 -> 7 files, anything else -> 13 files.
# NOTE(review): that digit is presumably a severity/stage label used to
# balance classes - confirm against the naming scheme.
# NOTE(review): pitch_scale, add_white_noise, random_gain and
# polarity_inversion are defined in a later cell of this notebook; that cell
# has to run before this one.
directory_path = "C:/Users/Vishal/Downloads/ab/disease"
# NOTE(review): path2 is never used here - files are written relative to the
# current working directory.  Confirm whether they should go to path2.
path2 = "C:/Users/Vishal/Downloads/ac"

root = os.listdir(directory_path)


def _random_masks(audio):
    """Return a fresh ``(time_masked, frequency_masked)`` pair for ``audio``.

    Each helper receives its own copy so that a helper writing into its
    argument cannot alter ``audio`` or the other variant.
    """
    masked_audio = time_masking(audio.copy(), mask_duration=100)
    mask_audio = frequency_masking(audio.copy(), mask_range=10)
    return masked_audio, mask_audio


def _augmented_signals(audio, sample_rate, level):
    """Return the augmented signals for one recording, in output order."""
    masked_audio, mask_audio = _random_masks(audio)
    signals = [
        pitch_scale(audio, sample_rate, 4),    # up 4 semitones
        pitch_scale(audio, sample_rate, -4),   # down 4 semitones
        # NOTE(review): a gain of 2-4 on a peak-normalised signal exceeds
        # [-1, 1]; check how the wav writer handles that range.
        random_gain(audio, 2, 4),
        mask_audio,
        masked_audio,
    ]
    if level == 2:
        return signals

    signals.append(add_white_noise(audio, 0.1))
    signals.append(polarity_inversion(audio))
    if level == 3:
        return signals

    # Remaining levels get three more rounds of freshly drawn masks.
    for _ in range(3):
        masked_audio, mask_audio = _random_masks(audio)
        signals.append(mask_audio)
        signals.append(masked_audio)
    return signals


for entry in root:
    if not entry.lower().endswith('.wav'):
        continue

    wav_file = os.path.join(directory_path, entry)
    y, sample_rate = librosa.load(wav_file)

    # Remove the DC offset (mean of the signal), then peak-normalise.
    dc_offset = np.mean(y)
    y_shifted = y - dc_offset
    audio = librosa.util.normalize(y_shifted)

    # splitext drops ".wav" completely, so no stray "." is left in the name.
    stem = os.path.splitext(entry)[0]
    level = int(entry[8])

    # A single running counter gives every output a distinct number, so no
    # file overwrites another.
    for dt, augmented in enumerate(_augmented_signals(audio, sample_rate, level), start=1):
        dv = stem + '_' + str(dt) + '_.wav'
        sf.write(dv, augmented, sample_rate)

    


        

    

In [47]:
# Write one masked copy of every recording in `directory_path` into `path2`,
# alternating between a frequency mask (odd files) and a time mask (even files).
directory_path = "C:/Users/Vishal/Downloads/ab/healthy"
path2 = "C:/Users/Vishal/Downloads/ac"

root = os.listdir(directory_path)
dt = 1  # running count of processed files; its parity picks the mask type

# sf.write does not create missing folders.
os.makedirs(path2, exist_ok=True)

for entry in root:
    if not entry.lower().endswith('.wav'):
        continue

    wav_file = os.path.join(directory_path, entry)
    y, sample_rate = librosa.load(wav_file)

    # Remove the DC offset (mean of the signal), then peak-normalise.
    dc_offset = np.mean(y)
    y_shifted = y - dc_offset
    audio = librosa.util.normalize(y_shifted)

    dv = entry[:-4] + '_1_.wav'
    file = os.path.join(path2, dv)

    # Apply only the mask that is written, and give it its own copy: the
    # masking helpers may write into their argument, which would otherwise
    # leave both masks in the same array regardless of the branch taken.
    if dt % 2 == 1:
        augmented = frequency_masking(audio.copy(), mask_range=10)
    else:
        augmented = time_masking(audio.copy(), mask_duration=100)

    sf.write(file, augmented, sample_rate)
    dt += 1

        
        
        

    

In [2]:
# pitch scaling
def pitch_scale(signal, sample_rate, no_of_semitones):
    """Shift the pitch of ``signal`` with ``librosa.effects.pitch_shift``.

    ``sample_rate`` is forwarded as ``sr`` and ``no_of_semitones`` as
    ``n_steps``; the notebook calls this with +4 to scale up and -4 to
    scale down.  Returns whatever ``pitch_shift`` returns.
    """
    shifted = librosa.effects.pitch_shift(
        signal,
        sr=sample_rate,
        n_steps=no_of_semitones,
    )
    return shifted
# polarity inversion // not so useful augmentation technique
def polarity_inversion(signal):
    """Return ``signal`` multiplied by -1, i.e. with every value's sign flipped."""
    inverted = signal * -1
    return inverted
# adding white noise
def add_white_noise(signal, noise_percentage_factor):
    """Return ``signal`` plus scaled Gaussian noise.

    The noise has zero mean and the same standard deviation as ``signal``,
    one value per element (``signal.size``), and is multiplied by
    ``noise_percentage_factor`` before being added.
    """
    spread = signal.std()
    noise = np.random.normal(0, spread, signal.size)
    scaled_noise = noise * noise_percentage_factor
    return signal + scaled_noise
# random gain // scale up the amplitude by some factor
def random_gain(signal, min_factor, max_factor):
    """Return ``signal`` multiplied by one factor drawn from ``np.random.uniform(min_factor, max_factor)``."""
    factor = np.random.uniform(min_factor, max_factor)
    return signal * factor


In [3]:

 # _plot_signal_and_augmented_signal(signal,noise_added_signal,sample_rate)
  time_stretched_signal = time_stretch(signal,0.8)
  

IndentationError: unexpected indent (299077376.py, line 2)

In [None]:
# Build one example of every augmentation from the same `audio` signal.
# The masking helpers get their own copy because they may write into the
# array they receive; without the copy the remaining augmentations would be
# computed from an already-masked signal.
time_masked_audio = time_masking(audio.copy(), mask_duration=100)
freq_mask_audio = frequency_masking(audio.copy(), mask_range=10)
noise_added_signal = add_white_noise(audio, 0.1)
pitch_scaleup_signal = pitch_scale(audio, sample_rate, 4)     # scaling up by 4
pitch_scaledown_signal = pitch_scale(audio, sample_rate, -4)  # scaling down by 4
random_gained_signal = random_gain(audio, 2, 4)
inverted_signal = polarity_inversion(audio)


In [9]:
import os

# Segment every recording found one level below `directory_path` with 4 s
# windows advanced by 1 s, and write each kept segment into a folder of
# `path2` named after the source sub-directory.
directory_path = "C:/Users/Vishal/Desktop/btps8e02/Dataset/MDVRKCL"

path2 = "C:/Users/Vishal/Desktop/btps8e02/Codes/VGGPD/dataset"

# Segmentation settings are identical for every file, so they are set once.
segment_len_ms = 4000
hop_len_ms = 1000
noise_len_ms = 500
call_snr_thresh = 5

root = os.listdir(directory_path)

for entry in root:
    subdir_path = os.path.join(directory_path, entry)  # source sub-directory
    subdir_path2 = os.path.join(path2, entry)          # mirrored output directory
    if not os.path.isdir(subdir_path):
        continue

    # sf.write does not create missing folders.
    os.makedirs(subdir_path2, exist_ok=True)

    for subentry in os.listdir(subdir_path):
        wav_file = os.path.join(subdir_path, subentry)
        # Skip nested folders and anything that is not a wav recording.
        if not (os.path.isfile(wav_file) and subentry.lower().endswith(".wav")):
            continue

        y, sample_rate = librosa.load(wav_file)

        # Remove the DC offset (mean of the signal), then peak-normalise.
        dc_offset = np.mean(y)
        y_shifted = y - dc_offset
        audio = librosa.util.normalize(y_shifted)

        segmenter = SNRSegmenter(sample_rate, segment_len_ms, hop_len_ms,
                                 noise_len_ms, call_snr_thresh)
        calls, call_snrs = segmenter(audio)

        # The prefix depends on the recording only: names longer than 18
        # characters get the "_2_" tag, others "_1_".
        temp = subentry[:13] + ("_2_" if len(subentry) > 18 else "_1_")
        file = os.path.join(subdir_path2, temp)

        # Segments are numbered from 1 within each recording.
        for h, segment in enumerate(calls, start=1):
            sf.write(file + str(h) + ".wav", segment, sample_rate)

            # here you can check everything you want for example if the subentry has a specific name etc
            #print(subentry_path)
            

In [15]:
# Six zeroed counters, one per index 0-5; the counting cell below increments them.
ams = [0 for _ in range(6)]

In [27]:
import os

# Count the files in every sub-directory of `path2`, grouped by the digit at
# position 8 of the file name.
# NOTE(review): `directory_path` is not used by this cell.
directory_path = "C:/Users/Vishal/Downloads/aa"
path2 = "C:/Users/Vishal/Downloads/ab"

root = os.listdir(path2)

# Reset the counters here so that re-running the cell does not add to the
# totals of a previous run.
ams = [0] * 6

for entry in root:
    subdir_path2 = os.path.join(path2, entry)
    if not os.path.isdir(subdir_path2):
        continue
    for subentry in os.listdir(subdir_path2):
        # The digit must be 0-5 to fit the six counters.
        ams[int(subentry[8])] += 1
            


            # here you can check everything you want for example if the subentry has a specific name etc
            #print(subentry_path)
            # if os.path.isdir(subentry_path):  # check if it is a folder
            #     subdir_entriez = os.listdir(subentry_path)
            #     for entry in subdir_entriez:
            #       wav_file = os.path.join(subentry_path, entry)
            #       # print(wav_file)
            #       if(wav_file[-3:]=='wav'):
            #         audio, sr = librosa.load(wav_file)
            #         arr = np.array(mfcc(audio,sr))
            #         np.savetxt(wav_file.replace('.wav', '_mfcc.txt'), arr, fmt="%d")
            #       # with open(wav_file.replace('wav', 'txt'), 'w') as filehandle:
            #       #   json.dump(arr.toList(), filehandle)
                 
            #       # with open(wav_file.replace('wav', 'txt'), "w") as txt_file:
            #       #   for line in arr:
            #       #     txt_file.write(" ".join(line) + "\n")



In [28]:
# Display the counters; index i holds the number of files whose name has digit i at position 8.
ams

[2434, 0, 818, 567, 130, 0]

In [None]:


# Write one CSV row per file: the file name followed by its MFCC values.
# NOTE(review): `filenames` and `normalized_mfcc` are not defined in the
# cells visible here; `normalized_mfcc` is indexed as a 2-D array below.
output_csv_path = "normalized_mfcc_features.csv"

# zip() would silently drop the surplus rows if the two inputs disagree.
if len(filenames) != len(normalized_mfcc):
    raise ValueError("filenames and normalized_mfcc must have the same length")

# newline='' hands line-ending control to the csv module, which also quotes
# file names containing commas or quotes.
with open(output_csv_path, 'w', newline='') as file:
    writer = csv.writer(file)

    # Header: 'filename' plus one column per MFCC coefficient.
    writer.writerow(['filename'] + [f'mfcc_{i}' for i in range(normalized_mfcc.shape[1])])

    # Data rows; values are stringified exactly as before.
    for fname, mfcc_row in zip(filenames, normalized_mfcc):
        writer.writerow([fname, *map(str, mfcc_row)])

In [None]:
# Plot one kept segment and show that same segment's SNR in the title.
segment_idx = 0
plt.title(f'SNR = {call_snrs[segment_idx]}')
plt.plot(calls[segment_idx])
plt.xlabel('sample')
plt.ylabel('amplitude')

# Play segment 10, or the last one when fewer than 11 segments were kept.
playback_idx = min(10, len(calls) - 1)
ipd.display(ipd.Audio(calls[playback_idx], rate=sample_rate))


# combined = ipd.Audio(calls[i], rate=sample_rate)
# combined =  combined-combined
# for i in calls:
#   # i
#   combined = combined + ipd.Audio(calls[i], rate=sample_rate)


In [None]:
# Join the kept segments end to end as one sample array.  b''.join() would
# glue the raw memory of the arrays into a bytes object, which is not a
# signal that can be played back with a sample rate.
# NOTE(review): assumes `calls` is the list of arrays returned by the
# segmenter and is not empty - np.concatenate raises on an empty list.
concatenated_audio = np.concatenate(calls)


In [None]:
# Render an inline audio player for `concatenated_audio` at `sample_rate`.
player = ipd.Audio(concatenated_audio, rate=sample_rate)
ipd.display(player)

In [None]:
from pydub import AudioSegment

# NOTE(review): AudioSegment.from_file is given the items of `calls`
# directly.  In this notebook `calls` holds the sample arrays returned by the
# segmenter, not file paths - confirm the items are paths or file objects
# before relying on this cell.

# Load the first audio file
concatenated_audio = AudioSegment.from_file(calls[0])

# Append the remaining audio files.  The loop uses its own variable name so
# the notebook-level `audio` signal is not overwritten.
for audio_file in calls[1:]:
    next_clip = AudioSegment.from_file(audio_file)
    concatenated_audio += next_clip

# Export the concatenated audio file
concatenated_audio.export("concatenated.wav", format="wav")

# Play the concatenated audio file
ipd.display(ipd.Audio('concatenated.wav'))


ModuleNotFoundError: ignored

In [None]:
pip install pydub


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
