In [None]:
# Código público extraído de: nn
import numpy as np

#Use old segmentation
def segment_cough(x,fs, cough_padding=0.2,min_cough_len=0.2, th_l_multiplier = 0.1, th_h_multiplier = 2):
    """Split a recording into individual coughs using a hysteresis comparator.

    The squared samples (``x**2``) are scanned one by one. A cough starts when a
    squared sample rises above the high threshold and ends once more than 10 ms
    worth of consecutive squared samples have stayed below the low threshold.
    Both thresholds are multiples of the RMS value of the whole signal.

    Inputs:
    *x (np.array): cough signal (expected to be 1-D)
    *fs (float): sampling frequency in Hz
    *cough_padding (float): number of seconds added to the beginning and end of each detected cough to make sure coughs are not cut short
    *min_cough_len (float): minimum duration in seconds; a segment is kept only if its length minus twice the padding exceeds it
    *th_l_multiplier (float): multiplier of the RMS energy used as a lower threshold of the hysteresis comparator
    *th_h_multiplier (float): multiplier of the RMS energy used as a high threshold of the hysteresis comparator

    Outputs:
    *coughSegments (list of np.arrays): one signal array per detected cough
    *cough_mask (np.array of bool): True at every index that belongs to a returned segment
    """
    x = np.asarray(x)
    n_samples = len(x)
    cough_mask = np.zeros(n_samples, dtype=bool)
    coughSegments = []

    # Nothing to segment; also avoids the "mean of empty slice" warning below.
    if n_samples == 0:
        return coughSegments, cough_mask

    # Define hysteresis thresholds
    rms = np.sqrt(np.mean(np.square(x)))
    seg_th_l = th_l_multiplier * rms
    seg_th_h = th_h_multiplier * rms

    # All durations are converted to integer sample counts so they can be used as indices
    padding = int(round(fs*cough_padding))
    min_cough_samples = int(round(fs*min_cough_len))
    tolerance = int(round(0.01*fs))

    def _keep_segment(start, end):
        """Store x[start:end+1] and flag it in the mask if it is long enough."""
        # Twice the padding is subtracted so that only the detected cough is measured.
        if end + 1 - start - 2*padding > min_cough_samples:
            coughSegments.append(x[start:end+1])
            cough_mask[start:end+1] = True

    cough_start = 0
    cough_in_progress = False
    below_th_counter = 0

    for i, sample in enumerate(x**2):
        if not cough_in_progress:
            if sample > seg_th_h:
                cough_start = max(i - padding, 0)
                cough_in_progress = True
                # Start every cough with a fresh counter; otherwise the count left
                # over from the previous cough would end this one on its first low sample.
                below_th_counter = 0
            continue

        if sample < seg_th_l:
            below_th_counter += 1
            if below_th_counter > tolerance:
                _keep_segment(cough_start, min(i + padding, n_samples - 1))
                cough_in_progress = False
        else:
            below_th_counter = 0

    # A cough still open when the recording ends is closed at the last sample,
    # whatever the level of that sample, and is flagged in the mask like any other.
    if cough_in_progress:
        _keep_segment(cough_start, n_samples - 1)

    return coughSegments, cough_mask

def compute_SNR(x, fs):
    """Compute the Signal-to-Noise ratio (in dB) of the audio signal x.

    The samples flagged by segment_cough (default parameters) are treated as
    signal and all remaining samples as noise.

    Inputs:
    *x (np.array): audio signal
    *fs (float): sampling frequency in Hz

    Outputs:
    *SNR: 20*log10(RMS_signal/RMS_noise); 0 when no cough is detected, when the
     cough RMS is 0, or when there are no noise samples (or their RMS is NaN)
    """
    _, cough_mask = segment_cough(x,fs)
    cough_samples = x[cough_mask]
    noise_samples = x[~cough_mask]

    # Without cough samples or without noise samples the ratio is undefined;
    # 0 is returned by convention (and no mean over an empty slice is taken).
    if cough_samples.size == 0 or noise_samples.size == 0:
        return 0

    RMS_signal = np.sqrt(np.mean(np.square(cough_samples)))
    RMS_noise = np.sqrt(np.mean(np.square(noise_samples)))
    if RMS_signal == 0 or np.isnan(RMS_noise):
        return 0

    # A noise RMS of 0 yields an infinite SNR; silence the division warning only.
    with np.errstate(divide="ignore"):
        SNR = 20*np.log10(RMS_signal/RMS_noise)
    return SNR

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
import os
import sys
# Append the ../src directory (resolved to an absolute path) to the module search path.
sys.path.append(os.path.abspath('../src'))

	
# Cough segmentation 


## Processando as bases de áudio: audios_train_especialistas_plus_adicional e audios_train_especialistas

In [None]:
#!rm -rf /content/content/audios_expecialistas
#!rm -rf /content/audios_segmentados_concatenados_especialistas_completo
#!rm -rf /content/audios_segmentado_concatenado_especialistas_completo.zip

In [None]:
#!gdown --id 1iNBLFXyiS4kiJkPZJb_9-KO-LRYS4Z7V --output audios_wav_especialistas.zip
#!unzip audios_wav_especialistas.zip
#!rm -rf audios_wav_especialistas.zip

!gdown --id 1YOx_SiP0RT_2M9FKUK0HeDWPcx_mihGO --output audios_especialistas_plus_adicional_8750.zip
!unzip audios_especialistas_plus_adicional_8750.zip
!rm -rf audios_especialistas_plus_adicional_8750.zip

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
  inflating: content/audios_especialistas_plus_adicional_8750/09c2da3e-9de9-45d2-997b-0af4493b1c4e.wav  
  inflating: content/audios_especialistas_plus_adicional_8750/44b10042-8764-48b4-ac66-1020577eb4bc.wav  
  inflating: content/audios_especialistas_plus_adicional_8750/85d6d6c0-2c6f-4b82-abd2-456b1196030c.wav  
  inflating: content/audios_especialistas_plus_adicional_8750/72e01cb0-16aa-4080-a7c9-5ab830a88f1d.wav  
  inflating: content/audios_especialistas_plus_adicional_8750/509e338e-32d6-4a9d-89e1-0ea4a8aefd50.wav  
  inflating: content/audios_especialistas_plus_adicional_8750/9a672728-6f39-4035-b0b8-f49dfecc162a.wav  
  inflating: content/audios_especialistas_plus_adicional_8750/bb94e3f4-831c-4c04-b528-617cf04ab8c7.wav  
  inflating: content/audios_especialistas_plus_adicional_8750/25191b30-3de7-4a6c-9074-9876156a0c0a.wav  
  inflating: content/audios_especialistas_plus_adicional_8750/6139b5ef-e41c-451c-9a7f-6

In [None]:
import os
import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython.display as ipd

audio_fpath = "/content/content/audios_especialistas_plus_adicional_8750"
# Keep only .wav entries, since every listed name is later passed to librosa.load,
# and sort them because os.listdir returns names in arbitrary order.
audio_clips = sorted(
    name for name in os.listdir(audio_fpath) if name.lower().endswith(".wav")
)
print("No. of .wav files in audio folder = ",len(audio_clips))


No. of .wav files in audio folder =  8750


In [None]:
#Concatenação dos trechos de tosse
import librosa
import numpy as np
import librosa.display
import soundfile as sf

import glob
import os
from os import listdir
from os.path import isfile, join
import audioread

# Destination folder: one output file (same file name) is written per input clip.
audio_fpath_destinoEspecialista = "/content/audios_segmentados_concatenados_especialistas_plus_adicional_completo/"
# os.makedirs instead of `!mkdir`: creates exactly the folder written to below
# and does not fail when the cell is run again.
os.makedirs(audio_fpath_destinoEspecialista, exist_ok=True)

# One embedded audio player per clip makes the notebook too heavy to open,
# so only the first few concatenated clips are previewed.
max_audio_previews = 5

for count, clip_name in enumerate(audio_clips, start=1):
    # sr=None keeps the sampling rate stored in the file
    x, fs = librosa.load(os.path.join(audio_fpath, clip_name), sr=None)

    # Segment without padding so no extra signal before/after each cough is kept.
    # (A single call is enough: segment_cough does not depend on earlier calls.)
    cough_segments, cough_mask = segment_cough(x,fs, cough_padding=0)

    print("Quantidade de segmentos: ")
    print(len(cough_segments))

    if len(cough_segments) > 0:
        # Join all detected coughs of this clip back to back, in order of occurrence
        z = np.concatenate(cough_segments)
        if count <= max_audio_previews:
            ipd.display(ipd.Audio(data=z, rate=fs))
    else:
        # No cough detected: write the original recording unchanged
        z = x

    destino_local = os.path.join(audio_fpath_destinoEspecialista, clip_name)
    sf.write(destino_local, data= z, samplerate=fs, subtype='PCM_24')

    print("número do áudio: ")
    print(count)

Output hidden; open in https://colab.research.google.com to view.

In [None]:

# List the output folder and report how many entries it holds.
# NOTE(review): this rebinds audio_fpath/audio_clips to the OUTPUT folder, so running
# the segmentation cell again afterwards would read the already-processed files.
audio_fpath = "/content/audios_segmentados_concatenados_especialistas_plus_adicional_completo"
audio_clips = os.listdir(audio_fpath)
n_listed_entries = len(audio_clips)
print("No. of .wav files in audio folder = ", n_listed_entries)

No. of .wav files in audio folder =  8750


In [None]:
!zip -r /content/audios_segmentados_concatenados_especialistas_plus_adicional_completo.zip /content/audios_segmentados_concatenados_especialistas_plus_adicional_completo

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
  adding: content/audios_segmentados_concatenados_especialistas_plus_adicional_completo/09c2da3e-9de9-45d2-997b-0af4493b1c4e.wav (deflated 82%)
  adding: content/audios_segmentados_concatenados_especialistas_plus_adicional_completo/44b10042-8764-48b4-ac66-1020577eb4bc.wav (deflated 2%)
  adding: content/audios_segmentados_concatenados_especialistas_plus_adicional_completo/85d6d6c0-2c6f-4b82-abd2-456b1196030c.wav (deflated 1%)
  adding: content/audios_segmentados_concatenados_especialistas_plus_adicional_completo/72e01cb0-16aa-4080-a7c9-5ab830a88f1d.wav (deflated 3%)
  adding: content/audios_segmentados_concatenados_especialistas_plus_adicional_completo/509e338e-32d6-4a9d-89e1-0ea4a8aefd50.wav (deflated 3%)
  adding: content/audios_segmentados_concatenados_especialistas_plus_adicional_completo/9a672728-6f39-4035-b0b8-f49dfecc162a.wav (deflated 2%)
  adding: content/audios_segmentados_concatenados_especialistas_plus

In [None]:
from google.colab import files
# Send the zipped output folder to the browser as a download (google.colab helper).
files.download("/content/audios_segmentados_concatenados_especialistas_plus_adicional_completo.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import drive
# Mount Google Drive under /content/drive so the archive can be copied there.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Copy the zipped output into the MyDrive folder of the mounted Drive.
%cp /content/audios_segmentados_concatenados_especialistas_plus_adicional_completo.zip /content/drive/MyDrive

In [None]:
%ls -lah /drive

ls: cannot access '/drive': No such file or directory
