### Importing all libraries

In [6]:
#Importing audio file paths
import os

#For managing dataframe
import pandas as pd
import numpy as np

#PyTorch
import torchaudio
import torch
from torch.utils.data import Dataset

# Misc.
import multiprocessing # will be used for loading data using multipler workers (cpu_count)

#Selecting device
device="cuda" if torch.cuda.is_available() else "cpu"

print(f"Using {device}")

Using cuda


### Recursively get all audio files

In [2]:
def audio_fetch(src,audio_paths):
    l=os.listdir(src)
    if(len(l)!=0):
        for i in range(len(l)):
            if(".wav" in l[i] or ".mp3" in l[i] or ".aac" in l[i]):
                audio_paths.append(str(src+l[i]))
            elif("." not in l[i]):
                try:
                    audio_fetch(str(src+"/"+l[i]+"/"),audio_paths)
                except:
                    continue
                            
# For getting all possible classes along with their label encoding as a dictionary
def class_fetch(df: pd.DataFrame())->(list,dict):
    s=set(sorted(df[1].unique()))
    
    toDel=[i for i in dict(df[1].value_counts(sort=True)).keys() if dict(df[1].value_counts(sort=True))[i]<3]
    for i in toDel:
        s.remove(i)
    d={}
    
    toDel_ids=[]
    for i in toDel:
        toDel_ids.extend(list(df[df[1]==i][0].values))
    
    
    count=0
    for i in s:
        d[i]=count
        count+=1
    return list(s),d, toDel_ids

### Dataset Class

In [3]:
class audio_dataset(Dataset):
    def __init__(self,
                 audio_path: str,
                 info: str, 
                 target_sample_rate=44000,
                 num_samples=800000,
                 transformations=None, 
                 header=None) -> None:
        
        self.paths=[]
        audio_fetch(audio_path,self.paths)
        self.target_sample_rate=target_sample_rate
        self.transformations=transformations
        self.target_samples=num_samples
        
        self.patient=dict()
        self.paths.sort() 
        
        # We are going to remove all classes than 2 patient data

        for i in range(101,227):
            self.patient[i]=[]
        
        for i in range(len(self.paths)):
            self.patient[int(self.paths[i].split("/")[-1].split("_")[0])].append(self.paths[i])
        
        self.info_df=pd.read_csv(info,header=header)
        self.classes, self.class_to_idx, self.ids_to_remove = class_fetch(self.info_df)

        # After getting the patient ids to remove all the data from patient list is removed
        for i in self.ids_to_remove:
            self.patient.pop(i)

        #Denotes sequence of audio files for a particular patient
        self.sequence=list(self.patient.values())


        # Audio path is updated after removing classing with small number of patients
        self.audio_path=[]
        for i in self.sequence:
            for j in i:
                self.audio_path.append(j)

    
    def get_class(self,file: str):
        return self.info_df[self.info_df[0]==int(file.split("/")[-1].split("_")[0])][1].values[0]



    # Necessary audio transformations: 
    def _cut_if_necessary(self, signal):
        if signal.shape[1] > self.target_samples:
            signal = signal[:, :self.target_samples]
        return signal

    def _right_pad_if_necessary(self, signal):
        length_signal = signal.shape[1]
        if length_signal < self.target_samples:
            num_missing_samples = self.target_samples - length_signal
            last_dim_padding = (0, num_missing_samples)
            signal = torch.nn.functional.pad(signal, last_dim_padding)
        return signal

    def _resample_if_necessary(self, signal, sr):
        if sr != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.target_sample_rate)
            signal = resampler(signal)
        return signal

    def _mix_down_if_necessary(self, signal):
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

    
    def __len__(self) -> int:
        return len(self.paths)


    def __getitem__(self, index: int) -> torch.Tensor():
        self.val=self.paths[index]
        self.class_val=self.class_to_idx[self.get_class(self.val)]

        self.signal, self.sr = torchaudio.load(self.val)
        self.signal = self._resample_if_necessary(self.signal, self.sr)
        self.signal = self._mix_down_if_necessary(self.signal)
        self.signal = self._cut_if_necessary(self.signal)
        self.signal = self._right_pad_if_necessary(self.signal)
        self.signal = self.transformation(self.signal)
        
        return self.signal,self.class_to_idx[self.class_val]

##### Testing audio dataset

In [4]:
ds=audio_dataset(audio_path="../Sound_Classification/archive/Respiratory_Sound_Database/",info="../Sound_Classification/archive/respiratory_sound_database/Respiratory_Sound_Database/patient_diagnosis.csv")
ds.class_to_idx

{'Pneumonia': 0,
 'COPD': 1,
 'Bronchiolitis': 2,
 'Bronchiectasis': 3,
 'Healthy': 4,
 'URTI': 5}

In [5]:
print(len(ds.sequence))
print(ds.get_class(ds.audio_path[3]))

123
COPD


### Dataloader

In [None]:
num_workers=multiprocessing.cpu_count()