In [54]:
import glob
import os
import warnings

import librosa
import numpy as np
import torch
from torch.utils.data import Dataset

ModuleNotFoundError: No module named 'librosa'

In [56]:
# dataset to return a single audio clip

class audio_data_single(Dataset):
    """Dataset that serves one preprocessed audio clip per ``.m4a`` file.

    Every ``.m4a`` file found anywhere below ``directory`` becomes one item.

    NOTE(review): ``preprocess_wav`` is not defined in the visible part of this
    file; it must be in scope before items are requested. Per the original
    author's comment it resamples, normalises volume and trims silences.
    """

    def __init__(self, directory):
        self.dir = directory
        # Recursive search: '**' also matches files sitting directly in the root.
        search_pattern = '{}/**/*.m4a'.format(self.dir)
        self.filelist = glob.glob(search_pattern, recursive=True)

    def __len__(self):
        """Number of audio files discovered at construction time."""
        return len(self.filelist)

    def __getitem__(self, idx):
        """Return the preprocessed audio for the file at position ``idx``."""
        return preprocess_wav(self.filelist[idx])


class audio_data_ge2e(Dataset):
    """
    Dataset that yields, per speaker, every preprocessed clip at one fixed intensity.

    Built for the GE2E loss: the dataset index selects the speaker, while the
    emotion and the utterance vary inside the returned item.

    Expected on-disk layout, relative to ``directory``::

        <speaker>/audio/<emotion>/level_<intensity>/<utterance>.m4a

    Args:
        directory: root directory of the audio files.
        intensity: constant intensity to take (selects the ``level_<intensity>`` folders).

    Attributes:
        filelist: every ``.m4a`` path found below a ``level_<intensity>`` folder.
        speakers, emotions, utterances: sorted unique names parsed from the files
            in ``filelist`` that match the layout above.

    The original author's note states 8 emotions & 30 utterances per speaker, i.e.
    each item is an 8x30 nested dictionary of processed wavs; the actual counts
    depend on what is found on disk.

    NOTE(review): ``preprocess_wav`` is not defined in the visible part of this
    file; it must be in scope before items are requested. Per the original
    author's comment it resamples, normalises volume and trims silences.
    """

    # Number of path components below the root in the expected layout:
    # <speaker> / audio / <emotion> / <level> / <file>
    _LAYOUT_DEPTH = 5

    def __init__(self, directory: str, intensity: int):
        self.intensity_level = 'level_' + str(intensity)
        self.dir = directory

        # os.path.join keeps the pattern valid on every platform; the previous
        # hard-coded '\\' separator matched nothing outside Windows.
        pattern = os.path.join(self.dir, '**', self.intensity_level, '*.m4a')
        self.filelist = glob.glob(pattern, recursive=True)

        speakers, emotions, utterances = set(), set(), set()
        for path in self.filelist:
            # Parse names relative to the root so the result does not depend on
            # how many components the root path itself has.
            parts = os.path.relpath(path, self.dir).split(os.sep)
            if len(parts) != self._LAYOUT_DEPTH:
                # Not in the expected layout: its components cannot be
                # interpreted as speaker/emotion/utterance.
                continue
            speakers.add(parts[0])
            emotions.add(parts[2])
            # splitext keeps stems containing dots intact, so __getitem__ can
            # rebuild the exact file name.
            utterances.add(os.path.splitext(parts[4])[0])

        self.emotions = sorted(emotions)
        self.speakers = sorted(speakers)
        self.utterances = sorted(utterances)

    def __len__(self):
        """Number of speakers; one dataset item per speaker."""
        return len(self.speakers)

    def __getitem__(self, idx) -> dict:
        """Return ``{emotion: {utterance: preprocessed audio}}`` for one speaker.

        Args:
            idx: position of the speaker in the sorted ``speakers`` list.

        Every combination of known emotion and utterance is loaded, so a file
        missing for this speaker surfaces as an error from ``preprocess_wav``.
        """
        speaker = self.speakers[idx]
        output_dict = {}

        # Warnings raised while loading are deliberately silenced for the
        # duration of this item only.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for emotion in self.emotions:
                output_dict[emotion] = {}
                for utterance in self.utterances:
                    audio_path = os.path.join(
                        self.dir, speaker, 'audio', emotion,
                        self.intensity_level, utterance + '.m4a',
                    )
                    output_dict[emotion][utterance] = preprocess_wav(audio_path)

        return output_dict

In [57]:
# Build the GE2E dataset over the 'MEAD_AUDIO' root, keeping only the level_3 intensity clips.
d = audio_data_ge2e('MEAD_AUDIO',3)

In [58]:
# Fetch the item for the first speaker (sorted order): a nested
# {emotion: {utterance: audio}} dict. Requires preprocess_wav to be in scope.
d[0]

NameError: name 'preprocess_wav' is not defined

In [3]:
import os
import sys

In [29]:
%conda info


     active environment : None
       user config file : C:\Users\theod\.condarc
 populated config files : 
          conda version : 4.10.1
    conda-build version : 3.21.4
         python version : 3.8.8.final.0
       virtual packages : __cuda=11.1=0
                          __win=0=0
                          __archspec=1=x86_64
       base environment : C:\Users\theod\anaconda3  (writable)
      conda av data dir : C:\Users\theod\anaconda3\etc\conda
  conda av metadata url : https://repo.anaconda.com/pkgs/main
           channel URLs : https://repo.anaconda.com/pkgs/main/win-64
                          https://repo.anaconda.com/pkgs/main/noarch
                          https://repo.anaconda.com/pkgs/r/win-64
                          https://repo.anaconda.com/pkgs/r/noarch
                          https://repo.anaconda.com/pkgs/msys2/win-64
                          https://repo.anaconda.com/pkgs/msys2/noarch
          package cache : C:\Users\theod\anaconda3\pkgs
           

In [80]:
import glob
import os
# Recursively collect every .m4a file below the current working directory.
# os.path.join builds the pattern with the platform's separator instead of
# hard-coding the Windows '\\'.
ff = glob.glob(os.path.join('**', '*.m4a'), recursive=True)

In [81]:
# Mirror the first file's path under MEAD_AUDIO_FILT, dropping its leading
# (source root) component. os.sep / os.path.join replace the hard-coded '\\'.
dest = os.path.join('MEAD_AUDIO_FILT', *ff[0].split(os.sep)[1:])

In [82]:
import shutil
# Make sure the destination's parent folders exist, then copy the first file.
# copyfile returns the destination path, which is what this cell displays.
os.makedirs(name=os.path.dirname(dest), exist_ok=True)
shutil.copyfile(src=ff[0], dst=dest)

'MEAD_AUDIO_FILT\\M003\\audio\\angry\\level_1\\001.m4a'

In [83]:
# Preview the collected paths. Python lists have no .filter() method, so the
# previous call raised AttributeError; slicing also avoids dumping the whole list.
ff[:20]

['MEAD_AUDIO\\M003\\audio\\angry\\level_1\\001.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\002.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\003.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\004.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\005.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\006.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\007.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\008.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\009.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\010.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\011.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\012.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\013.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\014.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\015.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\016.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\017.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\018.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\019

In [84]:
root = 'MEAD_AUDIO'  # dataset root to scan
keep = 'level_3'     # intensity folder name used by the filtering step

# Collect every .m4a below root. os.path.join builds the pattern with the
# platform's separator instead of hard-coding the Windows '\\'.
f = glob.glob(os.path.join(root, '**', '*.m4a'), recursive=True)
f

['MEAD_AUDIO\\M003\\audio\\angry\\level_1\\001.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\002.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\003.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\004.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\005.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\006.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\007.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\008.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\009.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\010.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\011.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\012.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\013.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\014.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\015.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\016.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\017.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\018.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_1\\019

In [85]:
# Keep only the files that have a path component exactly equal to `keep`.
# Normalising and splitting on os.sep works on any platform, not only where
# the separator is '\\'.
f = [file for file in f if keep in os.path.normpath(file).split(os.sep)]
f

['MEAD_AUDIO\\M003\\audio\\angry\\level_3\\001.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\002.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\003.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\004.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\005.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\006.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\007.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\008.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\009.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\010.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\011.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\012.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\013.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\014.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\015.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\016.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\017.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\018.m4a',
 'MEAD_AUDIO\\M003\\audio\\angry\\level_3\\019

In [86]:
# Scratch check: a dict whose value is a mutable list.
# NOTE: this rebinds `d`, which earlier in the notebook held the dataset object.
d = {1:[]}

In [87]:
# Append in place to the list stored under key 1 (re-running this cell appends again).
d[1].append(5)

In [88]:
d

{1: [5]}