In [2]:
import numpy as np
import pandas as pd
import torch
import torchaudio
from torch.utils.data import Sampler
from torch.utils.data import Dataset

In [3]:
import random
import os
import shutil

## Restructure the data: one directory per speaker

class_id (speaker) --> samples (flac samples)

In [63]:
# Flatten the corpus so that every speaker directory directly contains its
# audio files (one directory == one class).
#
# NOTE(review): `info` is not defined in the visible notebook. It is used here
# as an iterable of dicts with a 'path' (source directory) and 'files' (file
# names inside it); the second-to-last path component is presumably the
# speaker id -- confirm against the cell that builds `info`.
#
# WARNING: os.replace *moves* the files, so this cell is destructive and not
# idempotent; re-running it after a successful pass finds nothing to move.
MIN_FILES_PER_DIR = 10  # the count includes the .txt transcript files

samples_per_id = []
new_path = '../data/LibriSpeech/train-clean-100-v2/'
for inf in info:
    # `speaker_id` rather than `id`, to avoid shadowing the built-in for the
    # rest of the kernel session.
    speaker_id = inf['path'].split('/')[-2]
    samples_per_id.append(len(inf['files']))

    # Filter first, so that skipped entries do not leave empty directories.
    if len(inf['files']) < MIN_FILES_PER_DIR:
        continue

    path_dst = os.path.join(new_path, speaker_id)
    # makedirs also creates `new_path` itself when it does not exist yet.
    os.makedirs(path_dst, exist_ok=True)

    for file_name in inf['files']:
        # Transcripts are left behind; only the remaining files are moved.
        if file_name.split('.')[-1] == 'txt':
            continue

        src = os.path.join(inf['path'], file_name)
        dst = os.path.join(path_dst, file_name)
        os.replace(src, dst)

In [64]:
samples_per_id

[54,
 56,
 51,
 38,
 47,
 63,
 45,
 35,
 33,
 47,
 43,
 37,
 31,
 43,
 38,
 117,
 117,
 59,
 45,
 42,
 43,
 29,
 97,
 64,
 68,
 51,
 58,
 42,
 37,
 38,
 80,
 44,
 21,
 60,
 21,
 19,
 31,
 89,
 52,
 66,
 46,
 40,
 26,
 20,
 30,
 55,
 53,
 41,
 41,
 37,
 53,
 81,
 26,
 40,
 50,
 37,
 28,
 53,
 20,
 21,
 52,
 39,
 111,
 29,
 42,
 31,
 32,
 36,
 43,
 39,
 98,
 41,
 33,
 36,
 62,
 60,
 29,
 31,
 29,
 33,
 43,
 64,
 29,
 95,
 67,
 46,
 83,
 85,
 52,
 37,
 31,
 29,
 62,
 116,
 39,
 41,
 46,
 31,
 84,
 51,
 62,
 57,
 52,
 55,
 48,
 33,
 42,
 37,
 38,
 42,
 41,
 33,
 44,
 40,
 48,
 124,
 36,
 38,
 36,
 21,
 27,
 7,
 15,
 51,
 62,
 35,
 37,
 41,
 38,
 43,
 37,
 42,
 38,
 34,
 70,
 42,
 45,
 71,
 42,
 35,
 42,
 38,
 32,
 36,
 40,
 43,
 40,
 96,
 50,
 31,
 40,
 39,
 38,
 38,
 56,
 57,
 78,
 52,
 51,
 35,
 30,
 108,
 34,
 18,
 65,
 53,
 51,
 82,
 58,
 56,
 87,
 33,
 127,
 35,
 31,
 47,
 20,
 100,
 51,
 73,
 55,
 65,
 43,
 22,
 14,
 34,
 39,
 40,
 87,
 34,
 90,
 46,
 43,
 42,
 31,
 21,
 32,
 34,
 50

## Make dataloader

In [4]:
data_path = "../data/LibriSpeech/train-clean-100-v2/"

In [24]:
class SequenceDataset(Dataset):
    """Audio dataset that yields fixed-size waveforms labelled by directory name.

    Every file found below ``path`` becomes one item. The name of the directory
    containing a file is its class name; class names are mapped to contiguous
    integer ids ``0 .. num_classes() - 1`` in sorted order.

    # Arguments
        path: Root directory to index (walked recursively).
        min_seq: Lower bound, in seconds, of the random crop length.
        max_seq: Upper bound, in seconds, of the random crop length. Every
            returned waveform is zero-padded to ``max_seq`` seconds before
            downsampling.
        downsampling: Keep every ``downsampling``-th sample of the padded
            waveform. This is plain striding, no low-pass filter is applied.

    # Raises
        ValueError: If no files are found below ``path``.
    """

    def __init__(self, path, min_seq, max_seq, downsampling):
        self.min_seq = min_seq
        self.max_seq = max_seq
        self.downsampling = downsampling

        records = self.index_subset(path)
        if not records:
            # Without this check the failure is an opaque KeyError('class_name').
            raise ValueError('No files found under {!r}.'.format(path))
        self.df = pd.DataFrame(records)

        # Index of dataframe has direct correspondence to item in dataset
        self.df = self.df.assign(id=self.df.index.values)

        # Convert arbitrary class names of dataset to ordered 0-(num_speakers - 1) integers
        self.unique_characters = sorted(self.df['class_name'].unique())
        self.class_name_to_id = {name: idx for idx, name in enumerate(self.unique_characters)}
        self.df = self.df.assign(class_id=self.df['class_name'].map(self.class_name_to_id))

        # Lookup tables used by __getitem__; built per column so the whole
        # frame is not converted to a dict twice.
        self.datasetid_to_filepath = self.df['filepath'].to_dict()
        self.datasetid_to_class_id = self.df['class_id'].to_dict()

    def __getitem__(self, item):
        """Load item ``item`` and return ``(waveform, class_id)``."""
        sample, samplerate = torchaudio.load(self.datasetid_to_filepath[item])
        sample = self._crop_and_pad(sample, samplerate)

        label = self.datasetid_to_class_id[item]
        return sample, label

    def _crop_and_pad(self, sample, samplerate):
        """Randomly crop, zero-pad to ``max_seq`` seconds, then stride.

        # Arguments
            sample: 2-D tensor indexed as ``[channel, frame]``.
            samplerate: Number of frames per second of ``sample``.

        # Returns
            Tensor with the same number of channels and
            ``ceil(max_seq * samplerate / downsampling)`` frames.
        """
        target_len = int(self.max_seq * samplerate)
        num_frames = sample.shape[-1]

        # Clips at least ``max_seq`` seconds long are cropped to a random length
        # in [min_seq, max_seq]; shorter clips are kept whole and only padded.
        if num_frames >= target_len:
            seq_length = int(random.uniform(self.min_seq, self.max_seq) * samplerate)
            # random.uniform may return its upper endpoint; never exceed the target.
            seq_length = min(seq_length, target_len)
            # "+ 1" makes the last valid start position reachable and keeps the
            # range non-empty when the crop is as long as the clip itself.
            start = random.randrange(0, num_frames - seq_length + 1)
            sample = sample[:, start:start + seq_length]

        pad_len = target_len - sample.shape[-1]
        if pad_len > 0:
            # Match channel count, dtype and device of the waveform.
            padding = sample.new_zeros((sample.shape[0], pad_len))
            sample = torch.cat((sample, padding), 1)

        return sample[:, ::self.downsampling]

    def __len__(self):
        return len(self.df)

    def num_classes(self):
        """Number of distinct class names in the index."""
        return self.df['class_name'].nunique()

    @staticmethod
    def index_subset(path):
        """Walk ``path`` and describe every file found.

        # Arguments
            path: Root directory to walk.

        # Returns
            A list of dicts with keys ``'alphabet'`` (name of the grandparent
            directory of the file), ``'class_name'`` (name of the directory
            holding the file) and ``'filepath'``. Directories and files are
            visited in sorted order so item ids are reproducible across runs.
        """
        samples = []
        for root, folders, files in os.walk(path):
            # In-place sort steers os.walk's traversal order.
            folders.sort()
            if not files:
                continue

            # normpath drops a trailing separator and normalises separators, so
            # the directory names do not depend on how ``path`` was spelled.
            normalized = os.path.normpath(root)
            class_name = os.path.basename(normalized)
            alphabet = os.path.basename(os.path.dirname(normalized))

            for file in sorted(files):
                samples.append({
                    'alphabet': alphabet,
                    'class_name': class_name,
                    'filepath': os.path.join(root, file)
                })

        return samples

In [6]:
class NShotTaskSampler(Sampler):
    def __init__(self,
                 dataset: torch.utils.data.Dataset,
                 episodes_per_epoch: int = None,
                 n: int = None,
                 k: int = None,
                 q: int = None,
                 num_tasks: int = 1):
        """PyTorch Sampler subclass that generates batches of n-shot, k-way, q-query tasks.

        Each n-shot task contains a "support set" of `k` sets of `n` samples and a "query set" of `k` sets
        of `q` samples. Within one task the first n * k indices belong to the support set and the remaining
        q * k indices belong to the query set; when `num_tasks` > 1 the tasks are concatenated one after
        another in the same batch.

        The support and query sets are sampled such that they are disjoint i.e. do not contain overlapping samples.

        # Arguments
            dataset: Object exposing a DataFrame attribute `df` with an integer `class_id` column and an
                `id` column holding the dataset index of each row.
            episodes_per_epoch: Arbitrary number of batches of n-shot tasks to generate in one epoch
            n: int. Number of support samples for each class in the n-shot classification tasks.
            k: int. Number of classes in the n-shot classification tasks.
            q: int. Number of query samples for each class in the n-shot classification tasks.
            num_tasks: Number of n-shot tasks to group into a single batch

        # Raises
            ValueError: If `episodes_per_epoch`, `n` or `k` is missing or < 1, if `q` is missing or < 0,
                or if `num_tasks` < 1.
        """
        # The base Sampler initialiser is deliberately not called: PyTorch does
        # not use its `data_source` argument, and skipping the call avoids
        # depending on that signature.
        for label, value, minimum in (('episodes_per_epoch', episodes_per_epoch, 1),
                                      ('n', n, 1),
                                      ('k', k, 1),
                                      ('q', q, 0)):
            if value is None or value < minimum:
                raise ValueError('{} must be an integer >= {}, got {!r}.'.format(label, minimum, value))
        if num_tasks < 1:
            raise ValueError('num_tasks must be >= 1.')

        self.episodes_per_epoch = episodes_per_epoch
        self.dataset = dataset
        self.num_tasks = num_tasks
        self.k = k
        self.n = n
        self.q = q

        self.i_task = 0

    def __len__(self):
        return self.episodes_per_epoch

    def __iter__(self):
        """Yield one 1-D numpy array of dataset indices per episode.

        # Raises
            ValueError: If fewer than `k` classes have at least `n + q` samples.
        """
        per_class = self.n + self.q

        # Group the indices once per epoch instead of filtering the whole frame
        # for every class of every episode. Classes that cannot supply a
        # disjoint support and query set are never drawn.
        ids_by_class = [
            group.to_numpy()
            for _, group in self.dataset.df.groupby('class_id')['id']
            if len(group) >= per_class
        ]
        if len(ids_by_class) < self.k:
            raise ValueError(
                'Need at least {} classes with >= {} samples each, found {}.'.format(
                    self.k, per_class, len(ids_by_class)))

        for _ in range(self.episodes_per_epoch):
            batch = []

            for _task in range(self.num_tasks):
                # Get random classes (positions into ids_by_class)
                episode_classes = np.random.choice(len(ids_by_class), size=self.k, replace=False)

                # One draw without replacement per class, split into support and
                # query, guarantees the two sets are disjoint.
                draws = [
                    np.random.choice(ids_by_class[c], size=per_class, replace=False)
                    for c in episode_classes
                ]

                for ids in draws:
                    batch.extend(ids[:self.n])
                for ids in draws:
                    batch.extend(ids[self.n:])

            yield np.asarray(batch)

In [7]:
from torch.utils.data import DataLoader

In [29]:
# min_seq / max_seq are crop-length bounds in seconds (SequenceDataset multiplies
# them by the file's sample rate); downsampling=4 keeps every 4th sample.
train_dataset = SequenceDataset(data_path, min_seq = 1, max_seq = 3, downsampling = 4)

In [26]:
# One episode per epoch; each episode is a 5-shot, 5-way task with 5 query
# samples per class, i.e. 5*5 support + 5*5 query = 50 items per batch.
episode_sampler = NShotTaskSampler(
    train_dataset,
    episodes_per_epoch=1,
    n=5,
    k=5,
    q=5,
)
train_taskloader = DataLoader(train_dataset, batch_sampler=episode_sampler, num_workers=2)

In [27]:
# Smoke test: pull every episode from the loader and show the waveform tensor
# shape followed by the integer class labels of that batch.
for batch_index, batch in enumerate(train_taskloader):
    x, y = batch
    print(x.shape, y, sep='\n')

torch.Size([50, 1, 12000])
tensor([223, 223, 223, 223, 223,  81,  81,  81,  81,  81,  25,  25,  25,  25,
         25, 200, 200, 200, 200, 200, 237, 237, 237, 237, 237, 223, 223, 223,
        223, 223,  81,  81,  81,  81,  81,  25,  25,  25,  25,  25, 200, 200,
        200, 200, 200, 237, 237, 237, 237, 237])


In [28]:
x[0][0].shape

torch.Size([12000])

In [144]:
31440/16000

1.965

In [79]:
# Scratch version of the dataset index: one record per file below `data_path`.
# 'class_id' is the name of the directory holding the file and 'alphabet' the
# name of that directory's parent; directories without files contribute nothing.
samples = [
    {
        'subset': 'train',
        'alphabet': root.split('/')[-2],
        'class_id': root.split('/')[-1],
        'filepath': os.path.join(root, file),
    }
    for root, _folders, files in os.walk(data_path)
    if files
    for file in files
]

In [81]:
df = pd.DataFrame(data=samples)
df

Unnamed: 0,subset,alphabet,class_id,filepath
0,train,train-clean-100-v2,1069,../data/LibriSpeech/train-clean-100-v2/1069/10...
1,train,train-clean-100-v2,1069,../data/LibriSpeech/train-clean-100-v2/1069/10...
2,train,train-clean-100-v2,1069,../data/LibriSpeech/train-clean-100-v2/1069/10...
3,train,train-clean-100-v2,1069,../data/LibriSpeech/train-clean-100-v2/1069/10...
4,train,train-clean-100-v2,1069,../data/LibriSpeech/train-clean-100-v2/1069/10...
...,...,...,...,...
28522,train,train-clean-100-v2,5022,../data/LibriSpeech/train-clean-100-v2/5022/50...
28523,train,train-clean-100-v2,5022,../data/LibriSpeech/train-clean-100-v2/5022/50...
28524,train,train-clean-100-v2,5022,../data/LibriSpeech/train-clean-100-v2/5022/50...
28525,train,train-clean-100-v2,5022,../data/LibriSpeech/train-clean-100-v2/5022/50...


In [82]:
df = df.assign(id=df.index.values)

In [85]:
unique_characters = sorted(df['class_id'].unique())

In [86]:
unique_characters

['103',
 '1034',
 '1040',
 '1069',
 '1081',
 '1088',
 '1098',
 '1116',
 '118',
 '1183',
 '1235',
 '1246',
 '125',
 '1263',
 '1334',
 '1355',
 '1363',
 '1447',
 '1455',
 '150',
 '1502',
 '1553',
 '1578',
 '1594',
 '1624',
 '163',
 '1723',
 '1737',
 '1743',
 '1841',
 '1867',
 '1898',
 '19',
 '1926',
 '196',
 '1963',
 '1970',
 '198',
 '1992',
 '200',
 '2002',
 '2007',
 '201',
 '2092',
 '211',
 '2136',
 '2159',
 '2182',
 '2196',
 '226',
 '2289',
 '229',
 '233',
 '2384',
 '2391',
 '2416',
 '2436',
 '248',
 '250',
 '2514',
 '2518',
 '254',
 '26',
 '2691',
 '27',
 '2764',
 '2817',
 '2836',
 '2843',
 '289',
 '2893',
 '2910',
 '2911',
 '2952',
 '298',
 '2989',
 '302',
 '307',
 '311',
 '3112',
 '3168',
 '32',
 '3214',
 '322',
 '3235',
 '3240',
 '3242',
 '3259',
 '328',
 '332',
 '3374',
 '3436',
 '3440',
 '3486',
 '3526',
 '3607',
 '3664',
 '3699',
 '3723',
 '374',
 '3807',
 '3830',
 '3857',
 '3879',
 '39',
 '3947',
 '3982',
 '3983',
 '40',
 '4014',
 '4018',
 '403',
 '405',
 '4051',
 '4088',
 '41

In [None]:
# Map every class name to its position in the sorted `unique_characters` list.
# The previous loop called an undefined top-level `num_classes()` and rebound
# the dict on each iteration, so at most the last pair would have survived.
class_name_to_id = {name: idx for idx, name in enumerate(unique_characters)}

In [88]:
df.to_dict()['filepath']

{0: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133709-0000.flac',
 1: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133699-0001.flac',
 2: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133709-0041.flac',
 3: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133699-0017.flac',
 4: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133709-0016.flac',
 5: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133699-0040.flac',
 6: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133699-0037.flac',
 7: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133709-0036.flac',
 8: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133709-0020.flac',
 9: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133699-0021.flac',
 10: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133699-0020.flac',
 11: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133709-0021.flac',
 12: '../data/LibriSpeech/train-clean-100-v2/1069/1069-133709-0037.flac',
 13: '../data/LibriSpeech/train-clean-100-v2/106

In [92]:
classes = np.random.choice(df['class_id'].unique(), size=5, replace=False)
classes

array(['5688', '4340', '7278', '307', '374'], dtype=object)

In [94]:
df[df['class_id'].isin(classes)]

Unnamed: 0,subset,alphabet,class_id,filepath,id
413,train,train-clean-100-v2,307,../data/LibriSpeech/train-clean-100-v2/307/307...,413
414,train,train-clean-100-v2,307,../data/LibriSpeech/train-clean-100-v2/307/307...,414
415,train,train-clean-100-v2,307,../data/LibriSpeech/train-clean-100-v2/307/307...,415
416,train,train-clean-100-v2,307,../data/LibriSpeech/train-clean-100-v2/307/307...,416
417,train,train-clean-100-v2,307,../data/LibriSpeech/train-clean-100-v2/307/307...,417
...,...,...,...,...,...
26989,train,train-clean-100-v2,374,../data/LibriSpeech/train-clean-100-v2/374/374...,26989
26990,train,train-clean-100-v2,374,../data/LibriSpeech/train-clean-100-v2/374/374...,26990
26991,train,train-clean-100-v2,374,../data/LibriSpeech/train-clean-100-v2/374/374...,26991
26992,train,train-clean-100-v2,374,../data/LibriSpeech/train-clean-100-v2/374/374...,26992


In [95]:
support = {k:None for k in classes}

In [96]:
support

{'5688': None, '4340': None, '7278': None, '307': None, '374': None}

In [99]:
ff = df[df['class_id'] == '5688'].sample(5)

In [100]:
ff

Unnamed: 0,subset,alphabet,class_id,filepath,id
26810,train,train-clean-100-v2,5688,../data/LibriSpeech/train-clean-100-v2/5688/56...,26810
26762,train,train-clean-100-v2,5688,../data/LibriSpeech/train-clean-100-v2/5688/56...,26762
26837,train,train-clean-100-v2,5688,../data/LibriSpeech/train-clean-100-v2/5688/56...,26837
26790,train,train-clean-100-v2,5688,../data/LibriSpeech/train-clean-100-v2/5688/56...,26790
26834,train,train-clean-100-v2,5688,../data/LibriSpeech/train-clean-100-v2/5688/56...,26834


In [103]:
for i, s in ff.iterrows():
    print(s['subset'])

train
train
train
train
train


In [12]:
path = "../data/LibriSpeech/train-clean-100-v2/103/103-1240-0001.flac"

In [26]:
waveform, sample_rate = torchaudio.load(path)

In [34]:
waveform.shape[-1]

255120

In [30]:
length = sample_rate*3
length

48000

In [69]:
index = random.randrange(0,waveform.shape[-1]-length)

In [70]:
waveform[:,index:index+length].shape

torch.Size([1, 48000])

In [18]:
waveform[:,:10].shape

torch.Size([1, 10])

In [78]:
(int)(random.uniform(1, 3)*16000)

38094

In [13]:
# NOTE(review): `sf` is not imported anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel. Presumably `import soundfile as sf`
# was run in a since-deleted cell -- confirm and add it to the import cell.
data, samplerate = sf.read(path)

In [14]:
data

array([-0.00585938, -0.00445557, -0.00671387, ...,  0.00067139,
        0.00341797,  0.00466919])

In [107]:
np.newaxis

In [109]:
d = data[np.newaxis,:]

In [110]:
d.shape

(1, 240960)

In [6]:
samplerate

16000

In [9]:
len(data)

240960

In [8]:
len(data)/samplerate

15.06

In [12]:
(16000*3)/4

12000.0

In [13]:
200*200*3

120000

In [15]:
len(data[::4])

60240

In [21]:
a = np.array([1,2,3,4,5])

In [22]:
a[::4]

array([1, 5])