In [1]:
import librosa
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
def audio_norm(data):
    """Rescale a signal so its largest absolute sample is just under 0.5.

    Parameters
    ----------
    data : array-like
        Audio samples.

    Returns
    -------
    The samples divided by (peak + 1e-6) and multiplied by 0.5, where
    ``peak`` is the largest absolute value found in ``data``.
    """
    peak = np.abs(data).max()
    # The small epsilon keeps the division finite for an all-zero signal.
    return data / (peak + 1e-6) * 0.5

In [3]:
class Config(object):
    """Bundle of pipeline settings plus the input geometry derived from them.

    Every constructor argument is stored unchanged as an attribute of the
    same name. Two further attributes are derived:

    * ``audio_length`` -- ``sampling_rate * audio_duration`` (samples per clip).
    * ``dim`` -- ``(n_mfcc, 1 + floor(audio_length / 512), 1)`` when
      ``use_mfcc`` is true, otherwise ``(audio_length, 1)``.
    """

    def __init__(self,
                 sampling_rate=16000, audio_duration=4, n_classes=41,
                 use_mfcc=False, n_folds=10, learning_rate=0.0001, 
                 max_epochs=50, n_mfcc=40, datagen_num = 2):
        # Audio / feature settings.
        self.sampling_rate = sampling_rate
        self.audio_duration = audio_duration
        self.use_mfcc = use_mfcc
        self.n_mfcc = n_mfcc

        # Task and training settings.
        self.n_classes = n_classes
        self.n_folds = n_folds
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs

        # Number of samples generated per clip by the data-preparation code.
        self.datagen_num = datagen_num

        # Samples per clip.
        self.audio_length = self.sampling_rate * self.audio_duration

        # NOTE(review): 512 presumably mirrors the hop length used by the MFCC
        # extraction -- confirm against the feature code.
        self.dim = (
            (self.n_mfcc, 1 + int(np.floor(self.audio_length / 512)), 1)
            if self.use_mfcc
            else (self.audio_length, 1)
        )

In [27]:
def prepare_data(df, config, data_dir):
    """Build an augmented MFCC training set from the clips listed in ``df``.

    Each clip is loaded at ``config.sampling_rate`` and normalised with
    ``audio_norm``. It is then turned into ``config.datagen_num`` samples:
    every sample is randomly time-stretched, tiled if it is shorter than
    ``config.audio_length``, randomly cropped to exactly that length and
    converted to ``config.n_mfcc`` MFCCs.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by file name, with a ``label_idx`` column.
    config : Config
        Supplies ``sampling_rate``, ``audio_length``, ``n_mfcc``,
        ``datagen_num`` and ``dim``.
    data_dir : str
        Prefix that is concatenated with each file name, so it must end with
        a path separator.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(len(df) * datagen_num, config.dim[0], config.dim[1], 1)``.
    y : numpy.ndarray
        Shape ``(len(df) * datagen_num,)``; ``label_idx`` of the source clip
        for every row of ``X``.
    """
    n_files = df.shape[0]
    n_aug = config.datagen_num
    input_length = config.audio_length

    X = np.empty(shape=(n_files * n_aug, config.dim[0], config.dim[1], 1))
    y = np.empty(n_files * n_aug)

    # Largest speed-up factor; the slowest rate is its reciprocal.
    max_rate = 1.1

    for i, fname in enumerate(df.index):
        print(fname+' ({0}/{1})'.format(i,df.shape[0]))
        data, _ = librosa.load(data_dir + fname, sr=config.sampling_rate)
        data = audio_norm(data)
        # The frame is indexed by file name, so the label is fetched by
        # position explicitly instead of relying on integer `[]` fallback.
        label = df.label_idx.iloc[i]

        for j in range(n_aug):
            row = i * n_aug + j

            # Disabled augmentations (kept for reference): pitch shift of up
            # to +/-3 steps at 24 bins per octave, and additive uniform white
            # noise with amplitude up to 0.05.

            # Time stretch, log-uniform in [1/max_rate, max_rate]. The earlier
            # `2 ** uniform(-ln(tr), ln(tr))` mixed log bases and only covered
            # roughly [0.936, 1.068].
            rate = max_rate ** np.random.uniform(-1, 1)
            shifted_data = librosa.effects.time_stretch(data, rate=rate)

            # Repeat short clips until they cover the target length.
            if len(shifted_data) < input_length:
                repeats = int(np.ceil(input_length / len(shifted_data)))
                shifted_data = np.tile(shifted_data, repeats)

            # Random crop. randint(0) raises, so a clip that is already
            # exactly input_length long is taken from offset 0.
            max_offset = len(shifted_data) - input_length
            offset = np.random.randint(max_offset) if max_offset > 0 else 0
            shifted_data = shifted_data[offset:offset + input_length]

            mfcc = librosa.feature.mfcc(y=shifted_data, sr=config.sampling_rate,
                                        n_mfcc=config.n_mfcc)
            X[row, :] = np.expand_dims(mfcc, axis=-1)
            y[row] = label
    return X, y

In [28]:
def prepare_test_data(config, data_dir='../input/audio_test/', csv_path='../input/test.csv'):
    """Compute one MFCC feature block per clip listed in the test CSV.

    Each clip is loaded at ``config.sampling_rate``, normalised with
    ``audio_norm``, tiled if shorter than ``config.audio_length``, randomly
    cropped to exactly that length and converted to ``config.n_mfcc`` MFCCs.
    A clip that loads as zero samples is replaced by silence.

    Parameters
    ----------
    config : Config
        Supplies ``sampling_rate``, ``audio_length``, ``n_mfcc`` and ``dim``.
    data_dir : str
        Prefix that is concatenated with each file name, so it must end with
        a path separator.
    csv_path : str
        CSV file with an ``fname`` column naming the clips to process.

    Returns
    -------
    numpy.ndarray
        Shape ``(n_clips, config.dim[0], config.dim[1], 1)``, rows in CSV order.
    """
    df = pd.read_csv(csv_path)
    input_length = config.audio_length
    test_data = np.empty(shape=(df.shape[0], config.dim[0], config.dim[1], 1))

    for i, fname in enumerate(df['fname']):
        print(fname)
        data, _ = librosa.load(data_dir + fname, sr=config.sampling_rate)
        if len(data) == 0:
            # Empty clip: use silence of the target length so the file still
            # gets a feature row.
            data = np.zeros(input_length)
        data = audio_norm(data)

        # Repeat short clips until they cover the target length.
        if len(data) < input_length:
            repeats = int(np.ceil(input_length / len(data)))
            data = np.tile(data, repeats)

        # Random crop. randint(0) raises, so a clip that is already exactly
        # input_length long is taken from offset 0.
        max_offset = len(data) - input_length
        offset = np.random.randint(max_offset) if max_offset > 0 else 0
        data = data[offset:offset + input_length]

        mfcc = librosa.feature.mfcc(y=data, sr=config.sampling_rate,
                                    n_mfcc=config.n_mfcc)
        test_data[i] = np.expand_dims(mfcc, axis=-1)
    return test_data

In [29]:
# Feature-extraction settings for this run.
config = Config(
    sampling_rate=44100,
    audio_duration=4,
    n_folds=5,
    learning_rate=0.001,
    use_mfcc=True,
    n_mfcc=40,
    datagen_num=5,
)

# Training metadata, keyed by file name.
train = pd.read_csv("../input/train.csv").set_index("fname")

# Give every label an integer id in order of first appearance.
LABELS = list(train.label.unique())
label_idx = {name: pos for pos, name in enumerate(LABELS)}
train["label_idx"] = train.label.map(label_idx)

In [None]:
# Extract MFCC features for the test clips; prints each file name as it is processed.
test_data = prepare_test_data(config, data_dir='../input/audio_test/')

00063640.wav
0013a1db.wav
002bb878.wav
002d392d.wav
00326aa9.wav
0038a046.wav
003995fa.wav
005ae625.wav
007759c4.wav
008afd93.wav
00a161c0.wav
00a7a2f6.wav
00ae03f6.wav
00b2404e.wav
00beb030.wav
00c4d5b8.wav
00c92c05.wav
00ccf065.wav
00d0ab77.wav
00dffe3a.wav
00e33205.wav
00e7ed07.wav
00eac343.wav
00f2f692.wav
0102a895.wav
010a0b3a.wav
010eed01.wav
01115622.wav
01198b7e.wav
01207ce5.wav
0137bfba.wav
014eb52e.wav
01608fa3.wav
0169348e.wav
016e52da.wav
0183b482.wav
0185de6b.wav
01a5a2a3.wav
01aed32a.wav
01afccc5.wav
01b4cdb5.wav
01b85217.wav
01bb344f.wav
01bede12.wav
01c9d6d3.wav
01e628cb.wav
01e6e112.wav
01e99e2d.wav
01eccb6e.wav
01f27f99.wav
01f29edb.wav
01f9883e.wav
01fd22ba.wav
01fe4a01.wav
02107093.wav
02182ebd.wav
02198549.wav
0220d48a.wav
02210ee7.wav
022170df.wav
023431b2.wav
023e7db3.wav
023eab1f.wav
02482fdc.wav
024e45b1.wav
026820e6.wav
02697329.wav
027a8b14.wav
027dbe3e.wav
027f06cc.wav
028278f0.wav
028db587.wav
02960f07.wav
0298b2c6.wav
02a0eb3c.wav
02ad476f.wav
02af55f6.wav

1146e063.wav
11544687.wav
115a258b.wav
115aa41f.wav
116b77d5.wav
11704d14.wav
11770392.wav
117dcac8.wav
11844b37.wav
11867b61.wav
1189cf83.wav
118f1763.wav
11986117.wav
119a3278.wav
11a10ce8.wav
11a4f814.wav
11ae787e.wav
11cc6d11.wav
11e6a1ec.wav
11e80486.wav
11e95378.wav
11edc852.wav
11f661bd.wav
11f6b674.wav
11fa11a0.wav
12034b1a.wav
120c7292.wav
1221a798.wav
122abe8c.wav
122f1508.wav
12394644.wav
123986b6.wav
1242ac46.wav
1248a779.wav
1248f9e2.wav
12508a7b.wav
12584f90.wav
125c89cb.wav
125dccd9.wav
12679aee.wav
126a48d2.wav
1271e7aa.wav
12770ffc.wav
12798b6a.wav
127a43ec.wav
127d3803.wav
12a0327a.wav
12a83756.wav
12ac9a66.wav
12adec75.wav
12ae3ff9.wav
12bb4b63.wav
12bf9735.wav
12bfa703.wav
12c73c11.wav
12d71e7f.wav
12d94273.wav
12e91a56.wav
12ed3971.wav
12edfe79.wav
12f5ec05.wav
12f71a9d.wav
12f9efda.wav
12fb2e35.wav
130112d1.wav
1302eade.wav
130988c6.wav
13184046.wav
13331371.wav
13354f83.wav
133599c8.wav
13371a0b.wav
133a1dac.wav
13489639.wav
138bb73e.wav
1391b6b2.wav
13979584.wav

225e3135.wav
225f0626.wav
2261073e.wav
226ea2c6.wav
227f2c50.wav
228153f2.wav
22824c18.wav
22891802.wav
2293e34d.wav
22abde0a.wav
22b2efa2.wav
22b5a548.wav
22c3952c.wav
22c798ac.wav
22ca2dbb.wav
22e88b33.wav
22f3b36c.wav
22f70afe.wav
23029663.wav
23071dd2.wav
23094c55.wav
230ce5d5.wav
23176e01.wav
231d230c.wav
231e9e57.wav
231f314d.wav
232750a8.wav
232ec97b.wav
23300ded.wav
233996cf.wav
233a83b7.wav
233d0591.wav
233d8cbb.wav
235dbbf0.wav
2369256d.wav
23732830.wav
23824542.wav
23852f22.wav
238d7b37.wav
238d8cad.wav
238deaad.wav
23984d44.wav
239988bc.wav
2399acef.wav
23bae3b1.wav
23bf31da.wav
23ca551e.wav
23cdfc67.wav
23d1d920.wav
23d4cbff.wav
23e94e4f.wav
23ede927.wav
23f2b1f7.wav
23f592dd.wav
23f8b4b0.wav
23fc49d9.wav
240c45cb.wav
240d2bb2.wav
240dfb34.wav
241224d6.wav
242118f2.wav
2421d14b.wav
2423598a.wav
2429265b.wav
242ea92c.wav
2436090e.wav
243ab0b7.wav
2442e7c6.wav
244618d7.wav
244baebc.wav
244dc716.wav
24599c96.wav
24602db6.wav
2472cdfe.wav
247431d8.wav
247a90a0.wav
2487cd7b.wav

34be5213.wav
34c7a37b.wav
34cddac7.wav
34d1fffa.wav
34d84768.wav
34f6f431.wav
34ff2f37.wav
350b89b8.wav
350ed42d.wav
35115db0.wav
351b0ba1.wav
351dcc25.wav
35235d3f.wav
352c769d.wav
352f143a.wav
3531e0f4.wav
3533dc16.wav
3538f800.wav
353b44b7.wav
353f55db.wav
3542eddc.wav
35497fc4.wav
355eeedf.wav
355f0738.wav
356cab15.wav
3579ff4b.wav
3592e0cb.wav
3597c4e5.wav
3598cc58.wav
359c9ba3.wav
35b91bb1.wav
35bb05b0.wav
35bf4ae5.wav
35c0f6b7.wav
35cb8bd1.wav
35ce58ae.wav
35d008df.wav
35d0858d.wav
35da43f9.wav
35def6dc.wav
35e1140b.wav
35e4b0fd.wav
35f3a48c.wav
35f6a4b8.wav
35ff3f29.wav
35ffae5a.wav
360348af.wav
360e9c8e.wav
360ed752.wav
361285ac.wav
3617db48.wav
361c028f.wav
361fbd17.wav
3620c12b.wav
3623b02e.wav
363a3760.wav
364d2e90.wav
36507474.wav
365a4c89.wav
365f9044.wav
3660d731.wav
3668b282.wav
36690c27.wav
3670cc04.wav
367ba81f.wav
36a0c334.wav
36a2202d.wav
36a92b29.wav
36a98063.wav
36b4867e.wav
36ba1f72.wav
36cd9e16.wav
36d47091.wav
36dbdb68.wav
36e470e4.wav
36e7790c.wav
36e79aec.wav

462a705d.wav
462c7777.wav
462ec068.wav
46321665.wav
46380229.wav
463d4298.wav
463de778.wav
463e231d.wav
4646a6e3.wav
464ae66b.wav
465c6192.wav
46622a4c.wav
466647df.wav
4666bee5.wav
466e8bab.wav
467c5a5a.wav
4683f13c.wav
4698193e.wav
4699c352.wav
46a20346.wav
46a24375.wav
46a4d46b.wav
46ab7601.wav
46aebc33.wav
46b06055.wav
46b5a369.wav
46d15a22.wav
46e1f70a.wav
46f07933.wav
46f0b51b.wav
4706cebd.wav
471a6fca.wav
471ab81d.wav
47268a0a.wav
47333aac.wav
4733f511.wav
473bd952.wav
473e9a47.wav
4741ae62.wav
47494a3a.wav
47538a81.wav
47542ddc.wav
47744c05.wav
47793988.wav
47795659.wav
47913bc5.wav
4797fc8b.wav
479ab3cf.wav
479d9994.wav
47a26734.wav
47ac630d.wav
47acbfcd.wav
47af6236.wav
47b7eb95.wav
47bba234.wav
47c48965.wav
47c57028.wav
47ca6f5b.wav
47e5c682.wav
47eba90e.wav
47ebd5f5.wav
47f5e3e1.wav
47f7b3fa.wav
47fa98ea.wav
480fb007.wav
481623f9.wav
48165dd3.wav
481c84da.wav
481f3844.wav
48395d1c.wav
483c028d.wav
483feb77.wav
48432e76.wav
484e69ce.wav
484e96f4.wav
4854915c.wav
486c058b.wav

57ee6620.wav
57f200ec.wav
57f9f1e8.wav
58017079.wav
580468b6.wav
58050f57.wav
58072015.wav
580cf7d2.wav
58151c44.wav
582aaabc.wav
582bf94d.wav
582c743f.wav
583119bf.wav
5835b312.wav
583c89f0.wav
583e2d70.wav
58477ce0.wav
584821f7.wav
5851365b.wav
58577d4d.wav
585a58c9.wav
585c8c34.wav
585fbcea.wav
5864f403.wav
58663e0d.wav
5869a324.wav
586a68fb.wav
587a184f.wav
587e867e.wav
5880cfd4.wav
5882e87b.wav
58848256.wav
58864b07.wav
5895612c.wav
589de45d.wav
58a13aa9.wav
58bd136f.wav
58d73bbd.wav
58f490d2.wav
59082bc2.wav
590cef8a.wav
59173448.wav
59190ef2.wav
591d35d6.wav
591d9bdc.wav
591f738e.wav
59214b26.wav
5924a0f4.wav
592805ad.wav
59293ace.wav
592e1577.wav
592f32bb.wav
5931d373.wav
5938a674.wav
59393627.wav
59405f12.wav
5948a83b.wav
594e74ae.wav
5965d649.wav
5968f9ac.wav
59698313.wav
596cf925.wav
597a143a.wav
597dc329.wav
5990b620.wav
5997bbc5.wav
59af58a3.wav
59be5629.wav
59c0b2f4.wav
59d5c03f.wav
59e47801.wav
59eb7008.wav
59f27ebf.wav
5a15c0d0.wav
5a24eb9c.wav
5a2b60df.wav
5a3491fc.wav

699ea314.wav
69b3838c.wav
69ba69e3.wav
69bd1969.wav
69c0b1e9.wav
69c6b75d.wav
69ca1307.wav
69cca687.wav
69d1b79f.wav
69d8927f.wav
69eb69bf.wav
69f00142.wav
69fb974d.wav
69fcaa90.wav
6a010a3b.wav
6a1faea9.wav
6a3a9e86.wav
6a454d5f.wav
6a49870b.wav
6a4a0f6c.wav
6a4c6be2.wav
6a4cac8d.wav
6a516e3c.wav
6a52566a.wav
6a5304b5.wav
6a59da0e.wav
6a6928b4.wav
6a720a79.wav
6a770aa1.wav
6a825284.wav
6a8317a5.wav
6a8e35d8.wav
6a8e3fbf.wav
6a9260c2.wav
6a95684b.wav
6a9f8d7f.wav
6aa576a2.wav
6aa953a8.wav
6aaac9d2.wav
6aaaedea.wav
6aac4b3b.wav
6aad5e4c.wav
6aaffcf2.wav
6ab2a13b.wav
6ab6ca9b.wav
6ab784a0.wav
6ab8e56f.wav
6abe08e1.wav
6ac046a1.wav
6ac32ae8.wav
6ad4ccea.wav
6ad8d33e.wav
6add5aad.wav
6ae82a23.wav
6aef9576.wav
6aefce55.wav
6af4f3e5.wav
6afad514.wav
6afedaca.wav
6b0629d4.wav
6b0f949f.wav
6b3059b8.wav
6b3141b1.wav
6b32bcea.wav
6b36f034.wav
6b3b040e.wav
6b3d7d7e.wav
6b41e710.wav
6b4292c6.wav
6b45c352.wav
6b4608c9.wav
6b50678c.wav
6b5773d7.wav
6b6364d4.wav
6b65befd.wav
6b67e860.wav
6b6c0405.wav

79b42a4b.wav
79b90729.wav
79b9d0ac.wav
79c42635.wav
79c9f959.wav
79cf3157.wav
79db77f8.wav
79ea9c8d.wav
79f68bc0.wav
7a08f881.wav
7a135d94.wav
7a1461f6.wav
7a21d4ba.wav
7a24dfe2.wav
7a2954e4.wav
7a29799f.wav
7a384640.wav
7a39c609.wav
7a3ddfdd.wav
7a3dfe00.wav
7a493aa4.wav
7a52fb48.wav
7a596406.wav
7a5aa493.wav
7a6823fe.wav
7a720227.wav
7a73f213.wav
7a774e4f.wav
7a7d547f.wav
7a8b587a.wav
7a914c27.wav
7a929de6.wav
7a94d08a.wav
7a96fbb4.wav
7a985ef9.wav
7a991ba0.wav
7aa42206.wav
7aa58339.wav
7ab9f7af.wav
7aba998a.wav
7abe967a.wav
7ac649fa.wav
7ac71752.wav
7ad14962.wav
7af6ef7f.wav
7af73c7d.wav
7af77bfe.wav
7b188557.wav
7b1adebd.wav
7b202f31.wav
7b2d48f6.wav
7b2f865c.wav
7b3215eb.wav
7b3376cd.wav
7b3c2236.wav
7b3d5cce.wav
7b49d63f.wav
7b578497.wav
7b5ec964.wav
7b604493.wav
7b650aeb.wav
7b672f5a.wav
7b69ef3e.wav
7b6e5762.wav
7b6e969e.wav
7b72fccc.wav
7b78381b.wav
7b7dad57.wav
7b7dd15e.wav
7b851138.wav
7b90e704.wav
7b944469.wav
7b97e638.wav
7bb156d7.wav
7bcf5502.wav
7bd7f530.wav
7be1e9f8.wav

8a23c5d3.wav
8a24496e.wav
8a25df61.wav
8a2793a4.wav
8a340933.wav
8a382bf3.wav
8a3e131b.wav
8a4865fe.wav
8a54a88f.wav
8a5535d7.wav
8a5fdff9.wav
8a65cf0f.wav
8a67db0c.wav
8a686d60.wav
8a764cbf.wav
8a772083.wav
8a775bde.wav
8a790989.wav
8a82613e.wav
8a8290ff.wav
8a82ff41.wav
8a86d731.wav
8a8f86ec.wav
8a93cd50.wav
8a94d1c5.wav
8a97db28.wav
8aa3ebc6.wav
8aa74761.wav
8aab9bdd.wav
8aba992e.wav
8ac13a9b.wav
8aca08a7.wav
8ace0a5f.wav
8ad56763.wav
8ad60086.wav
8ad6985a.wav
8ae9fe4f.wav
8af2209c.wav
8af9d4b0.wav
8affb34b.wav
8b01169f.wav
8b0dd9f6.wav
8b11746f.wav
8b14bd95.wav
8b1e87cb.wav
8b21e03d.wav
8b3869ca.wav
8b3f9d2e.wav
8b407f48.wav
8b42e5d6.wav
8b47b684.wav
8b52043a.wav
8b5a7fc6.wav
8b5aad24.wav
8b60ec4c.wav
8b6ab5a1.wav
8b79f42a.wav
8b7c0e68.wav
8b80bd7b.wav
8b81813e.wav
8b81da3c.wav
8b855fd2.wav
8b963a89.wav
8b997cca.wav
8b9e79a2.wav
8bb55062.wav
8bba3aef.wav
8bbb55ea.wav
8bbc0de9.wav
8bc79cc2.wav
8bd10f07.wav
8bd2fdda.wav
8bd5a1c6.wav
8bd94b92.wav
8bdd4257.wav
8bec536a.wav
8bf5bdd4.wav

9b4b0e42.wav
9b4c38a0.wav
9b51add4.wav
9b5afc0b.wav
9b5fd9aa.wav
9b660e81.wav
9b6955a0.wav
9b6a33b1.wav
9b6a4d3a.wav
9b7614f5.wav
9b7dba6d.wav
9b864a90.wav
9b90b201.wav
9ba882fa.wav
9bb0e4ff.wav
9bb401a1.wav
9bb58c86.wav
9bb6e17e.wav
9bb8f8b6.wav
9bbc1586.wav
9bc11fc2.wav
9bc1b7ae.wav
9bc1cda5.wav
9bc8fef8.wav
9bc9408c.wav
9bccee27.wav
9beda058.wav
9bfac103.wav
9bff1d02.wav
9c0037b8.wav
9c063f54.wav
9c08f924.wav
9c14157b.wav
9c17a9ab.wav
9c1947db.wav
9c1b8f23.wav
9c224683.wav
9c31bbec.wav
9c3f96c0.wav
9c4462a9.wav
9c53edce.wav
9c5b37c5.wav
9c69283b.wav
9c7058f4.wav
9c7a676e.wav
9c7cee6e.wav
9c80c2dc.wav
9c92302a.wav
9c9a09d2.wav
9c9a4ab0.wav
9c9dbb32.wav
9caf2d82.wav
9cb34b9e.wav
9cb91ac4.wav
9cd2f7ce.wav
9cd68da7.wav
9cdb3e29.wav
9cdb4129.wav
9cdc4961.wav
9ce11db1.wav
9ce3e302.wav
9ce62ba5.wav
9ceaec5b.wav
9cec791e.wav
9ced683f.wav
9cff1925.wav
9d0d471d.wav
9d14aaf4.wav
9d241042.wav
9d299d8c.wav
9d2d9c56.wav
9d321f74.wav
9d3391f6.wav
9d45fb82.wav
9d4f422d.wav
9d5064a8.wav
9d61aae9.wav

ac4e170d.wav
ac507f36.wav
ac507fd0.wav
ac590977.wav
ac71fa9c.wav
ac755dd6.wav
ac7b9e18.wav
ac7c8ae3.wav
ac7eec59.wav
ac822579.wav
ac8b9268.wav
ac8c9f4c.wav
ac8cce64.wav
ac909b62.wav
ac9151cf.wav
ac916661.wav
ac9f8827.wav
aca050ea.wav
aca46609.wav
acb2260c.wav
acb2ae9a.wav
acb33e73.wav
acb60d34.wav
acba91ee.wav
accb746f.wav
acd9adb9.wav
acdcfb8f.wav
ace88278.wav
acf4569a.wav
acf4890c.wav
ad0ab6fa.wav
ad142ed0.wav
ad1a8bce.wav
ad1f2187.wav
ad23f99c.wav
ad2ee4b2.wav
ad323692.wav
ad4c828c.wav
ad4d97de.wav
ad5821b3.wav
ad617575.wav
ad6e3b22.wav
ad71a0ba.wav
ad72561f.wav
ad7b2dfe.wav
ad7bd363.wav
ad7dbf27.wav
ad814adf.wav
ad8a6cdd.wav
ad8b78e2.wav
ad8d702a.wav
ad8de222.wav
ad936e6a.wav
ada15329.wav
ada2b04f.wav
adbf84f5.wav
adc41a55.wav
add0a091.wav
add0efb3.wav
add3bd1d.wav
add600ff.wav
ade59799.wav
adf7b2fa.wav
ae17caec.wav
ae1e828a.wav
ae23366f.wav
ae25e2b8.wav
ae2e8d4f.wav
ae302df0.wav
ae331164.wav
ae374569.wav
ae3dc480.wav
ae43398f.wav
ae48e08f.wav
ae4e8c4f.wav
ae4fbd56.wav
ae57c6d1.wav

In [None]:
# Persist the test features, then drop the in-memory array
# (presumably to free memory before the training set is built).
np.save('../data_gen/test_mfcc.npy',test_data)
del test_data

In [None]:
# Build the augmented training features and their labels
# (config.datagen_num samples are generated per training clip).
X_train, y = prepare_data(train, config, '../input/audio_train/')

In [None]:
# Save the training features and labels. The file-name suffix is a literal
# that records the settings used for this run (stretch 1.1, 5 samples per
# clip, 4 s clips); it is not derived from `config`, so update it by hand
# when those settings change.
np.save('../data_gen/train_mfcc_stretch=1.1_num=5_len=4.npy',X_train)
np.save('../data_gen/label_stretch=1.1_num=5_len=4.npy',y)