# Free Spoken Digit Dataset (FSDD) preprocessing
This notebook preprocesses the FSDD dataset for DeepSpeech model training. It generates the train, dev, and test CSV files in the format DeepSpeech expects, and it converts the WAV files in place to the required format and sample rate.

## Imports

In [73]:
import sys
import os
import string
import pydub

Define the numeric to string label translation.

In [74]:
# Directory that holds the FSDD recordings (and receives the CSV files).
audio_dir = '/opt/shared/datasets/free-spoken-digit-dataset/recordings/'

# Spoken form of each digit, ordered so that the tuple index is the digit.
_digit_words = ('zero', 'one', 'two', 'three', 'four',
                'five', 'six', 'seven', 'eight', 'nine')

# Maps the digit character used in the file names to its transcript word.
digits = {str(value): word for value, word in enumerate(_digit_words)}

Create list of all WAV files from the specified dir.

In [75]:
# Names (not full paths) of every WAV recording in the dataset directory.
files = [entry for entry in os.listdir(audio_dir) if entry.endswith('.wav')]

Function that converts a WAV file in place to a single (mono) channel at a 16 kHz sample rate.

In [76]:
def up_sample_wav(path, sample_rate=16000):
    """Re-encode the WAV file at *path* in place as mono 16-bit PCM.

    Args:
        path: Path of the WAV file. The file is read and then overwritten
            with the converted audio.
        sample_rate: Target frame rate in Hz (16000 by default).
    """
    audio = pydub.AudioSegment.from_wav(path)
    audio = audio.set_frame_rate(sample_rate).set_channels(1)
    # Force 2-byte samples so the output really is signed 16-bit PCM (the
    # "s16le" layout), whatever sample width the source recording used.
    audio = audio.set_sample_width(2)
    audio.export(path, format='wav')

Generate the CSV files and convert the audio files into the proper format.

In [77]:
# Write the train/dev/test CSV manifests next to the recordings and convert
# every recording in place. The `with` block guarantees the three files are
# flushed and closed even if a conversion fails part-way through.
csv_header = 'wav_filename,wav_filesize,transcript' + '\n'
number = len(files)

with open(os.path.join(audio_dir, 'train.csv'), 'w') as train_csv, \
        open(os.path.join(audio_dir, 'dev.csv'), 'w') as dev_csv, \
        open(os.path.join(audio_dir, 'test.csv'), 'w') as test_csv:
    for manifest in (train_csv, dev_csv, test_csv):
        manifest.write(csv_header)

    for count, f in enumerate(files):
        print(count, '/', number, f)

        # File names look like "<digit>_<speaker>_<index>.wav".
        name_parts = f.split('_')
        full_digit = digits[name_parts[0]]
        f_num = int(name_parts[2].split('.')[0])

        f_full_path = os.path.join(audio_dir, f)
        up_sample_wav(f_full_path)
        # Measure the size only after the conversion: the file was rewritten.
        f_size = os.stat(f_full_path).st_size
        csv_entry = f_full_path + ',' + str(f_size) + ',' + full_digit + '\n'

        # Split by recording index: 0-4 -> test, 5-8 -> dev, 9 and up -> train.
        if f_num < 5:
            test_csv.write(csv_entry)
        elif f_num < 9:
            dev_csv.write(csv_entry)
        else:
            train_csv.write(csv_entry)

0 / 2000 8_yweweler_4.wav
1 / 2000 3_yweweler_47.wav
2 / 2000 0_jackson_43.wav
3 / 2000 3_yweweler_40.wav
4 / 2000 9_nicolas_24.wav
5 / 2000 7_yweweler_22.wav
6 / 2000 1_nicolas_44.wav
7 / 2000 4_nicolas_47.wav
8 / 2000 4_theo_14.wav
9 / 2000 2_theo_17.wav
10 / 2000 7_jackson_49.wav
11 / 2000 2_yweweler_14.wav
12 / 2000 6_yweweler_10.wav
13 / 2000 7_theo_44.wav
14 / 2000 5_theo_33.wav
15 / 2000 9_nicolas_7.wav
16 / 2000 0_nicolas_28.wav
17 / 2000 0_jackson_37.wav
18 / 2000 8_jackson_32.wav
19 / 2000 8_jackson_4.wav
20 / 2000 5_nicolas_11.wav
21 / 2000 7_jackson_35.wav
22 / 2000 2_nicolas_46.wav
23 / 2000 8_nicolas_41.wav
24 / 2000 8_nicolas_33.wav
25 / 2000 4_jackson_21.wav
26 / 2000 8_nicolas_43.wav
27 / 2000 8_nicolas_47.wav
28 / 2000 0_theo_8.wav
29 / 2000 9_theo_27.wav
30 / 2000 7_theo_14.wav
31 / 2000 5_yweweler_24.wav
32 / 2000 4_theo_45.wav
33 / 2000 4_theo_6.wav
34 / 2000 0_nicolas_45.wav
35 / 2000 6_theo_23.wav
36 / 2000 0_jackson_35.wav
37 / 2000 8_nicolas_8.wav
38 / 2000 4_y

437 / 2000 6_jackson_13.wav
438 / 2000 0_theo_11.wav
439 / 2000 1_jackson_2.wav
440 / 2000 7_theo_36.wav
441 / 2000 3_nicolas_23.wav
442 / 2000 9_theo_40.wav
443 / 2000 8_nicolas_20.wav
444 / 2000 4_theo_11.wav
445 / 2000 4_nicolas_1.wav
446 / 2000 4_theo_4.wav
447 / 2000 8_nicolas_15.wav
448 / 2000 5_yweweler_14.wav
449 / 2000 1_yweweler_3.wav
450 / 2000 3_nicolas_16.wav
451 / 2000 0_nicolas_7.wav
452 / 2000 9_theo_34.wav
453 / 2000 7_theo_10.wav
454 / 2000 5_jackson_9.wav
455 / 2000 3_yweweler_49.wav
456 / 2000 7_yweweler_41.wav
457 / 2000 0_jackson_32.wav
458 / 2000 3_jackson_42.wav
459 / 2000 4_jackson_16.wav
460 / 2000 8_theo_38.wav
461 / 2000 6_yweweler_30.wav
462 / 2000 8_theo_7.wav
463 / 2000 5_jackson_39.wav
464 / 2000 0_yweweler_45.wav
465 / 2000 1_yweweler_25.wav
466 / 2000 2_jackson_43.wav
467 / 2000 7_theo_49.wav
468 / 2000 6_nicolas_43.wav
469 / 2000 6_yweweler_40.wav
470 / 2000 4_yweweler_25.wav
471 / 2000 6_theo_24.wav
472 / 2000 1_nicolas_14.wav
473 / 2000 4_nicolas_4.

934 / 2000 3_jackson_7.wav
935 / 2000 9_jackson_20.wav
936 / 2000 6_nicolas_29.wav
937 / 2000 9_theo_32.wav
938 / 2000 2_yweweler_6.wav
939 / 2000 6_yweweler_11.wav
940 / 2000 8_nicolas_42.wav
941 / 2000 2_yweweler_8.wav
942 / 2000 6_yweweler_44.wav
943 / 2000 3_jackson_6.wav
944 / 2000 2_yweweler_37.wav
945 / 2000 3_jackson_20.wav
946 / 2000 3_nicolas_9.wav
947 / 2000 7_jackson_31.wav
948 / 2000 4_nicolas_5.wav
949 / 2000 2_theo_11.wav
950 / 2000 2_jackson_39.wav
951 / 2000 8_theo_11.wav
952 / 2000 2_yweweler_42.wav
953 / 2000 2_yweweler_29.wav
954 / 2000 4_yweweler_26.wav
955 / 2000 7_yweweler_37.wav
956 / 2000 2_jackson_23.wav
957 / 2000 7_nicolas_44.wav
958 / 2000 4_nicolas_44.wav
959 / 2000 5_theo_48.wav
960 / 2000 7_nicolas_19.wav
961 / 2000 2_theo_18.wav
962 / 2000 5_theo_1.wav
963 / 2000 7_nicolas_1.wav
964 / 2000 2_yweweler_44.wav
965 / 2000 5_jackson_49.wav
966 / 2000 8_jackson_5.wav
967 / 2000 5_jackson_1.wav
968 / 2000 5_yweweler_33.wav
969 / 2000 9_jackson_38.wav
970 / 200

1438 / 2000 0_yweweler_13.wav
1439 / 2000 0_yweweler_4.wav
1440 / 2000 8_yweweler_31.wav
1441 / 2000 5_nicolas_19.wav
1442 / 2000 9_theo_31.wav
1443 / 2000 4_nicolas_12.wav
1444 / 2000 6_yweweler_25.wav
1445 / 2000 0_yweweler_6.wav
1446 / 2000 0_nicolas_23.wav
1447 / 2000 8_nicolas_32.wav
1448 / 2000 3_yweweler_25.wav
1449 / 2000 8_yweweler_21.wav
1450 / 2000 9_jackson_10.wav
1451 / 2000 4_theo_36.wav
1452 / 2000 4_jackson_9.wav
1453 / 2000 8_nicolas_17.wav
1454 / 2000 6_yweweler_43.wav
1455 / 2000 1_nicolas_13.wav
1456 / 2000 6_yweweler_27.wav
1457 / 2000 5_theo_37.wav
1458 / 2000 7_jackson_41.wav
1459 / 2000 3_nicolas_36.wav
1460 / 2000 4_nicolas_32.wav
1461 / 2000 8_nicolas_19.wav
1462 / 2000 2_yweweler_48.wav
1463 / 2000 8_jackson_7.wav
1464 / 2000 2_jackson_29.wav
1465 / 2000 6_nicolas_47.wav
1466 / 2000 1_yweweler_21.wav
1467 / 2000 1_yweweler_41.wav
1468 / 2000 4_theo_47.wav
1469 / 2000 4_theo_13.wav
1470 / 2000 6_yweweler_1.wav
1471 / 2000 7_theo_42.wav
1472 / 2000 7_nicolas_2.

1968 / 2000 2_jackson_34.wav
1969 / 2000 0_yweweler_9.wav
1970 / 2000 7_yweweler_43.wav
1971 / 2000 7_nicolas_7.wav
1972 / 2000 6_nicolas_44.wav
1973 / 2000 1_nicolas_16.wav
1974 / 2000 7_jackson_28.wav
1975 / 2000 3_jackson_0.wav
1976 / 2000 5_theo_39.wav
1977 / 2000 5_yweweler_39.wav
1978 / 2000 1_nicolas_48.wav
1979 / 2000 0_nicolas_37.wav
1980 / 2000 5_nicolas_40.wav
1981 / 2000 6_yweweler_5.wav
1982 / 2000 7_yweweler_46.wav
1983 / 2000 0_jackson_25.wav
1984 / 2000 1_jackson_22.wav
1985 / 2000 7_jackson_22.wav
1986 / 2000 0_yweweler_12.wav
1987 / 2000 5_theo_9.wav
1988 / 2000 1_nicolas_4.wav
1989 / 2000 9_theo_9.wav
1990 / 2000 6_theo_29.wav
1991 / 2000 9_yweweler_23.wav
1992 / 2000 6_jackson_27.wav
1993 / 2000 3_nicolas_25.wav
1994 / 2000 0_nicolas_46.wav
1995 / 2000 9_nicolas_3.wav
1996 / 2000 5_jackson_24.wav
1997 / 2000 6_theo_3.wav
1998 / 2000 2_nicolas_44.wav
1999 / 2000 4_theo_29.wav
