In [None]:
# Install pydub, which the import cell below needs for AudioSegment and make_chunks.
!pip install pydub

In [None]:
import hashlib
import os
import shutil

import pandas as pd
from IPython.display import Audio
from sklearn.model_selection import train_test_split

from pydub import AudioSegment
from pydub.utils import make_chunks

In [None]:
# List the contents of the current working directory.
!ls

daps  drive  sample_data


In [None]:
# Recursively delete every local entry whose name starts with "daps"
# (presumably so the copy from Drive below starts from an empty target - confirm).
!rm -r daps*

In [None]:
def load_domain_data(domains, source_root="drive/MyDrive/DAPS/daps", target_root="daps"):
    """Copy one directory per domain from ``source_root`` into ``target_root``.

    Args:
        domains: Iterable of domain (sub-directory) names to copy.
        source_root: Directory that contains one sub-directory per domain.
            Defaults to the Drive location the notebook originally hard-coded.
        target_root: Directory under which the domain copies are created.

    Raises:
        FileExistsError: If ``target_root/<domain>`` already exists
            (``shutil.copytree`` refuses to overwrite an existing directory).
    """
    for domain in domains:
        print(f"Loading data from {domain}")
        shutil.copytree(f"{source_root}/{domain}", f"{target_root}/{domain}")
    print("Files loaded successfully.")

In [None]:
# Domain names to copy; load_domain_data copies one directory per name.
domains = ["clean",
           "cleanraw",
           "ipad_balcony1",
           "ipad_bedroom1",
           "ipad_confroom1",
           "ipad_confroom2",
           "ipadflat_confroom1",
           "ipadflat_office1",
           "ipad_livingroom1",
           "ipad_office1",
           "ipad_office2",
           "iphone_balcony1",
           "iphone_bedroom1",
           "iphone_livingroom1",
           "produced"]

# Uncomment to restrict the run to a single domain while testing.
#domains = ["clean"] # for test

load_domain_data(domains)

Loading data from clean
Loading data from cleanraw
Loading data from ipad_balcony1
Loading data from ipad_bedroom1
Loading data from ipad_confroom1
Loading data from ipad_confroom2
Loading data from ipadflat_confroom1
Loading data from ipadflat_office1
Loading data from ipad_livingroom1
Loading data from ipad_office1
Loading data from ipad_office2
Loading data from iphone_balcony1
Loading data from iphone_bedroom1
Loading data from iphone_livingroom1
Loading data from produced
Files loaded successfully.


In [None]:
# Delete "._*.wav" files two levels below any daps* directory
# (presumably macOS sidecar files that are not real audio - confirm).
!rm daps*/*/._*.wav

In [None]:
def set_experiment_seed(seed_value):
    """Seed Python, NumPy and PyTorch random generators and return the seed.

    Args:
        seed_value: Integer seed applied to every generator.

    Returns:
        ``seed_value`` unchanged, so the caller can store it (e.g. ``SEED = ...``).
    """
    # Imported here because the notebook's import cell does not provide these
    # names; without them the function raises NameError on a fresh kernel.
    import random

    import numpy as np
    import torch

    # cpu variables
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)

    # python variables
    random.seed(seed_value)

    # cuda variables and config
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    return seed_value

In [None]:
def get_speaker_id(file):
    """Return the part of ``file`` before its first underscore (the whole name if there is none)."""
    speaker_tag, _, _ = file.partition("_")
    return speaker_tag

def get_file_name(file):
    """Return ``<speaker>_<script><ext>`` built from the first two ``_`` tokens of ``file``.

    The extension is split off before tokenising, so a name that carries no
    trailing domain token (e.g. ``f7_script4.wav``) is returned unchanged
    instead of having its extension appended twice.

    Args:
        file: File name such as ``f7_script4_clean.wav``.

    Returns:
        The shortened name, e.g. ``f7_script4.wav``.

    Raises:
        IndexError: If the name (without extension) has fewer than two
            underscore-separated tokens.
    """
    stem, extension = os.path.splitext(file)
    tokens = stem.split("_")
    return f"{tokens[0]}_{tokens[1]}{extension}"

def create_csv_for_directory(audio_directory):
    """Build a ``Filename``/``SpeakerID`` table for the ``.wav`` files in a directory.

    Despite the name, nothing is written to disk: the table is returned.

    Args:
        audio_directory: Directory whose entries are listed (not recursive).

    Returns:
        A ``pandas.DataFrame`` with columns ``Filename`` (from ``get_file_name``)
        and ``SpeakerID`` (from ``get_speaker_id``), one row per kept file.
    """
    records = []
    # NOTE(review): rows follow os.listdir order, which is not guaranteed to be
    # stable across file systems; a split computed from this table inherits
    # that order. Left unsorted to avoid changing previously recorded splits.
    for file in os.listdir(audio_directory):
        # Keep only .wav entries that are not hidden (dot-prefixed) files.
        if not file.endswith(".wav") or file.startswith("."):
            continue
        records.append((get_file_name(file), get_speaker_id(file)))

    # Build the frame once instead of appending row by row.
    return pd.DataFrame(records, columns=["Filename", "SpeakerID"])

In [None]:
# Seed the random generators (the seed is reused as random_state for the split)
# and build the file/speaker table from the "clean" domain directory.
SEED = set_experiment_seed(313)
dataframe = create_csv_for_directory("daps/clean")

In [None]:
# Alternative: load a previously saved table instead of the in-memory one.
#dataframe = pd.read_csv("daps/clean/metadata.csv")
# Show the first rows of the metadata table.
dataframe.head()

Unnamed: 0,Filename,SpeakerID
0,f7_script4.wav,f7
1,f2_script2.wav,f2
2,f2_script5.wav,f2
3,f4_script2.wav,f4
4,m3_script4.wav,m3


In [None]:
# Split file names 75/25 into train and test, stratified on SpeakerID and
# shuffled with the fixed SEED as random_state.
X = dataframe["Filename"]
y = dataframe["SpeakerID"]
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    stratify=y,
                                                    shuffle=True,
                                                    random_state=SEED,
                                                    test_size=0.25)

In [None]:
def count_distributions(fold):
    """Print gender, script and speaker counts for a collection of file names.

    Each item is expected to look like ``m10_script5.wav``: a speaker tag made
    of ``m`` or ``f`` plus a 1-based number, and a single script digit right
    before the 4-character extension.

    Three lines are printed: male/female totals, a 5-slot list of per-script
    counts, and a 20-slot list of per-speaker counts (slots 0-9 for ``m``
    speakers, 10-19 for ``f`` speakers).

    Args:
        fold: Iterable of file-name strings.
    """
    males = 0
    females = 0
    scripts = [0] * 5
    speakers = [0] * 20

    for item in fold:
        speaker_tag = item.split("_")[0]
        # Decide gender from the first character of the speaker tag only;
        # an "m" or "f" elsewhere in the name must not affect the count.
        gender = speaker_tag[:1]
        if gender == "m":
            males += 1
            speakers[int(speaker_tag[1:]) - 1] += 1
        elif gender == "f":
            females += 1
            speakers[int(speaker_tag[1:]) - 1 + 10] += 1
        # Script number is the single character before the ".wav" suffix.
        scripts[int(item[-5]) - 1] += 1

    print(f"males: {males}, females: {females}")
    print(scripts)
    print(speakers)

In [None]:
# Display the file names assigned to the training split.
X_train.values

array(['m10_script5.wav', 'm10_script2.wav', 'm8_script3.wav',
       'f9_script2.wav', 'f5_script1.wav', 'f2_script1.wav',
       'm9_script1.wav', 'm9_script5.wav', 'm6_script4.wav',
       'm7_script5.wav', 'f7_script1.wav', 'm3_script2.wav',
       'm7_script2.wav', 'f1_script2.wav', 'm6_script5.wav',
       'f2_script5.wav', 'm2_script5.wav', 'f10_script3.wav',
       'm3_script5.wav', 'f3_script4.wav', 'm2_script1.wav',
       'm4_script3.wav', 'f8_script4.wav', 'f1_script5.wav',
       'f6_script5.wav', 'f1_script4.wav', 'm7_script4.wav',
       'f6_script2.wav', 'm4_script4.wav', 'm5_script3.wav',
       'm9_script3.wav', 'm1_script3.wav', 'm2_script4.wav',
       'f4_script2.wav', 'f8_script3.wav', 'f10_script4.wav',
       'm7_script3.wav', 'f2_script2.wav', 'm4_script1.wav',
       'm5_script4.wav', 'f6_script3.wav', 'f7_script2.wav',
       'f2_script3.wav', 'm3_script4.wav', 'f5_script2.wav',
       'f5_script4.wav', 'm10_script1.wav', 'f7_script4.wav',
       'm6_script3.

In [None]:
# Bare string expression holding a printed array of 25 file names; it matches
# the X_test.values output shown in this notebook (presumably kept as a
# reference copy of the expected test split - confirm).
"""array(['f4_script4.wav', 'm1_script5.wav', 'f7_script5.wav',
       'f7_script3.wav', 'f8_script2.wav', 'f9_script4.wav',
       'm2_script3.wav', 'f1_script1.wav', 'm3_script1.wav',
       'm7_script1.wav', 'm6_script1.wav', 'f6_script1.wav',
       'f5_script5.wav', 'f9_script3.wav', 'f10_script2.wav',
       'm4_script2.wav', 'f8_script5.wav', 'm1_script1.wav',
       'm9_script2.wav', 'm5_script1.wav', 'm6_script2.wav',
       'm10_script4.wav', 'm8_script5.wav', 'f2_script4.wav',
       'f3_script1.wav'], dtype=object)"""

"array(['f4_script4.wav', 'm1_script5.wav', 'f7_script5.wav',\n       'f7_script3.wav', 'f8_script2.wav', 'f9_script4.wav',\n       'm2_script3.wav', 'f1_script1.wav', 'm3_script1.wav',\n       'm7_script1.wav', 'm6_script1.wav', 'f6_script1.wav',\n       'f5_script5.wav', 'f9_script3.wav', 'f10_script2.wav',\n       'm4_script2.wav', 'f8_script5.wav', 'm1_script1.wav',\n       'm9_script2.wav', 'm5_script1.wav', 'm6_script2.wav',\n       'm10_script4.wav', 'm8_script5.wav', 'f2_script4.wav',\n       'f3_script1.wav'], dtype=object)"

In [None]:
# Display the file names assigned to the test split.
X_test.values

array(['f4_script4.wav', 'm1_script5.wav', 'f7_script5.wav',
       'f7_script3.wav', 'f8_script2.wav', 'f9_script4.wav',
       'm2_script3.wav', 'f1_script1.wav', 'm3_script1.wav',
       'm7_script1.wav', 'm6_script1.wav', 'f6_script1.wav',
       'f5_script5.wav', 'f9_script3.wav', 'f10_script2.wav',
       'm4_script2.wav', 'f8_script5.wav', 'm1_script1.wav',
       'm9_script2.wav', 'm5_script1.wav', 'm6_script2.wav',
       'm10_script4.wav', 'm8_script5.wav', 'f2_script4.wav',
       'f3_script1.wav'], dtype=object)

In [None]:
# SHA-1 of the printed form of X_test.values, shown as a hex digest
# (presumably used to check that reruns produce the same test split).
hash_object = hashlib.sha1(str(X_test.values).encode('utf-8'))
test_hash = hash_object.hexdigest()
test_hash

'46c92a7fd36a6e625696418e7c1fc37245d97a5c'

In [None]:
# Reference SHA-1 of the test split (identical to the digest printed above): 46c92a7fd36a6e625696418e7c1fc37245d97a5c

In [None]:
# Print gender, script and speaker counts for the training split.
count_distributions(X_train.values)

males: 38, females: 37
[12, 15, 17, 16, 15]
[3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4]


In [None]:
# Print gender, script and speaker counts for the test split.
count_distributions(X_test.values)

males: 12, females: 13
[8, 5, 3, 4, 5]
[2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1]


In [None]:
def create_dirs_and_move_files(dataset_dir="daps"):
    """Run ``organize_domain`` on every sub-directory of ``dataset_dir``.

    Args:
        dataset_dir: Directory whose immediate sub-directories are organized;
            entries that are not directories are ignored.
    """
    for entry_name in os.listdir(dataset_dir):
        candidate = os.path.join(dataset_dir, entry_name)
        if not os.path.isdir(candidate):
            continue
        print(f"Organizing {candidate}")
        organize_domain(candidate)

In [None]:
def organize_domain(subdir):
    """Create ``train`` and ``test`` folders in ``subdir`` and move each fold's files in.

    Reads the module-level ``X_train`` and ``X_test`` folds. When a fold
    directory already exists, a message is printed and that fold is skipped.

    Only the "directory already exists" case is handled. Errors raised while
    moving files (e.g. a missing source file) propagate, rather than being
    reported as an existing directory.

    Args:
        subdir: Path of one domain directory.
    """
    for split_name, fold in (("train", X_train), ("test", X_test)):
        dir_path = os.path.join(subdir, split_name)
        try:
            os.mkdir(dir_path)
        except FileExistsError:
            print(f"Directory '{split_name}' already exists.")
            continue
        move_files(subdir, split_name, fold)

In [None]:
def move_files(domain_path, directory, fold):
    """Move a fold's files from a domain directory into one of its sub-directories.

    For each ``entry`` in ``fold`` (e.g. ``f1_script1.wav``) the source file is
    ``<domain_path>/<stem>_<domain>.wav`` and the destination is
    ``<domain_path>/<directory>/<entry>``, so the domain suffix is dropped.

    Args:
        domain_path: Path of the domain directory; its last component is the
            domain name used in the source file names.
        directory: Name of the existing sub-directory to move into.
        fold: Iterable of target file names.
    """
    # Use the last path component so absolute, nested or trailing-slash paths
    # work, not just the two-component "daps/<domain>" form.
    domain = os.path.basename(os.path.normpath(domain_path))
    for entry in fold:
        entry_domain = entry.split(".")[0] + f"_{domain}.wav"
        path_to_file = os.path.join(domain_path, entry_domain)
        new_path = os.path.join(domain_path, directory, entry)
        shutil.move(path_to_file, new_path)

In [None]:
# Organize every domain directory under the default "daps" root into train/test folders.
create_dirs_and_move_files()

Organizing daps/ipad_confroom1
Organizing daps/ipadflat_confroom1
Organizing daps/cleanraw
Organizing daps/iphone_livingroom1
Organizing daps/clean
Organizing daps/ipadflat_office1
Organizing daps/iphone_balcony1
Organizing daps/produced
Organizing daps/ipad_office2
Organizing daps/ipad_livingroom1
Organizing daps/ipad_office1
Organizing daps/ipad_balcony1
Organizing daps/ipad_confroom2
Organizing daps/ipad_bedroom1
Organizing daps/iphone_bedroom1


In [None]:
def listen_audio(audio_path):
    """Wrap ``audio_path`` in an IPython ``Audio`` object and pass it to ``display``.

    Returns whatever ``display`` returns.
    """
    return display(Audio(audio_path))

In [None]:
#listen_audio("daps/clean/train/m8_script2.wav")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Running total, in seconds, of audio removed by process_audio.
total_silence = 0

def process_audio(file_path, chunk_length_ms):
    """Trim leading/trailing silence from a WAV file and export it as chunks.

    The trimmed track is cut with ``make_chunks`` and each piece is written
    next to the source as ``<name>_<index><ext>``. The source file itself is
    left in place. The trimmed duration is printed and added to the
    module-level ``total_silence``.

    Args:
        file_path: Path of the WAV file to process.
        chunk_length_ms: Chunk length passed to ``make_chunks``.
    """
    global total_silence

    # from_wav already implies the WAV format; its second positional argument
    # is `parameters`, so the former extra 'wav' argument did not belong there.
    track = AudioSegment.from_wav(file_path)

    # len() of a segment is in milliseconds (hence / 1000 for seconds).
    duration = len(track)
    trimmed_track = strip_silence(track)
    new_duration = len(trimmed_track)
    trim_amount = (duration - new_duration) / 1000
    total_silence += trim_amount
    print(f"Trimmed {trim_amount} seconds in {file_path}.")

    chunks = make_chunks(trimmed_track, chunk_length_ms)
    file_name, file_extension = os.path.splitext(file_path)
    for i, chunk in enumerate(chunks):
        chunk_name = f"{file_name}_{i}{file_extension}"
        chunk.export(chunk_name, format='wav')

def split_audios(target, chunk_length_ms):
    """Chunk every WAV file in ``target``'s ``train`` and ``test`` folders.

    Each fold's listing is taken once before processing. Every entry whose
    name contains '.wav' and does not contain '._' is passed to
    ``process_audio`` and then deleted, leaving only the exported chunks.

    Args:
        target: Domain directory containing ``train`` and ``test``.
        chunk_length_ms: Chunk length forwarded to ``process_audio``.
    """
    for fold in ("train", "test"):
        fold_entries = os.listdir(f"{target}/{fold}")
        print(f"splitting files in {target} {fold}")
        for name in fold_entries:
            if '.wav' not in name or '._' in name:
                continue
            file_path = f"{target}/{fold}/{name}"
            process_audio(file_path, chunk_length_ms)
            # Remove the unsplit original once its chunks are written.
            os.remove(file_path)

def split_dir(rootdir="daps", chunk_length_ms=5000):
    """Run ``split_audios`` on every sub-directory of ``rootdir``.

    Args:
        rootdir: Directory whose immediate sub-directories are processed.
            Defaults to the notebook's original ``"daps"``.
        chunk_length_ms: Chunk length forwarded to ``split_audios``.
            Defaults to the original 5000.
    """
    for file in os.listdir(rootdir):
        d = os.path.join(rootdir, file)
        if os.path.isdir(d):
            print(f"Splitting audios in {d}.")
            split_audios(d, chunk_length_ms=chunk_length_ms)

def strip_silence(track):
    """Return ``track`` without its leading and trailing silence.

    Trailing silence is measured by running ``detect_leading_silence`` on the
    reversed track.
    """
    lead_ms = detect_leading_silence(track)
    tail_ms = detect_leading_silence(track.reverse())
    return track[lead_ms:len(track) - tail_ms]

def detect_leading_silence(track, silence_threshold=-30.0, chunk_size=1):
    """Return how many leading units of ``track`` are below ``silence_threshold``.

    The track is scanned from the start in windows of ``chunk_size``. Scanning
    stops at the first window whose ``dBFS`` is not below the threshold, or at
    the end of the track.

    Args:
        track: Object supporting ``len()``, slicing, and a ``dBFS`` attribute
            on slices (e.g. a pydub ``AudioSegment``).
        silence_threshold: Level below which a window counts as silent.
        chunk_size: Window length; must be positive.

    Returns:
        Offset of the first non-silent window, never larger than ``len(track)``.
    """
    assert chunk_size > 0 # to avoid infinite loop

    track_length = len(track)
    trim_ms = 0
    # Check the bound first so an empty window past the end is never measured.
    while trim_ms < track_length and track[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold:
        trim_ms += chunk_size

    # The last step can overshoot when chunk_size does not divide the length;
    # clamp so callers never get an offset beyond the track.
    return min(trim_ms, track_length)

In [None]:
# Trim silence and cut every train/test file of every domain into chunks.
split_dir()

Splitting audios in daps/ipad_confroom1.
splitting files in daps/ipad_confroom1 train
Trimmed 1.899 seconds in daps/ipad_confroom1/train/m4_script4.wav.
Trimmed 8.318 seconds in daps/ipad_confroom1/train/f2_script5.wav.
Trimmed 8.534 seconds in daps/ipad_confroom1/train/f10_script5.wav.
Trimmed 7.547 seconds in daps/ipad_confroom1/train/f3_script2.wav.
Trimmed 7.361 seconds in daps/ipad_confroom1/train/f5_script2.wav.
Trimmed 7.971 seconds in daps/ipad_confroom1/train/f10_script4.wav.
Trimmed 8.228 seconds in daps/ipad_confroom1/train/m6_script5.wav.
Trimmed 8.288 seconds in daps/ipad_confroom1/train/m6_script4.wav.
Trimmed 8.133 seconds in daps/ipad_confroom1/train/m2_script4.wav.
Trimmed 7.556 seconds in daps/ipad_confroom1/train/f8_script4.wav.
Trimmed 7.589 seconds in daps/ipad_confroom1/train/m9_script1.wav.
Trimmed 6.959 seconds in daps/ipad_confroom1/train/f1_script2.wav.
Trimmed 7.24 seconds in daps/ipad_confroom1/train/f5_script3.wav.
Trimmed 7.385 seconds in daps/ipad_confroo

In [None]:
# total_silence is accumulated in seconds by process_audio; report it in minutes.
print(f"Trimmed a total of {total_silence / 60} minutes of silence.")

Trimmed a total of 142.8993500000003 minutes of silence.


In [None]:
#listen_audio("daps/clean/train/m8_script2.wav")

In [None]:
# copy files TO drive
#!mv daps daps_split
#!cp -r daps_split drive/MyDrive/DAPS/daps_split

In [None]:
# copy files FROM drive
#!cp -r drive/MyDrive/DAPS/daps_split daps_split

In [None]:
#!ls drive/MyDrive/DAPS/daps_split/clean/test | wc -l

785


In [None]:
# Archive the split dataset directory.
# NOTE(review): the only cells that create daps_split (the `mv daps daps_split`
# and `cp -r ... daps_split` lines) are commented out, so on a fresh
# top-to-bottom run this directory may not exist - confirm the intended step.
!zip -r daps_split.zip daps_split/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: daps_split/ipadflat_confroom1/train/m4_script1_13.wav (deflated 29%)
  adding: daps_split/ipadflat_confroom1/train/m3_script4_21.wav (deflated 36%)
  adding: daps_split/ipadflat_confroom1/train/f5_script4_2.wav (deflated 20%)
  adding: daps_split/ipadflat_confroom1/train/m10_script3_23.wav (deflated 38%)
  adding: daps_split/ipadflat_confroom1/train/m8_script3_14.wav (deflated 33%)
  adding: daps_split/ipadflat_confroom1/train/m2_script5_11.wav (deflated 30%)
  adding: daps_split/ipadflat_confroom1/train/f1_script5_5.wav (deflated 30%)
  adding: daps_split/ipadflat_confroom1/train/m4_script5_27.wav (deflated 31%)
  adding: daps_split/ipadflat_confroom1/train/f7_script4_3.wav (deflated 31%)
  adding: daps_split/ipadflat_confroom1/train/m6_script4_23.wav (deflated 29%)
  adding: daps_split/ipadflat_confroom1/train/f2_script5_15.wav (deflated 24%)
  adding: daps_split/ipadflat_confroom1/train/m10_script5_12.wav (de

In [None]:
# Copy the archive to the DAPS folder on the mounted Drive.
!cp daps_split.zip drive/MyDrive/DAPS/daps_split.zip

In [None]:
# Delete the local daps_split directory (the zip archive is not removed by this command).
!rm -r daps_split/