In [1]:
from pydub import AudioSegment

In [2]:
# Open the reference clip from the balanced-train folder.
audio = AudioSegment.from_file("./audio/bal_train/--cB2ZVjpnA.flac")

# Frame rate in Hz (reported below in kHz).
sampling_rate = audio.frame_rate

# Bits per second of the decoded audio:
# 8 bits/byte * bytes per frame * frames per second.
# (pydub's API docs describe frame_width as bytes per frame,
#  i.e. channels * sample_width.)
bitrate = 8 * audio.frame_width * sampling_rate

# One print call, one line per value.
print(
    f"Frame Width: {audio.frame_width} (I think it's in Bytes)",
    f"Sampling Rate: {sampling_rate/1000} kHz",
    f"Bitrate: {bitrate/1000} kbps",
    sep="\n",
)

Frame Width: 8 (I think it's in Bytes)
Sampling Rate: 48.0 kHz
Bitrate: 3072.0 kbps


## Resample sanity check

In [3]:
# Load the FLAC file
audio = AudioSegment.from_file("./audio/bal_train/--cB2ZVjpnA.flac", format="flac")

# Resample the audio to 16kHz
resampled_audio = audio.set_frame_rate(16000)

# Export the resampled audio.
# export() returns the output file object still open. Close it right away so the
# handle is not leaked (as the cell's last expression it would otherwise also be
# kept alive in Out[...] for the rest of the kernel session).
resampled_audio.export("test.flac", format="flac").close()

<_io.BufferedRandom name='test.flac'>

In [4]:
# Re-open the file written by the resample step to confirm the new rate.
audio = AudioSegment.from_file("test.flac")

# Frame rate in Hz (reported below in kHz).
sampling_rate = audio.frame_rate

# Bits per second of the decoded audio:
# 8 bits/byte * bytes per frame * frames per second.
bitrate = 8 * audio.frame_width * sampling_rate

# One print call, one line per value.
print(
    f"Frame Width: {audio.frame_width} (I think it's in Bytes)",
    f"Sampling Rate: {sampling_rate/1000} kHz",
    f"Bitrate: {bitrate/1000} kbps",
    sep="\n",
)

Frame Width: 8 (I think it's in Bytes)
Sampling Rate: 16.0 kHz
Bitrate: 1024.0 kbps


In [29]:
# Spot-check one clip from the flac_16kHz folder
# (presumably produced by the download/resample pipeline — TODO confirm).
audio = AudioSegment.from_file("flac_16kHz/-i-9C48tduw.flac")

# Frame rate in Hz (reported below in kHz).
sampling_rate = audio.frame_rate

# Bits per second of the decoded audio:
# 8 bits/byte * bytes per frame * frames per second.
bitrate = 8 * audio.frame_width * sampling_rate

# One print call, one line per value.
print(
    f"Frame Width: {audio.frame_width} (I think it's in Bytes)",
    f"Sampling Rate: {sampling_rate/1000} kHz",
    f"Bitrate: {bitrate/1000} kbps",
    sep="\n",
)

Frame Width: 8 (I think it's in Bytes)
Sampling Rate: 16.0 kHz
Bitrate: 1024.0 kbps


## LTU Open-QA

In [10]:
import json
from tqdm import tqdm

In [6]:
# Parse the whole QA annotation file into memory.
# The encoding is pinned to UTF-8 (the standard encoding for JSON interchange) so
# decoding does not depend on the platform's locale default.
with open("../ltu-openasqa/openasqa_10.3M_v2.json", encoding="utf-8") as f:
    all_data = json.load(f)

In [7]:
# Number of top-level entries in the loaded JSON.
len(all_data)

10324230

In [17]:
# Keep only the entries whose audio_id contains "audioset/dave_version";
# tqdm wraps the input so the scan still shows a progress bar.
audioset_data = [
    entry
    for entry in tqdm(all_data)
    if "audioset/dave_version" in entry['audio_id']
]

100%|██████████████████████████████████████████████████████████████████████████████| 10324230/10324230 [00:05<00:00, 1990711.01it/s]


In [19]:
# Number of entries that matched the "audioset/dave_version" filter.
len(audioset_data)

4309357

In [25]:
# Distinct file names: the last "/"-separated component of every audio_id.
# Many entries share a clip, so a set collapses the duplicates.
flac_names = {
    entry['audio_id'].rsplit("/", 1)[-1]
    for entry in tqdm(audioset_data)
}

100%|█████████████████████████████████████████████████████████████████████████████████| 4309357/4309357 [00:05<00:00, 800883.27it/s]


In [26]:
# Number of distinct file names (last path component of audio_id).
len(flac_names)

523258

In [27]:
# Materialise the names as a list in sorted order. Plain list(set) order for
# strings changes between kernel sessions (string hashing is randomised per
# process), which would make anything written from this list non-reproducible.
flac_names = sorted(flac_names)

In [28]:
# Persist the names one per line; join() puts no newline after the last one.
ids_text = "\n".join(flac_names)
with open("ltu-audioset-ids.txt", "w") as ids_file:
    ids_file.write(ids_text)

## Write the run scripts

In [34]:
def write(path, start_id, end_id):
    """Write a bash job script that processes a range of ``unbal_train`` shards.

    The generated script consists of a bash shebang, a ``#$ -S /bin/bash``
    directive, three ``export`` lines pointing the Hugging Face caches at
    ``/cache/.cache/huggingface``, and one
    ``python download_filter_resample.py unbal_trainNNN`` line per shard, where
    ``NNN`` is the shard index zero-padded to at least three digits.

    Args:
        path: Destination of the script; an existing file is overwritten.
        start_id: First shard index to include (inclusive).
        end_id: Shard index to stop before (exclusive), i.e. the script covers
            ``[start_id, end_id)``. When ``end_id <= start_id`` only the
            preamble is written.

    Returns:
        None. The only effect is the file written at ``path``.
    """
    lines = [
        "#!/bin/bash",
        "#$ -S /bin/bash",
        "",
        "export HF_DATASETS_CACHE=/cache/.cache/huggingface/datasets",
        "export HF_HOME=/cache/.cache/huggingface",
        "export HF_HUB_CACHE=/cache/.cache/huggingface/hub",
        "",
    ]
    lines.extend(
        f"python download_filter_resample.py unbal_train{i:03}"
        for i in range(start_id, end_id)
    )
    # newline="\n" switches off newline translation, so the script always has LF
    # line endings; a CRLF shebang line would stop bash from running it.
    with open(path, "w", newline="\n") as f:
        f.write("\n".join(lines) + "\n")

In [35]:
# Generate every run script in one pass instead of one hand-edited cell per
# script. Shard indices 0-869 are split into consecutive batches of 100; the
# last batch is shorter (800-869), matching the previous hand-written calls.
NUM_SHARDS = 870
SHARDS_PER_SCRIPT = 100

for script_idx, start_id in enumerate(range(0, NUM_SHARDS, SHARDS_PER_SCRIPT)):
    # end_id is exclusive and clamped so the final script stops at NUM_SHARDS.
    end_id = min(start_id + SHARDS_PER_SCRIPT, NUM_SHARDS)
    write(f"./run_download_unbal_{script_idx}.sh", start_id, end_id)