In [1]:
from kaggle.api.kaggle_api_extended import KaggleApi
import os
from py7zr import unpack_7zarchive
import shutil
from datasets import Dataset

In [24]:
# Create the Kaggle API client and authenticate it; the download cell below
# calls methods on this `api` object.
# NOTE(review): presumably credentials are read from the local Kaggle config
# (kaggle.json or environment variables) — confirm on the target machine.
api = KaggleApi()
api.authenticate()

In [25]:
# Directory that receives the downloaded competition archive. exist_ok=True
# makes re-running this cell a no-op when the directory already exists.
download_path = "./data_raw"
os.makedirs(download_path, exist_ok=True)

In [26]:
# Download only the training archive of the competition into `download_path`.
api.competition_download_file(
    'tensorflow-speech-recognition-challenge',
    file_name='train.7z',
    path=download_path,
)

Downloading train.7z to ./data_raw


100%|██████████| 1.04G/1.04G [00:03<00:00, 365MB/s]







In [3]:
# Teach shutil how to unpack .7z files through py7zr, then extract the archive.
# shutil.register_unpack_format raises RegistryError when the extension is
# already registered, so re-running this cell in a live kernel used to fail.
# Registering only while '.7z' is still unknown keeps the cell re-runnable.
registered_extensions = {
    extension
    for _name, extensions, _description in shutil.get_unpack_formats()
    for extension in extensions
}
if '.7z' not in registered_extensions:
    shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)

# Later cells read ./data_raw/train/audio and the split lists under
# ./data_raw/train/, which come from this extraction.
shutil.unpack_archive('./data_raw/train.7z', './data_raw/')

In [4]:
# The twelve target classes; a label's id is its position in this list.
final_labels = [
    'yes', 'no', 'up', 'down', 'left', 'right',
    'on', 'off', 'stop', 'go', 'silence', 'unknown',
]
# Lookup tables in both directions: id -> name and name -> id.
idname = dict(enumerate(final_labels))
nameid = {label: index for index, label in idname.items()}

In [5]:
# Walk the extracted audio directory and build one record per .wav file.
# Each sub-folder name is mapped onto the target classes:
#   '_background_noise_'            -> 'silence'
#   any name not in `final_labels`  -> 'unknown'
#   everything else                 -> its own name
data_dir = './data_raw/train/audio'
data = []
# os.listdir returns entries in arbitrary order, so both listings are sorted
# to make the record order (and the CSV files written later) reproducible.
for folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder)
    if not os.path.isdir(folder_path):
        # Skip stray files that sit next to the class folders.
        continue

    label = folder
    if label == '_background_noise_':
        label_name = 'silence'
    elif label not in final_labels:
        label_name = 'unknown'
    else:
        label_name = label
    label_id = nameid[label_name]

    for file_name in sorted(os.listdir(folder_path)):
        if file_name.endswith(".wav"):
            # Keep the original folder name in the path and force forward
            # slashes so it can be compared with the split-list entries on
            # any platform.
            relative_path = os.path.join(label, file_name).replace("\\", "/")
            data.append({
                "audio_path": relative_path,
                "label": label_name,
                "label_id": label_id
            })

In [7]:
def read_list(filepath):
    """Read a split list file into a set of normalized relative paths.

    Whitespace-only lines are skipped. Every remaining line is stripped and
    its backslashes are replaced with forward slashes.
    """
    entries = set()
    with open(filepath, "r") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                entries.add(cleaned.replace("\\", "/"))
    return entries

# Split lists shipped with the archive: a listed path goes to validation or
# test, every path that appears in neither list stays in the training set.
val_list = read_list("./data_raw/train/validation_list.txt")
test_list = read_list("./data_raw/train/testing_list.txt")

train_data, val_data, test_data = [], [], []

for example in data:
    path = example["audio_path"]
    # Validation membership is tested first, then test, otherwise train.
    if path in val_list:
        target = val_data
    elif path in test_list:
        target = test_data
    else:
        target = train_data
    target.append(example)

# Build the three datasets in train, validation, test order.
train_dataset, val_dataset, test_dataset = (
    Dataset.from_list(records) for records in (train_data, val_data, test_data)
)

print(f"Train: {len(train_dataset)}, Validation: {len(val_dataset)}, Test: {len(test_dataset)}")

Train: 51094, Validation: 6798, Test: 6835


In [10]:
# Directory that receives the per-split CSV files; exist_ok=True keeps the
# cell re-runnable when the directory is already there.
output_dir  = "./data"
os.makedirs(output_dir, exist_ok=True)

def save_dataset(dataset, name, output_dir):
    """Write `dataset` to `<output_dir>/<name>.csv` without the index column.

    Args:
        dataset: object exposing ``to_pandas()`` (in this notebook a
            ``datasets.Dataset``).
        name: file name without extension, e.g. ``"train"``.
        output_dir: destination directory; created when it does not exist.
    """
    # Create the destination here so the function does not depend on the
    # caller having prepared it. An empty string means "current directory",
    # which os.makedirs would reject, hence the guard.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{name}.csv")
    dataset.to_pandas().to_csv(output_path, index=False)

# Persist the three splits as train.csv, validation.csv and test.csv.
for split_name, split_dataset in (
    ("train", train_dataset),
    ("validation", val_dataset),
    ("test", test_dataset),
):
    save_dataset(split_dataset, split_name, output_dir)

### Wczytywanie danych 

In [6]:
from AudioDataset import AudioDataset
import torch

# Build the training dataset from the train CSV written earlier in this
# notebook and the extracted audio folder.
# NOTE(review): AudioDataset is a project-local class; presumably it resolves
# each `audio_path` value from the CSV relative to `audio_dir` — confirm in
# AudioDataset.py.
dataset = AudioDataset(
    csv_path="./data/train.csv",
    audio_dir="./data_raw/train/audio"
)

In [8]:
dataloader = torch.utils.data.DataLoader(dataset, shuffle=True, batch_size=16)

# Smoke check: fetch only the first batch and print the shapes of its two
# tensors. Nothing is printed when the loader yields no batch.
first_batch = next(iter(dataloader), None)
if first_batch is not None:
    features, labels = first_batch
    print(features.shape)
    print(labels.shape)

torch.Size([16, 32, 80])
torch.Size([16])
