# Before you start, carefully read the instructions in the README file on how to obtain and organize the data.

In [1]:
from src.utils import check_folders, get_buckeye_data, get_timit_data, get_labels, pad_set

# Corpus locations and split index files, relative to the notebook
# (the README describes how to obtain and organize the data).
buckeye_corpus_path = "../Word-Segmenter/data/Buckeye"
train_indices = "../Word-Segmenter/data/buckeye_train_data.txt"
val_indices = "../Word-Segmenter/data/buckeye_val_data.txt"
test_indices = "../Word-Segmenter/data/buckeye_test_data.txt"

# TIMIT is only loaded as a test set in this notebook.
timit_testset_path = "../Word-Segmenter/data/TIMIT/data/TEST"
timit_indices = "../Word-Segmenter/data/timit_test_data.txt"

# Check that both corpus paths exist before loading anything.
check_folders(buckeye_corpus_path, timit_testset_path)

SR = 16000                    # sample rate handed to the loaders and to get_labels
FRAME_SIZE = int(0.025 * SR)  # 0.025 * SR = 400 samples (25 ms at 16 kHz)
HOP_LENGTH = int(0.025 * SR)  # deliberately equal to FRAME_SIZE

# Load waveforms and their boundary annotations for every split.
buckeye_train_wavs, buckeye_train_bounds = get_buckeye_data(buckeye_corpus_path, train_indices, SR)
buckeye_val_wavs, buckeye_val_bounds = get_buckeye_data(buckeye_corpus_path, val_indices, SR)
buckeye_test_wavs, buckeye_test_bounds = get_buckeye_data(buckeye_corpus_path, test_indices, SR)

timit_wavs, timit_bounds = get_timit_data(timit_testset_path, timit_indices, SR)

# Derive labels from the boundaries. The training split uses get_labels'
# default `type`; every evaluation split is built with type='test'.
buckeye_train_labels = get_labels(buckeye_train_wavs, buckeye_train_bounds, SR, FRAME_SIZE, HOP_LENGTH)
buckeye_val_labels = get_labels(buckeye_val_wavs, buckeye_val_bounds, SR, FRAME_SIZE, HOP_LENGTH, type='test')
buckeye_test_labels = get_labels(buckeye_test_wavs, buckeye_test_bounds, SR, FRAME_SIZE, HOP_LENGTH, type='test')
timit_labels = get_labels(timit_wavs, timit_bounds, SR, FRAME_SIZE, HOP_LENGTH, type='test')

# Sanity check: the three counts printed on each line should be equal.
print('\n')
print('Buckeye Train samples:', len(buckeye_train_wavs), len(buckeye_train_labels), len(buckeye_train_bounds))
print('Buckeye Val samples:', len(buckeye_val_wavs), len(buckeye_val_labels), len(buckeye_val_bounds))
print('Buckeye Test samples:', len(buckeye_test_wavs), len(buckeye_test_labels), len(buckeye_test_bounds))
print('Timit Test samples:', len(timit_wavs), len(timit_labels), len(timit_bounds))

# All Buckeye splits are passed to pad_set as its first argument.
# NOTE(review): presumably pad_set derives the common padded size from this
# tuple -- confirm in src.utils.
datasets = (
    buckeye_train_wavs, buckeye_train_labels, buckeye_train_bounds,
    buckeye_val_wavs, buckeye_val_labels, buckeye_val_bounds,
    buckeye_test_wavs, buckeye_test_labels, buckeye_test_bounds,
)

# The Buckeye test split gets new names; the TIMIT variables are rebound to
# pad_set's output.
buckeye_wavs, buckeye_labels, buckeye_bounds = pad_set(datasets, buckeye_test_wavs, buckeye_test_labels, buckeye_test_bounds)
timit_wavs, timit_labels, timit_bounds = pad_set(datasets, timit_wavs, timit_labels, timit_bounds)

# Test sets keyed by corpus name; consumed by choose_dataset in the next cell.
test_sets = {
    'buckeye': (buckeye_wavs, buckeye_labels, buckeye_bounds),
    'timit': (timit_wavs, timit_labels, timit_bounds),
}


Skipping empty file: s12_9953.word
Skipping empty file: s02_6218.word
Skipping empty file: s36_91.word
Skipping empty file: s25_41.word


Buckeye Train samples: 8201 8201 8201
Buckey Val samples: 1016 1016 1016
Buckeye Test samples: 1048 1048 1048
Timit Test samples: 1680 1680 1680


# Run the next cell and follow the prompts displayed under it to choose a model and a dataset.

In [2]:
from src.utils import choose_dataset, retrieve_model, select_model_and_dataset
from src.dataset import get_loader
import torch

# Interactive step: prompts for the model and the test set to evaluate.
model_name, dataset = select_model_and_dataset()

# Evaluation settings.
SR = 16000
HOP_LENGTH = int(0.025 * SR)
NUM_CLASSES = 3
BATCH_SIZE = 32
N_DEV = 0  # index of the CUDA device used when a GPU is available
verbose = False
freeze = True
device = torch.device(f'cuda:{N_DEV}' if torch.cuda.is_available() else 'cpu')

# Pick the arrays of the chosen test set together with its tolerance value.
wavs, labels, bounds, tolerance = choose_dataset(dataset, test_sets, SR)

print('Wavs shape:', wavs.shape, "Labels shape:", labels.shape, "Bounds shape: ", bounds.shape, "\n")

# Get the loader
loader = get_loader(wavs, labels, bounds, BATCH_SIZE, type='test')

# Fetch one batch to read the size of axis 1 of the inputs and of the labels;
# retrieve_model needs both values.
batch_wavs, batch_labels, batch_bounds = next(iter(loader))
# NOTE: the name `time` is kept for compatibility; it would shadow the stdlib
# `time` module if that were ever imported in this notebook.
time = batch_wavs.shape[1]
frames_out = batch_labels.shape[1]

print(f'Time points: {time}')
print(f'Frames out: {frames_out}')

# Get the model
model = retrieve_model(model_name, time, frames_out, NUM_CLASSES, verbose, freeze)

Choose a model to test:
1 HuBERT
2 Wav2Vec 2.0
3 CNN
4 CRNN

Choose a dataset to test:
1 Buckeye
2 Timit

You chose model HuBERT and dataset Buckeye

Wavs shape: (1048, 144581) Labels shape: (1048, 361) Bounds shape:  (1048, 22, 2) 

Time points: 144581
Frames out: 361


In [3]:
from src.utils import predict

# Run the selected model over the test loader.
# NOTE: this rebinds `bounds` (until now the boundaries returned by
# choose_dataset) to whatever predict() returns.
bounds = predict(
    model,
    loader,
    device,
    tolerance=tolerance,
    hop_length=HOP_LENGTH,
    frame_selection="mid",
    desc='Test',
    plot_bounds=False,
    batch_plot_id=1,
)

Test:   0%|          | 0/33 [00:00<?, ?it/s]

Test: 100%|██████████| 33/33 [00:16<00:00,  2.04it/s]

Test Precision: 0.8999648609445512, Test Recall: 0.7928456512937014, Test F value: 0.8427989977407062, Test OS: -0.11871499315020075, Test R value: 0.8489727692261502




