In [None]:

def normalize_data(train_data, val_data):
    """Standardize MFCC vectors with statistics computed on the training set only.

    Both arguments are lists of sequences, where every sequence is a list of
    ``(mfcc, label)`` tuples and ``mfcc`` is array-like.

    Args:
        train_data: sequences used to compute the per-coefficient mean and std.
        val_data: sequences normalized with the training statistics.

    Returns:
        Tuple ``(normalized_train_data, normalized_val_data, mean, std)``.
        The normalized data keeps the input structure (labels are passed
        through untouched). ``std`` is the divisor that was actually applied:
        coefficients with zero variance get a divisor of 1.0 so they map to 0
        instead of NaN/inf.

    Raises:
        ValueError: if ``train_data`` contains no sequences.
    """
    if len(train_data) == 0:
        raise ValueError("train_data must contain at least one sequence")

    # Stack the MFCC vectors of all training sequences into one 2-D array
    all_mfccs = np.concatenate([np.array([item[0] for item in sequence]) for sequence in train_data])

    # `shape` is an attribute, not a method (calling it raised TypeError)
    print(all_mfccs.shape)

    # Per-coefficient mean and standard deviation
    mean = np.mean(all_mfccs, axis=0)
    std = np.std(all_mfccs, axis=0)
    # Guard against division by zero for constant coefficients
    std = np.where(std == 0, 1.0, std)

    print(mean.shape)
    print(std.shape)

    # Normalize a single sequence, leaving its labels unchanged
    def normalize_sequence(sequence):
        return [((np.array(mfcc) - mean) / std, label) for mfcc, label in sequence]

    # Normalize training and validation data with the same (training) statistics
    normalized_train_data = [normalize_sequence(sequence) for sequence in train_data]
    normalized_val_data = [normalize_sequence(sequence) for sequence in val_data]

    return normalized_train_data, normalized_val_data, mean, std


## Audio data normalization function

# **Data preprocessing**

In [2]:
import os
from scipy.io.wavfile import read
import csv
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import numpy as np
import librosa

REFRESH_TIME = 0.25  # length of one audio chunk in seconds (one MFCC vector and one label per chunk)
BATCH_SIZE = 16  # number of sequences per batch passed to the DataLoaders

# Directory that is scanned for .wav recordings and their matching .csv label files
data_dir = '../../train-sequences'

# Function to load labels from csv file
def load_labels(csv_file_v):
    """Read labelled segments from a CSV file.

    The first row is treated as a header and skipped. Every following row is
    expected to hold ``name, start, end``. Rows whose name is not one of
    ``exhale``/``inhale``/``silence`` are ignored.

    Args:
        csv_file_v: path of the CSV file to read.

    Returns:
        List of ``(class_id, start, end)`` tuples in file order, with
        class ids 0 = exhale, 1 = inhale, 2 = silence and integer bounds.
    """
    class_ids = {'exhale': 0, 'inhale': 1, 'silence': 2}

    with open(csv_file_v, 'r') as handle:
        rows = csv.reader(handle)
        next(rows)  # skip the header row
        return [
            (class_ids[row[0]], int(row[1]), int(row[2]))
            for row in rows
            if row[0] in class_ids
        ]

# Function to get the label for a given time
def get_label_for_time(labels_v, start_frame, end_frame):
    """Return the class that covers most of the window [start_frame, end_frame).

    Args:
        labels_v: iterable of ``(class_id, start, end)`` segments.
        start_frame: beginning of the window.
        end_frame: end of the window.

    Returns:
        The class id (0: exhale, 1: inhale, 2: silence) with the largest total
        overlap. Ties, including the no-overlap case, resolve to the lowest id.
    """
    # Accumulated overlap per class; the list index is the class id
    overlap_per_class = [0, 0, 0]

    for class_id, seg_start, seg_end in labels_v:
        # Skip segments that do not intersect the window at all
        if seg_start >= end_frame or seg_end <= start_frame:
            continue
        overlap_per_class[class_id] += min(seg_end, end_frame) - max(seg_start, start_frame)

    dominant = max(overlap_per_class)
    return overlap_per_class.index(dominant)

# Collect every .wav file located directly inside data_dir
wav_files = [os.path.join(data_dir, file) for file in os.listdir(data_dir) if file.endswith('.wav')]
# One entry per recording: a list of (mean MFCC vector, label) tuples
train_data = []

# Main loop to preprocess data into MFCCs
for wav_file in wav_files:
    # Labels are looked up in a .csv file with the same name; recordings without one are skipped
    csv_file = wav_file.replace('.wav', '.csv')
    if not os.path.exists(csv_file):
        continue

    # Load audio and labels
    # NOTE(review): the slicing below treats y as a 1-D (mono) signal - confirm for the dataset
    sr, y = read(wav_file)

    if sr != 44100:
        # Only a warning is printed (the exception is disabled); the file is still processed at its own rate
        # raise Exception("Sampling rate is not 44100 its {}".format(sr))
        print("Sampling rate is not 44100 its {}".format(sr))

    labels = load_labels(csv_file)

    # Calculate chunk size (number of samples in REFRESH_TIME seconds)
    chunk_size = int(sr * REFRESH_TIME)

    # List of MFCCs for every data sequence (it will be a list of lists of tuples (mfcc coefficients, label))
    mfcc_sequence = []

    # Iterate through every 0.25s audio chunk
    for i in range(0, len(y), chunk_size):
        frame = y[i:i + chunk_size]
        # A trailing chunk shorter than chunk_size is dropped
        if len(frame) == chunk_size:
            frame = frame.astype(np.float32)
            # Scale by the int16 maximum; assumes the file stores 16-bit PCM samples - TODO confirm
            frame /= np.iinfo(np.int16).max
            mfcc = librosa.feature.mfcc(y=frame, sr=sr)
            # Average along axis 1 (presumably the time-frame axis) to get one vector per chunk
            mfcc_mean = mfcc.mean(axis=1)
            # The window is given as sample offsets, so the CSV bounds are presumably sample indices too - TODO confirm
            label = get_label_for_time(labels, i, i + chunk_size)
            mfcc_sequence.append((mfcc_mean, label))

    # Recordings that produced no full chunk are left out
    if mfcc_sequence:
        train_data.append(mfcc_sequence)

In [3]:
# Sanity check: number of chunks in every preprocessed sequence

lengths = list(map(len, train_data))
shortest, longest = min(lengths), max(lengths)
print("Min length: ", shortest)
print("Max length: ", longest)
print(lengths)

Min length:  40
Max length:  40
[40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,

# **Data Loader**

In [4]:
# Split data into train and validation sets (80% / 20%).
# A fixed random_state makes the split, and therefore the reported validation
# metrics, reproducible across kernel restarts.
# NOTE: train_data is rebound here, so re-running this cell without re-running
# the preprocessing cell splits the already reduced training set again.
train_data, val_data = train_test_split(train_data, test_size=0.2, random_state=42)

# DataLoader and collate function
from model_classes import AudioDataset
import torch

# Wrap both splits in AudioDataset (imported from model_classes; its implementation is not visible here)
train_dataset = AudioDataset(train_data)
val_dataset = AudioDataset(val_data)

def collate_fn(batch):
    """Pad a batch of variable-length sequences to a common length.

    Args:
        batch: list of ``(sequence, labels)`` pairs, where ``sequence`` is a
            tensor whose first dimension is time and whose last dimension is
            the feature size, and ``labels`` holds one class id per time step.

    Returns:
        Tuple ``(padded_sequences, padded_labels)``:
        ``padded_sequences`` is a float tensor of shape
        ``(batch, max_length, feature_dim)`` and ``padded_labels`` is a long
        tensor of shape ``(batch, max_length)``. Both are zero-padded.
    """
    sequences, labels_t = zip(*batch)
    max_length = max(seq.size(0) for seq in sequences)
    # Take the feature size from the data instead of hard-coding 20, so the
    # function keeps working if the number of MFCC coefficients changes
    feature_dim = sequences[0].size(-1)

    padded_sequences = torch.zeros(len(sequences), max_length, feature_dim)
    # NOTE(review): padded label positions are 0, which is also the class id
    # load_labels assigns to 'exhale' - harmless while all sequences have the
    # same length, but worth masking if lengths ever differ
    padded_labels = torch.zeros(len(sequences), max_length, dtype=torch.long)

    for j, (seq, seq_labels) in enumerate(zip(sequences, labels_t)):
        padded_sequences[j, :seq.size(0), :] = seq
        padded_labels[j, :len(seq_labels)] = seq_labels
    return padded_sequences, padded_labels

# Batch both datasets with collate_fn; only the training batches are shuffled
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

# **Training**

In [5]:
import time
import torch.optim as optim
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from model_classes import AudioClassifierLSTM as AudioClassifier
import torch.nn as nn

REFRESH_TIME = 0.25  # Refresh time in seconds in future realtime
NUM_EPOCHS = 100  # Number of epochs (the more epoch the better model, but it takes more time)
PATIENCE_TIME = 10  # Number of epochs without improvement in validation accuracy that will stop training
LEARNING_RATE = 0.001  # Learning rate (initial value, halved by the scheduler below every 5 epochs)
# Batch size (amount of sequences in one batch).
# NOTE: train_loader / val_loader were already built in the Data Loader cell,
# so changing this value here does not affect them unless that cell is re-run.
BATCH_SIZE = 16

# Check if CUDA is available (learning on GPU is much faster)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device: ", device)

# total_time measures the whole cell, start_time is reset before each phase
total_time = time.time()
start_time = time.time()

# Create model object
print("Creating model...")
model = AudioClassifier()
model = model.to(device)
print("Model created, time: ", time.time() - start_time)

# Define loss function and optimizer (network parameters)
criterion = nn.CrossEntropyLoss()
# Pass the configured learning rate explicitly; previously LEARNING_RATE was
# declared but never used, so editing it had no effect on training
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Multiply the learning rate by 0.5 every 5 scheduler steps (one step per epoch)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# These are just for early stopping
best_val_accuracy = 0.0
early_stopping_counter = 0

print("Training model...")
start_time = time.time()

# Iterate through epochs: one training pass, one validation pass, then
# learning-rate scheduling and the early-stopping check
for epoch in range(NUM_EPOCHS):

    # Enable training on model object
    model.train()

    # Initialize running loss and accuracy (sums of per-batch values)
    running_loss = 0.0
    running_accuracy = 0.0
    # It's just a fancy progress bar in console
    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{NUM_EPOCHS}', unit='batch')

    # Iterate through batches; batches_seen counts the batches processed so far
    for batches_seen, (inputs, labels) in enumerate(progress_bar, start=1):

        # Move inputs and labels to the device (GPU or CPU)
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # If the model returns more than one value, keep only the first one as outputs
        if isinstance(outputs, tuple):
            outputs = outputs[0]

        # Flattening outputs and labels from [batch_size, max_length, num_classes]
        outputs = outputs.view(-1, outputs.size(-1))  # Flattening to [batch_size * max_length, num_classes]
        labels = labels.view(-1)  # Flattening to [batch_size * max_length]

        # Calculate loss
        loss = criterion(outputs, labels)

        # Backward pass (calculate gradients)
        loss.backward()

        # Update weights according to the calculated gradients
        optimizer.step()

        # Calculate running loss and accuracy
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        running_accuracy += accuracy_score(labels.cpu(), predicted.cpu())

        # Update progress bar with the average over the batches seen so far.
        # Dividing by the total number of batches under-reported both values
        # until the last batch of the epoch.
        progress_bar.set_postfix(loss=running_loss / batches_seen,
                                  accuracy=running_accuracy / batches_seen)

    # Print the loss and accuracy for the epoch
    print('Train Loss: {:.4f}, Train Accuracy: {:.4f}'.format(running_loss / len(train_loader),
                                                              running_accuracy / len(train_loader)))

    # After training on the whole training set, we can evaluate the model on the validation set
    model.eval()
    val_running_loss = 0.0
    val_running_accuracy = 0.0

    # We don't need to calculate gradients during validation
    with torch.no_grad():

        # Iterate through validation set
        for inputs, labels in val_loader:

            # Move inputs and labels to the device
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)

            # If the model returns more than one value, keep only the first one as outputs
            if isinstance(outputs, tuple):
                outputs = outputs[0]

            # As previous, we need to flatten outputs and labels
            outputs = outputs.view(-1, outputs.size(-1))  # Flattening to [batch_size * max_length, num_classes]
            labels = labels.view(-1)  # Flattening to [batch_size * max_length]

            # Calculate loss
            loss = criterion(outputs, labels)

            # Accumulate this batch's loss and accuracy into the running totals
            val_running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            val_running_accuracy += accuracy_score(labels.cpu(), predicted.cpu())

    # Average the accumulated loss and accuracy over the validation batches
    avg_val_loss = val_running_loss / len(val_loader)
    avg_val_accuracy = val_running_accuracy / len(val_loader)

    # And print it
    print('Val Loss: {:.4f}, Val Accuracy: {:.4f}'.format(avg_val_loss, avg_val_accuracy))

    # Learning rate scheduler (changing learning rate during training)
    scheduler.step()

    # Early stopping (if there is no improvement in validation accuracy for PATIENCE_TIME epochs, we stop training)
    if avg_val_accuracy > best_val_accuracy:
        best_val_accuracy = avg_val_accuracy
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= PATIENCE_TIME:
            print("Early stopping triggered. No improvement in validation accuracy.")
            break

# And print final results (start_time was reset just before the training loop)
print('Finished Training, time: ', time.time() - start_time)
print('Saving model...')
start_time = time.time()
# TODO (left open by the author, no details given)
# Only the weights (state_dict) of the model as it is after the last executed epoch
# are written; the file lands in the current working directory
torch.save(model.state_dict(), 'audio_lstm_classifier_test.pth')
print("Model saved, time: ", time.time() - start_time)
print("Finished, Total time: ", time.time() - total_time)

Device:  cpu
Creating model...
Model created, time:  0.007501840591430664




Training model...


  mfcc_sequence = torch.tensor(mfcc_sequence, dtype=torch.float32)
Epoch 1/100: 100%|██████████| 18/18 [00:01<00:00, 15.45batch/s, accuracy=0.489, loss=1.04] 


Train Loss: 1.0434, Train Accuracy: 0.4890
Val Loss: 0.9299, Val Accuracy: 0.5871


Epoch 2/100: 100%|██████████| 18/18 [00:01<00:00, 16.88batch/s, accuracy=0.614, loss=0.861]


Train Loss: 0.8613, Train Accuracy: 0.6136
Val Loss: 0.8421, Val Accuracy: 0.6155


Epoch 3/100: 100%|██████████| 18/18 [00:01<00:00, 14.95batch/s, accuracy=0.679, loss=0.736] 


Train Loss: 0.7356, Train Accuracy: 0.6793
Val Loss: 0.6553, Val Accuracy: 0.7132


Epoch 4/100: 100%|██████████| 18/18 [00:01<00:00, 16.35batch/s, accuracy=0.742, loss=0.625] 


Train Loss: 0.6246, Train Accuracy: 0.7417
Val Loss: 0.5680, Val Accuracy: 0.7621


Epoch 5/100: 100%|██████████| 18/18 [00:01<00:00, 15.59batch/s, accuracy=0.767, loss=0.555] 


Train Loss: 0.5550, Train Accuracy: 0.7667
Val Loss: 0.5546, Val Accuracy: 0.7737


Epoch 6/100: 100%|██████████| 18/18 [00:01<00:00, 15.73batch/s, accuracy=0.785, loss=0.53]  


Train Loss: 0.5304, Train Accuracy: 0.7849
Val Loss: 0.5505, Val Accuracy: 0.7782


Epoch 7/100: 100%|██████████| 18/18 [00:00<00:00, 18.37batch/s, accuracy=0.798, loss=0.496] 


Train Loss: 0.4956, Train Accuracy: 0.7984
Val Loss: 0.4769, Val Accuracy: 0.8101


Epoch 8/100: 100%|██████████| 18/18 [00:00<00:00, 20.88batch/s, accuracy=0.809, loss=0.473]


Train Loss: 0.4727, Train Accuracy: 0.8089
Val Loss: 0.4768, Val Accuracy: 0.8063


Epoch 9/100: 100%|██████████| 18/18 [00:00<00:00, 19.47batch/s, accuracy=0.812, loss=0.448] 


Train Loss: 0.4485, Train Accuracy: 0.8119
Val Loss: 0.4733, Val Accuracy: 0.8083


Epoch 10/100: 100%|██████████| 18/18 [00:00<00:00, 19.41batch/s, accuracy=0.823, loss=0.443] 


Train Loss: 0.4428, Train Accuracy: 0.8232
Val Loss: 0.4652, Val Accuracy: 0.8097


Epoch 11/100: 100%|██████████| 18/18 [00:00<00:00, 19.80batch/s, accuracy=0.83, loss=0.421]  


Train Loss: 0.4214, Train Accuracy: 0.8301
Val Loss: 0.4470, Val Accuracy: 0.8192


Epoch 12/100: 100%|██████████| 18/18 [00:00<00:00, 18.94batch/s, accuracy=0.834, loss=0.408]


Train Loss: 0.4081, Train Accuracy: 0.8335
Val Loss: 0.4433, Val Accuracy: 0.8193


Epoch 13/100: 100%|██████████| 18/18 [00:00<00:00, 20.61batch/s, accuracy=0.839, loss=0.399] 


Train Loss: 0.3990, Train Accuracy: 0.8392
Val Loss: 0.4306, Val Accuracy: 0.8328


Epoch 14/100: 100%|██████████| 18/18 [00:00<00:00, 19.40batch/s, accuracy=0.842, loss=0.391]


Train Loss: 0.3907, Train Accuracy: 0.8417
Val Loss: 0.4397, Val Accuracy: 0.8214


Epoch 15/100: 100%|██████████| 18/18 [00:00<00:00, 19.08batch/s, accuracy=0.84, loss=0.391]  


Train Loss: 0.3905, Train Accuracy: 0.8401
Val Loss: 0.4316, Val Accuracy: 0.8253


Epoch 16/100: 100%|██████████| 18/18 [00:00<00:00, 22.84batch/s, accuracy=0.847, loss=0.376] 


Train Loss: 0.3757, Train Accuracy: 0.8468
Val Loss: 0.4333, Val Accuracy: 0.8262


Epoch 17/100: 100%|██████████| 18/18 [00:00<00:00, 20.60batch/s, accuracy=0.85, loss=0.37]  


Train Loss: 0.3701, Train Accuracy: 0.8500
Val Loss: 0.4212, Val Accuracy: 0.8335


Epoch 18/100: 100%|██████████| 18/18 [00:01<00:00, 17.67batch/s, accuracy=0.855, loss=0.365] 


Train Loss: 0.3649, Train Accuracy: 0.8551
Val Loss: 0.4189, Val Accuracy: 0.8336


Epoch 19/100: 100%|██████████| 18/18 [00:00<00:00, 20.82batch/s, accuracy=0.852, loss=0.362] 


Train Loss: 0.3622, Train Accuracy: 0.8516
Val Loss: 0.4233, Val Accuracy: 0.8285


Epoch 20/100: 100%|██████████| 18/18 [00:00<00:00, 20.91batch/s, accuracy=0.859, loss=0.357] 


Train Loss: 0.3565, Train Accuracy: 0.8588
Val Loss: 0.4212, Val Accuracy: 0.8353


Epoch 21/100: 100%|██████████| 18/18 [00:00<00:00, 18.73batch/s, accuracy=0.865, loss=0.349] 


Train Loss: 0.3492, Train Accuracy: 0.8651
Val Loss: 0.4193, Val Accuracy: 0.8377


Epoch 22/100: 100%|██████████| 18/18 [00:00<00:00, 19.91batch/s, accuracy=0.861, loss=0.348] 


Train Loss: 0.3481, Train Accuracy: 0.8605
Val Loss: 0.4170, Val Accuracy: 0.8408


Epoch 23/100: 100%|██████████| 18/18 [00:00<00:00, 20.87batch/s, accuracy=0.863, loss=0.347] 


Train Loss: 0.3473, Train Accuracy: 0.8626
Val Loss: 0.4187, Val Accuracy: 0.8349


Epoch 24/100: 100%|██████████| 18/18 [00:00<00:00, 18.27batch/s, accuracy=0.862, loss=0.347]


Train Loss: 0.3471, Train Accuracy: 0.8619
Val Loss: 0.4153, Val Accuracy: 0.8394


Epoch 25/100: 100%|██████████| 18/18 [00:00<00:00, 19.46batch/s, accuracy=0.864, loss=0.342] 


Train Loss: 0.3415, Train Accuracy: 0.8640
Val Loss: 0.4141, Val Accuracy: 0.8413


Epoch 26/100: 100%|██████████| 18/18 [00:00<00:00, 20.41batch/s, accuracy=0.865, loss=0.344] 


Train Loss: 0.3435, Train Accuracy: 0.8650
Val Loss: 0.4162, Val Accuracy: 0.8375


Epoch 27/100: 100%|██████████| 18/18 [00:00<00:00, 19.77batch/s, accuracy=0.863, loss=0.343]


Train Loss: 0.3427, Train Accuracy: 0.8629
Val Loss: 0.4126, Val Accuracy: 0.8414


Epoch 28/100: 100%|██████████| 18/18 [00:00<00:00, 20.16batch/s, accuracy=0.865, loss=0.34] 


Train Loss: 0.3403, Train Accuracy: 0.8654
Val Loss: 0.4125, Val Accuracy: 0.8417


Epoch 29/100: 100%|██████████| 18/18 [00:00<00:00, 20.68batch/s, accuracy=0.867, loss=0.338]


Train Loss: 0.3380, Train Accuracy: 0.8672
Val Loss: 0.4156, Val Accuracy: 0.8368


Epoch 30/100: 100%|██████████| 18/18 [00:01<00:00, 17.38batch/s, accuracy=0.868, loss=0.335]


Train Loss: 0.3350, Train Accuracy: 0.8677
Val Loss: 0.4116, Val Accuracy: 0.8439


Epoch 31/100: 100%|██████████| 18/18 [00:00<00:00, 20.91batch/s, accuracy=0.868, loss=0.335] 


Train Loss: 0.3351, Train Accuracy: 0.8683
Val Loss: 0.4120, Val Accuracy: 0.8404


Epoch 32/100: 100%|██████████| 18/18 [00:00<00:00, 22.43batch/s, accuracy=0.865, loss=0.334] 


Train Loss: 0.3344, Train Accuracy: 0.8652
Val Loss: 0.4133, Val Accuracy: 0.8401


Epoch 33/100: 100%|██████████| 18/18 [00:00<00:00, 20.48batch/s, accuracy=0.865, loss=0.334] 


Train Loss: 0.3344, Train Accuracy: 0.8652
Val Loss: 0.4118, Val Accuracy: 0.8412


Epoch 34/100: 100%|██████████| 18/18 [00:00<00:00, 18.55batch/s, accuracy=0.868, loss=0.334] 


Train Loss: 0.3337, Train Accuracy: 0.8680
Val Loss: 0.4119, Val Accuracy: 0.8419


Epoch 35/100: 100%|██████████| 18/18 [00:00<00:00, 19.88batch/s, accuracy=0.869, loss=0.333] 


Train Loss: 0.3329, Train Accuracy: 0.8688
Val Loss: 0.4113, Val Accuracy: 0.8422


Epoch 36/100: 100%|██████████| 18/18 [00:00<00:00, 18.89batch/s, accuracy=0.866, loss=0.336] 


Train Loss: 0.3365, Train Accuracy: 0.8655
Val Loss: 0.4123, Val Accuracy: 0.8392


Epoch 37/100: 100%|██████████| 18/18 [00:01<00:00, 16.27batch/s, accuracy=0.869, loss=0.332] 


Train Loss: 0.3319, Train Accuracy: 0.8689
Val Loss: 0.4114, Val Accuracy: 0.8398


Epoch 38/100: 100%|██████████| 18/18 [00:01<00:00, 16.68batch/s, accuracy=0.866, loss=0.338] 


Train Loss: 0.3376, Train Accuracy: 0.8665
Val Loss: 0.4112, Val Accuracy: 0.8412


Epoch 39/100: 100%|██████████| 18/18 [00:01<00:00, 16.25batch/s, accuracy=0.871, loss=0.328] 


Train Loss: 0.3284, Train Accuracy: 0.8707
Val Loss: 0.4119, Val Accuracy: 0.8408


Epoch 40/100: 100%|██████████| 18/18 [00:00<00:00, 19.13batch/s, accuracy=0.868, loss=0.332] 


Train Loss: 0.3322, Train Accuracy: 0.8681
Val Loss: 0.4105, Val Accuracy: 0.8419
Early stopping triggered. No improvement in validation accuracy.
Finished Training, time:  40.510581731796265
Saving model...
Model saved, time:  0.008564949035644531
Finished, Total time:  41.459911823272705
