In [23]:
import os

import polars as pl

from data.source.pg_experiment import get_pg_experiment_dataframe
from models.SimplifiedLightweightCNN import SimplifiedLightweightCNN
%load_ext autoreload
%autoreload 1
%aimport models.SimplifiedLightweightCNN
from models.SimpleCNN_v2 import train, evaluate
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
from path import RESULT_DIRECTORY
import wandb

# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort that can occur
# when several imported libraries each bundle their own copy of that runtime. It is set after
# the imports above have already run — confirm it still takes effect where it is needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [38]:
# Fetch the (pron, tone) frame pair for the old and for the new dataset version.
df_pron, df_tone = get_pg_experiment_dataframe(".ogg")
df_pron_new, df_tone_new = get_pg_experiment_dataframe(".ogg", newVersion=True)

# Row predicates shared by both dataset versions.
is_stage_one = pl.col("stage") == 1
is_stage_two = pl.col("stage") == 2
is_polish_native = pl.col("mother") == "polish"

# Old dataset: stage I (polish / every other mother tongue) and stage II.
df_stageI_polish = df_pron.filter(is_stage_one & is_polish_native)
df_other_lang = df_pron.filter(is_stage_one & ~is_polish_native)
df_stageII = df_pron.filter(is_stage_two)

# New dataset: the same three subsets.
df_stageI_polish_new = df_pron_new.filter(is_stage_one & is_polish_native)
df_other_lang_new = df_pron_new.filter(is_stage_one & ~is_polish_native)
df_stageII_new = df_pron_new.filter(is_stage_two)

# Side-by-side shape report (old vs. new) for every subset.
print("-" * 80)
print(f"Shape of the old dataset: {df_pron.shape},Shape of the new dataset: {df_pron_new.shape}.")
print(f"Shape of the old dataset only polish, pron, stage I: {df_stageI_polish.shape}, Shape of the new dataset only polish, pron, stage I: {df_stageI_polish_new.shape}.")
print(f"Shape of the old dataset other than polish, pron, stage I: {df_other_lang.shape}, Shape of the new dataset other than polish, pron, stage I: {df_other_lang_new.shape}.")
print(f"Shape of the old dataset stage II : {df_stageII.shape}, Shape of the new dataset stage II: {df_stageII_new.shape}.")


--------------------------------------------------------------------------------
Shape of the old dataset: (12850, 8),Shape of the new dataset: (12913, 8).
Shape of the old dataset only polish, pron, stage I: (5732, 8), Shape of the new dataset only polish, pron, stage I: (5595, 8).
Shape of the old dataset other than polish, pron, stage I: (284, 8), Shape of the new dataset other than polish, pron, stage I: (260, 8).
Shape of the old dataset stage II : (6834, 8), Shape of the new dataset stage II: (7058, 8).


In [39]:
import polars as pl
import numpy as np
from typing import Tuple


def stratified_split(df: pl.DataFrame, label_col: str, train_frac=0.8, val_frac=0.1, seed=42) -> Tuple[
    pl.DataFrame, pl.DataFrame, pl.DataFrame]:
    """Split ``df`` into train / validation / test frames, class by class.

    For every distinct value of ``label_col`` the matching rows are shuffled with
    a seeded NumPy ``RandomState`` and cut at ``int(train_frac * n)`` and
    ``int((train_frac + val_frac) * n)``; whatever remains goes to the test
    split. The per-class pieces are then concatenated.

    Args:
        df: Frame to split.
        label_col: Name of the column holding the class label.
        train_frac: Fraction of each class assigned to the training split.
        val_frac: Fraction of each class assigned to the validation split.
        seed: Seed for the random generator used to shuffle each class.

    Returns:
        ``(train_df, val_df, test_df)``.

    Raises:
        ValueError: If a fraction is negative or ``train_frac + val_frac`` exceeds 1.

    NOTE(review): rows whose label is null are not matched by the equality
    filter below and therefore land in none of the splits — confirm this is
    the intended handling.
    """
    # Small tolerance so that sums such as 0.7 + 0.3 are not rejected due to float rounding.
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1 + 1e-9:
        raise ValueError(
            f"train_frac and val_frac must be non-negative and sum to at most 1, "
            f"got train_frac={train_frac}, val_frac={val_frac}"
        )

    # The single RNG below is consumed class after class, so the iteration order
    # must be fixed; without maintain_order the order of unique() is not
    # guaranteed and the same seed could yield different splits between runs.
    classes = df.select(label_col).unique(maintain_order=True).to_series()
    if classes.len() == 0:
        # Nothing to stratify: return three empty frames that keep the schema
        # instead of failing inside pl.concat on an empty list.
        return df.head(0), df.head(0), df.head(0)

    rng = np.random.RandomState(seed)
    train_rows, val_rows, test_rows = [], [], []

    for cls in classes:
        class_df = df.filter(pl.col(label_col) == cls)
        n = class_df.height
        indices = rng.permutation(n)

        train_end = int(train_frac * n)
        val_end = int((train_frac + val_frac) * n)

        train_rows.append(class_df[indices[:train_end]])
        val_rows.append(class_df[indices[train_end:val_end]])
        test_rows.append(class_df[indices[val_end:]])

    return pl.concat(train_rows), pl.concat(val_rows), pl.concat(test_rows)


In [50]:
from dataset import Cast, TorchDataset
from pytorch_dataloader import build_collate_fn, PaddingCollate, DefaultCollate

from transformation import TorchVadMFCC
from develop import reload_function

# NOTE(review): presumably refreshes TorchVadMFCC so source edits are picked up
# without restarting the kernel — confirm against develop.reload_function.
reload_function(TorchVadMFCC)

# Split proportions; the test share is whatever train and validation leave over.
TRAIN_SPLIT = 0.6
VAL_SPLIT = 0.2
TEST_SPLIT = 1 - TRAIN_SPLIT - VAL_SPLIT
train_pl, val_pl, test_pl = stratified_split(
    df_stageI_polish_new,
    label_col="value",
    train_frac=TRAIN_SPLIT,
    val_frac=VAL_SPLIT,
)


def to_dataset(dataframe):
    """Wrap a frame's "rec_path" and "value" columns in a TorchDataset.

    The "rec_path" column is cast through ``TorchVadMFCC(n_mfcc=20)`` and the
    "value" column is converted element-wise to a float tensor.
    """
    features = Cast(dataframe.get_column("rec_path"), TorchVadMFCC(n_mfcc=20))
    targets = Cast(dataframe.get_column("value"), lambda x: torch.tensor(float(x)).float())
    return TorchDataset(features, targets)


# First collate is configured with max_len=128; the second is the project's default collate.
collate_fn = build_collate_fn(
    PaddingCollate(mode="SET_MAX_LEN", max_len=128),
    DefaultCollate(),
)

dataset_train, dataset_val, dataset_test = (
    to_dataset(split) for split in (train_pl, val_pl, test_pl)
)

In [None]:
from pytorch_dataloader import MemoryLoadedDataLoader

# Use CUDA when torch reports it as available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Options common to all three loaders; only the training loader shuffles.
loader_options = {"batch_size": 16, "collate_fn": collate_fn, "num_workers": 0}
train_loader = DataLoader(dataset_train, shuffle=True, **loader_options)
val_loader = DataLoader(dataset_val, shuffle=False, **loader_options)
test_loader = DataLoader(dataset_test, shuffle=False, **loader_options)

# Train and validation loaders are wrapped in MemoryLoadedDataLoader;
# test_loader stays a plain DataLoader.
train_loader = MemoryLoadedDataLoader(train_loader, device=device)
print("Loaded train loader into memory")
val_loader = MemoryLoadedDataLoader(val_loader, device=device)
print("Loaded validation loader into memory")

C:\Users\fisch\Documents\studiaMagisterskie\sem1\zespolowyProjektBadawczy\repo\Mandarin_Pronunciation_Recognition_Project\data\source\pg_dataset\new_recordings\stageI\761\a8.wav has no speech segments, using full waveform
C:\Users\fisch\Documents\studiaMagisterskie\sem1\zespolowyProjektBadawczy\repo\Mandarin_Pronunciation_Recognition_Project\data\source\pg_dataset\new_recordings\stageI\944\a3.wav has no speech segments, using full waveform


In [42]:
# Build the CNN with input_channels=1, num_classes=1 and a dropout rate of 0.3.
model = SimplifiedLightweightCNN(input_channels=1, num_classes=1, dropout_rate=0.3)
# nn.Module.eval() returns the module itself, so as the cell's last expression it also makes
# the notebook print the architecture. It switches the model to evaluation mode as well;
# NOTE(review): presumably `train` puts it back into training mode before fitting — confirm.
model.eval()

SimplifiedLightweightCNN(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout2d(p=0.1, inplace=False)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Dropout2d(p=0.2, inplace=False)
    (10): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Dropout2d(p=0.3, inplace=False)
    (15): AdaptiveAvgPool2d(o

In [43]:
# Model variables definition.
pth = "SimplifiedLightweightCNN.pth"
# Location of the saved weights; computed once and reused for saving and for the artifact.
model_path = os.path.join(RESULT_DIRECTORY, pth)
lr = 5e-4  # Reduce from 1e-3
epochs = 100
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)  # Add L2 regularization
# Learning rate scheduler: halve the learning rate after 5 epochs without improvement
# of the monitored value (the validation loss passed to scheduler.step below).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5
)
# NOTE(review): BCELoss expects probabilities in [0, 1]; this assumes the model's forward
# already applies a sigmoid — TODO confirm.
criterion = nn.BCELoss()

# Start a new wandb run to track this script. The run is used as a context manager so that it
# is always finished (and sent to the remote), even when training raises or is interrupted;
# previously run.finish() was only reached on the success path, leaving the run open otherwise.
with wandb.init(
    # name of the run
    name="SimplifiedLightweightCNN-run-3",
    config={
        "Name": 'SimplifiedLightweightCNN',
        "learning_rate": lr,
        "optimizer": "Adam",
        "criterion": "BCELoss",
        "architecture": "SimplifiedLightweightCNN",
        "dataset": "Stage-I-only-polish",
        "train_val_test(%)": f'{TRAIN_SPLIT}-{VAL_SPLIT}-{TEST_SPLIT}',
        "epochs": epochs,
    },
) as run:
    # Training loop
    for epoch in range(epochs):
        train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
        val_loss, val_acc = evaluate(model, val_loader, criterion, device)
        # Update learning rate
        scheduler.step(val_loss)
        # Logging the metadata for each epoch so that the charts can be generated on the dashboard
        run.log({"train_acc": train_acc, "train_loss": train_loss, "val_acc": val_acc, "val_loss": val_loss, })
        print(
            f"Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # model.eval() returns the module, so the value logged here is the model object itself.
    run.log({"model_eval": model.eval()})
    # Saving the model to pth and adding it to the artifacts of the run, there is 5GB of memory on wandb, so we should be fine.
    torch.save(model.state_dict(), model_path)
    artifact = wandb.Artifact("SimplifiedLightweightCNN-model", type="model")
    artifact.add_file(model_path)
    run.log_artifact(artifact)

# Leaving the `with` block finishes the run. You can discover the run right after that on the dashboard.


wandb: Currently logged in as: fischbach-kamil (fischbach-kamil-pg) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


Epoch 1, Train Loss: 0.6768, Train Acc: 0.5728, Val Loss: 0.6597, Val Acc: 0.6030
Epoch 2, Train Loss: 0.6603, Train Acc: 0.6124, Val Loss: 0.6510, Val Acc: 0.6038
Epoch 3, Train Loss: 0.6490, Train Acc: 0.6208, Val Loss: 0.6461, Val Acc: 0.6038
Epoch 4, Train Loss: 0.6461, Train Acc: 0.6197, Val Loss: 0.6417, Val Acc: 0.6073
Epoch 5, Train Loss: 0.6383, Train Acc: 0.6371, Val Loss: 0.6387, Val Acc: 0.6056
Epoch 6, Train Loss: 0.6340, Train Acc: 0.6368, Val Loss: 0.6350, Val Acc: 0.6152
Epoch 7, Train Loss: 0.6296, Train Acc: 0.6400, Val Loss: 0.6347, Val Acc: 0.6073
Epoch 8, Train Loss: 0.6250, Train Acc: 0.6362, Val Loss: 0.6277, Val Acc: 0.6178
Epoch 9, Train Loss: 0.6186, Train Acc: 0.6514, Val Loss: 0.6228, Val Acc: 0.6222
Epoch 10, Train Loss: 0.6122, Train Acc: 0.6528, Val Loss: 0.6224, Val Acc: 0.6283


0,1
train_acc,▁▄▅▅▇▇▇▇██
train_loss,█▆▅▅▄▃▃▂▂▁
val_acc,▁▁▁▂▂▄▂▅▆█
val_loss,█▆▅▅▄▃▃▂▁▁

0,1
model_eval,SimplifiedLightweigh...
train_acc,0.65281
train_loss,0.61223
val_acc,0.62827
val_loss,0.62244


In [11]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#
# model_saved = LightweightCNN()
# model_saved.load_state_dict(torch.load(os.path.join(RESULT_DIRECTORY, "initial_model_t&val_080101split.pth")))
# model_saved.to(device)
# model_saved.eval()

RuntimeError: Error(s) in loading state_dict for SimpleCNN:
	Missing key(s) in state_dict: "bn1.weight", "bn1.bias", "bn1.running_mean", "bn1.running_var", "bn2.weight", "bn2.bias", "bn2.running_mean", "bn2.running_var", "bn3.weight", "bn3.bias", "bn3.running_mean", "bn3.running_var". 