In [28]:
import os
from data.source.pg_experiment import get_pg_experiment_dataset
import polars as pl
from models.SimpleCNN import PronunciationDataset, collate_fn, SimpleCNN, train
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
from path import RESULT_DIRECTORY

# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort that can
# occur when several imported libraries each bundle their own OpenMP copy — confirm it is
# still needed. It is set after the imports above; verify that is early enough to take effect.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [24]:
# Load the experiment table; the second value returned by the loader is not used here.
df_pron, _ = get_pg_experiment_dataset(".wav")

# Predicates shared by the two stage-1 subsets below.
is_stage_one = pl.col("stage") == 1
is_polish = pl.col("mother") == "polish"

# Stage-1 rows whose "mother" column equals "polish".
df_stageI_polish = df_pron.filter(is_stage_one & is_polish)

# Stage-1 rows whose "mother" column is not "polish".
df_other_lang = df_pron.filter(is_stage_one & ~is_polish)



In [None]:
# Wrap both stage-1 subsets in PronunciationDataset objects:
# first the "polish" subset, then the remaining rows.
dataset, dataset_other = (
    PronunciationDataset(frame) for frame in (df_stageI_polish, df_other_lang)
)

In [None]:
# Seed torch's global RNG so weight initialisation and DataLoader shuffling repeat
# from run to run. (Some CUDA kernels are still non-deterministic, so GPU results
# may differ slightly even with a fixed seed.)
SEED = 42
torch.manual_seed(SEED)

# Training hyper-parameters, named so they can be tuned in one place.
BATCH_SIZE = 16
LEARNING_RATE = 1e-3
epochs = 10

dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SimpleCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# BCELoss expects probabilities in [0, 1];
# NOTE(review): this assumes SimpleCNN ends in a sigmoid — TODO confirm.
criterion = nn.BCELoss()

# One call to the project's `train` helper per epoch; its return value is printed as the loss.
for epoch in range(epochs):
    loss = train(model, dataloader, optimizer, criterion, device)
    print(f"Epoch {epoch+1}, Loss: {loss:.4f}")

Epoch 1, Loss: 0.6681
Epoch 2, Loss: 0.6379
Epoch 3, Loss: 0.6176
Epoch 4, Loss: 0.6021
Epoch 5, Loss: 0.5846
Epoch 6, Loss: 0.5572
Epoch 7, Loss: 0.5329
Epoch 8, Loss: 0.5086
Epoch 9, Loss: 0.4753
Epoch 10, Loss: 0.4344


In [None]:
# Persist only the trained weights (the state_dict) under the results directory.
checkpoint_path = os.path.join(RESULT_DIRECTORY, "initial_model.pth")
torch.save(model.state_dict(), checkpoint_path)

In [30]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the architecture and restore the weights written by the save cell.
# map_location remaps the stored tensors onto `device`, so a checkpoint saved from a
# CUDA model can still be loaded on a CPU-only machine.
model_saved = SimpleCNN()
model_saved.load_state_dict(
    torch.load(os.path.join(RESULT_DIRECTORY, "initial_model.pth"), map_location=device)
)
model_saved.to(device)
# Switch to inference mode (e.g. disables dropout); as the last expression of the cell
# this also displays the module summary.
model_saved.eval()

SimpleCNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=16384, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)

In [None]:
# Accuracy on all Polish mother-language samples from Stage I.
# There was no train/test/val split for this model, so `dataset` is the same data the
# model was trained on: this is training-set accuracy, not a generalisation estimate.
#
# shuffle=False: evaluation does not need random ordering, and a fixed order keeps the
# batch composition identical from run to run.
dataloader = DataLoader(dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)

model_saved.eval()
total_correct = 0
total_samples = 0

with torch.no_grad():  # no gradients are needed for evaluation
    for spec, label in dataloader:
        spec = spec.to(device)
        label = label.to(device)
        output = model_saved(spec)
        # Binarise the model output at 0.5 to obtain the predicted class.
        pred = (output > 0.5).float()
        # NOTE(review): assumes `pred` and `label` have the same shape; a (B, 1) vs (B,)
        # mismatch would broadcast silently and inflate the count — TODO confirm.
        total_correct += (pred == label).sum().item()
        total_samples += label.size(0)

# Guard against an empty dataset to avoid a division by zero.
accuracy = total_correct / total_samples if total_samples > 0 else 0
print(f"Total accuracy on {total_samples} samples: {accuracy:.4f}")

Total accuracy on 5732 samples: 0.8381


In [None]:
# Accuracy on all NON-Polish mother-language samples from Stage I,
# fed to the model one sample per batch and in dataset order.
other_loader = DataLoader(
    dataset_other,
    batch_size=1,
    shuffle=False,
    collate_fn=collate_fn,
)

model_saved.eval()
total_correct, total_samples = 0, 0

with torch.no_grad():
    for spec, label in other_loader:
        spec, label = spec.to(device), label.to(device)
        # Binarise the model output at 0.5 and compare against the labels.
        pred = (model_saved(spec) > 0.5).float()
        total_correct += pred.eq(label).sum().item()
        total_samples += label.size(0)

# Report 0 rather than dividing by zero when the loader yielded nothing.
accuracy = 0 if total_samples <= 0 else total_correct / total_samples
print(f"Total accuracy on {total_samples} samples: {accuracy:.4f}")

Total accuracy on 284 samples: 0.6761
