In [6]:
from dotenv import load_dotenv
load_dotenv()
import logging
import numpy as np
import pandas as pd
import torch
from src.logger.cometml import CometMLWriter
logger = logging.getLogger("smoke")
project_config = {"trainer": {"resume_from": None}, "note": "smoke"}
writer = CometMLWriter(
    logger=logger,
    project_config=project_config,
    project_name="asr-hw",
    workspace="progphys",
    run_name="smoke-test",
    mode="online",
)

writer.set_step(0, mode="demo")
writer.add_scalar("loss", 1.0)

writer.set_step(1, mode="demo")
writer.add_image("img", (np.random.rand(32, 64) * 255).astype(np.uint8))

sr = 16000
t = torch.linspace(0, 1, sr)

audio_3ch_torch = torch.stack([
    torch.sin(2 * torch.pi * 220 * t),
    torch.sin(2 * torch.pi * 440 * t),
    torch.sin(2 * torch.pi * 880 * t),
], dim=0) 

writer.set_step(2, mode="demo")
writer.add_audio("sine_3ch_torch", audio_3ch_torch, sample_rate=sr)

t_np = np.linspace(0, 1, sr, endpoint=False)
audio_3ch_np = np.stack([
    np.sin(2 * np.pi * 220 * t_np),
    np.sin(2 * np.pi * 440 * t_np),
    np.sin(2 * np.pi * 880 * t_np),
], axis=1).astype(np.float32)   


writer.set_step(3, mode="demo")
writer.add_audio("sine_3ch_numpy", audio_3ch_np, sample_rate=sr)


writer.set_step(4, mode="demo")
writer.add_audio("from_file_3ch", "test.wav")

writer.add_text("pred", "hello")
writer.add_histogram("h", torch.randn(1000))

writer.add_table("t", pd.DataFrame({"a":[1,2], "b":[3,4]}))

writer.exp.end()


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/progphys/asr-hw/f8fff82d17f34cebbcb0db9b4c3801c6

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : smoke-test
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/progphys/asr-hw/f8fff82d17f34cebbcb0db9b4c3801c6
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     loss_demo              : 1.0
[1;38;5;39mCOMET INFO:[0m     steps_per_sec_demo [4] : (52.87368476709142, 8000.0)
[1;38;5;39mCOMET INFO:[0m   Others:
[1;38;5;39mCOMET INFO:[0m     Name            : 

[[ 0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [ 8.6291738e-02  1.7193973e-01  3.3875823e-01]
 [ 1.7193973e-01  3.3875823e-01  6.3745725e-01]
 ...
 [-1.7185840e-01 -3.3860287e-01 -6.3720274e-01]
 [-8.6304612e-02 -1.7196518e-01 -3.3880684e-01]
 [ 1.3670491e-05  2.7340982e-05  5.4681965e-05]]


In [8]:
#check ctc decode

from src.text_encoder.ctc_text_encoder import CTCTextEncoder

model = CTCTextEncoder()
fake_inds = torch.tensor([0, 2, 2, 0, 3, 3, 3, 0, 4], dtype=torch.long)
print(model.ctc_decode(fake_inds))
print(model.decode(fake_inds))


bcd
bbcccd


In [None]:
from src.metrics.utils import calc_cer,calc_wer
#chatgpt generate tests
eps = 1e-6

tests = [
    ("", "", 0.0, 0.0),
    ("", "a", 1.0, 1.0),
    ("abcd", "aecd", 0.25, 1.0),
    ("abcd", "", 1.0, 1.0),
    ("abcd", "abbcd", 0.25, 1.0),
    ("a b c", "a b", None, 1/3),
    ("a b", "a x b", None, 0.5),
]

for t, p, cer_exp, wer_exp in tests:
    cer = calc_cer(t, p)
    wer = calc_wer(t, p)

    if cer_exp is not None:
        assert abs(cer - cer_exp) < eps, (t, p, cer, cer_exp)

    if wer_exp is not None:
        assert abs(wer - wer_exp) < eps, (t, p, wer, wer_exp)


ZeroDivisionError: division by zero