In [None]:
import librosa
import numpy as np
from collections import defaultdict
from tabulate import tabulate
from os import listdir
from os.path import isfile
import csv
import os


def get_mfcc(x, fs, n_mfcc, n_fft, win_length, hop_length, n_mels, count_delta, count_delta_delta):
    try:
        mfcc = librosa.feature.mfcc(
            y=x, sr=fs, n_mfcc=n_mfcc, n_fft=n_fft,
            win_length=win_length, hop_length=hop_length, n_mels=n_mels
        ).T


    

        counted_delta = 0

        first_feature_id = 1
        mfcc = mfcc[:, first_feature_id:] #DODANIE OBSŁUGI FIRST FEATURE W PÓŹNIEJSZYM ETAPIE
        if count_delta == True and count_delta_delta==False:
            deltas = librosa.feature.delta(mfcc.T).T
            mfcc = np.concatenate((mfcc, deltas), axis=1)
            # print("Obliczono delte")
            counted_delta = 1


        if count_delta == True and count_delta_delta==True:   #wartości delty-delty nie liczą się poprawnie
            delta_deltas = librosa.feature.delta(mfcc.T, order=2).T
            mfcc = np.concatenate((mfcc, delta_deltas), axis=1)
            # print("Obliczono delte-delte")
            counted_delta = 2     #DODANIE OBSŁUGI WYŚWIETLANIA INFO O DELTACH W PÓŹNIEJSZYM ETAPIE

    except Exception as e:
        print(f"Błąd podczas obliczania MFCC: {e}")
        return None

    return mfcc


def load_mfcc_params():

    try:
        current_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        current_dir = os.getcwd()

    editable_path = os.path.join(current_dir, "..", "mfcc_params.csv")
    default_path = os.path.join(current_dir, "..", "default_mfcc_params.csv")

    params = None
    source = None

    # WCZYTANIE PLIKU Z PARAMETRAMI MFCC
    def parse_value(v):
      v = v.strip()
      if v.lower() in ["true", "false"]:
          return v.lower() == "true"
      try:
          return int(v)
      except ValueError:
          try:
              return float(v)
          except ValueError:
              return v  # ZOSTAWIA TEKST, JEŚLI NIE DA SIĘ PRZEKONWERTOWAĆ


    try:
        with open(editable_path, "r", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            params = {k: parse_value(v) for k, v in next(reader).items()}
            source = "mfcc_params.csv"
    except Exception as e:
        print(f"Nie udało się wczytać pliku edytowalnego: {e}")
        print("Używam pliku domyślnego `default_mfcc_params.csv`")

        with open(default_path, "r", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            params = {k: parse_value(v) for k, v in next(reader).items()}
            source = "default_mfcc_params.csv"


    # TABELA Z PARAMETRAMI MFCC
    print("Parametry MFCC:")
    table_data = [{"parametr": k, "wartość": v} for k, v in params.items()]
    print(tabulate(table_data, headers="keys", tablefmt="grid"))
    print(f"Źródło parametrów: {source}")


    return params


def loadTrainFilesAndMFCC(mfcc_params, showTable):


    wavpaths = [f for f in listdir(".") if isfile(f) and f.endswith('.wav')]
    wavpaths.sort()

    sound_data = defaultdict(list)

    print(f"Znaleziono {len(wavpaths)} plików audio.")

    for f in wavpaths:
        x, fs = librosa.load(f, sr=16000, mono=True)
        try:
            mfcc = get_mfcc(x, fs, **mfcc_params)
        except Exception as e:
            print(f"Błąd MFCC dla {f}: {e}")
            mfcc = None

        sound_data[f[0:2]].append({
            "MFCC": mfcc,
            "num": f[6],
            "filename": f
        })
    print(f"Wczytano pliki i wyznaczono z nich macierze MFCC.")

    # TABELA Z PODSUMOWANIEM
    if showTable:
        rows = []
        for spk, items in sound_data.items():
            for d in items:
                row = {
                    "speaker": spk,
                    "num": d["num"],
                    "filename": d["filename"]
                }

                if isinstance(d.get("MFCC"), np.ndarray):
                    row["MFCC"] = f"shape={d['MFCC'].shape}"
                else:
                    row["MFCC"] = "None"

                rows.append(row)

        print(tabulate(rows, headers="keys", tablefmt="grid"))

    return sound_data




In [67]:
def prepare_training_data(sound_data, showTable):
    import numpy as np
    from collections import defaultdict

    training_data = defaultdict(list)

    for speaker, samples in sound_data.items():
        for s in samples:
            mfcc = s.get("MFCC")
            label = s.get("num")

            if mfcc is None or not isinstance(mfcc, np.ndarray):
                continue

            training_data[label].append(mfcc)

    # ŁĄCZMYMY DANE MFCC W MACIERZE DLA KOLEJNYCH CYFR
    for label in training_data:
        training_data[label] = np.vstack(training_data[label])

    # TABELA Z PODSUMOWANIEM
    if showTable==True:
      print("Przygotowane dane do treningu GMM:")
      summary = [
          {"cyfra": k, "liczba ramek": v.shape[0], "liczba cech": v.shape[1]}
          for k, v in training_data.items()
      ]
      from tabulate import tabulate
      print(tabulate(summary, headers="keys", tablefmt="grid"))

    print("Przygotowano dane do treningu GMM")
    return training_data

In [None]:
##############   MAIN FUNCTION    ##############

mfcc_params = load_mfcc_params()
sound_data = loadTrainFilesAndMFCC(mfcc_params, False);
training_data = prepare_training_data(sound_data, True)

Mounted at /content/gdrive/
Parametry MFCC:
+-------------------+-----------+
| parametr          |   wartość |
| n_mfcc            |        20 |
+-------------------+-----------+
| n_fft             |      1024 |
+-------------------+-----------+
| win_length        |       512 |
+-------------------+-----------+
| hop_length        |       256 |
+-------------------+-----------+
| n_mels            |        64 |
+-------------------+-----------+
| count_delta       |      True |
+-------------------+-----------+
| count_delta_delta |      True |
+-------------------+-----------+
Źródło parametrów: mfcc_params.csv
Znaleziono 220 plików audio.
Wczytano pliki i wyznacozno z nich macierze MFCC.
Przygotowane dane do treningu GMM:
+---------+----------------+---------------+
|   cyfra |   liczba ramek |   liczba cech |
|       0 |            870 |            38 |
+---------+----------------+---------------+
|       1 |            914 |            38 |
+---------+----------------+----------