Для проверки модели произведу идентификацию по своему голосу. Для этого необходимо обучить модель на данных, включающих мой голос, а также применить фильтрацию по порогу, который определяет, есть ли тот или иной голос в рассматриваемой базе голосов.

Тестировать буду наилучшую модель, а именно CatBoost с подобранными параметрами.

In [1]:
from my_funcs import get_dataframe, get_model_and_params, get_feature
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
# Load the crowd_train table from disk (presumably one row per recording with a
# precomputed feature vector, going by the file name — verify).
# NOTE(review): read_pickle can execute arbitrary code while unpickling; only load
# files that come from a trusted source.
df = pd.read_pickle('experiments/crowd_train_all_data_embedded.pkl')

In [3]:
# Build the working subset of speakers from the full table.
# NOTE(review): the meaning of the arguments 100 and 10 is defined in
# my_funcs.get_dataframe — presumably a per-speaker recording threshold and the
# number of recordings kept per speaker; confirm there.
df_t = get_dataframe(df, 100, 10)
df_t.head()  # preview the first rows

354


Unnamed: 0,audio_path,source_id,audio_feature
0,crowd_train\wavs/cb9733d3521a52dd7a578503dc982...,02d58cb47f02f8884aaa45b0f7dd7714,"[0.36070194840431213, 0.36269721388816833, 0.3..."
1,crowd_train\wavs/40dddbe3d8c0bcb4fd8b3c933f660...,02d58cb47f02f8884aaa45b0f7dd7714,"[0.38155198097229004, 0.4070906341075897, 0.44..."
2,crowd_train\wavs/19a6f9ce1902ffb0f45c05f5e621e...,02d58cb47f02f8884aaa45b0f7dd7714,"[0.36854031682014465, 0.4035727083683014, 0.44..."
3,crowd_train\wavs/d9050a9ec2523b61435ad2a2b4aca...,02d58cb47f02f8884aaa45b0f7dd7714,"[0.33979716897010803, 0.35216024518013, 0.3810..."
4,crowd_train\wavs/a931c958489a78e12956ebee1e13a...,02d58cb47f02f8884aaa45b0f7dd7714,"[0.32755956053733826, 0.3753350079059601, 0.39..."


In [4]:
# Load the evaluation recordings; the preview shows the same columns as df_t
# (audio_path, source_id, audio_feature).
df_me = pd.read_parquet('evaluation/eval_df.pqt')
df_me.head()

Unnamed: 0,audio_path,source_id,audio_feature
0,maria\Record_0.wav,maria,"[0.22580918669700623, 0.23982486128807068, 0.1..."
1,maria\Record_1.wav,maria,"[0.2748687267303467, 0.2674121558666229, 0.265..."
2,maria\Record_2.wav,maria,"[0.21786867082118988, 0.25627416372299194, 0.3..."
3,maria\Record_3.wav,maria,"[0.3032059073448181, 0.2491905242204666, 0.236..."
4,maria\Record_4.wav,maria,"[0.17937365174293518, 0.1685197949409485, 0.26..."


In [5]:
# Append the evaluation speakers to the crowd subset so both are label-encoded and
# split together. ignore_index=True gives the combined frame a fresh 0..n-1 index;
# without it the row labels of df_t and df_me collide (both start at 0), which makes
# label-based lookups on eval_df ambiguous.
eval_df = pd.concat([df_t, df_me], ignore_index=True)

In [6]:
# Feature matrix: stack the per-row feature vectors and force one row per recording.
X = np.vstack(eval_df['audio_feature'].to_numpy())
X = X.reshape(len(eval_df), -1)

# Integer targets: the fitted encoder is kept so predictions can be decoded later.
le = LabelEncoder()
le.fit(eval_df['source_id'])
y = le.transform(eval_df['source_id'])

X.shape, y.shape

((3560, 498), (3560,))

In [7]:
# Look up the integer codes the encoder assigned to the two evaluation speakers.
le.transform(['me', 'maria'])

array([355, 354])

In [8]:
# Stratified 50/50 split into train and validation parts with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.5,
    stratify=y,
    random_state=42,
)

In [9]:
# Fetch the stored hyper-parameters of the tuned CatBoost run. The first returned
# value (presumably the model itself, going by the helper's name) is discarded.
experiment = '354 classes, 5 per class'
model_name = 'catboost tuned'
_, params = get_model_and_params(experiment, model_name)
params

Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

 - numpy (current: 1.21.6, required: numpy==1.22.4)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


{'iterations': 1000,
 'devices': 0,
 'task_type': 'GPU',
 'bagging_temperature': 1,
 'depth': 4,
 'random_strength': 5}

Дообучение модели: добавляем 2 новых класса

In [10]:
# Replace the integer device id that came with the loaded params by the string '0'.
# NOTE(review): presumably CatBoost wants the device spec as a string — confirm.
params['devices'] = '0'

# When True, the training cell loads the saved 'cb_model' file instead of fitting
# a new model.
already_exists = True

In [11]:
import mlflow
import catboost

from sklearn import metrics

if already_exists:
    # Reuse the model file written by an earlier run of this cell.
    model_cb = catboost.CatBoostClassifier()
    model_cb.load_model('cb_model')
else:
    # Fit a fresh model and record the run on the local MLflow tracking server.
    mlflow.set_tracking_uri("http://127.0.0.1:5000")
    experiment = mlflow.set_experiment(f"{len(set(y))} classes, evaluation")

    run_name = "catboost tuned evaluation voices"

    with mlflow.start_run(run_name=run_name) as run:
        model_cb = catboost.CatBoostClassifier(verbose=10, **params)
        model_cb.fit(X_train, y_train)

        predicts = model_cb.predict(X_val)
        train_predicts = model_cb.predict(X_train)

        # Metric name -> value, logged one by one under the same names as before.
        scores = {
            "train f1_weighted": metrics.f1_score(y_train, train_predicts, average="weighted"),
            "f1_weighted": metrics.f1_score(y_val, predicts, average="weighted"),
            "f1_micro": metrics.f1_score(y_val, predicts, average="micro"),
            "accuracy": metrics.accuracy_score(y_val, predicts),
        }
        for metric_name, metric_value in scores.items():
            mlflow.log_metric(metric_name, metric_value)

        mlflow.log_params(params)

        # Store the model both as an MLflow artifact and as a local file; the local
        # file is what the already_exists branch loads.
        mlflow.catboost.log_model(model_cb, artifact_path=f"mlflow/{run_name}/model")
        model_cb.save_model('cb_model')

In [12]:
import gradio as gr

def authorize(audio):
    """Identify the speaker in a recording and describe the prediction.

    Args:
        audio: Path to the audio file handed over by the Gradio audio
            component, or None when nothing was recorded or uploaded.

    Returns:
        A message with the predicted speaker label and the highest class
        probability, or a prompt to provide audio when ``audio`` is None.
    """
    # Gradio passes None when the button is pressed without a recording;
    # answer with a hint instead of failing inside feature extraction.
    if audio is None:
        return 'Аудио не получено: запишите или загрузите файл'

    features = get_feature(audio)
    probs = model_cb.predict_proba(features)
    # The position of the largest probability is decoded back to a speaker id
    # (assumes the model's class order matches the encoder's codes — TODO confirm).
    return f'Предcказан класс {le.inverse_transform([np.argmax(probs)])} с вероятностью {np.max(probs)}'

# One row with two columns (audio input, result text) and a submit button below
# that sends the audio through authorize() and writes the answer to the textbox.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(sources=["microphone", "upload"], type="filepath")
        with gr.Column():
            textbox = gr.Textbox()

    button = gr.Button("Отправить")
    button.click(fn=authorize, inputs=[audio], outputs=[textbox])

# The trailing semicolon keeps the notebook from echoing the call's return value.
demo.launch(debug=True);

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


In [21]:
import IPython.display as ipd

# Take the recording at position 2 for one crowd speaker and play it inline.
audio_file = df_t.loc[df_t['source_id'] == '28245faacb3c27e84322dc9547f667ff', 'audio_path'].iloc[2]
ipd.Audio(audio_file)