Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.6.0 #12

Merged
merged 1 commit into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,10 @@ sphinx:

conda:
environment: rtd_environment.yml

# This part is necessary; otherwise the project is not built
python:
version: 3.9
install:
- method: pip
path: .
62 changes: 62 additions & 0 deletions extensions/ivector/ivector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,29 @@ void pybind_ivector_extractor(py::module &m) {
py::arg("opts"),
py::arg("extractor"),
py::call_guard<py::gil_scoped_release>())
.def(py::pickle(
    // __getstate__: serialize the object with its own Write() in binary
    // mode and return the resulting bytes wrapped in a 1-tuple.
    [](const PyClass &p) {
      std::ostringstream os;
      bool binary = true;
      p.Write(os, binary);
      return py::make_tuple(
          py::bytes(os.str()));
    },
    // __setstate__: rebuild an instance from the 1-tuple produced above.
    // Throws std::runtime_error("Invalid state!") when the tuple does not
    // hold exactly one element.
    [](py::tuple t) {
      if (t.size() != 1)
        throw std::runtime_error("Invalid state!");

      /* Wrap the pickled bytes in a stream that Read() can consume */
      std::istringstream str(t[0].cast<std::string>());

      /* Create a new C++ instance */
      PyClass *p = new PyClass();
      try {
        /* Restore the state; `true` selects binary mode, matching Write() */
        p->Read(str, true);
      } catch (...) {
        // Free the instance if Read() throws so a failed unpickle does not
        // leak it, then let the original exception propagate.
        delete p;
        throw;
      }

      return p;
    }
))
.def("update", [](
PyClass &stats,
IvectorExtractor &extractor,
Expand Down Expand Up @@ -930,6 +953,29 @@ void pybind_plda(py::module &m) {
},
py::arg("utterance_ivector"),
py::arg("transformed_enrolled_ivectors"))
.def(py::pickle(
    // __getstate__: write the object in binary mode into an in-memory
    // stream and hand the bytes back to Python inside a 1-tuple.
    [](const PyClass &p) {
      std::ostringstream os;
      bool binary = true;
      p.Write(os, binary);
      return py::make_tuple(
          py::bytes(os.str()));
    },
    // __setstate__: reconstruct an instance from the 1-tuple created by
    // __getstate__. Throws std::runtime_error("Invalid state!") if the tuple
    // does not contain exactly one element.
    [](py::tuple t) {
      if (t.size() != 1)
        throw std::runtime_error("Invalid state!");

      /* Expose the pickled bytes as an input stream for Read() */
      std::istringstream str(t[0].cast<std::string>());

      /* Create a new C++ instance */
      PyClass *p = new PyClass();
      try {
        /* Load the state; `true` selects binary mode, matching Write() */
        p->Read(str, true);
      } catch (...) {
        // Release the half-initialized instance when Read() throws, so a
        // failed unpickle does not leak memory; rethrow the original error.
        delete p;
        throw;
      }

      return p;
    }
))
.def("TransformIvector",
py::overload_cast<const PldaConfig &,
const VectorBase<double> &,
Expand Down Expand Up @@ -1363,6 +1409,22 @@ void init_ivector(py::module &_m) {
py::arg("normalize") = true,
py::arg("scaleup") = true);

// Length-normalize a double-precision i-vector in place.
//   ivector   - vector that is scaled in place.
//   normalize - when false the vector is left untouched.
//   scaleup   - when true the scale factor is norm / sqrt(dim); when false
//               it is the plain 2-norm.
// A zero scale factor (e.g. an all-zero vector) is skipped instead of
// dividing by zero, which would fill the vector with NaN/inf.
m.def("ivector_normalize_length",
      [](
          Vector<double>* ivector,
          bool normalize = true,
          bool scaleup = true
      ) {
        py::gil_scoped_release gil_release;
        const double norm = ivector->Norm(2.0);
        const double ratio = scaleup ? norm / sqrt(ivector->Dim()) : norm;
        // Guard against 1.0 / 0.0: leave zero vectors unchanged.
        if (normalize && ratio != 0.0) ivector->Scale(1.0 / ratio);
      },
      py::arg("ivector"),
      py::arg("normalize") = true,
      py::arg("scaleup") = true);

m.def("ivector_subtract_mean",
[](
std::vector<Vector<float>*> &ivectors
Expand Down
88 changes: 58 additions & 30 deletions extensions/transform/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,25 @@ void pybind_cmvn(py::module &m) {
py::arg("uttlist"),
py::arg("feat_reader"));

// Return a normalized copy of `feats`; the input matrix itself is not
// modified. With `reverse` set, ApplyCmvnReverse is used instead of
// ApplyCmvn. `norm_vars` is forwarded unchanged to whichever routine runs.
m.def("apply_cmvn",
      [](
          const Matrix<BaseFloat> &feats,
          const Matrix<double> &cmvn_stats,
          bool reverse = false,
          bool norm_vars = false
      ){
        py::gil_scoped_release release;
        // Work on a copy so the caller's features stay intact.
        Matrix<BaseFloat> normalized(feats);
        if (!reverse) {
          ApplyCmvn(cmvn_stats, norm_vars, &normalized);
        } else {
          ApplyCmvnReverse(cmvn_stats, norm_vars, &normalized);
        }
        return normalized;
      },
      py::arg("feats"),
      py::arg("cmvn_stats"),
      py::arg("reverse") = false,
      py::arg("norm_vars") = false);

m.def("ApplyCmvn",
&ApplyCmvn,
"Apply cepstral mean and variance normalization to a matrix of features. "
Expand Down Expand Up @@ -380,6 +399,8 @@ void pybind_fmllr_diag_gmm(py::module &m) {
py::arg("feats"))
.def("accumulate_from_alignment",
[](PyClass& spk_stats,
const TransitionModel &alignment_trans_model,
const AmDiagGmm &alignment_am_gmm,
const TransitionModel &trans_model,
const AmDiagGmm &am_gmm,
const Matrix<BaseFloat> &feats,
Expand All @@ -391,49 +412,51 @@ void pybind_fmllr_diag_gmm(py::module &m) {
bool two_models = false
){
py::gil_scoped_release gil_release;
Posterior pdf_post;
Posterior post;
Posterior posterior;

AlignmentToPosterior(ali, &posterior);

AlignmentToPosterior(ali, &post);
if (distributed)
WeightSilencePostDistributed(trans_model, silence_set,
silence_scale, &post);
WeightSilencePostDistributed(alignment_trans_model, silence_set,
silence_scale, &posterior);
else
WeightSilencePost(trans_model, silence_set,
silence_scale, &post);
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
WeightSilencePost(alignment_trans_model, silence_set,
silence_scale, &posterior);

Posterior pdf_posterior;
ConvertPosteriorToPdfs(alignment_trans_model, posterior, &pdf_posterior);

if (!two_models){
for (size_t i = 0; i < pdf_post.size(); i++) {
for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = pdf_post[i][j].first;
spk_stats.AccumulateForGmm(am_gmm.GetPdf(pdf_id),
for (size_t i = 0; i < pdf_posterior.size(); i++) {
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 pdf_id = pdf_posterior[i][j].first;
spk_stats.AccumulateForGmm(alignment_am_gmm.GetPdf(pdf_id),
feats.Row(i),
pdf_post[i][j].second);
pdf_posterior[i][j].second);
}
}
}
else{


GaussPost gpost(pdf_post.size());
GaussPost gpost(posterior.size());
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
for (size_t i = 0; i < pdf_post.size(); i++) {
gpost[i].reserve(pdf_post[i].size());
for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = pdf_post[i][j].first;
BaseFloat weight = pdf_post[i][j].second;
const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
for (size_t i = 0; i < posterior.size(); i++) {
gpost[i].reserve(pdf_posterior[i].size());
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 pdf_id = pdf_posterior[i][j].first;
BaseFloat weight = pdf_posterior[i][j].second;
const DiagGmm &gmm = alignment_am_gmm.GetPdf(pdf_id);
Vector<BaseFloat> this_post_vec;
BaseFloat like =
gmm.ComponentPosteriors(feats.Row(i), &this_post_vec);
this_post_vec.Scale(weight);
if (rand_prune > 0.0)
for (int32 k = 0; k < this_post_vec.Dim(); k++)
this_post_vec(k) = RandPrune(this_post_vec(k),
rand_prune);
for (int32 k = 0; k < this_post_vec.Dim(); k++)
this_post_vec(k) = RandPrune(this_post_vec(k),
rand_prune);
if (!this_post_vec.IsZero())
gpost[i].push_back(std::make_pair(pdf_id, this_post_vec));
gpost[i].push_back(std::make_pair(pdf_id, this_post_vec));
tot_like_this_file += like * weight;
tot_weight += weight;
}
Expand All @@ -450,6 +473,8 @@ void pybind_fmllr_diag_gmm(py::module &m) {
}
}
},
py::arg("alignment_trans_model"),
py::arg("alignment_am_gmm"),
py::arg("trans_model"),
py::arg("am_gmm"),
py::arg("feats"),
Expand All @@ -461,6 +486,8 @@ void pybind_fmllr_diag_gmm(py::module &m) {
py::arg("two_models") = false)
.def("accumulate_from_lattice",
[](PyClass* spk_stats,
const TransitionModel &alignment_trans_model,
const AmDiagGmm &alignment_am_gmm,
const TransitionModel &trans_model,
const AmDiagGmm &am_gmm,
const Matrix<BaseFloat> &feats,
Expand Down Expand Up @@ -490,13 +517,13 @@ void pybind_fmllr_diag_gmm(py::module &m) {
Posterior post;
double lat_like = LatticeForwardBackward(lat, &post);
if (distributed)
WeightSilencePostDistributed(trans_model, silence_set,
WeightSilencePostDistributed(alignment_trans_model, silence_set,
silence_scale, &post);
else
WeightSilencePost(trans_model, silence_set,
WeightSilencePost(alignment_trans_model, silence_set,
silence_scale, &post);
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
ConvertPosteriorToPdfs(alignment_trans_model, post, &pdf_post);
if (!two_models){
for (size_t i = 0; i < post.size(); i++) {
for (size_t j = 0; j < pdf_post[i].size(); j++) {
Expand All @@ -517,7 +544,7 @@ void pybind_fmllr_diag_gmm(py::module &m) {
for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = pdf_post[i][j].first;
BaseFloat weight = pdf_post[i][j].second;
const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
const DiagGmm &gmm = alignment_am_gmm.GetPdf(pdf_id);
Vector<BaseFloat> this_post_vec;
BaseFloat like =
gmm.ComponentPosteriors(feats.Row(i), &this_post_vec);
Expand All @@ -542,6 +569,8 @@ void pybind_fmllr_diag_gmm(py::module &m) {
}
}
},
py::arg("alignment_trans_model"),
py::arg("alignment_am_gmm"),
py::arg("trans_model"),
py::arg("am_gmm"),
py::arg("feats"),
Expand All @@ -567,13 +596,12 @@ void pybind_fmllr_diag_gmm(py::module &m) {
.def("compute_transform",
[](PyClass& f, const AmDiagGmm &am_gmm,
const FmllrOptions &fmllr_opts){
py::gil_scoped_release gil_release;
BaseFloat impr, tot_t;
Matrix<BaseFloat> transform(am_gmm.Dim(), am_gmm.Dim()+1);
{
transform.SetUnit();
f.Update(fmllr_opts, &transform, &impr, &tot_t);
return transform;
return py::make_tuple(transform, impr, tot_t);
}
},
py::arg("am_gmm"),
Expand Down
4 changes: 3 additions & 1 deletion kalpy/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class Segment:
channel: typing.Optional[int] = 0

def load_audio(self):
duration = self.end - self.begin
duration = None
if self.end is not None and self.begin is not None:
duration = self.end - self.begin
y, _ = librosa.load(
self.file_path,
sr=16000,
Expand Down
19 changes: 16 additions & 3 deletions kalpy/feat/cmvn.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,26 @@ def compute_cmvn_for_export(

Returns
-------
:class:`_kalpy.matrix.FloatMatrixBase`
:class:`_kalpy.matrix.DoubleMatrix`
Feature matrix for the segment
"""
cmvn, num_done, num_error = transform.calculate_cmvn(utterance_list, feature_reader)
if False:
cmvn_stats = DoubleMatrix()
is_init = False
num_done = 0
num_error = 0
for utt in utterance_list:
print(utt)
feats = feature_reader.Value(utt)
if not is_init:
transform.InitCmvnStats(feats.NumCols(), cmvn_stats)
is_init = True
transform.AccCmvnStats(feats, None, cmvn_stats)
num_done += 1
cmvn_stats, num_done, num_error = transform.calculate_cmvn(utterance_list, feature_reader)
self.num_done += num_done
self.num_error += num_error
return cmvn
return cmvn_stats

def export_cmvn(
self,
Expand Down
Loading