In [1]:
import sys
# Put the parent directory at the front of the import path, but only once,
# so re-running this cell does not add duplicate entries.
if "../" not in sys.path:
    sys.path.insert(0, "../")

## Building utilities for marynlp

Creating multiple utilities for building models, downloading files, and a ton of other things.

In [2]:
## Module downloading sequence

In [3]:
## Using the SED Morpheme template
from marynlp.utils.storage import download as dl
from marynlp.utils import storage

from pathlib import Path

# Set up the download bucket used by the cells below.
# NOTE(review): the arguments look like (credentials key file, bucket name) —
# confirm against `storage.get_bucket`.
bucket = storage.get_bucket(
    "../resources/mary_africa_credentials_key.json",
    "marynlp-private",
)

In [4]:
# Downloading the contents needed

def get_file_infer_google_cloud(blob_name, bucket):
    """Return the local store path for ``blob_name``, downloading it if missing.

    Args:
        blob_name: Name of the blob. It is used both to look up the path in
            the local store and as the object to download.
        bucket: Bucket handle passed through to the download helper.

    Returns:
        The file path as a ``str``.
    """
    cached_path = storage.local.get_path_from_store(blob_name)

    # Already present in the local store: nothing to download.
    if Path(cached_path).exists():
        return str(cached_path)

    # Not present yet: fetch it from the bucket and use the path the helper returns.
    return str(dl.file_from_google_to_store(blob_name, bucket))


In [5]:
from experimental.sed import MorphologyAnalyzer
from typing import List, Tuple

import logging
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)

class WordBreaker(object):
    """Breaks a single word into pieces using a morphology analyzer.

    Args:
        ma: Analyzer whose ``break_text`` method is called with a list of
            words and whose result is indexed by word.
    """

    def __init__(self, ma: MorphologyAnalyzer):
        self.ma_ = ma

    def break_word(self, word: str) -> Tuple[str, ...]:
        """Break ``word`` into the pieces reported by the analyzer.

        Args:
            word: The word to analyze.

        Returns:
            A tuple (of any length) holding the pieces the analyzer yields
            for ``word``, in the order it yields them.
        """
        # `break_text` works on a batch of words and its result is looked up
        # by word, so wrap the single word in a list and pull its entry out.
        return tuple(self.ma_.break_text([word])[word])
    
# Resolve the morpheme template (fetched from the bucket if it is not already
# in the local store), build the analyzer from it, and break a sample word.
morpheme_template_file = get_file_infer_google_cloud(
    "models/sed_morpheme_template.txt", bucket
)
analyzer = MorphologyAnalyzer(morpheme_template_file)
bk = WordBreaker(analyzer)
bk.break_word("walisema")

('wa', 'lis', 'ma')

### Voice sample

Making the model file work as needed

In [6]:
# The voice model is expected to be in the local store already; this cell
# only resolves its path and does not download anything.
voice_model_path = str(
    storage.local.get_path_from_store("voice/mnm-early-6k-ep100-bc64")
)
voice_model_path

'/home/iam-kevin/.marynlp/store/voice/mnm-early-6k-ep100-bc64'

In [35]:
import torch
from torchaudio.transforms import MelSpectrogram
import torch.nn as nn
import torch.nn.functional as F

from pathlib import Path

import pandas as pd

from overrides import overrides

from experimental.voice.text_encoders import CharacterEncoder, GreedyEncoder
from experimental.voice.nn import SpeechRecognitionModel


# SPEECH_RECOGNITION_MODELS = {
#     "mnm23-early": "voice/mnm-early-6k-ep100-bc64.zip"
# }


class InferenceSpeechRecognitionV23(object):
    """Inference wrapper around a trained speech recognition model.

    The wrapped model is switched to evaluation mode on construction, and
    recognition runs without gradient tracking.

    Args:
        speech_recogn_model: A trained speech recognition model.
        character_encoder: Character encoder handed to ``GreedyEncoder``,
            which decodes the model output.
        test_transformer: Module applied to the input tensor before it is
            passed to the model.
    """
    def __init__(self,
                 speech_recogn_model: SpeechRecognitionModel,
                 character_encoder: CharacterEncoder,
                 test_transformer: nn.Module):

        self.speech_model = speech_recogn_model
        # This wrapper is inference-only: evaluation mode disables train-time
        # behaviour (e.g. dropout) so results do not vary between calls.
        self.speech_model.eval()
        self.greedy_encoder = GreedyEncoder(character_encoder)
        self.test_transform = test_transformer

    def recognize(self, spect_tensor: torch.Tensor):
        """Run the model on ``spect_tensor`` and decode its output.

        Args:
            spect_tensor: Input tensor; ``test_transform`` is applied to it
                before the model sees it.
                NOTE(review): despite the name, this is presumably the raw
                waveform when the transform is a spectrogram — confirm.

        Returns:
            Whatever ``GreedyEncoder.decode_test`` returns for the
            log-softmax of the model output.
        """
        # Gradients are never needed here; skipping autograd saves memory.
        with torch.no_grad():
            # Transform the input, then insert a new axis at position 1
            # (assumed to be the channel axis the model expects — TODO confirm).
            features = self.test_transform(spect_tensor).unsqueeze(1)
            logits = self.speech_model(features)
            log_probs = F.log_softmax(logits, dim=2)

        return self.greedy_encoder.decode_test(log_probs)


In [36]:
!ls {voice_model_path}/mnm-early-6k-ep100-bc64/final_model

/home/iam-kevin/.marynlp/store/voice/mnm-early-6k-ep100-bc64/mnm-early-6k-ep100-bc64/final_model


In [37]:
import csv

# Both artefacts are read from the model directory inside the local store.
char_summary_csv_file = f"{voice_model_path}/mnm-early-6k-ep100-bc64/char_summary.csv"
model_full_path = f"{voice_model_path}/mnm-early-6k-ep100-bc64/final_model"

# The "char" column of the summary CSV provides the characters for the encoder.
# `newline=''` is what the csv module requires of files handed to its readers.
with open(char_summary_csv_file, 'r', newline='') as csvfilebuffer:
    reader = csv.DictReader(csvfilebuffer)
    ls = [row["char"] for row in reader]

char_encoder = CharacterEncoder(data=ls)


default_model_params = dict(
    n_cnn_layers=3,
    n_rnn_layers=5,
    rnn_dim=512,
    n_feats=128,
)

# def load_model_from_path(model_path):
# One class per encoded character plus one extra
# (presumably the CTC blank — TODO confirm against the training code).
speech_model = SpeechRecognitionModel(n_class=char_encoder.count + 1, **default_model_params)
# NOTE(review): `torch.load` unpickles the file; only load checkpoints from a trusted source.
speech_model.load_state_dict(torch.load(str(model_full_path), map_location=torch.device('cpu')))
# The model is only used for inference below, so switch off train-time
# behaviour (e.g. dropout) before handing it to the recognizer.
speech_model.eval()

# `MelSpectrogram` is the same transformation that was used when training the model,
#  so it is safe to say that the same should be used for inference.

# NOTE: keep in mind that this is NOT the version of pytorch that was used in the model creation
inference_speech_model = InferenceSpeechRecognitionV23(speech_model, char_encoder, MelSpectrogram())
inference_speech_model

<__main__.InferenceSpeechRecognitionV23 at 0x7f1a135b3150>