In [None]:
"""
:param str train_dataset_path: Path to the train data input file
:param str conf_path: Path to the hyperparameters configuration input file
:param str model_path: Path to the model output file

:dvc-in train_dataset_path: ./data/intermediate/train_dataset.txt
:dvc-in conf_path: ./data/input/conf.json
:dvc-out model_path: ./data/model/classifier.bin
"""
# The assignments below are notebook-only defaults. As originally noted here,
# the code following the docstring in this cell is not added to the generated
# Python script; it only gives the parameters a value for interactive runs.
# NOTE(review): these paths start with '../' whereas the :dvc-in/:dvc-out paths
# declared in the docstring start with './' - presumably the notebook is run
# from a sub-directory of the project root; confirm.
train_dataset_path = '../data/intermediate/train_dataset.txt'
conf_path = '../data/input/conf.json'
model_path = '../data/model/classifier.bin'

In [None]:
# No effect
# Inspection-only cell: load the training file so its content can be checked
# by eye before training.
with open(train_dataset_path, 'r') as fd:
    train_data_lines = fd.readlines()
# Preview only the first lines: displaying the whole list would flood the cell
# output (and bloat the saved notebook) for any real-size training file.
train_data_lines[:10]

In [None]:
import json
# Read the hyperparameter configuration. The encoding is given explicitly
# because JSON documents are UTF-8, while open() would otherwise fall back to
# the platform's locale encoding.
with open(conf_path, 'r', encoding='utf-8') as fd:
    conf = json.load(fd)

In [None]:
# No effect
# Inspection-only cell: display the configuration loaded from conf_path.
conf

In [None]:
import fasttext as ft
from tempfile import TemporaryDirectory
import shutil
from os import remove, makedirs
from os.path import join, exists, dirname
def train(fasttext_data_path: str, fasttext_model_path: str, epochs: int, learning_rate: float) -> None:
    """
    Train a supervised fastText model and write it to an exact output path.

    :param str fasttext_data_path: Path to the training data input file
    :param str fasttext_model_path: Path the trained model file is written to
    :param int epochs: Value forwarded to fastText as ``epoch``
    :param float learning_rate: Value forwarded to fastText as ``lr``
    """
    with TemporaryDirectory() as tmp_dir:
        # Fasttext automatically adds .bin at the end of the output model file
        # name, so the model is first written to a temporary location to keep
        # control of the final output file path.
        model_tmp_path = join(tmp_dir, 'model')
        ft.supervised(fasttext_data_path, model_tmp_path, lr=learning_rate, epoch=epochs, silent=0)

        # Drop any previous output first so the copy below creates a fresh file
        # instead of writing into whatever currently sits at that path.
        if exists(fasttext_model_path):
            remove(fasttext_model_path)

        # dirname() returns '' for a bare file name and makedirs('') raises
        # FileNotFoundError, so only create the directory when there is one.
        model_dir = dirname(fasttext_model_path)
        if model_dir:
            makedirs(model_dir, exist_ok=True)

        shutil.copy(f'{model_tmp_path}.bin', fasttext_model_path)



In [None]:
# Run the training with the hyperparameters read from the configuration file.
train(
    fasttext_data_path=train_dataset_path,
    fasttext_model_path=model_path,
    epochs=conf['epoch'],
    learning_rate=conf['learning_rate'],
)