## Build model

In [None]:
%%bash
# Download the cooking.stackexchange corpus and split it into training and
# validation files under ./data.

# Stop at the first failing command so a broken download cannot silently
# produce empty or truncated train/valid files.
set -euo pipefail

mkdir -p ./data

# --fail turns HTTP errors into a non-zero exit status instead of saving the
# error page as the archive; --location follows redirects.
curl --fail --location https://dl.fbaipublicfiles.com/fasttext/data/cooking.stackexchange.tar.gz --output data/cooking.stackexchange.tar.gz
tar xvzf data/cooking.stackexchange.tar.gz -C data

# First 12404 lines -> training set, last 3000 lines -> validation set.
# The two splits only stay disjoint if the file has at least 15404 lines.
head -n 12404 data/cooking.stackexchange.txt > data/cooking.train
tail -n 3000 data/cooking.stackexchange.txt > data/cooking.valid

In [None]:
import fasttext
import numpy as np

In [None]:
# Train a supervised fastText classifier on the training split produced by the
# download cell; loss="hs" selects fastText's hierarchical-softmax loss.
model = fasttext.train_supervised(
    input="./data/cooking.train",
    lr=0.8,
    epoch=70,
    loss="hs",
)

In [None]:
# Evaluate on the validation split written by the download cell; whatever
# `model.test` returns is shown as the cell output.
model.test("./data/cooking.valid")

In [None]:
# Labels exactly as the trained model reports them; the next cell strips the
# "__label__" prefix from each entry.
class_names = model.labels

In [None]:
# Drop the "__label__" marker so only the bare class name remains.
class_names = [raw_label.replace("__label__", "") for raw_label in class_names]

In [None]:
# Total number of classes. Kept as a module-level name because later cells
# refer to `k`.
k = len(class_names)

# Bidirectional lookup between a class name and its position in `class_names`.
# `enumerate` replaces `zip(class_names, range(k))`, whose comprehension
# variable was also called `k` and shadowed the class count above.
idx_to_labels = dict(enumerate(class_names))
labels_to_idx = {label: idx for idx, label in enumerate(class_names)}

In [None]:
# Sanity check: show the index assigned to the 'cake' label and the class count.
labels_to_idx['cake'], k

In [None]:
def get_predictions(model, text_list):
    """Return per-class probabilities for each text, ordered like `class_names`.

    Args:
        model: Object whose `predict(text_list, k=...)` returns a pair
            `(labels, probabilities)` holding one sequence per input text
            (the trained fastText model in this notebook).
        text_list: Texts to classify; passed to `model.predict` unchanged.

    Returns:
        A 2-D NumPy array with one row per prediction returned by the model
        and one column per entry of the module-level `class_names`. Column
        `j` holds the probability reported for `class_names[j]`; classes the
        model did not return get 0.0. An empty input yields an array of
        shape `(0, len(class_names))`.
    """
    # Derive the label count locally instead of relying on the global `k`.
    n_classes = len(class_names)

    # Ask for every label so each class can receive a probability.
    labels_per_text, probs_per_text = model.predict(text_list, k=n_classes)

    rows = []
    for labels, probs in zip(labels_per_text, probs_per_text):
        # Map bare class name -> probability for this single text.
        prob_by_class = {
            label.replace("__label__", ""): prob
            for label, prob in zip(labels, probs)
        }
        # Re-order into the fixed `class_names` column order.
        rows.append([prob_by_class.get(name, 0.0) for name in class_names])

    # `np.array([])` is 1-D; the reshape keeps the result 2-D for empty input
    # and is a no-op otherwise.
    return np.array(rows).reshape(len(rows), n_classes)

In [None]:
%%time
results = get_predictions(model, ["cake"]*1000*10)

## Unbox

In [None]:
import getpass
import os

import unboxapi

# Point the client at an on-prem deployment.
unboxapi.DEPLOYMENT = unboxapi.DeploymentType.ONPREM
unboxapi.api.UNBOX_ENDPOINT = "http://0.0.0.0:8080/api" # Use this for local testing

# Never commit API keys to the notebook: read the key from the UNBOX_API_KEY
# environment variable, or prompt for it when the variable is unset.
# NOTE(review): the key previously hardcoded in this cell should be treated as
# leaked and rotated.
unbox_api_key = os.environ.get("UNBOX_API_KEY") or getpass.getpass("Unbox API key: ")
client = unboxapi.UnboxClient(unbox_api_key)

### Upload model

In [None]:
from unboxapi.models import ModelType

# Register the trained model with Unbox together with the prediction function
# defined above, then display the resulting record as a dict.
ml_model = client.add_model(
    name="Cooking Fast Text",
    description="this is my fasttext model",
    model_type=ModelType.fasttext,
    model=model,
    function=get_predictions,
    class_names=class_names,
)
ml_model.to_dict()

### Upload dataset

In [None]:
# TODO(@Gabe): fix this part. No visible cell in this notebook creates
# "cooking_validation.csv", so it must already exist in the working directory.
# NOTE(review): the upload cell below names 'text' and 'label' columns —
# confirm the CSV provides them.
import pandas as pd
small_df = pd.read_csv("cooking_validation.csv")

In [None]:
# Upload the validation frame to Unbox, naming the text and label columns,
# then display the resulting record as a dict.
dataset = client.add_dataset(
    name="Small Cooking Dataset",
    description="this is my fasttext validation dataset",
    df=small_df,
    text_column_name="text",
    label_column_name="label",
    class_names=class_names,
)
dataset.to_dict()