In [1]:
import os
import pathlib
from pathlib import Path

import ipywidgets as widgets
from fastai.text.all import *
from fastai.callback.fp16 import *
# for download
import gdown

In [2]:
# Local filename of the exported fastai learner; also the download target.
model_file = 'fastai_133_langs_v3'

# Google Drive location of the exported learner.
MODEL_URL = "https://drive.google.com/uc?id=1Qf8ZMbzoEFSGxQ04DOPo01BEcL-43qeu"

# Download only when no local copy exists, so that re-running the notebook
# (Restart & Run All) does not fetch the model again.
if not Path(model_file).exists():
    # gdown.download can return None when the download fails; stop here with a
    # clear message instead of failing later when the learner is loaded.
    if gdown.download(MODEL_URL, model_file, quiet=True) is None:
        raise RuntimeError(f"Could not download the model from {MODEL_URL}")

# Display the path that the following cells load the learner from.
model_file


'fastai_133_langs_v3'

In [3]:
# Character-level tokenizer used by the fastai model; it has to be defined here so that the exported learner can be loaded.
from collections.abc import Iterable

def flatten(l):
    """Lazily yield the leaf elements of an arbitrarily nested iterable.

    Strings and bytes count as leaves: they are yielded whole instead of being
    split into their characters. Any other iterable is descended into
    recursively, depth first, preserving order.
    """
    for item in l:
        is_leaf = isinstance(item, (str, bytes)) or not isinstance(item, Iterable)
        if is_leaf:
            yield item
            continue
        yield from flatten(item)
class CharTokenizer():
    """Character-level tokenizer.

    Calling an instance with an iterable of strings returns a generator that
    yields one list of tokens per input string. Each string is split on
    whitespace; fastai special tokens are kept whole and every other word is
    broken into its individual characters. Whitespace itself produces no token.

    Example: 'xxbos hello' -> ['xxbos', 'h', 'e', 'l', 'l', 'o']
    """

    # Special fastai tokens that must never be split into characters.
    # Defined once on the class (instead of on every call) as a set for O(1) lookup.
    SPECIAL_TOKENS = frozenset([
        'xxunk', 'xxpad', 'xxbos', 'xxeos', 'xxfld',
        'xxrep', 'xxwrep', 'xxup', 'xxmaj',
    ])

    def __call__(self, items):
        """Tokenize every string in ``items``.

        Args:
            items: iterable of strings.

        Returns:
            A generator yielding, for each input string in order, the list of
            its tokens. Token lists are produced lazily as the generator is
            consumed.
        """
        return (self._tokenize(text) for text in items)

    def _tokenize(self, text):
        """Return the token list for a single string."""
        tokens = []
        for word in text.split():
            if word in self.SPECIAL_TOKENS:
                # Special tokens stay intact.
                tokens.append(word)
            else:
                # Extending a list with a string appends its characters one by one.
                tokens.extend(word)
        return tokens


In [4]:
# Load the exported learner from `model_file`.
#
# NOTE(security): load_learner unpickles the file, and unpickling can execute
# arbitrary code. Only load model files that come from a source you trust.
#
# The tokenizer class defined earlier in this notebook has to exist before this
# cell runs (presumably because the export refers to it by name).
#
# On Windows, pathlib.PosixPath is temporarily pointed at pathlib.WindowsPath so
# that an export created on Linux/macOS can be loaded there; the original class
# is restored afterwards so the patch does not leak into the rest of the session.
if os.name == "nt":
    _original_posix_path = pathlib.PosixPath
    pathlib.PosixPath = pathlib.WindowsPath
    try:
        learner = load_learner(model_file)
    finally:
        pathlib.PosixPath = _original_posix_path
else:
    learner = load_learner(model_file)

In [7]:
# ipywidgets: the controls that make up the language-detection demo.

# Free-text field holding the sentence to classify.
inp_text = widgets.Text(
    description='Text:',
    placeholder='Type your text',
    disabled=False,
)

# Static HTML block listing sample sentences in several languages, one per line.
lbl_example = widgets.HTML(
    description='Example texts:',
    placeholder='Some HTML',
    value="<br/>".join([
        "彼の発言で私の希望は失われた。",
        "他的話讓我失去了希望。",
        "His remarks lost my hope.",
        "Ses remarques m'ont fait perdre espoir.",
        "Sus comentarios perdieron mi esperanza.",
        "उनकी टिप्पणियों ने मेरी आशा खो दी।",
    ]),
)

# Labels that display the prediction and its confidence.
lbl_pred, lbl_conf = widgets.Label(), widgets.Label()

# Button that triggers the classification.
btn_run = widgets.Button(description='Detect')

In [8]:
# trigger function
def on_click_classify(change):
    """Classify the text in ``inp_text`` and show the result in the labels.

    Used as the click handler of ``btn_run``. ``change`` is the argument passed
    in by the widget callback; it is not used.

    Writes the predicted class to ``lbl_pred`` and the probability of that class
    (four decimal places) to ``lbl_conf``.
    """
    text = inp_text.value.strip()

    # Nothing to classify: update the labels instead of asking the model to
    # predict on an empty string.
    if not text:
        lbl_pred.value = 'Prediction: (enter some text first)'
        lbl_conf.value = ''
        return

    pred, pred_idx, probs = learner.predict(text)
    lbl_pred.value = f'Prediction: {pred}'
    # probs holds one probability per class; pred_idx selects the predicted one.
    lbl_conf.value = f'Confidence: {probs[pred_idx]:.04f}'

# Run the classification whenever the button is clicked.
btn_run.on_click(on_click_classify)


In [9]:
# Final layout: title, input field, example sentences and button stacked
# vertically, followed by a bordered box with the prediction and confidence labels.
widgets.VBox(children=[
    widgets.Label(value='Detect Language!'),
    inp_text,
    lbl_example,
    btn_run,
    widgets.VBox(
        children=[lbl_pred, lbl_conf],
        layout=widgets.Layout(border='1px solid black'),
    ),
])

VBox(children=(Label(value='Detect Language!'), Text(value='彼の発言で私の希望は失われた。', description='Text:', placeholder…