## Takeaway name classifier deep learning live demo


In [1]:
# (Re)load the autoreload extension so edits to imported modules are picked up
# without restarting the kernel.
%reload_ext autoreload
# Mode 2: reload all modules before each cell is executed.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from fastai import *
from fastai.text import *
import string
from unidecode import unidecode


## Build a Fastai Data Loader

Load the dataframe and extract the row indexes for the training, validation and balanced training sets.

In [3]:
# Read the cleaned dataset; the boolean `valid` column marks validation rows
# and the `bal` column gives how many times each row is repeated when
# building the balanced training set.
df = pd.read_csv('after_21_11_2019_meeting_clean.csv')

# Index labels of the training rows (valid is False) and validation rows.
train_idx = df[~df.valid].index
valid_idx = df[df.valid].index

# Balanced index list: each row label appears `bal` times.
bal_idx = []
for row_label, n_copies in zip(df.index, df.bal):
    bal_idx.extend([row_label] * n_copies)



In [4]:
class LetterTokenizer(BaseTokenizer):
    "Character level tokenizer function."
    # `lang` is accepted but ignored; presumably fastai's Tokenizer passes a
    # language code when it instantiates the tokenizer class -- confirm.
    def __init__(self, lang): pass

    def tokenizer(self, t:str) -> List[str]:
        """Split `t` into one token per character.

        Every occurrence of the `BOS` marker is kept whole as a single token;
        all other characters (including spaces) become individual tokens.

        Returns the list of tokens in their original order.
        """
        out = []
        i = 0
        n = len(t)
        while i < n:
            # Check for the marker at position i in place; slicing t[i:] here
            # would copy the remaining tail of the string at every character.
            if t.startswith(BOS, i):
                out.append(BOS)
                i += len(BOS)
            else:
                out.append(t[i])
                i += 1
        return out

    # No-op: this tokenizer has no special-case tokens to register.
    def add_special_cases(self, toks:Collection[str]): pass

In [5]:
# Character vocabulary: index 0 is UNK, index 1 is BOS, followed by a-z, the
# punctuation allowed in names and the digits 0-9 (2 + 26 + 8 + 10 = 46
# entries, matching the Embedding(46, ...) shown for the loaded model).
# NOTE(review): the loaded model reports padding_idx=1, which is the slot BOS
# occupies here -- confirm this overlap is intended. Reordering the list would
# presumably invalidate the weights loaded from 'single_cat_small'.
itos = [
    UNK, BOS,
    *string.ascii_lowercase,
    *" -'@&)(.",
    *"0123456789",
]

In [6]:
# Fixed character-level vocabulary built from the hand-written `itos` list.
vocab = Vocab(itos)
# Character tokenizer; explicit empty rule lists are passed so that no
# pre- or post-processing rules are supplied alongside LetterTokenizer.
tokenizer = Tokenizer(tok_func=LetterTokenizer, pre_rules=[], post_rules=[])

In [7]:
# train_idx / bal_idx / valid_idx hold index *labels* (they were taken from
# df.index), so rows are selected with .loc; feeding labels to .iloc is only
# correct while the frame happens to have a default 0..n-1 index.
# Columns are still picked by position (0 and 3).
# NOTE(review): from_df is later called without text_cols/label_cols, so
# presumably column 0 is the category and column 3 the name -- confirm.
train_df = df.loc[train_idx].iloc[:, [0, 3]]
bal_df = df.loc[bal_idx].iloc[:, [0, 3]]    # rows repeated as listed in bal_idx
valid_df = df.loc[valid_idx].iloc[:, [0, 3]]

## Classifier with Just Eat data

In [8]:
# Build the classification DataBunch from the balanced training frame and the
# validation frame, using the character tokenizer and fixed vocabulary defined
# earlier. mark_fields=False and a batch size of 512 are passed through to
# fastai unchanged.
data = TextClasDataBunch.from_df(
    path='.',
    train_df=bal_df,
    valid_df=valid_df,
    tokenizer=tokenizer,
    vocab=vocab,
    mark_fields=False,
    bs=512,
)

In [9]:
# Text classifier learner with an AWD_LSTM architecture on the character-level
# DataBunch; drop_mult and bptt are passed to fastai as given.
learn = text_classifier_learner(
    data,
    AWD_LSTM,
    drop_mult=0.4,
    bptt=70,
)


In [10]:
# Restore the previously trained weights saved as 'single_cat_small'.
# As the last expression of the cell, the returned learner is displayed below.
learn.load('single_cat_small')

RNNLearner(data=TextClasDataBunch;

Train: LabelList (50000 items)
x: TextList
xxbos   xxunk f a l a f i l,xxbos   xxunk f a l a f i l,xxbos   xxunk f a l a f i l,xxbos   xxunk f a l a f i l,xxbos   xxunk f a l a f i l
y: CategoryList
Other,Other,Other,Other,Other
Path: .;

Valid: LabelList (5000 items)
x: TextList
xxbos   2 4 7,xxbos   xxunk d e s s e r t s,xxbos   1 0   t o   1 0   i n   d e l h i,xxbos   1 3   0 2   d e s s e r t   c a f e,xxbos   1 4 2 3   c h i n a   k i t c h e n
y: CategoryList
Burgers,Desserts,Indian,Desserts,Chinese
Path: .;

Test: None, model=SequentialRNN(
  (0): MultiBatchEncoder(
    (module): AWD_LSTM(
      (encoder): Embedding(46, 400, padding_idx=1)
      (encoder_dp): EmbeddingDropout(
        (emb): Embedding(46, 400, padding_idx=1)
      )
      (rnns): ModuleList(
        (0): WeightDropout(
          (module): LSTM(400, 1150, batch_first=True)
        )
        (1): WeightDropout(
          (module): LSTM(1150, 1150, batch_first=True)
        )
  

In [11]:
# predict returns a triple: the predicted Category, its class index as a
# tensor, and a tensor with one probability per class.
learn.predict("mcdonalds")

(Category Burgers,
 tensor(0),
 tensor([0.4355, 0.0104, 0.0069, 0.2434, 0.0332, 0.0399, 0.0934, 0.0090, 0.1201,
         0.0082]))

In [12]:
# Recorded output is near-certain: ~0.99 for the predicted class.
learn.predict("codfather")

(Category Fish & Chips,
 tensor(4),
 tensor([1.5580e-03, 3.1403e-04, 8.1530e-05, 4.2188e-04, 9.9319e-01, 1.8859e-04,
         6.3263e-04, 8.6976e-05, 3.4305e-03, 9.9172e-05]))

In [13]:
# Single-word name; the recorded top probability is ~0.55.
learn.predict("sapna")

(Category Indian,
 tensor(5),
 tensor([0.0337, 0.0355, 0.0686, 0.0039, 0.0087, 0.5477, 0.0711, 0.1509, 0.0623,
         0.0176]))

In [14]:
# Low-confidence example: the recorded top probability is only ~0.20, with
# several classes close behind.
learn.predict("fire and dough")

(Category Desserts,
 tensor(3),
 tensor([0.1098, 0.0801, 0.0437, 0.1995, 0.0397, 0.1736, 0.1095, 0.0361, 0.1297,
         0.0783]))

In [15]:
# Two-word name; the recorded top probability is ~0.85.
learn.predict("top chef")

(Category Chinese,
 tensor(2),
 tensor([1.5544e-03, 1.2759e-02, 8.5258e-01, 5.9515e-04, 3.1316e-03, 2.2290e-02,
         4.8611e-02, 9.7889e-03, 3.1811e-02, 1.6876e-02]))

In [16]:
# The recorded top probability is ~0.46, so the predicted class leads without
# a majority.
learn.predict("papa johns")

(Category Pizza,
 tensor(8),
 tensor([0.1552, 0.0066, 0.0363, 0.0036, 0.1184, 0.1063, 0.0786, 0.0043, 0.4622,
         0.0286]))

In [18]:
# Name with no cuisine keyword; the recorded top probability is ~0.62.
learn.predict("gormans")

(Category Fish & Chips,
 tensor(4),
 tensor([0.0680, 0.0146, 0.0171, 0.0006, 0.6248, 0.0195, 0.1366, 0.0016, 0.1045,
         0.0126]))